diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake
index 34e487b10c19fd36d2aa4c35d86b71ed0c274747..60286d10bb15950dd262c02c107b0515db06f1dd 100644
--- a/scripts/cmake/genskel.cmake
+++ b/scripts/cmake/genskel.cmake
@@ -21,8 +21,8 @@ message(STATUS "Include Directories: ${include_dirs}")
 macro(genskel name)
     message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
     if (CMAKE_BUILD_TYPE MATCHES Debug)
-        add_definitions(-DBPF_DEBUG)
-        message(STATUS "add definition: -DBPF_DEBUG")
+        add_definitions(-DCOOLBPF_DEBUG)
+        message(STATUS "add definition: -DCOOLBPF_DEBUG")
     endif ()
     SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c)
     SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f2ddacdb5dc14aa114c896a8e8523b2d8cc8b28d..0365b5878e39310d381dfbe73571788d6328fda6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,18 +26,18 @@ if(NOT DEFINED INSTALL_INCLUDE_DIR)
     set(INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
 endif()
 
-add_subdirectory(bpf)
-add_subdirectory(security)
-if (ENABLE_PROFILE)
-    add_subdirectory(profiler)
-endif()
-
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in ${CMAKE_CURRENT_BINARY_DIR}/coolbpf.pc @ONLY)
 
 include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/bpf ${CMAKE_CURRENT_BINARY_DIR}/bpf)
 
+add_subdirectory(bpf)
+add_subdirectory(security)
+if (ENABLE_PROFILE)
+    add_subdirectory(profiler)
+endif()
+
 file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c ${CMAKE_CURRENT_SOURCE_DIR}/*.c)
 
 # share library
diff --git a/src/ebpf_log.h b/src/ebpf_log.h
index a19324d395a68c76ff809212b03518833434552b..fdca4685f1d4c3feddfede20b1e1b0d6b8a8f40d 100644
--- a/src/ebpf_log.h
+++ b/src/ebpf_log.h
@@ -4,13 +4,10 @@
 
 /* Macro to output debug logs to /sys/kernel/debug/tracing/trace_pipe */
 
-#ifdef BPF_DEBUG
-#define BPF_DEBUG(fmt, ...)                                        \
-    ({                                                             \
-        char ____fmt[] = fmt;                                      \
-        bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
-    })
+#ifdef COOLBPF_DEBUG
+#include <bpf/bpf_helpers.h>
+#define BPF_DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__)
 #else
 // No op
-#define BPF_DEBUG(fmt, ...)
+#define BPF_DEBUG(__fmt, ...)
 #endif
\ No newline at end of file
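Note that the compile definition must be spelled `-DCOOLBPF_DEBUG` for the `#ifdef COOLBPF_DEBUG` guard in `ebpf_log.h` to fire. Below is a minimal usage sketch of the reworked macro, not part of this diff: the hook name is illustrative, and `bpf_printk()` is assumed to come from libbpf's `bpf/bpf_helpers.h` (the include target was stripped in the hunk above). With `-DCOOLBPF_DEBUG` the macro expands to `bpf_printk()`, whose output is readable from `/sys/kernel/debug/tracing/trace_pipe`; without it, the call compiles away. `bpf_printk()` historically tolerates at most three format arguments, which is why the debug dumps removed later in this diff were split across two calls.

```c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "ebpf_log.h"

SEC("kprobe/security_file_permission")
int debug_probe(struct pt_regs *ctx)
{
    __u32 pid = bpf_get_current_pid_tgid() >> 32;

    BPF_DEBUG("[kprobe][debug_probe] pid:%u enter", pid); /* no-op in release builds */
    return 0;
}

char LICENSE[] SEC("license") = "GPL";
```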
diff --git a/src/security/bpf_common.h b/src/security/bpf_common.h
index 5551e455df1edf537ccd5c76a5324d12adfcaca7..63acceea50e827b5e4a0c55d71cf791ccc118a98 100644
--- a/src/security/bpf_common.h
+++ b/src/security/bpf_common.h
@@ -65,13 +65,6 @@ struct msg_test {
 #define BIT(nr) (1 << (nr))
 #define BIT_ULL(nr) (1ULL << (nr))
 
-#ifdef TETRAGON_BPF_DEBUG
-#include
-#define DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__)
-#else
-#define DEBUG(__fmt, ...)
-#endif
-
 #ifndef PATH_MAP_SIZE
 #define PATH_MAP_SIZE 4096
 #endif
diff --git a/src/security/bpf_exit.h b/src/security/bpf_exit.h
index 50c242e7ff9d4e068df92ec52c1c88662d1f7174..d47cbe1aba35efea606cfbd395699e29fb8113b6 100644
--- a/src/security/bpf_exit.h
+++ b/src/security/bpf_exit.h
@@ -39,7 +39,6 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid)
     enter = execve_map_get_noinit(tgid);
     if (!enter)
         return;
-    BPF_DEBUG("[kprobe][event_exit_send] pid:%u already enter.", tgid);
     if (enter->key.ktime) {
         struct task_struct *task = (struct task_struct *)bpf_get_current_task();
         size_t size = sizeof(struct msg_exit);
@@ -77,9 +76,7 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid)
 
         __event_get_cgroup_info(task, &kube);
 
-        BPF_DEBUG("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid);
         if (cgroup_rate(ctx, &kube, exit->common.ktime)) {
-            BPF_DEBUG("[kprobe][event_exit_send] pid:%u send event.", tgid);
             perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map,
                                      BPF_F_CURRENT_CPU, exit, size);
         }
diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h
index 4605de870bc7ca460a93ecd37e6a7361d4038a20..48ac76f4b056914a8d7d1a253991d59cbcc3089b 100644
--- a/src/security/bpf_process_event.h
+++ b/src/security/bpf_process_event.h
@@ -5,26 +5,27 @@
 #ifndef SYSAK_BPF_PROCESS_EVENT_H
 #define SYSAK_BPF_PROCESS_EVENT_H
 
-#include "../coolbpf.h"
 #include
 #include
-#include "bpf_common.h"
+#include "../coolbpf.h"
 #include "api.h"
 #include "bpf_cgroup.h"
+#include "bpf_common.h"
 #include "bpf_cred.h"
 #include "compiler.h"
+#include "ebpf_log.h"
 
 #define ENAMETOOLONG 36 /* File name too long */
 
-#define MAX_BUF_LEN 256
+#define MAX_BUF_LEN 4096
 
 struct buffer_heap_map_value {
     // Buffer is twice the needed size because of the verifier. In prepend_name
     // unit tests, the verifier figures out that 255 is enough and that the
     // buffer_offset will not overflow, but in the real use-case it looks like
     // it's forgetting about that.
-    unsigned char buf[MAX_BUF_LEN * 2];
+    unsigned char buf[MAX_BUF_LEN + 256];
 };
 
 struct {
@@ -124,8 +125,6 @@ prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namele
 
     *buflen -= (namelen + write_slash);
 
-    // This will not happen as buffer_offset cannot be above 256 and namelen is
-    // bound to 255. Needed to make the verifier happy in older kernels.
     if (namelen + write_slash > buffer_offset)
         return -ENAMETOOLONG;
 
@@ -335,7 +334,6 @@ __d_path_local(const struct path *path, char *buf, int *buflen, int *error)
     task = (struct task_struct *)bpf_get_current_task();
     bpf_probe_read(&fs, sizeof(fs), _(&task->fs));
     *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen);
-    // log_debug);
     return res;
 }
 
@@ -390,6 +388,8 @@ getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid)
 
     asm volatile("%[offset] &= 0x3ff;\n" ::[offset] "+r"(offset)
                  :);
+    if (size > 255)
+        size = 255;
     asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                  :);
     bpf_probe_read((char *)curr + offset, size, buffer);
 
@@ -544,16 +544,6 @@ __event_get_current_cgroup_name(struct cgroup *cgrp, struct msg_k8s *kube)
 {
     const char *name;
 
-    /* TODO: check if we have Tetragon cgroup configuration and that the
-     * tracking cgroup ID is set. If so then query the bpf map for
-     * the corresponding tracking cgroup name.
-     */
-
-    /* TODO: we gather current cgroup context, switch to tracker see above,
-     * and if that fails for any reason or if we don't have the cgroup name
-     * of tracker, then we can continue with current context.
-     */
-
     name = get_cgroup_name(cgrp);
     if (name)
         bpf_probe_read_str(kube->docker_id, KN_NAME_LENGTH, name);
@@ -603,4 +593,4 @@ __event_get_cgroup_info(struct task_struct *task, struct msg_k8s *kube)
 
     return flags;
 }
-#endif //SYSAK_BPF_PROCESS_EVENT_H
+#endif // SYSAK_BPF_PROCESS_EVENT_H
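The `getcwd()` hunk above shows the bounding idiom this patch series leans on: a C-level clamp followed by an inline-asm mask. The sketch below (illustrative names; not from this repo) spells out why both halves are needed — the asm mask survives clang's optimizer, so the verifier sees a hard `[0, 255]` bound on the register, while the preceding branch keeps the value semantically correct for sizes above 255 that the mask alone would mangle.

```c
static __always_inline void bounded_copy(char *dst /* 256 bytes */,
                                         const char *src, int size)
{
    /* Semantic clamp: without it, size == 300 would be masked to
     * 300 & 0xff == 44 and silently copy the wrong amount. */
    if (size > 255)
        size = 255;
    /* Verifier clamp: the inline asm cannot be optimized away, so the
     * register holding size reaches the verifier with a visible 0..255
     * bound, proving dst cannot be overflowed. */
    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                 :);
    bpf_probe_read(dst, size, src);
}
```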
diff --git a/src/security/bpf_process_event_type.h b/src/security/bpf_process_event_type.h
index 6fe580de1454cd377de9bbaa8606386d6ea22aa9..13bc12420cb4809c3e6a668f7d93c6dd7b632c16 100644
--- a/src/security/bpf_process_event_type.h
+++ b/src/security/bpf_process_event_type.h
@@ -122,6 +122,7 @@
 #define EVENT_ERROR_PATH_COMPONENTS 0x400000
 #define EVENT_DATA_FILENAME 0x800000
 #define EVENT_DATA_ARGS 0x1000000
+#define EVENT_IN_INIT_TREE 0x2000000
 
 #define EVENT_COMMON_FLAG_CLONE 0x01
 
@@ -247,8 +248,6 @@ struct msg_ns {
 }; // All fields aligned so no 'packed' attribute.
 
 struct msg_k8s {
-    __u32 net_ns;
-    __u32 cid;
     __u64 cgrpid;
     char docker_id[DOCKER_ID_LENGTH];
 }; // All fields aligned so no 'packed' attribute.
 
@@ -256,27 +255,19 @@ struct msg_k8s {
 #define BINARY_PATH_MAX_LEN 256
 
 struct heap_exe {
-    // because of verifier limitations, this has to be 2 * 256 bytes while 256
-    // should be theoretically sufficient, and actually is, in unit tests.
-    char buf[BINARY_PATH_MAX_LEN * 2];
-    // offset points to the start of the path in the above buffer. Use offset to
-    // read the path in the buffer since it's written from the end.
-    char *off;
+    char buf[BINARY_PATH_MAX_LEN];
     __u32 len;
     __u32 error;
 }; // All fields aligned so no 'packed' attribute.
 
-#define EXECVE_EVENT_COMMON_MEMBERS    \
-    struct msg_common common;          \
-    struct msg_k8s kube;               \
-    struct msg_execve_key parent;      \
-    __u64 parent_flags;                \
-    struct msg_cred creds;             \
-    struct msg_ns ns;                  \
-    struct msg_execve_key cleanup_key;
-
 struct msg_execve_event {
-    EXECVE_EVENT_COMMON_MEMBERS
+    struct msg_common common;
+    struct msg_k8s kube;
+    struct msg_execve_key parent;
+    __u64 parent_flags;
+    struct msg_cred creds;
+    struct msg_ns ns;
+    struct msg_execve_key cleanup_key;
     /* if add anything above please also update the args of
      * validate_msg_execve_size() in bpf_execve_event.c */
     union {
diff --git a/src/security/data_event.h b/src/security/data_event.h
index 24ae14fbbb43fb41130918428ea642badb1a0553..e34a956f0dc025b5d3bda81deee3efbe202ef466 100644
--- a/src/security/data_event.h
+++ b/src/security/data_event.h
@@ -33,7 +33,9 @@ __do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes)
         return err;
 
     msg->common.size = offsetof(struct msg_data, arg) + bytes;
-    perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size);
+    err = bpf_perf_event_output(ctx, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size);
+    if (err < 0)
+        return err;
     return bytes;
 b:
     return -1;
@@ -43,13 +45,13 @@ FUNC_LOCAL long
 do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 {
     size_t rd_bytes = 0;
-    int err, i __maybe_unused;
+    int err = 0, i __maybe_unused;
 
 #ifdef __LARGE_BPF_PROG
     for (i = 0; i < 10; i++) {
         err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes);
         if (err < 0)
-            return err;
+            goto error;
         rd_bytes += err;
         if (rd_bytes == bytes)
             return rd_bytes;
@@ -58,7 +60,7 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 #define BYTES_COPY                                                \
     err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); \
     if (err < 0)                                                  \
-        return err;                                               \
+        goto error;                                               \
     rd_bytes += err;                                              \
     if (rd_bytes == bytes)                                        \
         return rd_bytes;
@@ -73,6 +75,9 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 
     /* leftover */
     return rd_bytes;
+error:
+    perf_event_output_update_error_metric(MSG_OP_DATA, err);
+    return err;
 }
 
 FUNC_LOCAL long
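For orientation, here is a condensed model of the reworked `do_bytes()` control flow under `__LARGE_BPF_PROG` (the `BYTES_COPY` macro chain is the unrolled equivalent for older kernels). The point of the change: every failed chunk, whether from the user-space read or from the perf output inside `__do_bytes()`, now funnels through one `error` exit that records the failure against `MSG_OP_DATA` via `perf_event_output_update_error_metric()` (defined in the `process.h` hunk further down), instead of returning silently; `err = 0` keeps the error path well-defined for the compiler on every route to the label. This is a restatement of the diff for readability, not new behavior:

```c
long do_bytes_model(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
{
    size_t rd_bytes = 0;
    int err = 0, i;

    for (i = 0; i < 10; i++) {          /* bounded loop for the verifier */
        err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes);
        if (err < 0)
            goto error;                 /* read or perf output failed */
        rd_bytes += err;
        if (rd_bytes == bytes)
            return rd_bytes;            /* everything sent */
    }
    return rd_bytes;                    /* leftover is reported as-is */
error:
    perf_event_output_update_error_metric(MSG_OP_DATA, err);
    return err;
}
```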
diff --git a/src/security/file_security.bpf.c b/src/security/file_security.bpf.c
deleted file mode 100644
index ec9b52b623ba6be6fd49e0c84099f2284e785227..0000000000000000000000000000000000000000
--- a/src/security/file_security.bpf.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include
-#include
-#include
-#include
-#include "../coolbpf.h"
-
-#include "int_maps.h"
-#include "filter.h"
-#include "type.h"
-#include "process.h"
-#include "addr_lpm_maps.h"
-#include "string_maps.h"
-#include "bpf_exit.h"
-#include "tailcall_stack.h"
-#include "../ebpf_log.h"
-
-struct
-{
-    __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
-    __uint(max_entries, 3);
-    __uint(key_size, sizeof(__u32));
-    __uint(value_size, sizeof(__u32));
-} secure_tailcall_map SEC(".maps");
-
-BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1);
-
-static inline __attribute__((always_inline)) u32 str_len(const char *str)
-{
-    u32 len = 0;
-#pragma unroll
-    for (int i = 0; i < SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT; i++)
-    {
-        if (str[i] == '\0')
-            break;
-        len++;
-    }
-    return len;
-}
-
-static inline __attribute__((always_inline)) long copy_path(char *args, const struct path *arg)
-{
-    int *s = (int *)args;
-    int size = 0, flags = 0;
-    char *buffer;
-    void *curr = &args[4];
-    umode_t i_mode;
-    buffer = d_path_local(arg, &size, &flags);
-    if (!buffer)
-        return 0;
-    // tips: path size between 0~255
-    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
-                 :);
-    bpf_probe_read(curr, size, buffer);
-    *s = size;
-    size += 4;
-    BPF_CORE_READ_INTO(&i_mode, arg, dentry, d_inode, i_mode);
-    /*
-     * the format of the path is:
-     * -----------------------------------------
-     * | 4 bytes | N bytes | 4 bytes | 2 bytes |
-     * | pathlen |  path   |  flags  |  mode   |
-     * -----------------------------------------
-     * Next we set up the flags.
-     */
-    asm volatile goto(
-        "r1 = *(u64 *)%[pid];\n"
-        "r7 = *(u32 *)%[offset];\n"
-        "if r7 s< 0 goto %l[a];\n"
-        "if r7 s> 1188 goto %l[a];\n"
-        "r1 += r7;\n"
-        "r2 = *(u32 *)%[flags];\n"
-        "*(u32 *)(r1 + 0) = r2;\n"
-        "r2 = *(u16 *)%[mode];\n"
-        "*(u16 *)(r1 + 4) = r2;\n"
-        :
-        : [pid] "m"(args), [flags] "m"(flags), [offset] "+m"(size), [mode] "m"(i_mode)
-        : "r0", "r1", "r2", "r7", "memory"
-        : a);
-a:
-    size += sizeof(u32) + sizeof(u16); // for the flags + i_mode
-    return size;
-}
-
-SEC("kprobe/security_file_permission")
-int kprobe_security_file_permission(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][kprobe_security_file_permission] enter security_file_permission.");
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    struct file *file = (struct file *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(&file->f_path);
-    long ret = copy_path(stack->file_data.path, path_arg);
-    int path_len = *(int *)stack->file_data.path;
-    const u32 flag_prefix = 4 + path_len;
-    int flag = -1;
-    if (flag_prefix < 2000 && flag_prefix >= 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix);
-    const u32 mode_prefix = 8 + path_len;
-    short mode = -1;
-    if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix);
-    BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag);
-    BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode);
-
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid);
-        BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid);
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
-
-SEC("kprobe/security_mmap_file")
-int kprobe_security_mmap_file(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][security_mmap_file] enter security_mmap_file.");
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    struct file *file = (struct file *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(&file->f_path);
-    long ret = copy_path(stack->file_data.path, path_arg);
-    int path_len = *(int *)stack->file_data.path;
-    BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret);
-    BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]);
-
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
-
-SEC("kprobe/security_path_truncate")
-int kprobe_security_path_truncate(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][security_path_truncate] enter security_path_truncate.");
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-    struct path *path = (struct path *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(path);
-    copy_path(stack->file_data.path, path_arg);
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
\ No newline at end of file
"string_maps.h" -#include "bpf_exit.h" -#include "tailcall_stack.h" -#include "../ebpf_log.h" - -BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); -BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); - -struct -{ - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, 3); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} secure_tailcall_map SEC(".maps"); - -static __always_inline u16 bpf_core_sock_sk_protocol_ak(struct sock *sk) -{ - return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); -} - -static __always_inline u32 get_netns(struct sock *sk) { - return BPF_CORE_READ(sk, __sk_common.skc_net.net, ns.inum); -} - -// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) -SEC("kprobe/tcp_sendmsg") -int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - // define event - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_SENDMSG; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - data->bytes = size; - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - stack->tcp_data.bytes = size; - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.sport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// void tcp_close(struct sock *sk, long timeout); -SEC("kprobe/tcp_close") -int BPF_KPROBE(kprobe_tcp_close, struct sock 
*sk) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CLOSE; - data->key = enter->key; - data->pkey = enter->pkey; - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// -SEC("kprobe/tcp_connect") -int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CONNECT; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} diff --git a/src/security/process.h b/src/security/process.h index 733b1c5f5b6c21b49891c46e96733931799de616..bc5b29a1f6a84eac8eeed64f78f31d54f5c35c1b 100644 --- a/src/security/process.h +++ b/src/security/process.h @@ -17,310 +17,6 @@ #include "type.h" #include "bpf_process_event_type.h" -// -///* Max number of args to parse */ -//#define MAXARGS 20 -///* Max length of any given arg */ -//#define MAXARGLENGTH 256 -///* This is the absolute buffer size for args and filenames including some -// * extra head room so we can append last args string to buffer. The extra -// * headroom is an unfortunate result of bounds on offset/size in -// * event_args_builder(). -// * -// * For example given an offset bounds -// * -// * offset <- (0, 100) -// * -// * We will read into the buffer using this offset giving a max offset -// * of eargs + 100. -// * -// * args[offset] <- (0, 100) -// * -// * Now we want to read this with call 45 aka bpf_probe_read_str as follows, -// * where 'kernel_struct_arg' is the kernel data struct we are reading. -// * -// * bpf_probe_read_str(args[offset], size, kernel_struct_arg) -// * -// * But we have a bit of a problem determining if 'size' is out of array -// * range. 
The math would be, -// * -// * size = length - offset -// * -// * Giving the remainder of the buffer, -// * -// * args offset length -// * |---------------|------------------| -// * -// * |-------size-------| -// * -// * But verifier math works on bounds so bounds analysis of size is the -// * following, -// * -// * length = 1024 -// * offset = (0, 100) -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * size <- (924, 1124) -// * -// * And verifier throws an error because args[offset + size] with bounds -// * anaylsis, -// * -// * args_(max)[100 + 1024] = args_(max)[1124] -// * -// * To circumvent this, at least until we teach the verifier about -// * dependent variables, create a maxarg value and pad arg buffer with -// * it. Giving a args buffer of size 'length + pad' with above bounds -// * analysis, -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * if size > pad goto done -// * size <- (924, 1124) // 1124 < length + pad -// * -// * Phew all clear now? -// */ -//#define CWD_MAX 256 -//#define BUFFER 1024 -//#define SIZEOF_EVENT 56 -//#define PADDED_BUFFER \ -// (BUFFER + MAXARGLENGTH + SIZEOF_EVENT + SIZEOF_EVENT + CWD_MAX) -///* This is the usable buffer size for args and filenames. It is calculated -// * as the (BUFFER SIZE - sizeof(parent) - sizeof(curr) but unfortunately -// * preprocess doesn't know types so we do it manually without sizeof(). -// */ -//#define ARGSBUFFER (BUFFER - SIZEOF_EVENT - SIZEOF_EVENT) -//#define __ASM_ARGSBUFFER 976 -//#define ARGSBUFFERMASK (ARGSBUFFER - 1) -//#define MAXARGMASK (MAXARG - 1) -//#define PATHNAME_SIZE 256 -// -///* Task flags */ -//#ifndef PF_KTHREAD -//#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -//#endif -// -///* Msg flags */ -//#define EVENT_UNKNOWN 0x00 -//#define EVENT_EXECVE 0x01 -//#define EVENT_EXECVEAT 0x02 -//#define EVENT_PROCFS 0x04 -//#define EVENT_TRUNC_FILENAME 0x08 -//#define EVENT_TRUNC_ARGS 0x10 -//#define EVENT_TASK_WALK 0x20 -//#define EVENT_MISS 0x40 -//#define EVENT_NEEDS_AUID 0x80 -//#define EVENT_ERROR_FILENAME 0x100 -//#define EVENT_ERROR_ARGS 0x200 -//#define EVENT_NEEDS_CWD 0x400 -//#define EVENT_NO_CWD_SUPPORT 0x800 -//#define EVENT_ROOT_CWD 0x1000 -//#define EVENT_ERROR_CWD 0x2000 -//#define EVENT_CLONE 0x4000 -//#define EVENT_ERROR_SOCK 0x8000 -//#define EVENT_ERROR_CGROUP_NAME 0x010000 -//#define EVENT_ERROR_CGROUP_KN 0x020000 -//#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 -//#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 -//#define EVENT_ERROR_CGROUPS 0x100000 -//#define EVENT_ERROR_CGROUP_ID 0x200000 -//#define EVENT_ERROR_PATH_COMPONENTS 0x400000 -//#define EVENT_DATA_FILENAME 0x800000 -//#define EVENT_DATA_ARGS 0x1000000 -// -//#define EVENT_COMMON_FLAG_CLONE 0x01 -// -///* Docker IDs are unique at first 12 characters, but we want to get -// * 12chars plus any extra prefix used by the container environment. -// * Minikube for example prepends 'docker-' to the id. So lets copy -// * 32B and assume at least 12B of it is ID info. -// */ -//#define DOCKER_ID_LENGTH 128 -// -//struct msg_execve_key { -// __u32 pid; // Process TGID -// __u8 pad[4]; -// __u64 ktime; -//}; // All fields aligned so no 'packed' attribute. -// -///* This is the struct stored in bpf map to share info between -// * different execve hooks. 
-// */ -//struct execve_info { -// /* The secureexec is to reflect the kernel bprm->secureexec that is exposed -// * to userspace through auxiliary vector which can be read from -// * /proc/self/auxv or https://man7.org/linux/man-pages/man3/getauxval.3.html -// * -// * The AT_SECURE of auxv can have a value of 1 or 0 and it is set from -// * the bprm->secureexec that is a bit field. -// * If bprm->secureexec is 1 then it means executable should be treated securely. -// * Most commonly, 1 indicates that the process is executing a set-user-ID -// * or set-group-ID binary (so that its real and effective UIDs or GIDs differ -// * from one another), or that it gained capabilities by executing a binary file -// * that has capabilities (see capabilities(7)). -// * Alternatively, a nonzero value may be triggered by a Linux Security Module. -// * When this value is nonzero, the dynamic linker disables the use of certain -// * environment variables. -// * -// * The secureexec here can have the following bit flags: -// * EXEC_SETUID or EXEC_SETGID -// */ -// __u32 secureexec; -// __u32 i_nlink; /* inode links */ -// __u64 i_ino; /* inode number */ -//}; -// -///* process information -// * -// * Manually linked to ARGSBUFFER and PADDED_BUFFER if this changes then please -// * also change SIZEOF_EVENT. -// */ -//struct msg_process { -// __u32 size; -// __u32 pid; // Process TGID -// __u32 tid; // Process thread -// __u32 nspid; -// __u32 secureexec; -// __u32 uid; -// __u32 auid; -// __u32 flags; -// __u32 i_nlink; -// __u32 pad; -// __u64 i_ino; -// __u64 ktime; -// char *args; -//}; // All fields aligned so no 'packed' attribute. -// -///* msg_clone_event holds only the necessary fields to construct a new entry from -// * the parent after a clone() event. -// */ -//struct msg_clone_event { -// struct msg_common common; -// struct msg_execve_key parent; -// __u32 tgid; -// __u32 tid; -// __u32 nspid; -// __u32 flags; -// __u64 ktime; -//} __attribute__((packed)); -// -//struct exit_info { -// __u32 code; -// __u32 tid; // Thread ID -//}; -// -//struct msg_exit { -// struct msg_common common; -// struct msg_execve_key current; -// struct exit_info info; -//}; // All fields aligned so no 'packed' attribute. -// -//enum { -// ns_uts = 0, -// ns_ipc = 1, -// ns_mnt = 2, -// ns_pid = 3, -// ns_pid_for_children = 4, -// ns_net = 5, -// ns_time = 6, -// ns_time_for_children = 7, -// ns_cgroup = 8, -// ns_user = 9, -// -// // If you update the value of ns_max_types you -// // should also update parseMatchNamespaces() -// // in kernel.go -// ns_max_types = 10, -//}; -// -//struct msg_ns { -// union { -// struct { -// __u32 uts_inum; -// __u32 ipc_inum; -// __u32 mnt_inum; -// __u32 pid_inum; -// __u32 pid_for_children_inum; -// __u32 net_inum; -// __u32 time_inum; -// __u32 time_for_children_inum; -// __u32 cgroup_inum; -// __u32 user_inum; -// }; -// __u32 inum[ns_max_types]; -// }; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_k8s { -// __u32 net_ns; -// __u32 cid; -// __u64 cgrpid; -// char docker_id[DOCKER_ID_LENGTH]; -//}; // All fields aligned so no 'packed' attribute. -// -//#define BINARY_PATH_MAX_LEN 256 -// -//struct heap_exe { -// // because of verifier limitations, this has to be 2 * 256 bytes while 256 -// // should be theoretically sufficient, and actually is, in unit tests. -// char buf[BINARY_PATH_MAX_LEN * 2]; -// // offset points to the start of the path in the above buffer. Use offset to -// // read the path in the buffer since it's written from the end. 
-// char *off; -// __u32 len; -// __u32 error; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_execve_event { -// struct msg_common common; -// struct msg_k8s kube; -// struct msg_execve_key parent; -// __u64 parent_flags; -// struct msg_cred creds; -// struct msg_ns ns; -// struct msg_execve_key cleanup_key; -// /* if add anything above please also update the args of -// * validate_msg_execve_size() in bpf_execve_event.c */ -// union { -// struct msg_process process; -// char buffer[PADDED_BUFFER]; -// }; -// /* below fields are not part of the event, serve just as -// * heap for execve programs -// */ -//#ifdef __LARGE_BPF_PROG -// struct heap_exe exe; -//#endif -//}; // All fields aligned so no 'packed' attribute. -// -//// This structure stores the binary path that was recorded on execve. -//// Technically PATH_MAX is 4096 but we limit the length we store since we have -//// limits on the length of the string to compare: -//// - Artificial limits for full string comparison. -//// - Technical limits for prefix and postfix, using LPM_TRIE that have a 256 -//// bytes size limit. -//struct binary { -// // length of the path stored in path, this should be < BINARY_PATH_MAX_LEN -// // but can contain negative value in case of copy error. -// // While s16 would be sufficient, 64 bits are handy for alignment. -// __s64 path_length; -// // BINARY_PATH_MAX_LEN first bytes of the path -// char path[BINARY_PATH_MAX_LEN]; -//}; // All fields aligned so no 'packed' attribute -// -//// The execve_map_value is tracked by the TGID of the thread group -//// the msg_execve_key.pid. The thread IDs are recorded on the -//// fly and sent with every corresponding event. -//struct execve_map_value { -// struct msg_execve_key key; -// struct msg_execve_key pkey; -// __u32 flags; -// __u32 nspid; -// struct msg_ns ns; -// struct msg_capabilities caps; -// struct binary bin; -//} __attribute__((packed)) __attribute__((aligned(8))); - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); @@ -588,20 +284,23 @@ struct { __uint(max_entries, 1); } tg_stats_map SEC(".maps"); -FUNC_INLINE void -perf_event_output_metric(void *ctx, u8 metric, void *map, u64 flags, void *data, u64 size) -{ +FUNC_INLINE void perf_event_output_update_error_metric(u8 msg_op, long err) { struct kernel_stats *valp; __u32 zero = 0; - long err; - err = bpf_perf_event_output(ctx, map, flags, data, size); - if (err < 0) { - valp = bpf_map_lookup_elem(&tg_stats_map, &zero); - if (valp) - __sync_fetch_and_add(&valp->sent_failed[metric], 1); + valp = bpf_map_lookup_elem(&tg_stats_map, &zero); + if (valp) { + __sync_fetch_and_add(&valp->sent_failed[msg_op], 1); } } +FUNC_INLINE void perf_event_output_metric(void *ctx, u8 msg_op, void *map, + u64 flags, void *data, u64 size) { + long err; + + err = bpf_perf_event_output(ctx, map, flags, data, size); + if (err < 0) + perf_event_output_update_error_metric(msg_op, err); +} #endif //SYSAK_PROCESS_H diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index f167589fd946188602626329f92edd27b5defec8..f4337bbc85606253c73f1db42c4abe6526cf1514 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -99,7 +99,6 @@ read_args(void *ctx, struct msg_execve_event *event) off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack); if (off < 0) return 0; - BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); start_stack += off; @@ -144,7 +143,6 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename) 
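Splitting the failure accounting out of `perf_event_output_metric()` lets other paths (the `do_bytes()` error label above) bump the same per-msg-op counter without re-sending. Since `tg_stats_map` is a per-CPU array, a userspace lookup returns one value per possible CPU that must be summed. The sketch below is a hedged illustration, not code from this repo: the `kernel_stats` layout is an assumption that must match the BPF side's definition, and the caller is expected to pass `libbpf_num_possible_cpus()` for `nr_cpus`.

```c
#include <bpf/libbpf.h>
#include <stdint.h>
#include <stdio.h>

struct kernel_stats {
    uint64_t sent_failed[256]; /* assumed: one counter per msg op */
};

static void dump_sent_failed(struct bpf_object *obj, int nr_cpus)
{
    struct bpf_map *map = bpf_object__find_map_by_name(obj, "tg_stats_map");
    struct kernel_stats stats[nr_cpus]; /* one slice per possible CPU */
    uint32_t zero = 0;

    if (!map || bpf_map__lookup_elem(map, &zero, sizeof(zero),
                                     stats, sizeof(stats), 0) < 0)
        return;
    for (int op = 0; op < 256; op++) {
        uint64_t total = 0;
        for (int cpu = 0; cpu < nr_cpus; cpu++)
            total += stats[cpu].sent_failed[op]; /* sum per-CPU slices */
        if (total)
            printf("msg op %d: %llu dropped events\n", op,
                   (unsigned long long)total);
    }
}
```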
diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c
index f167589fd946188602626329f92edd27b5defec8..f4337bbc85606253c73f1db42c4abe6526cf1514 100644
--- a/src/security/security.bpf.c
+++ b/src/security/security.bpf.c
@@ -99,7 +99,6 @@ read_args(void *ctx, struct msg_execve_event *event)
     off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack);
     if (off < 0)
         return 0;
-    BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath);
 
     start_stack += off;
 
@@ -144,7 +143,6 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename)
     earg = (void *)p + offsetof(struct msg_process, args);
 
     size = bpf_probe_read_str(earg, MAXARGLENGTH - 1, filename);
-    BPF_DEBUG("[read_path] pid:%llu, path:%s", p->pid, earg);
     if (size < 0) {
         flags |= EVENT_ERROR_FILENAME;
         size = 0;
@@ -205,11 +203,22 @@ read_exe(struct task_struct *task, struct heap_exe *exe)
     struct file *file = BPF_CORE_READ(task, mm, exe_file);
     struct path *path = __builtin_preserve_access_index(&file->f_path);
 
-    exe->len = BINARY_PATH_MAX_LEN;
-    exe->off = (char *)&exe->buf;
-    exe->off = __d_path_local(path, exe->off, (int *)&exe->len, (int *)&exe->error);
-    if (exe->len > 0)
-        exe->len = BINARY_PATH_MAX_LEN - exe->len;
+    // we need to walk the complete 4096 len dentry in order to have an accurate
+    // matching on the prefix operators, even if we only keep a subset of that
+    char *buffer;
+
+    buffer = d_path_local(path, (int *)&exe->len, (int *)&exe->error);
+    if (!buffer)
+        return 0;
+
+    // buffer used by d_path_local can contain up to MAX_BUF_LEN i.e. 4096 we
+    // only keep the first 255 chars for our needs (we sacrifice one char to the
+    // verifier for the > 0 check)
+    if (exe->len > 255)
+        exe->len = 255;
+    asm volatile("%[len] &= 0xff;\n"
+                 : [len] "+r"(exe->len));
+    probe_read(exe->buf, exe->len, buffer);
 
     return exe->len;
 }
@@ -219,11 +228,8 @@ read_exe(struct task_struct *task, struct heap_exe *exe)
 SEC("kprobe/wake_up_new_task")
 int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
 {
-    __u32 cpid = bpf_get_current_pid_tgid() >> 32;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid);
     struct execve_map_value *curr, *parent;
     struct msg_clone_event msg;
-    struct msg_capabilities caps;
     u64 msg_size = sizeof(struct msg_clone_event);
     struct msg_k8s kube;
     u32 tgid = 0;
@@ -232,7 +238,6 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
         return 0;
 
     tgid = BPF_CORE_READ(task, tgid);
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid);
 
     /* Do not try to create any msg or calling execve_map_get
      * (that will add a new process in the execve_map) if we
@@ -241,22 +246,20 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
     parent = __event_find_parent(task);
     if (!parent)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid);
+
     curr = execve_map_get(tgid);
     if (!curr)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid);
+
     /* Generate an EVENT_COMMON_FLAG_CLONE event once per process,
      * that is, thread group. */
     if (curr->key.ktime != 0)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid);
 
     /* Setup the execve_map entry. */
     curr->flags = EVENT_COMMON_FLAG_CLONE;
     curr->key.pid = tgid;
-    // curr->key.ktime = get_start_time();
     curr->key.ktime = bpf_ktime_get_ns();
     curr->nspid = get_task_pid_vnr();
     memcpy(&curr->bin, &parent->bin, sizeof(curr->bin));
@@ -266,10 +269,17 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
      * before the execve hook point if they changed or not.
      * This needs to be converted later to credentials. */
-    get_current_subj_caps(&caps, task);
-    curr->caps.permitted = caps.permitted;
-    curr->caps.effective = caps.effective;
-    curr->caps.inheritable = caps.inheritable;
+    get_current_subj_caps(&curr->caps, task);
+
+    /* Store the thread leader namespaces so we can check later
+     * before the execve hook point if they changed or not.
+     */
+    get_namespaces(&curr->ns, task);
+
+    /* Set EVENT_IN_INIT_TREE flag on the process if its parent is in a
+     * container's init tree or if it has nspid=1.
+     */
+    set_in_init_tree(curr, parent);
 
     /* Setup the msg_clone_event and sent to the user. */
     msg.common.op = MSG_OP_CLONE;
@@ -289,18 +299,13 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
 
     __event_get_cgroup_info(task, &kube);
 
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid);
-
     if (cgroup_rate(ctx, &kube, msg.ktime)) {
-        BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid);
-        perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map,
-                                 BPF_F_CURRENT_CPU, &msg, msg_size);
+        perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, BPF_F_CURRENT_CPU, &msg, msg_size);
     }
 
     return 0;
 }
 
-////__attribute__((section("tracepoint/sys_execve"), used)) int
 SEC("tracepoint/sched/sched_process_exec")
 int event_execve(struct trace_event_raw_sched_process_exec *ctx)
 {
@@ -338,21 +343,24 @@ int event_execve(struct trace_event_raw_sched_process_exec *ctx)
     p->ktime = bpf_ktime_get_ns();
     p->size = offsetof(struct msg_process, args);
     p->auid = get_auid();
-    p->uid = bpf_get_current_uid_gid();
     read_execve_shared_info(ctx, p, pid);
 
     p->size += read_path(ctx, event, filename);
     p->size += read_args(ctx, event);
     p->size += read_cwd(ctx, p);
-    BPF_DEBUG("[event_execve] enter pid:%llu, filename:%s", p->pid, filename);
 
     event->common.op = MSG_OP_EXECVE;
     event->common.ktime = p->ktime;
     event->common.size = offsetof(struct msg_execve_event, process) + p->size;
 
-    BPF_CORE_READ_INTO(&event->kube.net_ns, task, nsproxy, net_ns, ns.inum);
-
     get_current_subj_creds(&event->creds, task);
+    /**
+     * Instead of showing the task owner, we want to display the effective
+     * uid that is used to calculate the privileges of current task when
+     * acting upon other objects. This allows to be compatible with the 'ps'
+     * tool that reports snapshot of current processes.
+     */
+    p->uid = event->creds.euid;
     get_namespaces(&event->ns, task);
     p->flags |= __event_get_cgroup_info(task, &event->kube);
 
@@ -388,7 +396,6 @@ int execve_rate(void *ctx)
 SEC("tracepoint/1")
 int execve_send(void *ctx)
 {
-    BPF_DEBUG("[execve_send] enter ~");
     struct msg_execve_event *event;
     struct execve_map_value *curr;
     struct msg_process *p;
@@ -432,7 +439,13 @@ int execve_send(void *ctx)
     if (curr->flags & EVENT_COMMON_FLAG_CLONE) {
         event_set_clone(p);
     }
-    curr->flags = 0;
+    curr->flags &= ~EVENT_COMMON_FLAG_CLONE;
+    /* Set EVENT_IN_INIT_TREE flag on the process if nspid=1.
+     */
+    set_in_init_tree(curr, NULL);
+    if (curr->flags & EVENT_IN_INIT_TREE) {
+        event->process.flags |= EVENT_IN_INIT_TREE;
+    }
 #ifdef __NS_CHANGES_FILTER
     if (init_curr)
         memcpy(&(curr->ns), &(event->ns),
@@ -451,7 +464,7 @@ int execve_send(void *ctx)
 #ifdef __LARGE_BPF_PROG
     // read from proc exe stored at execve time
     if (event->exe.len <= BINARY_PATH_MAX_LEN) {
-        curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.off);
+        curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.buf);
         if (curr->bin.path_length == 0)
             curr->bin.path_length = event->exe.len;
     }
@@ -473,9 +486,7 @@ int execve_send(void *ctx)
                sizeof(struct msg_execve_key) + sizeof(__u64) + sizeof(struct msg_cred) +
                sizeof(struct msg_ns) + sizeof(struct msg_execve_key) + p->size);
-//    BPF_DEBUG("[execve_send] before perf output ~");
     perf_event_output_metric(ctx, MSG_OP_EXECVE, &tcpmon_map, BPF_F_CURRENT_CPU, event, size);
-//    BPF_DEBUG("[execve_send] after perf output ~");
     return 0;
 }
 
@@ -490,9 +501,7 @@ int event_exit_acct_process(struct pt_regs *ctx)
 {
     __u64 pid_tgid = bpf_get_current_pid_tgid();
     __u32 pid = pid_tgid >> 32;
-    BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u enter~", pid);
-    event_exit_send(ctx, pid_tgid >> 32);
-    BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u send done ~", pid);
+    event_exit_send(ctx, pid);
 
     return 0;
 }
@@ -511,8 +520,6 @@ int event_exit_disassociate_ctty(struct pt_regs *ctx)
 {
     int on_exit = (int)PT_REGS_PARM1_CORE(ctx);
     __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    BPF_DEBUG("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid);
-
     if (on_exit)
         event_exit_send(ctx, pid);
     return 0;
@@ -939,6 +946,7 @@ static inline __attribute__((always_inline)) long copy_path(char *args, const st
     if (!buffer)
         return 0;
     // tips: path size between 0~255
+    if (size > 255) size = 255;
    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                 :);
    bpf_probe_read(curr, size, buffer);