diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake
index 34e487b10c19fd36d2aa4c35d86b71ed0c274747..60286d10bb15950dd262c02c107b0515db06f1dd 100644
--- a/scripts/cmake/genskel.cmake
+++ b/scripts/cmake/genskel.cmake
@@ -21,8 +21,8 @@ message(STATUS "Include Directories: ${include_dirs}")
 macro(genskel name)
     message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
     if (CMAKE_BUILD_TYPE MATCHES Debug)
-        add_definitions(-DBPF_DEBUG)
-        message(STATUS "add definition: -DBPF_DEBUG")
+        add_definitions(-DCOOLBPF_DEBUG)
+        message(STATUS "add definition: -DCOOLBPF_DEBUG")
     endif ()
     SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c)
     SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f2ddacdb5dc14aa114c896a8e8523b2d8cc8b28d..0365b5878e39310d381dfbe73571788d6328fda6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,18 +26,18 @@ if(NOT DEFINED INSTALL_INCLUDE_DIR)
     set(INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
 endif()
 
-add_subdirectory(bpf)
-add_subdirectory(security)
-if (ENABLE_PROFILE)
-    add_subdirectory(profiler)
-endif()
-
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in ${CMAKE_CURRENT_BINARY_DIR}/coolbpf.pc @ONLY)
 
 include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/bpf ${CMAKE_CURRENT_BINARY_DIR}/bpf)
 
+add_subdirectory(bpf)
+add_subdirectory(security)
+if (ENABLE_PROFILE)
+    add_subdirectory(profiler)
+endif()
+
 file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c ${CMAKE_CURRENT_SOURCE_DIR}/*.c)
 
 # share library
diff --git a/src/ebpf_log.h b/src/ebpf_log.h
index a19324d395a68c76ff809212b03518833434552b..fdca4685f1d4c3feddfede20b1e1b0d6b8a8f40d 100644
--- a/src/ebpf_log.h
+++ b/src/ebpf_log.h
@@ -4,13 +4,10 @@
 
 /* Macro to output debug logs to /sys/kernel/debug/tracing/trace_pipe */
 
-#ifdef BPF_DEBUG
-#define BPF_DEBUG(fmt, ...)                                        \
-    ({                                                             \
-        char ____fmt[] = fmt;                                      \
-        bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
-    })
+#ifdef COOLBPF_DEBUG
+#include <bpf/bpf_helpers.h>
+#define BPF_DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__)
 #else
 // No op
-#define BPF_DEBUG(fmt, ...)
+#define BPF_DEBUG(__fmt, ...)
 #endif
\ No newline at end of file
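Note that the compile definition must be spelled `-DCOOLBPF_DEBUG` for the `#ifdef COOLBPF_DEBUG` guard in `ebpf_log.h` to fire. Below is a minimal usage sketch of the reworked macro, not part of this diff: the hook name is illustrative, and `bpf_printk()` is assumed to come from libbpf's `bpf/bpf_helpers.h` (the include target was stripped in the hunk above). With `-DCOOLBPF_DEBUG` the macro expands to `bpf_printk()`, whose output is readable from `/sys/kernel/debug/tracing/trace_pipe`; without it, the call compiles away. `bpf_printk()` historically tolerates at most three format arguments, which is why the debug dumps removed later in this diff were split across two calls.

```c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "ebpf_log.h"

SEC("kprobe/security_file_permission")
int debug_probe(struct pt_regs *ctx)
{
    __u32 pid = bpf_get_current_pid_tgid() >> 32;

    BPF_DEBUG("[kprobe][debug_probe] pid:%u enter", pid); /* no-op in release builds */
    return 0;
}

char LICENSE[] SEC("license") = "GPL";
```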
diff --git a/src/security/bpf_common.h b/src/security/bpf_common.h
index 5551e455df1edf537ccd5c76a5324d12adfcaca7..63acceea50e827b5e4a0c55d71cf791ccc118a98 100644
--- a/src/security/bpf_common.h
+++ b/src/security/bpf_common.h
@@ -65,13 +65,6 @@ struct msg_test {
 #define BIT(nr) (1 << (nr))
 #define BIT_ULL(nr) (1ULL << (nr))
 
-#ifdef TETRAGON_BPF_DEBUG
-#include
-#define DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__)
-#else
-#define DEBUG(__fmt, ...)
-#endif
-
 #ifndef PATH_MAP_SIZE
 #define PATH_MAP_SIZE 4096
 #endif
diff --git a/src/security/bpf_exit.h b/src/security/bpf_exit.h
index 50c242e7ff9d4e068df92ec52c1c88662d1f7174..d47cbe1aba35efea606cfbd395699e29fb8113b6 100644
--- a/src/security/bpf_exit.h
+++ b/src/security/bpf_exit.h
@@ -39,7 +39,6 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid)
     enter = execve_map_get_noinit(tgid);
     if (!enter)
         return;
-    BPF_DEBUG("[kprobe][event_exit_send] pid:%u already enter.", tgid);
     if (enter->key.ktime) {
         struct task_struct *task = (struct task_struct *)bpf_get_current_task();
         size_t size = sizeof(struct msg_exit);
@@ -77,9 +76,7 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid)
 
         __event_get_cgroup_info(task, &kube);
 
-        BPF_DEBUG("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid);
         if (cgroup_rate(ctx, &kube, exit->common.ktime)) {
-            BPF_DEBUG("[kprobe][event_exit_send] pid:%u send event.", tgid);
             perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map,
                                      BPF_F_CURRENT_CPU, exit, size);
         }
diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h
index 4605de870bc7ca460a93ecd37e6a7361d4038a20..48ac76f4b056914a8d7d1a253991d59cbcc3089b 100644
--- a/src/security/bpf_process_event.h
+++ b/src/security/bpf_process_event.h
@@ -5,26 +5,27 @@
 #ifndef SYSAK_BPF_PROCESS_EVENT_H
 #define SYSAK_BPF_PROCESS_EVENT_H
 
-#include "../coolbpf.h"
 #include
 #include
-#include "bpf_common.h"
+#include "../coolbpf.h"
 #include "api.h"
 #include "bpf_cgroup.h"
+#include "bpf_common.h"
 #include "bpf_cred.h"
 #include "compiler.h"
+#include "ebpf_log.h"
 
 #define ENAMETOOLONG 36 /* File name too long */
 
-#define MAX_BUF_LEN 256
+#define MAX_BUF_LEN 4096
 
 struct buffer_heap_map_value {
     // Buffer is twice the needed size because of the verifier. In prepend_name
     // unit tests, the verifier figures out that 255 is enough and that the
     // buffer_offset will not overflow, but in the real use-case it looks like
     // it's forgetting about that.
-    unsigned char buf[MAX_BUF_LEN * 2];
+    unsigned char buf[MAX_BUF_LEN + 256];
 };
 
 struct {
@@ -124,8 +125,6 @@ prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namele
 
     *buflen -= (namelen + write_slash);
 
-    // This will not happen as buffer_offset cannot be above 256 and namelen is
-    // bound to 255. Needed to make the verifier happy in older kernels.
     if (namelen + write_slash > buffer_offset)
         return -ENAMETOOLONG;
 
@@ -335,7 +334,6 @@ __d_path_local(const struct path *path, char *buf, int *buflen, int *error)
     task = (struct task_struct *)bpf_get_current_task();
     bpf_probe_read(&fs, sizeof(fs), _(&task->fs));
     *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen);
-    // log_debug);
     return res;
 }
 
@@ -390,6 +388,8 @@ getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid)
 
     asm volatile("%[offset] &= 0x3ff;\n" ::[offset] "+r"(offset)
                  :);
+    if (size > 255)
+        size = 255;
     asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                  :);
     bpf_probe_read((char *)curr + offset, size, buffer);
 
@@ -544,16 +544,6 @@ __event_get_current_cgroup_name(struct cgroup *cgrp, struct msg_k8s *kube)
 {
     const char *name;
 
-    /* TODO: check if we have Tetragon cgroup configuration and that the
-     * tracking cgroup ID is set. If so then query the bpf map for
-     * the corresponding tracking cgroup name.
-     */
-
-    /* TODO: we gather current cgroup context, switch to tracker see above,
-     * and if that fails for any reason or if we don't have the cgroup name
-     * of tracker, then we can continue with current context.
-     */
-
     name = get_cgroup_name(cgrp);
     if (name)
         bpf_probe_read_str(kube->docker_id, KN_NAME_LENGTH, name);
@@ -603,4 +593,4 @@ __event_get_cgroup_info(struct task_struct *task, struct msg_k8s *kube)
 
     return flags;
 }
-#endif //SYSAK_BPF_PROCESS_EVENT_H
+#endif // SYSAK_BPF_PROCESS_EVENT_H
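The `getcwd()` hunk above shows the bounding idiom this patch series leans on: a C-level clamp followed by an inline-asm mask. The sketch below (illustrative names; not from this repo) spells out why both halves are needed — the asm mask survives clang's optimizer, so the verifier sees a hard `[0, 255]` bound on the register, while the preceding branch keeps the value semantically correct for sizes above 255 that the mask alone would mangle.

```c
static __always_inline void bounded_copy(char *dst /* 256 bytes */,
                                         const char *src, int size)
{
    /* Semantic clamp: without it, size == 300 would be masked to
     * 300 & 0xff == 44 and silently copy the wrong amount. */
    if (size > 255)
        size = 255;
    /* Verifier clamp: the inline asm cannot be optimized away, so the
     * register holding size reaches the verifier with a visible 0..255
     * bound, proving dst cannot be overflowed. */
    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                 :);
    bpf_probe_read(dst, size, src);
}
```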
diff --git a/src/security/bpf_process_event_type.h b/src/security/bpf_process_event_type.h
index 6fe580de1454cd377de9bbaa8606386d6ea22aa9..13bc12420cb4809c3e6a668f7d93c6dd7b632c16 100644
--- a/src/security/bpf_process_event_type.h
+++ b/src/security/bpf_process_event_type.h
@@ -122,6 +122,7 @@
 #define EVENT_ERROR_PATH_COMPONENTS 0x400000
 #define EVENT_DATA_FILENAME 0x800000
 #define EVENT_DATA_ARGS 0x1000000
+#define EVENT_IN_INIT_TREE 0x2000000
 
 #define EVENT_COMMON_FLAG_CLONE 0x01
 
@@ -247,8 +248,6 @@ struct msg_ns {
 }; // All fields aligned so no 'packed' attribute.
 
 struct msg_k8s {
-    __u32 net_ns;
-    __u32 cid;
     __u64 cgrpid;
     char docker_id[DOCKER_ID_LENGTH];
 }; // All fields aligned so no 'packed' attribute.
 
@@ -256,27 +255,19 @@ struct msg_k8s {
 #define BINARY_PATH_MAX_LEN 256
 
 struct heap_exe {
-    // because of verifier limitations, this has to be 2 * 256 bytes while 256
-    // should be theoretically sufficient, and actually is, in unit tests.
-    char buf[BINARY_PATH_MAX_LEN * 2];
-    // offset points to the start of the path in the above buffer. Use offset to
-    // read the path in the buffer since it's written from the end.
-    char *off;
+    char buf[BINARY_PATH_MAX_LEN];
     __u32 len;
     __u32 error;
 }; // All fields aligned so no 'packed' attribute.
 
-#define EXECVE_EVENT_COMMON_MEMBERS    \
-    struct msg_common common;          \
-    struct msg_k8s kube;               \
-    struct msg_execve_key parent;      \
-    __u64 parent_flags;                \
-    struct msg_cred creds;             \
-    struct msg_ns ns;                  \
-    struct msg_execve_key cleanup_key;
-
 struct msg_execve_event {
-    EXECVE_EVENT_COMMON_MEMBERS
+    struct msg_common common;
+    struct msg_k8s kube;
+    struct msg_execve_key parent;
+    __u64 parent_flags;
+    struct msg_cred creds;
+    struct msg_ns ns;
+    struct msg_execve_key cleanup_key;
     /* if add anything above please also update the args of
      * validate_msg_execve_size() in bpf_execve_event.c */
     union {
diff --git a/src/security/data_event.h b/src/security/data_event.h
index 24ae14fbbb43fb41130918428ea642badb1a0553..e34a956f0dc025b5d3bda81deee3efbe202ef466 100644
--- a/src/security/data_event.h
+++ b/src/security/data_event.h
@@ -33,7 +33,9 @@ __do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes)
         return err;
 
     msg->common.size = offsetof(struct msg_data, arg) + bytes;
-    perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size);
+    err = bpf_perf_event_output(ctx, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size);
+    if (err < 0)
+        return err;
     return bytes;
 b:
     return -1;
@@ -43,13 +45,13 @@ FUNC_LOCAL long
 do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 {
     size_t rd_bytes = 0;
-    int err, i __maybe_unused;
+    int err = 0, i __maybe_unused;
 
 #ifdef __LARGE_BPF_PROG
     for (i = 0; i < 10; i++) {
         err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes);
         if (err < 0)
-            return err;
+            goto error;
         rd_bytes += err;
         if (rd_bytes == bytes)
             return rd_bytes;
@@ -58,7 +60,7 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 #define BYTES_COPY                                                \
     err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); \
     if (err < 0)                                                  \
-        return err;                                               \
+        goto error;                                               \
     rd_bytes += err;                                              \
     if (rd_bytes == bytes)                                        \
         return rd_bytes;
@@ -73,6 +75,9 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
 
     /* leftover */
     return rd_bytes;
+error:
+    perf_event_output_update_error_metric(MSG_OP_DATA, err);
+    return err;
 }
 
 FUNC_LOCAL long
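For orientation, here is a condensed model of the reworked `do_bytes()` control flow under `__LARGE_BPF_PROG` (the `BYTES_COPY` macro chain is the unrolled equivalent for older kernels). The point of the change: every failed chunk, whether from the user-space read or from the perf output inside `__do_bytes()`, now funnels through one `error` exit that records the failure against `MSG_OP_DATA` via `perf_event_output_update_error_metric()` (defined in the `process.h` hunk further down), instead of returning silently; `err = 0` keeps the error path well-defined for the compiler on every route to the label. This is a restatement of the diff for readability, not new behavior:

```c
long do_bytes_model(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes)
{
    size_t rd_bytes = 0;
    int err = 0, i;

    for (i = 0; i < 10; i++) {          /* bounded loop for the verifier */
        err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes);
        if (err < 0)
            goto error;                 /* read or perf output failed */
        rd_bytes += err;
        if (rd_bytes == bytes)
            return rd_bytes;            /* everything sent */
    }
    return rd_bytes;                    /* leftover is reported as-is */
error:
    perf_event_output_update_error_metric(MSG_OP_DATA, err);
    return err;
}
```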
diff --git a/src/security/file_security.bpf.c b/src/security/file_security.bpf.c
deleted file mode 100644
index ec9b52b623ba6be6fd49e0c84099f2284e785227..0000000000000000000000000000000000000000
--- a/src/security/file_security.bpf.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include
-#include
-#include
-#include
-#include "../coolbpf.h"
-
-#include "int_maps.h"
-#include "filter.h"
-#include "type.h"
-#include "process.h"
-#include "addr_lpm_maps.h"
-#include "string_maps.h"
-#include "bpf_exit.h"
-#include "tailcall_stack.h"
-#include "../ebpf_log.h"
-
-struct
-{
-    __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
-    __uint(max_entries, 3);
-    __uint(key_size, sizeof(__u32));
-    __uint(value_size, sizeof(__u32));
-} secure_tailcall_map SEC(".maps");
-
-BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1);
-
-static inline __attribute__((always_inline)) u32 str_len(const char *str)
-{
-    u32 len = 0;
-#pragma unroll
-    for (int i = 0; i < SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT; i++)
-    {
-        if (str[i] == '\0')
-            break;
-        len++;
-    }
-    return len;
-}
-
-static inline __attribute__((always_inline)) long copy_path(char *args, const struct path *arg)
-{
-    int *s = (int *)args;
-    int size = 0, flags = 0;
-    char *buffer;
-    void *curr = &args[4];
-    umode_t i_mode;
-    buffer = d_path_local(arg, &size, &flags);
-    if (!buffer)
-        return 0;
-    // tips: path size between 0~255
-    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
-                 :);
-    bpf_probe_read(curr, size, buffer);
-    *s = size;
-    size += 4;
-    BPF_CORE_READ_INTO(&i_mode, arg, dentry, d_inode, i_mode);
-    /*
-     * the format of the path is:
-     * -----------------------------------------
-     * | 4 bytes | N bytes | 4 bytes | 2 bytes |
-     * | pathlen |  path   |  flags  |  mode   |
-     * -----------------------------------------
-     * Next we set up the flags.
-     */
-    asm volatile goto(
-        "r1 = *(u64 *)%[pid];\n"
-        "r7 = *(u32 *)%[offset];\n"
-        "if r7 s< 0 goto %l[a];\n"
-        "if r7 s> 1188 goto %l[a];\n"
-        "r1 += r7;\n"
-        "r2 = *(u32 *)%[flags];\n"
-        "*(u32 *)(r1 + 0) = r2;\n"
-        "r2 = *(u16 *)%[mode];\n"
-        "*(u16 *)(r1 + 4) = r2;\n"
-        :
-        : [pid] "m"(args), [flags] "m"(flags), [offset] "+m"(size), [mode] "m"(i_mode)
-        : "r0", "r1", "r2", "r7", "memory"
-        : a);
-a:
-    size += sizeof(u32) + sizeof(u16); // for the flags + i_mode
-    return size;
-}
-
-SEC("kprobe/security_file_permission")
-int kprobe_security_file_permission(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][kprobe_security_file_permission] enter security_file_permission.");
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    struct file *file = (struct file *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(&file->f_path);
-    long ret = copy_path(stack->file_data.path, path_arg);
-    int path_len = *(int *)stack->file_data.path;
-    const u32 flag_prefix = 4 + path_len;
-    int flag = -1;
-    if (flag_prefix < 2000 && flag_prefix >= 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix);
-    const u32 mode_prefix = 8 + path_len;
-    short mode = -1;
-    if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix);
-    BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag);
-    BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode);
-
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid);
-        BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid);
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
-
-SEC("kprobe/security_mmap_file")
-int kprobe_security_mmap_file(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][security_mmap_file] enter security_mmap_file.");
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    struct file *file = (struct file *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(&file->f_path);
-    long ret = copy_path(stack->file_data.path, path_arg);
-    int path_len = *(int *)stack->file_data.path;
-    BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret);
-    BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]);
-
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
-
-SEC("kprobe/security_path_truncate")
-int kprobe_security_path_truncate(struct pt_regs *ctx)
-{
-    BPF_DEBUG("[kprobe][security_path_truncate] enter security_path_truncate.");
-    __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    struct execve_map_value *enter;
-    enter = execve_map_get_noinit(pid);
-    if (!enter || enter->key.ktime == 0)
-    {
-        return 0;
-    }
-    BPF_DEBUG("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime);
-    __u32 zero = 0;
-    struct secure_tailcall_stack* stack = NULL;
-    stack = bpf_map_lookup_elem(&tailcall_stack, &zero);
-    if (!stack) return 0;
-    memset(stack, 0, sizeof(stack));
-    stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE;
-    stack->file_data.func = TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE;
-    stack->file_data.key = enter->key;
-    stack->file_data.pkey = enter->pkey;
-    stack->file_data.timestamp = bpf_ktime_get_ns();
-    struct path *path = (struct path *)PT_REGS_PARM1(ctx);
-    const struct path *path_arg = 0;
-    path_arg = _(path);
-    copy_path(stack->file_data.path, path_arg);
-    bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG);
-    return 0;
-}
\ No newline at end of file
"string_maps.h" -#include "bpf_exit.h" -#include "tailcall_stack.h" -#include "../ebpf_log.h" - -BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); -BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); - -struct -{ - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, 3); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} secure_tailcall_map SEC(".maps"); - -static __always_inline u16 bpf_core_sock_sk_protocol_ak(struct sock *sk) -{ - return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); -} - -static __always_inline u32 get_netns(struct sock *sk) { - return BPF_CORE_READ(sk, __sk_common.skc_net.net, ns.inum); -} - -// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) -SEC("kprobe/tcp_sendmsg") -int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - // define event - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_SENDMSG; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - data->bytes = size; - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - stack->tcp_data.bytes = size; - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.sport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// void tcp_close(struct sock *sk, long timeout); -SEC("kprobe/tcp_close") -int BPF_KPROBE(kprobe_tcp_close, struct sock 
*sk) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CLOSE; - data->key = enter->key; - data->pkey = enter->pkey; - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// -SEC("kprobe/tcp_connect") -int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CONNECT; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} diff --git a/src/security/process.h b/src/security/process.h index 733b1c5f5b6c21b49891c46e96733931799de616..bc5b29a1f6a84eac8eeed64f78f31d54f5c35c1b 100644 --- a/src/security/process.h +++ b/src/security/process.h @@ -17,310 +17,6 @@ #include "type.h" #include "bpf_process_event_type.h" -// -///* Max number of args to parse */ -//#define MAXARGS 20 -///* Max length of any given arg */ -//#define MAXARGLENGTH 256 -///* This is the absolute buffer size for args and filenames including some -// * extra head room so we can append last args string to buffer. The extra -// * headroom is an unfortunate result of bounds on offset/size in -// * event_args_builder(). -// * -// * For example given an offset bounds -// * -// * offset <- (0, 100) -// * -// * We will read into the buffer using this offset giving a max offset -// * of eargs + 100. -// * -// * args[offset] <- (0, 100) -// * -// * Now we want to read this with call 45 aka bpf_probe_read_str as follows, -// * where 'kernel_struct_arg' is the kernel data struct we are reading. -// * -// * bpf_probe_read_str(args[offset], size, kernel_struct_arg) -// * -// * But we have a bit of a problem determining if 'size' is out of array -// * range. 
The math would be, -// * -// * size = length - offset -// * -// * Giving the remainder of the buffer, -// * -// * args offset length -// * |---------------|------------------| -// * -// * |-------size-------| -// * -// * But verifier math works on bounds so bounds analysis of size is the -// * following, -// * -// * length = 1024 -// * offset = (0, 100) -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * size <- (924, 1124) -// * -// * And verifier throws an error because args[offset + size] with bounds -// * anaylsis, -// * -// * args_(max)[100 + 1024] = args_(max)[1124] -// * -// * To circumvent this, at least until we teach the verifier about -// * dependent variables, create a maxarg value and pad arg buffer with -// * it. Giving a args buffer of size 'length + pad' with above bounds -// * analysis, -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * if size > pad goto done -// * size <- (924, 1124) // 1124 < length + pad -// * -// * Phew all clear now? -// */ -//#define CWD_MAX 256 -//#define BUFFER 1024 -//#define SIZEOF_EVENT 56 -//#define PADDED_BUFFER \ -// (BUFFER + MAXARGLENGTH + SIZEOF_EVENT + SIZEOF_EVENT + CWD_MAX) -///* This is the usable buffer size for args and filenames. It is calculated -// * as the (BUFFER SIZE - sizeof(parent) - sizeof(curr) but unfortunately -// * preprocess doesn't know types so we do it manually without sizeof(). -// */ -//#define ARGSBUFFER (BUFFER - SIZEOF_EVENT - SIZEOF_EVENT) -//#define __ASM_ARGSBUFFER 976 -//#define ARGSBUFFERMASK (ARGSBUFFER - 1) -//#define MAXARGMASK (MAXARG - 1) -//#define PATHNAME_SIZE 256 -// -///* Task flags */ -//#ifndef PF_KTHREAD -//#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -//#endif -// -///* Msg flags */ -//#define EVENT_UNKNOWN 0x00 -//#define EVENT_EXECVE 0x01 -//#define EVENT_EXECVEAT 0x02 -//#define EVENT_PROCFS 0x04 -//#define EVENT_TRUNC_FILENAME 0x08 -//#define EVENT_TRUNC_ARGS 0x10 -//#define EVENT_TASK_WALK 0x20 -//#define EVENT_MISS 0x40 -//#define EVENT_NEEDS_AUID 0x80 -//#define EVENT_ERROR_FILENAME 0x100 -//#define EVENT_ERROR_ARGS 0x200 -//#define EVENT_NEEDS_CWD 0x400 -//#define EVENT_NO_CWD_SUPPORT 0x800 -//#define EVENT_ROOT_CWD 0x1000 -//#define EVENT_ERROR_CWD 0x2000 -//#define EVENT_CLONE 0x4000 -//#define EVENT_ERROR_SOCK 0x8000 -//#define EVENT_ERROR_CGROUP_NAME 0x010000 -//#define EVENT_ERROR_CGROUP_KN 0x020000 -//#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 -//#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 -//#define EVENT_ERROR_CGROUPS 0x100000 -//#define EVENT_ERROR_CGROUP_ID 0x200000 -//#define EVENT_ERROR_PATH_COMPONENTS 0x400000 -//#define EVENT_DATA_FILENAME 0x800000 -//#define EVENT_DATA_ARGS 0x1000000 -// -//#define EVENT_COMMON_FLAG_CLONE 0x01 -// -///* Docker IDs are unique at first 12 characters, but we want to get -// * 12chars plus any extra prefix used by the container environment. -// * Minikube for example prepends 'docker-' to the id. So lets copy -// * 32B and assume at least 12B of it is ID info. -// */ -//#define DOCKER_ID_LENGTH 128 -// -//struct msg_execve_key { -// __u32 pid; // Process TGID -// __u8 pad[4]; -// __u64 ktime; -//}; // All fields aligned so no 'packed' attribute. -// -///* This is the struct stored in bpf map to share info between -// * different execve hooks. 
-// */ -//struct execve_info { -// /* The secureexec is to reflect the kernel bprm->secureexec that is exposed -// * to userspace through auxiliary vector which can be read from -// * /proc/self/auxv or https://man7.org/linux/man-pages/man3/getauxval.3.html -// * -// * The AT_SECURE of auxv can have a value of 1 or 0 and it is set from -// * the bprm->secureexec that is a bit field. -// * If bprm->secureexec is 1 then it means executable should be treated securely. -// * Most commonly, 1 indicates that the process is executing a set-user-ID -// * or set-group-ID binary (so that its real and effective UIDs or GIDs differ -// * from one another), or that it gained capabilities by executing a binary file -// * that has capabilities (see capabilities(7)). -// * Alternatively, a nonzero value may be triggered by a Linux Security Module. -// * When this value is nonzero, the dynamic linker disables the use of certain -// * environment variables. -// * -// * The secureexec here can have the following bit flags: -// * EXEC_SETUID or EXEC_SETGID -// */ -// __u32 secureexec; -// __u32 i_nlink; /* inode links */ -// __u64 i_ino; /* inode number */ -//}; -// -///* process information -// * -// * Manually linked to ARGSBUFFER and PADDED_BUFFER if this changes then please -// * also change SIZEOF_EVENT. -// */ -//struct msg_process { -// __u32 size; -// __u32 pid; // Process TGID -// __u32 tid; // Process thread -// __u32 nspid; -// __u32 secureexec; -// __u32 uid; -// __u32 auid; -// __u32 flags; -// __u32 i_nlink; -// __u32 pad; -// __u64 i_ino; -// __u64 ktime; -// char *args; -//}; // All fields aligned so no 'packed' attribute. -// -///* msg_clone_event holds only the necessary fields to construct a new entry from -// * the parent after a clone() event. -// */ -//struct msg_clone_event { -// struct msg_common common; -// struct msg_execve_key parent; -// __u32 tgid; -// __u32 tid; -// __u32 nspid; -// __u32 flags; -// __u64 ktime; -//} __attribute__((packed)); -// -//struct exit_info { -// __u32 code; -// __u32 tid; // Thread ID -//}; -// -//struct msg_exit { -// struct msg_common common; -// struct msg_execve_key current; -// struct exit_info info; -//}; // All fields aligned so no 'packed' attribute. -// -//enum { -// ns_uts = 0, -// ns_ipc = 1, -// ns_mnt = 2, -// ns_pid = 3, -// ns_pid_for_children = 4, -// ns_net = 5, -// ns_time = 6, -// ns_time_for_children = 7, -// ns_cgroup = 8, -// ns_user = 9, -// -// // If you update the value of ns_max_types you -// // should also update parseMatchNamespaces() -// // in kernel.go -// ns_max_types = 10, -//}; -// -//struct msg_ns { -// union { -// struct { -// __u32 uts_inum; -// __u32 ipc_inum; -// __u32 mnt_inum; -// __u32 pid_inum; -// __u32 pid_for_children_inum; -// __u32 net_inum; -// __u32 time_inum; -// __u32 time_for_children_inum; -// __u32 cgroup_inum; -// __u32 user_inum; -// }; -// __u32 inum[ns_max_types]; -// }; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_k8s { -// __u32 net_ns; -// __u32 cid; -// __u64 cgrpid; -// char docker_id[DOCKER_ID_LENGTH]; -//}; // All fields aligned so no 'packed' attribute. -// -//#define BINARY_PATH_MAX_LEN 256 -// -//struct heap_exe { -// // because of verifier limitations, this has to be 2 * 256 bytes while 256 -// // should be theoretically sufficient, and actually is, in unit tests. -// char buf[BINARY_PATH_MAX_LEN * 2]; -// // offset points to the start of the path in the above buffer. Use offset to -// // read the path in the buffer since it's written from the end. 
-// char *off; -// __u32 len; -// __u32 error; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_execve_event { -// struct msg_common common; -// struct msg_k8s kube; -// struct msg_execve_key parent; -// __u64 parent_flags; -// struct msg_cred creds; -// struct msg_ns ns; -// struct msg_execve_key cleanup_key; -// /* if add anything above please also update the args of -// * validate_msg_execve_size() in bpf_execve_event.c */ -// union { -// struct msg_process process; -// char buffer[PADDED_BUFFER]; -// }; -// /* below fields are not part of the event, serve just as -// * heap for execve programs -// */ -//#ifdef __LARGE_BPF_PROG -// struct heap_exe exe; -//#endif -//}; // All fields aligned so no 'packed' attribute. -// -//// This structure stores the binary path that was recorded on execve. -//// Technically PATH_MAX is 4096 but we limit the length we store since we have -//// limits on the length of the string to compare: -//// - Artificial limits for full string comparison. -//// - Technical limits for prefix and postfix, using LPM_TRIE that have a 256 -//// bytes size limit. -//struct binary { -// // length of the path stored in path, this should be < BINARY_PATH_MAX_LEN -// // but can contain negative value in case of copy error. -// // While s16 would be sufficient, 64 bits are handy for alignment. -// __s64 path_length; -// // BINARY_PATH_MAX_LEN first bytes of the path -// char path[BINARY_PATH_MAX_LEN]; -//}; // All fields aligned so no 'packed' attribute -// -//// The execve_map_value is tracked by the TGID of the thread group -//// the msg_execve_key.pid. The thread IDs are recorded on the -//// fly and sent with every corresponding event. -//struct execve_map_value { -// struct msg_execve_key key; -// struct msg_execve_key pkey; -// __u32 flags; -// __u32 nspid; -// struct msg_ns ns; -// struct msg_capabilities caps; -// struct binary bin; -//} __attribute__((packed)) __attribute__((aligned(8))); - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); @@ -588,20 +284,23 @@ struct { __uint(max_entries, 1); } tg_stats_map SEC(".maps"); -FUNC_INLINE void -perf_event_output_metric(void *ctx, u8 metric, void *map, u64 flags, void *data, u64 size) -{ +FUNC_INLINE void perf_event_output_update_error_metric(u8 msg_op, long err) { struct kernel_stats *valp; __u32 zero = 0; - long err; - err = bpf_perf_event_output(ctx, map, flags, data, size); - if (err < 0) { - valp = bpf_map_lookup_elem(&tg_stats_map, &zero); - if (valp) - __sync_fetch_and_add(&valp->sent_failed[metric], 1); + valp = bpf_map_lookup_elem(&tg_stats_map, &zero); + if (valp) { + __sync_fetch_and_add(&valp->sent_failed[msg_op], 1); } } +FUNC_INLINE void perf_event_output_metric(void *ctx, u8 msg_op, void *map, + u64 flags, void *data, u64 size) { + long err; + + err = bpf_perf_event_output(ctx, map, flags, data, size); + if (err < 0) + perf_event_output_update_error_metric(msg_op, err); +} #endif //SYSAK_PROCESS_H diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index f167589fd946188602626329f92edd27b5defec8..f4337bbc85606253c73f1db42c4abe6526cf1514 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -99,7 +99,6 @@ read_args(void *ctx, struct msg_execve_event *event) off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack); if (off < 0) return 0; - BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); start_stack += off; @@ -144,7 +143,6 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename) 
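Splitting the failure accounting out of `perf_event_output_metric()` lets other paths (the `do_bytes()` error label above) bump the same per-msg-op counter without re-sending. Since `tg_stats_map` is a per-CPU array, a userspace lookup returns one value per possible CPU that must be summed. The sketch below is a hedged illustration, not code from this repo: the `kernel_stats` layout is an assumption that must match the BPF side's definition, and the caller is expected to pass `libbpf_num_possible_cpus()` for `nr_cpus`.

```c
#include <bpf/libbpf.h>
#include <stdint.h>
#include <stdio.h>

struct kernel_stats {
    uint64_t sent_failed[256]; /* assumed: one counter per msg op */
};

static void dump_sent_failed(struct bpf_object *obj, int nr_cpus)
{
    struct bpf_map *map = bpf_object__find_map_by_name(obj, "tg_stats_map");
    struct kernel_stats stats[nr_cpus]; /* one slice per possible CPU */
    uint32_t zero = 0;

    if (!map || bpf_map__lookup_elem(map, &zero, sizeof(zero),
                                     stats, sizeof(stats), 0) < 0)
        return;
    for (int op = 0; op < 256; op++) {
        uint64_t total = 0;
        for (int cpu = 0; cpu < nr_cpus; cpu++)
            total += stats[cpu].sent_failed[op]; /* sum per-CPU slices */
        if (total)
            printf("msg op %d: %llu dropped events\n", op,
                   (unsigned long long)total);
    }
}
```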
diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c
index f167589fd946188602626329f92edd27b5defec8..f4337bbc85606253c73f1db42c4abe6526cf1514 100644
--- a/src/security/security.bpf.c
+++ b/src/security/security.bpf.c
@@ -99,7 +99,6 @@ read_args(void *ctx, struct msg_execve_event *event)
     off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack);
     if (off < 0)
         return 0;
-    BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath);
 
     start_stack += off;
 
@@ -144,7 +143,6 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename)
     earg = (void *)p + offsetof(struct msg_process, args);
 
     size = bpf_probe_read_str(earg, MAXARGLENGTH - 1, filename);
-    BPF_DEBUG("[read_path] pid:%llu, path:%s", p->pid, earg);
     if (size < 0) {
         flags |= EVENT_ERROR_FILENAME;
         size = 0;
@@ -205,11 +203,22 @@ read_exe(struct task_struct *task, struct heap_exe *exe)
     struct file *file = BPF_CORE_READ(task, mm, exe_file);
     struct path *path = __builtin_preserve_access_index(&file->f_path);
 
-    exe->len = BINARY_PATH_MAX_LEN;
-    exe->off = (char *)&exe->buf;
-    exe->off = __d_path_local(path, exe->off, (int *)&exe->len, (int *)&exe->error);
-    if (exe->len > 0)
-        exe->len = BINARY_PATH_MAX_LEN - exe->len;
+    // we need to walk the complete 4096 len dentry in order to have an accurate
+    // matching on the prefix operators, even if we only keep a subset of that
+    char *buffer;
+
+    buffer = d_path_local(path, (int *)&exe->len, (int *)&exe->error);
+    if (!buffer)
+        return 0;
+
+    // buffer used by d_path_local can contain up to MAX_BUF_LEN i.e. 4096 we
+    // only keep the first 255 chars for our needs (we sacrifice one char to the
+    // verifier for the > 0 check)
+    if (exe->len > 255)
+        exe->len = 255;
+    asm volatile("%[len] &= 0xff;\n"
+                 : [len] "+r"(exe->len));
+    probe_read(exe->buf, exe->len, buffer);
 
     return exe->len;
 }
@@ -219,11 +228,8 @@ read_exe(struct task_struct *task, struct heap_exe *exe)
 SEC("kprobe/wake_up_new_task")
 int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
 {
-    __u32 cpid = bpf_get_current_pid_tgid() >> 32;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid);
     struct execve_map_value *curr, *parent;
     struct msg_clone_event msg;
-    struct msg_capabilities caps;
     u64 msg_size = sizeof(struct msg_clone_event);
     struct msg_k8s kube;
     u32 tgid = 0;
@@ -232,7 +238,6 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
         return 0;
 
     tgid = BPF_CORE_READ(task, tgid);
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid);
 
     /* Do not try to create any msg or calling execve_map_get
      * (that will add a new process in the execve_map) if we
@@ -241,22 +246,20 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
     parent = __event_find_parent(task);
     if (!parent)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid);
+
     curr = execve_map_get(tgid);
     if (!curr)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid);
+
     /* Generate an EVENT_COMMON_FLAG_CLONE event once per process,
      * that is, thread group. */
     if (curr->key.ktime != 0)
         return 0;
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid);
 
     /* Setup the execve_map entry. */
     curr->flags = EVENT_COMMON_FLAG_CLONE;
     curr->key.pid = tgid;
-    // curr->key.ktime = get_start_time();
     curr->key.ktime = bpf_ktime_get_ns();
     curr->nspid = get_task_pid_vnr();
     memcpy(&curr->bin, &parent->bin, sizeof(curr->bin));
@@ -266,10 +269,17 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
      * before the execve hook point if they changed or not.
      * This needs to be converted later to credentials. */
-    get_current_subj_caps(&caps, task);
-    curr->caps.permitted = caps.permitted;
-    curr->caps.effective = caps.effective;
-    curr->caps.inheritable = caps.inheritable;
+    get_current_subj_caps(&curr->caps, task);
+
+    /* Store the thread leader namespaces so we can check later
+     * before the execve hook point if they changed or not.
+     */
+    get_namespaces(&curr->ns, task);
+
+    /* Set EVENT_IN_INIT_TREE flag on the process if its parent is in a
+     * container's init tree or if it has nspid=1.
+     */
+    set_in_init_tree(curr, parent);
 
     /* Setup the msg_clone_event and sent to the user. */
     msg.common.op = MSG_OP_CLONE;
@@ -289,18 +299,13 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
 
     __event_get_cgroup_info(task, &kube);
 
-    BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid);
-
     if (cgroup_rate(ctx, &kube, msg.ktime)) {
-        BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid);
-        perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map,
-                                 BPF_F_CURRENT_CPU, &msg, msg_size);
+        perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, BPF_F_CURRENT_CPU, &msg, msg_size);
     }
 
     return 0;
 }
 
-////__attribute__((section("tracepoint/sys_execve"), used)) int
 SEC("tracepoint/sched/sched_process_exec")
 int event_execve(struct trace_event_raw_sched_process_exec *ctx)
 {
@@ -338,21 +343,24 @@ int event_execve(struct trace_event_raw_sched_process_exec *ctx)
     p->ktime = bpf_ktime_get_ns();
     p->size = offsetof(struct msg_process, args);
     p->auid = get_auid();
-    p->uid = bpf_get_current_uid_gid();
     read_execve_shared_info(ctx, p, pid);
 
     p->size += read_path(ctx, event, filename);
     p->size += read_args(ctx, event);
     p->size += read_cwd(ctx, p);
-    BPF_DEBUG("[event_execve] enter pid:%llu, filename:%s", p->pid, filename);
 
     event->common.op = MSG_OP_EXECVE;
     event->common.ktime = p->ktime;
     event->common.size = offsetof(struct msg_execve_event, process) + p->size;
 
-    BPF_CORE_READ_INTO(&event->kube.net_ns, task, nsproxy, net_ns, ns.inum);
-
     get_current_subj_creds(&event->creds, task);
+    /**
+     * Instead of showing the task owner, we want to display the effective
+     * uid that is used to calculate the privileges of current task when
+     * acting upon other objects. This allows to be compatible with the 'ps'
+     * tool that reports snapshot of current processes.
+     */
+    p->uid = event->creds.euid;
     get_namespaces(&event->ns, task);
     p->flags |= __event_get_cgroup_info(task, &event->kube);
 
@@ -388,7 +396,6 @@ int execve_rate(void *ctx)
 SEC("tracepoint/1")
 int execve_send(void *ctx)
 {
-    BPF_DEBUG("[execve_send] enter ~");
     struct msg_execve_event *event;
     struct execve_map_value *curr;
     struct msg_process *p;
@@ -432,7 +439,13 @@ int execve_send(void *ctx)
     if (curr->flags & EVENT_COMMON_FLAG_CLONE) {
         event_set_clone(p);
     }
-    curr->flags = 0;
+    curr->flags &= ~EVENT_COMMON_FLAG_CLONE;
+    /* Set EVENT_IN_INIT_TREE flag on the process if nspid=1.
+     */
+    set_in_init_tree(curr, NULL);
+    if (curr->flags & EVENT_IN_INIT_TREE) {
+        event->process.flags |= EVENT_IN_INIT_TREE;
+    }
 #ifdef __NS_CHANGES_FILTER
     if (init_curr)
         memcpy(&(curr->ns), &(event->ns),
@@ -451,7 +464,7 @@ int execve_send(void *ctx)
 #ifdef __LARGE_BPF_PROG
     // read from proc exe stored at execve time
     if (event->exe.len <= BINARY_PATH_MAX_LEN) {
-        curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.off);
+        curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.buf);
         if (curr->bin.path_length == 0)
             curr->bin.path_length = event->exe.len;
     }
@@ -473,9 +486,7 @@ int execve_send(void *ctx)
                sizeof(struct msg_execve_key) + sizeof(__u64) + sizeof(struct msg_cred) +
                sizeof(struct msg_ns) + sizeof(struct msg_execve_key) + p->size);
-//    BPF_DEBUG("[execve_send] before perf output ~");
     perf_event_output_metric(ctx, MSG_OP_EXECVE, &tcpmon_map, BPF_F_CURRENT_CPU, event, size);
-//    BPF_DEBUG("[execve_send] after perf output ~");
     return 0;
 }
 
@@ -490,9 +501,7 @@ int event_exit_acct_process(struct pt_regs *ctx)
 {
     __u64 pid_tgid = bpf_get_current_pid_tgid();
     __u32 pid = pid_tgid >> 32;
-    BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u enter~", pid);
-    event_exit_send(ctx, pid_tgid >> 32);
-    BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u send done ~", pid);
+    event_exit_send(ctx, pid);
 
     return 0;
 }
@@ -511,8 +520,6 @@ int event_exit_disassociate_ctty(struct pt_regs *ctx)
 {
     int on_exit = (int)PT_REGS_PARM1_CORE(ctx);
     __u32 pid = bpf_get_current_pid_tgid() >> 32;
-    BPF_DEBUG("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid);
-
     if (on_exit)
         event_exit_send(ctx, pid);
     return 0;
@@ -939,6 +946,7 @@ static inline __attribute__((always_inline)) long copy_path(char *args, const st
     if (!buffer)
         return 0;
     // tips: path size between 0~255
+    if (size > 255) size = 255;
    asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size)
                 :);
    bpf_probe_read(curr, size, buffer);