diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8848ca6fdda051d88e24e04a0e0e2b688564d731..4bf7ddd03d93cc94bcb7f311d68f9cc6c6064a1a 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1927,6 +1927,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d00c4dd20b0f8a82c22b7985d7fce6d6a54ac7c1..1c595f3a89b571728457818da7dc572f60bd9af8 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1894,6 +1894,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +# CONFIG_HISOCK is not set CONFIG_NET_FLOW_LIMIT=y # diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index fb6adb1c3889cf3f8b723b24f5e3f8f07752014d..a9c988c8e217d7af6f624beb115a1d9db4306acc 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -45,6 +45,9 @@ enum cgroup_bpf_attach_type { CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, +#ifdef CONFIG_HISOCK + KABI_BROKEN_INSERT_ENUM(HISOCK_EGRESS) +#endif MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d4f2c8706042cd2e079775a1fa643cfc7793bfba..f94f57d185b864d44a45a18e4a1194e83dbdbfc6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -62,6 +62,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -150,6 +153,11 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock 
*sk, int level, int optname, void *optval, int *optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -401,6 +409,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -498,6 +520,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index f5cdd5a9e268422969ed4a0790acfd083c8262f3..15809bc5eff449276723f39964fac886ef13f1f2 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/net/xdp.h b/include/net/xdp.h index 
31698ef493b36ccf9d82812a626612eee8ff27e2..4ca0a42e55c6d94ccb3337222b11f0671872882a 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -150,6 +150,11 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data_meta = meta_valid ? data : data + 1; } +struct hisock_xdp_buff { + struct xdp_buff xdp; + struct sk_buff *skb; +}; + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a660cb68c853b83a621b7bf517d78c08eb228ed4..782d56c4ebcac55b66a39f0a7e26f7d5dca790ef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,6 +1006,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, +#ifndef __GENKSYMS__ + BPF_PROG_TYPE_HISOCK, +#endif }; enum bpf_attach_type { @@ -1059,6 +1062,9 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, +#ifndef __GENKSYMS__ + BPF_HISOCK_EGRESS, +#endif __MAX_BPF_ATTACH_TYPE }; @@ -6310,6 +6316,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook @@ -7352,4 +7359,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 39fc33e6368f21586a3260460830103c81bb93fb..b9a0ae4e4a6f5a9a8121952cc5f9493f4cdba32b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -211,13 +211,16 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, BTF_KFUNC_HOOK_LWT, BTF_KFUNC_HOOK_NETFILTER, BTF_KFUNC_HOOK_SCHED, +#ifdef 
CONFIG_HISOCK + BTF_KFUNC_HOOK_HISOCK, +#endif BTF_KFUNC_HOOK_MAX, }; @@ -8093,8 +8096,15 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_SOCK_OPS: +#endif + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: @@ -8110,6 +8120,10 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) return BTF_KFUNC_HOOK_NETFILTER; case BPF_PROG_TYPE_SCHED: return BTF_KFUNC_HOOK_SCHED; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + return BTF_KFUNC_HOOK_HISOCK; +#endif default: return BTF_KFUNC_HOOK_MAX; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cf2eb0895d403c967d63236a0984f15a67ea8b4e..90cc73c762cff1e38648fbdade7482436960891a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -663,6 +663,12 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1548,6 +1554,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void 
*saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < __MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 41f049ecb5c8d19a327863a61316f573f97bacec..19a13ed5406b3a8fa34997f5408e5638107ff6e6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2696,6 +2696,7 @@ static int __init kfunc_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set); ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, ARRAY_SIZE(generic_dtors), THIS_MODULE); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 266e84baea8456f135048a584ccc63e488850600..7131d7bf92d7b1c28c2c1b96c98a0f57fecc3695 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2554,6 +2554,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case 
BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_NETFILTER: @@ -3820,6 +3823,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: @@ -3978,6 +3985,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4043,6 +4053,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: @@ -4094,6 +4107,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ -5054,6 +5070,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbf2df404460123ccdac1f3990181034336d2565..3a85c3ff4a4358c3dde96548982233c1ee7a6be9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5576,6 +5576,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case 
BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; diff --git a/net/Kconfig b/net/Kconfig index 2fc1860faeb40821ea39ad4146e6a0df84441f4f..fd08800cb13042e9ebacfae95261bc47da4b412f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -348,6 +348,16 @@ config BPF_STREAM_PARSER Enabling this allows a TCP stream parser to be used with BPF_MAP_TYPE_SOCKMAP. +config HISOCK + bool "enable HiSock Redirect Framework" + depends on INET + depends on CGROUP_BPF + depends on BPF_SYSCALL + default n + help + Enable HiSock, which bypasses net filter rules for specific + connections selected by bpf prog on both TX and RX directions. + config NET_FLOW_LIMIT bool depends on RPS diff --git a/net/core/dev.c b/net/core/dev.c index cbb4bd4718cdf825165c9b4bbab36ddd55f30dfd..22025bab89078a1172d4d7314db8a98cbf069d0f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5029,6 +5029,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: case XDP_PASS: +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: +#endif break; default: bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); @@ -5074,27 +5077,94 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } } +#ifdef CONFIG_HISOCK +static int generic_xdp_hisock_redirect(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto free_skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4 || + ip_is_fragment(iph)) + return -EOPNOTSUPP; + + if (!pskb_may_pull(skb, iph->ihl * 4)) + goto free_skb; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto free_skb; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + goto free_skb; + + if (pskb_trim_rcsum(skb, len)) + goto free_skb; + + iph = ip_hdr(skb); + skb->transport_header
= skb->network_header + iph->ihl * 4; + + skb_orphan(skb); + + if (!skb_valid_dst(skb)) { + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto free_skb; + } + + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, iph->protocol); + rcu_read_unlock(); + + return 0; + +free_skb: + kfree_skb(skb); +out: + return -EFAULT; +} +#endif + static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct xdp_buff xdp; + struct hisock_xdp_buff hxdp; + struct xdp_buff *xdp = &hxdp.xdp; u32 act; int err; - act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); + hxdp.skb = skb; + act = netif_receive_generic_xdp(skb, xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - &xdp, xdp_prog); + xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + err = generic_xdp_hisock_redirect(skb); + if (err == -EOPNOTSUPP) + return XDP_PASS; + break; +#endif } return XDP_DROP; } diff --git a/net/core/filter.c b/net/core/filter.c index 2968f1f8dd471b063bc11d8b46c61420e5da2c64..eb888d2913e599f0598963ea5373850a4e82921f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8152,6 +8152,29 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return 
&bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -8741,6 +8764,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -11086,6 +11136,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, @@ -11995,6 +12057,81 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } + +#ifdef CONFIG_HISOCK +__bpf_kfunc struct dst_entry * +bpf_skops_get_ingress_dst(struct 
bpf_sock_ops *skops_ctx) +{ + struct bpf_sock_ops_kern *skops = (struct bpf_sock_ops_kern *)skops_ctx; + struct sock *sk = skops->sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !sk_fullsock(sk)) + return NULL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + + return dst; +} + +/* Install dst__ign on the skb stored behind the xdp_buff via skb_dst_set_noref(). Returns -EOPNOTSUPP when no skb is recorded, -EFAULT for a NULL or non-valid pointer, -EINVAL for a DST_METADATA dst, 0 otherwise. NOTE(review): the cast assumes the xdp_buff is the first member of a struct hisock_xdp_buff; presumably only do_xdp_generic() builds one, so confirm hxdp->skb is never read from unrelated memory when other XDP paths call this kfunc. NOTE(review): no reference is taken on the dst here - confirm its lifetime is guaranteed by the caller. */ __bpf_kfunc int bpf_xdp_set_ingress_dst(struct xdp_md *xdp_ctx, void *dst__ign) +{ + struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; + struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; + struct dst_entry *_dst = (struct dst_entry *)dst__ign; + + if (!hxdp->skb) + return -EOPNOTSUPP; + + if (!_dst || !virt_addr_valid(_dst)) + return -EFAULT; + + /* same as skb_valid_dst */ + if (_dst->flags & DST_METADATA) + return -EINVAL; + + skb_dst_set_noref(hxdp->skb, _dst); + return 0; +} + +/* Point the dev of the skb stored behind the xdp_buff at the device whose index is ifindex. Returns -EOPNOTSUPP when no skb is recorded, -ENODEV when the lookup fails, 0 otherwise. NOTE(review): same hisock_xdp_buff cast assumption as bpf_xdp_set_ingress_dst(); the lookup is hard-wired to init_net - confirm other network namespaces are intentionally unsupported. */ __bpf_kfunc int bpf_xdp_change_dev(struct xdp_md *xdp_ctx, u32 ifindex) +{ + struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; + struct hisock_xdp_buff *hxdp = (void *)xdp; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!hxdp->skb) + return -EOPNOTSUPP; + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + hxdp->skb->dev = dev; + return 0; +} + +/* Point skb->dev at the device whose index is ifindex. Returns -ENODEV when the lookup fails, 0 otherwise. NOTE(review): the lookup is hard-wired to init_net - confirm sockets in other network namespaces are intentionally unsupported. */ __bpf_kfunc int bpf_skb_change_dev(struct __sk_buff *skb_ctx, u32 ifindex) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + skb->dev = dev; + return 0; +} +#endif __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -12017,12 +12154,26 @@ BTF_SET8_END(bpf_kfunc_check_set_skb) BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) +#ifdef CONFIG_HISOCK +BTF_ID_FLAGS(func, bpf_xdp_set_ingress_dst) +BTF_ID_FLAGS(func, bpf_xdp_change_dev) +#endif BTF_SET8_END(bpf_kfunc_check_set_xdp) BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +#ifdef CONFIG_HISOCK +BTF_SET8_START(bpf_kfunc_check_set_sock_ops) +BTF_ID_FLAGS(func, bpf_skops_get_ingress_dst, KF_RET_NULL) +BTF_SET8_END(bpf_kfunc_check_set_sock_ops) + +BTF_SET8_START(bpf_kfunc_check_set_hisock) +BTF_ID_FLAGS(func, bpf_skb_change_dev) +BTF_SET8_END(bpf_kfunc_check_set_hisock) +#endif + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -12038,6 +12189,18 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { .set = &bpf_kfunc_check_set_sock_addr, }; +#ifdef CONFIG_HISOCK +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_ops, +}; + +static const struct btf_kfunc_id_set bpf_kfunc_set_hisock = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_hisock, +}; +#endif + static int __init bpf_kfunc_init(void) { int ret; @@ -12053,6 +12216,10 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); +#ifdef CONFIG_HISOCK + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_HISOCK, &bpf_kfunc_set_hisock); +#endif return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f49570e2f713943edd1453c26b56e2d27c38956c..89f5f3b178e1cb947ce13355430f984499766e38 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -457,6 +457,55 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) iph->daddr = fl4->daddr; } +#ifdef CONFIG_HISOCK +static int 
hisock_egress_redirect_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + + rcu_read_unlock_bh(); + + if (free_skb) { + rc = -ENETDOWN; + kfree_skb(skb); + } + + return rc; +} + +static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *iph; + + skb->protocol = htons(ETH_P_IP); + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + + if (skb_mac_header_was_set(skb)) + return hisock_egress_redirect_xmit(skb); + + iph = ip_hdr(skb); + iph_set_totlen(iph, skb->len); + ip_send_check(iph); + + return ip_finish_output2(net, sk, skb); +} +#endif + /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) @@ -537,6 +586,25 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); +#ifdef CONFIG_HISOCK + res = BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb); + switch (res) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + res = do_hisock_egress_redirect(net, sk, skb); + rcu_read_unlock(); + return res; + default: + pr_warn_once("Illegal HiSock return value %d, expect packet loss!", res); + fallthrough; + case HISOCK_DROP: + kfree_skb(skb); + rcu_read_unlock(); + return NET_XMIT_DROP; + } +#endif + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0002cd359fb119059156bec1133ab240433ee015..4505e51c3f4d2327424ac50985980d7e67bb4f6a 100644 --- a/samples/bpf/.gitignore +++ 
b/samples/bpf/.gitignore @@ -41,6 +41,7 @@ xdp_adjust_tail xdp_fwd xdp_router_ipv4 xdp_tx_iptunnel +hisock/hisock_cmd testfile.img hbm_out.log iperf.* diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3fa16412db15cafa9538605a7db975ffe365ad31..2f19faa7fb63db0d575ebc9dac91654344aa78b2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += ibumad tprogs-y += hbm +tprogs-y += hisock/hisock_cmd # Libbpf dependencies LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf @@ -96,6 +97,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +hisock_cmd-objs := hisock/hisock_cmd.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) @@ -149,6 +151,7 @@ always-y += task_fd_query_kern.o always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +always-y += hisock/bpf.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..46159c96cb18929a6bd081d5f290989a96cf495c --- /dev/null +++ b/samples/bpf/hisock/bpf.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect sample. 
+ */ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define CSUM_SHIFT_BITS 16 + +#define SOCKOPS_SUCC 1 +#define SOCKOPS_FAIL 0 + +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 + +#define MAX_NUMA 8 +#define MAX_CONN_NUMA 4096 +#define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) + +struct sock_tuple { + u32 saddr; + u32 daddr; + u16 sport; + u16 dport; +}; + +struct sock_value { + struct dst_entry *ingress_dst; + struct ethhdr ingress_eth; + bool eth_updated; + u32 ingress_ifindex; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(struct sock_value)); + __uint(max_entries, MAX_CONN); +} connmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u16)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, 128); +} speed_port SEC(".maps"); + +struct dst_entry *bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops) __ksym; +int bpf_xdp_set_ingress_dst(struct xdp_md *xdp, void *dst) __ksym; +int bpf_skb_change_dev(struct __sk_buff *skb, u32 ifindex) __ksym;
+
+/*
+ * Report whether a connection should be accelerated: true when @local is
+ * registered in speed_port as PORT_LOCAL, or @remote as PORT_REMOTE.
+ *
+ * speed_port is declared with key_size == sizeof(u16), so each lookup key
+ * must be a 16-bit object. Passing the address of the u32 argument made
+ * the helper read only its first two bytes: the port on little-endian
+ * targets, but the (always zero) upper half on big-endian ones.
+ */
+static inline bool is_speed_flow(u32 local, u32 remote)
+{
+	u16 local_port = (u16)local;
+	u16 remote_port = (u16)remote;
+	u8 *val;
+
+	val = bpf_map_lookup_elem(&speed_port, &local_port);
+	if (val && *val == PORT_LOCAL)
+		return true;
+
+	val = bpf_map_lookup_elem(&speed_port, &remote_port);
+	return val && *val == PORT_REMOTE;
+}
+
+SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + struct sock_tuple key = { 0 }; + struct sock_value val = { 0 }; + struct dst_entry *dst; + + if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + return SOCKOPS_SUCC; + + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + dst = bpf_skops_get_ingress_dst(skops); + if (!dst) + return SOCKOPS_FAIL; + + key.saddr = skops->remote_ip4; + key.sport =
bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + val.ingress_dst = dst; + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); + + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + break; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); + + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); + break; + default: + break; + } + + return SOCKOPS_SUCC; +} + +SEC("hisock_ingress") +int hisock_ingress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + if (ihdr->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + key.saddr = ihdr->saddr; + key.sport = bpf_ntohs(thdr->source); + key.daddr = ihdr->daddr; + key.dport = bpf_ntohs(thdr->dest); + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return XDP_PASS; + + if (unlikely(!val->eth_updated)) { + memcpy(val->ingress_eth.h_source, ehdr->h_dest, ETH_ALEN); + memcpy(val->ingress_eth.h_dest, ehdr->h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr->h_proto; + 
val->eth_updated = true; + } + + if (unlikely(!val->ingress_ifindex)) + val->ingress_ifindex = ctx->ingress_ifindex; + + if (likely(val->ingress_dst)) + bpf_xdp_set_ingress_dst(ctx, val->ingress_dst); + + return XDP_HISOCK_REDIRECT; +}
+
+/*
+ * Recompute the IPv4 header checksum of @ihdr in place.
+ *
+ * Only the fixed sizeof(struct iphdr) bytes are summed, so any IP options
+ * are not covered.
+ *
+ * The 32-bit accumulator is folded twice: adding the high half to the low
+ * half can itself carry out of 16 bits (e.g. 0xffff + 0x0002), and that
+ * carry would be dropped when the result is stored into the 16-bit check
+ * field, producing a wrong checksum.
+ */
+static inline void ipv4_csum(struct iphdr *ihdr)
+{
+	u16 *word = (u16 *)ihdr;
+	u32 csum = 0;
+
+	/* The checksum field itself must count as zero while summing. */
+	ihdr->check = 0;
+	for (size_t i = 0; i < sizeof(struct iphdr) >> 1; i++)
+		csum += word[i];
+
+	csum = (csum & 0xffff) + (csum >> CSUM_SHIFT_BITS);
+	csum = (csum & 0xffff) + (csum >> CSUM_SHIFT_BITS);
+
+	ihdr->check = (u16)~csum;
+}
+
+SEC("hisock_egress") +int hisock_egress_prog(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int ret; + + key.saddr = skb->remote_ip4; + key.sport = bpf_ntohl(skb->remote_port); + key.daddr = skb->local_ip4; + key.dport = skb->local_port; + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return HISOCK_PASS; + + if (unlikely(!val->eth_updated)) + goto redirect; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + ihdr->tot_len = bpf_htons(skb->len); + ipv4_csum(ihdr); + + ret = bpf_skb_change_head(skb, ETH_HLEN, 0); + if (ret < 0) + goto redirect; + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return HISOCK_DROP; + + memcpy(ehdr, &val->ingress_eth, ETH_HLEN); +redirect: + if (likely(val->ingress_ifindex)) + bpf_skb_change_dev(skb, val->ingress_ifindex); + + return HISOCK_REDIRECT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c new file mode 100644 index 0000000000000000000000000000000000000000..6b64c008b6c7b0ee271c34abf7b30bfb28afc9a3 --- /dev/null +++ b/samples/bpf/hisock/hisock_cmd.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei
Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect sample. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_util.h" +#include +#include + +#define DEF_BPF_PATH "bpf.o" +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 +#define MAX_IF_NUM 8 + +struct { + __u32 ifindex[MAX_IF_NUM]; + int if_num; + char *local_port; + char *remote_port; + char *cgrp_path; + char *bpf_path; + bool unload; + bool help; +} hisock; + +struct hisock_prog_info { + const char *prog_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int attach_flag; + int prog_fd; + bool is_xdp; +}; + +static struct hisock_prog_info prog_infos[] = { + { + .prog_name = "hisock_sockops_prog", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .attach_type = BPF_CGROUP_SOCK_OPS, + .attach_flag = 0, + .is_xdp = false, + }, + { + .prog_name = "hisock_ingress_prog", + .prog_type = BPF_PROG_TYPE_XDP, + .attach_type = BPF_XDP, + .attach_flag = XDP_FLAGS_SKB_MODE, + .is_xdp = true, + }, + { + .prog_name = "hisock_egress_prog", + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_EGRESS, + .attach_flag = 0, + .is_xdp = false, + }, +}; + +static int set_prog_type(struct bpf_object *obj) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + const char *prog_name; + int i; + + bpf_object__for_each_program(prog, obj) { + prog_name = bpf_program__name(prog); + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].prog_name, prog_name)) { + prog_type = prog_infos[i].prog_type; + attach_type = prog_infos[i].attach_type; + break; + } + } + + if (i == ARRAY_SIZE(prog_infos)) + return -1; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static int 
find_progs(struct bpf_object *obj)
+{
+	struct hisock_prog_info *info;
+	struct bpf_program *prog;
+	int i, prog_fd;
+
+	/* record the fd of every program listed in prog_infos[] */
+	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
+		info = &prog_infos[i];
+		prog = bpf_object__find_program_by_name(obj, info->prog_name);
+		if (!prog) {
+			fprintf(stderr, "ERROR: failed to find prog sec %s\n", info->prog_name);
+			return -1;
+		}
+
+		prog_fd = bpf_program__fd(prog);
+		if (prog_fd < 0) {
+			fprintf(stderr, "ERROR: failed to get fd of prog %s\n", info->prog_name);
+			return -1;
+		}
+
+		info->prog_fd = prog_fd;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse one decimal port number.
+ *
+ * Returns the port (1..65535), or -1 if @s is empty, has trailing
+ * characters or is out of range. An overflowing value is clamped by
+ * strtol() and so fails the range check as well.
+ */
+static long parse_port(const char *s)
+{
+	char *end;
+	long val;
+
+	val = strtol(s, &end, 10);
+	if (end == s || *end != '\0' || val < 1 || val > 65535)
+		return -1;
+
+	return val;
+}
+
+/*
+ * Store @status in the map @map_fd for every port named in @port_str.
+ *
+ * @port_str is a comma-separated list whose items are either a single
+ * port ("80") or an inclusive range ("8000-8010").
+ *
+ * Returns 0 on success, -1 on a malformed item, a failed map update or
+ * an allocation failure. Ports stored before the failure stay in the map.
+ */
+static int parse_port_range(const char *port_str, __u8 status, int map_fd)
+{
+	char *token, *save = NULL;
+	long start, end, cur;
+	int ret = -1;
+	__u16 port;
+	char *str;
+
+	/* strtok_r() writes into its input, so work on a private copy */
+	str = strdup(port_str);
+	if (!str) {
+		fprintf(stderr, "ERROR: failed to copy port list\n");
+		return -1;
+	}
+
+	for (token = strtok_r(str, ",", &save); token;
+	     token = strtok_r(NULL, ",", &save)) {
+		char *dash = strchr(token, '-');
+
+		if (dash) {
+			*dash = '\0';
+			start = parse_port(token);
+			end = parse_port(dash + 1);
+			if (start < 0 || end < 0 || start > end) {
+				fprintf(stderr, "Invalid port range: %s-%s\n", token, dash + 1);
+				goto out;
+			}
+		} else {
+			start = end = parse_port(token);
+			if (start < 0) {
+				fprintf(stderr, "Invalid port: %s\n", token);
+				goto out;
+			}
+		}
+
+		/* a long counter cannot wrap when the range ends at 65535 */
+		for (cur = start; cur <= end; cur++) {
+			port = cur;
+			if (bpf_map_update_elem(map_fd, &port, &status, BPF_ANY)) {
+				fprintf(stderr, "ERROR: failed to store port %u\n", port);
+				goto out;
+			}
+		}
+
+		if (dash)
+			printf("Speed port range %ld-%ld:%u\n", start, end, status);
+		else
+			printf("Speed port %ld:%u\n", start, status);
+	}
+
+	ret = 0;
+out:
+	free(str);
+	return ret;
+}
+
+static int set_speed_port(struct bpf_object *obj)
+{
+	int map_fd;
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: failed to find map fd\n");
+		return -1;
+	}
+
+	if (hisock.local_port &&
+	    parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) {
+		fprintf(stderr, "ERROR: failed to update local port\n");
+		return -1;
+	}
+
+	if (hisock.remote_port &&
+	    parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) {
+		fprintf(stderr, "ERROR: failed to update remote
port\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Detach every program listed in prog_infos[]: XDP programs from each
+ * interface in hisock.ifindex[], the others from the cgroup at
+ * hisock.cgrp_path. All detaches are attempted even if some fail.
+ *
+ * Returns 0 on success, -1 if the cgroup cannot be opened, otherwise the
+ * negated number of failed detaches.
+ */
+static int detach_progs(void)
+{
+	struct hisock_prog_info *info;
+	int i, j, cgrp_fd;
+	int err_cnt = 0;
+
+	/* O_RDONLY belongs in the flags; open()'s third argument is a mode */
+	cgrp_fd = open(hisock.cgrp_path, O_RDONLY | O_DIRECTORY);
+	if (cgrp_fd < 0) {
+		fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path);
+		return -1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
+		info = &prog_infos[i];
+		if (info->is_xdp) {
+			for (j = 0; j < hisock.if_num; j++) {
+				if (bpf_xdp_detach(hisock.ifindex[j],
+						   info->attach_flag, NULL)) {
+					fprintf(stderr,
+						"ERROR: failed to detach prog %s\n",
+						info->prog_name);
+					err_cnt++;
+				}
+			}
+			continue;
+		}
+
+		if (bpf_prog_detach(cgrp_fd, info->attach_type)) {
+			fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name);
+			err_cnt++;
+		}
+	}
+
+	close(cgrp_fd);
+	return -err_cnt;
+}
+
+/*
+ * Attach every program listed in prog_infos[]: XDP programs to each
+ * interface in hisock.ifindex[], the others to the cgroup at
+ * hisock.cgrp_path. On the first failure detach_progs() is called to
+ * undo the attachments.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int attach_progs(void)
+{
+	struct hisock_prog_info *info;
+	int i, j, cgrp_fd;
+
+	/* O_RDONLY belongs in the flags; open()'s third argument is a mode */
+	cgrp_fd = open(hisock.cgrp_path, O_RDONLY | O_DIRECTORY);
+	if (cgrp_fd < 0) {
+		fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path);
+		return -1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
+		info = &prog_infos[i];
+		if (info->is_xdp) {
+			for (j = 0; j < hisock.if_num; j++) {
+				if (bpf_xdp_attach(hisock.ifindex[j], info->prog_fd,
+						   info->attach_flag, NULL))
+					goto fail;
+			}
+			continue;
+		}
+
+		if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type,
+				    info->attach_flag))
+			goto fail;
+	}
+
+	close(cgrp_fd);
+	return 0;
+fail:
+	fprintf(stderr, "ERROR: failed to attach prog %s\n", info->prog_name);
+	close(cgrp_fd);
+	detach_progs();
+	return -1;
+}
+
+static int do_hisock(void)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_object *obj;
+
+	setrlimit(RLIMIT_MEMLOCK, &r);
+
+	obj = bpf_object__open(hisock.bpf_path);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: failed to open bpf file\n");
+		return -1;
+	}
+
+	if (set_prog_type(obj)) {
+		fprintf(stderr, "ERROR: failed to set prog
type\n");
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: failed to load bpf obj\n");
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	if (find_progs(obj)) {
+		fprintf(stderr, "ERROR: failed to find progs\n");
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	if (set_speed_port(obj)) {
+		fprintf(stderr, "ERROR: failed to set speed port\n");
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	if (attach_progs()) {
+		fprintf(stderr, "ERROR: failed to attach progs\n");
+		bpf_object__close(obj);
+		return -1;
+	}
+
+	bpf_object__close(obj);
+	return 0;
+}
+
+/* Print the usage text to stderr. */
+static void do_help(void)
+{
+	fprintf(stderr,
+		"Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] "
+		"[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n"
+		"Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n");
+}
+
+/*
+ * Parse the command line into the global hisock options.
+ *
+ * -c and at least one -i are required; loading additionally needs -p or
+ * -r. -h skips that validation so that main() can print the usage text.
+ *
+ * Returns 0 on success, -1 on an invalid option, an unknown interface,
+ * more than MAX_IF_NUM interfaces or a missing required option.
+ */
+static int parse_args(int argc, char **argv)
+{
+	unsigned int ifindex;
+	int opt;
+
+	hisock.bpf_path = DEF_BPF_PATH;
+	hisock.if_num = 0;
+
+	while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) {
+		switch (opt) {
+		case 'f':
+			hisock.bpf_path = optarg;
+			break;
+		case 'c':
+			hisock.cgrp_path = optarg;
+			break;
+		case 'p':
+			hisock.local_port = optarg;
+			break;
+		case 'r':
+			hisock.remote_port = optarg;
+			break;
+		case 'i':
+			/* hisock.ifindex[] has room for MAX_IF_NUM entries only */
+			if (hisock.if_num >= MAX_IF_NUM) {
+				fprintf(stderr, "ERROR: too many interfaces (max %d)\n",
+					MAX_IF_NUM);
+				return -1;
+			}
+
+			/* if_nametoindex() returns 0 for an unknown name */
+			ifindex = if_nametoindex(optarg);
+			if (!ifindex) {
+				fprintf(stderr, "ERROR: unknown interface %s\n", optarg);
+				return -1;
+			}
+
+			hisock.ifindex[hisock.if_num++] = ifindex;
+			break;
+		case 'u':
+			hisock.unload = true;
+			break;
+		case 'h':
+			hisock.help = true;
+			break;
+		default:
+			/* getopt() returned '?'; the offending option is in optopt */
+			fprintf(stderr, "ERROR: invalid option -%c\n", optopt);
+			return -1;
+		}
+	}
+
+	/* a help request needs no further arguments */
+	if (hisock.help)
+		return 0;
+
+	if (hisock.cgrp_path == NULL ||
+	    hisock.if_num == 0 ||
+	    (!hisock.unload &&
+	     hisock.local_port == NULL &&
+	     hisock.remote_port == NULL)) {
+		do_help();
+		return -1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	if (parse_args(argc, argv)) {
+		fprintf(stderr, "ERROR: failed to parse args\n");
+		return -1;
+	}
+
+	if (hisock.help) {
+		do_help();
+		return 0;
+	}
+
+	if (hisock.unload) {
+		if (detach_progs()) {
+			fprintf(stderr, "ERROR: failed to
detach progs\n");
+			return -1;
+		}
+
+		printf("Unload HiSock successfully\n");
+		return 0;	/* -u only detaches; nothing is loaded afterwards */
+	}
+
+	if (do_hisock()) {	/* default action: load the object, fill the port map, attach */
+		fprintf(stderr, "ERROR: failed to do hisock\n");
+		return -1;
+	}
+
+	printf("Load HiSock successfully\n");
+	return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9b302242be6c65a1b31a7dd8219380b7e2c675ca..a457d4f82da2850f0ab8a23ff3eac698e5fea938 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1006,6 +1006,9 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 	BPF_PROG_TYPE_NETFILTER,
 	BPF_PROG_TYPE_SCHED,
+#ifndef __GENKSYMS__
+	BPF_PROG_TYPE_HISOCK,
+#endif
 };
 
 enum bpf_attach_type {
@@ -1059,6 +1062,9 @@ enum bpf_attach_type {
 	BPF_TCX_EGRESS,
 	BPF_TRACE_UPROBE_MULTI,
 	BPF_SCHED,
+#ifndef __GENKSYMS__
+	BPF_HISOCK_EGRESS,
+#endif
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -6313,6 +6319,7 @@ enum xdp_action {
 	XDP_PASS,
 	XDP_TX,
 	XDP_REDIRECT,
+	XDP_HISOCK_REDIRECT = 100,	/* explicit value, apart from the sequential actions above */
 };
 
 /* user accessible metadata for XDP packet hook
@@ -7355,4 +7362,11 @@ struct bpf_iter_num {
 	__u64 __opaque[1];
 } __attribute__((aligned(8)));
 
+enum hisock_action {	/* return codes used by BPF_PROG_TYPE_HISOCK programs in the sample */
+	HISOCK_PASS,	/* 0; NOTE(review): kernel-side handling of each code is not visible here */
+	HISOCK_DROP,
+	HISOCK_REDIRECT,
+	__MAX_HISOCK_ACTION,	/* one past the last action */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a57f9afffe9881435d93a91cfd18f804a71a61be..828c1d2f173eadb38ff2f4ae5e406d0861f3f114 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -123,6 +123,7 @@ static const char * const attach_type_name[] = {
 	[BPF_TCX_EGRESS] = "tcx_egress",
 	[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
 	[BPF_SCHED] = "sched",
+	[BPF_HISOCK_EGRESS] = "hisock_egress",
 };
 
 static const char * const link_type_name[] = {
@@ -212,6 +213,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SYSCALL] = "syscall",
 	[BPF_PROG_TYPE_NETFILTER] = "netfilter",
 	[BPF_PROG_TYPE_SCHED] = "sched",
+	[BPF_PROG_TYPE_HISOCK] = "hisock",
 };
 
 static int
__base_pr(enum libbpf_print_level level, const char *format, @@ -8873,6 +8875,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), }; int libbpf_register_prog_handler(const char *sec,