diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 42eb679a5b0ad007093815068be4cfdf9cf7e0ec..b709f5fae8a46f38898dc52af81bd3fbbb441284 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1857,6 +1857,7 @@ CONFIG_BQL=y CONFIG_BPF_JIT=y CONFIG_BPF_STREAM_PARSER=y CONFIG_EULER_SOCKETMAP=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index f837a55e7abd4f0e74a8b2b0dca78d0903a62c6e..01faf12ddaac6d28b5166aefeed750a4ec9de60d 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -208,6 +208,10 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, AARCH64_INSN_LDST_LOAD_EX, AARCH64_INSN_LDST_STORE_EX, +#ifdef CONFIG_HISOCK + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET, + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET, +#endif }; enum aarch64_insn_adsb_type { @@ -297,6 +301,12 @@ __AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) +__AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) +__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) +__AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) +__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) +__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) +__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) @@ -307,6 +317,8 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) 
+__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -339,6 +351,7 @@ __AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00) __AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000) __AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000) __AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000) +__AARCH64_INSN_FUNCS(mov_reg, 0x7FE0FFE0, 0x2A0003E0) __AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000) __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000) __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index fbd2b7eec1dc5c4cb38386ec96da546bda5224fb..25390b069a05c4f70eb5699fc68f3faa6e461339 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -745,6 +745,14 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX: insn = aarch64_insn_get_stp_post_value(); break; +#ifdef CONFIG_HISOCK + case AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_ldp_value(); + break; + case AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_stp_value(); + break; +#endif default: pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 1835dffb810462648a3cdf52d8bd42c71b9dfc5f..c670f35f302c30570895d4db9efcc565a301634a 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -96,6 +96,21 @@ /* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */ #define A64_POP(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX) +#ifdef CONFIG_HISOCK +#define A64_STP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, STORE, SIGNED_OFFSET) +#define A64_LDP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, LOAD, SIGNED_OFFSET) +#define A64_STP32(Wt, Wt2, Rn, offset) \ + 
aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET) +#define A64_LDP32(Wt, Wt2, Rn, offset) \ + aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET) +#endif + /* Load/store exclusive */ #define A64_SIZE(sf) \ ((sf) ? AARCH64_INSN_SIZE_64 : AARCH64_INSN_SIZE_32) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index af5760c0d2e7f5f6a7d4b8ae95ede89678b3e290..e2e065382984d49fd6bdf26b45f86c91b8e14bd9 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -21,11 +21,26 @@ #include "bpf_jit.h" +#ifdef CONFIG_HISOCK +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) +#define TMP_REG_3 (MAX_BPF_JIT_REG + 4) +#define TMP_REG_4 (MAX_BPF_JIT_REG + 5) +#define TMP_REG_5 (MAX_BPF_JIT_REG + 6) +#define TMP_REG_6 (MAX_BPF_JIT_REG + 7) +#define TMP_REG_7 (MAX_BPF_JIT_REG + 8) +#define TMP_REG_8 (MAX_BPF_JIT_REG + 9) +#define TMP_REG_9 (MAX_BPF_JIT_REG + 10) +#define TMP_REG_10 (MAX_BPF_JIT_REG + 11) +#else #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) +#endif /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -48,6 +63,15 @@ static const int bpf2a64[] = { [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), [TMP_REG_3] = A64_R(12), +#ifdef CONFIG_HISOCK + [TMP_REG_4] = A64_R(13), + [TMP_REG_5] = A64_R(14), + [TMP_REG_6] = A64_R(15), + [TMP_REG_7] = A64_R(5), + [TMP_REG_8] = A64_R(6), + [TMP_REG_9] = A64_R(7), + [TMP_REG_10] = A64_R(28), +#endif /* tail_call_cnt */ [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ @@ -565,6 +589,234 @@ static int 
add_exception_handler(const struct bpf_insn *insn, return 0; } +#ifdef CONFIG_HISOCK +static bool support_unaligned_access(void) +{ + unsigned long sctlr = SCTLR_ELx_A; + + switch (read_sysreg(CurrentEL)) { + case CurrentEL_EL1: + sctlr = read_sysreg(sctlr_el1); + break; + case CurrentEL_EL2: + sctlr = read_sysreg(sctlr_el2); + break; + default: + /* not EL1 and EL2 ? */ + break; + } + + return (sctlr & SCTLR_ELx_A) ? false : true; +} + +extern u64 bpf_ext_memcpy(void *dst, size_t dst_sz, + const void *src, size_t src_sz); + +static void emit_memcpy(struct jit_ctx *ctx, int size) +{ + u8 dst = bpf2a64[BPF_REG_1]; + u8 src = bpf2a64[BPF_REG_3]; + u8 tmp1 = bpf2a64[TMP_REG_1]; + u8 tmp2 = bpf2a64[TMP_REG_2]; + u8 tmp3 = bpf2a64[TMP_REG_3]; + u8 tmp4 = bpf2a64[TMP_REG_4]; + u8 tmp5 = bpf2a64[TMP_REG_5]; + u8 tmp6 = bpf2a64[TMP_REG_6]; + u8 tmp7 = bpf2a64[TMP_REG_7]; + u8 tmp8 = bpf2a64[TMP_REG_8]; + u8 tmp9 = bpf2a64[TMP_REG_9]; + u8 tmp10 = bpf2a64[TMP_REG_10]; + + if (!support_unaligned_access()) { + emit_call((u64)bpf_ext_memcpy, ctx); + return; + } + + switch (size) { + case 0: + break; + case 1: + emit(A64_LDRBI(tmp1, src, 0), ctx); + emit(A64_STRBI(tmp1, dst, 0), ctx); + break; + case 2: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + break; + case 3: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 2), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 2), ctx); + break; + case 4: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + break; + case 5: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 4), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 4), ctx); + break; + case 6: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRHI(tmp2, dst, 4), ctx); + break; + case 7: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + 
emit(A64_LDRBI(tmp3, src, 6), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRHI(tmp2, dst, 4), ctx); + emit(A64_STRBI(tmp3, dst, 6), ctx); + break; + case 8: + emit(A64_LDR64I(tmp1, src, 0), ctx); + emit(A64_STR64I(tmp1, dst, 0), ctx); + break; + case 9 ... 15: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDR64I(tmp3, src, 0), ctx); + emit(A64_LDP32(tmp4, tmp5, tmp1, -8), ctx); + emit(A64_STR64I(tmp3, dst, 0), ctx); + emit(A64_STP32(tmp4, tmp5, tmp2, -8), ctx); + break; + case 16: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + break; + case 17 ... 31: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 32: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + break; + case 33 ... 63: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, src, 16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, dst, 16), ctx); + emit(A64_LDP(tmp3, tmp4, tmp1, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, tmp2, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 64: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + break; + case 65 ... 
95: + /* copy first 48 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + + /* copy last 48 bytes */ + emit(A64_ADD_I(1, tmp7, src, size), ctx); + emit(A64_ADD_I(1, tmp8, dst, size), ctx); + + emit(A64_LDP(tmp1, tmp2, tmp7, -48), ctx); + emit(A64_LDP(tmp3, tmp4, tmp7, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp7, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp8, -48), ctx); + emit(A64_STP(tmp3, tmp4, tmp8, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp8, -16), ctx); + break; + case 96: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_STP(tmp1, tmp2, dst, 64), ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + break; + case 97 ... 
127: + emit(A64_ADD_I(1, tmp9, src, size), ctx); + emit(A64_ADD_I(1, tmp10, dst, size), ctx); + + /* copy first 64 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + /* copy last 64 bytes */ + emit(A64_LDP(tmp1, tmp2, tmp9, -64), ctx); + emit(A64_LDP(tmp3, tmp4, tmp9, -48), ctx); + emit(A64_LDP(tmp5, tmp6, tmp9, -32), ctx); + emit(A64_LDP(tmp7, tmp8, tmp9, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp10, -64), ctx); + emit(A64_STP(tmp3, tmp4, tmp10, -48), ctx); + emit(A64_STP(tmp5, tmp6, tmp10, -32), ctx); + emit(A64_STP(tmp7, tmp8, tmp10, -16), ctx); + break; + case 128: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_LDP(tmp5, tmp6, src, 96), ctx); + emit(A64_LDP(tmp7, tmp8, src, 112), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 64), ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + emit(A64_STP(tmp5, tmp6, dst, 96), ctx); + emit(A64_STP(tmp7, tmp8, dst, 112), ctx); + break; + default: + emit_call((u64)bpf_ext_memcpy, ctx); + break; + } +} +#endif + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. 
@@ -915,6 +1167,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool func_addr_fixed; u64 func_addr; +#ifdef CONFIG_HISOCK + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_ext_memcpy) { + emit_memcpy(ctx, insn->off); + break; + } +#endif + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) @@ -1461,6 +1720,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) return prog; } +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void) +{ + return true; +} +#endif + u64 bpf_jit_alloc_exec_limit(void) { return BPF_JIT_REGION_SIZE; diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index db3e2d29b94e1d72d19647113e3b047dd2381431..48cdc46b5d335bc0d7f3393fd488b881db386b02 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1781,6 +1781,7 @@ CONFIG_BQL=y CONFIG_BPF_JIT=y CONFIG_BPF_STREAM_PARSER=y CONFIG_EULER_SOCKETMAP=y +# CONFIG_HISOCK is not set CONFIG_NET_FLOW_LIMIT=y # diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8347817d713cb3fc5dafea2b9745ebac857a1ed2..948ab36ae29e31267649fe3374f72f8b4bd6a029 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -50,7 +50,12 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, #ifdef CONFIG_KABI_RESERVE +#ifdef CONFIG_HISOCK + KABI_BROKEN_REMOVE_ENUM(CGROUP_ATTACH_TYPE_KABI_RESERVE_1) + KABI_BROKEN_INSERT_ENUM(HISOCK_EGRESS) +#else CGROUP_ATTACH_TYPE_KABI_RESERVE_1, +#endif CGROUP_ATTACH_TYPE_KABI_RESERVE_2, CGROUP_ATTACH_TYPE_KABI_RESERVE_3, CGROUP_ATTACH_TYPE_KABI_RESERVE_4, @@ -58,6 +63,10 @@ enum cgroup_bpf_attach_type { CGROUP_ATTACH_TYPE_KABI_RESERVE_6, CGROUP_ATTACH_TYPE_KABI_RESERVE_7, CGROUP_ATTACH_TYPE_KABI_RESERVE_8, +#else +#ifdef CONFIG_HISOCK + HISOCK_EGRESS, +#endif #endif MAX_CGROUP_BPF_ATTACH_TYPE }; @@ -92,6 +101,9 @@ to_cgroup_bpf_attach_type(enum 
bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -237,6 +249,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user *optlen, int max_optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -446,6 +463,21 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && \ + sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -526,6 +558,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 57954e35fd36e970ae3258eb5855643d690e196d..188c19e1142d537732652e5229b21168b4a937ad 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) 
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/linux/filter.h b/include/linux/filter.h index 758adb32d3521d579f5f3460d9112ef8a940da56..602d2f358eb040d02668e40b3b2b8b2004250543 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -922,6 +922,9 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void); +#endif u64 bpf_arch_uaddress_limit(void); bool bpf_helper_changes_pkt_data(void *func); diff --git a/include/net/xdp.h b/include/net/xdp.h index 90064cdab5197a55b935aaeba587226fdd3d66b8..66434c2eae0de93aed244aad163789f0fed065e7 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -107,6 +107,11 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) } #endif +struct hisock_xdp_buff { + struct xdp_buff xdp; + struct sk_buff *skb; +}; + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ab746f19f6246254f00c2f530dcc9fc0e96def30..a83e3fac7e01192063bb98529ebc18dc208a65d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -202,6 +202,7 @@ enum bpf_prog_type { #ifndef __GENKSYMS__ BPF_PROG_TYPE_SCHED, BPF_PROG_TYPE_NET_GLOBAL, + BPF_PROG_TYPE_HISOCK, #endif }; @@ -250,6 +251,7 @@ enum bpf_attach_type { BPF_GNET_SK_DST_SET, BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, + BPF_HISOCK_EGRESS, #endif __MAX_BPF_ATTACH_TYPE }; @@ -3940,6 +3942,37 @@ union bpf_attr { * set prefer cpumask for the task. 
* Return * 0 on success, or a negative error in case of failure. + * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** for hisock_egress programs. + * Return + * 0 on success, or negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4118,6 +4151,10 @@ union bpf_attr { FN(get_node_stats), \ FN(sched_net_rship_submit), \ FN(sched_set_task_prefer_cpumask), \ + FN(get_ingress_dst), \ + FN(set_ingress_dst), \ + FN(change_skb_dev), \ + FN(ext_memcpy), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4485,6 +4522,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook @@ -5283,4 +5321,11 @@ struct bpf_gnet_ctx { __u64 rx_dev_netns_cookie; }; +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3b9addda876535b86f67085fffd07bdae4c32c85..41bd6032c6b47d4db4488d1f615aed45827e6dec 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -488,6 +488,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1221,6 +1227,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = 
bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < __MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bc9bc96c0c4b36586759fd83d08da96d80cc6ff3..987079bdf2b4568ab52efa48bc69dadfbb40515e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2358,6 +2358,13 @@ bool __weak bpf_jit_needs_zext(void) return false; } +#ifdef CONFIG_HISOCK +bool __weak bpf_jit_supports_ext_helper(void) +{ + return false; +} +#endif + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. 
*/ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 205c880d9e0148a23f3a59ac4ae077caff25ec26..be142202bb6f7adc7db09af1da0468e42b9fe31d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -665,6 +665,29 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = { .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, }; +#ifdef CONFIG_HISOCK +BPF_CALL_4(bpf_ext_memcpy, void *, dst, size_t, dst_sz, + const void *, src, size_t, src_sz) +{ + if (dst_sz < src_sz) + return -EINVAL; + + memcpy(dst, src, src_sz); + return 0; +} + +const struct bpf_func_proto bpf_ext_memcpy_proto = { + .func = bpf_ext_memcpy, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM | MEM_UNINIT, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE, +}; +#endif + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -721,6 +744,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_sched_tg_tag_of_proto; case BPF_FUNC_sched_task_tag_of: return &bpf_sched_task_tag_of_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_ext_memcpy: + return &bpf_ext_memcpy_proto; +#endif default: break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b9846d8a8bde837d94bd922d32ffdc79066c3542..3b414e36df6b3268fc9013da8c00d052085e5646 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2111,6 +2111,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ #ifdef CONFIG_BPF_NET_GLOBAL_PROG @@ -3002,6 +3005,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: 
case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: return BPF_PROG_TYPE_TRACING; case BPF_SK_LOOKUP: @@ -3098,6 +3105,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; #ifdef CONFIG_BPF_NET_GLOBAL_PROG @@ -3140,6 +3150,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif return cgroup_bpf_prog_detach(attr, ptype); #ifdef CONFIG_BPF_NET_GLOBAL_PROG case BPF_PROG_TYPE_NET_GLOBAL: @@ -3186,6 +3199,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_SYSCTL: case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ -4169,6 +4185,9 @@ static int link_create(union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_TRACING: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 11414b1efc50437294fd5159d0143649f75ae562..5d9f2f67165358842484c01a95e1adb716ddbd07 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3359,6 +3359,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; @@ -5896,6 +5899,21 @@ static 
int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } +#ifdef CONFIG_HISOCK + if (func_id == BPF_FUNC_ext_memcpy) { + /* XXX: cleanup & check if allowed to access dst mem */ + u32 regno = BPF_REG_1 + 3; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; + + if (!bpf_jit_supports_ext_helper() || + reg->umax_value <= 0 || reg->umax_value > 4096) + return -ENOTSUPP; + + insn->off = reg->umax_value; + } +#endif + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -10497,7 +10515,11 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || +#ifdef CONFIG_HISOCK + (insn->off != 0 && insn->imm != BPF_FUNC_ext_memcpy) || +#else insn->off != 0 || +#endif (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || insn->dst_reg != BPF_REG_0 || @@ -12223,6 +12245,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) continue; } +#ifdef CONFIG_HISOCK + /* will fixup bpf extension helper in jit */ + if (insn->imm == BPF_FUNC_ext_memcpy) + continue; +#endif + patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/net/Kconfig b/net/Kconfig index 071f8ee3b89fdd66c325097d937cc45ccbbf9298..d6d925d0712f963ffd4409ddee5a16fe25c23059 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -329,6 +329,16 @@ config EULER_SOCKETMAP help Enabling this support socket map in EulerOS. +config HISOCK + bool "enable HiSock Redirect Framework" + depends on INET + depends on CGROUP_BPF + depends on BPF_SYSCALL + default n + help + Enalbe HiSock, which bypasses net filter rules for specific + connections selected by bpf prog on both TX and RX directions. 
+ config NET_FLOW_LIMIT bool depends on RPS diff --git a/net/core/dev.c b/net/core/dev.c index eda1975ef55b2019e53be3a63a513c872f0f7856..a0567446f9690bcc4100549495036b2985ab3a9d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4968,6 +4968,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (metalen) skb_metadata_set(skb, metalen); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + break; +#endif default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -5008,27 +5012,91 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } } +#ifdef CONFIG_HISOCK +static void generic_xdp_hisock_redirect(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto free_skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + goto free_skb; + + if (!pskb_may_pull(skb, iph->ihl * 4)) + goto free_skb; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto free_skb; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + goto free_skb; + + if (pskb_trim_rcsum(skb, len)) + goto free_skb; + + iph = ip_hdr(skb); + skb->transport_header = skb->network_header + iph->ihl * 4; + + skb_orphan(skb); + + if (!skb_valid_dst(skb)) { + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto free_skb; + } + + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, iph->protocol); + rcu_read_unlock(); + + return; + +free_skb: + kfree_skb(skb); +out: + return; +} +#endif + static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct xdp_buff xdp; + struct hisock_xdp_buff hxdp; + struct xdp_buff *xdp = &hxdp.xdp; u32 act; int err; - act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); + 
hxdp.skb = skb; + act = netif_receive_generic_xdp(skb, xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - &xdp, xdp_prog); + xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + generic_xdp_hisock_redirect(skb); + break; +#endif } return XDP_DROP; } diff --git a/net/core/filter.c b/net/core/filter.c index 62d09520a55d73037efebe0f7f30fc1a4241c7c0..65e12bf22a68e3af8e952d0d04fa41a6addf1fd9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3691,6 +3691,30 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .arg4_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_skb_change_skb_dev, struct sk_buff *, skb, u32, ifindex) +{ + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_skb_change_skb_dev_proto = { + .func = bpf_skb_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { u32 min_len = skb_network_offset(skb); @@ -6360,6 +6384,64 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_xdp_set_ingress_dst, struct xdp_buff *, xdp, void *, dst) +{ + struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; + struct dst_entry *_dst = (struct dst_entry *)dst; + + if (!hxdp->skb) + return -EOPNOTSUPP; + + if (!_dst || !virt_addr_valid(_dst)) + return -EFAULT; + + /* same as skb_valid_dst */ + if (_dst->flags & DST_METADATA) + return -EINVAL; + + skb_dst_set_noref(hxdp->skb, _dst); + return 0; +} + +static const struct bpf_func_proto bpf_xdp_set_ingress_dst_proto = { + .func = 
bpf_xdp_set_ingress_dst, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_xdp_change_skb_dev, struct xdp_buff *, xdp, u32, ifindex) +{ + struct hisock_xdp_buff *hxdp = (void *)xdp; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!hxdp->skb) + return -EOPNOTSUPP; + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + hxdp->skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_xdp_change_skb_dev_proto = { + .func = bpf_xdp_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -7066,6 +7148,34 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BTF_ID_LIST_SINGLE(btf_dst_entity_ids, struct, dst_entry) +BPF_CALL_1(bpf_sock_ops_get_ingress_dst, struct bpf_sock_ops_kern *, sops) +{ + struct sock *sk = sops->sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !sk_fullsock(sk)) + return (unsigned long)NULL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + + return (unsigned long)dst; +} + +const struct bpf_func_proto bpf_sock_ops_get_ingress_dst_proto = { + .func = bpf_sock_ops_get_ingress_dst, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .ret_btf_id = &btf_dst_entity_ids[0], +}; +#endif + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7292,6 +7402,31 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum 
bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_skb_change_skb_dev_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -7435,6 +7570,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_set_ingress_dst: + return &bpf_xdp_set_ingress_dst_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_xdp_change_skb_dev_proto; +#endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_udp: return &bpf_xdp_sk_lookup_udp_proto; @@ -7496,6 +7637,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_ops_store_hdr_opt_proto; case BPF_FUNC_reserve_hdr_opt: return &bpf_sock_ops_reserve_hdr_opt_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_get_ingress_dst: + return &bpf_sock_ops_get_ingress_dst_proto; +#endif case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ @@ -7829,6 +7974,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case 
bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -9952,6 +10124,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 79ff136ae6f52d7dee849eaa4878091cbe7bfaa5..65f5f6a2b2d4478a4d276b8b675f66468db57e30 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -457,6 +457,55 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) iph->daddr = fl4->daddr; } +#ifdef CONFIG_HISOCK +static int hisock_egress_redirect_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + + rcu_read_unlock_bh(); + + if (free_skb) { + rc = -ENETDOWN; + kfree_skb(skb); + } 
+ + return rc; +} + +static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *iph; + + skb->protocol = htons(ETH_P_IP); + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + + if (skb_mac_header_was_set(skb)) + return hisock_egress_redirect_xmit(skb); + + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len); + ip_send_check(iph); + + return ip_finish_output2(net, sk, skb); +} +#endif + /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) @@ -537,6 +586,23 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; +#ifdef CONFIG_HISOCK + res = BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb); + switch (res) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + res = do_hisock_egress_redirect(net, sk, skb); + rcu_read_unlock(); + return res; + case HISOCK_DROP: + default: + kfree_skb(skb); + rcu_read_unlock(); + return NET_XMIT_DROP; + } +#endif + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index b2f29bc8dc43a6d8389f9cfa86fa829b16be9a12..730ce50266098c752863bf1af51a22ec6fefcd4f 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -52,3 +52,4 @@ xdp_tx_iptunnel xdpsock xsk_fwd testfile.img +hisock/hisock_cmd diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index aeebf5d12f32c09f7782e5f406fa715104ef7ac9..8935717b35614e12ddff2388bde73dfc828c7848 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -54,6 +54,7 @@ tprogs-y += task_fd_query tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm +tprogs-y += hisock/hisock_cmd # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -111,6 +112,7 @@ task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o 
$(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) +hisock_cmd-objs := hisock/hisock_cmd.o # Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -172,6 +174,7 @@ always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o always-y += xdpsock_kern.o +always-y += hisock/bpf.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..b92c57ece6bf72e5bf15aff0191320b87274e383 --- /dev/null +++ b/samples/bpf/hisock/bpf.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect Framework sample. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#define CSUM_SHIFT_BITS 16 + +#define SOCKOPS_SUCC 1 +#define SOCKOPS_FAIL 0 + +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 + +#define MAX_NUMA 8 +#define MAX_CONN_NUMA 4096 +#define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) + +struct sock_tuple { + u32 saddr; + u32 daddr; + u16 sport; + u16 dport; +}; + +struct sock_value { + struct ethhdr ingress_eth; + bool eth_updated; + u32 ingress_ifindex; + void *ingress_dst; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(struct sock_value)); + __uint(max_entries, MAX_CONN); +} connmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u16)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, 128); +} speed_port SEC(".maps"); + +static inline bool is_speed_flow(u32 local, u32 remote) +{ + u8 *val; + + val = bpf_map_lookup_elem(&speed_port, &local); + if (val && *val == PORT_LOCAL) + return true; + + val = bpf_map_lookup_elem(&speed_port, 
&remote); + if (val && *val == PORT_REMOTE) + return true; + + return false; +} + +SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + struct sock_tuple key = { 0 }; + struct sock_value val = { 0 }; + void *dst; + + if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + return SOCKOPS_SUCC; + + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + dst = bpf_get_ingress_dst(skops); + if (!dst) + return SOCKOPS_FAIL; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + val.ingress_dst = dst; + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); + + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + break; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); + + bpf_sock_ops_cb_flags_set(skops, skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_STATE_CB_FLAG); + break; + } + + return SOCKOPS_SUCC; +} + +SEC("hisock_ingress") +int hisock_ingress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if 
(thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + key.saddr = ihdr->saddr; + key.sport = bpf_ntohs(thdr->source); + key.daddr = ihdr->daddr; + key.dport = bpf_ntohs(thdr->dest); + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return XDP_PASS; + + if (unlikely(!val->eth_updated)) { + bpf_ext_memcpy(val->ingress_eth.h_source, ETH_ALEN, + ehdr->h_dest, ETH_ALEN); + bpf_ext_memcpy(val->ingress_eth.h_dest, ETH_ALEN, + ehdr->h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr->h_proto; + val->eth_updated = true; + } + + if (unlikely(!val->ingress_ifindex)) + val->ingress_ifindex = ctx->ingress_ifindex; + + if (likely(val->ingress_dst)) + bpf_set_ingress_dst(ctx, val->ingress_dst); + + return XDP_HISOCK_REDIRECT; +} + +static inline void ipv4_csum(struct iphdr *ihdr) +{ + u32 csum = 0; + u16 *next_ip_u16 = (u16 *)ihdr; + + ihdr->check = 0; + for (size_t i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_ip_u16++; + + ihdr->check = ~((csum & 0xffff) + (csum >> CSUM_SHIFT_BITS)); +} + +SEC("hisock_egress") +int hisock_egress_prog(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int ret; + + key.saddr = skb->remote_ip4; + key.sport = bpf_ntohl(skb->remote_port); + key.daddr = skb->local_ip4; + key.dport = skb->local_port; + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return HISOCK_PASS; + + if (unlikely(!val->eth_updated)) + goto redirect; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + ihdr->tot_len = bpf_htons(skb->len); + ipv4_csum(ihdr); + + ret = bpf_skb_change_head(skb, ETH_HLEN, 0); + if (ret < 0) + goto redirect; + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return HISOCK_DROP; + + bpf_ext_memcpy(ehdr, ETH_HLEN, 
&val->ingress_eth, ETH_HLEN); +redirect: + if (likely(val->ingress_ifindex)) + bpf_change_skb_dev(skb, val->ingress_ifindex); + + return HISOCK_REDIRECT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c new file mode 100644 index 0000000000000000000000000000000000000000..f0a74d8a7a3d84d42801a47794393ee535ef47ca --- /dev/null +++ b/samples/bpf/hisock/hisock_cmd.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Network Boost sample. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_util.h" +#include +#include + +#define DEF_BPF_PATH "bpf.o" +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 +#define MAX_IF_NUM 8 + +struct { + __u32 ifindex[MAX_IF_NUM]; + int if_num; + char *local_port; + char *remote_port; + char *cgrp_path; + char *bpf_path; + bool unload; + bool help; +} hisock; + +struct hisock_prog_info { + const char *sec_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int attach_flag; + int prog_fd; + bool is_xdp; +}; + +static struct hisock_prog_info prog_infos[] = { + { + .sec_name = "hisock_sockops", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .attach_type = BPF_CGROUP_SOCK_OPS, + .attach_flag = 0, + .is_xdp = false, + }, + { + .sec_name = "hisock_ingress", + .prog_type = BPF_PROG_TYPE_XDP, + .attach_type = BPF_XDP, + .attach_flag = XDP_FLAGS_SKB_MODE, + .is_xdp = true, + }, + { + .sec_name = "hisock_egress", + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_EGRESS, + .attach_flag = 0, + .is_xdp = false, + }, +}; + +static int set_prog_type(struct bpf_object *obj) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + const 
char *sec_name; + int i; + + bpf_object__for_each_program(prog, obj) { + sec_name = bpf_program__section_name(prog); + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].sec_name, sec_name)) { + prog_type = prog_infos[i].prog_type; + attach_type = prog_infos[i].attach_type; + break; + } + } + + if (i == ARRAY_SIZE(prog_infos)) + return -1; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static int find_progs(struct bpf_object *obj) +{ + struct hisock_prog_info *info; + struct bpf_program *prog; + int i, prog_fd; + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + prog = bpf_object__find_program_by_title(obj, info->sec_name); + if (!prog) { + fprintf(stderr, "ERROR: failed to find prog sec %s\n", info->sec_name); + return -1; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: failed to get fd of prog %s\n", info->sec_name); + return -1; + } + + info->prog_fd = prog_fd; + } + + return 0; +} + +static int parse_port_range(const char *port_str, __u8 status, int map_fd) +{ + char *str = strdup(port_str); + char *token, *rest = str; + __u16 port; + + while ((token = strtok_r(rest, ",", &rest))) { + char *dash = strchr(token, '-'); + + if (dash) { + *dash = '\0'; + __u16 start = atoi(token); + __u16 end = atoi(dash + 1); + + if (start > end || start == 0 || end > 65535) { + fprintf(stderr, "Invalid port range: %s\n", token); + return -1; + } + + for (port = start; port <= end; port++) + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + + printf("Speed port range %u-%u:%u\n", start, end, status); + } else { + port = atoi(token); + if (port == 0 || port > 65535) { + fprintf(stderr, "Invalid port: %s\n", token); + return -1; + } + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + printf("Speed port %u:%u\n", port, status); + } + } + + free(str); + return 0; +} + +static int set_speed_port(struct 
bpf_object *obj) +{ + int map_fd; + + map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: failed to find map fd\n"); + return -1; + } + + if (hisock.local_port && + parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) { + fprintf(stderr, "ERROR: failed to update local port\n"); + return -1; + } + + if (hisock.remote_port && + parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) { + fprintf(stderr, "ERROR: failed to update remote port\n"); + return -1; + } + + return 0; +} + +static int detach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + int err_cnt = 0; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_set_link_xdp_fd(hisock.ifindex[j], -1, + info->attach_flag)) { + fprintf(stderr, + "ERROR: failed to detach prog %s\n", + info->sec_name); + err_cnt++; + } + } + continue; + } + + if (bpf_prog_detach(cgrp_fd, info->attach_type)) { + fprintf(stderr, "ERROR: failed to detach prog %s\n", info->sec_name); + err_cnt++; + } + } + + close(cgrp_fd); + return -err_cnt; +} + +static int attach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_set_link_xdp_fd(hisock.ifindex[j], info->prog_fd, + info->attach_flag)) + goto fail; + } + continue; + } + + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, + info->attach_flag)) + goto fail; + } + + close(cgrp_fd); + return 0; +fail: + 
fprintf(stderr, "ERROR: failed to attach prog %s\n", info->sec_name); + close(cgrp_fd); + detach_progs(); + return -1; +} + +static int do_hisock(void) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; + + setrlimit(RLIMIT_MEMLOCK, &r); + + obj = bpf_object__open(hisock.bpf_path); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: failed to open bpf file\n"); + return -1; + } + + if (set_prog_type(obj)) { + fprintf(stderr, "ERROR: failed to set prog type\n"); + bpf_object__close(obj); + return -1; + } + + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: failed to load bpf obj\n"); + bpf_object__close(obj); + return -1; + } + + if (find_progs(obj)) { + fprintf(stderr, "ERROR: failed to find progs\n"); + bpf_object__close(obj); + return -1; + } + + if (set_speed_port(obj)) { + fprintf(stderr, "ERROR: failed to set speed port\n"); + bpf_object__close(obj); + return -1; + } + + if (attach_progs()) { + fprintf(stderr, "ERROR: failed to attach progs\n"); + bpf_object__close(obj); + return -1; + } + + bpf_object__close(obj); + return 0; +} + +static void do_help(void) +{ + fprintf(stderr, + "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " + "[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n" + "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); +} + +static int parse_args(int argc, char **argv) +{ + char *ifname; + int opt; + + hisock.bpf_path = DEF_BPF_PATH; + hisock.if_num = 0; + + while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) { + switch (opt) { + case 'f': + hisock.bpf_path = optarg; + break; + case 'c': + hisock.cgrp_path = optarg; + break; + case 'p': + hisock.local_port = optarg; + break; + case 'r': + hisock.remote_port = optarg; + break; + case 'i': + ifname = optarg; + hisock.ifindex[hisock.if_num] = if_nametoindex(ifname); + hisock.if_num++; + break; + case 'u': + hisock.unload = true; + break; + case 'h': + hisock.help = true; + break; + default: + fprintf(stderr, "ERROR: unknown option %c\n", opt); + 
return -1; + } + } + + if (hisock.cgrp_path == NULL || + hisock.if_num == 0 || + (!hisock.unload && + hisock.local_port == NULL && + hisock.remote_port == NULL)) { + do_help(); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + if (parse_args(argc, argv)) { + fprintf(stderr, "ERROR: failed to parse args\n"); + return -1; + } + + if (hisock.help) { + do_help(); + return 0; + } + + if (hisock.unload) { + if (detach_progs()) { + fprintf(stderr, "ERROR: failed to detach progs\n"); + return -1; + } + + printf("Unload HiSock successfully\n"); + return 0; + } + + if (do_hisock()) { + fprintf(stderr, "ERROR: failed to do hisock\n"); + return -1; + } + + printf("Load HiSock successfully\n"); + return 0; +} diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b5502a7b02f8e8390fd75c4c901062ca5de60dd6..c16dc2ba847e238fdddef67843f1af7be8a7d3ed 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -67,6 +67,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_LSM_MAC] = "lsm_mac", [BPF_SK_LOOKUP] = "sk_lookup", [BPF_SCHED] = "sched", + [BPF_HISOCK_EGRESS] = "hisock_egress", }; void p_err(const char *fmt, ...) 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 22be05e8dbb40eced837960950affe8d3180e265..447b5ead97c58df5457f24f4696e122798ac0b4d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -66,6 +66,7 @@ const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", [BPF_PROG_TYPE_SCHED] = "sched", [BPF_PROG_TYPE_NET_GLOBAL] = "gnet", + [BPF_PROG_TYPE_HISOCK] = "hisock", }; const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77317ffb64e23c980dced0d8a9de460dac29dcb6..cab6526158fd9970592570892649ff868e313c79 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -202,6 +202,7 @@ enum bpf_prog_type { #ifndef __GENKSYMS__ BPF_PROG_TYPE_SCHED, BPF_PROG_TYPE_NET_GLOBAL, + BPF_PROG_TYPE_HISOCK, #endif }; @@ -250,6 +251,7 @@ enum bpf_attach_type { BPF_GNET_SK_DST_SET, BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, + BPF_HISOCK_EGRESS, #endif __MAX_BPF_ATTACH_TYPE }; @@ -3940,6 +3942,37 @@ union bpf_attr { * set prefer cpumask for the task. * Return * 0 on success, or a negative error in case of failure. + * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** hisock_egress programs. 
+ * Return + * 0 on success, or negative error in case of failure. + * + * int bpf_ext_memcpy(void *dst, size_t dst_sz, const void *src, size_t src_sz) + * Description + * Copy *src_sz* bytes from *src* to *dst* if *dst_sz* >= *src_sz*. + * Return + * 0 on success, or negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4118,6 +4151,10 @@ union bpf_attr { FN(get_node_stats), \ FN(sched_net_rship_submit), \ FN(sched_set_task_prefer_cpumask), \ + FN(get_ingress_dst), \ + FN(set_ingress_dst), \ + FN(change_skb_dev), \ + FN(ext_memcpy), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4489,6 +4526,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook @@ -5287,4 +5325,11 @@ struct bpf_gnet_ctx { __u64 rx_dev_netns_cookie; }; +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b7f71d2d7d53f1ae4974050df9b8fadb4a927c2b..3eaf5c35300832641c22bda218c89c555371c342 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8502,6 +8502,8 @@ static const struct bpf_sec_def section_defs[] = { BPF_GNET_RCV_NIC_NODE), BPF_EAPROG_SEC("gnet/send_nic_node", BPF_PROG_TYPE_NET_GLOBAL, BPF_GNET_SEND_NIC_NODE), + BPF_APROG_SEC("hisock_egress", BPF_PROG_TYPE_HISOCK, + BPF_HISOCK_EGRESS), }; #undef BPF_PROG_SEC_IMPL