From 3a8922c3e9c903a99d8da4936c272c00bd91b66b Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 13 Jul 2023 14:01:11 +0800 Subject: [PATCH 01/21] selftests/bpf: Use libbpf_attach_type_by_name in test_socket_cookie ANBZ: #5530 commit c9bf507d0acba950e5d85ef27727ce61458ef5aa upstream. Use newly introduced libbpf_attach_type_by_name in test_socket_cookie selftest. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- tools/testing/selftests/bpf/test_socket_cookie.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 68e108e4687a..b6c2c605d8c0 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -158,11 +158,7 @@ static int run_test(int cgfd) bpf_object__for_each_program(prog, pobj) { prog_name = bpf_program__title(prog, /*needs_copy*/ false); - if (strcmp(prog_name, "cgroup/connect6") == 0) { - attach_type = BPF_CGROUP_INET6_CONNECT; - } else if (strcmp(prog_name, "sockops") == 0) { - attach_type = BPF_CGROUP_SOCK_OPS; - } else { + if (libbpf_attach_type_by_name(prog_name, &attach_type)) { log_err("Unexpected prog: %s", prog_name); goto err; } -- Gitee From 82bdf1b363c448bf5b9012c4059164a642b164f1 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 14:52:56 +0800 Subject: [PATCH 02/21] bpf: Add iterator for spilled registers ANBZ: #5530 commit f3709f69b7c5cba6323cc03c29b64293b93be817 upstream. Add this iterator for spilled registers, it concentrates the details of how to get the current frame's spilled registers into a single macro while clarifying the intention of the code which is calling the macro. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- include/linux/bpf_verifier.h | 11 +++++++++++ kernel/bpf/verifier.c | 16 +++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1c8517320ea6..0f0903d659f9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -137,6 +137,17 @@ struct bpf_verifier_state { bool speculative; }; +#define bpf_get_spilled_reg(slot, frame) \ + (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ + (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ? &frame->stack[slot].spilled_ptr : NULL) + +/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ +#define bpf_for_each_spilled_reg(iter, frame, reg) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ + iter < frame->allocated_stack / BPF_REG_SIZE; \ + iter++, reg = bpf_get_spilled_reg(iter, frame)) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cabc4a7ab056..16498b904444 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2375,10 +2375,9 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, if (reg_is_pkt_pointer_any(®s[i])) mark_reg_unknown(env, regs, i); - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) continue; - reg = &state->stack[i].spilled_ptr; if (reg_is_pkt_pointer_any(reg)) __mark_reg_unknown(reg); } @@ -3872,10 +3871,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, for (j = 0; j <= vstate->curframe; j++) { state = vstate->frame[j]; - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) continue; - reg = &state->stack[i].spilled_ptr; if (reg->type == type && reg->id == dst_reg->id) reg->range = max(reg->range, new_range); } @@ -4193,7 +4191,7 @@ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg, *regs = state->regs; u32 id = regs[regno].id; int i, j; @@ -4202,8 +4200,8 @@ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, for (j = 0; j <= vstate->curframe; j++) { state = vstate->frame[j]; - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) continue; mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); } -- Gitee From 4a5867d21c5c44ae883e5b1905c8d936a02dca08 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 15:08:11 +0800 Subject: [PATCH 03/21] bpf: Simplify ptr_min_max_vals adjustment ANBZ: #5530 commit aad2eeaf46973a0968a75640cd1f8f1c650322a0 upstream. An upcoming commit will add another two pointer types that need very similar behaviour, so generalise this function now. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 22 ++++++++++----------- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 16498b904444..9f280ce70cf1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3103,20 +3103,18 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } - if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", - dst); - return -EACCES; - } - if (ptr_reg->type == CONST_PTR_TO_MAP) { - verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", - dst); + switch (ptr_reg->type) { + case PTR_TO_MAP_VALUE_OR_NULL: + verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", + dst, reg_type_str[ptr_reg->type]); return -EACCES; - } - if (ptr_reg->type == PTR_TO_PACKET_END) { - verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", - dst); + case CONST_PTR_TO_MAP: + case PTR_TO_PACKET_END: + verbose(env, "R%d pointer arithmetic on %s prohibited\n", + dst, reg_type_str[ptr_reg->type]); return -EACCES; + default: + break; } /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 51f4236c8e7a..bcce9d9274ea 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3697,7 +3697,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4955,7 +4955,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4976,7 +4976,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4997,7 +4997,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -7312,7 +7312,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, - .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", + .errstr = "R1 pointer arithmetic on map_ptr prohibited", .result = REJECT, }, { @@ -8990,7 +8990,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -9009,7 +9009,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, -- Gitee From fa17691fb56166bb6acbdafdcc1b7ad5c8fcc8cd Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 15:15:16 +0800 Subject: [PATCH 04/21] bpf: Reuse canonical string formatter for ctx errs ANBZ: #5530 commit 9d2be44a7f33d5ec4fbd3368317bcf5f404bb8f7 upstream. The array "reg_type_str" provides canonical formatting of register types, however a couple of places would previously check whether a register represented the context and write the name "context" directly. An upcoming commit will add another pointer type to these statements, so to provide more accurate error messages in the verifier, update these error messages to use "reg_type_str" instead. Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 7 +++---- tools/testing/selftests/bpf/test_verifier.c | 10 +++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9f280ce70cf1..306492109fee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1887,8 +1887,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg)) { verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", - insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ? - "context" : "packet"); + insn->dst_reg, reg_type_str[insn->dst_reg]); return -EACCES; } @@ -5448,8 +5447,8 @@ static int do_check(struct bpf_verifier_env *env) return err; if (is_ctx_reg(env, insn->dst_reg)) { - verbose(env, "BPF_ST stores into R%d context is not allowed\n", - insn->dst_reg); + verbose(env, "BPF_ST stores into R%d %s is not allowed\n", + insn->dst_reg, reg_type_str[insn->dst_reg]); return -EACCES; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index bcce9d9274ea..aec7efcd5c1e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3335,7 +3335,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_ST stores into R1 context is not allowed", + .errstr = "BPF_ST stores into R1 inv is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3347,7 +3347,7 @@ static struct bpf_test tests[] = { BPF_REG_0, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 inv is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -5325,7 +5325,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 inv is not allowed", }, { "leak pointer into ctx 2", @@ -5340,7 +5340,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 inv is not allowed", }, { "leak pointer into ctx 3", @@ -12305,7 +12305,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 packet", + .errstr = "BPF_XADD stores into R2 ctx", .prog_type = BPF_PROG_TYPE_XDP, }, { -- Gitee From d769e2d5b66349605475545316dc0330b173af59 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 15:22:58 +0800 Subject: [PATCH 05/21] bpf: Generalize ptr_or_null regs check ANBZ: #5530 commit 840b9615d6e9d134178b4dd4f3c30aa30643a379 upstream. This check will be reused by an upcoming commit for conditional jump checks for sockets. Refactor it a bit to simplify the later commit. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 306492109fee..0c3955b90622 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -248,6 +248,11 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) type == PTR_TO_PACKET_META; } +static bool reg_type_may_be_null(enum bpf_reg_type type) +{ + return type == PTR_TO_MAP_VALUE_OR_NULL; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -4149,12 +4154,10 @@ static void reg_combine_min_max(struct bpf_reg_state *true_src, } } -static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, - bool is_null) +static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, + bool is_null) { - struct bpf_reg_state *reg = ®s[regno]; - - if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { + if (reg_type_may_be_null(reg->type) && reg->id == id) { /* Old offset (both fixed and variable parts) should * have been known-zero, because we don't allow pointer * arithmetic on pointers that might be NULL. @@ -4167,11 +4170,13 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, } if (is_null) { reg->type = SCALAR_VALUE; - } else if (reg->map_ptr->inner_map_meta) { - reg->type = CONST_PTR_TO_MAP; - reg->map_ptr = reg->map_ptr->inner_map_meta; - } else { - reg->type = PTR_TO_MAP_VALUE; + } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { + if (reg->map_ptr->inner_map_meta) { + reg->type = CONST_PTR_TO_MAP; + reg->map_ptr = reg->map_ptr->inner_map_meta; + } else { + reg->type = PTR_TO_MAP_VALUE; + } } /* We don't need id from this point onwards anymore, thus we * should better reset it, so that state pruning has chances @@ -4184,8 +4189,8 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ -static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, - bool is_null) +static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, + bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *reg, *regs = state->regs; @@ -4193,14 +4198,14 @@ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, int i, j; for (i = 0; i < MAX_BPF_REG; i++) - mark_map_reg(regs, i, id, is_null); + mark_ptr_or_null_reg(®s[i], id, is_null); for (j = 0; j <= vstate->curframe; j++) { state = vstate->frame[j]; bpf_for_each_spilled_reg(i, state, reg) { if (!reg) continue; - mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); + mark_ptr_or_null_reg(reg, id, is_null); } } } @@ -4415,12 +4420,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && - dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - /* Mark all identical map registers in each branch as either + reg_type_may_be_null(dst_reg->type)) { + /* Mark all identical registers in each branch as either * safe or unknown depending R == 0 or R != 0 conditional. */ - mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE); - mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ); + mark_ptr_or_null_regs(this_branch, insn->dst_reg, + opcode == BPF_JNE); + mark_ptr_or_null_regs(other_branch, insn->dst_reg, + opcode == BPF_JEQ); } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], this_branch, other_branch) && is_pointer_value(env, insn->dst_reg)) { -- Gitee From 7f5bbb618f1f73fe1d87032fc2c7459111195db4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 15:52:00 +0800 Subject: [PATCH 06/21] bpf: Add PTR_TO_SOCKET verifier type ANBZ: #5530 commit c64b7983288e636356f7f5f652de4813e1cfedac upstream. Teach the verifier a little bit about a new type of pointer, a PTR_TO_SOCKET. This pointer type is accessed from BPF through the 'struct bpf_sock' structure. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- include/linux/bpf.h | 34 ++++++++++ include/linux/bpf_verifier.h | 2 + kernel/bpf/verifier.c | 120 +++++++++++++++++++++++++++++++---- net/core/filter.c | 30 +++++---- 4 files changed, 160 insertions(+), 26 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 886ec34a77d3..ac36683276a0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -160,6 +160,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ }; /* type of values returned from helper functions */ @@ -168,6 +169,7 @@ enum bpf_return_type { RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -219,6 +221,8 @@ enum bpf_reg_type { PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -355,6 +359,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog, + u32 *target_size); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); @@ -859,4 +868,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +#if defined(CONFIG_NET) +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} +static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0f0903d659f9..aed2a18cdff3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -60,6 +60,8 @@ struct bpf_reg_state { * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we * came from, when one is tested for != NULL. + * For PTR_TO_SOCKET this is used to share which pointers retain the + * same reference to the socket, to determine proper reference freeing. */ u32 id; /* Ordering of fields matters. See states_equal() */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0c3955b90622..85612e135b2c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * (like pointer plus pointer becomes SCALAR_VALUE type) * * When verifier sees load or store instructions the type of base register - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer - * types recognized by check_mem_access() function. + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are + * four pointer types recognized by check_mem_access() function. * * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' * and the range of [ptr, ptr + map's value_size) is accessible. @@ -266,6 +266,8 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_META] = "pkt_meta", [PTR_TO_PACKET_END] = "pkt_end", [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", }; static char slot_type_char[] = { @@ -1025,6 +1027,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: return true; default: return false; @@ -1479,6 +1483,28 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, return 0; } +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, + int size, enum bpf_access_type t) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + struct bpf_insn_access_aux info; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + + if (!bpf_sock_is_valid_access(off, size, t, &info)) { + verbose(env, "invalid bpf_sock access off=%d size=%d\n", + off, size); + return -EACCES; + } + + return 0; +} + static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -1597,6 +1623,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, */ strict = true; break; + case PTR_TO_SOCKET: + pointer_desc = "sock "; + break; default: break; } @@ -1850,6 +1879,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_flow_keys_access(env, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_SOCKET) { + if (t == BPF_WRITE) { + verbose(env, "cannot write into socket\n"); + return -EACCES; + } + err = check_sock_access(env, regno, off, size, t); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -2072,6 +2109,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_ctx_reg(env, reg, regno); if (err < 0) return err; + } else if (arg_type == ARG_PTR_TO_SOCKET) { + expected_type = PTR_TO_SOCKET; + if (type != expected_type) + goto err_type; } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2687,6 +2728,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; + } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3114,6 +3159,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; case CONST_PTR_TO_MAP: case PTR_TO_PACKET_END: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4177,6 +4224,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, } else { reg->type = PTR_TO_MAP_VALUE; } + } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { + reg->type = PTR_TO_SOCKET; } /* We don't need id from this point onwards anymore, thus we * should better reset it, so that state pruning has chances @@ -4969,6 +5018,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case CONST_PTR_TO_MAP: case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -5248,6 +5299,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } +/* Return true if it's OK to have the same insn return a different type. */ +static bool reg_type_mismatch_ok(enum bpf_reg_type type) +{ + switch (type) { + case PTR_TO_CTX: + case PTR_TO_SOCKET: + case PTR_TO_SOCKET_OR_NULL: + return false; + default: + return true; + } +} + +/* If an instruction was previously used with particular pointer types, then we + * need to be careful to avoid cases such as the below, where it may be ok + * for one branch accessing the pointer, but not ok for the other branch: + * + * R1 = sock_ptr + * goto X; + * ... + * R1 = some_other_valid_ptr; + * goto X; + * ... + * R2 = *(u32 *)(R1 + 0); + */ +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) +{ + return src != prev && (!reg_type_mismatch_ok(src) || + !reg_type_mismatch_ok(prev)); +} + static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state; @@ -5388,9 +5470,7 @@ static int do_check(struct bpf_verifier_env *env) */ *prev_src_type = src_reg_type; - } else if (src_reg_type != *prev_src_type && - (src_reg_type == PTR_TO_CTX || - *prev_src_type == PTR_TO_CTX)) { + } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -5435,9 +5515,7 @@ static int do_check(struct bpf_verifier_env *env) if (*prev_dst_type == NOT_INIT) { *prev_dst_type = dst_reg_type; - } else if (dst_reg_type != *prev_dst_type && - (dst_reg_type == PTR_TO_CTX || - *prev_dst_type == PTR_TO_CTX)) { + } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -5867,8 +5945,10 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) } } -/* convert load instructions that access fields of 'struct __sk_buff' - * into sequence of instructions that access fields of 'struct sk_buff' +/* convert load instructions that access fields of a context type into a + * sequence of instructions that access fields of the underlying structure: + * struct __sk_buff -> struct sk_buff + * struct bpf_sock_ops -> struct sock */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { @@ -5897,12 +5977,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } } - if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux)) + if (bpf_prog_is_dev_bound(env->prog->aux)) return 0; insn = env->prog->insnsi + delta; for (i = 0; i < insn_cnt; i++, insn++) { + bpf_convert_ctx_access_t convert_ctx_access; + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || @@ -5944,8 +6026,18 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } - if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + switch (env->insn_aux_data[i + delta].ptr_type) { + case PTR_TO_CTX: + if (!ops->convert_ctx_access) + continue; + convert_ctx_access = ops->convert_ctx_access; + break; + case PTR_TO_SOCKET: + convert_ctx_access = bpf_sock_convert_ctx_access; + break; + default: continue; + } ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; size = BPF_LDST_BYTES(insn); @@ -5977,8 +6069,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } target_size = 0; - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, - &target_size); + cnt = convert_ctx_access(type, insn, insn_buf, env->prog, + &target_size); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); diff --git a/net/core/filter.c b/net/core/filter.c index cf55c2704d55..5d0713690adb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5374,23 +5374,29 @@ static bool __sock_filter_check_size(int off, int size, return size == size_default; } -static bool sock_filter_is_valid_access(int off, int size, - enum bpf_access_type type, - const struct bpf_prog *prog, - struct bpf_insn_access_aux *info) +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct bpf_sock)) return false; if (off % size != 0) return false; - if (!__sock_filter_check_attach_type(off, type, - prog->expected_attach_type)) - return false; if (!__sock_filter_check_size(off, size, info)) return false; return true; } +static bool sock_filter_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (!bpf_sock_is_valid_access(off, size, type, info)) + return false; + return __sock_filter_check_attach_type(off, type, + prog->expected_attach_type); +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -6104,10 +6110,10 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, - const struct bpf_insn *si, - struct bpf_insn *insn_buf, - struct bpf_prog *prog, u32 *target_size) +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; @@ -7019,7 +7025,7 @@ const struct bpf_prog_ops lwt_seg6local_prog_ops = { const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, - .convert_ctx_access = sock_filter_convert_ctx_access, + .convert_ctx_access = bpf_sock_convert_ctx_access, }; const struct bpf_prog_ops cg_sock_prog_ops = { -- Gitee From 6d91e77921512602ff554872beeb48286a3edbf2 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 16:44:18 +0800 Subject: [PATCH 07/21] bpf: Macrofy stack state copy ANBZ: #5530 commit 84dbf3507349696b505b6a500722538b0683e4ac upstream. An upcoming commit will need very similar copy/realloc boilerplate, so refactor the existing stack copy/realloc functions into macros to simplify it. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 104 ++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 45 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 85612e135b2c..18cccf9e42aa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -387,60 +387,74 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "\n"); } -static int copy_stack_state(struct bpf_func_state *dst, - const struct bpf_func_state *src) -{ - if (!src->stack) - return 0; - if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) { - /* internal bug, make state invalid to reject the program */ - memset(dst, 0, sizeof(*dst)); - return -EFAULT; - } - memcpy(dst->stack, src->stack, - sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE)); - return 0; -} +#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ +static int copy_##NAME##_state(struct bpf_func_state *dst, \ + const struct bpf_func_state *src) \ +{ \ + if (!src->FIELD) \ + return 0; \ + if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ + /* internal bug, make state invalid to reject the program */ \ + memset(dst, 0, sizeof(*dst)); \ + return -EFAULT; \ + } \ + memcpy(dst->FIELD, src->FIELD, \ + sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ + return 0; \ +} +/* copy_stack_state() */ +COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) +#undef COPY_STATE_FN + +#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ +static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ + bool copy_old) \ +{ \ + u32 old_size = state->COUNT; \ + struct bpf_##NAME##_state *new_##FIELD; \ + int slot = size / SIZE; \ + \ + if (size <= old_size || !size) { \ + if (copy_old) \ + return 0; \ + state->COUNT = slot * SIZE; \ + if (!size && old_size) { \ + kfree(state->FIELD); \ + state->FIELD = NULL; \ + } \ + return 0; \ + } \ + new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ + GFP_KERNEL); \ + if (!new_##FIELD) \ + return -ENOMEM; \ + if (copy_old) { \ + if (state->FIELD) \ + memcpy(new_##FIELD, state->FIELD, \ + sizeof(*new_##FIELD) * (old_size / SIZE)); \ + memset(new_##FIELD + old_size / SIZE, 0, \ + sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ + } \ + state->COUNT = slot * SIZE; \ + kfree(state->FIELD); \ + state->FIELD = new_##FIELD; \ + return 0; \ +} +/* realloc_stack_state() */ +REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) +#undef REALLOC_STATE_FN /* do_check() starts with zero-sized stack in struct bpf_verifier_state to * make it consume minimal amount of memory. check_stack_write() access from * the program calls into realloc_func_state() to grow the stack size. * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which this function copies over. It points to previous bpf_verifier_state - * which is never reallocated + * which realloc_stack_state() copies over. It points to previous + * bpf_verifier_state which is never reallocated. */ static int realloc_func_state(struct bpf_func_state *state, int size, bool copy_old) { - u32 old_size = state->allocated_stack; - struct bpf_stack_state *new_stack; - int slot = size / BPF_REG_SIZE; - - if (size <= old_size || !size) { - if (copy_old) - return 0; - state->allocated_stack = slot * BPF_REG_SIZE; - if (!size && old_size) { - kfree(state->stack); - state->stack = NULL; - } - return 0; - } - new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state), - GFP_KERNEL); - if (!new_stack) - return -ENOMEM; - if (copy_old) { - if (state->stack) - memcpy(new_stack, state->stack, - sizeof(*new_stack) * (old_size / BPF_REG_SIZE)); - memset(new_stack + old_size / BPF_REG_SIZE, 0, - sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE); - } - state->allocated_stack = slot * BPF_REG_SIZE; - kfree(state->stack); - state->stack = new_stack; - return 0; + return realloc_stack_state(state, size, copy_old); } static void free_func_state(struct bpf_func_state *state) -- Gitee From 01e197409b54d3317aa4aa8b4bdf7740422c1c24 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 8 Jun 2023 17:58:55 +0800 Subject: [PATCH 08/21] bpf: Add reference tracking to verifier ANBZ: #5530 commit fd978bf7fd312581a7ca454a991f0ffb34c4204b upstream. Allow helper functions to acquire a reference and return it into a register. Specific pointer types such as the PTR_TO_SOCKET will implicitly represent such a reference. The verifier must ensure that these references are released exactly once in each path through the program. To achieve this, this commit assigns an id to the pointer and tracks it in the 'bpf_func_state', then when the function or program exits, verifies that all of the acquired references have been freed. When the pointer is passed to a function that frees the reference, it is removed from the 'bpf_func_state` and all existing copies of the pointer in registers are marked invalid. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- include/linux/bpf_verifier.h | 24 ++- kernel/bpf/verifier.c | 305 ++++++++++++++++++++++++++++++++--- 2 files changed, 307 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aed2a18cdff3..e9a11143dd5e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -107,6 +107,17 @@ struct bpf_stack_state { u8 slot_type[BPF_REG_SIZE]; }; +struct bpf_reference_state { + /* Track each reference created with a unique id, even if the same + * instruction creates the reference multiple times (eg, via CALL). + */ + int id; + /* Instruction where the allocation of this reference occurred. This + * is used purely to inform the user of a reference leak. + */ + int insn_idx; +}; + /* state of the program: * type of all registers and stack info */ @@ -125,7 +136,9 @@ struct bpf_func_state { */ u32 subprogno; - /* should be second to last. See copy_func_state() */ + /* The following fields should be last. See copy_func_state() */ + int acquired_refs; + struct bpf_reference_state *refs; int allocated_stack; struct bpf_stack_state *stack; }; @@ -236,11 +249,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; - return cur->frame[cur->curframe]->regs; + return cur->frame[cur->curframe]; +} + +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return cur_func(env)->regs; } int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 18cccf9e42aa..44ff9e531850 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1,5 +1,6 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -140,6 +141,18 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * * After the call R0 is set to return type of the function and registers R1-R5 * are set to NOT_INIT to indicate that they are no longer readable. + * + * The following reference types represent a potential reference to a kernel + * resource which, after first being allocated, must be checked and freed by + * the BPF program: + * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET + * + * When the verifier sees a helper call return a reference type, it allocates a + * pointer id for the reference and stores it in the current function state. + * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into + * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type + * passes through a NULL-check conditional. For the branch wherein the state is + * changed to CONST_IMM, the verifier releases the reference. */ /* verifier_state + insn_idx are pushed to stack when branch is encountered */ @@ -188,6 +201,7 @@ struct bpf_call_arg_meta { int regno; int access_size; u64 msize_max_value; + int ptr_id; }; static DEFINE_MUTEX(bpf_verifier_lock); @@ -250,7 +264,42 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) static bool reg_type_may_be_null(enum bpf_reg_type type) { - return type == PTR_TO_MAP_VALUE_OR_NULL; + return type == PTR_TO_MAP_VALUE_OR_NULL || + type == PTR_TO_SOCKET_OR_NULL; +} + +static bool type_is_refcounted(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET; +} + +static bool type_is_refcounted_or_null(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL; +} + +static bool reg_is_refcounted(const struct bpf_reg_state *reg) +{ + return type_is_refcounted(reg->type); +} + +static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) +{ + return type_is_refcounted_or_null(reg->type); +} + +static bool arg_type_is_refcounted(enum bpf_arg_type type) +{ + return type == ARG_PTR_TO_SOCKET; +} + +/* Determine whether the function releases some resources allocated by another + * function call. The first reference type argument will be assumed to be + * released by release_reference(). + */ +static bool is_release_function(enum bpf_func_id func_id) +{ + return false; } /* string representation of 'enum bpf_reg_type' */ @@ -384,6 +433,12 @@ static void print_verifier_state(struct bpf_verifier_env *env, else verbose(env, "=%s", types_buf); } + if (state->acquired_refs && state->refs[0].id) { + verbose(env, " refs=%d", state->refs[0].id); + for (i = 1; i < state->acquired_refs; i++) + if (state->refs[i].id) + verbose(env, ",%d", state->refs[i].id); + } verbose(env, "\n"); } @@ -402,6 +457,8 @@ static int copy_##NAME##_state(struct bpf_func_state *dst, \ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ return 0; \ } +/* copy_reference_state() */ +COPY_STATE_FN(reference, acquired_refs, refs, 1) /* copy_stack_state() */ COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) #undef COPY_STATE_FN @@ -440,6 +497,8 @@ static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ state->FIELD = new_##FIELD; \ return 0; \ } +/* realloc_reference_state() */ +REALLOC_STATE_FN(reference, acquired_refs, refs, 1) /* realloc_stack_state() */ REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) #undef REALLOC_STATE_FN @@ -451,16 +510,89 @@ REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) * which realloc_stack_state() copies over. It points to previous * bpf_verifier_state which is never reallocated. */ -static int realloc_func_state(struct bpf_func_state *state, int size, - bool copy_old) +static int realloc_func_state(struct bpf_func_state *state, int stack_size, + int refs_size, bool copy_old) +{ + int err = realloc_reference_state(state, refs_size, copy_old); + if (err) + return err; + return realloc_stack_state(state, stack_size, copy_old); +} + +/* Acquire a pointer id from the env and update the state->refs to include + * this new pointer reference. + * On success, returns a valid pointer id to associate with the register + * On failure, returns a negative errno. + */ +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_func_state *state = cur_func(env); + int new_ofs = state->acquired_refs; + int id, err; + + err = realloc_reference_state(state, state->acquired_refs + 1, true); + if (err) + return err; + id = ++env->id_gen; + state->refs[new_ofs].id = id; + state->refs[new_ofs].insn_idx = insn_idx; + + return id; +} + +/* release function corresponding to acquire_reference_state(). Idempotent. */ +static int __release_reference_state(struct bpf_func_state *state, int ptr_id) +{ + int i, last_idx; + + if (!ptr_id) + return -EFAULT; + + last_idx = state->acquired_refs - 1; + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].id == ptr_id) { + if (last_idx && i != last_idx) + memcpy(&state->refs[i], &state->refs[last_idx], + sizeof(*state->refs)); + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); + state->acquired_refs--; + return 0; + } + } + return -EFAULT; +} + +/* variation on the above for cases where we expect that there must be an + * outstanding reference for the specified ptr_id. + */ +static int release_reference_state(struct bpf_verifier_env *env, int ptr_id) +{ + struct bpf_func_state *state = cur_func(env); + int err; + + err = __release_reference_state(state, ptr_id); + if (WARN_ON_ONCE(err != 0)) + verbose(env, "verifier internal error: can't release reference\n"); + return err; +} + +static int transfer_reference_state(struct bpf_func_state *dst, + struct bpf_func_state *src) { - return realloc_stack_state(state, size, copy_old); + int err = realloc_reference_state(dst, src->acquired_refs, false); + if (err) + return err; + err = copy_reference_state(dst, src); + if (err) + return err; + return 0; } static void free_func_state(struct bpf_func_state *state) { if (!state) return; + kfree(state->refs); kfree(state->stack); kfree(state); } @@ -486,10 +618,14 @@ static int copy_func_state(struct bpf_func_state *dst, { int err; - err = realloc_func_state(dst, src->allocated_stack, false); + err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, + false); + if (err) + return err; + memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); + err = copy_reference_state(dst, src); if (err) return err; - memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack)); return copy_stack_state(dst, src); } @@ -1067,7 +1203,7 @@ static int check_stack_write(struct bpf_verifier_env *env, enum bpf_reg_type type; err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), - true); + state->acquired_refs, true); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -1537,7 +1673,8 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { const struct bpf_reg_state *reg = cur_regs(env) + regno; - return reg->type == PTR_TO_CTX; + return reg->type == PTR_TO_CTX || + reg->type == PTR_TO_SOCKET; } static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) @@ -2127,6 +2264,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_SOCKET; if (type != expected_type) goto err_type; + if (meta->ptr_id || !reg->id) { + verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", + meta->ptr_id, reg->id); + return -EFAULT; + } + meta->ptr_id = reg->id; } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be @@ -2415,10 +2558,32 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) return true; } +static bool check_refcount_ok(const struct bpf_func_proto *fn) +{ + int count = 0; + + if (arg_type_is_refcounted(fn->arg1_type)) + count++; + if (arg_type_is_refcounted(fn->arg2_type)) + count++; + if (arg_type_is_refcounted(fn->arg3_type)) + count++; + if (arg_type_is_refcounted(fn->arg4_type)) + count++; + if (arg_type_is_refcounted(fn->arg5_type)) + count++; + + /* We only support one arg being unreferenced at the moment, + * which is sufficient for the helper functions we have right now. + */ + return count <= 1; +} + static int check_func_proto(const struct bpf_func_proto *fn) { return check_raw_mode_ok(fn) && - check_arg_pair_ok(fn) ? 0 : -EINVAL; + check_arg_pair_ok(fn) && + check_refcount_ok(fn) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] @@ -2451,12 +2616,45 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) __clear_all_pkt_pointers(env, vstate->frame[i]); } +static void release_reg_references(struct bpf_verifier_env *env, + struct bpf_func_state *state, int id) +{ + struct bpf_reg_state *regs = state->regs, *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + if (regs[i].id == id) + mark_reg_unknown(env, regs, i); + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + if (reg_is_refcounted(reg) && reg->id == id) + __mark_reg_unknown(reg); + } +} + +/* The pointer with the specified id has released its reference to kernel + * resources. Identify all copies of the same pointer and clear the reference. + */ +static int release_reference(struct bpf_verifier_env *env, + struct bpf_call_arg_meta *meta) +{ + struct bpf_verifier_state *vstate = env->cur_state; + int i; + + for (i = 0; i <= vstate->curframe; i++) + release_reg_references(env, vstate->frame[i], meta->ptr_id); + + return release_reference_state(env, meta->ptr_id); +} + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *caller, *callee; - int i, subprog, target_insn; + int i, err, subprog, target_insn; if (state->curframe + 1 >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep\n", @@ -2494,6 +2692,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); + /* Transfer references to the callee */ + err = transfer_reference_state(callee, caller); + if (err) + return err; + /* copy r1 - r5 args that callee can access */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) callee->regs[i] = caller->regs[i]; @@ -2524,6 +2727,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *caller, *callee; struct bpf_reg_state *r0; + int err; callee = state->frame[state->curframe]; r0 = &callee->regs[BPF_REG_0]; @@ -2543,6 +2747,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) /* return to the caller whatever r0 had in the callee */ caller->regs[BPF_REG_0] = *r0; + /* Transfer references to the caller */ + err = transfer_reference_state(caller, callee); + if (err) + return err; + *insn_idx = callee->callsite + 1; if (env->log.level) { verbose(env, "returning from callee:\n"); @@ -2622,6 +2831,18 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return 0; } +static int check_reference_leak(struct bpf_verifier_env *env) +{ + struct bpf_func_state *state = cur_func(env); + int i; + + for (i = 0; i < state->acquired_refs; i++) { + verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", + state->refs[i].id, state->refs[i].insn_idx); + } + return state->acquired_refs ? -EINVAL : 0; +} + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { const struct bpf_func_proto *fn = NULL; @@ -2700,6 +2921,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } + if (func_id == BPF_FUNC_tail_call) { + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + } else if (is_release_function(func_id)) { + err = release_reference(env, &meta); + if (err) + return err; + } + regs = cur_regs(env); /* check that flags argument in get_local_storage(map, flags) is 0, @@ -2743,9 +2976,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { + int id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; + regs[BPF_REG_0].id = id; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -4215,7 +4451,8 @@ static void reg_combine_min_max(struct bpf_reg_state *true_src, } } -static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, +static void mark_ptr_or_null_reg(struct bpf_func_state *state, + struct bpf_reg_state *reg, u32 id, bool is_null) { if (reg_type_may_be_null(reg->type) && reg->id == id) { @@ -4241,11 +4478,13 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id, } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; } - /* We don't need id from this point onwards anymore, thus we - * should better reset it, so that state pruning has chances - * to take effect. - */ - reg->id = 0; + if (is_null || !reg_is_refcounted(reg)) { + /* We don't need id from this point onwards anymore, + * thus we should better reset it, so that state + * pruning has chances to take effect. + */ + reg->id = 0; + } } } @@ -4260,15 +4499,18 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, u32 id = regs[regno].id; int i, j; + if (reg_is_refcounted_or_null(®s[regno]) && is_null) + __release_reference_state(state, id); + for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(®s[i], id, is_null); + mark_ptr_or_null_reg(state, ®s[i], id, is_null); for (j = 0; j <= vstate->curframe; j++) { state = vstate->frame[j]; bpf_for_each_spilled_reg(i, state, reg) { if (!reg) continue; - mark_ptr_or_null_reg(reg, id, is_null); + mark_ptr_or_null_reg(state, reg, id, is_null); } } } @@ -4613,6 +4855,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; + /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as + * gen_ld_abs() may terminate the program at runtime, leading to + * reference leak. + */ + err = check_reference_leak(env); + if (err) { + verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); + return err; + } + if (regs[BPF_REG_6].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -5109,6 +5361,14 @@ static bool stacksafe(struct bpf_func_state *old, return true; } +static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) +{ + if (old->acquired_refs != cur->acquired_refs) + return false; + return !memcmp(old->refs, cur->refs, + sizeof(*old->refs) * old->acquired_refs); +} + /* compare two verifier states * * all states stored in state_list are known to be valid, since @@ -5154,6 +5414,9 @@ static bool func_states_equal(struct bpf_func_state *old, if (!stacksafe(old, cur, idmap)) goto out_free; + + if (!refsafe(old, cur)) + goto out_free; ret = true; out_free: kfree(idmap); @@ -5609,6 +5872,10 @@ static int do_check(struct bpf_verifier_env *env) continue; } + err = check_reference_leak(env); + if (err) + return err; + /* eBPF calling convetion is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time -- Gitee From faae4ee725306f64735444e6d566578de8719e0c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Jun 2023 09:33:13 +0800 Subject: [PATCH 09/21] bpf: Add helper to retrieve socket in BPF ANBZ: #5530 commit 6acc9b432e6714d72d7d77ec7c27f6f8358d0c71 upstream. This patch adds new BPF helper functions, bpf_sk_lookup_tcp() and bpf_sk_lookup_udp() which allows BPF programs to find out if there is a socket listening on this host, and returns a socket pointer which the BPF program can then access to determine, for instance, whether to forward or drop traffic. bpf_sk_lookup_xxx() may take a reference on the socket, so when a BPF program makes use of this function, it must subsequently pass the returned pointer into the newly added sk_release() to return the reference. By way of example, the following pseudocode would filter inbound connections at XDP if there is no corresponding service listening for the traffic: struct bpf_sock_tuple tuple; struct bpf_sock_ops *sk; populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof tuple, netns, 0); if (!sk) { // Couldn't find a socket listening for this traffic. Drop. return TC_ACT_SHOT; } bpf_sk_release(sk, 0); return TC_ACT_OK; Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- include/uapi/linux/bpf.h | 93 ++++++++++++- kernel/bpf/verifier.c | 8 +- net/core/filter.c | 151 ++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 93 ++++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 12 ++ 5 files changed, 354 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e7cbc909162d..4eff772742b0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2149,6 +2149,77 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2234,7 +2305,10 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2404,6 +2478,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 44ff9e531850..e9639920d062 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -153,6 +153,12 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type * passes through a NULL-check conditional. For the branch wherein the state is * changed to CONST_IMM, the verifier releases the reference. + * + * For each helper function that allocates a reference, such as + * bpf_sk_lookup_tcp(), there is a corresponding release function, such as + * bpf_sk_release(). When a reference type passes into the release function, + * the verifier also releases the reference. If any unchecked or unreleased + * reference remains at the end of the program, the verifier rejects it. */ /* verifier_state + insn_idx are pushed to stack when branch is encountered */ @@ -299,7 +305,7 @@ static bool arg_type_is_refcounted(enum bpf_arg_type type) */ static bool is_release_function(enum bpf_func_id func_id) { - return false; + return func_id == BPF_FUNC_sk_release; } /* string representation of 'enum bpf_reg_type' */ diff --git a/net/core/filter.c b/net/core/filter.c index 5d0713690adb..9fa3b5b53141 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -58,13 +58,17 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -4793,6 +4797,141 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ +struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, + struct sk_buff *skb, u8 family, u8 proto) +{ + int dif = skb->dev->ifindex; + bool refcounted = false; + struct sock *sk = NULL; + + if (family == AF_INET) { + __be32 src4 = tuple->ipv4.saddr; + __be32 dst4 = tuple->ipv4.daddr; + int sdif = inet_sdif(skb); + + if (proto == IPPROTO_TCP) + sk = __inet_lookup(net, &tcp_hashinfo, skb, 0, + src4, tuple->ipv4.sport, + dst4, tuple->ipv4.dport, + dif, sdif, &refcounted); + else + sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, + dst4, tuple->ipv4.dport, + dif, sdif, &udp_table, skb); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; + struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; + int sdif = inet6_sdif(skb); + + if (proto == IPPROTO_TCP) + sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, + src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, &refcounted); + else + sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, &udp_table, skb); +#endif + } + + if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + sk = NULL; + } + return sk; +} + +/* bpf_sk_lookup performs the core lookup for different types of sockets, + * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. + * Returns the socket as an 'unsigned long' to simplify the casting in the + * callers to satisfy BPF_CALL declarations. + */ +static unsigned long +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct net *caller_net; + struct sock *sk = NULL; + u8 family = AF_UNSPEC; + struct net *net; + + family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; + if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + goto out; + + if (skb->dev) + caller_net = dev_net(skb->dev); + else + caller_net = sock_net(skb->sk); + if (netns_id) { + net = get_net_ns_by_id(caller_net, netns_id); + if (unlikely(!net)) + goto out; + sk = sk_lookup(net, tuple, skb, family, proto); + put_net(net); + } else { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } + + if (sk) + sk = sk_to_full_sk(sk); +out: + return (unsigned long) sk; +} + +BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { + .func = bpf_sk_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { + .func = bpf_sk_lookup_udp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_1(bpf_sk_release, struct sock *, sk) +{ + if (!sock_flag(sk, SOCK_RCU_FREE)) + sock_gen_put(sk); + return 0; +} + +static const struct bpf_func_proto bpf_sk_release_proto = { + .func = bpf_sk_release, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCKET, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -4999,6 +5138,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_ancestor_cgroup_id: return &bpf_skb_ancestor_cgroup_id_proto; #endif + case BPF_FUNC_sk_lookup_tcp: + return &bpf_sk_lookup_tcp_proto; + case BPF_FUNC_sk_lookup_udp: + return &bpf_sk_lookup_udp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; default: return bpf_base_func_proto(func_id); } @@ -5099,6 +5244,12 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_hash_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_sk_lookup_tcp: + return &bpf_sk_lookup_tcp_proto; + case BPF_FUNC_sk_lookup_udp: + return &bpf_sk_lookup_udp_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90e792965c56..b2acbe007423 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2144,6 +2144,77 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2229,7 +2300,10 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2399,6 +2473,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e4be7730222d..1d407b3494f9 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -143,6 +143,18 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = (void *) BPF_FUNC_skb_cgroup_id; static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; +static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_udp; +static int (*bpf_sk_release)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_release; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- Gitee From 069a6974d1cad6d5c4921664e83ac2ccb03ea843 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 13 Jun 2023 11:16:34 +0800 Subject: [PATCH 10/21] selftests/bpf: Generalize dummy program types ANBZ: #5530 commit 0c586079f852187d19fea60c9a4981ad29e22ba8 upstream. Don't hardcode the dummy program types to SOCKET_FILTER type, as this prevents testing bpf_tail_call in conjunction with other program types. Instead, use the program type specified in the test case. Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- tools/testing/selftests/bpf/test_verifier.c | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index aec7efcd5c1e..2ad442742fbc 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12746,18 +12746,18 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(void) +static int create_prog_dummy1(enum bpf_map_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(int mfd, int idx) +static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -12768,11 +12768,12 @@ static int create_prog_dummy2(int mfd, int idx) BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(uint32_t max_elem, int p1key) +static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, + int p1key) { int p2key = 1; int mfd, p1fd, p2fd; @@ -12784,8 +12785,8 @@ static int create_prog_array(uint32_t max_elem, int p1key) return -1; } - p1fd = create_prog_dummy1(); - p2fd = create_prog_dummy2(mfd, p2key); + p1fd = create_prog_dummy1(prog_type); + p2fd = create_prog_dummy2(prog_type, mfd, p2key); if (p1fd < 0 || p2fd < 0) goto out; if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) @@ -12842,8 +12843,8 @@ static int create_cgroup_storage(bool percpu) static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *map_fds) +static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, + struct bpf_insn *prog, int *map_fds) { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; @@ -12899,7 +12900,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog1) { - map_fds[4] = create_prog_array(4, 0); + map_fds[4] = create_prog_array(prog_type, 4, 0); do { prog[*fixup_prog1].imm = map_fds[4]; fixup_prog1++; @@ -12907,7 +12908,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog2) { - map_fds[5] = create_prog_array(8, 7); + map_fds[5] = create_prog_array(prog_type, 8, 7); do { prog[*fixup_prog2].imm = map_fds[5]; fixup_prog2++; @@ -12953,11 +12954,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; - do_test_fixup(test, prog, map_fds); + if (!prog_type) + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + fd_prog = bpf_verify_program(prog_type, prog, prog_len, + test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? -- Gitee From 20de2a0addc05c7085495a2150a1762d499b3de5 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 13 Jun 2023 14:09:21 +0800 Subject: [PATCH 11/21] selftests/bpf: Add tests for reference tracking ANBZ: #5530 commit b584ab8840fdc17f0d75ac62510396b8f657ed8f upstream. reference tracking: leak potential reference reference tracking: leak potential reference on stack reference tracking: leak potential reference on stack 2 reference tracking: zero potential reference reference tracking: copy and zero potential references reference tracking: release reference without check reference tracking: release reference reference tracking: release reference twice reference tracking: release reference twice inside branch reference tracking: alloc, check, free in one subbranch reference tracking: alloc, check, free in both subbranches reference tracking in call: free reference in subprog reference tracking in call: free reference in subprog and outside reference tracking in call: alloc & leak reference in subprog reference tracking in call: alloc in subprog, release outside reference tracking in call: sk_ptr leak into caller stack reference tracking in call: sk_ptr spill into caller stack reference tracking: allow LD_ABS reference tracking: forbid LD_ABS while holding reference reference tracking: allow LD_IND reference tracking: forbid LD_IND while holding reference reference tracking: check reference or tail call reference tracking: release reference then tail call reference tracking: leak possible reference over tail call reference tracking: leak checked reference over tail call reference tracking: mangle and release sock_or_null reference tracking: mangle and release sock reference tracking: access member reference tracking: write to member reference tracking: invalid 64-bit access of member reference tracking: access after release reference tracking: direct access for lookup unpriv: spill/fill of different pointers stx - ctx and sock unpriv: spill/fill of different pointers stx - leak sock unpriv: spill/fill of different pointers stx - sock and ctx (read) unpriv: spill/fill of different pointers stx - sock and ctx (write) Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- tools/testing/selftests/bpf/test_verifier.c | 759 ++++++++++++++++++++ 1 file changed, 759 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2ad442742fbc..8906a34da9a1 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3,6 +3,7 @@ * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2017 Facebook + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -178,6 +179,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) self->retval = (uint32_t)res; } +/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ +#define BPF_SK_LOOKUP \ + /* struct bpf_sock_tuple tuple = {} */ \ + BPF_MOV64_IMM(BPF_REG_2, 0), \ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ + /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ + BPF_MOV64_IMM(BPF_REG_4, 0), \ + BPF_MOV64_IMM(BPF_REG_5, 0), \ + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) + static struct bpf_test tests[] = { { "add+sub+mul", @@ -2736,6 +2755,137 @@ static struct bpf_test tests[] = { .errstr = "same insn cannot be used with different pointers", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "unpriv: spill/fill of different pointers stx - ctx and sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + /* if (sk) bpf_sk_release(sk) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=ctx expected=sock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - leak sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "Unreleased reference", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (read)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (write)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) sk->mark = 42; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "cannot write into socket", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "unpriv: spill/fill of different pointers ldx", .insns = { @@ -12632,6 +12782,214 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "reference tracking: leak potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: zero potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: copy and zero potential references", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: release reference without check", + .insns = { + BPF_SK_LOOKUP, + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=sock_or_null expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference twice", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference twice inside branch", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: alloc, check, free in one subbranch", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ + /* Leak reference in R0 */ + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: alloc, check, free in both subbranches", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking in call: free reference in subprog", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, { "pass modified ctx pointer to helper, 1", .insns = { @@ -12721,6 +13079,407 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "reference tracking in call: free reference in subprog and outside", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking in call: alloc & leak reference in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), + BPF_SK_LOOKUP, + /* spill unchecked sk_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: alloc in subprog, release outside", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), /* return sk */ + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + .result = ACCEPT, + }, + { + "reference tracking in call: sk_ptr leak into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: sk_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* now the sk_ptr is verified, free the reference */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: allow LD_ABS", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: forbid LD_ABS while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: allow LD_IND", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "reference tracking: forbid LD_IND while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: check reference or tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference then tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 18 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: leak possible reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + /* bpf_tail_call() */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 16 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: leak checked reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if (!sk) goto end */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock_or_null", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock_or_null prohibited", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock prohibited", + .result = REJECT, + }, + { + "reference tracking: access member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: write to member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LD_IMM64(BPF_REG_2, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, + offsetof(struct bpf_sock, mark)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "cannot write into socket", + .result = REJECT, + }, + { + "reference tracking: invalid 64-bit access of member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_sock access off=0 size=8", + .result = REJECT, + }, + { + "reference tracking: access after release", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "reference tracking: direct access for lookup", + .insns = { + /* Check that the packet is at least 64B long */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- Gitee From fb51aac0e28271c4a08cdc009c7ae722b99e7e91 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Jun 2023 09:37:35 +0800 Subject: [PATCH 12/21] libbpf: Support loading individual progs ANBZ: #5530 commit 29cd77f41620dca22bdee092217c16b49ece8915 upstream. Allow the individual program load to be invoked. This will help with testing, where a single ELF may contain several sections, some of which denote subprograms that are expected to fail verification, along with some which are expected to pass verification. By allowing programs to be iterated and individually loaded, each program can be independently checked against its expected verification result. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- tools/lib/bpf/libbpf.c | 4 ++-- tools/lib/bpf/libbpf.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 18c3984e7e80..a1e015bfce67 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -230,7 +230,7 @@ struct bpf_object { }; #define obj_elf_valid(o) ((o)->efile.elf) -static void bpf_program__unload(struct bpf_program *prog) +void bpf_program__unload(struct bpf_program *prog) { int i; @@ -1377,7 +1377,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, return ret; } -static int +int bpf_program__load(struct bpf_program *prog, char *license, u32 kern_version) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 511c1294dcbf..2ed24d3f80b3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -128,10 +128,13 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); +int bpf_program__load(struct bpf_program *prog, char *license, + u32 kern_version); int bpf_program__fd(struct bpf_program *prog); int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); int bpf_program__pin(struct bpf_program *prog, const char *path); +void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; -- Gitee From 96c43f920ed3ccf0081ebf9141f90927bc6d9e18 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Jul 2023 10:02:50 +0800 Subject: [PATCH 13/21] selftests/bpf: Add C tests for reference tracking ANBZ: #5530 commit de375f4e91e3074a09ad337ac4b32a2f21c24d96 upstream. Add some tests that demonstrate and test the balanced lookup/free nature of socket lookup. Section names that start with "fail" represent programs that are expected to fail verification; all others should succeed. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 38 ++++ .../selftests/bpf/test_sk_lookup_kern.c | 180 ++++++++++++++++++ 3 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_sk_lookup_kern.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 794490e414c7..1dec3ab2fcde 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -36,7 +36,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o + test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 991bbd97c630..fb540beef324 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1728,6 +1728,43 @@ static void test_task_fd_query_tp(void) "sys_enter_read"); } +static void test_reference_tracking() +{ + const char *file = "./test_sk_lookup_kern.o"; + struct bpf_object *obj; + struct bpf_program *prog; + __u32 duration; + int err = 0; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) { + error_cnt++; + return; + } + + bpf_object__for_each_program(prog, obj) { + const char *title; + + /* Ignore .text sections */ + title = bpf_program__title(prog, false); + if (strstr(title, ".text") != NULL) + continue; + + bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS); + + /* Expect verifier failure if test name has 'fail' */ + if (strstr(title, "fail") != NULL) { + libbpf_set_print(NULL, NULL, NULL); + err = !bpf_program__load(prog, "GPL", 0); + libbpf_set_print(printf, printf, NULL); + } else { + err = bpf_program__load(prog, "GPL", 0); + } + CHECK(err, title, "\n"); + } + bpf_object__close(obj); +} + int main(void) { jit_enabled = is_jit_enabled(); @@ -1749,6 +1786,7 @@ int main(void) test_get_stack_raw_tp(); test_task_fd_query_rawtp(); test_task_fd_query_tp(); + test_reference_tracking(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c new file mode 100644 index 000000000000..b745bdc08c2b --- /dev/null +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ +static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, + void *data_end, __u16 eth_proto, + bool *ipv4) +{ + struct bpf_sock_tuple *result; + __u8 proto = 0; + __u64 ihl_len; + + if (eth_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + nh_off); + + if (iph + 1 > data_end) + return NULL; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off); + + if (ip6h + 1 > data_end) + return NULL; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } + + if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP) + return NULL; + + return result; +} + +SEC("sk_lookup_success") +int bpf_sk_lookup_test0(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct bpf_sock_tuple *tuple; + struct bpf_sock *sk; + size_t tuple_len; + bool ipv4; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4); + if (!tuple || tuple + sizeof *tuple > data_end) + return TC_ACT_SHOT; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + if (sk) + bpf_sk_release(sk); + return sk ? TC_ACT_OK : TC_ACT_UNSPEC; +} + +SEC("sk_lookup_success_simple") +int bpf_sk_lookup_test1(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_use_after_free") +int bpf_sk_lookup_uaf(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family = 0; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) { + bpf_sk_release(sk); + family = sk->family; + } + return family; +} + +SEC("fail_modify_sk_pointer") +int bpf_sk_lookup_modptr(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) { + sk += 1; + bpf_sk_release(sk); + } + return 0; +} + +SEC("fail_modify_sk_or_null_pointer") +int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk += 1; + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_no_release") +int bpf_sk_lookup_test2(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + return 0; +} + +SEC("fail_release_twice") +int bpf_sk_lookup_test3(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_release(sk); + bpf_sk_release(sk); + return 0; +} + +SEC("fail_release_unchecked") +int bpf_sk_lookup_test4(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_release(sk); + return 0; +} + +void lookup_no_release(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); +} + +SEC("fail_no_release_subcall") +int bpf_sk_lookup_test5(struct __sk_buff *skb) +{ + lookup_no_release(skb); + return 0; +} -- Gitee From 65dc1e453abfa2c02b568efb3c19f5a31bc3f942 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Jun 2023 09:46:46 +0800 Subject: [PATCH 14/21] bpf: fix building without CONFIG_INET ANBZ: #5530 commit df3f94a0bbeb6cb6a02eb16b8e76f16b33cb2f8f upstream. The newly added TCP and UDP handling fails to link when CONFIG_INET is disabled: net/core/filter.o: In function `sk_lookup': filter.c:(.text+0x7ff8): undefined reference to `tcp_hashinfo' filter.c:(.text+0x7ffc): undefined reference to `tcp_hashinfo' filter.c:(.text+0x8020): undefined reference to `__inet_lookup_established' filter.c:(.text+0x8058): undefined reference to `__inet_lookup_listener' filter.c:(.text+0x8068): undefined reference to `udp_table' filter.c:(.text+0x8070): undefined reference to `udp_table' filter.c:(.text+0x808c): undefined reference to `__udp4_lib_lookup' net/core/filter.o: In function `bpf_sk_release': filter.c:(.text+0x82e8): undefined reference to `sock_gen_put' Wrap the related sections of code in #ifdefs for the config option. Furthermore, sk_lookup() should always have been marked 'static', this also avoids a warning about a missing prototype when building with 'make W=1'. Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") Signed-off-by: Arnd Bergmann Signed-off-by: Joe Stringer Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Xuezhi Zhao --- net/core/filter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 9fa3b5b53141..5222a44e543d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4797,8 +4797,9 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ -struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, - struct sk_buff *skb, u8 family, u8 proto) +#ifdef CONFIG_INET +static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, + struct sk_buff *skb, u8 family, u8 proto) { int dif = skb->dev->ifindex; bool refcounted = false; @@ -4931,6 +4932,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_SOCKET, }; +#endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) { @@ -5138,12 +5140,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_ancestor_cgroup_id: return &bpf_skb_ancestor_cgroup_id_proto; #endif +#ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; +#endif default: return bpf_base_func_proto(func_id); } @@ -5244,12 +5248,14 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_hash_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; +#ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; +#endif default: return bpf_base_func_proto(func_id); } -- Gitee From 52b8e43e31a462d0e41c09942e930bcff41e3079 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Jun 2023 09:57:50 +0800 Subject: [PATCH 15/21] bpf: Fix dev pointer dereference from sk_skb ANBZ: #5530 commit 67e89ac32828a29adc74e5c9bd59bd70943466f0 upstream. Dan Carpenter reports: The patch 6acc9b432e67: "bpf: Add helper to retrieve socket in BPF" from Oct 2, 2018, leads to the following Smatch complaint: net/core/filter.c:4893 bpf_sk_lookup() error: we previously assumed 'skb->dev' could be null (see line 4885) Fix this issue by checking skb->dev before using it. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- net/core/filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 5222a44e543d..fa4a41152e82 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4801,9 +4801,12 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, struct sk_buff *skb, u8 family, u8 proto) { - int dif = skb->dev->ifindex; bool refcounted = false; struct sock *sk = NULL; + int dif = 0; + + if (skb->dev) + dif = skb->dev->ifindex; if (family == AF_INET) { __be32 src4 = tuple->ipv4.saddr; -- Gitee From 2e2bee60b8dcfb10944c05e4a20dc938364be52d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Jun 2023 10:10:03 +0800 Subject: [PATCH 16/21] bpf: Allow sk_lookup with IPv6 module ANBZ: #5530 commit 8a615c6b0352a9ec56151b6c95d68e0a2eef5cf0 upstream. This is a more complete fix than d71019b54bff ("net: core: Fix build with CONFIG_IPV6=m"), so that IPv6 sockets may be looked up if the IPv6 module is loaded (not just if it's compiled in). Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- include/net/addrconf.h | 5 +++++ net/core/filter.c | 10 ++++++---- net/ipv6/af_inet6.c | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c8d5bb8b3616..ef6e4e6d612d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -267,6 +267,11 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/net/core/filter.c b/net/core/filter.c index fa4a41152e82..da8e887274cc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4833,10 +4833,12 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, tuple->ipv6.dport, dif, sdif, &refcounted); - else - sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, - dst6, tuple->ipv6.dport, - dif, sdif, &udp_table, skb); + else if (likely(ipv6_bpf_stub)) + sk = ipv6_bpf_stub->udp6_lib_lookup(net, + src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, + &udp_table, skb); #endif } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f3add0251786..8db31689cb69 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -918,6 +918,7 @@ static const struct ipv6_stub ipv6_stub_impl = { static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, + .udp6_lib_lookup = __udp6_lib_lookup, }; static int __init inet6_init(void) -- Gitee From fee17ddea78dc58519359e4ca74396c41fbcee5f Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Jun 2023 10:20:14 +0800 Subject: [PATCH 17/21] bpf: Fix IPv6 dport byte-order in bpf_sk_lookup ANBZ: #5530 commit 5ef0ae84f02a4dbe0e09f89c6481ac13649cb19b upstream. Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") mistakenly passed the destination port in network byte-order to the IPv6 TCP/UDP socket lookup functions, which meant that BPF writers would need to either manually swap the byte-order of this field or otherwise IPv6 sockets could not be located via this helper. Fix the issue by swapping the byte-order appropriately in the helper. This also makes the API more consistent with the IPv4 version. Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- net/core/filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index da8e887274cc..9296294e833c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4826,17 +4826,18 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; + u16 hnum = ntohs(tuple->ipv6.dport); int sdif = inet6_sdif(skb); if (proto == IPPROTO_TCP) sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, src6, tuple->ipv6.sport, - dst6, tuple->ipv6.dport, + dst6, hnum, dif, sdif, &refcounted); else if (likely(ipv6_bpf_stub)) sk = ipv6_bpf_stub->udp6_lib_lookup(net, src6, tuple->ipv6.sport, - dst6, tuple->ipv6.dport, + dst6, hnum, dif, sdif, &udp_table, skb); #endif -- Gitee From 392403fccbb7e4bb115b8739dbe4682719cbc0fb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Jun 2023 10:26:34 +0800 Subject: [PATCH 18/21] bpf, verifier: fix register type dump in xadd and st ANBZ: #5530 commit 2a159c6f82381a458bc56e7e202b6bee57a2ccb7 upstream. Using reg_type_str[insn->dst_reg] is incorrect since insn->dst_reg contains the register number but not the actual register type. Add a small reg_state() helper and use it to get to the type. Also fix up the test_verifier test cases that have an incorrect errstr. Fixes: 9d2be44a7f33 ("bpf: Reuse canonical string formatter for ctx errs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 19 +++++++++++++------ tools/testing/selftests/bpf/test_verifier.c | 10 +++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e9639920d062..09c427597e47 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1670,14 +1670,19 @@ static bool __is_pointer_value(bool allow_ptr_leaks, return reg->type != SCALAR_VALUE; } +static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) +{ + return cur_regs(env) + regno; +} + static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { - return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); } static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = cur_regs(env) + regno; + const struct bpf_reg_state *reg = reg_state(env, regno); return reg->type == PTR_TO_CTX || reg->type == PTR_TO_SOCKET; @@ -1685,7 +1690,7 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = cur_regs(env) + regno; + const struct bpf_reg_state *reg = reg_state(env, regno); return type_is_pkt_pointer(reg->type); } @@ -2086,7 +2091,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg)) { verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", - insn->dst_reg, reg_type_str[insn->dst_reg]); + insn->dst_reg, + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } @@ -2111,7 +2117,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *reg = cur_regs(env) + regno; + struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); int off, i, slot, spi; @@ -5816,7 +5822,8 @@ static int do_check(struct bpf_verifier_env *env) if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", - insn->dst_reg, reg_type_str[insn->dst_reg]); + insn->dst_reg, + reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8906a34da9a1..bfe815c5c1e2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3485,7 +3485,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_ST stores into R1 inv is not allowed", + .errstr = "BPF_ST stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3497,7 +3497,7 @@ static struct bpf_test tests[] = { BPF_REG_0, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -5475,7 +5475,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -5490,7 +5490,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 inv is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -12455,7 +12455,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 ctx", + .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, }, { -- Gitee From d28ea7e61a68a61e762c729c3e8831326bd49f50 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Tue, 18 Jul 2023 17:32:04 +0800 Subject: [PATCH 19/21] bpf: Fix IPv6 dport byte order in bpf_sk_lookup_udp ANBZ: #5530 commit cac6cc2f5ac710334ae0f6bba5630d791c253574 upstream. Lookup functions in sk_lookup have different expectations about byte order of provided arguments. Specifically __inet_lookup, __udp4_lib_lookup and __udp6_lib_lookup expect dport to be in network byte order and do ntohs(dport) internally. At the same time __inet6_lookup expects dport to be in host byte order and correspondingly name the argument hnum. sk_lookup works correctly with __inet_lookup, __udp4_lib_lookup and __inet6_lookup with regard to dport. But in __udp6_lib_lookup case it uses host instead of expected network byte order. It makes result returned by bpf_sk_lookup_udp for IPv6 incorrect. The patch fixes byte order of dport passed to __udp6_lib_lookup. Originally sk_lookup properly handled UDPv6, but not TCPv6. 5ef0ae84f02a fixes TCPv6 but breaks UDPv6. Fixes: 5ef0ae84f02a ("bpf: Fix IPv6 dport byte-order in bpf_sk_lookup") Signed-off-by: Andrey Ignatov Acked-by: Joe Stringer Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- net/core/filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 9296294e833c..311bb1a882ba 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4826,18 +4826,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; - u16 hnum = ntohs(tuple->ipv6.dport); int sdif = inet6_sdif(skb); if (proto == IPPROTO_TCP) sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, src6, tuple->ipv6.sport, - dst6, hnum, + dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); else if (likely(ipv6_bpf_stub)) sk = ipv6_bpf_stub->udp6_lib_lookup(net, src6, tuple->ipv6.sport, - dst6, hnum, + dst6, tuple->ipv6.dport, dif, sdif, &udp_table, skb); #endif -- Gitee From a0d6facee805f00082816f8e88580878648b87be Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 30 Nov 2018 15:32:20 -0800 Subject: [PATCH 20/21] bpf: Support sk lookup in netns with id 0 ANBZ: #5530 commit f71c6143c2038df1cb43a4b9c90740d14f77467c upstream. David Ahern and Nicolas Dichtel report that the handling of the netns id 0 is incorrect for the BPF socket lookup helpers: rather than finding the netns with id 0, it is resolving to the current netns. This renders the netns_id 0 inaccessible. To fix this, adjust the API for the netns to treat all negative s32 values as a lookup in the current netns (including u64 values which when truncated to s32 become negative), while any values with a positive value in the signed 32-bit integer space would result in a lookup for a socket in the netns corresponding to that id. As before, if the netns with that ID does not exist, no socket will be found. Any netns outside of these ranges will fail to find a corresponding socket, as those values are reserved for future usage. Signed-off-by: Joe Stringer Acked-by: Nicolas Dichtel Acked-by: Joey Pabalinas Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- include/uapi/linux/bpf.h | 35 ++++++++++------- net/core/filter.c | 11 +++--- tools/include/uapi/linux/bpf.h | 39 ++++++++++++------- tools/testing/selftests/bpf/bpf_helpers.h | 4 +- .../selftests/bpf/test_sk_lookup_kern.c | 18 ++++----- 5 files changed, 63 insertions(+), 44 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4eff772742b0..4fa8c9d90e2d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2150,7 +2150,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2167,12 +2167,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2182,7 +2184,7 @@ union bpf_attr { * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2199,12 +2201,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2364,6 +2368,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/net/core/filter.c b/net/core/filter.c index 311bb1a882ba..4af6c9c4c961 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4864,22 +4864,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *net; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; - if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + if (unlikely(family == AF_UNSPEC || flags || + !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (skb->dev) caller_net = dev_net(skb->dev); else caller_net = sock_net(skb->sk); - if (netns_id) { + if ((s32)netns_id < 0) { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } else { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; sk = sk_lookup(net, tuple, skb, family, proto); put_net(net); - } else { - net = caller_net; - sk = sk_lookup(net, tuple, skb, family, proto); } if (sk) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b2acbe007423..1f5c94f443ca 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2145,7 +2145,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2162,12 +2162,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2176,8 +2178,10 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2194,12 +2198,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2208,6 +2214,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description @@ -2359,6 +2367,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 1d407b3494f9..8962331ed05f 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -145,12 +145,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_udp; static int (*bpf_sk_release)(struct bpf_sock *sk) = diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c index b745bdc08c2b..e21cd736c196 100644 --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return TC_ACT_SHOT; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return 0; @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family = 0; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { bpf_sk_release(sk); family = sk->family; @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { sk += 1; bpf_sk_release(sk); @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; if (sk) bpf_sk_release(sk); @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); return 0; } @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); bpf_sk_release(sk); return 0; @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); return 0; } @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } SEC("fail_no_release_subcall") -- Gitee From c8baefec4175293d34293aff6f3ff71cfe2ac0b1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 9 Jun 2023 10:33:02 +0800 Subject: [PATCH 21/21] bpf: Fix narrow load on a bpf_sock returned from sk_lookup() ANBZ: #5530 commit 5f4566498dee5e38e36a015a968c22ed21568f0b upstream. By adding this test to test_verifier: { "reference tracking: access sk->src_ip4 (narrow load)", .insns = { BPF_SK_LOOKUP, BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, src_ip4) + 2), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_EMIT_CALL(BPF_FUNC_sk_release), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, The above test loads 2 bytes from sk->src_ip4 where sk is obtained by bpf_sk_lookup_tcp(). It hits an internal verifier error from convert_ctx_accesses(): [root@arch-fb-vm1 bpf]# ./test_verifier 665 665 Failed to load prog 'Invalid argument'! 0: (b7) r2 = 0 1: (63) *(u32 *)(r10 -8) = r2 2: (7b) *(u64 *)(r10 -16) = r2 3: (7b) *(u64 *)(r10 -24) = r2 4: (7b) *(u64 *)(r10 -32) = r2 5: (7b) *(u64 *)(r10 -40) = r2 6: (7b) *(u64 *)(r10 -48) = r2 7: (bf) r2 = r10 8: (07) r2 += -48 9: (b7) r3 = 36 10: (b7) r4 = 0 11: (b7) r5 = 0 12: (85) call bpf_sk_lookup_tcp#84 13: (bf) r6 = r0 14: (15) if r0 == 0x0 goto pc+3 R0=sock(id=1,off=0,imm=0) R6=sock(id=1,off=0,imm=0) R10=fp0,call_-1 fp-8=????0000 fp-16=0000mmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm refs=1 15: (69) r2 = *(u16 *)(r0 +26) 16: (bf) r1 = r6 17: (85) call bpf_sk_release#86 18: (95) exit from 14 to 18: safe processed 20 insns (limit 131072), stack depth 48 bpf verifier is misconfigured Summary: 0 PASSED, 0 SKIPPED, 1 FAILED The bpf_sock_is_valid_access() is expecting src_ip4 can be narrowly loaded (meaning load any 1 or 2 bytes of the src_ip4) by marking info->ctx_field_size. However, this marked ctx_field_size is not used. This patch fixes it. Due to the recent refactoring in test_verifier, this new test will be added to the bpf-next branch (together with the bpf_tcp_sock patchset) to avoid merge conflict. Fixes: c64b7983288e ("bpf: Add PTR_TO_SOCKET verifier type") Cc: Joe Stringer Signed-off-by: Martin KaFai Lau Acked-by: Joe Stringer Signed-off-by: Alexei Starovoitov Signed-off-by: Xuezhi Zhao --- kernel/bpf/verifier.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 09c427597e47..012fe5e88413 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1639,12 +1639,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, return 0; } -static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, - int size, enum bpf_access_type t) +static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, + u32 regno, int off, int size, + enum bpf_access_type t) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; - struct bpf_insn_access_aux info; + struct bpf_insn_access_aux info = {}; if (reg->smin_value < 0) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", @@ -1658,6 +1659,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, return -EACCES; } + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + return 0; } @@ -2046,7 +2049,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose(env, "cannot write into socket\n"); return -EACCES; } - err = check_sock_access(env, regno, off, size, t); + err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { -- Gitee