diff --git a/MAINTAINERS b/MAINTAINERS index 459fdf97ff7542027492fcae1e4fdae92373e351..6fedb88397cf7eec6898a0505cc3babe12feb615 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10117,6 +10117,7 @@ L: linux-block@vger.kernel.org L: nbd@other.debian.org F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c +F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h NETWORK DROP MONITOR diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 79ec7add5f98fbaa5390bac9571f2f61652fa8c1..91690778faf9762df6496bcb4fcd8462743f2923 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -290,6 +290,8 @@ static inline bool nmi_uaccess_okay(void) return true; } +#define nmi_uaccess_okay nmi_uaccess_okay + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 2a9138dab13e5a62f4a55ff9b2c2d9a110b7f897..e6f836f1da041ca035a16d80ae694f4312e310dd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -44,6 +44,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static int nbd_total_devices = 0; @@ -513,6 +516,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) if (sent) { if (sent >= sizeof(request)) { skip = sent - sizeof(request); + + /* initialize handle for tracing purposes */ + handle = nbd_cmd_handle(cmd); + goto send_pages; } iov_iter_advance(&from, sent); @@ -529,11 +536,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) handle = nbd_cmd_handle(cmd); memcpy(request.handle, &handle, sizeof(handle)); + trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd)); + dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", req, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); + trace_nbd_header_sent(req, handle); if (result <= 0) { if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, @@ -606,6 +616,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) bio = next; } out: + trace_nbd_payload_sent(req, handle); nsock->pending = NULL; nsock->sent = 0; return 0; @@ -653,6 +664,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) tag, req); return ERR_PTR(-ENOENT); } + trace_nbd_header_received(req, handle); cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); @@ -712,6 +724,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) } } out: + trace_nbd_payload_received(req, handle); mutex_unlock(&cmd->lock); return ret ? ERR_PTR(ret) : cmd; } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index db72ad39853b9874e0d3e999edc284ae6efb6ac4..9c1c3b35bd309f8ed10336a09d4a94813ea4fed0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -20,6 +20,15 @@ #include #include +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or switching + * the loaded mm. + */ +#ifndef nmi_uaccess_okay +# define nmi_uaccess_okay() true +#endif + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a8653137b52cdeaed56eec9678c465e99a1f06fd..e9fde0bdfe0b4d0255e032b8a5f48e25f45f059b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -280,6 +280,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -369,6 +370,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index eb1e4f867a5e35aee66cb51293ae30bde9a99641..5a9975678d6fcac4d54138826d2de88d2d6ebf0f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 346a1dc18218e1ccbbdfe0a264e12c73682ce6a9..e590137e7f27136f88eca6061f1de1fa220e3e5d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; + u32 insn_idx; u32 curframe; u32 active_spin_lock; bool speculative; @@ -234,6 +235,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + bool prune_point; unsigned int orig_idx; /* original instruction index */ }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 16523a7c4f18b6fb315daa5727a51320995d725a..b32df2de599357cfa9ff789c24c16d23ee88a956 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1216,11 +1216,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, unsigned int key_count); #ifdef CONFIG_NET +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); #else +static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4bb5e1b6b66f4f94912a65df971906..b29950a19205e7784dab42924bf3465cb636d76c 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80bd83373811c60ce4ac60d54ee8af..d6e556c0a0852df0c357361dfbb642bdbf3e58bf 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. \ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h new file mode 100644 index 0000000000000000000000000000000000000000..265447e3f71abeea68de239f722c6428fe5b8289 --- /dev/null +++ b/include/trace/events/bpf_test_run.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_test_run + +#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_TEST_RUN_H + +#include + +DECLARE_EVENT_CLASS(bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + TP_STRUCT__entry( + __field(int, err) + ), + + TP_fast_assign( + __entry->err = *err; + ), + + TP_printk("bpf_test_finish with err=%d", __entry->err) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + sizeof(int) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h new file mode 100644 index 0000000000000000000000000000000000000000..9849956f34d85fbb264cf64e99131c4885d7b99c --- /dev/null +++ b/include/trace/events/nbd.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nbd + +#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NBD_H + +#include + +DECLARE_EVENT_CLASS(nbd_transport_event, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u64, handle) + ), + + TP_fast_assign( + __entry->req = req; + __entry->handle = handle; + ), + + TP_printk( + "nbd transport event: request %p, handle 0x%016llx", + __entry->req, + __entry->handle + ) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DECLARE_EVENT_CLASS(nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + TP_STRUCT__entry( + __field(struct nbd_request *, nbd_request) + __field(u64, dev_index) + __field(struct request *, request) + ), + + TP_fast_assign( + __entry->nbd_request = 0; + __entry->dev_index = index; + __entry->request = rq; + ), + + TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + sizeof(struct nbd_request) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a8d87ab34ef024d27340ee0dfb419267a0f16416..3d62b8beefef9549629509016c2adc9bea950f5e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -169,6 +169,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index beeddb1abf55fccea8bb6c45484a161825c4cf0d..cd7c9da22a56971bf2423e4169a96bc0c481095e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1803,12 +1803,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) return -ENOMEM; raw_tp->btp = btp; - prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, - BPF_PROG_TYPE_RAW_TRACEPOINT); + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out_free_tp; } + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } err = bpf_probe_register(raw_tp->btp, prog); if (err) @@ -2027,6 +2031,8 @@ static int bpf_prog_query(const union bpf_attr *attr, break; case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); + case BPF_FLOW_DISSECTOR: + return skb_flow_dissector_prog_query(attr, uattr); default: return -EINVAL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3efc91b1376a4e905dff0362465ab96c82c4bc24..bff1e01282e76ed4946e797de5322037992da0ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_STACK 1024 +#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 #define BPF_COMPLEXITY_LIMIT_STATES 64 #define BPF_MAP_PTR_UNPRIV 1UL @@ -404,6 +404,7 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = "tp_buffer", }; static char slot_type_char[] = { @@ -780,8 +781,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, if (err) goto err; elem->st.speculative |= speculative; - if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { - verbose(env, "BPF program is too complex\n"); + if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { + verbose(env, "The sequence of %d jumps is too complex.\n", + env->stack_size); goto err; } return &elem->st; @@ -1987,6 +1989,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, return 0; } +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + if (off < 0) { + verbose(env, + "R%d invalid tracepoint buffer access: off=%d, size=%d", + regno, off, size); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d invalid variable buffer offset: off=%d, var_off=%s", + regno, off, tn_buf); + return -EACCES; + } + if (off + size > env->prog->aux->max_tp_access) + env->prog->aux->max_tp_access = off + size; + + return 0; +} + + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -2131,6 +2159,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_TP_BUFFER) { + err = check_tp_buffer_access(env, reg, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -4465,15 +4497,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static void __find_good_pkt_pointers(struct bpf_func_state *state, + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type, u16 new_range) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + } + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + if (reg->type == type && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } +} + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs, *reg; u16 new_range; - int i, j; + int i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -4538,20 +4590,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == type && regs[i].id == dst_reg->id) - /* keep the maximum range already checked */ - regs[i].range = max(regs[i].range, new_range); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } - } + for (i = 0; i <= vstate->curframe; i++) + __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, + new_range); } /* compute branch direction of the expression "if (reg opcode val) goto target;" @@ -5025,6 +5066,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } +static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, + bool is_null) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + mark_ptr_or_null_reg(state, reg, id, is_null); + } +} + /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -5032,10 +5089,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *regs = state->regs; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i, j; + int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -5044,17 +5101,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, ®s[i], id, is_null); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } - } + for (i = 0; i <= vstate->curframe; i++) + __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@ -5556,7 +5604,25 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) +static u32 state_htab_size(struct bpf_verifier_env *env) +{ + return env->prog->len; +} + +static struct bpf_verifier_state_list **explored_state( + struct bpf_verifier_env *env, + int idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_func_state *state = cur->frame[cur->curframe]; + + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; +} + +static void init_explored_state(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].prune_point = true; +} /* t, w, e - match pseudo-code above: * t - index of current instruction @@ -5582,7 +5648,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) if (e == BRANCH) /* mark branch target for state pruning */ - env->explored_states[w] = STATE_LIST_MARK; + init_explored_state(env, w); if (insn_state[w] == 0) { /* tree-edge */ @@ -5650,9 +5716,9 @@ static int check_cfg(struct bpf_verifier_env *env) else if (ret < 0) goto err_free; if (t + 1 < insn_cnt) - env->explored_states[t + 1] = STATE_LIST_MARK; + init_explored_state(env, t + 1); if (insns[t].src_reg == BPF_PSEUDO_CALL) { - env->explored_states[t] = STATE_LIST_MARK; + init_explored_state(env, t); ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); if (ret == 1) goto peek_stack; @@ -5675,10 +5741,10 @@ static int check_cfg(struct bpf_verifier_env *env) * after every call and jump */ if (t + 1 < insn_cnt) - env->explored_states[t + 1] = STATE_LIST_MARK; + init_explored_state(env, t + 1); } else { /* conditional jump with two edges */ - env->explored_states[t] = STATE_LIST_MARK; + init_explored_state(env, t); ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret == 1) goto peek_stack; @@ -6126,12 +6192,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state_list *sl; int i; - sl = env->explored_states[insn]; - if (!sl) - return; - - while (sl != STATE_LIST_MARK) { - if (sl->state.curframe != cur->curframe) + sl = *explored_state(env, insn); + while (sl) { + if (sl->state.insn_idx != insn || + sl->state.curframe != cur->curframe) goto next; for (i = 0; i <= cur->curframe; i++) if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) @@ -6485,18 +6549,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - pprev = &env->explored_states[insn_idx]; - sl = *pprev; - - if (!sl) + if (!env->insn_aux_data[insn_idx].prune_point) /* this 'insn_idx' instruction wasn't marked, so we will not * be doing state search here */ return 0; + pprev = explored_state(env, insn_idx); + sl = *pprev; + clean_live_states(env, insn_idx, cur); - while (sl != STATE_LIST_MARK) { + while (sl) { + states_cnt++; + if (sl->state.insn_idx != insn_idx) + goto next; if (states_equal(env, &sl->state, cur)) { sl->hit_cnt++; /* reached equivalent register/stack state, @@ -6514,7 +6581,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return err; return 1; } - states_cnt++; sl->miss_cnt++; /* heuristic to determine whether this state is beneficial * to keep checking from state equivalence point of view. @@ -6541,6 +6607,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) sl = *pprev; continue; } +next: pprev = &sl->next; sl = *pprev; } @@ -6572,8 +6639,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) kfree(new_sl); return err; } - new_sl->next = env->explored_states[insn_idx]; - env->explored_states[insn_idx] = new_sl; + new->insn_idx = insn_idx; + new_sl->next = *explored_state(env, insn_idx); + *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all * registers connected. Only r6 - r9 of the callers are alive (pushed * to the stack implicitly by JITs) so in callers' frames connect just @@ -8256,16 +8324,15 @@ static void free_states(struct bpf_verifier_env *env) if (!env->explored_states) return; - for (i = 0; i < env->prog->len; i++) { + for (i = 0; i < state_htab_size(env); i++) { sl = env->explored_states[i]; - if (sl) - while (sl != STATE_LIST_MARK) { - sln = sl->next; - free_verifier_state(&sl->state, false); - kfree(sl); - sl = sln; - } + while (sl) { + sln = sl->next; + free_verifier_state(&sl->state, false); + kfree(sl); + sl = sln; + } } kvfree(env->explored_states); @@ -8365,7 +8432,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, goto skip_full_check; } - env->explored_states = kvcalloc(env->prog->len, + env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 07bccf9fb697f937fed0c2fcff22ed321f0c4fcb..a29375b9d9a1f0562c5d6d610d2e65ca2418f9b6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,6 +14,8 @@ #include #include +#include + #include "trace_probe.h" #include "trace.h" @@ -126,6 +128,10 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, * access_ok() should prevent writing to non-user memory, but in * some situations (nommu, temporary switch, etc) access_ok() does * not provide enough validation, hence the check on KERNEL_DS. + * + * nmi_uaccess_okay() ensures the probe is not run in an interim + * state, when the task or mm are switched. This is specifically + * required to prevent the use of temporary mm. */ if (unlikely(in_interrupt() || @@ -133,6 +139,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, return -EPERM; if (unlikely(uaccess_kernel())) return -EPERM; + if (unlikely(!nmi_uaccess_okay())) + return -EPERM; if (!access_ok(VERIFY_WRITE, unsafe_ptr, size)) return -EPERM; @@ -963,6 +971,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { const struct bpf_prog_ops raw_tracepoint_prog_ops = { }; +static bool raw_tp_writable_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off == 0) { + if (size != sizeof(u64) || type != BPF_READ) + return false; + info->reg_type = PTR_TO_TP_BUFFER; + } + return raw_tp_prog_is_valid_access(off, size, type, prog, info); +} + +const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { + .get_func_proto = raw_tp_prog_func_proto, + .is_valid_access = raw_tp_writable_prog_is_valid_access, +}; + +const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -1243,6 +1272,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) return -EINVAL; + if (prog->aux->max_tp_access > btp->writable_size) + return -EINVAL; + return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8c9331913753ee817413ac7b24e434c1fadb3efd..33e0dc168c16aa24fc7b9c42e17582b316b10472 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -14,6 +14,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { @@ -101,6 +104,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, if (err != -ENOSPC) err = 0; out: + trace_bpf_test_finish(&err); return err; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 4725fcf9e8ea43ca5c00742ab3793734f7b74564..09db923f398f77c25325a1ba1a394e09a3790267 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -65,6 +65,45 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + u32 prog_id, prog_cnt = 0, flags = 0; + struct bpf_prog *attached; + struct net *net; + + if (attr->query.query_flags) + return -EINVAL; + + net = get_net_ns_by_fd(attr->query.target_fd); + if (IS_ERR(net)) + return PTR_ERR(net); + + rcu_read_lock(); + attached = rcu_dereference(net->flow_dissector_prog); + if (attached) { + prog_cnt = 1; + prog_id = attached->aux->id; + } + rcu_read_unlock(); + + put_net(net); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + return -EFAULT; + + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + return 0; + + if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) + return -EFAULT; + + return 0; +} + int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst new file mode 100644 index 0000000000000000000000000000000000000000..2dbc1413fabd8caa116ffff4df999d3ec0c4dece --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -0,0 +1,222 @@ +================ +bpftool-btf +================ +------------------------------------------------------------------------------- +tool for inspection of BTF data +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **btf** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := { **dump** | **help** } + +BTF COMMANDS +============= + +| **bpftool** **btf dump** *BTF_SRC* +| **bpftool** **btf help** +| +| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } + +DESCRIPTION +=========== + **bpftool btf dump** *BTF_SRC* + Dump BTF entries from a given *BTF_SRC*. + + When **id** is specified, BTF object with that ID will be + loaded and all its BTF types emitted. + + When **map** is provided, it's expected that map has + associated BTF object with BTF types describing key and + value. It's possible to select whether to dump only BTF + type(s) associated with key (**key**), value (**value**), + both key and value (**kv**), or all BTF types present in + associated BTF object (**all**). If not specified, **kv** + is assumed. + + When **prog** is provided, it's expected that program has + associated BTF object with BTF types. + + When specifying *FILE*, an ELF file is expected, containing + .BTF section with well-defined BTF binary format data, + typically produced by clang or pahole. + + **bpftool btf help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== +**# bpftool btf dump id 1226** +:: + + [1] PTR '(anon)' type_id=2 + [2] STRUCT 'dummy_tracepoint_args' size=16 vlen=2 + 'pad' type_id=3 bits_offset=0 + 'sock' type_id=4 bits_offset=64 + [3] INT 'long long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) + [4] PTR '(anon)' type_id=5 + [5] FWD 'sock' fwd_kind=union + +This gives an example of default output for all supported BTF kinds. + +**$ cat prog.c** +:: + + struct fwd_struct; + + enum my_enum { + VAL1 = 3, + VAL2 = 7, + }; + + typedef struct my_struct my_struct_t; + + struct my_struct { + const unsigned int const_int_field; + int bitfield_field: 4; + char arr_field[16]; + const struct fwd_struct *restrict fwd_field; + enum my_enum enum_field; + volatile my_struct_t *typedef_ptr_field; + }; + + union my_union { + int a; + struct my_struct b; + }; + + struct my_struct struct_global_var __attribute__((section("data_sec"))) = { + .bitfield_field = 3, + .enum_field = VAL1, + }; + int global_var __attribute__((section("data_sec"))) = 7; + + __attribute__((noinline)) + int my_func(union my_union *arg1, int arg2) + { + static int static_var __attribute__((section("data_sec"))) = 123; + static_var++; + return static_var; + } + +**$ bpftool btf dump file prog.o** +:: + + [1] PTR '(anon)' type_id=2 + [2] UNION 'my_union' size=48 vlen=2 + 'a' type_id=3 bits_offset=0 + 'b' type_id=4 bits_offset=0 + [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED + [4] STRUCT 'my_struct' size=48 vlen=6 + 'const_int_field' type_id=5 bits_offset=0 + 'bitfield_field' type_id=3 bits_offset=32 bitfield_size=4 + 'arr_field' type_id=8 bits_offset=40 + 'fwd_field' type_id=10 bits_offset=192 + 'enum_field' type_id=14 bits_offset=256 + 'typedef_ptr_field' type_id=15 bits_offset=320 + [5] CONST '(anon)' type_id=6 + [6] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none) + [7] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED + [8] ARRAY '(anon)' type_id=7 index_type_id=9 nr_elems=16 + [9] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none) + [10] RESTRICT '(anon)' type_id=11 + [11] PTR '(anon)' type_id=12 + [12] CONST '(anon)' type_id=13 + [13] FWD 'fwd_struct' fwd_kind=union + [14] ENUM 'my_enum' size=4 vlen=2 + 'VAL1' val=3 + 'VAL2' val=7 + [15] PTR '(anon)' type_id=16 + [16] VOLATILE '(anon)' type_id=17 + [17] TYPEDEF 'my_struct_t' type_id=4 + [18] FUNC_PROTO '(anon)' ret_type_id=3 vlen=2 + 'arg1' type_id=1 + 'arg2' type_id=3 + [19] FUNC 'my_func' type_id=18 + [20] VAR 'struct_global_var' type_id=4, linkage=global-alloc + [21] VAR 'global_var' type_id=3, linkage=global-alloc + [22] VAR 'my_func.static_var' type_id=3, linkage=static + [23] DATASEC 'data_sec' size=0 vlen=3 + type_id=20 offset=0 size=48 + type_id=21 offset=0 size=4 + type_id=22 offset=52 size=4 + +The following commands print BTF types associated with specified map's key, +value, both key and value, and all BTF types, respectively. By default, both +key and value types will be printed. + +**# bpftool btf dump map id 123 key** + +:: + + [39] TYPEDEF 'u32' type_id=37 + +**# bpftool btf dump map id 123 value** + +:: + + [86] PTR '(anon)' type_id=87 + +**# bpftool btf dump map id 123 kv** + +:: + + [39] TYPEDEF 'u32' type_id=37 + [86] PTR '(anon)' type_id=87 + +**# bpftool btf dump map id 123 all** + +:: + + [1] PTR '(anon)' type_id=0 + . + . + . + [2866] ARRAY '(anon)' type_id=52 index_type_id=51 nr_elems=4 + +All the standard ways to specify map or program are supported: + +**# bpftool btf dump map id 123** + +**# bpftool btf dump map pinned /sys/fs/bpf/map_name** + +**# bpftool btf dump prog id 456** + +**# bpftool btf dump prog tag b88e0a09b1d9759d** + +**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 89b6b10e21835f1a52b4708b2b88073b8a91bd2e..ac26876389c2383297185ddc1d3f3f1ac0067d8b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -145,4 +145,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 10177343b7ce2793a6deee062ffecb57e3eab355..14180e8870824304c54abb9cd67c82919f158e90 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -82,4 +82,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 55ecf80ca03e9c81892da961f0caed1c0d918aca..13ef27b39f200ff4239741201deb95d37067464c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -258,4 +258,5 @@ SEE ALSO **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 6b692b4283734e14ad0892ed232a5361e4056f32..934580850f42b0f74a91b1323020db187652df9c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -143,4 +143,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 86154740fabb1075edc04141e581f70c8be093d2..0c7576523a217150c4239b7e903ba82fe53baaab 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -85,4 +85,5 @@ SEE ALSO **bpftool-map**\ (8), **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), - **bpftool-net**\ (8) + **bpftool-net**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 2f183ffd8351ed412209642ed65db158de5be840..e8118544d118282b63bfc27e515ea53ae9903d96 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -271,4 +271,5 @@ SEE ALSO **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index deb2ca911ddfae3b383f0cd42f6948b15216d238..3e562d7fd56f942c657fdb7015de606190ea5a26 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -76,4 +76,5 @@ SEE ALSO **bpftool-cgroup**\ (8), **bpftool-feature**\ (8), **bpftool-net**\ (8), - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 9f3ffe1e26ab6480ac74f62ca2cd748fa328f5d4..50e402a5a9c86276f2b37526f43c0ede5c1ba8a6 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -217,6 +217,7 @@ _bpftool() done cur=${words[cword]} prev=${words[cword - 1]} + pprev=${words[cword - 2]} local object=${words[1]} command=${words[2]} @@ -272,17 +273,17 @@ _bpftool() "$cur" ) ) return 0 ;; - *) - _bpftool_once_attr 'file' - if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ - "$cur" ) ) - else - COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ - "$cur" ) ) - fi - return 0 - ;; + *) + _bpftool_once_attr 'file' + if _bpftool_search_list 'xlated'; then + COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ + "$cur" ) ) + else + COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ + "$cur" ) ) + fi + return 0 + ;; esac ;; pin) @@ -607,6 +608,51 @@ _bpftool() ;; esac ;; + btf) + local PROG_TYPE='id pinned tag' + local MAP_TYPE='id pinned' + case $command in + dump) + case $prev in + $command) + COMPREPLY+=( $( compgen -W "id map prog file" -- \ + "$cur" ) ) + return 0 + ;; + prog) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + map) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + id) + case $pprev in + prog) + _bpftool_get_prog_ids + ;; + map) + _bpftool_get_map_ids + ;; + esac + return 0 + ;; + *) + if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then + COMPREPLY+=( $( compgen -W 'key value kv all' -- \ + "$cur" ) ) + fi + return 0 + ;; + esac + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) ) + ;; + esac + ;; cgroup) case $command in show|list) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c new file mode 100644 index 0000000000000000000000000000000000000000..5c67bbcdf914db30dc9f858ae6556f4cb2ebeac3 --- /dev/null +++ b/tools/bpf/bpftool/btf.c @@ -0,0 +1,586 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "btf.h" +#include "json_writer.h" +#include "main.h" + +static const char * const btf_kind_str[NR_BTF_KINDS] = { + [BTF_KIND_UNKN] = "UNKNOWN", + [BTF_KIND_INT] = "INT", + [BTF_KIND_PTR] = "PTR", + [BTF_KIND_ARRAY] = "ARRAY", + [BTF_KIND_STRUCT] = "STRUCT", + [BTF_KIND_UNION] = "UNION", + [BTF_KIND_ENUM] = "ENUM", + [BTF_KIND_FWD] = "FWD", + [BTF_KIND_TYPEDEF] = "TYPEDEF", + [BTF_KIND_VOLATILE] = "VOLATILE", + [BTF_KIND_CONST] = "CONST", + [BTF_KIND_RESTRICT] = "RESTRICT", + [BTF_KIND_FUNC] = "FUNC", + [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", + [BTF_KIND_VAR] = "VAR", + [BTF_KIND_DATASEC] = "DATASEC", +}; + +static const char *btf_int_enc_str(__u8 encoding) +{ + switch (encoding) { + case 0: + return "(none)"; + case BTF_INT_SIGNED: + return "SIGNED"; + case BTF_INT_CHAR: + return "CHAR"; + case BTF_INT_BOOL: + return "BOOL"; + default: + return "UNKN"; + } +} + +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: + return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: + return "global-alloc"; + default: + return "(unknown)"; + } +} + +static const char *btf_str(const struct btf *btf, __u32 off) +{ + if (!off) + return "(anon)"; + return btf__name_by_offset(btf, off) ? : "(invalid)"; +} + +static int dump_btf_type(const struct btf *btf, __u32 id, + const struct btf_type *t) +{ + json_writer_t *w = json_wtr; + int kind, safe_kind; + + kind = BTF_INFO_KIND(t->info); + safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN; + + if (json_output) { + jsonw_start_object(w); + jsonw_uint_field(w, "id", id); + jsonw_string_field(w, "kind", btf_kind_str[safe_kind]); + jsonw_string_field(w, "name", btf_str(btf, t->name_off)); + } else { + printf("[%u] %s '%s'", id, btf_kind_str[safe_kind], + btf_str(btf, t->name_off)); + } + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: { + __u32 v = *(__u32 *)(t + 1); + const char *enc; + + enc = btf_int_enc_str(BTF_INT_ENCODING(v)); + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "bits_offset", BTF_INT_OFFSET(v)); + jsonw_uint_field(w, "nr_bits", BTF_INT_BITS(v)); + jsonw_string_field(w, "encoding", enc); + } else { + printf(" size=%u bits_offset=%u nr_bits=%u encoding=%s", + t->size, BTF_INT_OFFSET(v), BTF_INT_BITS(v), + enc); + } + break; + } + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + if (json_output) + jsonw_uint_field(w, "type_id", t->type); + else + printf(" type_id=%u", t->type); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *arr = (const void *)(t + 1); + + if (json_output) { + jsonw_uint_field(w, "type_id", arr->type); + jsonw_uint_field(w, "index_type_id", arr->index_type); + jsonw_uint_field(w, "nr_elems", arr->nelems); + } else { + printf(" type_id=%u index_type_id=%u nr_elems=%u", + arr->type, arr->index_type, arr->nelems); + } + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "members"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, m++) { + const char *name = btf_str(btf, m->name_off); + __u32 bit_off, bit_sz; + + if (BTF_INFO_KFLAG(t->info)) { + bit_off = BTF_MEMBER_BIT_OFFSET(m->offset); + bit_sz = BTF_MEMBER_BITFIELD_SIZE(m->offset); + } else { + bit_off = m->offset; + bit_sz = 0; + } + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "type_id", m->type); + jsonw_uint_field(w, "bits_offset", bit_off); + if (bit_sz) { + jsonw_uint_field(w, "bitfield_size", + bit_sz); + } + jsonw_end_object(w); + } else { + printf("\n\t'%s' type_id=%u bits_offset=%u", + name, m->type, bit_off); + if (bit_sz) + printf(" bitfield_size=%u", bit_sz); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *v = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "values"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + const char *name = btf_str(btf, v->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "val", v->val); + jsonw_end_object(w); + } else { + printf("\n\t'%s' val=%u", name, v->val); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_FWD: { + const char *fwd_kind = BTF_INFO_KFLAG(t->info) ? "union" + : "struct"; + + if (json_output) + jsonw_string_field(w, "fwd_kind", fwd_kind); + else + printf(" fwd_kind=%s", fwd_kind); + break; + } + case BTF_KIND_FUNC: + if (json_output) + jsonw_uint_field(w, "type_id", t->type); + else + printf(" type_id=%u", t->type); + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = (const void *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "ret_type_id", t->type); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "params"); + jsonw_start_array(w); + } else { + printf(" ret_type_id=%u vlen=%u", t->type, vlen); + } + for (i = 0; i < vlen; i++, p++) { + const char *name = btf_str(btf, p->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + jsonw_uint_field(w, "type_id", p->type); + jsonw_end_object(w); + } else { + printf("\n\t'%s' type_id=%u", name, p->type); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_VAR: { + const struct btf_var *v = (const void *)(t + 1); + const char *linkage; + + linkage = btf_var_linkage_str(v->linkage); + + if (json_output) { + jsonw_uint_field(w, "type_id", t->type); + jsonw_string_field(w, "linkage", linkage); + } else { + printf(" type_id=%u, linkage=%s", t->type, linkage); + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = (const void *)(t+1); + __u16 vlen = BTF_INFO_VLEN(t->info); + int i; + + if (json_output) { + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "vars"); + jsonw_start_array(w); + } else { + printf(" size=%u vlen=%u", t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + if (json_output) { + jsonw_start_object(w); + jsonw_uint_field(w, "type_id", v->type); + jsonw_uint_field(w, "offset", v->offset); + jsonw_uint_field(w, "size", v->size); + jsonw_end_object(w); + } else { + printf("\n\ttype_id=%u offset=%u size=%u", + v->type, v->offset, v->size); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + default: + break; + } + + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + + return 0; +} + +static int dump_btf_raw(const struct btf *btf, + __u32 *root_type_ids, int root_type_cnt) +{ + const struct btf_type *t; + int i; + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "types"); + jsonw_start_array(json_wtr); + } + + if (root_type_cnt) { + for (i = 0; i < root_type_cnt; i++) { + t = btf__type_by_id(btf, root_type_ids[i]); + dump_btf_type(btf, root_type_ids[i], t); + } + } else { + int cnt = btf__get_nr_types(btf); + + for (i = 1; i <= cnt; i++) { + t = btf__type_by_id(btf, i); + dump_btf_type(btf, i, t); + } + } + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_end_object(json_wtr); + } + return 0; +} + +static bool check_btf_endianness(GElf_Ehdr *ehdr) +{ + static unsigned int const endian = 1; + + switch (ehdr->e_ident[EI_DATA]) { + case ELFDATA2LSB: + return *(unsigned char const *)&endian == 1; + case ELFDATA2MSB: + return *(unsigned char const *)&endian == 0; + default: + return 0; + } +} + +static int btf_load_from_elf(const char *path, struct btf **btf) +{ + int err = -1, fd = -1, idx = 0; + Elf_Data *btf_data = NULL; + Elf_Scn *scn = NULL; + Elf *elf = NULL; + GElf_Ehdr ehdr; + + if (elf_version(EV_CURRENT) == EV_NONE) { + p_err("failed to init libelf for %s", path); + return -1; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("failed to open %s: %s", path, strerror(errno)); + return -1; + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + p_err("failed to open %s as ELF file", path); + goto done; + } + if (!gelf_getehdr(elf, &ehdr)) { + p_err("failed to get EHDR from %s", path); + goto done; + } + if (!check_btf_endianness(&ehdr)) { + p_err("non-native ELF endianness is not supported"); + goto done; + } + if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + p_err("failed to get e_shstrndx from %s\n", path); + goto done; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + char *name; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + p_err("failed to get section(%d) header from %s", + idx, path); + goto done; + } + name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!name) { + p_err("failed to get section(%d) name from %s", + idx, path); + goto done; + } + if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = elf_getdata(scn, 0); + if (!btf_data) { + p_err("failed to get section(%d, %s) data from %s", + idx, name, path); + goto done; + } + break; + } + } + + if (!btf_data) { + p_err("%s ELF section not found in %s", BTF_ELF_SEC, path); + goto done; + } + + *btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + p_err("failed to load BTF data from %s: %s", + path, strerror(err)); + goto done; + } + + err = 0; +done: + if (err) { + if (*btf) { + btf__free(*btf); + *btf = NULL; + } + } + if (elf) + elf_end(elf); + close(fd); + return err; +} + +static int do_dump(int argc, char **argv) +{ + struct btf *btf = NULL; + __u32 root_type_ids[2]; + int root_type_cnt = 0; + __u32 btf_id = -1; + const char *src; + int fd = -1; + int err; + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + src = GET_ARG(); + + if (is_prefix(src, "map")) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + btf_id = info.btf_id; + if (argc && is_prefix(*argv, "key")) { + root_type_ids[root_type_cnt++] = info.btf_key_type_id; + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "value")) { + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "all")) { + NEXT_ARG(); + } else if (argc && is_prefix(*argv, "kv")) { + root_type_ids[root_type_cnt++] = info.btf_key_type_id; + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + NEXT_ARG(); + } else { + root_type_ids[root_type_cnt++] = info.btf_key_type_id; + root_type_ids[root_type_cnt++] = info.btf_value_type_id; + } + } else if (is_prefix(src, "prog")) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + + if (!REQ_ARGS(2)) { + usage(); + return -1; + } + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get prog info: %s", strerror(errno)); + goto done; + } + + btf_id = info.btf_id; + } else if (is_prefix(src, "id")) { + char *endptr; + + btf_id = strtoul(*argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", *argv); + return -1; + } + NEXT_ARG(); + } else if (is_prefix(src, "file")) { + err = btf_load_from_elf(*argv, &btf); + if (err) + goto done; + NEXT_ARG(); + } else { + err = -1; + p_err("unrecognized BTF source specifier: '%s'", src); + goto done; + } + + if (!btf) { + err = btf__get_from_id(btf_id, &btf); + if (err) { + p_err("get btf by id (%u): %s", btf_id, strerror(err)); + goto done; + } + if (!btf) { + err = -ENOENT; + p_err("can't find btf with ID (%u)", btf_id); + goto done; + } + } + + dump_btf_raw(btf, root_type_ids, root_type_cnt); + +done: + close(fd); + btf__free(btf); + return err; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s btf dump BTF_SRC\n" + " %s btf help\n" + "\n" + " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" + " " HELP_SPEC_MAP "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, bin_name); + + return 0; +} + +static const struct cmd cmds[] = { + { "help", do_help }, + { "dump", do_dump }, + { 0 } +}; + +int do_btf(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index a3658eb73ffcc257b7cc68d71eb6ee7f96b4c9cf..c85809a5c2b5797cda9a4932ff333a2793003c29 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -56,7 +56,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -188,6 +188,7 @@ static const struct cmd cmds[] = { { "perf", do_perf }, { "net", do_net }, { "feature", do_feature }, + { "btf", do_btf }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 1ccc46169a1957627694469c035d0cf78a723c89..ce75b76d17b1b537422de33998e9e87c099a4ecd 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -74,6 +74,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", }; extern const char * const map_type_name[]; @@ -150,6 +151,7 @@ int do_perf(int argc, char **arg); int do_net(int argc, char **arg); int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); +int do_btf(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 10f9693c021303257b3f450f27467bd79ca576ad..b47cc2fff11e5ec01aac35c855516fdeae471cd5 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include +#include #include #include #include @@ -11,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -47,6 +50,10 @@ struct bpf_filter_t { int ifindex; }; +struct bpf_attach_info { + __u32 flow_dissector_id; +}; + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -179,8 +186,45 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid, return 0; } +static int query_flow_dissector(struct bpf_attach_info *attach_info) +{ + __u32 attach_flags; + __u32 prog_ids[1]; + __u32 prog_cnt; + int err; + int fd; + + fd = open("/proc/self/ns/net", O_RDONLY); + if (fd < 0) { + p_err("can't open /proc/self/ns/net: %s", + strerror(errno)); + return -1; + } + prog_cnt = ARRAY_SIZE(prog_ids); + err = bpf_prog_query(fd, BPF_FLOW_DISSECTOR, 0, + &attach_flags, prog_ids, &prog_cnt); + close(fd); + if (err) { + if (errno == EINVAL) { + /* Older kernel's don't support querying + * flow dissector programs. + */ + errno = 0; + return 0; + } + p_err("can't query prog: %s", strerror(errno)); + return -1; + } + + if (prog_cnt == 1) + attach_info->flow_dissector_id = prog_ids[0]; + + return 0; +} + static int do_show(int argc, char **argv) { + struct bpf_attach_info attach_info = {}; int i, sock, ret, filter_idx = -1; struct bpf_netdev_t dev_array; unsigned int nl_pid; @@ -198,6 +242,10 @@ static int do_show(int argc, char **argv) usage(); } + ret = query_flow_dissector(&attach_info); + if (ret) + return -1; + sock = libbpf_netlink_open(&nl_pid); if (sock < 0) { fprintf(stderr, "failed to open netlink sock\n"); @@ -226,6 +274,12 @@ static int do_show(int argc, char **argv) } NET_END_ARRAY("\n"); } + + NET_START_ARRAY("flow_dissector", "%s:\n"); + if (attach_info.flow_dissector_id > 0) + NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id); + NET_END_ARRAY("\n"); + NET_END_OBJECT; if (json_output) jsonw_end_array(json_wtr); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 140e2e3f6122be70b6ab0bd82df1ef0a3aa822fd..763dcff87f92989a8ca95e20d74c677303050349 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -169,6 +169,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -1738,12 +1739,19 @@ union bpf_attr { * error if an eBPF program tries to set a callback that is not * supported in the current kernel. * - * The supported callback values that *argval* can combine are: + * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * * Here are some examples of where one could call such eBPF * program: * diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 079e3f2b15607a7e7104548036ac4cd9ffd058c7..23879feef1e858b23638f4e24e7bb7fab7e9d914 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ "Please make sure all LIBBPF_API symbols are" \ "versioned in $(VERSION_SCRIPT)." >&2; \ + readelf -s --wide $(OUTPUT)libbpf-in.o | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \ + sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ + readelf -s --wide $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ + sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ + diff -u $(OUTPUT)libbpf_global_syms.tmp \ + $(OUTPUT)libbpf_versioned_syms.tmp; \ + rm $(OUTPUT)libbpf_global_syms.tmp \ + $(OUTPUT)libbpf_versioned_syms.tmp; \ exit 1; \ fi diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4428ebcac9889830a81903f56728276e58efa89f..e277d20c167132b2a6715214e18461bcfb4aec59 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2300,6 +2300,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 880c81228bbe7b952024ff43cd23111eaf7bdc02..6635a31a7a164e7d03cdd697b599774d9c360969 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -95,6 +95,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h new file mode 100644 index 0000000000000000000000000000000000000000..0cc5e4ee90bd7d7c5f40170537dd80266452a391 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define FUNCTION_NAME_LEN 64 +#define FILE_NAME_LEN 128 +#define TASK_COMM_LEN 16 + +typedef struct { + int PyThreadState_frame; + int PyThreadState_thread; + int PyFrameObject_back; + int PyFrameObject_code; + int PyFrameObject_lineno; + int PyCodeObject_filename; + int PyCodeObject_name; + int String_data; + int String_size; +} OffsetConfig; + +typedef struct { + uintptr_t current_state_addr; + uintptr_t tls_key_addr; + OffsetConfig offsets; + bool use_tls; +} PidData; + +typedef struct { + uint32_t success; +} Stats; + +typedef struct { + char name[FUNCTION_NAME_LEN]; + char file[FILE_NAME_LEN]; +} Symbol; + +typedef struct { + uint32_t pid; + uint32_t tid; + char comm[TASK_COMM_LEN]; + int32_t kernel_stack_id; + int32_t user_stack_id; + bool thread_current; + bool pthread_match; + bool stack_complete; + int16_t stack_len; + int32_t stack[STACK_MAX_LEN]; + + int has_meta; + int metadata; + char dummy_safeguard; +} Event; + + +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; +}; + +typedef int pid_t; + +typedef struct { + void* f_back; // PyFrameObject.f_back, previous frame + void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject + void* co_filename; // PyCodeObject.co_filename + void* co_name; // PyCodeObject.co_name +} FrameData; + +static inline __attribute__((__always_inline__)) void* +get_thread_state(void* tls_base, PidData* pidData) +{ + void* thread_state; + int key; + + bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); + bpf_probe_read(&thread_state, sizeof(thread_state), + tls_base + 0x310 + key * 0x10 + 0x08); + return thread_state; +} + +static inline __attribute__((__always_inline__)) bool +get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol) +{ + // read data from PyFrameObject + bpf_probe_read(&frame->f_back, + sizeof(frame->f_back), + frame_ptr + pidData->offsets.PyFrameObject_back); + bpf_probe_read(&frame->f_code, + sizeof(frame->f_code), + frame_ptr + pidData->offsets.PyFrameObject_code); + + // read data from PyCodeObject + if (!frame->f_code) + return false; + bpf_probe_read(&frame->co_filename, + sizeof(frame->co_filename), + frame->f_code + pidData->offsets.PyCodeObject_filename); + bpf_probe_read(&frame->co_name, + sizeof(frame->co_name), + frame->f_code + pidData->offsets.PyCodeObject_name); + // read actual names into symbol + if (frame->co_filename) + bpf_probe_read_str(&symbol->file, + sizeof(symbol->file), + frame->co_filename + pidData->offsets.String_data); + if (frame->co_name) + bpf_probe_read_str(&symbol->name, + sizeof(symbol->name), + frame->co_name + pidData->offsets.String_data); + return true; +} + +struct bpf_elf_map SEC("maps") pidmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(int), + .size_value = sizeof(PidData), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") eventmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(int), + .size_value = sizeof(Event), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") symbolmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(Symbol), + .size_value = sizeof(int), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") statsmap = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(Stats), + .size_value = sizeof(int), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") perfmap = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .max_elem = 32, +}; + +struct bpf_elf_map SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .size_key = sizeof(int), + .size_value = sizeof(long long) * 127, + .max_elem = 1000, +}; + +static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx) +{ + uint64_t pid_tgid = bpf_get_current_pid_tgid(); + pid_t pid = (pid_t)(pid_tgid >> 32); + PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); + if (!pidData) + return 0; + + int zero = 0; + Event* event = bpf_map_lookup_elem(&eventmap, &zero); + if (!event) + return 0; + + event->pid = pid; + + event->tid = (pid_t)pid_tgid; + bpf_get_current_comm(&event->comm, sizeof(event->comm)); + + event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); + event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); + + void* thread_state_current = (void*)0; + bpf_probe_read(&thread_state_current, + sizeof(thread_state_current), + (void*)(long)pidData->current_state_addr); + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + void* tls_base = (void*)task; + + void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData) + : thread_state_current; + event->thread_current = thread_state == thread_state_current; + + if (pidData->use_tls) { + uint64_t pthread_created; + uint64_t pthread_self; + bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10); + + bpf_probe_read(&pthread_created, + sizeof(pthread_created), + thread_state + pidData->offsets.PyThreadState_thread); + event->pthread_match = pthread_created == pthread_self; + } else { + event->pthread_match = 1; + } + + if (event->pthread_match || !pidData->use_tls) { + void* frame_ptr; + FrameData frame; + Symbol sym = {}; + int cur_cpu = bpf_get_smp_processor_id(); + + bpf_probe_read(&frame_ptr, + sizeof(frame_ptr), + thread_state + pidData->offsets.PyThreadState_frame); + + int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); + if (symbol_counter == NULL) + return 0; +#pragma unroll + /* Unwind python stack */ + for (int i = 0; i < STACK_MAX_LEN; ++i) { + if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); + if (!symbol_id) { + bpf_map_update_elem(&symbolmap, &sym, &zero, 0); + symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); + if (!symbol_id) + return 0; + } + if (*symbol_id == new_symbol_id) + (*symbol_counter)++; + event->stack[i] = *symbol_id; + event->stack_len = i + 1; + frame_ptr = frame.f_back; + } + } + event->stack_complete = frame_ptr == NULL; + } else { + event->stack_complete = 1; + } + + Stats* stats = bpf_map_lookup_elem(&statsmap, &zero); + if (stats) + stats->success++; + + event->has_meta = 0; + bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata)); + return 0; +} + +SEC("raw_tracepoint/kfree_skb") +int on_event(struct pt_regs* ctx) +{ + int i, ret = 0; + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c new file mode 100644 index 0000000000000000000000000000000000000000..29786325db54a53ff335f8f85ef883fce2b95c50 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf100.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 100 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c new file mode 100644 index 0000000000000000000000000000000000000000..c39f559d3100e8c32a87509e1101fc30fc73f2e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 180 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c new file mode 100644 index 0000000000000000000000000000000000000000..ef7ce340a292d06f7626176a2fd48915df6d4051 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf50.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 50 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 9a598f602acd2c3ceef3406d8fcf5dc96b7256e5..ab69a38ff8bd08bf237f5d6d75ecbdeccdc65831 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -27,6 +27,7 @@ typedef __u16 __sum16; #include #include #include +#include #include #include @@ -2380,7 +2381,7 @@ static int libbpf_debug_print_verifier_scale(enum libbpf_print_level level, return vfprintf(stderr, "%s", args); } -static int check_load(const char *file) +static int check_load(const char *file, enum bpf_prog_type type) { struct bpf_prog_load_attr attr; struct bpf_object *obj; @@ -2388,7 +2389,7 @@ static int check_load(const char *file) memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); attr.file = file; - attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.prog_type = type; attr.log_level = 4; err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); @@ -2399,21 +2400,26 @@ static int check_load(const char *file) void test_bpf_verif_scale(void) { - const char *file1 = "./test_verif_scale1.o"; - const char *file2 = "./test_verif_scale2.o"; - const char *file3 = "./test_verif_scale3.o"; - int err; + const char *scale[] = { + "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o" + }; + const char *pyperf[] = { + "./pyperf50.o", "./pyperf100.o", "./pyperf180.o" + }; + int err, i; if (verifier_stats) libbpf_set_print(libbpf_debug_print_verifier_scale); - err = check_load(file1); - err |= check_load(file2); - err |= check_load(file3); - if (!err) - printf("test_verif_scale:OK\n"); - else - printf("test_verif_scale:FAIL\n"); + for (i = 0; i < ARRAY_SIZE(scale); i++) { + err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS); + printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK"); + } + + for (i = 0; i < ARRAY_SIZE(pyperf); i++) { + err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT); + printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK"); + } } static void test_global_data_number(struct bpf_object *obj, __u32 duration) @@ -2617,6 +2623,120 @@ static void test_flow_dissector_load_bytes(void) close(fd); } +static void test_raw_tp_writable_reject_nbd_invalid(void) +{ + __u32 duration = 0; + char error[4096]; + int bpf_fd = -1, tp_fd = -1; + + const struct bpf_insn program[] = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* one byte beyond the end of the nbd_request struct */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, + sizeof(struct nbd_request)), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = program, + .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd); + if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open", + "erroneously succeeded\n")) + goto out_bpffd; + + close(tp_fd); +out_bpffd: + close(bpf_fd); +} + +static void test_raw_tp_writable_test_run(void) +{ + __u32 duration = 0; + char error[4096]; + + const struct bpf_insn trace_program[] = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL v2", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + int filter_fd = + bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out_bpffd; + + int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd); + if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened", + "failed: %d errno %d\n", tp_fd, errno)) + goto out_filterfd; + + char test_skb[128] = { + 0, + }; + + __u32 prog_ret; + int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + CHECK(err != 42, "test_run", + "tracepoint did not modify return value\n"); + CHECK(prog_ret != 0, "test_run_ret", + "socket_filter did not return 0\n"); + + close(tp_fd); + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, + &prog_ret, 0); + CHECK(err != 0, "test_run_notrace", + "test_run failed with %d errno %d\n", err, errno); + CHECK(prog_ret != 0, "test_run_ret_notrace", + "socket_filter did not return 0\n"); + +out_filterfd: + close(filter_fd); +out_bpffd: + close(bpf_fd); +} + int main(int ac, char **av) { srand(time(NULL)); @@ -2655,6 +2775,8 @@ int main(int ac, char **av) test_signal_pending(BPF_PROG_TYPE_FLOW_DISSECTOR); test_bpf_verif_scale(); test_global_data(); + test_raw_tp_writable_reject_nbd_invalid(); + test_raw_tp_writable_test_run(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index eeea520f23484b5350899eea5e255341db10f9da..3e0502af33078cf7fa19eb5abe8e41ba0f1dc5a3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -210,33 +210,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) self->retval = (uint32_t)res; } -/* test the sequence of 1k jumps */ +#define MAX_JMP_SEQ 8192 + +/* test the sequence of 8k jumps */ static void bpf_fill_scale1(struct bpf_test *self) { struct bpf_insn *insn = self->fill_insns; int i = 0, k = 0; insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); - /* test to check that the sequence of 1024 jumps is acceptable */ - while (k++ < 1024) { + /* test to check that the long sequence of jumps is acceptable */ + while (k++ < MAX_JMP_SEQ) { insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32); - insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2); insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, -8 * (k % 64 + 1)); } - /* every jump adds 1024 steps to insn_processed, so to stay exactly - * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + /* every jump adds 1 step to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT */ - while (i < MAX_TEST_INSNS - 1025) + while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); insn[i] = BPF_EXIT_INSN(); self->prog_len = i + 1; self->retval = 42; } -/* test the sequence of 1k jumps in inner most function (function depth 8)*/ +/* test the sequence of 8k jumps in inner most function (function depth 8)*/ static void bpf_fill_scale2(struct bpf_test *self) { struct bpf_insn *insn = self->fill_insns; @@ -248,19 +250,20 @@ static void bpf_fill_scale2(struct bpf_test *self) insn[i++] = BPF_EXIT_INSN(); } insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); - /* test to check that the sequence of 1024 jumps is acceptable */ - while (k++ < 1024) { + /* test to check that the long sequence of jumps is acceptable */ + k = 0; + while (k++ < MAX_JMP_SEQ) { insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32); - insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2); insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, -8 * (k % (64 - 4 * FUNC_NEST) + 1)); } - /* every jump adds 1024 steps to insn_processed, so to stay exactly - * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + /* every jump adds 1 step to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT */ - while (i < MAX_TEST_INSNS - 1025) + while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); insn[i] = BPF_EXIT_INSN(); self->prog_len = i + 1; @@ -16978,6 +16981,40 @@ static struct bpf_test tests[] = { .result = ACCEPT, .retval = 2, }, + { + "raw_tracepoint_writable: reject variable offset", + .insns = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + + BPF_LD_MAP_FD(BPF_REG_1, 0), + /* move the key (== 0) to r10-8 */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + /* lookup in the map */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + + /* exit clean if null */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + /* shift the buffer pointer to a variable location */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0), + /* clobber whatever's there */ + BPF_MOV64_IMM(BPF_REG_7, 4242), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, }, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", + }, }; static int probe_filter_length(const struct bpf_insn *fp)