diff --git a/MAINTAINERS b/MAINTAINERS
index 459fdf97ff7542027492fcae1e4fdae92373e351..6fedb88397cf7eec6898a0505cc3babe12feb615 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10117,6 +10117,7 @@ L:	linux-block@vger.kernel.org
 L:	nbd@other.debian.org
 F:	Documentation/blockdev/nbd.txt
 F:	drivers/block/nbd.c
+F:	include/trace/events/nbd.h
 F:	include/uapi/linux/nbd.h
 
 NETWORK DROP MONITOR
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 79ec7add5f98fbaa5390bac9571f2f61652fa8c1..91690778faf9762df6496bcb4fcd8462743f2923 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -290,6 +290,8 @@ static inline bool nmi_uaccess_okay(void)
 	return true;
 }
 
+#define nmi_uaccess_okay nmi_uaccess_okay
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2a9138dab13e5a62f4a55ff9b2c2d9a110b7f897..e6f836f1da041ca035a16d80ae694f4312e310dd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -44,6 +44,9 @@
 #include <linux/nbd-netlink.h>
 #include <net/genetlink.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/nbd.h>
+
 static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
 static int nbd_total_devices = 0;
@@ -513,6 +516,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	if (sent) {
 		if (sent >= sizeof(request)) {
 			skip = sent - sizeof(request);
+
+			/* initialize handle for tracing purposes */
+			handle = nbd_cmd_handle(cmd);
+
 			goto send_pages;
 		}
 		iov_iter_advance(&from, sent);
@@ -529,11 +536,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	handle = nbd_cmd_handle(cmd);
 	memcpy(request.handle, &handle, sizeof(handle));
 
+	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
+
 	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
 		req, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, index, 1, &from,
 			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
+	trace_nbd_header_sent(req, handle);
 	if (result <= 0) {
 		if (was_interrupted(result)) {
 			/* If we havne't sent anything we can just return BUSY,
@@ -606,6 +616,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		bio = next;
 	}
 out:
+	trace_nbd_payload_sent(req, handle);
 	nsock->pending = NULL;
 	nsock->sent = 0;
 	return 0;
@@ -653,6 +664,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 			tag, req);
 		return ERR_PTR(-ENOENT);
 	}
+	trace_nbd_header_received(req, handle);
 	cmd = blk_mq_rq_to_pdu(req);
 
 	mutex_lock(&cmd->lock);
@@ -712,6 +724,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 		}
 	}
 out:
+	trace_nbd_payload_received(req, handle);
 	mutex_unlock(&cmd->lock);
 	return ret ? ERR_PTR(ret) : cmd;
 }
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index db72ad39853b9874e0d3e999edc284ae6efb6ac4..9c1c3b35bd309f8ed10336a09d4a94813ea4fed0 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -20,6 +20,15 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or switching
+ * the loaded mm.
+ */
+#ifndef nmi_uaccess_okay
+# define nmi_uaccess_okay() true
+#endif
+
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 /*
  * Semi RCU freeing of the page directories.
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a8653137b52cdeaed56eec9678c465e99a1f06fd..e9fde0bdfe0b4d0255e032b8a5f48e25f45f059b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -280,6 +280,7 @@ enum bpf_reg_type {
 	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -369,6 +370,7 @@ struct bpf_prog_aux {
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
 	u32 max_pkt_offset;
+	u32 max_tp_access;
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index eb1e4f867a5e35aee66cb51293ae30bde9a99641..5a9975678d6fcac4d54138826d2de88d2d6ebf0f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 346a1dc18218e1ccbbdfe0a264e12c73682ce6a9..e590137e7f27136f88eca6061f1de1fa220e3e5d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -187,6 +187,7 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
 	bool speculative;
@@ -234,6 +235,7 @@ struct bpf_insn_aux_data {
 	int sanitize_stack_off; /* stack slot to be cleared */
 	bool seen; /* this insn was processed by the verifier */
 	u8 alu_state; /* used in combination with alu_limit */
+	bool prune_point;
 	unsigned int orig_idx; /* original instruction index */
 };
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 16523a7c4f18b6fb315daa5727a51320995d725a..b32df2de599357cfa9ff789c24c16d23ee88a956 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1216,11 +1216,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     unsigned int key_count);
 
 #ifdef CONFIG_NET
+int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+				  union bpf_attr __user *uattr);
 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 				       struct bpf_prog *prog);
 
 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
 #else
+static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+						union bpf_attr __user *uattr)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 						     struct bpf_prog *prog)
 {
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 49ba9cde7e4bb5e1b6b66f4f94912a65df971906..b29950a19205e7784dab42924bf3465cb636d76c 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
 	struct tracepoint	*tp;
 	void			*bpf_func;
 	u32			num_args;
+	u32			writable_size;
 } __aligned(32);
 
 #endif
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 505dae0bed80bd83373811c60ce4ac60d54ee8af..d6e556c0a0852df0c357361dfbb642bdbf3e58bf 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto)					\
  * to make sure that if the tracepoint handling changes, the
  * bpf probe will fail to compile unless it too is updated.
  */
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args)			\
+#define __DEFINE_EVENT(template, call, proto, args, size)		\
 static inline void bpf_test_probe_##call(void)				\
 {									\
 	check_trace_callback_type_##call(__bpf_trace_##template);	\
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = {						\
 	.tp		= &__tracepoint_##call,				\
 	.bpf_func	= (void *)__bpf_trace_##template,		\
 	.num_args	= COUNT_ARGS(args),				\
+	.writable_size	= size,						\
 };
 
+#define FIRST(x, ...) x
+
+#undef DEFINE_EVENT_WRITABLE
+#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size)	\
+static inline void bpf_test_buffer_##call(void)				\
+{									\
+	/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
+	 * BUILD_BUG_ON_ZERO() uses a different mechanism that is not	\
+	 * dead-code-eliminated.					\
+	 */								\
+	FIRST(proto);							\
+	(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args)));		\
+}									\
+__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DEFINE_EVENT_WRITABLE
+#undef __DEFINE_EVENT
+#undef FIRST
+
 #endif /* CONFIG_BPF_EVENTS */
diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h
new file mode 100644
index 0000000000000000000000000000000000000000..265447e3f71abeea68de239f722c6428fe5b8289
--- /dev/null
+++ b/include/trace/events/bpf_test_run.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_test_run
+
+#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BPF_TEST_RUN_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(bpf_test_finish,
+
+	TP_PROTO(int *err),
+
+	TP_ARGS(err),
+
+	TP_STRUCT__entry(
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->err = *err;
+	),
+
+	TP_printk("bpf_test_finish with err=%d", __entry->err)
+);
+
+#ifdef DEFINE_EVENT_WRITABLE
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
+	DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto),		\
+			      PARAMS(args), size)
+#else
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
+	DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,
+
+	TP_PROTO(int *err),
+
+	TP_ARGS(err),
+
+	sizeof(int)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h
new file mode 100644
index 0000000000000000000000000000000000000000..9849956f34d85fbb264cf64e99131c4885d7b99c
--- /dev/null
+++ b/include/trace/events/nbd.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nbd
+
+#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NBD_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(nbd_transport_event,
+
+	TP_PROTO(struct request *req, u64 handle),
+
+	TP_ARGS(req, handle),
+
+	TP_STRUCT__entry(
+		__field(struct request *, req)
+		__field(u64, handle)
+	),
+
+	TP_fast_assign(
+		__entry->req = req;
+		__entry->handle = handle;
+	),
+
+	TP_printk(
+		"nbd transport event: request %p, handle 0x%016llx",
+		__entry->req,
+		__entry->handle
+	)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_header_sent,
+
+	TP_PROTO(struct request *req, u64 handle),
+
+	TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_payload_sent,
+
+	TP_PROTO(struct request *req, u64 handle),
+
+	TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_header_received,
+
+	TP_PROTO(struct request *req, u64 handle),
+
+	TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_payload_received,
+
+	TP_PROTO(struct request *req, u64 handle),
+
+	TP_ARGS(req, handle)
+);
+
+DECLARE_EVENT_CLASS(nbd_send_request,
+
+	TP_PROTO(struct nbd_request *nbd_request, int index,
+		 struct request *rq),
+
+	TP_ARGS(nbd_request, index, rq),
+
+	TP_STRUCT__entry(
+		__field(struct nbd_request *, nbd_request)
+		__field(u64, dev_index)
+		__field(struct request *, request)
+	),
+
+	TP_fast_assign(
+		__entry->nbd_request = 0;
+		__entry->dev_index = index;
+		__entry->request = rq;
+	),
+
+	TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request)
+);
+
+#ifdef DEFINE_EVENT_WRITABLE
+#undef NBD_DEFINE_EVENT
+#define NBD_DEFINE_EVENT(template, call, proto, args, size)		\
+	DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto),		\
+			      PARAMS(args), size)
+#else
+#undef NBD_DEFINE_EVENT
+#define NBD_DEFINE_EVENT(template, call, proto, args, size)		\
+	DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
+#endif
+
+NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request,
+
+	TP_PROTO(struct nbd_request *nbd_request, int index,
+		 struct request *rq),
+
+	TP_ARGS(nbd_request, index, rq),
+
+	sizeof(struct nbd_request)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a8d87ab34ef024d27340ee0dfb419267a0f16416..3d62b8beefef9549629509016c2adc9bea950f5e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -169,6 +169,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 };
 
 enum bpf_attach_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index beeddb1abf55fccea8bb6c45484a161825c4cf0d..cd7c9da22a56971bf2423e4169a96bc0c481095e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1803,12 +1803,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 		return -ENOMEM;
 	raw_tp->btp = btp;
 
-	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
-				 BPF_PROG_TYPE_RAW_TRACEPOINT);
+	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
 	if (IS_ERR(prog)) {
 		err = PTR_ERR(prog);
 		goto out_free_tp;
 	}
+	if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+	    prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+		err = -EINVAL;
+		goto out_put_prog;
+	}
 
 	err = bpf_probe_register(raw_tp->btp, prog);
 	if (err)
@@ -2027,6 +2031,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 		break;
 	case BPF_LIRC_MODE2:
 		return lirc_prog_query(attr, uattr);
+	case BPF_FLOW_DISSECTOR:
+		return skb_flow_dissector_prog_query(attr, uattr);
 	default:
 		return -EINVAL;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3efc91b1376a4e905dff0362465ab96c82c4bc24..bff1e01282e76ed4946e797de5322037992da0ea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem {
 	struct bpf_verifier_stack_elem *next;
 };
 
-#define BPF_COMPLEXITY_LIMIT_STACK	1024
+#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
 #define BPF_COMPLEXITY_LIMIT_STATES	64
 
 #define BPF_MAP_PTR_UNPRIV	1UL
@@ -404,6 +404,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
+	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 };
 
 static char slot_type_char[] = {
@@ -780,8 +781,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	if (err)
 		goto err;
 	elem->st.speculative |= speculative;
-	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
-		verbose(env, "BPF program is too complex\n");
+	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
+		verbose(env, "The sequence of %d jumps is too complex.\n",
+			env->stack_size);
 		goto err;
 	}
 	return &elem->st;
@@ -1987,6 +1989,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+				  const struct bpf_reg_state *reg,
+				  int regno, int off, int size)
+{
+	if (off < 0) {
+		verbose(env,
+			"R%d invalid tracepoint buffer access: off=%d, size=%d",
+			regno, off, size);
+		return -EACCES;
+	}
+	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose(env,
+			"R%d invalid variable buffer offset: off=%d, var_off=%s",
+			regno, off, tn_buf);
+		return -EACCES;
+	}
+	if (off + size > env->prog->aux->max_tp_access)
+		env->prog->aux->max_tp_access = off + size;
+
+	return 0;
+}
+
+
 /* truncate register to smaller size (in bytes)
  * must be called with size < BPF_REG_SIZE
  */
@@ -2131,6 +2159,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_sock_access(env, insn_idx, regno, off, size, t);
 		if (!err && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_TP_BUFFER) {
+		err = check_tp_buffer_access(env, reg, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -4465,15 +4497,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static void __find_good_pkt_pointers(struct bpf_func_state *state,
+				     struct bpf_reg_state *dst_reg,
+				     enum bpf_reg_type type, u16 new_range)
+{
+	struct bpf_reg_state *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		reg = &state->regs[i];
+		if (reg->type == type && reg->id == dst_reg->id)
+			/* keep the maximum range already checked */
+			reg->range = max(reg->range, new_range);
+	}
+
+	bpf_for_each_spilled_reg(i, state, reg) {
+		if (!reg)
+			continue;
+		if (reg->type == type && reg->id == dst_reg->id)
+			reg->range = max(reg->range, new_range);
+	}
+}
+
 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 				   struct bpf_reg_state *dst_reg,
 				   enum bpf_reg_type type,
 				   bool range_right_open)
 {
-	struct bpf_func_state *state = vstate->frame[vstate->curframe];
-	struct bpf_reg_state *regs = state->regs, *reg;
 	u16 new_range;
-	int i, j;
+	int i;
 
 	if (dst_reg->off < 0 ||
 	    (dst_reg->off == 0 && range_right_open))
@@ -4538,20 +4590,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 	 * the range won't allow anything.
 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
 	 */
-	for (i = 0; i < MAX_BPF_REG; i++)
-		if (regs[i].type == type && regs[i].id == dst_reg->id)
-			/* keep the maximum range already checked */
-			regs[i].range = max(regs[i].range, new_range);
-
-	for (j = 0; j <= vstate->curframe; j++) {
-		state = vstate->frame[j];
-		bpf_for_each_spilled_reg(i, state, reg) {
-			if (!reg)
-				continue;
-			if (reg->type == type && reg->id == dst_reg->id)
-				reg->range = max(reg->range, new_range);
-		}
-	}
+	for (i = 0; i <= vstate->curframe; i++)
+		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+					 new_range);
 }
 
 /* compute branch direction of the expression "if (reg opcode val) goto target;"
@@ -5025,6 +5066,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 	}
 }
 
+static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+				    bool is_null)
+{
+	struct bpf_reg_state *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+
+	bpf_for_each_spilled_reg(i, state, reg) {
+		if (!reg)
+			continue;
+		mark_ptr_or_null_reg(state, reg, id, is_null);
+	}
+}
+
 /* The logic is similar to find_good_pkt_pointers(), both could eventually
  * be folded together at some point.
  */
@@ -5032,10 +5089,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 				  bool is_null)
 {
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
-	struct bpf_reg_state *reg, *regs = state->regs;
+	struct bpf_reg_state *regs = state->regs;
 	u32 ref_obj_id = regs[regno].ref_obj_id;
 	u32 id = regs[regno].id;
-	int i, j;
+	int i;
 
 	if (ref_obj_id && ref_obj_id == id && is_null)
 		/* regs[regno] is in the " == NULL" branch.
@@ -5044,17 +5101,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 		 */
 		WARN_ON_ONCE(release_reference_state(state, id));
 
-	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
-
-	for (j = 0; j <= vstate->curframe; j++) {
-		state = vstate->frame[j];
-		bpf_for_each_spilled_reg(i, state, reg) {
-			if (!reg)
-				continue;
-			mark_ptr_or_null_reg(state, reg, id, is_null);
-		}
-	}
+	for (i = 0; i <= vstate->curframe; i++)
+		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
 }
 
 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
@@ -5556,7 +5604,25 @@ enum {
 	BRANCH = 2,
 };
 
-#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
+static u32 state_htab_size(struct bpf_verifier_env *env)
+{
+	return env->prog->len;
+}
+
+static struct bpf_verifier_state_list **explored_state(
+					struct bpf_verifier_env *env,
+					int idx)
+{
+	struct bpf_verifier_state *cur = env->cur_state;
+	struct bpf_func_state *state = cur->frame[cur->curframe];
+
+	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
+}
+
+static void init_explored_state(struct bpf_verifier_env *env, int idx)
+{
+	env->insn_aux_data[idx].prune_point = true;
+}
 
 /* t, w, e - match pseudo-code above:
  * t - index of current instruction
@@ -5582,7 +5648,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 
 	if (e == BRANCH)
 		/* mark branch target for state pruning */
-		env->explored_states[w] = STATE_LIST_MARK;
+		init_explored_state(env, w);
 
 	if (insn_state[w] == 0) {
 		/* tree-edge */
@@ -5650,9 +5716,9 @@ static int check_cfg(struct bpf_verifier_env *env)
 			else if (ret < 0)
 				goto err_free;
 			if (t + 1 < insn_cnt)
-				env->explored_states[t + 1] = STATE_LIST_MARK;
+				init_explored_state(env, t + 1);
 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
-				env->explored_states[t] = STATE_LIST_MARK;
+				init_explored_state(env, t);
 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
 				if (ret == 1)
 					goto peek_stack;
@@ -5675,10 +5741,10 @@ static int check_cfg(struct bpf_verifier_env *env)
 			 * after every call and jump
 			 */
 			if (t + 1 < insn_cnt)
-				env->explored_states[t + 1] = STATE_LIST_MARK;
+				init_explored_state(env, t + 1);
 		} else {
 			/* conditional jump with two edges */
-			env->explored_states[t] = STATE_LIST_MARK;
+			init_explored_state(env, t);
 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
 			if (ret == 1)
 				goto peek_stack;
@@ -6126,12 +6192,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 	struct bpf_verifier_state_list *sl;
 	int i;
 
-	sl = env->explored_states[insn];
-	if (!sl)
-		return;
-
-	while (sl != STATE_LIST_MARK) {
-		if (sl->state.curframe != cur->curframe)
+	sl = *explored_state(env, insn);
+	while (sl) {
+		if (sl->state.insn_idx != insn ||
+		    sl->state.curframe != cur->curframe)
 			goto next;
 		for (i = 0; i <= cur->curframe; i++)
 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
@@ -6485,18 +6549,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state *cur = env->cur_state, *new;
 	int i, j, err, states_cnt = 0;
 
-	pprev = &env->explored_states[insn_idx];
-	sl = *pprev;
-
-	if (!sl)
+	if (!env->insn_aux_data[insn_idx].prune_point)
 		/* this 'insn_idx' instruction wasn't marked, so we will not
 		 * be doing state search here
 		 */
 		return 0;
 
+	pprev = explored_state(env, insn_idx);
+	sl = *pprev;
+
 	clean_live_states(env, insn_idx, cur);
 
-	while (sl != STATE_LIST_MARK) {
+	while (sl) {
+		states_cnt++;
+		if (sl->state.insn_idx != insn_idx)
+			goto next;
 		if (states_equal(env, &sl->state, cur)) {
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
@@ -6514,7 +6581,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				return err;
 			return 1;
 		}
-		states_cnt++;
 		sl->miss_cnt++;
 		/* heuristic to determine whether this state is beneficial
 		 * to keep checking from state equivalence point of view.
@@ -6541,6 +6607,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			sl = *pprev;
 			continue;
 		}
+next:
 		pprev = &sl->next;
 		sl = *pprev;
 	}
@@ -6572,8 +6639,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		kfree(new_sl);
 		return err;
 	}
-	new_sl->next = env->explored_states[insn_idx];
-	env->explored_states[insn_idx] = new_sl;
+	new->insn_idx = insn_idx;
+	new_sl->next = *explored_state(env, insn_idx);
+	*explored_state(env, insn_idx) = new_sl;
 	/* connect new state to parentage chain. Current frame needs all
 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
 	 * to the stack implicitly by JITs) so in callers' frames connect just
@@ -8256,16 +8324,15 @@ static void free_states(struct bpf_verifier_env *env)
 	if (!env->explored_states)
 		return;
 
-	for (i = 0; i < env->prog->len; i++) {
+	for (i = 0; i < state_htab_size(env); i++) {
 		sl = env->explored_states[i];
 
-		if (sl)
-			while (sl != STATE_LIST_MARK) {
-				sln = sl->next;
-				free_verifier_state(&sl->state, false);
-				kfree(sl);
-				sl = sln;
-			}
+		while (sl) {
+			sln = sl->next;
+			free_verifier_state(&sl->state, false);
+			kfree(sl);
+			sl = sln;
+		}
 	}
 
 	kvfree(env->explored_states);
@@ -8365,7 +8432,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 			goto skip_full_check;
 	}
 
-	env->explored_states = kvcalloc(env->prog->len,
+	env->explored_states = kvcalloc(state_htab_size(env),
 				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 07bccf9fb697f937fed0c2fcff22ed321f0c4fcb..a29375b9d9a1f0562c5d6d610d2e65ca2418f9b6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,6 +14,8 @@
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
 
+#include <asm/tlb.h>
+
 #include "trace_probe.h"
 #include "trace.h"
 
@@ -126,6 +128,10 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
 	 * access_ok() should prevent writing to non-user memory, but in
 	 * some situations (nommu, temporary switch, etc) access_ok() does
 	 * not provide enough validation, hence the check on KERNEL_DS.
+	 *
+	 * nmi_uaccess_okay() ensures the probe is not run in an interim
+	 * state, when the task or mm are switched. This is specifically
+	 * required to prevent the use of temporary mm.
 	 */
 
 	if (unlikely(in_interrupt() ||
@@ -133,6 +139,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
 		return -EPERM;
 	if (unlikely(uaccess_kernel()))
 		return -EPERM;
+	if (unlikely(!nmi_uaccess_okay()))
+		return -EPERM;
 	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
 		return -EPERM;
 
@@ -963,6 +971,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+						 enum bpf_access_type type,
+						 const struct bpf_prog *prog,
+						 struct bpf_insn_access_aux *info)
+{
+	if (off == 0) {
+		if (size != sizeof(u64) || type != BPF_READ)
+			return false;
+		info->reg_type = PTR_TO_TP_BUFFER;
+	}
+	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+	.get_func_proto  = raw_tp_prog_func_proto,
+	.is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
+};
+
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
@@ -1243,6 +1272,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
 		return -EINVAL;
 
+	if (prog->aux->max_tp_access > btp->writable_size)
+		return -EINVAL;
+
 	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
 }
 
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 8c9331913753ee817413ac7b24e434c1fadb3efd..33e0dc168c16aa24fc7b9c42e17582b316b10472 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -14,6 +14,9 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/bpf_test_run.h>
+
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time)
 {
@@ -101,6 +104,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
 	if (err != -ENOSPC)
 		err = 0;
 out:
+	trace_bpf_test_finish(&err);
 	return err;
 }
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 4725fcf9e8ea43ca5c00742ab3793734f7b74564..09db923f398f77c25325a1ba1a394e09a3790267 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -65,6 +65,45 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
+int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_id, prog_cnt = 0, flags = 0;
+	struct bpf_prog *attached;
+	struct net *net;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	net = get_net_ns_by_fd(attr->query.target_fd);
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	rcu_read_lock();
+	attached = rcu_dereference(net->flow_dissector_prog);
+	if (attached) {
+		prog_cnt = 1;
+		prog_id = attached->aux->id;
+	}
+	rcu_read_unlock();
+
+	put_net(net);
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+		return -EFAULT;
+	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		return -EFAULT;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		return 0;
+
+	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 				       struct bpf_prog *prog)
 {
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2dbc1413fabd8caa116ffff4df999d3ec0c4dece
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -0,0 +1,222 @@
+================
+bpftool-btf
+================
+-------------------------------------------------------------------------------
+tool for inspection of BTF data
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **btf** *COMMAND*
+
+	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* := { **dump** | **help** }
+
+BTF COMMANDS
+=============
+
+|	**bpftool** **btf dump** *BTF_SRC*
+|	**bpftool** **btf help**
+|
+|	*BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
+|	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+|	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+
+DESCRIPTION
+===========
+	**bpftool btf dump** *BTF_SRC*
+		  Dump BTF entries from a given *BTF_SRC*.
+
+                  When **id** is specified, BTF object with that ID will be
+                  loaded and all its BTF types emitted.
+
+                  When **map** is provided, it's expected that map has
+                  associated BTF object with BTF types describing key and
+                  value. It's possible to select whether to dump only BTF
+                  type(s) associated with key (**key**), value (**value**),
+                  both key and value (**kv**), or all BTF types present in
+                  associated BTF object (**all**). If not specified, **kv**
+                  is assumed.
+
+                  When **prog** is provided, it's expected that program has
+                  associated BTF object with BTF types.
+
+                  When specifying *FILE*, an ELF file is expected, containing
+                  .BTF section with well-defined BTF binary format data,
+                  typically produced by clang or pahole.
+
+	**bpftool btf help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-V, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+**# bpftool btf dump id 1226**
+::
+
+  [1] PTR '(anon)' type_id=2
+  [2] STRUCT 'dummy_tracepoint_args' size=16 vlen=2
+          'pad' type_id=3 bits_offset=0
+          'sock' type_id=4 bits_offset=64
+  [3] INT 'long long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
+  [4] PTR '(anon)' type_id=5
+  [5] FWD 'sock' fwd_kind=union
+
+This gives an example of default output for all supported BTF kinds.
+
+**$ cat prog.c**
+::
+
+  struct fwd_struct;
+
+  enum my_enum {
+          VAL1 = 3,
+          VAL2 = 7,
+  };
+
+  typedef struct my_struct my_struct_t;
+
+  struct my_struct {
+          const unsigned int const_int_field;
+          int bitfield_field: 4;
+          char arr_field[16];
+          const struct fwd_struct *restrict fwd_field;
+          enum my_enum enum_field;
+          volatile my_struct_t *typedef_ptr_field;
+  };
+
+  union my_union {
+          int a;
+          struct my_struct b;
+  };
+
+  struct my_struct struct_global_var __attribute__((section("data_sec"))) = {
+          .bitfield_field = 3,
+          .enum_field = VAL1,
+  };
+  int global_var __attribute__((section("data_sec"))) = 7;
+
+  __attribute__((noinline))
+  int my_func(union my_union *arg1, int arg2)
+  {
+          static int static_var __attribute__((section("data_sec"))) = 123;
+          static_var++;
+          return static_var;
+  }
+
+**$ bpftool btf dump file prog.o**
+::
+
+  [1] PTR '(anon)' type_id=2
+  [2] UNION 'my_union' size=48 vlen=2
+          'a' type_id=3 bits_offset=0
+          'b' type_id=4 bits_offset=0
+  [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+  [4] STRUCT 'my_struct' size=48 vlen=6
+          'const_int_field' type_id=5 bits_offset=0
+          'bitfield_field' type_id=3 bits_offset=32 bitfield_size=4
+          'arr_field' type_id=8 bits_offset=40
+          'fwd_field' type_id=10 bits_offset=192
+          'enum_field' type_id=14 bits_offset=256
+          'typedef_ptr_field' type_id=15 bits_offset=320
+  [5] CONST '(anon)' type_id=6
+  [6] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+  [7] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED
+  [8] ARRAY '(anon)' type_id=7 index_type_id=9 nr_elems=16
+  [9] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+  [10] RESTRICT '(anon)' type_id=11
+  [11] PTR '(anon)' type_id=12
+  [12] CONST '(anon)' type_id=13
+  [13] FWD 'fwd_struct' fwd_kind=union
+  [14] ENUM 'my_enum' size=4 vlen=2
+          'VAL1' val=3
+          'VAL2' val=7
+  [15] PTR '(anon)' type_id=16
+  [16] VOLATILE '(anon)' type_id=17
+  [17] TYPEDEF 'my_struct_t' type_id=4
+  [18] FUNC_PROTO '(anon)' ret_type_id=3 vlen=2
+          'arg1' type_id=1
+          'arg2' type_id=3
+  [19] FUNC 'my_func' type_id=18
+  [20] VAR 'struct_global_var' type_id=4, linkage=global-alloc
+  [21] VAR 'global_var' type_id=3, linkage=global-alloc
+  [22] VAR 'my_func.static_var' type_id=3, linkage=static
+  [23] DATASEC 'data_sec' size=0 vlen=3
+          type_id=20 offset=0 size=48
+          type_id=21 offset=0 size=4
+          type_id=22 offset=52 size=4
+
+The following commands print BTF types associated with specified map's key,
+value, both key and value, and all BTF types, respectively. By default, both
+key and value types will be printed.
+
+**# bpftool btf dump map id 123 key**
+
+::
+
+  [39] TYPEDEF 'u32' type_id=37
+
+**# bpftool btf dump map id 123 value**
+
+::
+
+  [86] PTR '(anon)' type_id=87
+
+**# bpftool btf dump map id 123 kv**
+
+::
+
+  [39] TYPEDEF 'u32' type_id=37
+  [86] PTR '(anon)' type_id=87
+
+**# bpftool btf dump map id 123 all**
+
+::
+
+  [1] PTR '(anon)' type_id=0
+  .
+  .
+  .
+  [2866] ARRAY '(anon)' type_id=52 index_type_id=51 nr_elems=4
+
+All the standard ways to specify map or program are supported:
+
+**# bpftool btf dump map id 123**
+
+**# bpftool btf dump map pinned /sys/fs/bpf/map_name**
+
+**# bpftool btf dump prog id 456**
+
+**# bpftool btf dump prog tag b88e0a09b1d9759d**
+
+**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-feature**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 89b6b10e21835f1a52b4708b2b88073b8a91bd2e..ac26876389c2383297185ddc1d3f3f1ac0067d8b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -145,4 +145,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 10177343b7ce2793a6deee062ffecb57e3eab355..14180e8870824304c54abb9cd67c82919f158e90 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -82,4 +82,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 55ecf80ca03e9c81892da961f0caed1c0d918aca..13ef27b39f200ff4239741201deb95d37067464c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -258,4 +258,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 6b692b4283734e14ad0892ed232a5361e4056f32..934580850f42b0f74a91b1323020db187652df9c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -143,4 +143,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 86154740fabb1075edc04141e581f70c8be093d2..0c7576523a217150c4239b7e903ba82fe53baaab 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -85,4 +85,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
-	**bpftool-net**\ (8)
+	**bpftool-net**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2f183ffd8351ed412209642ed65db158de5be840..e8118544d118282b63bfc27e515ea53ae9903d96 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -271,4 +271,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index deb2ca911ddfae3b383f0cd42f6948b15216d238..3e562d7fd56f942c657fdb7015de606190ea5a26 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -76,4 +76,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 9f3ffe1e26ab6480ac74f62ca2cd748fa328f5d4..50e402a5a9c86276f2b37526f43c0ede5c1ba8a6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -217,6 +217,7 @@ _bpftool()
     done
     cur=${words[cword]}
     prev=${words[cword - 1]}
+    pprev=${words[cword - 2]}
 
     local object=${words[1]} command=${words[2]}
 
@@ -272,17 +273,17 @@ _bpftool()
                                 "$cur" ) )
                             return 0
                             ;;
-                    *)
-                        _bpftool_once_attr 'file'
-                        if _bpftool_search_list 'xlated'; then
-                            COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \
-                                "$cur" ) )
-                        else
-                            COMPREPLY+=( $( compgen -W 'opcodes linum' -- \
-                                "$cur" ) )
-                        fi
-                        return 0
-                        ;;
+                        *)
+                            _bpftool_once_attr 'file'
+                            if _bpftool_search_list 'xlated'; then
+                                COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \
+                                    "$cur" ) )
+                            else
+                                COMPREPLY+=( $( compgen -W 'opcodes linum' -- \
+                                    "$cur" ) )
+                            fi
+                            return 0
+                            ;;
                     esac
                     ;;
                 pin)
@@ -607,6 +608,51 @@ _bpftool()
                     ;;
             esac
             ;;
+        btf)
+            local PROG_TYPE='id pinned tag'
+            local MAP_TYPE='id pinned'
+            case $command in
+                dump)
+                    case $prev in
+                        $command)
+                            COMPREPLY+=( $( compgen -W "id map prog file" -- \
+                                "$cur" ) )
+                            return 0
+                            ;;
+                        prog)
+                            COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        map)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            case $pprev in
+                                prog)
+                                    _bpftool_get_prog_ids
+                                    ;;
+                                map)
+                                    _bpftool_get_map_ids
+                                    ;;
+                            esac
+                            return 0
+                            ;;
+                        *)
+                            if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then
+                                 COMPREPLY+=( $( compgen -W 'key value kv all' -- \
+                                     "$cur" ) )
+                            fi
+                            return 0
+                            ;;
+                    esac
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
         cgroup)
             case $command in
                 show|list)
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c67bbcdf914db30dc9f858ae6556f4cb2ebeac3
--- /dev/null
+++ b/tools/bpf/bpftool/btf.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Facebook */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/err.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <gelf.h>
+#include <bpf.h>
+#include <linux/btf.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const btf_kind_str[NR_BTF_KINDS] = {
+	[BTF_KIND_UNKN]		= "UNKNOWN",
+	[BTF_KIND_INT]		= "INT",
+	[BTF_KIND_PTR]		= "PTR",
+	[BTF_KIND_ARRAY]	= "ARRAY",
+	[BTF_KIND_STRUCT]	= "STRUCT",
+	[BTF_KIND_UNION]	= "UNION",
+	[BTF_KIND_ENUM]		= "ENUM",
+	[BTF_KIND_FWD]		= "FWD",
+	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
+	[BTF_KIND_VOLATILE]	= "VOLATILE",
+	[BTF_KIND_CONST]	= "CONST",
+	[BTF_KIND_RESTRICT]	= "RESTRICT",
+	[BTF_KIND_FUNC]		= "FUNC",
+	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
+	[BTF_KIND_VAR]		= "VAR",
+	[BTF_KIND_DATASEC]	= "DATASEC",
+};
+
+static const char *btf_int_enc_str(__u8 encoding)
+{
+	switch (encoding) {
+	case 0:
+		return "(none)";
+	case BTF_INT_SIGNED:
+		return "SIGNED";
+	case BTF_INT_CHAR:
+		return "CHAR";
+	case BTF_INT_BOOL:
+		return "BOOL";
+	default:
+		return "UNKN";
+	}
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	switch (linkage) {
+	case BTF_VAR_STATIC:
+		return "static";
+	case BTF_VAR_GLOBAL_ALLOCATED:
+		return "global-alloc";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_str(const struct btf *btf, __u32 off)
+{
+	if (!off)
+		return "(anon)";
+	return btf__name_by_offset(btf, off) ? : "(invalid)";
+}
+
+static int dump_btf_type(const struct btf *btf, __u32 id,
+			 const struct btf_type *t)
+{
+	json_writer_t *w = json_wtr;
+	int kind, safe_kind;
+
+	kind = BTF_INFO_KIND(t->info);
+	safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+
+	if (json_output) {
+		jsonw_start_object(w);
+		jsonw_uint_field(w, "id", id);
+		jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+		jsonw_string_field(w, "name", btf_str(btf, t->name_off));
+	} else {
+		printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+		       btf_str(btf, t->name_off));
+	}
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT: {
+		__u32 v = *(__u32 *)(t + 1);
+		const char *enc;
+
+		enc = btf_int_enc_str(BTF_INT_ENCODING(v));
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "bits_offset", BTF_INT_OFFSET(v));
+			jsonw_uint_field(w, "nr_bits", BTF_INT_BITS(v));
+			jsonw_string_field(w, "encoding", enc);
+		} else {
+			printf(" size=%u bits_offset=%u nr_bits=%u encoding=%s",
+			       t->size, BTF_INT_OFFSET(v), BTF_INT_BITS(v),
+			       enc);
+		}
+		break;
+	}
+	case BTF_KIND_PTR:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+		if (json_output)
+			jsonw_uint_field(w, "type_id", t->type);
+		else
+			printf(" type_id=%u", t->type);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *arr = (const void *)(t + 1);
+
+		if (json_output) {
+			jsonw_uint_field(w, "type_id", arr->type);
+			jsonw_uint_field(w, "index_type_id", arr->index_type);
+			jsonw_uint_field(w, "nr_elems", arr->nelems);
+		} else {
+			printf(" type_id=%u index_type_id=%u nr_elems=%u",
+			       arr->type, arr->index_type, arr->nelems);
+		}
+		break;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "members");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, m++) {
+			const char *name = btf_str(btf, m->name_off);
+			__u32 bit_off, bit_sz;
+
+			if (BTF_INFO_KFLAG(t->info)) {
+				bit_off = BTF_MEMBER_BIT_OFFSET(m->offset);
+				bit_sz = BTF_MEMBER_BITFIELD_SIZE(m->offset);
+			} else {
+				bit_off = m->offset;
+				bit_sz = 0;
+			}
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "type_id", m->type);
+				jsonw_uint_field(w, "bits_offset", bit_off);
+				if (bit_sz) {
+					jsonw_uint_field(w, "bitfield_size",
+							 bit_sz);
+				}
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' type_id=%u bits_offset=%u",
+				       name, m->type, bit_off);
+				if (bit_sz)
+					printf(" bitfield_size=%u", bit_sz);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		const struct btf_enum *v = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "values");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, v++) {
+			const char *name = btf_str(btf, v->name_off);
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "val", v->val);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' val=%u", name, v->val);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_FWD: {
+		const char *fwd_kind = BTF_INFO_KFLAG(t->info) ? "union"
+							       : "struct";
+
+		if (json_output)
+			jsonw_string_field(w, "fwd_kind", fwd_kind);
+		else
+			printf(" fwd_kind=%s", fwd_kind);
+		break;
+	}
+	case BTF_KIND_FUNC:
+		if (json_output)
+			jsonw_uint_field(w, "type_id", t->type);
+		else
+			printf(" type_id=%u", t->type);
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "ret_type_id", t->type);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "params");
+			jsonw_start_array(w);
+		} else {
+			printf(" ret_type_id=%u vlen=%u", t->type, vlen);
+		}
+		for (i = 0; i < vlen; i++, p++) {
+			const char *name = btf_str(btf, p->name_off);
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "type_id", p->type);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' type_id=%u", name, p->type);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_VAR: {
+		const struct btf_var *v = (const void *)(t + 1);
+		const char *linkage;
+
+		linkage = btf_var_linkage_str(v->linkage);
+
+		if (json_output) {
+			jsonw_uint_field(w, "type_id", t->type);
+			jsonw_string_field(w, "linkage", linkage);
+		} else {
+			printf(" type_id=%u, linkage=%s", t->type, linkage);
+		}
+		break;
+	}
+	case BTF_KIND_DATASEC: {
+		const struct btf_var_secinfo *v = (const void *)(t+1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "vars");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, v++) {
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_uint_field(w, "type_id", v->type);
+				jsonw_uint_field(w, "offset", v->offset);
+				jsonw_uint_field(w, "size", v->size);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\ttype_id=%u offset=%u size=%u",
+				       v->type, v->offset, v->size);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (json_output)
+		jsonw_end_object(json_wtr);
+	else
+		printf("\n");
+
+	return 0;
+}
+
+static int dump_btf_raw(const struct btf *btf,
+			__u32 *root_type_ids, int root_type_cnt)
+{
+	const struct btf_type *t;
+	int i;
+
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "types");
+		jsonw_start_array(json_wtr);
+	}
+
+	if (root_type_cnt) {
+		for (i = 0; i < root_type_cnt; i++) {
+			t = btf__type_by_id(btf, root_type_ids[i]);
+			dump_btf_type(btf, root_type_ids[i], t);
+		}
+	} else {
+		int cnt = btf__get_nr_types(btf);
+
+		for (i = 1; i <= cnt; i++) {
+			t = btf__type_by_id(btf, i);
+			dump_btf_type(btf, i, t);
+		}
+	}
+
+	if (json_output) {
+		jsonw_end_array(json_wtr);
+		jsonw_end_object(json_wtr);
+	}
+	return 0;
+}
+
+static bool check_btf_endianness(GElf_Ehdr *ehdr)
+{
+	static unsigned int const endian = 1;
+
+	switch (ehdr->e_ident[EI_DATA]) {
+	case ELFDATA2LSB:
+		return *(unsigned char const *)&endian == 1;
+	case ELFDATA2MSB:
+		return *(unsigned char const *)&endian == 0;
+	default:
+		return 0;
+	}
+}
+
+static int btf_load_from_elf(const char *path, struct btf **btf)
+{
+	int err = -1, fd = -1, idx = 0;
+	Elf_Data *btf_data = NULL;
+	Elf_Scn *scn = NULL;
+	Elf *elf = NULL;
+	GElf_Ehdr ehdr;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		p_err("failed to init libelf for %s", path);
+		return -1;
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		p_err("failed to open %s: %s", path, strerror(errno));
+		return -1;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		p_err("failed to open %s as ELF file", path);
+		goto done;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		p_err("failed to get EHDR from %s", path);
+		goto done;
+	}
+	if (!check_btf_endianness(&ehdr)) {
+		p_err("non-native ELF endianness is not supported");
+		goto done;
+	}
+	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+		p_err("failed to get e_shstrndx from %s\n", path);
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			p_err("failed to get section(%d) header from %s",
+			      idx, path);
+			goto done;
+		}
+		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!name) {
+			p_err("failed to get section(%d) name from %s",
+			      idx, path);
+			goto done;
+		}
+		if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = elf_getdata(scn, 0);
+			if (!btf_data) {
+				p_err("failed to get section(%d, %s) data from %s",
+				      idx, name, path);
+				goto done;
+			}
+			break;
+		}
+	}
+
+	if (!btf_data) {
+		p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
+		goto done;
+	}
+
+	*btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+		p_err("failed to load BTF data from %s: %s",
+		      path, strerror(err));
+		goto done;
+	}
+
+	err = 0;
+done:
+	if (err) {
+		if (*btf) {
+			btf__free(*btf);
+			*btf = NULL;
+		}
+	}
+	if (elf)
+		elf_end(elf);
+	close(fd);
+	return err;
+}
+
+static int do_dump(int argc, char **argv)
+{
+	struct btf *btf = NULL;
+	__u32 root_type_ids[2];
+	int root_type_cnt = 0;
+	__u32 btf_id = -1;
+	const char *src;
+	int fd = -1;
+	int err;
+
+	if (!REQ_ARGS(2)) {
+		usage();
+		return -1;
+	}
+	src = GET_ARG();
+
+	if (is_prefix(src, "map")) {
+		struct bpf_map_info info = {};
+		__u32 len = sizeof(info);
+
+		if (!REQ_ARGS(2)) {
+			usage();
+			return -1;
+		}
+
+		fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+		if (fd < 0)
+			return -1;
+
+		btf_id = info.btf_id;
+		if (argc && is_prefix(*argv, "key")) {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "value")) {
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "all")) {
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "kv")) {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+			NEXT_ARG();
+		} else {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+		}
+	} else if (is_prefix(src, "prog")) {
+		struct bpf_prog_info info = {};
+		__u32 len = sizeof(info);
+
+		if (!REQ_ARGS(2)) {
+			usage();
+			return -1;
+		}
+
+		fd = prog_parse_fd(&argc, &argv);
+		if (fd < 0)
+			return -1;
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get prog info: %s", strerror(errno));
+			goto done;
+		}
+
+		btf_id = info.btf_id;
+	} else if (is_prefix(src, "id")) {
+		char *endptr;
+
+		btf_id = strtoul(*argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", *argv);
+			return -1;
+		}
+		NEXT_ARG();
+	} else if (is_prefix(src, "file")) {
+		err = btf_load_from_elf(*argv, &btf);
+		if (err)
+			goto done;
+		NEXT_ARG();
+	} else {
+		err = -1;
+		p_err("unrecognized BTF source specifier: '%s'", src);
+		goto done;
+	}
+
+	if (!btf) {
+		err = btf__get_from_id(btf_id, &btf);
+		if (err) {
+			p_err("get btf by id (%u): %s", btf_id, strerror(err));
+			goto done;
+		}
+		if (!btf) {
+			err = -ENOENT;
+			p_err("can't find btf with ID (%u)", btf_id);
+			goto done;
+		}
+	}
+
+	dump_btf_raw(btf, root_type_ids, root_type_cnt);
+
+done:
+	close(fd);
+	btf__free(btf);
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s btf dump BTF_SRC\n"
+		"       %s btf help\n"
+		"\n"
+		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
+		"       " HELP_SPEC_MAP "\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"       " HELP_SPEC_OPTIONS "\n"
+		"",
+		bin_name, bin_name);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "help",	do_help },
+	{ "dump",	do_dump },
+	{ 0 }
+};
+
+int do_btf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index a3658eb73ffcc257b7cc68d71eb6ee7f96b4c9cf..c85809a5c2b5797cda9a4932ff333a2793003c29 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -56,7 +56,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup | perf | net | feature }\n"
+		"       OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -188,6 +188,7 @@ static const struct cmd cmds[] = {
 	{ "perf",	do_perf },
 	{ "net",	do_net },
 	{ "feature",	do_feature },
+	{ "btf",	do_btf },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 1ccc46169a1957627694469c035d0cf78a723c89..ce75b76d17b1b537422de33998e9e87c099a4ecd 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
 	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
 	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
 };
 
 extern const char * const map_type_name[];
@@ -150,6 +151,7 @@ int do_perf(int argc, char **arg);
 int do_net(int argc, char **arg);
 int do_tracelog(int argc, char **arg);
 int do_feature(int argc, char **argv);
+int do_btf(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 10f9693c021303257b3f450f27467bd79ca576ad..b47cc2fff11e5ec01aac35c855516fdeae471cd5 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -3,6 +3,7 @@
 
 #define _GNU_SOURCE
 #include <errno.h>
+#include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -11,6 +12,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/tc_act/tc_bpf.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 #include <bpf.h>
 #include <nlattr.h>
@@ -47,6 +50,10 @@ struct bpf_filter_t {
 	int		ifindex;
 };
 
+struct bpf_attach_info {
+	__u32 flow_dissector_id;
+};
+
 static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
 {
 	struct bpf_netdev_t *netinfo = cookie;
@@ -179,8 +186,45 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
 	return 0;
 }
 
+static int query_flow_dissector(struct bpf_attach_info *attach_info)
+{
+	__u32 attach_flags;
+	__u32 prog_ids[1];
+	__u32 prog_cnt;
+	int err;
+	int fd;
+
+	fd = open("/proc/self/ns/net", O_RDONLY);
+	if (fd < 0) {
+		p_err("can't open /proc/self/ns/net: %s",
+		      strerror(errno));
+		return -1;
+	}
+	prog_cnt = ARRAY_SIZE(prog_ids);
+	err = bpf_prog_query(fd, BPF_FLOW_DISSECTOR, 0,
+			     &attach_flags, prog_ids, &prog_cnt);
+	close(fd);
+	if (err) {
+		if (errno == EINVAL) {
+			/* Older kernel's don't support querying
+			 * flow dissector programs.
+			 */
+			errno = 0;
+			return 0;
+		}
+		p_err("can't query prog: %s", strerror(errno));
+		return -1;
+	}
+
+	if (prog_cnt == 1)
+		attach_info->flow_dissector_id = prog_ids[0];
+
+	return 0;
+}
+
 static int do_show(int argc, char **argv)
 {
+	struct bpf_attach_info attach_info = {};
 	int i, sock, ret, filter_idx = -1;
 	struct bpf_netdev_t dev_array;
 	unsigned int nl_pid;
@@ -198,6 +242,10 @@ static int do_show(int argc, char **argv)
 		usage();
 	}
 
+	ret = query_flow_dissector(&attach_info);
+	if (ret)
+		return -1;
+
 	sock = libbpf_netlink_open(&nl_pid);
 	if (sock < 0) {
 		fprintf(stderr, "failed to open netlink sock\n");
@@ -226,6 +274,12 @@ static int do_show(int argc, char **argv)
 		}
 		NET_END_ARRAY("\n");
 	}
+
+	NET_START_ARRAY("flow_dissector", "%s:\n");
+	if (attach_info.flow_dissector_id > 0)
+		NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id);
+	NET_END_ARRAY("\n");
+
 	NET_END_OBJECT;
 	if (json_output)
 		jsonw_end_array(json_wtr);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 140e2e3f6122be70b6ab0bd82df1ef0a3aa822fd..763dcff87f92989a8ca95e20d74c677303050349 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -169,6 +169,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 };
 
 enum bpf_attach_type {
@@ -1738,12 +1739,19 @@ union bpf_attr {
  * 		error if an eBPF program tries to set a callback that is not
  * 		supported in the current kernel.
  *
- * 		The supported callback values that *argval* can combine are:
+ * 		*argval* is a flag array which can combine these flags:
  *
  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
  *
+ * 		Therefore, this function can be used to clear a callback flag by
+ * 		setting the appropriate bit to zero. e.g. to disable the RTO
+ * 		callback:
+ *
+ * 		**bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * 			**bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
  * 		Here are some examples of where one could call such eBPF
  * 		program:
  *
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 079e3f2b15607a7e7104548036ac4cd9ffd058c7..23879feef1e858b23638f4e24e7bb7fab7e9d914 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so
 		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
 		     "Please make sure all LIBBPF_API symbols are"	 \
 		     "versioned in $(VERSION_SCRIPT)." >&2;		 \
+		readelf -s --wide $(OUTPUT)libbpf-in.o |		 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'|   \
+		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
+		readelf -s --wide $(OUTPUT)libbpf.so |			 \
+		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
+		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
+		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
+		     $(OUTPUT)libbpf_versioned_syms.tmp;		 \
+		rm $(OUTPUT)libbpf_global_syms.tmp			 \
+		   $(OUTPUT)libbpf_versioned_syms.tmp;			 \
 		exit 1;							 \
 	fi
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4428ebcac9889830a81903f56728276e58efa89f..e277d20c167132b2a6715214e18461bcfb4aec59 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2300,6 +2300,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 		return false;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 880c81228bbe7b952024ff43cd23111eaf7bdc02..6635a31a7a164e7d03cdd697b599774d9c360969 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -95,6 +95,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_SK_MSG:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
new file mode 100644
index 0000000000000000000000000000000000000000..0cc5e4ee90bd7d7c5f40170537dd80266452a391
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define FUNCTION_NAME_LEN 64
+#define FILE_NAME_LEN 128
+#define TASK_COMM_LEN 16
+
+typedef struct {
+	int PyThreadState_frame;
+	int PyThreadState_thread;
+	int PyFrameObject_back;
+	int PyFrameObject_code;
+	int PyFrameObject_lineno;
+	int PyCodeObject_filename;
+	int PyCodeObject_name;
+	int String_data;
+	int String_size;
+} OffsetConfig;
+
+typedef struct {
+	uintptr_t current_state_addr;
+	uintptr_t tls_key_addr;
+	OffsetConfig offsets;
+	bool use_tls;
+} PidData;
+
+typedef struct {
+	uint32_t success;
+} Stats;
+
+typedef struct {
+	char name[FUNCTION_NAME_LEN];
+	char file[FILE_NAME_LEN];
+} Symbol;
+
+typedef struct {
+	uint32_t pid;
+	uint32_t tid;
+	char comm[TASK_COMM_LEN];
+	int32_t kernel_stack_id;
+	int32_t user_stack_id;
+	bool thread_current;
+	bool pthread_match;
+	bool stack_complete;
+	int16_t stack_len;
+	int32_t stack[STACK_MAX_LEN];
+
+	int has_meta;
+	int metadata;
+	char dummy_safeguard;
+} Event;
+
+
+struct bpf_elf_map {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+};
+
+typedef int pid_t;
+
+typedef struct {
+	void* f_back; // PyFrameObject.f_back, previous frame
+	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
+	void* co_filename; // PyCodeObject.co_filename
+	void* co_name; // PyCodeObject.co_name
+} FrameData;
+
+static inline __attribute__((__always_inline__)) void*
+get_thread_state(void* tls_base, PidData* pidData)
+{
+	void* thread_state;
+	int key;
+
+	bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+	bpf_probe_read(&thread_state, sizeof(thread_state),
+		       tls_base + 0x310 + key * 0x10 + 0x08);
+	return thread_state;
+}
+
+static inline __attribute__((__always_inline__)) bool
+get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
+{
+	// read data from PyFrameObject
+	bpf_probe_read(&frame->f_back,
+		       sizeof(frame->f_back),
+		       frame_ptr + pidData->offsets.PyFrameObject_back);
+	bpf_probe_read(&frame->f_code,
+		       sizeof(frame->f_code),
+		       frame_ptr + pidData->offsets.PyFrameObject_code);
+
+	// read data from PyCodeObject
+	if (!frame->f_code)
+		return false;
+	bpf_probe_read(&frame->co_filename,
+		       sizeof(frame->co_filename),
+		       frame->f_code + pidData->offsets.PyCodeObject_filename);
+	bpf_probe_read(&frame->co_name,
+		       sizeof(frame->co_name),
+		       frame->f_code + pidData->offsets.PyCodeObject_name);
+	// read actual names into symbol
+	if (frame->co_filename)
+		bpf_probe_read_str(&symbol->file,
+				   sizeof(symbol->file),
+				   frame->co_filename + pidData->offsets.String_data);
+	if (frame->co_name)
+		bpf_probe_read_str(&symbol->name,
+				   sizeof(symbol->name),
+				   frame->co_name + pidData->offsets.String_data);
+	return true;
+}
+
+struct bpf_elf_map SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(PidData),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") eventmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(Event),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") symbolmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(Symbol),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") statsmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.size_key = sizeof(Stats),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.size_key = sizeof(int),
+	.size_value = sizeof(int),
+	.max_elem = 32,
+};
+
+struct bpf_elf_map SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.size_key = sizeof(int),
+	.size_value = sizeof(long long) * 127,
+	.max_elem = 1000,
+};
+
+static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
+{
+	uint64_t pid_tgid = bpf_get_current_pid_tgid();
+	pid_t pid = (pid_t)(pid_tgid >> 32);
+	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
+	if (!pidData)
+		return 0;
+
+	int zero = 0;
+	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
+	if (!event)
+		return 0;
+
+	event->pid = pid;
+
+	event->tid = (pid_t)pid_tgid;
+	bpf_get_current_comm(&event->comm, sizeof(event->comm));
+
+	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
+
+	void* thread_state_current = (void*)0;
+	bpf_probe_read(&thread_state_current,
+		       sizeof(thread_state_current),
+		       (void*)(long)pidData->current_state_addr);
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	void* tls_base = (void*)task;
+
+	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
+		: thread_state_current;
+	event->thread_current = thread_state == thread_state_current;
+
+	if (pidData->use_tls) {
+		uint64_t pthread_created;
+		uint64_t pthread_self;
+		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+
+		bpf_probe_read(&pthread_created,
+			       sizeof(pthread_created),
+			       thread_state + pidData->offsets.PyThreadState_thread);
+		event->pthread_match = pthread_created == pthread_self;
+	} else {
+		event->pthread_match = 1;
+	}
+
+	if (event->pthread_match || !pidData->use_tls) {
+		void* frame_ptr;
+		FrameData frame;
+		Symbol sym = {};
+		int cur_cpu = bpf_get_smp_processor_id();
+
+		bpf_probe_read(&frame_ptr,
+			       sizeof(frame_ptr),
+			       thread_state + pidData->offsets.PyThreadState_frame);
+
+		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
+		if (symbol_counter == NULL)
+			return 0;
+#pragma unroll
+		/* Unwind python stack */
+		for (int i = 0; i < STACK_MAX_LEN; ++i) {
+			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
+				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+				if (!symbol_id) {
+					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
+					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+					if (!symbol_id)
+						return 0;
+				}
+				if (*symbol_id == new_symbol_id)
+					(*symbol_counter)++;
+				event->stack[i] = *symbol_id;
+				event->stack_len = i + 1;
+				frame_ptr = frame.f_back;
+			}
+		}
+		event->stack_complete = frame_ptr == NULL;
+	} else {
+		event->stack_complete = 1;
+	}
+
+	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
+	if (stats)
+		stats->success++;
+
+	event->has_meta = 0;
+	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
+	return 0;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs* ctx)
+{
+	int i, ret = 0;
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c
new file mode 100644
index 0000000000000000000000000000000000000000..29786325db54a53ff335f8f85ef883fce2b95c50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf100.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 100
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
new file mode 100644
index 0000000000000000000000000000000000000000..c39f559d3100e8c32a87509e1101fc30fc73f2e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 180
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef7ce340a292d06f7626176a2fd48915df6d4051
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf50.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 50
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 9a598f602acd2c3ceef3406d8fcf5dc96b7256e5..ab69a38ff8bd08bf237f5d6d75ecbdeccdc65831 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -27,6 +27,7 @@ typedef __u16 __sum16;
 #include <linux/unistd.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
+#include <linux/nbd.h>
 
 #include <sys/ioctl.h>
 #include <sys/wait.h>
@@ -2380,7 +2381,7 @@ static int libbpf_debug_print_verifier_scale(enum libbpf_print_level level,
 	return vfprintf(stderr, "%s", args);
 }
 
-static int check_load(const char *file)
+static int check_load(const char *file, enum bpf_prog_type type)
 {
 	struct bpf_prog_load_attr attr;
 	struct bpf_object *obj;
@@ -2388,7 +2389,7 @@ static int check_load(const char *file)
 
 	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 	attr.file = file;
-	attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+	attr.prog_type = type;
 	attr.log_level = 4;
 	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
 	bpf_object__close(obj);
@@ -2399,21 +2400,26 @@ static int check_load(const char *file)
 
 void test_bpf_verif_scale(void)
 {
-	const char *file1 = "./test_verif_scale1.o";
-	const char *file2 = "./test_verif_scale2.o";
-	const char *file3 = "./test_verif_scale3.o";
-	int err;
+	const char *scale[] = {
+		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
+	};
+	const char *pyperf[] = {
+		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o"
+	};
+	int err, i;
 
 	if (verifier_stats)
 		libbpf_set_print(libbpf_debug_print_verifier_scale);
 
-	err = check_load(file1);
-	err |= check_load(file2);
-	err |= check_load(file3);
-	if (!err)
-		printf("test_verif_scale:OK\n");
-	else
-		printf("test_verif_scale:FAIL\n");
+	for (i = 0; i < ARRAY_SIZE(scale); i++) {
+		err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
+		printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
+	}
+
+	for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
+		err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+		printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
+	}
 }
 
 static void test_global_data_number(struct bpf_object *obj, __u32 duration)
@@ -2617,6 +2623,120 @@ static void test_flow_dissector_load_bytes(void)
 		close(fd);
 }
 
+static void test_raw_tp_writable_reject_nbd_invalid(void)
+{
+	__u32 duration = 0;
+	char error[4096];
+	int bpf_fd = -1, tp_fd = -1;
+
+	const struct bpf_insn program[] = {
+		/* r6 is our tp buffer */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+		/* one byte beyond the end of the nbd_request struct */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6,
+			    sizeof(struct nbd_request)),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = program,
+		.insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
+		.log_level = 2,
+	};
+
+	bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
+		  "failed: %d errno %d\n", bpf_fd, errno))
+		return;
+
+	tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd);
+	if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open",
+		  "erroneously succeeded\n"))
+		goto out_bpffd;
+
+	close(tp_fd);
+out_bpffd:
+	close(bpf_fd);
+}
+
+static void test_raw_tp_writable_test_run(void)
+{
+	__u32 duration = 0;
+	char error[4096];
+
+	const struct bpf_insn trace_program[] = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_MOV64_IMM(BPF_REG_0, 42),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = trace_program,
+		.insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+		.log_level = 2,
+	};
+
+	int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
+		  "failed: %d errno %d\n", bpf_fd, errno))
+		return;
+
+	const struct bpf_insn skb_program[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr skb_load_attr = {
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.license = "GPL v2",
+		.insns = skb_program,
+		.insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
+	};
+
+	int filter_fd =
+		bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+	if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
+		  filter_fd, errno))
+		goto out_bpffd;
+
+	int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd);
+	if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened",
+		  "failed: %d errno %d\n", tp_fd, errno))
+		goto out_filterfd;
+
+	char test_skb[128] = {
+		0,
+	};
+
+	__u32 prog_ret;
+	int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
+				    0, &prog_ret, 0);
+	CHECK(err != 42, "test_run",
+	      "tracepoint did not modify return value\n");
+	CHECK(prog_ret != 0, "test_run_ret",
+	      "socket_filter did not return 0\n");
+
+	close(tp_fd);
+
+	err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
+				&prog_ret, 0);
+	CHECK(err != 0, "test_run_notrace",
+	      "test_run failed with %d errno %d\n", err, errno);
+	CHECK(prog_ret != 0, "test_run_ret_notrace",
+	      "socket_filter did not return 0\n");
+
+out_filterfd:
+	close(filter_fd);
+out_bpffd:
+	close(bpf_fd);
+}
+
 int main(int ac, char **av)
 {
 	srand(time(NULL));
@@ -2655,6 +2775,8 @@ int main(int ac, char **av)
 	test_signal_pending(BPF_PROG_TYPE_FLOW_DISSECTOR);
 	test_bpf_verif_scale();
 	test_global_data();
+	test_raw_tp_writable_reject_nbd_invalid();
+	test_raw_tp_writable_test_run();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index eeea520f23484b5350899eea5e255341db10f9da..3e0502af33078cf7fa19eb5abe8e41ba0f1dc5a3 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -210,33 +210,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }
 
-/* test the sequence of 1k jumps */
+#define MAX_JMP_SEQ 8192
+
+/* test the sequence of 8k jumps */
 static void bpf_fill_scale1(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
 	int i = 0, k = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % 64 + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
 }
 
-/* test the sequence of 1k jumps in inner most function (function depth 8)*/
+/* test the sequence of 8k jumps in inner most function (function depth 8)*/
 static void bpf_fill_scale2(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
@@ -248,19 +250,20 @@ static void bpf_fill_scale2(struct bpf_test *self)
 		insn[i++] = BPF_EXIT_INSN();
 	}
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	k = 0;
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
@@ -16978,6 +16981,40 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.retval = 2,
 	},
+	{
+		"raw_tracepoint_writable: reject variable offset",
+		.insns = {
+			/* r6 is our tp buffer */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			/* move the key (== 0) to r10-8 */
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			/* lookup in the map */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					BPF_FUNC_map_lookup_elem),
+
+			/* exit clean if null */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* shift the buffer pointer to a variable location */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
+			/* clobber whatever's there */
+			BPF_MOV64_IMM(BPF_REG_7, 4242),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_8b = { 1, },
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)