From 24eb5867db2640ee66e3354a9a1f01f964f5967a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:15 -0800
Subject: [PATCH 01/23] selftests/bpf: skip sockmap in test_maps if kernel
 doesn't have support

ANBZ: #5530

commit e8ddbfb4bcb2254acb71f2ad9008d736f45273a7 upstream.

Use recently introduced bpf_probe_map_type() to skip test_sockmap()
if map creation fails. The skipped test is indicated in the output.

Example:

test_sockmap SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)
Fork 1024 tasks to 'test_update_delete'
...
test_sockmap SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)
Fork 1024 tasks to 'test_update_delete'
...
test_maps: OK, 2 SKIPPED

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_maps.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index d2217a50e0a9..245c5bed2c75 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -32,6 +32,8 @@
 #define ENOTSUPP 524
 #endif
 
+static int skips;
+
 static int map_flags;
 
 #define CHECK(condition, tag, format...) ({				\
@@ -725,6 +727,15 @@ static void test_sockmap(int tasks, void *data)
 			    sizeof(key), sizeof(value),
 			    6, 0);
 	if (fd < 0) {
+		if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
+			printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
+			       __func__);
+			skips++;
+			for (i = 0; i < 6; i++)
+				close(sfd[i]);
+			return;
+		}
+
 		printf("Failed to create sockmap %i\n", fd);
 		goto out_sockmap;
 	}
@@ -1702,6 +1713,6 @@ int main(void)
 	map_flags = BPF_F_NO_PREALLOC;
 	run_all_tests();
 
-	printf("test_maps: OK\n");
+	printf("test_maps: OK, %d SKIPPED\n", skips);
 	return 0;
 }
-- 
Gitee


From d45725e48a58610c9e9f8d79336107cd039b4b41 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:16 -0800
Subject: [PATCH 02/23] selftests/bpf: skip verifier tests for unsupported
 program types

ANBZ: #5530

commit 8184d44c9a577a2f1842ed6cc844bfd4a9981d8e upstream.

Use recently introduced bpf_probe_prog_type() to skip tests in the
test_verifier() if bpf_verify_program() fails. The skipped test is
indicated in the output.

Example:

...
679/p bpf_get_stack return R0 within range SKIP (unsupported program
type 5)
680/p ld_abs: invalid op 1 OK
...
Summary: 863 PASSED, 165 SKIPPED, 3 FAILED

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 0b40a0f305f5..42927fab4103 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -33,6 +33,7 @@
 #include <linux/if_ether.h>
 
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 #ifdef HAVE_GENHDR
 # include "autoconf.h"
@@ -58,6 +59,7 @@
 
 #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
 static bool unpriv_disabled = false;
+static int skips;
 
 struct bpf_test {
 	const char *descr;
@@ -15413,6 +15415,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		pflags |= BPF_F_ANY_ALIGNMENT;
 	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
 				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+		printf("SKIP (unsupported program type %d)\n", prog_type);
+		skips++;
+		goto close_fds;
+	}
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
@@ -15566,7 +15573,7 @@ static bool test_as_unpriv(struct bpf_test *test)
 
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
-	int i, passes = 0, errors = 0, skips = 0;
+	int i, passes = 0, errors = 0;
 
 	for (i = from; i < to; i++) {
 		struct bpf_test *test = &tests[i];
-- 
Gitee


From 4be3ca1548d364071590f86e85e9bda9f15f4c67 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:17 -0800
Subject: [PATCH 03/23] selftests/bpf: skip verifier tests for unsupported map
 types

ANBZ: #5530

commit 9acea337ef570faa14f13232d1b2fa6615aaf569 upstream.

Use recently introduced bpf_probe_map_type() to skip tests in the
test_verifier if map creation (create_map) fails. It's handled
explicitly for each fixup, i.e. if bpf_create_map returns negative fd,
we probe the kernel for the appropriate map support and skip the
test is map type is not supported.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 42927fab4103..6a4bc2aca10c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -15091,6 +15091,16 @@ static int probe_filter_length(const struct bpf_insn *fp)
 	return len + 1;
 }
 
+static bool skip_unsupported_map(enum bpf_map_type map_type)
+{
+	if (!bpf_probe_map_type(map_type, 0)) {
+		printf("SKIP (unsupported map type %d)\n", map_type);
+		skips++;
+		return true;
+	}
+	return false;
+}
+
 static int create_map(uint32_t type, uint32_t size_key,
 		      uint32_t size_value, uint32_t max_elem)
 {
@@ -15098,8 +15108,11 @@ static int create_map(uint32_t type, uint32_t size_key,
 
 	fd = bpf_create_map(type, size_key, size_value, max_elem,
 			    type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
-	if (fd < 0)
+	if (fd < 0) {
+		if (skip_unsupported_map(type))
+			return -1;
 		printf("Failed to create hash map '%s'!\n", strerror(errno));
+	}
 
 	return fd;
 }
@@ -15139,6 +15152,8 @@ static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
 	mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
 			     sizeof(int), max_elem, 0);
 	if (mfd < 0) {
+		if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY))
+			return -1;
 		printf("Failed to create prog array '%s'!\n", strerror(errno));
 		return -1;
 	}
@@ -15169,15 +15184,20 @@ static int create_map_in_map(void)
 	inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
 				      sizeof(int), 1, 0);
 	if (inner_map_fd < 0) {
+		if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY))
+			return -1;
 		printf("Failed to create array '%s'!\n", strerror(errno));
 		return inner_map_fd;
 	}
 
 	outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
 					     sizeof(int), inner_map_fd, 1, 0);
-	if (outer_map_fd < 0)
+	if (outer_map_fd < 0) {
+		if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS))
+			return -1;
 		printf("Failed to create array of maps '%s'!\n",
 		       strerror(errno));
+	}
 
 	close(inner_map_fd);
 
@@ -15192,9 +15212,12 @@ static int create_cgroup_storage(bool percpu)
 
 	fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
 			    TEST_DATA_LEN, 0, 0);
-	if (fd < 0)
+	if (fd < 0) {
+		if (skip_unsupported_map(type))
+			return -1;
 		printf("Failed to create cgroup storage '%s'!\n",
 		       strerror(errno));
+	}
 
 	return fd;
 }
@@ -15397,6 +15420,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int run_errs, run_successes;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	int fixup_skips;
 	__u32 pflags;
 	int i, err;
 
@@ -15405,7 +15429,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	if (!prog_type)
 		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	fixup_skips = skips;
 	do_test_fixup(test, prog_type, prog, map_fds);
+	/* If there were some map skips during fixup due to missing bpf
+	 * features, skip this test.
+	 */
+	if (fixup_skips != skips)
+		return;
 	prog_len = probe_filter_length(prog);
 
 	pflags = 0;
-- 
Gitee


From 72729c329bf91a55b232d7660575ab4b70748195 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:18 -0800
Subject: [PATCH 04/23] selftests/bpf: mark verifier test that uses
 bpf_trace_printk as BPF_PROG_TYPE_TRACEPOINT

ANBZ: #5530

commit cfff578ed51ca4ad4c66c8d4bd78694096ea5515 upstream.

We don't have this helper if the kernel was compiled without
CONFIG_BPF_EVENTS. Setting prog_type to BPF_PROG_TYPE_TRACEPOINT
let's verifier correctly skip this test based on the missing
prog_type support in the kernel.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6a4bc2aca10c..907560a849c9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2678,6 +2678,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "unknown func bpf_trace_printk#6",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
 		"unpriv: pass pointer to helper function",
-- 
Gitee


From f60ff4cc7b73890bfd7ae6387d4419852a4f7f12 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:19 -0800
Subject: [PATCH 05/23] bpf: BPF_PROG_TYPE_CGROUP_{SKB, SOCK, SOCK_ADDR}
 require cgroups enabled

ANBZ: #5530

commit befa618112a0a4590ce21d70aa35c9d341337774 upstream.

There is no way to exercise appropriate attach points without cgroups
enabled. This lets test_verifier correctly skip tests for these
prog_types if kernel was compiled without BPF cgroup support.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/bpf_types.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 44d9ab4809bd..08bf2f1fe553 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -6,9 +6,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
 BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+#ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
+#endif
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
-- 
Gitee


From c361d8b8a2117b67566437adf0a1cc91bd500340 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 28 Jan 2019 10:29:15 -0800
Subject: [PATCH 06/23] tools: bpftool: warn about risky prog array updates

ANBZ: #5530

commit d76198b0bc5a67c14c5209bac82481399a3387da upstream.

When prog array is updated with bpftool users often refer
to the map via the ID.  Unfortunately, that's likely
to lead to confusion because prog arrays get flushed when
the last user reference is gone.  If there is no other
reference bpftool will create one, update successfully
just to close the map again and have it flushed.

Warn about this case in non-JSON mode.

If the problem continues causing confusion we can remove
the support for referring to a map by ID for prog array
update completely.  For now it seems like the potential
inconvenience to users who know what they're doing outweighs
the benefit.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/bpf/bpftool/map.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 57db75ca12dd..3d3adf197f12 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -411,6 +411,9 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
 				p_err("not enough value arguments for map of progs");
 				return -1;
 			}
+			if (is_prefix(*argv, "id"))
+				p_info("Warning: updating program array via MAP_ID, make sure this map is kept open\n"
+				       "         by some process or pinned otherwise update will be lost");
 
 			fd = prog_parse_fd(&argc, &argv);
 			if (fd < 0)
-- 
Gitee


From 9a49010bc4a3e1ead74372d0086d70df662aff2f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 Jan 2019 17:21:52 -0800
Subject: [PATCH 07/23] bpf: check that BPF programs run with preemption
 disabled

ANBZ: #5530

commit 568f196756ad9fe2b49c46bbf6a9de1b190438b4 upstream.

Introduce cant_sleep() macro for annotation of functions that
cannot sleep.

Use it in BPF_PROG_RUN to catch execution of BPF programs in
preemptable context.

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/filter.h |  2 +-
 include/linux/kernel.h | 14 ++++++++++++--
 kernel/sched/core.c    | 28 ++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index fff5ca4fc7c5..1ffc13890be8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -563,7 +563,7 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx)  ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fbf320de83f2..649b210226fa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -246,8 +246,10 @@ extern int _cond_resched(void);
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-  void ___might_sleep(const char *file, int line, int preempt_offset);
-  void __might_sleep(const char *file, int line, int preempt_offset);
+extern void ___might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __cant_sleep(const char *file, int line, int preempt_offset);
+
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@ -260,6 +262,13 @@ extern int _cond_resched(void);
  */
 # define might_sleep() \
 	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+/**
+ * cant_sleep - annotation for functions that cannot sleep
+ *
+ * this macro will print a stack trace if it is executed with preemption enabled
+ */
+# define cant_sleep() \
+	do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()	(current->task_state_change = 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
@@ -267,6 +276,7 @@ extern int _cond_resched(void);
   static inline void __might_sleep(const char *file, int line,
 				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
+# define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 83d4ada9c974..f600ea79ff97 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6235,6 +6235,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 EXPORT_SYMBOL(___might_sleep);
+
+void __cant_sleep(const char *file, int line, int preempt_offset)
+{
+	static unsigned long prev_jiffy;
+
+	if (irqs_disabled())
+		return;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+		return;
+
+	if (preempt_count() > preempt_offset)
+		return;
+
+	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+		return;
+	prev_jiffy = jiffies;
+
+	printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
+	printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(),
+			current->pid, current->comm);
+
+	debug_show_held_locks(current);
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+EXPORT_SYMBOL_GPL(__cant_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-- 
Gitee


From 9d915fab9c3db91a262865192ea3e61360089cb2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 21 Feb 2019 10:40:14 -0800
Subject: [PATCH 08/23] seccomp, bpf: disable preemption before calling into
 bpf prog

ANBZ: #5530

commit e80d02dd763093f70c3000ef34253a6d426becf6 upstream.

All BPF programs must be called with preemption disabled.

Fixes: 568f196756ad ("bpf: check that BPF programs run with preemption disabled")
Reported-by: syzbot+8bf19ee2aa580de7a2a7@syzkaller.appspotmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 kernel/seccomp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index fd023ac24e10..859c3108e440 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -207,6 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
+	preempt_disable();
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
@@ -215,6 +216,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			*match = f;
 		}
 	}
+	preempt_enable();
 	return ret;
 }
 #endif /* CONFIG_SECCOMP_FILTER */
-- 
Gitee


From da25da1f0663ae791c6b4800034b2ce2e8d8fc9b Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Fri, 22 Feb 2019 09:46:52 +0100
Subject: [PATCH 09/23] bpf: test_bpf: turn off preemption in function
 __run_once

ANBZ: #5530

commit fd92d6648feb83888e40f0aa2f74e1d9bbd2ad7d upstream.

When running BPF test suite the following splat occurs:

[  415.930950] test_bpf: #0 TAX jited:0
[  415.931067] BUG: assuming atomic context at lib/test_bpf.c:6674
[  415.946169] in_atomic(): 0, irqs_disabled(): 0, pid: 11556, name: modprobe
[  415.953176] INFO: lockdep is turned off.
[  415.957207] CPU: 1 PID: 11556 Comm: modprobe Tainted: G        W         5.0.0-rc7-next-20190220 #1
[  415.966328] Hardware name: HiKey Development Board (DT)
[  415.971592] Call trace:
[  415.974069]  dump_backtrace+0x0/0x160
[  415.977761]  show_stack+0x24/0x30
[  415.981104]  dump_stack+0xc8/0x114
[  415.984534]  __cant_sleep+0xf0/0x108
[  415.988145]  test_bpf_init+0x5e0/0x1000 [test_bpf]
[  415.992971]  do_one_initcall+0x90/0x428
[  415.996837]  do_init_module+0x60/0x1e4
[  416.000614]  load_module+0x1de0/0x1f50
[  416.004391]  __se_sys_finit_module+0xc8/0xe0
[  416.008691]  __arm64_sys_finit_module+0x24/0x30
[  416.013255]  el0_svc_common+0x78/0x130
[  416.017031]  el0_svc_handler+0x38/0x78
[  416.020806]  el0_svc+0x8/0xc

Rework so that preemption is disabled when we loop over function
'BPF_PROG_RUN(...)'.

Fixes: 568f196756ad ("bpf: check that BPF programs run with preemption disabled")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 lib/test_bpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index aa22bcaec1dc..bb80a917fc34 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6666,12 +6666,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
 	u64 start, finish;
 	int ret = 0, i;
 
+	preempt_disable();
 	start = ktime_get_ns();
 
 	for (i = 0; i < runs; i++)
 		ret = BPF_PROG_RUN(fp, data);
 
 	finish = ktime_get_ns();
+	preempt_enable();
 
 	*duration = finish - start;
 	do_div(*duration, runs);
-- 
Gitee


From 67074f7fe0d0fade50bccc770ef969c43e0edfc8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 28 Jan 2019 18:43:34 -0800
Subject: [PATCH 10/23] bpf: run bpf programs with preemption disabled

ANBZ: #5530

commit 6cab5e90ab2bd323c9f3811b6c70a4687df51e27 upstream.

Disabled preemption is necessary for proper access to per-cpu maps
from BPF programs.

But the sender side of socket filters didn't have preemption disabled:
unix_dgram_sendmsg->sk_filter->sk_filter_trim_cap->bpf_prog_run_save_cb->BPF_PROG_RUN

and a combination of af_packet with tun device didn't disable either:
tpacket_snd->packet_direct_xmit->packet_pick_tx_queue->ndo_select_queue->
  tun_select_queue->tun_ebpf_select_queue->bpf_prog_run_clear_cb->BPF_PROG_RUN

Disable preemption before executing BPF programs (both classic and extended).

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/filter.h | 21 ++++++++++++++++++---
 kernel/bpf/cgroup.c    |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1ffc13890be8..dc3c1044f247 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -642,8 +642,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->data;
 }
 
-static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
-				       struct sk_buff *skb)
+static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
+					 struct sk_buff *skb)
 {
 	u8 *cb_data = bpf_skb_cb(skb);
 	u8 cb_saved[BPF_SKB_CB_LEN];
@@ -662,15 +662,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 	return res;
 }
 
+static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
+				       struct sk_buff *skb)
+{
+	u32 res;
+
+	preempt_disable();
+	res = __bpf_prog_run_save_cb(prog, skb);
+	preempt_enable();
+	return res;
+}
+
 static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 					struct sk_buff *skb)
 {
 	u8 *cb_data = bpf_skb_cb(skb);
+	u32 res;
 
 	if (unlikely(prog->cb_access))
 		memset(cb_data, 0, BPF_SKB_CB_LEN);
 
-	return BPF_PROG_RUN(prog, skb);
+	preempt_disable();
+	res = BPF_PROG_RUN(prog, skb);
+	preempt_enable();
+	return res;
 }
 
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ce4e26bb2cd0..075fca513091 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
+				 __bpf_prog_run_save_cb);
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
-- 
Gitee


From 2509f786713fe09153ca0f671a15546c1cedd64e Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Date: Mon, 28 Jan 2019 23:04:46 -0500
Subject: [PATCH 11/23] bpf: fix bitrotted kerneldoc

ANBZ: #5530

commit de1da68d9c9d230e448b9fa0abff139a0468fb96 upstream.

Over the years, the function signature has changed, but the
kerneldoc block hasn't.

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 kernel/bpf/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 87f153ee5118..faab7d3054fa 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1260,8 +1260,9 @@ bool bpf_opcode_in_insntable(u8 code)
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
- *	@ctx: is the data we are operating on
+ *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
  *	@insn: is the array of eBPF instructions
+ *	@stack: is the eBPF storage stack
  *
  * Decode and execute eBPF instructions.
  */
-- 
Gitee


From 83d9eeb0b88a05f53a71638cfd9e1d3e38531f6c Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Date: Tue, 29 Jan 2019 01:47:06 -0500
Subject: [PATCH 12/23] bpf, cgroups: clean up kerneldoc warnings

ANBZ: #5530

commit 1832f4ef5867fd3898d8a6c6c1978b75d76fc246 upstream.

Building with W=1 reveals some bitrot:

  CC      kernel/bpf/cgroup.o
kernel/bpf/cgroup.c:238: warning: Function parameter or member 'flags' not described in '__cgroup_bpf_attach'
kernel/bpf/cgroup.c:367: warning: Function parameter or member 'unused_flags' not described in '__cgroup_bpf_detach'

Add a kerneldoc line for 'flags'.

Fixing the warning for 'unused_flags' is best approached by
removing the unused parameter on the function call.

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/bpf-cgroup.h | 2 +-
 kernel/bpf/cgroup.c        | 3 ++-
 kernel/cgroup/cgroup.c     | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 30c9d0247e7f..ada0246d43ab 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -78,7 +78,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+			enum bpf_attach_type type);
 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		       union bpf_attr __user *uattr);
 
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 075fca513091..43a91d4fcdb3 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -230,6 +230,7 @@ static int update_effective_progs(struct cgroup *cgrp,
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
  * @type: Type of attach operation
+ * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
@@ -363,7 +364,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
+			enum bpf_attach_type type)
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	enum bpf_cgroup_storage_type stype;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c429a4b61429..fc05f4b9debe 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6444,7 +6444,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_detach(cgrp, prog, type);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
-- 
Gitee


From 65da820b52949f2a5650fe4caff91e53dfa7198e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:04 -0800
Subject: [PATCH 13/23] bpf: introduce bpf_spin_lock

ANBZ: #5530

commit d83525ca62cf8ebe3271d14c36fb900c294274a2 upstream.

Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let
bpf program serialize access to other variables.

Example:
struct hash_elem {
    int cnt;
    struct bpf_spin_lock lock;
};
struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key);
if (val) {
    bpf_spin_lock(&val->lock);
    val->cnt++;
    bpf_spin_unlock(&val->lock);
}

Restrictions and safety checks:
- bpf_spin_lock is only allowed inside HASH and ARRAY maps.
- BTF description of the map is mandatory for safety analysis.
- bpf program can take one bpf_spin_lock at a time, since two or more can
  cause dead locks.
- only one 'struct bpf_spin_lock' is allowed per map element.
  It drastically simplifies implementation yet allows bpf program to use
  any number of bpf_spin_locks.
- when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed.
- bpf program must bpf_spin_unlock() before return.
- bpf program can access 'struct bpf_spin_lock' only via
  bpf_spin_lock()/bpf_spin_unlock() helpers.
- load/store into 'struct bpf_spin_lock lock;' field is not allowed.
- to use bpf_spin_lock() helper the BTF description of map value must be
  a struct and have 'struct bpf_spin_lock anyname;' field at the top level.
  Nested lock inside another struct is not allowed.
- syscall map_lookup doesn't copy bpf_spin_lock field to user space.
- syscall map_update and program map_update do not update bpf_spin_lock field.
- bpf_spin_lock cannot be on the stack or inside networking packet.
  bpf_spin_lock can only be inside HASH or ARRAY map value.
- bpf_spin_lock is available to root only and to all program types.
- bpf_spin_lock is not allowed in inner maps of map-in-map.
- ld_abs is not allowed inside spin_lock-ed region.
- tracing progs and socket filter progs cannot use bpf_spin_lock due to
  insufficient preemption checks

Implementation details:
- cgroup-bpf class of programs can nest with xdp/tc programs.
  Hence bpf_spin_lock is equivalent to spin_lock_irqsave.
  Other solutions to avoid nested bpf_spin_lock are possible.
  Like making sure that all networking progs run with softirq disabled.
  spin_lock_irqsave is the simplest and doesn't add overhead to the
  programs that don't use it.
- arch_spinlock_t is used when its implemented as queued_spin_lock
- archs can force their own arch_spinlock_t
- on architectures where queued_spin_lock is not available and
  sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used.
- presence of bpf_spin_lock inside map value could have been indicated via
  extra flag during map_create, but specifying it via BTF is cleaner.
  It provides introspection for map key/value and reduces user mistakes.

Next steps:
- allow bpf_spin_lock in other map types (like cgroup local storage)
- introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper
  to request kernel to grab bpf_spin_lock before rewriting the value.
  That will serialize access to map elements.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/bpf.h          |  37 +++++++-
 include/linux/bpf_verifier.h |   1 +
 include/linux/btf.h          |   1 +
 include/uapi/linux/bpf.h     |   7 +-
 kernel/Kconfig.locks         |   3 +
 kernel/bpf/arraymap.c        |   7 +-
 kernel/bpf/btf.c             |  42 +++++++++
 kernel/bpf/core.c            |   2 +
 kernel/bpf/hashtab.c         |  21 +++--
 kernel/bpf/helpers.c         |  80 +++++++++++++++++
 kernel/bpf/map_in_map.c      |   5 ++
 kernel/bpf/syscall.c         |  21 ++++-
 kernel/bpf/verifier.c        | 169 ++++++++++++++++++++++++++++++++++-
 net/core/filter.c            |  16 +++-
 14 files changed, 386 insertions(+), 26 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 71ac8c62bc6a..4013b1a77dda 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -73,14 +73,15 @@ struct bpf_map {
 	u32 value_size;
 	u32 max_entries;
 	u32 map_flags;
-	u32 pages;
+	int spin_lock_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
+	u32 pages;
 	bool unpriv_array;
-	/* 55 bytes hole */
+	/* 51 bytes hole */
 
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
@@ -97,6 +98,34 @@ struct bpf_map {
 	CK_HOTFIX_RESERVE(4)
 };
 
+static inline bool map_value_has_spin_lock(const struct bpf_map *map)
+{
+	return map->spin_lock_off >= 0;
+}
+
+static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
+{
+	if (likely(!map_value_has_spin_lock(map)))
+		return;
+	*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
+		(struct bpf_spin_lock){};
+}
+
+/* copy everything but bpf_spin_lock */
+static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
+{
+	if (unlikely(map_value_has_spin_lock(map))) {
+		u32 off = map->spin_lock_off;
+
+		memcpy(dst, src, off);
+		memcpy(dst + off + sizeof(struct bpf_spin_lock),
+		       src + off + sizeof(struct bpf_spin_lock),
+		       map->value_size - off - sizeof(struct bpf_spin_lock));
+	} else {
+		memcpy(dst, src, map->value_size);
+	}
+}
+
 struct bpf_offload_dev;
 struct bpf_offloaded_map;
 
@@ -168,6 +197,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
+	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 };
 
 /* type of values returned from helper functions */
@@ -902,7 +932,8 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
 extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
 extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
 extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
-
+extern const struct bpf_func_proto bpf_spin_lock_proto;
+extern const struct bpf_func_proto bpf_spin_unlock_proto;
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
 /* Shared helpers among cBPF and eBPF. */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index fe5278d9414e..ae95cd389265 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -148,6 +148,7 @@ struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
 	u32 curframe;
+	u32 active_spin_lock;
 	bool speculative;
 };
 
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 12502e25e767..455d31b55828 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -50,6 +50,7 @@ u32 btf_id(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   const struct btf_member *m,
 			   u32 expected_offset, u32 expected_size);
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 277f8cd2e6a6..0f7342e8e56d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2427,7 +2427,9 @@ union bpf_attr {
 	FN(map_peek_elem),		\
 	FN(msg_push_data),		\
 	FN(msg_pop_data),		\
-	FN(rc_pointer_rel),
+	FN(rc_pointer_rel),		\
+	FN(spin_lock),			\
+	FN(spin_unlock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3062,4 +3064,7 @@ struct bpf_line_info {
 	__u32	line_col;
 };
 
+struct bpf_spin_lock {
+	__u32	val;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..fbba478ae522 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS
 	def_bool y if ARCH_USE_QUEUED_SPINLOCKS
 	depends on SMP
 
+config BPF_ARCH_SPINLOCK
+	bool
+
 config ARCH_USE_QUEUED_RWLOCKS
 	bool
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 9d7d7671d5a2..7d78c0d0b031 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -270,9 +270,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
 	else
-		memcpy(array->value +
-		       array->elem_size * (index & array->index_mask),
-		       value, map->value_size);
+		copy_map_value(map,
+			       array->value +
+			       array->elem_size * (index & array->index_mask),
+			       value);
 	return 0;
 }
 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4ee0b8074c12..c8b51dac8c45 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t)
 	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
 }
 
+static bool __btf_type_is_struct(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
 static bool btf_type_is_array(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
@@ -2046,6 +2051,43 @@ static void btf_struct_log(struct btf_verifier_env *env,
 	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
+/* find 'struct bpf_spin_lock' in map value.
+ * return >= 0 offset if found
+ * and < 0 in case of error
+ */
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+{
+	const struct btf_member *member;
+	u32 i, off = -ENOENT;
+
+	if (!__btf_type_is_struct(t))
+		return -EINVAL;
+
+	for_each_member(i, t, member) {
+		const struct btf_type *member_type = btf_type_by_id(btf,
+								    member->type);
+		if (!__btf_type_is_struct(member_type))
+			continue;
+		if (member_type->size != sizeof(struct bpf_spin_lock))
+			continue;
+		if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
+			   "bpf_spin_lock"))
+			continue;
+		if (off != -ENOENT)
+			/* only one 'struct bpf_spin_lock' is allowed */
+			return -E2BIG;
+		off = btf_member_bit_offset(t, member);
+		if (off % 8)
+			/* valid C code cannot generate such BTF */
+			return -EINVAL;
+		off /= 8;
+		if (off % __alignof__(struct bpf_spin_lock))
+			/* valid struct bpf_spin_lock will be 4 byte aligned */
+			return -EINVAL;
+	}
+	return off;
+}
+
 static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 				u32 type_id, void *data, u8 bits_offset,
 				struct seq_file *m)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index faab7d3054fa..0d1586d4f3a0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2000,6 +2000,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_map_push_elem_proto __weak;
 const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_spin_lock_proto __weak;
+const struct bpf_func_proto bpf_spin_unlock_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index e0610729a604..5e729674c654 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -730,21 +730,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 	       BITS_PER_LONG == 64;
 }
 
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
-{
-	u32 size = htab->map.value_size;
-
-	if (percpu || fd_htab_map_needs_adjust(htab))
-		size = round_up(size, 8);
-	return size;
-}
-
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
 					 struct htab_elem *old_elem)
 {
-	u32 size = htab_size_value(htab, percpu);
+	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
 	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
@@ -782,10 +773,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			l_new = ERR_PTR(-ENOMEM);
 			goto dec_count;
 		}
+		check_and_init_map_lock(&htab->map,
+					l_new->key + round_up(key_size, 8));
 	}
 
 	memcpy(l_new->key, key, key_size);
 	if (percpu) {
+		size = round_up(size, 8);
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
@@ -803,8 +797,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
-	} else {
+	} else if (fd_htab_map_needs_adjust(htab)) {
+		size = round_up(size, 8);
 		memcpy(l_new->key + round_up(key_size, 8), value, size);
+	} else {
+		copy_map_value(&htab->map,
+			       l_new->key + round_up(key_size, 8),
+			       value);
 	}
 
 	l_new->hash = hash;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f6f0d6d272e8..d6c7859e6939 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+	union {
+		__u32 val;
+		arch_spinlock_t lock;
+	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
+
+	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
+	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
+	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
+	arch_spin_lock(l);
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+
+	arch_spin_unlock(l);
+}
+
+#else
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
+	do {
+		atomic_cond_read_relaxed(l, !VAL);
+	} while (atomic_xchg(l, 1));
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	atomic_set_release(l, 0);
+}
+
+#endif
+
+static DEFINE_PER_CPU(unsigned long, irqsave_flags);
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bpf_spin_lock(lock);
+	__this_cpu_write(irqsave_flags, flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_lock_proto = {
+	.func		= bpf_spin_lock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	flags = __this_cpu_read(irqsave_flags);
+	__bpf_spin_unlock(lock);
+	local_irq_restore(flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_unlock_proto = {
+	.func		= bpf_spin_unlock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 52378d3e34b3..583346a0ab29 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (map_value_has_spin_lock(inner_map)) {
+		fdput(f);
+		return ERR_PTR(-ENOTSUPP);
+	}
+
 	inner_map_meta_size = sizeof(*inner_map_meta);
 	/* In some cases verifier needs to access beyond just base map. */
 	if (inner_map->ops == &array_map_ops)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b30d4c855e3f..b11f247eaa77 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -464,7 +464,7 @@ int map_check_no_btf(const struct bpf_map *map,
 	return -ENOTSUPP;
 }
 
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			 u32 btf_key_id, u32 btf_value_id)
 {
 	const struct btf_type *key_type, *value_type;
@@ -479,6 +479,21 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
 	if (!value_type || value_size != map->value_size)
 		return -EINVAL;
 
+	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
+
+	if (map_value_has_spin_lock(map)) {
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY)
+			return -ENOTSUPP;
+		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
+		    map->value_size) {
+			WARN_ONCE(1,
+				  "verifier bug spin_lock_off %d value_size %d\n",
+				  map->spin_lock_off, map->value_size);
+			return -EFAULT;
+		}
+	}
+
 	if (map->ops->map_check_btf)
 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 
@@ -543,6 +558,8 @@ static int map_create(union bpf_attr *attr)
 		map->btf = btf;
 		map->btf_key_type_id = attr->btf_key_type_id;
 		map->btf_value_type_id = attr->btf_value_type_id;
+	} else {
+		map->spin_lock_off = -EINVAL;
 	}
 
 	err = security_bpf_map_alloc(map);
@@ -750,7 +767,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			memcpy(value, ptr, value_size);
+			copy_map_value(map, value, ptr);
 		}
 		rcu_read_unlock();
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 55571e984b51..dfb39857a420 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -211,6 +211,7 @@ struct bpf_call_arg_meta {
 	int access_size;
 	u64 msize_max_value;
 	int ptr_id;
+	int func_id;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -349,6 +350,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg)
 	return type_is_refcounted(reg->type);
 }
 
+static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
+{
+	return reg->type == PTR_TO_MAP_VALUE &&
+		map_value_has_spin_lock(reg->map_ptr);
+}
+
 static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
 {
 	return type_is_refcounted_or_null(reg->type);
@@ -710,6 +717,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	}
 	dst_state->speculative = src->speculative;
 	dst_state->curframe = src->curframe;
+	dst_state->active_spin_lock = src->active_spin_lock;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -1476,6 +1484,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	if (err)
 		verbose(env, "R%d max value is outside of the array range\n",
 			regno);
+
+	if (map_value_has_spin_lock(reg->map_ptr)) {
+		u32 lock = reg->map_ptr->spin_lock_off;
+
+		/* if any part of struct bpf_spin_lock can be touched by
+		 * load/store reject this program.
+		 * To check that [x1, x2) overlaps with [y1, y2)
+		 * it is sufficient to check x1 < y2 && y1 < x2.
+		 */
+		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
+		     lock < reg->umax_value + off + size) {
+			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
+			return -EACCES;
+		}
+	}
 	return err;
 }
 
@@ -2188,6 +2211,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	}
 }
 
+/* Implementation details:
+ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
+ * Two bpf_map_lookups (even with the same key) will have different reg->id.
+ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
+ * value_or_null->value transition, since the verifier only cares about
+ * the range of access to valid map value pointer and doesn't care about actual
+ * address of the map element.
+ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
+ * reg->id > 0 after value_or_null->value transition. By doing so
+ * two bpf_map_lookups will be considered two different pointers that
+ * point to different bpf_spin_locks.
+ * The verifier allows taking only one bpf_spin_lock at a time to avoid
+ * dead-locks.
+ * Since only one bpf_spin_lock is allowed the checks are simpler than
+ * reg_is_refcounted() logic. The verifier needs to remember only
+ * one spin_lock instead of array of acquired_refs.
+ * cur_state->active_spin_lock remembers which map value element got locked
+ * and clears it after bpf_spin_unlock.
+ */
+static int process_spin_lock(struct bpf_verifier_env *env, int regno,
+			     bool is_lock)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_verifier_state *cur = env->cur_state;
+	bool is_const = tnum_is_const(reg->var_off);
+	struct bpf_map *map = reg->map_ptr;
+	u64 val = reg->var_off.value;
+
+	if (reg->type != PTR_TO_MAP_VALUE) {
+		verbose(env, "R%d is not a pointer to map_value\n", regno);
+		return -EINVAL;
+	}
+	if (!is_const) {
+		verbose(env,
+			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
+			regno);
+		return -EINVAL;
+	}
+	if (!map->btf) {
+		verbose(env,
+			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (!map_value_has_spin_lock(map)) {
+		if (map->spin_lock_off == -E2BIG)
+			verbose(env,
+				"map '%s' has more than one 'struct bpf_spin_lock'\n",
+				map->name);
+		else if (map->spin_lock_off == -ENOENT)
+			verbose(env,
+				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
+				map->name);
+		else
+			verbose(env,
+				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
+				map->name);
+		return -EINVAL;
+	}
+	if (map->spin_lock_off != val + reg->off) {
+		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
+			val + reg->off);
+		return -EINVAL;
+	}
+	if (is_lock) {
+		if (cur->active_spin_lock) {
+			verbose(env,
+				"Locking two bpf_spin_locks are not allowed\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = reg->id;
+	} else {
+		if (!cur->active_spin_lock) {
+			verbose(env, "bpf_spin_unlock without taking a lock\n");
+			return -EINVAL;
+		}
+		if (cur->active_spin_lock != reg->id) {
+			verbose(env, "bpf_spin_unlock of different lock\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = 0;
+	}
+	return 0;
+}
+
 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 {
 	return type == ARG_PTR_TO_MEM ||
@@ -2264,6 +2372,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EFAULT;
 		}
 		meta->ptr_id = reg->id;
+	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+		if (meta->func_id == BPF_FUNC_spin_lock) {
+			if (process_spin_lock(env, regno, true))
+				return -EACCES;
+		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
+			if (process_spin_lock(env, regno, false))
+				return -EACCES;
+		} else {
+			verbose(env, "verifier internal error\n");
+			return -EFAULT;
+		}
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2905,6 +3024,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return err;
 	}
 
+	meta.func_id = func_id;
 	/* check args */
 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
 	if (err)
@@ -4638,7 +4758,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 			reg->type = PTR_TO_SOCKET;
 		}
-		if (is_null || !reg_is_refcounted(reg)) {
+		if (is_null || !(reg_is_refcounted(reg) ||
+				 reg_may_point_to_spin_lock(reg))) {
 			/* We don't need id from this point onwards anymore,
 			 * thus we should better reset it, so that state
 			 * pruning has chances to take effect.
@@ -5053,6 +5174,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 	}
 
+	if (env->cur_state->active_spin_lock) {
+		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
+		return -EINVAL;
+	}
+
 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
 		verbose(env,
 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -5793,8 +5919,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
 		 * everything else matches, we are OK.
-		 * We don't care about the 'id' value, because nothing
-		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+		 * 'id' is not compared, since it's only used for maps with
+		 * bpf_spin_lock inside map element and in such cases if
+		 * the rest of the prog is valid for one map element then
+		 * it's valid for all map elements regardless of the key
+		 * used in bpf_map_lookup()
 		 */
 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
 		       range_within(rold, rcur) &&
@@ -5997,6 +6126,9 @@ static bool states_equal(struct bpf_verifier_env *env,
 	if (old->speculative && !cur->speculative)
 		return false;
 
+	if (old->active_spin_lock != cur->active_spin_lock)
+		return false;
+
 	/* for states to be equal callsites have to be the same
 	 * and all frame states need to be equivalent
 	 */
@@ -6415,6 +6547,12 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock &&
+				    (insn->src_reg == BPF_PSEUDO_CALL ||
+				     insn->imm != BPF_FUNC_spin_unlock)) {
+					verbose(env, "function calls are not allowed while holding a lock\n");
+					return -EINVAL;
+				}
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
 				else
@@ -6445,6 +6583,11 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock) {
+					verbose(env, "bpf_spin_unlock is missing\n");
+					return -EINVAL;
+				}
+
 				if (state->curframe) {
 					/* exit from nested function */
 					env->prev_insn_idx = env->insn_idx;
@@ -6542,6 +6685,19 @@ static int check_map_prealloc(struct bpf_map *map)
 		!(map->map_flags & BPF_F_NO_PREALLOC);
 }
 
+static bool is_tracing_prog_type(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 					struct bpf_map *map,
 					struct bpf_prog *prog)
@@ -6564,6 +6720,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		}
 	}
 
+	if ((is_tracing_prog_type(prog->type) ||
+	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
+	    map_value_has_spin_lock(map)) {
+		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
+		return -EINVAL;
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index dbb2ace0fbd5..ed11b2860cee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5293,10 +5293,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	default:
+		break;
+	}
+
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
+	switch (func_id) {
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
-			return bpf_get_trace_printk_proto();
-		/* else, fall through */
+		return bpf_get_trace_printk_proto();
 	default:
 		return NULL;
 	}
-- 
Gitee


From 1622cd8b0ad6cb570b6f4c64f3fda2bbf9b0f1ac Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:05 -0800
Subject: [PATCH 14/23] bpf: add support for bpf_spin_lock to cgroup local
 storage

ANBZ: #5530

commit e16d2f1ab96849b4b65e64b82550a7ecdbf405eb upstream.

Allow 'struct bpf_spin_lock' to reside inside cgroup local storage.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 kernel/bpf/local_storage.c | 2 ++
 kernel/bpf/syscall.c       | 3 ++-
 kernel/bpf/verifier.c      | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 07a34ef562a0..0295427f06e2 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -147,6 +147,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 		return -ENOMEM;
 
 	memcpy(&new->data[0], value, map->value_size);
+	check_and_init_map_lock(map, new->data);
 
 	new = xchg(&storage->buf, new);
 	kfree_rcu(new, rcu);
@@ -483,6 +484,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
 		storage->buf = kmalloc_node(size, flags, map->numa_node);
 		if (!storage->buf)
 			goto enomem;
+		check_and_init_map_lock(map, storage->buf->data);
 	} else {
 		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
 		if (!storage->percpu_buf)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b11f247eaa77..258184bf856d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -483,7 +483,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 
 	if (map_value_has_spin_lock(map)) {
 		if (map->map_type != BPF_MAP_TYPE_HASH &&
-		    map->map_type != BPF_MAP_TYPE_ARRAY)
+		    map->map_type != BPF_MAP_TYPE_ARRAY &&
+		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dfb39857a420..0047c7c53c94 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3107,6 +3107,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+			if (map_value_has_spin_lock(meta.map_ptr))
+				regs[BPF_REG_0].id = ++env->id_gen;
 		} else {
 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 			regs[BPF_REG_0].id = ++env->id_gen;
-- 
Gitee


From 14e05ff08b2fd99fd37952bcf0559501b86b824d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:06 -0800
Subject: [PATCH 15/23] tools/bpf: sync include/uapi/linux/bpf.h

ANBZ: #5530

commit 7dac3ae42cf8203c0a9c54cb387512c91b64e22a upstream.

sync bpf.h

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/include/uapi/linux/bpf.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8cc449340628..ebae3f382d46 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2422,7 +2422,9 @@ union bpf_attr {
 	FN(map_peek_elem),		\
 	FN(msg_push_data),		\
 	FN(msg_pop_data),		\
-	FN(rc_pointer_rel),
+	FN(rc_pointer_rel),		\
+	FN(spin_lock),			\
+	FN(spin_unlock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3057,4 +3059,7 @@ struct bpf_line_info {
 	__u32	line_col;
 };
 
+struct bpf_spin_lock {
+	__u32	val;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
Gitee


From bf42454599eb39a065f04ed367a95f96f6af21ad Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:07 -0800
Subject: [PATCH 16/23] selftests/bpf: add bpf_spin_lock verifier tests

ANBZ: #5530

commit b4d4556c32667728c9737619580b6ade695027e0 upstream.

add bpf_spin_lock tests to test_verifier.c that don't require
latest llvm with BTF support

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_verifier.c | 435 +++++++++++++++++++-
 1 file changed, 434 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 907560a849c9..706ccfbad01b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -31,6 +31,7 @@
 #include <linux/bpf_perf_event.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
+#include <linux/btf.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -49,7 +50,7 @@
 
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	13
+#define MAX_NR_MAPS	14
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -77,6 +78,7 @@ struct bpf_test {
 	int fixup_map_in_map[MAX_FIXUPS];
 	int fixup_cgroup_storage[MAX_FIXUPS];
 	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
+	int fixup_map_spin_lock[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t retval, retval_unpriv, insn_processed;
@@ -15080,6 +15082,337 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.retval = 2,
 	},
+	{
+		"spin_lock: test1 success",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test2 direct ld/st",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "cannot be accessed directly",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test3 direct ld/st",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 1),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "cannot be accessed directly",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test4 direct ld/st",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 3),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "cannot be accessed directly",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test5 call within a locked region",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "calls are not allowed",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test6 missing unlock",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "unlock is missing",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test7 unlock without lock",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "without taking a lock",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test8 double lock",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "calls are not allowed",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test9 different lock",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3, 11 },
+		.result = REJECT,
+		.errstr = "unlock of different lock",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test10 lock in subprog without unlock",
+		.insns = {
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 3 },
+		.result = REJECT,
+		.errstr = "unlock is missing",
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"spin_lock: test11 ld_abs under lock",
+		.insns = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+		BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+		BPF_LD_MAP_FD(BPF_REG_1,
+				0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+		BPF_LD_ABS(BPF_B, 0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map_spin_lock = { 4 },
+		.result = REJECT,
+		.errstr = "inside bpf_spin_lock",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -15223,6 +15556,98 @@ static int create_cgroup_storage(bool percpu)
 	return fd;
 }
 
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) \
+	(name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+	BTF_INT_ENC(encoding, bits_offset, bits)
+#define BTF_MEMBER_ENC(name, type, bits_offset) \
+	(name), (type), (bits_offset)
+
+struct btf_raw_data {
+	__u32 raw_types[64];
+	const char *str_sec;
+	__u32 str_sec_size;
+};
+
+/* struct bpf_spin_lock {
+ *   int val;
+ * };
+ * struct val {
+ *   int cnt;
+ *   struct bpf_spin_lock l;
+ * };
+ */
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+static __u32 btf_raw_types[] = {
+	/* int */
+	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+	/* struct bpf_spin_lock */                      /* [2] */
+	BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+	BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+	/* struct val */                                /* [3] */
+	BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+	BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+	BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+};
+
+static int load_btf(void)
+{
+	struct btf_header hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = sizeof(btf_raw_types),
+		.str_off = sizeof(btf_raw_types),
+		.str_len = sizeof(btf_str_sec),
+	};
+	void *ptr, *raw_btf;
+	int btf_fd;
+
+	ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
+			       sizeof(btf_str_sec));
+
+	memcpy(ptr, &hdr, sizeof(hdr));
+	ptr += sizeof(hdr);
+	memcpy(ptr, btf_raw_types, hdr.type_len);
+	ptr += hdr.type_len;
+	memcpy(ptr, btf_str_sec, hdr.str_len);
+	ptr += hdr.str_len;
+
+	btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0);
+	free(raw_btf);
+	if (btf_fd < 0)
+		return -1;
+	return btf_fd;
+}
+
+static int create_map_spin_lock(void)
+{
+	struct bpf_create_map_attr attr = {
+		.name = "test_map",
+		.map_type = BPF_MAP_TYPE_ARRAY,
+		.key_size = 4,
+		.value_size = 8,
+		.max_entries = 1,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 3,
+	};
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+	attr.btf_fd = btf_fd;
+	fd = bpf_create_map_xattr(&attr);
+	if (fd < 0)
+		printf("Failed to create map with spin_lock\n");
+	return fd;
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
@@ -15241,6 +15666,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 	int *fixup_map_in_map = test->fixup_map_in_map;
 	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
 	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
+	int *fixup_map_spin_lock = test->fixup_map_spin_lock;
 
 	if (test->fill_helper)
 		test->fill_helper(test);
@@ -15356,6 +15782,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 			fixup_map_stacktrace++;
 		} while (*fixup_map_stacktrace);
 	}
+	if (*fixup_map_spin_lock) {
+		map_fds[13] = create_map_spin_lock();
+		do {
+			prog[*fixup_map_spin_lock].imm = map_fds[13];
+			fixup_map_spin_lock++;
+		} while (*fixup_map_spin_lock);
+	}
 }
 
 static int set_admin(bool admin)
-- 
Gitee


From 11196fd5328efb26c57c2742a968c27f1cdccbef Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:08 -0800
Subject: [PATCH 17/23] selftests/bpf: add bpf_spin_lock C test

ANBZ: #5530

commit ab963beb9f5db303b4fd7e34e422b96270e5b972 upstream.

add bpf_spin_lock C based test that requires latest llvm with BTF support

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/Makefile         |   2 +-
 tools/testing/selftests/bpf/bpf_helpers.h    |   4 +
 tools/testing/selftests/bpf/test_progs.c     |  43 +++++++-
 tools/testing/selftests/bpf/test_spin_lock.c | 108 +++++++++++++++++++
 4 files changed, 155 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_spin_lock.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e8bee3ad56cb..1d02c27d4df0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ BPF_OBJ_FILES = \
 	sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
 	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-	test_map_in_map.o
+	test_map_in_map.o test_spin_lock.o
 
 # Objects are built with default compilation flags and with sub-register
 # code-gen enabled.
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 59716f8920c0..afd8158dc1fd 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -168,6 +168,10 @@ static int (*bpf_sk_release)(struct bpf_sock *sk) =
 	(void *) BPF_FUNC_sk_release;
 static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) =
 	(void *) BPF_FUNC_rc_pointer_rel;
+static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
+	(void *) BPF_FUNC_spin_lock;
+static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
+	(void *) BPF_FUNC_spin_unlock;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2fed724b1e5c..5a32914f072a 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -28,7 +28,7 @@ typedef __u16 __sum16;
 #include <sys/wait.h>
 #include <sys/types.h>
 #include <fcntl.h>
-
+#include <pthread.h>
 #include <linux/bpf.h>
 #include <linux/err.h>
 #include <bpf/bpf.h>
@@ -1985,6 +1985,46 @@ static void test_flow_dissector(void)
 	bpf_object__close(obj);
 }
 
+static void *test_spin_lock(void *arg)
+{
+	__u32 duration, retval;
+	int err, prog_fd = *(u32 *) arg;
+
+	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	pthread_exit(arg);
+}
+
+static void test_spinlock(void)
+{
+	const char *file = "./test_spin_lock.o";
+	pthread_t thread_id[4];
+	struct bpf_object *obj;
+	int prog_fd;
+	int err = 0, i;
+	void *ret;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+	if (err) {
+		printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
+		goto close_prog;
+	}
+	for (i = 0; i < 4; i++)
+		assert(pthread_create(&thread_id[i], NULL,
+				      &test_spin_lock, &prog_fd) == 0);
+	for (i = 0; i < 4; i++)
+		assert(pthread_join(thread_id[i], &ret) == 0 &&
+		       ret == (void *)&prog_fd);
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
 	srand(time(NULL));
@@ -2013,6 +2053,7 @@ int main(void)
 	test_queue_stack_map(QUEUE);
 	test_queue_stack_map(STACK);
 	test_flow_dissector();
+	test_spinlock();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_spin_lock.c b/tools/testing/selftests/bpf/test_spin_lock.c
new file mode 100644
index 000000000000..40f904312090
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_spin_lock.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+struct hmap_elem {
+	volatile int cnt;
+	struct bpf_spin_lock lock;
+	int test_padding;
+};
+
+struct bpf_map_def SEC("maps") hmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct hmap_elem),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
+
+
+struct cls_elem {
+	struct bpf_spin_lock lock;
+	volatile int cnt;
+};
+
+struct bpf_map_def SEC("maps") cls_map = {
+	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct cls_elem),
+};
+
+BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
+		     struct cls_elem);
+
+struct bpf_vqueue {
+	struct bpf_spin_lock lock;
+	/* 4 byte hole */
+	unsigned long long lasttime;
+	int credit;
+	unsigned int rate;
+};
+
+struct bpf_map_def SEC("maps") vqueue = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct bpf_vqueue),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
+#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
+
+SEC("spin_lock_demo")
+int bpf_sping_lock_test(struct __sk_buff *skb)
+{
+	volatile int credit = 0, max_credit = 100, pkt_len = 64;
+	struct hmap_elem zero = {}, *val;
+	unsigned long long curtime;
+	struct bpf_vqueue *q;
+	struct cls_elem *cls;
+	int key = 0;
+	int err = 0;
+
+	val = bpf_map_lookup_elem(&hmap, &key);
+	if (!val) {
+		bpf_map_update_elem(&hmap, &key, &zero, 0);
+		val = bpf_map_lookup_elem(&hmap, &key);
+		if (!val) {
+			err = 1;
+			goto err;
+		}
+	}
+	/* spin_lock in hash map run time test */
+	bpf_spin_lock(&val->lock);
+	if (val->cnt)
+		val->cnt--;
+	else
+		val->cnt++;
+	if (val->cnt != 0 && val->cnt != 1)
+		err = 1;
+	bpf_spin_unlock(&val->lock);
+
+	/* spin_lock in array. virtual queue demo */
+	q = bpf_map_lookup_elem(&vqueue, &key);
+	if (!q)
+		goto err;
+	curtime = bpf_ktime_get_ns();
+	bpf_spin_lock(&q->lock);
+	q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate);
+	q->lasttime = curtime;
+	if (q->credit > max_credit)
+		q->credit = max_credit;
+	q->credit -= pkt_len;
+	credit = q->credit;
+	bpf_spin_unlock(&q->lock);
+
+	/* spin_lock in cgroup local storage */
+	cls = bpf_get_local_storage(&cls_map, 0);
+	bpf_spin_lock(&cls->lock);
+	cls->cnt++;
+	bpf_spin_unlock(&cls->lock);
+
+err:
+	return err;
+}
+char _license[] SEC("license") = "GPL";
-- 
Gitee


From e194ad8f1d745e36c7d385a4a24af651332e91bd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:09 -0800
Subject: [PATCH 18/23] bpf: introduce BPF_F_LOCK flag

ANBZ: #5530

commit 96049f3afd50fe8db69fa0068cdca822e747b1e4 upstream.

Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands
and for map_update() helper function.
In all these cases take a lock of existing element (which was provided
in BTF description) before copying (in or out) the rest of map value.

Implementation details that are part of uapi:

Array:
The array map takes the element lock for lookup/update.

Hash:
hash map also takes the lock for lookup/update and tries to avoid the bucket lock.
If old element exists it takes the element lock and updates the element in place.
If element doesn't exist it allocates new one and inserts into hash table
while holding the bucket lock.
In rare case the hashmap has to take both the bucket lock and the element lock
to update old value in place.

Cgroup local storage:
It is similar to array. update in place and lookup are done with lock taken.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 include/linux/bpf.h        |  2 ++
 include/uapi/linux/bpf.h   |  1 +
 kernel/bpf/arraymap.c      | 24 ++++++++++++++--------
 kernel/bpf/hashtab.c       | 42 +++++++++++++++++++++++++++++++++++---
 kernel/bpf/helpers.c       | 16 +++++++++++++++
 kernel/bpf/local_storage.c | 14 ++++++++++++-
 kernel/bpf/syscall.c       | 25 +++++++++++++++++++++--
 7 files changed, 110 insertions(+), 14 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4013b1a77dda..dbfefadb89eb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -125,6 +125,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 		memcpy(dst, src, map->value_size);
 	}
 }
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src);
 
 struct bpf_offload_dev;
 struct bpf_offloaded_map;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f7342e8e56d..7c27ce271c4b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -269,6 +269,7 @@ enum bpf_attach_type {
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
+#define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 7d78c0d0b031..21132ed828d0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
+	char *val;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -262,18 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	if (unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
-	else
-		copy_map_value(map,
-			       array->value +
-			       array->elem_size * (index & array->index_mask),
-			       value);
+	} else {
+		val = array->value +
+			array->elem_size * (index & array->index_mask);
+		if (map_flags & BPF_F_LOCK)
+			copy_map_value_locked(map, val, value, false);
+		else
+			copy_map_value(map, val, value);
+	}
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5e729674c654..3a49ab2bfac1 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -816,11 +816,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && map_flags == BPF_NOEXIST)
+	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && map_flags == BPF_EXIST)
+	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
@@ -839,7 +839,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -852,6 +852,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	if (unlikely(map_flags & BPF_F_LOCK)) {
+		if (unlikely(!map_value_has_spin_lock(map)))
+			return -EINVAL;
+		/* find an element without taking the bucket lock */
+		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+					      htab->n_buckets);
+		ret = check_flags(htab, l_old, map_flags);
+		if (ret)
+			return ret;
+		if (l_old) {
+			/* grab the element lock and update value in place */
+			copy_map_value_locked(map,
+					      l_old->key + round_up(key_size, 8),
+					      value, false);
+			return 0;
+		}
+		/* fall through, grab the bucket lock and lookup again.
+		 * 99.9% chance that the element won't be found,
+		 * but second lookup under lock has to be done.
+		 */
+	}
+
 	/* bpf_map_update_elem() can be called in_irq() */
 	raw_spin_lock_irqsave(&b->lock, flags);
 
@@ -861,6 +883,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
+	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+		/* first lookup without the bucket lock didn't find the element,
+		 * but second lookup with the bucket lock found it.
+		 * This case is highly unlikely, but has to be dealt with:
+		 * grab the element lock in addition to the bucket lock
+		 * and update element in place
+		 */
+		copy_map_value_locked(map,
+				      l_old->key + round_up(key_size, 8),
+				      value, false);
+		ret = 0;
+		goto err;
+	}
+
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 				l_old);
 	if (IS_ERR(l_new)) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d6c7859e6939..356cb04fdc34 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
 };
 
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src)
+{
+	struct bpf_spin_lock *lock;
+
+	if (lock_src)
+		lock = src + map->spin_lock_off;
+	else
+		lock = dst + map->spin_lock_off;
+	preempt_disable();
+	____bpf_spin_lock(lock);
+	copy_map_value(map, dst, src);
+	____bpf_spin_unlock(lock);
+	preempt_enable();
+}
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 0295427f06e2..6b572e2de7fb 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags != BPF_ANY && flags != BPF_EXIST)
+	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
+		return -EINVAL;
+
+	if (unlikely(flags & BPF_NOEXIST))
+		return -EINVAL;
+
+	if (unlikely((flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	if (!storage)
 		return -ENOENT;
 
+	if (flags & BPF_F_LOCK) {
+		copy_map_value_locked(map, storage->buf->data, value, false);
+		return 0;
+	}
+
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
 			   map->value_size,
 			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 258184bf856d..7ddd79726778 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -683,7 +683,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -699,6 +699,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -709,6 +712,12 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -768,7 +777,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			copy_map_value(map, value, ptr);
+			if (attr->flags & BPF_F_LOCK)
+				/* lock 'ptr' and copy everything but lock */
+				copy_map_value_locked(map, value, ptr, true);
+			else
+				copy_map_value(map, value, ptr);
+			/* mask lock, since value wasn't zero inited */
+			check_and_init_map_lock(map, value);
 		}
 		rcu_read_unlock();
 	}
@@ -831,6 +846,12 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
-- 
Gitee


From ea09ddd25f1417a4d0d31b531b502ce6a91b5b81 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:10 -0800
Subject: [PATCH 19/23] tools/bpf: sync uapi/bpf.h

ANBZ: #5530

commit e44ac9a22b19eec92d10c7b7c69ad6f1995b4dcf upstream.

add BPF_F_LOCK definition to tools/include/uapi/linux/bpf.h

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ebae3f382d46..33c85c56ecea 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -267,6 +267,7 @@ enum bpf_attach_type {
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
+#define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
-- 
Gitee


From 302088b82101561adfa95cbe98a6e86286930389 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:11 -0800
Subject: [PATCH 20/23] libbpf: introduce bpf_map_lookup_elem_flags()

ANBZ: #5530

commit df5d22facd78e475da2e0d506f239e32cdffaf99 upstream.

Introduce
int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
helper to lookup array/hash/cgroup_local_storage elements with BPF_F_LOCK flag.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/lib/bpf/bpf.c      | 13 +++++++++++++
 tools/lib/bpf/bpf.h      |  2 ++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 16 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d5d176303ca0..dabf8c9a8f8a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -371,6 +371,19 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = flags;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 64762a62c008..fd638495a7e5 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -111,6 +111,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
 				   __u64 flags);
 
 LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
+					 __u64 flags);
 LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 					      void *value);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 266bc95d0142..f6f96fc38c50 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -130,4 +130,5 @@ LIBBPF_0.0.2 {
 		bpf_probe_helper;
 		bpf_probe_map_type;
 		bpf_probe_prog_type;
+		bpf_map_lookup_elem_flags;
 } LIBBPF_0.0.1;
-- 
Gitee


From afadb51481a334664bfd3b14e483ea40a010b28d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:12 -0800
Subject: [PATCH 21/23] selftests/bpf: test for BPF_F_LOCK

ANBZ: #5530

commit ba72a7b4badbf4dd3c49c585c3c662bacc54f46e upstream.

Add C based test that runs 4 bpf programs in parallel
that update the same hash and array maps.
And another 2 threads that read from these two maps
via lookup(key, value, BPF_F_LOCK) api
to make sure the user space sees consistent value in both
hash and array elements while user space races with kernel bpf progs.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/Makefile        |  2 +-
 tools/testing/selftests/bpf/test_map_lock.c | 66 ++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c    | 74 +++++++++++++++++++++
 3 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_map_lock.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1d02c27d4df0..7438233d0681 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ BPF_OBJ_FILES = \
 	sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
 	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-	test_map_in_map.o test_spin_lock.o
+	test_map_in_map.o test_spin_lock.o test_map_lock.o
 
 # Objects are built with default compilation flags and with sub-register
 # code-gen enabled.
diff --git a/tools/testing/selftests/bpf/test_map_lock.c b/tools/testing/selftests/bpf/test_map_lock.c
new file mode 100644
index 000000000000..af8cc68ed2f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_map_lock.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+#define VAR_NUM 16
+
+struct hmap_elem {
+	struct bpf_spin_lock lock;
+	int var[VAR_NUM];
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct hmap_elem),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
+
+struct array_elem {
+	struct bpf_spin_lock lock;
+	int var[VAR_NUM];
+};
+
+struct bpf_map_def SEC("maps") array_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct array_elem),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
+
+SEC("map_lock_demo")
+int bpf_map_lock_test(struct __sk_buff *skb)
+{
+	struct hmap_elem zero = {}, *val;
+	int rnd = bpf_get_prandom_u32();
+	int key = 0, err = 1, i;
+	struct array_elem *q;
+
+	val = bpf_map_lookup_elem(&hash_map, &key);
+	if (!val)
+		goto err;
+	/* spin_lock in hash map */
+	bpf_spin_lock(&val->lock);
+	for (i = 0; i < VAR_NUM; i++)
+		val->var[i] = rnd;
+	bpf_spin_unlock(&val->lock);
+
+	/* spin_lock in array */
+	q = bpf_map_lookup_elem(&array_map, &key);
+	if (!q)
+		goto err;
+	bpf_spin_lock(&q->lock);
+	for (i = 0; i < VAR_NUM; i++)
+		q->var[i] = rnd;
+	bpf_spin_unlock(&q->lock);
+	err = 0;
+err:
+	return err;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5a32914f072a..2177c349d16d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -2025,6 +2025,79 @@ static void test_spinlock(void)
 	bpf_object__close(obj);
 }
 
+static void *parallel_map_access(void *arg)
+{
+	int err, map_fd = *(u32 *) arg;
+	int vars[17], i, j, rnd, key = 0;
+
+	for (i = 0; i < 10000; i++) {
+		err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK);
+		if (err) {
+			printf("lookup failed\n");
+			error_cnt++;
+			goto out;
+		}
+		if (vars[0] != 0) {
+			printf("lookup #%d var[0]=%d\n", i, vars[0]);
+			error_cnt++;
+			goto out;
+		}
+		rnd = vars[1];
+		for (j = 2; j < 17; j++) {
+			if (vars[j] == rnd)
+				continue;
+			printf("lookup #%d var[1]=%d var[%d]=%d\n",
+			       i, rnd, j, vars[j]);
+			error_cnt++;
+			goto out;
+		}
+	}
+out:
+	pthread_exit(arg);
+}
+
+static void test_map_lock(void)
+{
+	const char *file = "./test_map_lock.o";
+	int prog_fd, map_fd[2], vars[17] = {};
+	pthread_t thread_id[6];
+	struct bpf_object *obj;
+	int err = 0, key = 0, i;
+	void *ret;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+	if (err) {
+		printf("test_map_lock:bpf_prog_load errno %d\n", errno);
+		goto close_prog;
+	}
+	map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
+	if (map_fd[0] < 0)
+		goto close_prog;
+	map_fd[1] = bpf_find_map(__func__, obj, "array_map");
+	if (map_fd[1] < 0)
+		goto close_prog;
+
+	bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK);
+
+	for (i = 0; i < 4; i++)
+		assert(pthread_create(&thread_id[i], NULL,
+				      &test_spin_lock, &prog_fd) == 0);
+	for (i = 4; i < 6; i++)
+		assert(pthread_create(&thread_id[i], NULL,
+				      &parallel_map_access, &map_fd[i - 4]) == 0);
+	for (i = 0; i < 4; i++)
+		assert(pthread_join(thread_id[i], &ret) == 0 &&
+		       ret == (void *)&prog_fd);
+	for (i = 4; i < 6; i++)
+		assert(pthread_join(thread_id[i], &ret) == 0 &&
+		       ret == (void *)&map_fd[i - 4]);
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
 	srand(time(NULL));
@@ -2054,6 +2127,7 @@ int main(void)
 	test_queue_stack_map(STACK);
 	test_flow_dissector();
 	test_spinlock();
+	test_map_lock();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-- 
Gitee


From 82c3cceffd3c975c0e358242344e0fc27d3a8cf7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 27 Feb 2019 13:22:56 -0800
Subject: [PATCH 22/23] bpf: set inner_map_meta->spin_lock_off correctly

ANBZ: #5530

commit a115d0ed7201a5904c084ae6f07913fe2b9396a6 upstream.

Commit d83525ca62cf ("bpf: introduce bpf_spin_lock")
introduced bpf_spin_lock and the field spin_lock_off
in kernel internal structure bpf_map has the following
meaning:
  >=0 valid offset, <0 error

For every map created, the kernel will ensure
spin_lock_off has correct value.

Currently, bpf_map->spin_lock_off is not copied
from the inner map to the map_in_map inner_map_meta
during a map_in_map type map creation, so
inner_map_meta->spin_lock_off = 0.
This will give verifier wrong information that
inner_map has bpf_spin_lock and the bpf_spin_lock
is defined at offset 0. An access to offset 0
of a value pointer will trigger the following error:
   bpf_spin_lock cannot be accessed directly by load/store

This patch fixed the issue by copy inner map's spin_lock_off
value to inner_map_meta->spin_lock_off.

Fixes: d83525ca62cf ("bpf: introduce bpf_spin_lock")
Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 kernel/bpf/map_in_map.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 583346a0ab29..3dff41403583 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -58,6 +58,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->value_size = inner_map->value_size;
 	inner_map_meta->map_flags = inner_map->map_flags;
 	inner_map_meta->max_entries = inner_map->max_entries;
+	inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
 
 	/* Misc members not needed in bpf_map_meta_equal() check. */
 	inner_map_meta->ops = inner_map->ops;
-- 
Gitee


From a517d5c5fe1f40e0cccfe51e79a6c3831dac4508 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 11 Mar 2019 22:21:09 -0700
Subject: [PATCH 23/23] selftests/bpf: fix segfault of test_progs when prog
 loading failed

ANBZ: #5530

commit d6f1837107c00fa7b2fdb606acf65b33b455dbfd upstream.

The test_progs subtests, test_spin_lock() and test_map_lock(),
requires BTF present to run successfully.
Currently, when BTF failed to load, test_progs will segfault,
  $ ./test_progs
  ...
  12: (bf) r1 = r8
  13: (85) call bpf_spin_lock#93
  map 'hash_map' has to have BTF in order to use bpf_spin_lock
  libbpf: -- END LOG --
  libbpf: failed to load program 'map_lock_demo'
  libbpf: failed to load object './test_map_lock.o'
  test_map_lock:bpf_prog_load errno 13
  Segmentation fault
The segfault is caused by uninitialized variable "obj", which
is used in bpf_object__close(obj), when bpf prog failed to load.
Initializing variable "obj" to NULL in two occasions fixed the problem.
  $ ./test_progs
  ...
  Summary: 219 PASSED, 2 FAILED
Fixes: b4d4556c3266 ("selftests/bpf: add bpf_spin_lock verifier tests")
Fixes: ba72a7b4badb ("selftests/bpf: test for BPF_F_LOCK")
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yuanhe Shu <xiangzao@linux.alibaba.com>
---
 tools/testing/selftests/bpf/test_progs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2177c349d16d..6519fc92b2a8 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -2002,7 +2002,7 @@ static void test_spinlock(void)
 {
 	const char *file = "./test_spin_lock.o";
 	pthread_t thread_id[4];
-	struct bpf_object *obj;
+	struct bpf_object *obj = NULL;
 	int prog_fd;
 	int err = 0, i;
 	void *ret;
@@ -2061,7 +2061,7 @@ static void test_map_lock(void)
 	const char *file = "./test_map_lock.o";
 	int prog_fd, map_fd[2], vars[17] = {};
 	pthread_t thread_id[6];
-	struct bpf_object *obj;
+	struct bpf_object *obj = NULL;
 	int err = 0, key = 0, i;
 	void *ret;
 
-- 
Gitee