From c36e7fdce525808b0f85e6f71836380f18a6db4d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Jun 2019 20:50:47 -0500 Subject: [PATCH 01/27] bpf: Fix ORC unwinding in non-JIT BPF code ANBZ: #5530 commit e55a73251da335873a6e87d68fb17e5aabb8978e upstream. Objtool previously ignored ___bpf_prog_run() because it didn't understand the jump table. This resulted in the ORC unwinder not being able to unwind through non-JIT BPF code. Now that objtool knows how to read jump tables, remove the whitelist and annotate the jump table so objtool can recognize it. Also add an additional "const" to the jump table definition to clarify that the text pointers are constant. Otherwise GCC sets the section writable flag and the assembler spits out warnings. Fixes: d15d356887e7 ("perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER") Reported-by: Song Liu Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Alexei Starovoitov Cc: Peter Zijlstra Cc: Kairui Song Cc: Steven Rostedt Cc: Borislav Petkov Cc: Daniel Borkmann Link: https://lkml.kernel.org/r/881939122b88f32be4c374d248c09d7527a87e35.1561685471.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Yuanhe Shu --- kernel/bpf/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6c95fcdc354e..9b61d82858a7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1305,7 +1305,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z - static const void *jumptable[256] = { + static const void * const jumptable[256] __annotate_jump_table = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL), @@ -1565,7 +1565,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) BUG_ON(1); return 0; } -STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ #define PROG_NAME(stack_size) __bpf_prog_run##stack_size #define DEFINE_BPF_PROG_RUN(stack_size) \ -- Gitee From be89c00111755e6f4ca02319a034cebfe0c556cb Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Jul 2019 20:36:45 -0500 Subject: [PATCH 02/27] bpf: Disable GCC -fgcse optimization for ___bpf_prog_run() ANBZ: #5530 commit 3193c0836f203a91bef96d88c64cccf0be090d9c upstream. On x86-64, with CONFIG_RETPOLINE=n, GCC's "global common subexpression elimination" optimization results in ___bpf_prog_run()'s jumptable code changing from this: select_insn: jmp *jumptable(, %rax, 8) ... ALU64_ADD_X: ... jmp *jumptable(, %rax, 8) ALU_ADD_X: ... jmp *jumptable(, %rax, 8) to this: select_insn: mov jumptable, %r12 jmp *(%r12, %rax, 8) ... ALU64_ADD_X: ... jmp *(%r12, %rax, 8) ALU_ADD_X: ... jmp *(%r12, %rax, 8) The jumptable address is placed in a register once, at the beginning of the function. The function execution can then go through multiple indirect jumps which rely on that same register value. This has a few issues: 1) Objtool isn't smart enough to be able to track such a register value across multiple recursive indirect jumps through the jump table. 2) With CONFIG_RETPOLINE enabled, this optimization actually results in a small slowdown. I measured a ~4.7% slowdown in the test_bpf "tcpdump port 22" selftest. This slowdown is actually predicted by the GCC manual: Note: When compiling a program using computed gotos, a GCC extension, you may get better run-time performance if you disable the global common subexpression elimination pass by adding -fno-gcse to the command line. So just disable the optimization for this function. Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code") Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/30c3ca29ba037afcbd860a8672eef0021addf9fe.1563413318.git.jpoimboe@redhat.com Signed-off-by: Yuanhe Shu --- include/linux/compiler-gcc.h | 2 ++ include/linux/compiler_types.h | 4 ++++ kernel/bpf/core.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3ebee1ce6f98..6c37c449dd20 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -234,3 +234,5 @@ #else #define __diag_GCC_8(s) #endif + +#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 73389cc2f5bb..a3868e90a662 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -159,6 +159,10 @@ struct ftrace_likely_data { #define asm_volatile_goto(x...) asm goto(x) #endif +#ifndef __no_fgcse +# define __no_fgcse +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b61d82858a7..4dd51765f7de 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1301,7 +1301,7 @@ bool bpf_opcode_in_insntable(u8 code) * * Decode and execute eBPF instructions. */ -static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z -- Gitee From 71da70c13f7401e263adcd6dad7438464243ae3b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Jun 2019 20:50:46 -0500 Subject: [PATCH 03/27] objtool: Add support for C jump tables ANBZ: #5530 commit 87b512def792579641499d9bef1d640994ea9c18 upstream. Objtool doesn't know how to read C jump tables, so it has to whitelist functions which use them, causing missing ORC unwinder data for such functions, e.g. ___bpf_prog_run(). C jump tables are very similar to GCC switch jump tables, which objtool already knows how to read. So adding support for C jump tables is easy. It just needs to be able to find the tables and distinguish them from other data. To allow the jump tables to be found, create an __annotate_jump_table macro which can be used to annotate them. The annotation is done by placing the jump table in an .rodata..c_jump_table section. The '.rodata' prefix ensures that the data will be placed in the rodata section by the vmlinux linker script. The double periods are part of an existing convention which distinguishes kernel sections from GCC sections. [backport note] Changes in include/linux/compiler.h has been introduced by d7c65f1f9bb5 ("compiler.h: Move instrumentation_begin()/end() to new header") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Song Liu Cc: Kairui Song Cc: Steven Rostedt Cc: Borislav Petkov Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: https://lkml.kernel.org/r/0ba2ca30442b16b97165992381ce643dc27b3d1a.1561685471.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Yuanhe Shu --- tools/objtool/check.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 89a1a34fffb1..c8c7d6d17386 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -30,6 +30,8 @@ #define FAKE_JUMP_OFFSET -1 +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + struct alternative { struct list_head list; struct instruction *insn; @@ -970,9 +972,15 @@ static struct rela *find_switch_table(struct objtool_file *file, /* * Make sure the .rodata address isn't associated with a - * symbol. gcc jump tables are anonymous data. + * symbol. GCC jump tables are anonymous data. + * + * Also support C jump tables which are in the same format as + * switch jump tables. For objtool to recognize them, they + * need to be placed in the C_JUMP_TABLE_SECTION section. They + * have symbols associated with them. */ - if (find_symbol_containing(rodata_sec, table_offset)) + if (find_symbol_containing(rodata_sec, table_offset) && + strcmp(rodata_sec->name, C_JUMP_TABLE_SECTION)) continue; rodata_rela = find_rela_by_dest(rodata_sec, table_offset); @@ -1212,13 +1220,18 @@ static void mark_rodata(struct objtool_file *file) bool found = false; /* - * This searches for the .rodata section or multiple .rodata.func_name - * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8 - * rodata sections are ignored as they don't contain jump tables. + * Search for the following rodata sections, each of which can + * potentially contain jump tables: + * + * - .rodata: can contain GCC switch tables + * - .rodata.: same, if -fdata-sections is being used + * - .rodata..c_jump_table: contains C annotated jump tables + * + * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) || + !strcmp(sec->name, C_JUMP_TABLE_SECTION)) { sec->rodata = true; found = true; } -- Gitee From d0b188d5ca0bec0c773e18fb1dcc9d64ca46991a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Jul 2019 20:36:53 -0500 Subject: [PATCH 04/27] objtool: Refactor jump table code ANBZ: #5530 commit e7c2bc37bfae120bce3e7cc8c8abf9d110af0757 upstream. Now that C jump tables are supported, call them "jump tables" instead of "switch tables". Also rename some other variables, add comments, and simplify the code flow a bit. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Nick Desaulniers Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/cf951b0c0641628e0b9b81f7ceccd9bcabcb4bd8.1563413318.git.jpoimboe@redhat.com Signed-off-by: Yuanhe Shu --- tools/objtool/check.c | 82 +++++++++++++++++++++++-------------------- tools/objtool/elf.c | 2 +- tools/objtool/elf.h | 2 +- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c8c7d6d17386..ab28687b7407 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -563,7 +563,7 @@ static int add_jump_destinations(struct objtool_file *file) * However this code can't completely replace the * read_symbols() code because this doesn't detect the case * where the parent function's only reference to a subfunction - * is through a switch table. + * is through a jump table. */ if (insn->func && insn->jump_dest->func && insn->func != insn->jump_dest->func && @@ -833,20 +833,24 @@ static int add_special_section_alts(struct objtool_file *file) return ret; } -static int add_switch_table(struct objtool_file *file, struct instruction *insn, +static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct rela *table, struct rela *next_table) { struct rela *rela = table; - struct instruction *alt_insn; + struct instruction *dest_insn; struct alternative *alt; struct symbol *pfunc = insn->func->pfunc; unsigned int prev_offset = 0; - list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) { + /* + * Each @rela is a switch table relocation which points to the target + * instruction. + */ + list_for_each_entry_from(rela, &table->sec->rela_list, list) { if (rela == next_table) break; - /* Make sure the switch table entries are consecutive: */ + /* Make sure the table entries are consecutive: */ if (prev_offset && rela->offset != prev_offset + 8) break; @@ -855,12 +859,12 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, rela->addend == pfunc->offset) break; - alt_insn = find_insn(file, rela->sym->sec, rela->addend); - if (!alt_insn) + dest_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!dest_insn) break; - /* Make sure the jmp dest is in the function or subfunction: */ - if (alt_insn->func->pfunc != pfunc) + /* Make sure the destination is in the same function: */ + if (dest_insn->func->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -869,7 +873,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, return -1; } - alt->insn = alt_insn; + alt->insn = dest_insn; list_add_tail(&alt->list, &insn->alts); prev_offset = rela->offset; } @@ -884,7 +888,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, } /* - * find_switch_table() - Given a dynamic jump, find the switch jump table in + * find_jump_table() - Given a dynamic jump, find the switch jump table in * .rodata associated with it. * * There are 3 basic patterns: @@ -926,13 +930,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, * * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ -static struct rela *find_switch_table(struct objtool_file *file, +static struct rela *find_jump_table(struct objtool_file *file, struct symbol *func, struct instruction *insn) { - struct rela *text_rela, *rodata_rela; + struct rela *text_rela, *table_rela; struct instruction *orig_insn = insn; - struct section *rodata_sec; + struct section *table_sec; unsigned long table_offset; /* @@ -965,7 +969,7 @@ static struct rela *find_switch_table(struct objtool_file *file, continue; table_offset = text_rela->addend; - rodata_sec = text_rela->sym->sec; + table_sec = text_rela->sym->sec; if (text_rela->type == R_X86_64_PC32) table_offset += 4; @@ -979,29 +983,31 @@ static struct rela *find_switch_table(struct objtool_file *file, * need to be placed in the C_JUMP_TABLE_SECTION section. They * have symbols associated with them. */ - if (find_symbol_containing(rodata_sec, table_offset) && - strcmp(rodata_sec->name, C_JUMP_TABLE_SECTION)) + if (find_symbol_containing(table_sec, table_offset) && + strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) continue; - rodata_rela = find_rela_by_dest(rodata_sec, table_offset); - if (rodata_rela) { - /* - * Use of RIP-relative switch jumps is quite rare, and - * indicates a rare GCC quirk/bug which can leave dead - * code behind. - */ - if (text_rela->type == R_X86_64_PC32) - file->ignore_unreachables = true; + /* Each table entry has a rela associated with it. */ + table_rela = find_rela_by_dest(table_sec, table_offset); + if (!table_rela) + continue; - return rodata_rela; - } + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead code + * behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; + + return table_rela; } return NULL; } -static int add_func_switch_tables(struct objtool_file *file, +static int add_func_jump_tables(struct objtool_file *file, struct symbol *func) { struct instruction *insn, *last = NULL, *prev_jump = NULL; @@ -1014,7 +1020,7 @@ static int add_func_switch_tables(struct objtool_file *file, /* * Store back-pointers for unconditional forward jumps such - * that find_switch_table() can back-track using those and + * that find_jump_table() can back-track using those and * avoid some potentially confusing code. */ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && @@ -1029,17 +1035,17 @@ static int add_func_switch_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - rela = find_switch_table(file, func, insn); + rela = find_jump_table(file, func, insn); if (!rela) continue; /* - * We found a switch table, but we don't know yet how big it + * We found a jump table, but we don't know yet how big it * is. Don't add it until we reach the end of the function or - * the beginning of another switch table in the same function. + * the beginning of another jump table in the same function. */ if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, rela); + ret = add_jump_table(file, prev_jump, prev_rela, rela); if (ret) return ret; } @@ -1049,7 +1055,7 @@ static int add_func_switch_tables(struct objtool_file *file, } if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, NULL); + ret = add_jump_table(file, prev_jump, prev_rela, NULL); if (ret) return ret; } @@ -1062,7 +1068,7 @@ static int add_func_switch_tables(struct objtool_file *file, * section which contains a list of addresses within the function to jump to. * This finds these jump tables and adds them to the insn->alts lists. */ -static int add_switch_table_alts(struct objtool_file *file) +static int add_jump_table_alts(struct objtool_file *file) { struct section *sec; struct symbol *func; @@ -1076,7 +1082,7 @@ static int add_switch_table_alts(struct objtool_file *file) if (func->type != STT_FUNC) continue; - ret = add_func_switch_tables(file, func); + ret = add_func_jump_tables(file, func); if (ret) return ret; } @@ -1272,7 +1278,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_switch_table_alts(file); + ret = add_jump_table_alts(file); if (ret) return ret; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 264d49fea814..90b0785c66c5 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -393,7 +393,7 @@ static int read_relas(struct elf *elf) rela->offset = rela->rela.r_offset; symndx = GELF_R_SYM(rela->rela.r_info); rela->sym = find_symbol_by_index(elf, symndx); - rela->rela_sec = sec; + rela->sec = sec; if (!rela->sym) { WARN("can't find rela entry symbol %d for %s", symndx, sec->name); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index bc97ed86b9cd..fb1938a176b7 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -68,7 +68,7 @@ struct rela { struct list_head list; struct hlist_node hash; GElf_Rela rela; - struct section *rela_sec; + struct section *sec; struct symbol *sym; unsigned int type; unsigned long offset; -- Gitee From 1d3e81c10e94c633c549f5fd8497c3444c12ec5f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 17 Jul 2019 20:36:54 -0500 Subject: [PATCH 05/27] objtool: Support repeated uses of the same C jump table ANBZ: #5530 commit bd98c81346468fc2f86aeeb44d4d0d6f763a62b7 upstream. This fixes objtool for both a GCC issue and a Clang issue: 1) GCC issue: kernel/bpf/core.o: warning: objtool: ___bpf_prog_run()+0x8d5: sibling call from callable instruction with modified stack frame With CONFIG_RETPOLINE=n, GCC is doing the following optimization in ___bpf_prog_run(). Before: select_insn: jmp *jumptable(,%rax,8) ... ALU64_ADD_X: ... jmp select_insn ALU_ADD_X: ... jmp select_insn After: select_insn: jmp *jumptable(, %rax, 8) ... ALU64_ADD_X: ... jmp *jumptable(, %rax, 8) ALU_ADD_X: ... jmp *jumptable(, %rax, 8) This confuses objtool. It has never seen multiple indirect jump sites which use the same jump table. For GCC switch tables, the only way of detecting the size of a table is by continuing to scan for more tables. The size of the previous table can only be determined after another switch table is found, or when the scan reaches the end of the function. That logic was reused for C jump tables, and was based on the assumption that each jump table only has a single jump site. The above optimization breaks that assumption. 2) Clang issue: drivers/usb/misc/sisusbvga/sisusb.o: warning: objtool: sisusb_write_mem_bulk()+0x588: can't find switch jump table With clang 9, code can be generated where a function contains two indirect jump instructions which use the same switch table. The fix is the same for both issues: split the jump table parsing into two passes. In the first pass, locate the heads of all switch tables for the function and mark their locations. In the second pass, parse the switch tables and add them. Fixes: e55a73251da3 ("bpf: Fix ORC unwinding in non-JIT BPF code") Reported-by: Randy Dunlap Reported-by: Arnd Bergmann Signed-off-by: Jann Horn Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Nick Desaulniers Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/e995befaada9d4d8b2cf788ff3f566ba900d2b4d.1563413318.git.jpoimboe@redhat.com Co-developed-by: Josh Poimboeuf Signed-off-by: Yuanhe Shu --- tools/objtool/check.c | 53 +++++++++++++++++++++++-------------------- tools/objtool/check.h | 1 + tools/objtool/elf.h | 1 + 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ab28687b7407..15013e0fa018 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -834,7 +834,7 @@ static int add_special_section_alts(struct objtool_file *file) } static int add_jump_table(struct objtool_file *file, struct instruction *insn, - struct rela *table, struct rela *next_table) + struct rela *table) { struct rela *rela = table; struct instruction *dest_insn; @@ -847,7 +847,9 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, * instruction. */ list_for_each_entry_from(rela, &table->sec->rela_list, list) { - if (rela == next_table) + + /* Check for the end of the table: */ + if (rela != table && rela->jump_table_start) break; /* Make sure the table entries are consecutive: */ @@ -1006,13 +1008,15 @@ static struct rela *find_jump_table(struct objtool_file *file, return NULL; } - -static int add_func_jump_tables(struct objtool_file *file, - struct symbol *func) +/* + * First pass: Mark the head of each jump table so that in the next pass, + * we know when a given jump table ends and the next one starts. + */ +static void mark_func_jump_tables(struct objtool_file *file, + struct symbol *func) { - struct instruction *insn, *last = NULL, *prev_jump = NULL; - struct rela *rela, *prev_rela = NULL; - int ret; + struct instruction *insn, *last = NULL; + struct rela *rela; func_for_each_insn_all(file, func, insn) { if (!last) @@ -1036,26 +1040,24 @@ static int add_func_jump_tables(struct objtool_file *file, continue; rela = find_jump_table(file, func, insn); - if (!rela) - continue; - - /* - * We found a jump table, but we don't know yet how big it - * is. Don't add it until we reach the end of the function or - * the beginning of another jump table in the same function. - */ - if (prev_jump) { - ret = add_jump_table(file, prev_jump, prev_rela, rela); - if (ret) - return ret; + if (rela) { + rela->jump_table_start = true; + insn->jump_table = rela; } - - prev_jump = insn; - prev_rela = rela; } +} + +static int add_func_jump_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn; + int ret; + + func_for_each_insn_all(file, func, insn) { + if (!insn->jump_table) + continue; - if (prev_jump) { - ret = add_jump_table(file, prev_jump, prev_rela, NULL); + ret = add_jump_table(file, insn, insn->jump_table); if (ret) return ret; } @@ -1082,6 +1084,7 @@ static int add_jump_table_alts(struct objtool_file *file) if (func->type != STT_FUNC) continue; + mark_func_jump_tables(file, func); ret = add_func_jump_tables(file, func); if (ret) return ret; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index e6e8a655b556..98ee4175a802 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -49,6 +49,7 @@ struct instruction { struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; + struct rela *jump_table; struct list_head alts; struct symbol *func; struct stack_op stack_op; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index fb1938a176b7..1082e310e617 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -73,6 +73,7 @@ struct rela { unsigned int type; unsigned long offset; int addend; + bool jump_table_start; }; struct elf { -- Gitee From 681cf197580403868db096d8356e1b0e6cbae22c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: [PATCH 06/27] bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE ANBZ: #5530 commit 080b6f40763565f65ebb9540219c71ce885cf568 upstream. Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org Signed-off-by: Yuanhe Shu --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- kernel/bpf/Makefile | 5 +++++ kernel/bpf/core.c | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 6c37c449dd20..3ebee1ce6f98 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -234,5 +234,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a3868e90a662..73389cc2f5bb 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -159,10 +159,6 @@ struct ftrace_likely_data { #define asm_volatile_goto(x...) asm goto(x) #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 29d781061cd5..df3dc3b80597 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o CFLAGS_core.o += $(call cc-disable-warning, override-init) +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse +endif +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4dd51765f7de..9b61d82858a7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1301,7 +1301,7 @@ bool bpf_opcode_in_insntable(u8 code) * * Decode and execute eBPF instructions. */ -static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z -- Gitee From 66dfe0556417da7e39dbc745faa4881eaec5a3cb Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Jul 2019 10:38:39 -0700 Subject: [PATCH 07/27] bpf: allow wide (u64) aligned stores for some fields of bpf_sock_addr ANBZ: #5530 commit 600c70bad6594cb124c641ed05355ca134650ea4 upstream. Since commit cd17d7770578 ("bpf/tools: sync bpf.h") clang decided that it can do a single u64 store into user_ip6[2] instead of two separate u32 ones: # 17: (18) r2 = 0x100000000000000 # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); # 19: (7b) *(u64 *)(r1 +16) = r2 # invalid bpf_context access off=16 size=8 >From the compiler point of view it does look like a correct thing to do, so let's support it on the kernel side. Credit to Andrii Nakryiko for a proper implementation of bpf_ctx_wide_store_ok. Cc: Andrii Nakryiko Cc: Yonghong Song Fixes: cd17d7770578 ("bpf/tools: sync bpf.h") Reported-by: kernel test robot Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- include/linux/filter.h | 6 ++++++ include/uapi/linux/bpf.h | 6 +++--- net/core/filter.c | 22 ++++++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 1804e532d971..834aa6e011dc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -777,6 +777,12 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) return size <= size_default && (size & (size - 1)) == 0; } +#define bpf_ctx_wide_store_ok(off, size, type, field) \ + (size == sizeof(__u64) && \ + off >= offsetof(type, field) && \ + off + sizeof(__u64) <= offsetofend(type, field) && \ + off % sizeof(__u64) == 0) + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1c76720b14b4..74537672710d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3219,7 +3219,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3228,10 +3228,10 @@ struct bpf_sock_addr { __u32 family; /* Allows 4-byte read, but no write */ __u32 type; /* Allows 4-byte read, but no write */ __u32 protocol; /* Allows 4-byte read, but no write */ - __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write. + __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); diff --git a/net/core/filter.c b/net/core/filter.c index a64ebef1b5d4..9ac249893734 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6805,6 +6805,16 @@ static bool sock_addr_is_valid_access(int off, int size, if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { + if (bpf_ctx_wide_store_ok(off, size, + struct bpf_sock_addr, + user_ip6)) + return true; + + if (bpf_ctx_wide_store_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) + return true; + if (size != size_default) return false; } @@ -7629,9 +7639,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. * - * It doesn't support SIZE argument though since narrow stores are not - * supported for now. - * * In addition it uses Temporary Field TF (member of struct S) as the 3rd * "register" since two registers available in convert_ctx_access are not * enough: we can't override neither SRC, since it contains value to store, nor @@ -7639,7 +7646,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, * instructions. But we need a temporary place to save pointer to nested * structure whose field we want to store to. */ -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \ +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ do { \ int tmp_reg = BPF_REG_9; \ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ @@ -7650,8 +7657,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ - *insn++ = BPF_STX_MEM( \ - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \ + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ target_size) \ + OFF); \ @@ -7663,8 +7669,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, TF) \ do { \ if (type == BPF_WRITE) { \ - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \ - TF); \ + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ + OFF, TF); \ } else { \ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ S, NS, F, NF, SIZE, OFF); \ -- Gitee From 15830ae0603ee7822d39e433d51b66774484a6ce Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Jul 2019 10:38:40 -0700 Subject: [PATCH 08/27] bpf: sync bpf.h to tools/ ANBZ: #5530 commit 4cfacbe6df972db4081dd76feb7871fce996118d upstream. Sync user_ip6 & msg_src_ip6 comments. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index dfb5f1561ab4..c9cbb46837ba 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3216,7 +3216,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3225,10 +3225,10 @@ struct bpf_sock_addr { __u32 family; /* Allows 4-byte read, but no write */ __u32 type; /* Allows 4-byte read, but no write */ __u32 protocol; /* Allows 4-byte read, but no write */ - __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write. + __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); -- Gitee From 3a0ad72f8282e91b63c1892236c110794f0d2903 Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Fri, 4 Dec 2020 19:18:27 +0100 Subject: [PATCH 09/27] selftests/bpf: Print reason when a tester could not run a program ANBZ: #5530 commit 7d17167244f5415bc6bc90f5bb0074b6d79676b4 upstream. Commit 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") added a check to skip unsupported program types. As bpf_probe_prog_type can change errno, do_single_test should save it before printing a reason why a supported BPF program type failed to load. Fixes: 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types") Signed-off-by: Florian Lehner Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201204181828.11974-2-dev@der-flo.net Signed-off-by: Yuanhe Shu --- tools/testing/selftests/bpf/test_verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2b4b16c3f2ee..2072b96cd385 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -17888,6 +17888,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int saved_errno; int fixup_skips; __u32 pflags; int i, err; @@ -17918,6 +17919,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, pflags |= BPF_F_ANY_ALIGNMENT; fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4); + saved_errno = errno; if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; @@ -17934,7 +17936,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (expected_ret == ACCEPT) { if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", - strerror(errno)); + strerror(saved_errno)); goto fail_log; } #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -- Gitee From 19cf818dc381651cd1afb870055ae4f31fbebd3d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:58:55 -0700 Subject: [PATCH 10/27] libbpf: make libbpf_strerror_r agnostic to sign of error ANBZ: #5530 commit d66f43666a6811bcc3e529d5a088e1bdde94ca1b upstream. It's often inconvenient to switch sign of error when passing it into libbpf_strerror_r. It's better for it to handle that automatically. Signed-off-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/str_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index 00e48ac5b806..b8064eedc177 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -11,7 +11,7 @@ */ char *libbpf_strerror_r(int err, char *dst, int len) { - int ret = strerror_r(err, dst, len); + int ret = strerror_r(err < 0 ? -err : err, dst, len); if (ret) snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); return dst; -- Gitee From f91b639c25d9758555dec48bdc50455a034e8d70 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:58:56 -0700 Subject: [PATCH 11/27] libbpf: introduce concept of bpf_link ANBZ: #5530 commit 1c2e9efc26f389effc86e9b4d41f85d101f17629 upstream. bpf_link is an abstraction of an association of a BPF program and one of many possible BPF attachment points (hooks). This allows to have uniform interface for detaching BPF programs regardless of the nature of link and how it was created. Details of creation and setting up of a specific bpf_link is handled by corresponding attachment methods (bpf_program__attach_xxx) added in subsequent commits. Once successfully created, bpf_link has to be eventually destroyed with bpf_link__destroy(), at which point BPF program is disassociated from a hook and all the relevant resources are freed. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 17 +++++++++++++++++ tools/lib/bpf/libbpf.h | 4 ++++ tools/lib/bpf/libbpf.map | 3 ++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index bb70dca30462..865eedca4f67 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3988,6 +3988,23 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return 0; } +struct bpf_link { + int (*destroy)(struct bpf_link *link); +}; + +int bpf_link__destroy(struct bpf_link *link) +{ + int err; + + if (!link) + return 0; + + err = link->destroy(link); + free(link); + + return err; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d639f47e3110..5082a5ebb0c2 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -165,6 +165,10 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); +struct bpf_link; + +LIBBPF_API int bpf_link__destroy(struct bpf_link *link); + struct bpf_insn; /* diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 2c6d835620d2..3cde850fc8da 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -167,10 +167,11 @@ LIBBPF_0.0.3 { LIBBPF_0.0.4 { global: + bpf_link__destroy; + bpf_object__load_xattr; btf_dump__dump_type; btf_dump__free; btf_dump__new; btf__parse_elf; - bpf_object__load_xattr; libbpf_num_possible_cpus; } LIBBPF_0.0.3; -- Gitee From 3a2e7ffaabab851da93fdfce59b7a74d0548cceb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:58:57 -0700 Subject: [PATCH 12/27] libbpf: add ability to attach/detach BPF program to perf event ANBZ: #5530 commit 63f2f5ee856ba9db751123bc5519be099a3849a7 upstream. bpf_program__attach_perf_event allows to attach BPF program to existing perf event hook, providing most generic and most low-level way to attach BPF programs. It returns struct bpf_link, which should be passed to bpf_link__destroy to detach and free resources, associated with a link. Signed-off-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 63 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 3 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 67 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 865eedca4f67..9b5377d2e829 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -4005,6 +4006,68 @@ int bpf_link__destroy(struct bpf_link *link) return err; } +struct bpf_link_fd { + struct bpf_link link; /* has to be at the top of struct */ + int fd; /* hook FD */ +}; + +static int bpf_link__destroy_perf_event(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + int err; + + err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); + if (err) + err = -errno; + + close(l->fd); + return err; +} + +struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, + int pfd) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, err; + + if (pfd < 0) { + pr_warning("program '%s': invalid perf event FD %d\n", + bpf_program__title(prog, false), pfd); + return ERR_PTR(-EINVAL); + } + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_perf_event; + link->fd = pfd; + + if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { + err = -errno; + free(link); + pr_warning("program '%s': failed to attach to pfd %d: %s\n", + bpf_program__title(prog, false), pfd, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return ERR_PTR(err); + } + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + free(link); + pr_warning("program '%s': failed to enable pfd %d: %s\n", + bpf_program__title(prog, false), pfd, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return ERR_PTR(err); + } + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5082a5ebb0c2..1bf66c4a9330 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -169,6 +169,9 @@ struct bpf_link; LIBBPF_API int bpf_link__destroy(struct bpf_link *link); +LIBBPF_API struct bpf_link * +bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); + struct bpf_insn; /* diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 3cde850fc8da..756f5aa802e9 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -169,6 +169,7 @@ LIBBPF_0.0.4 { global: bpf_link__destroy; bpf_object__load_xattr; + bpf_program__attach_perf_event; btf_dump__dump_type; btf_dump__free; btf_dump__new; -- Gitee From 49369ed86ce0f22115d4820f8abb0669677c97fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:58:58 -0700 Subject: [PATCH 13/27] libbpf: add kprobe/uprobe attach API ANBZ: #5530 commit b26500274767bacbe571c75926fee2489f91ba28 upstream. Add ability to attach to kernel and user probes and retprobes. Implementation depends on perf event support for kprobes/uprobes. Signed-off-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 169 +++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 7 ++ tools/lib/bpf/libbpf.map | 2 + 3 files changed, 178 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9b5377d2e829..a048542f33bd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4068,6 +4068,175 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, return (struct bpf_link *)link; } +/* + * this function is expected to parse integer in the range of [0, 2^31-1] from + * given file using scanf format string fmt. If actual parsed value is + * negative, the result might be indistinguishable from error + */ +static int parse_uint_from_file(const char *file, const char *fmt) +{ + char buf[STRERR_BUFSIZE]; + int err, ret; + FILE *f; + + f = fopen(file, "r"); + if (!f) { + err = -errno; + pr_debug("failed to open '%s': %s\n", file, + libbpf_strerror_r(err, buf, sizeof(buf))); + return err; + } + err = fscanf(f, fmt, &ret); + if (err != 1) { + err = err == EOF ? -EIO : -errno; + pr_debug("failed to parse '%s': %s\n", file, + libbpf_strerror_r(err, buf, sizeof(buf))); + fclose(f); + return err; + } + fclose(f); + return ret; +} + +static int determine_kprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/kprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_kprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, + uint64_t offset, int pid) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int type, pfd, err; + + type = uprobe ? determine_uprobe_perf_type() + : determine_kprobe_perf_type(); + if (type < 0) { + pr_warning("failed to determine %s perf type: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + return type; + } + if (retprobe) { + int bit = uprobe ? determine_uprobe_retprobe_bit() + : determine_kprobe_retprobe_bit(); + + if (bit < 0) { + pr_warning("failed to determine %s retprobe bit: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(bit, errmsg, + sizeof(errmsg))); + return bit; + } + attr.config |= 1 << bit; + } + attr.size = sizeof(attr); + attr.type = type; + attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ + attr.config2 = offset; /* kprobe_addr or probe_offset */ + + /* pid filter is meaningful only for uprobes */ + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid /* pid */, + pid == -1 ? 0 : -1 /* cpu */, + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warning("%s perf_event_open() failed: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, + bool retprobe, + const char *func_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, + 0 /* offset */, -1 /* pid */); + if (pfd < 0) { + pr_warning("program '%s': failed to create %s '%s' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warning("program '%s': failed to attach to %s '%s': %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, + bool retprobe, pid_t pid, + const char *binary_path, + size_t func_offset) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_probe(true /* uprobe */, retprobe, + binary_path, func_offset, pid); + if (pfd < 0) { + pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1bf66c4a9330..bd767cc11967 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -171,6 +171,13 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); +LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, + const char *func_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, + pid_t pid, const char *binary_path, + size_t func_offset); struct bpf_insn; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 756f5aa802e9..57a40fb60718 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -169,7 +169,9 @@ LIBBPF_0.0.4 { global: bpf_link__destroy; bpf_object__load_xattr; + bpf_program__attach_kprobe; bpf_program__attach_perf_event; + bpf_program__attach_uprobe; btf_dump__dump_type; btf_dump__free; btf_dump__new; -- Gitee From 3485982ba97e2ca54823138eaf83287602348426 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:58:59 -0700 Subject: [PATCH 14/27] libbpf: add tracepoint attach API ANBZ: #5530 commit f6de59c17f111b3efc701c95b438e46245c4aeb1 upstream. Allow attaching BPF programs to kernel tracepoint BPF hooks specified by category and name. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 79 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 4 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 84 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a048542f33bd..b417507c8c12 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4237,6 +4237,85 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, return link; } +static int determine_tracepoint_id(const char *tp_category, + const char *tp_name) +{ + char file[PATH_MAX]; + int ret; + + ret = snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + tp_category, tp_name); + if (ret < 0) + return -errno; + if (ret >= sizeof(file)) { + pr_debug("tracepoint %s/%s path is too long\n", + tp_category, tp_name); + return -E2BIG; + } + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_open_tracepoint(const char *tp_category, + const char *tp_name) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int tp_id, pfd, err; + + tp_id = determine_tracepoint_id(tp_category, tp_name); + if (tp_id < 0) { + pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n", + tp_category, tp_name, + libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + return tp_id; + } + + attr.type = PERF_TYPE_TRACEPOINT; + attr.size = sizeof(attr); + attr.config = tp_id; + + pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n", + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + +struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, + const char *tp_category, + const char *tp_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int pfd, err; + + pfd = perf_event_open_tracepoint(tp_category, tp_name); + if (pfd < 0) { + pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link = bpf_program__attach_perf_event(prog, pfd); + if (IS_ERR(link)) { + close(pfd); + err = PTR_ERR(link); + pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return link; + } + return link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bd767cc11967..60611f4b4e1d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -178,6 +178,10 @@ LIBBPF_API struct bpf_link * bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); +LIBBPF_API struct bpf_link * +bpf_program__attach_tracepoint(struct bpf_program *prog, + const char *tp_category, + const char *tp_name); struct bpf_insn; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57a40fb60718..3c618b75ef65 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -171,6 +171,7 @@ LIBBPF_0.0.4 { bpf_object__load_xattr; bpf_program__attach_kprobe; bpf_program__attach_perf_event; + bpf_program__attach_tracepoint; bpf_program__attach_uprobe; btf_dump__dump_type; btf_dump__free; -- Gitee From 7df2cbe05eeb7e0895a495ac66ff05a94cbd4491 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jul 2019 16:59:00 -0700 Subject: [PATCH 15/27] libbpf: add raw tracepoint attach API ANBZ: #5530 commit 84bf5e1f4f174fc4f06bc747dafa1a7cfa4e9a67 upstream. Add a wrapper utilizing bpf_link "infrastructure" to allow attaching BPF programs to raw tracepoints. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 39 +++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 43 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b417507c8c12..625326897321 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4316,6 +4316,45 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return link; } +static int bpf_link__destroy_fd(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + + return close(l->fd); +} + +struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, + const char *tp_name) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, pfd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warning("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_fd; + + pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); + if (pfd < 0) { + pfd = -errno; + free(link); + pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n", + bpf_program__title(prog, false), tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link->fd = pfd; + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 60611f4b4e1d..f55933784f95 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -182,6 +182,9 @@ LIBBPF_API struct bpf_link * bpf_program__attach_tracepoint(struct bpf_program *prog, const char *tp_category, const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_raw_tracepoint(struct bpf_program *prog, + const char *tp_name); struct bpf_insn; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 3c618b75ef65..e6b7d4edbc93 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -171,6 +171,7 @@ LIBBPF_0.0.4 { bpf_object__load_xattr; bpf_program__attach_kprobe; bpf_program__attach_perf_event; + bpf_program__attach_raw_tracepoint; bpf_program__attach_tracepoint; bpf_program__attach_uprobe; btf_dump__dump_type; -- Gitee From 8b9a9e2ee30814b9f9fb38e5aa6072074a3e2fe6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 Jul 2019 21:00:07 -0700 Subject: [PATCH 16/27] libbpf: fix ptr to u64 conversion warning on 32-bit platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 36db2a94f19ae1fcf1e6d8253df0c32f90f92860 upstream. On 32-bit platforms compiler complains about conversion: libbpf.c: In function ‘perf_event_open_probe’: libbpf.c:4112:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ ^ Reported-by: Matt Hart Fixes: b26500274767 ("libbpf: add kprobe/uprobe attach API") Tested-by: Matt Hart Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 625326897321..e9e08aeeca8d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4156,8 +4156,8 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, } attr.size = sizeof(attr); attr.type = type; - attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ - attr.config2 = offset; /* kprobe_addr or probe_offset */ + attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ + attr.config2 = offset; /* kprobe_addr or probe_offset */ /* pid filter is meaningful only for uprobes */ pfd = syscall(__NR_perf_event_open, &attr, -- Gitee From 24a57b51ac7c88ffe2ef7aa26116c904af60922b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Jul 2019 08:16:20 -0700 Subject: [PATCH 17/27] libbpf: fix GCC8 warning for strncpy ANBZ: #5530 commit cdfc7f888c2a355b01308e97c6df108f1c2b64e8 upstream. GCC8 started emitting warning about using strncpy with number of bytes exactly equal destination size, which is generally unsafe, as can lead to non-zero terminated string being copied. Use IFNAMSIZ - 1 as number of bytes to ensure name is always zero-terminated. Signed-off-by: Andrii Nakryiko Cc: Magnus Karlsson Acked-by: Yonghong Song Acked-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 5f3cd83fe4cf..9dfaff57daea 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -327,7 +327,8 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) channels.cmd = ETHTOOL_GCHANNELS; ifr.ifr_data = (void *)&channels; - strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ); + strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; -- Gitee From 488c18996a21e31d7d91214d470b55a50048cb81 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:56 -0700 Subject: [PATCH 18/27] bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT ANBZ: #5530 commit 23729ff23186424e54b4d6678fcd526cdacef4d3 upstream. Performance impact should be minimal because it's under a new BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled. Suggested-by: Eric Dumazet Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- include/net/tcp.h | 8 ++++++++ include/uapi/linux/bpf.h | 6 +++++- net/ipv4/tcp_input.c | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dc723e93e349..0361aba77eae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2208,6 +2208,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } +static inline void tcp_bpf_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG)) + tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); +} + #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 74537672710d..d1cb1b2e0146 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1770,6 +1770,7 @@ union bpf_attr { * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO @@ -3293,7 +3294,8 @@ struct bpf_sock_ops { #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently * supported cb flags */ @@ -3348,6 +3350,8 @@ enum { BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after * socket transition to LISTEN state. */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 20a91307d38b..4c139892d457 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -773,6 +773,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rtt_seq = tp->snd_nxt; tp->mdev_max_us = tcp_rto_min_us(sk); + + tcp_bpf_rtt(sk); } } else { /* no previous measure. */ @@ -781,6 +783,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; + + tcp_bpf_rtt(sk); } tp->srtt_us = max(1U, srtt); } -- Gitee From 35e094f5d5ac7ee5073cae6dd18fb5ab336f3707 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:57 -0700 Subject: [PATCH 19/27] bpf: split shared bpf_tcp_sock and bpf_sock_ops implementation ANBZ: #5530 commit 2377b81de52750997726d6d43b4114e5842c4bf9 upstream. We've added bpf_tcp_sock member to bpf_sock_ops and don't expect any new tcp_sock fields in bpf_sock_ops. Let's remove CONVERT_COMMON_TCP_SOCK_FIELDS so bpf_tcp_sock can be independently extended. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- net/core/filter.c | 180 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 126 insertions(+), 54 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 9ac249893734..ab8a0ad58d21 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5122,54 +5122,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ -#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ -do { \ - switch (si->off) { \ - case offsetof(md_type, snd_cwnd): \ - CONVERT(snd_cwnd); break; \ - case offsetof(md_type, srtt_us): \ - CONVERT(srtt_us); break; \ - case offsetof(md_type, snd_ssthresh): \ - CONVERT(snd_ssthresh); break; \ - case offsetof(md_type, rcv_nxt): \ - CONVERT(rcv_nxt); break; \ - case offsetof(md_type, snd_nxt): \ - CONVERT(snd_nxt); break; \ - case offsetof(md_type, snd_una): \ - CONVERT(snd_una); break; \ - case offsetof(md_type, mss_cache): \ - CONVERT(mss_cache); break; \ - case offsetof(md_type, ecn_flags): \ - CONVERT(ecn_flags); break; \ - case offsetof(md_type, rate_delivered): \ - CONVERT(rate_delivered); break; \ - case offsetof(md_type, rate_interval_us): \ - CONVERT(rate_interval_us); break; \ - case offsetof(md_type, packets_out): \ - CONVERT(packets_out); break; \ - case offsetof(md_type, retrans_out): \ - CONVERT(retrans_out); break; \ - case offsetof(md_type, total_retrans): \ - CONVERT(total_retrans); break; \ - case offsetof(md_type, segs_in): \ - CONVERT(segs_in); break; \ - case offsetof(md_type, data_segs_in): \ - CONVERT(data_segs_in); break; \ - case offsetof(md_type, segs_out): \ - CONVERT(segs_out); break; \ - case offsetof(md_type, data_segs_out): \ - CONVERT(data_segs_out); break; \ - case offsetof(md_type, lost_out): \ - CONVERT(lost_out); break; \ - case offsetof(md_type, sacked_out): \ - CONVERT(sacked_out); break; \ - case offsetof(md_type, bytes_received): \ - CONVERT(bytes_received); break; \ - case offsetof(md_type, bytes_acked): \ - CONVERT(bytes_acked); break; \ - } \ -} while (0) - #ifdef CONFIG_INET static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) @@ -5572,9 +5524,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, offsetof(struct tcp_sock, FIELD)); \ } while (0) - CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, - BPF_TCP_SOCK_GET_COMMON); - if (insn > insn_buf) return insn - insn_buf; @@ -5589,6 +5538,69 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, offsetof(struct tcp_sock, rtt_min) + offsetof(struct minmax_sample, v)); break; + case offsetof(struct bpf_tcp_sock, snd_cwnd): + BPF_TCP_SOCK_GET_COMMON(snd_cwnd); + break; + case offsetof(struct bpf_tcp_sock, srtt_us): + BPF_TCP_SOCK_GET_COMMON(srtt_us); + break; + case offsetof(struct bpf_tcp_sock, snd_ssthresh): + BPF_TCP_SOCK_GET_COMMON(snd_ssthresh); + break; + case offsetof(struct bpf_tcp_sock, rcv_nxt): + BPF_TCP_SOCK_GET_COMMON(rcv_nxt); + break; + case offsetof(struct bpf_tcp_sock, snd_nxt): + BPF_TCP_SOCK_GET_COMMON(snd_nxt); + break; + case offsetof(struct bpf_tcp_sock, snd_una): + BPF_TCP_SOCK_GET_COMMON(snd_una); + break; + case offsetof(struct bpf_tcp_sock, mss_cache): + BPF_TCP_SOCK_GET_COMMON(mss_cache); + break; + case offsetof(struct bpf_tcp_sock, ecn_flags): + BPF_TCP_SOCK_GET_COMMON(ecn_flags); + break; + case offsetof(struct bpf_tcp_sock, rate_delivered): + BPF_TCP_SOCK_GET_COMMON(rate_delivered); + break; + case offsetof(struct bpf_tcp_sock, rate_interval_us): + BPF_TCP_SOCK_GET_COMMON(rate_interval_us); + break; + case offsetof(struct bpf_tcp_sock, packets_out): + BPF_TCP_SOCK_GET_COMMON(packets_out); + break; + case offsetof(struct bpf_tcp_sock, retrans_out): + BPF_TCP_SOCK_GET_COMMON(retrans_out); + break; + case offsetof(struct bpf_tcp_sock, total_retrans): + BPF_TCP_SOCK_GET_COMMON(total_retrans); + break; + case offsetof(struct bpf_tcp_sock, segs_in): + BPF_TCP_SOCK_GET_COMMON(segs_in); + break; + case offsetof(struct bpf_tcp_sock, data_segs_in): + BPF_TCP_SOCK_GET_COMMON(data_segs_in); + break; + case offsetof(struct bpf_tcp_sock, segs_out): + BPF_TCP_SOCK_GET_COMMON(segs_out); + break; + case offsetof(struct bpf_tcp_sock, data_segs_out): + BPF_TCP_SOCK_GET_COMMON(data_segs_out); + break; + case offsetof(struct bpf_tcp_sock, lost_out): + BPF_TCP_SOCK_GET_COMMON(lost_out); + break; + case offsetof(struct bpf_tcp_sock, sacked_out): + BPF_TCP_SOCK_GET_COMMON(sacked_out); + break; + case offsetof(struct bpf_tcp_sock, bytes_received): + BPF_TCP_SOCK_GET_COMMON(bytes_received); + break; + case offsetof(struct bpf_tcp_sock, bytes_acked): + BPF_TCP_SOCK_GET_COMMON(bytes_acked); + break; } return insn - insn_buf; @@ -7916,9 +7928,6 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ } while (0) - CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops, - SOCK_OPS_GET_TCP_SOCK_FIELD); - if (insn > insn_buf) return insn - insn_buf; @@ -8088,6 +8097,69 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, struct sock, type); break; + case offsetof(struct bpf_sock_ops, snd_cwnd): + SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd); + break; + case offsetof(struct bpf_sock_ops, srtt_us): + SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us); + break; + case offsetof(struct bpf_sock_ops, snd_ssthresh): + SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh); + break; + case offsetof(struct bpf_sock_ops, rcv_nxt): + SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt); + break; + case offsetof(struct bpf_sock_ops, snd_nxt): + SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt); + break; + case offsetof(struct bpf_sock_ops, snd_una): + SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una); + break; + case offsetof(struct bpf_sock_ops, mss_cache): + SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache); + break; + case offsetof(struct bpf_sock_ops, ecn_flags): + SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags); + break; + case offsetof(struct bpf_sock_ops, rate_delivered): + SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered); + break; + case offsetof(struct bpf_sock_ops, rate_interval_us): + SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us); + break; + case offsetof(struct bpf_sock_ops, packets_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out); + break; + case offsetof(struct bpf_sock_ops, retrans_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out); + break; + case offsetof(struct bpf_sock_ops, total_retrans): + SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans); + break; + case offsetof(struct bpf_sock_ops, segs_in): + SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in); + break; + case offsetof(struct bpf_sock_ops, data_segs_in): + SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in); + break; + case offsetof(struct bpf_sock_ops, segs_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out); + break; + case offsetof(struct bpf_sock_ops, data_segs_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out); + break; + case offsetof(struct bpf_sock_ops, lost_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out); + break; + case offsetof(struct bpf_sock_ops, sacked_out): + SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out); + break; + case offsetof(struct bpf_sock_ops, bytes_received): + SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received); + break; + case offsetof(struct bpf_sock_ops, bytes_acked): + SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); + break; case offsetof(struct bpf_sock_ops, sk): SOCK_OPS_GET_SK(); break; -- Gitee From 1fd2d50717f8c140638fcd207edf3d936e4909c8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:58 -0700 Subject: [PATCH 20/27] bpf: add dsack_dups/delivered{, _ce} to bpf_tcp_sock ANBZ: #5530 commit 0357746d1e40a8226f68a42c8d7222a12d7c451f upstream. Add more fields to bpf_tcp_sock that might be useful for debugging congestion control issues. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- include/uapi/linux/bpf.h | 5 +++++ net/core/filter.c | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d1cb1b2e0146..03a07d80f157 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3051,6 +3051,11 @@ struct bpf_tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ }; struct bpf_sock_tuple { diff --git a/net/core/filter.c b/net/core/filter.c index ab8a0ad58d21..74b4ffd2d31c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5493,7 +5493,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, delivered_ce)) return false; if (off % size != 0) @@ -5601,6 +5601,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, case offsetof(struct bpf_tcp_sock, bytes_acked): BPF_TCP_SOCK_GET_COMMON(bytes_acked); break; + case offsetof(struct bpf_tcp_sock, dsack_dups): + BPF_TCP_SOCK_GET_COMMON(dsack_dups); + break; + case offsetof(struct bpf_tcp_sock, delivered): + BPF_TCP_SOCK_GET_COMMON(delivered); + break; + case offsetof(struct bpf_tcp_sock, delivered_ce): + BPF_TCP_SOCK_GET_COMMON(delivered_ce); + break; } return insn - insn_buf; -- Gitee From ae24e4a13d5935d747b80c17e08c8d80e649cde1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:13:59 -0700 Subject: [PATCH 21/27] bpf: add icsk_retransmits to bpf_tcp_sock ANBZ: #5530 commit c2cb5e82a720c05b707701c75dfeb356fe184787 upstream. Add some inet_connection_sock fields to bpf_tcp_sock that might be useful for debugging congestion control issues. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 03a07d80f157..ac787c729129 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3056,6 +3056,7 @@ struct bpf_tcp_sock { */ __u32 delivered; /* Total data packets delivered incl. rexmits */ __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ }; struct bpf_sock_tuple { diff --git a/net/core/filter.c b/net/core/filter.c index 74b4ffd2d31c..289fca1671d0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5493,7 +5493,8 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, delivered_ce)) + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, + icsk_retransmits)) return false; if (off % size != 0) @@ -5524,6 +5525,20 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, offsetof(struct tcp_sock, FIELD)); \ } while (0) +#define BPF_INET_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \ + FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct inet_connection_sock, \ + FIELD), \ + si->dst_reg, si->src_reg, \ + offsetof( \ + struct inet_connection_sock, \ + FIELD)); \ + } while (0) + if (insn > insn_buf) return insn - insn_buf; @@ -5610,6 +5625,9 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, case offsetof(struct bpf_tcp_sock, delivered_ce): BPF_TCP_SOCK_GET_COMMON(delivered_ce); break; + case offsetof(struct bpf_tcp_sock, icsk_retransmits): + BPF_INET_SOCK_GET_COMMON(icsk_retransmits); + break; } return insn - insn_buf; -- Gitee From 96420f87088a2ceb2fb80677fdac5c97c8d6e32c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:14:00 -0700 Subject: [PATCH 22/27] bpf/tools: sync bpf.h ANBZ: #5530 commit 692cbaa99fe446ed30bfb7e53fb2b4cc518331a1 upstream. Sync new bpf_tcp_sock fields and new BPF_PROG_TYPE_SOCK_OPS RTT callback. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c9cbb46837ba..46eed9ee7cca 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1767,6 +1767,7 @@ union bpf_attr { * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO @@ -3047,6 +3048,12 @@ struct bpf_tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ }; struct bpf_sock_tuple { @@ -3290,7 +3297,8 @@ struct bpf_sock_ops { #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently * supported cb flags */ @@ -3345,6 +3353,8 @@ enum { BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after * socket transition to LISTEN state. */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- Gitee From fadebbf3ab8500e3fadd44b671c21156b4586572 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Jul 2019 14:57:21 +0200 Subject: [PATCH 23/27] bpf: avoid unused variable warning in tcp_bpf_rtt() ANBZ: #5530 commit bef8e2639242e7f7214f1ab3b37ace1415a4f750 upstream. When CONFIG_BPF is disabled, we get a warning for an unused variable: In file included from drivers/target/target_core_device.c:26: include/net/tcp.h:2226:19: error: unused variable 'tp' [-Werror,-Wunused-variable] struct tcp_sock *tp = tcp_sk(sk); The variable is only used in one place, so it can be replaced with its value there to avoid the warning. Fixes: 23729ff23186 ("bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT") Signed-off-by: Arnd Bergmann Acked-by: Soheil Hassas Yeganeh Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- include/net/tcp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0361aba77eae..5e5cfa25f195 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2210,9 +2210,7 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) static inline void tcp_bpf_rtt(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - - if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG)) + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); } -- Gitee From ce5cff6593505eda97ce2976fcb6b53c90655d4f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Jul 2019 08:50:09 -0700 Subject: [PATCH 24/27] libbpf: capture value in BTF type info for BTF-defined map defs ANBZ: #5530 commit ef99b02b23ef4308057573f76750e661b5aea3e1 upstream. Change BTF-defined map definitions to capture compile-time integer values as part of BTF type definition, to avoid split of key/value type information and actual type/size/flags initialization for maps. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 58 ++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9e08aeeca8d..3f21172d6d34 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1032,40 +1032,40 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, } } -static bool get_map_field_int(const char *map_name, - const struct btf *btf, +/* + * Fetch integer attribute of BTF map definition. Such attributes are + * represented using a pointer to an array, in which dimensionality of array + * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; + * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF + * type definition, while using only sizeof(void *) space in ELF data section. + */ +static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, - const struct btf_member *m, - const void *data, __u32 *res) { + const struct btf_member *m, __u32 *res) { const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); const char *name = btf__name_by_offset(btf, m->name_off); - __u32 int_info = *(const __u32 *)(const void *)(t + 1); + const struct btf_array *arr_info; + const struct btf_type *arr_t; - if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) { - pr_warning("map '%s': attr '%s': expected INT, got %u.\n", + if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", map_name, name, BTF_INFO_KIND(t->info)); return false; } - if (t->size != 4 || BTF_INT_BITS(int_info) != 32 || - BTF_INT_OFFSET(int_info)) { - pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, " - "got %u-byte (%d-bit) one with bit offset %d.\n", - map_name, name, t->size, BTF_INT_BITS(int_info), - BTF_INT_OFFSET(int_info)); - return false; - } - if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) { - pr_warning("map '%s': attr '%s': bitfield is not supported.\n", - map_name, name); + + arr_t = btf__type_by_id(btf, t->type); + if (!arr_t) { + pr_warning("map '%s': attr '%s': type [%u] not found.\n", + map_name, name, t->type); return false; } - if (m->offset % 32) { - pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n", - map_name, name); + if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) { + pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", + map_name, name, BTF_INFO_KIND(arr_t->info)); return false; } - - *res = *(const __u32 *)(data + m->offset / 8); + arr_info = (const void *)(arr_t + 1); + *res = arr_info->nelems; return true; } @@ -1078,7 +1078,6 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_var_secinfo *vi; const struct btf_var *var_extra; const struct btf_member *m; - const void *def_data; const char *map_name; struct bpf_map *map; int vlen, i; @@ -1135,7 +1134,6 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - def_data = data->d_buf + vi->offset; vlen = BTF_INFO_VLEN(def->info); m = (const void *)(def + 1); for (i = 0; i < vlen; i++, m++) { @@ -1148,19 +1146,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } if (strcmp(name, "type") == 0) { if (!get_map_field_int(map_name, obj->btf, def, m, - def_data, &map->def.type)) + &map->def.type)) return -EINVAL; pr_debug("map '%s': found type = %u.\n", map_name, map->def.type); } else if (strcmp(name, "max_entries") == 0) { if (!get_map_field_int(map_name, obj->btf, def, m, - def_data, &map->def.max_entries)) + &map->def.max_entries)) return -EINVAL; pr_debug("map '%s': found max_entries = %u.\n", map_name, map->def.max_entries); } else if (strcmp(name, "map_flags") == 0) { if (!get_map_field_int(map_name, obj->btf, def, m, - def_data, &map->def.map_flags)) + &map->def.map_flags)) return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", map_name, map->def.map_flags); @@ -1168,7 +1166,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, __u32 sz; if (!get_map_field_int(map_name, obj->btf, def, m, - def_data, &sz)) + &sz)) return -EINVAL; pr_debug("map '%s': found key_size = %u.\n", map_name, sz); @@ -1211,7 +1209,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, __u32 sz; if (!get_map_field_int(map_name, obj->btf, def, m, - def_data, &sz)) + &sz)) return -EINVAL; pr_debug("map '%s': found value_size = %u.\n", map_name, sz); -- Gitee From 50aeef70750d0d449a3608bf389304785286d25d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Jul 2019 08:50:10 -0700 Subject: [PATCH 25/27] selftests/bpf: add __uint and __type macro for BTF-defined maps ANBZ: #5530 commit 00acd00814527a1856c92beda17475205bddaba6 upstream. Add simple __uint and __type macro that hide details of how type and integer values are captured in BTF-defined maps. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/testing/selftests/bpf/bpf_helpers.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 4f1caf1cf973..9eb803211fa2 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -8,6 +8,9 @@ */ #define SEC(NAME) __attribute__((section(NAME), used)) +#define __uint(name, val) int (*name)[val] +#define __type(name, val) val *name + /* helper functions called from eBPF programs written in C */ static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) BPF_FUNC_map_lookup_elem; -- Gitee From 64b8a7667104531273a57bcc7bd00888bd1a461e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Jul 2019 08:50:11 -0700 Subject: [PATCH 26/27] selftests/bpf: convert selftests using BTF-defined maps to new syntax ANBZ: #5530 commit bc7430cc8bfb51577e466a8ca02ad87375a70bde upstream. Convert all the existing selftests that are already using BTF-defined maps to use new syntax (with no static data initialization). [backport note] Does not change the type of map socket_cookies as 69d96519dbf0(" selftests/bpf: convert socket_cookie test to sk storage") has not been introduced. Add missing test_sockopt, test_sockopt_multi and test_sockopt_sk to .gitignore Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- tools/testing/selftests/bpf/.gitignore | 3 + tools/testing/selftests/bpf/progs/bpf_flow.c | 28 +++---- .../testing/selftests/bpf/progs/netcnt_prog.c | 20 ++--- .../selftests/bpf/progs/socket_cookie_prog.c | 14 ++-- .../selftests/bpf/progs/test_btf_newkv.c | 13 ++- .../bpf/progs/test_get_stack_rawtp.c | 39 ++++----- .../selftests/bpf/progs/test_global_data.c | 37 ++++----- tools/testing/selftests/bpf/progs/test_l4lb.c | 65 ++++++--------- .../selftests/bpf/progs/test_l4lb_noinline.c | 65 ++++++--------- .../selftests/bpf/progs/test_map_lock.c | 26 +++--- .../bpf/progs/test_select_reuseport_kern.c | 67 ++++++--------- .../bpf/progs/test_send_signal_kern.c | 26 +++--- .../bpf/progs/test_sock_fields_kern.c | 78 +++++++----------- .../selftests/bpf/progs/test_spin_lock.c | 36 ++++----- .../bpf/progs/test_stacktrace_build_id.c | 55 +++++-------- .../selftests/bpf/progs/test_stacktrace_map.c | 52 +++++------- .../selftests/bpf/progs/test_tcp_estats.c | 13 ++- .../selftests/bpf/progs/test_tcpbpf_kern.c | 12 +-- .../selftests/bpf/progs/test_tcpnotify_kern.c | 26 +++--- tools/testing/selftests/bpf/progs/test_xdp.c | 26 +++--- .../selftests/bpf/progs/test_xdp_noinline.c | 81 +++++++------------ 21 files changed, 299 insertions(+), 483 deletions(-) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e401075d0957..a29c249397d8 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -16,6 +16,9 @@ test_libbpf_open test_sock test_sock_addr test_sock_fields +test_sockopt +test_sockopt_multi +test_sockopt_sk urandom_read test_btf test_sockmap diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index ff663ff45d62..12a50a707149 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -58,26 +58,18 @@ struct frag_hdr { }; struct { - __u32 type; - __u32 max_entries; - __u32 key_size; - __u32 value_size; -} jmp_table SEC(".maps") = { - .type = BPF_MAP_TYPE_PROG_ARRAY, - .max_entries = 8, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), -}; + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 8); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct bpf_flow_keys *value; -} last_dissection SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 1024, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, __u32); + __type(value, struct bpf_flow_keys); +} last_dissection SEC(".maps"); static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, int ret) diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index a25c82a5b7c8..38a997852cad 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -11,20 +11,16 @@ #define NS_PER_SEC 1000000000 struct { - __u32 type; - struct bpf_cgroup_storage_key *key; - struct percpu_net_cnt *value; -} percpu_netcnt SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct percpu_net_cnt); +} percpu_netcnt SEC(".maps"); struct { - __u32 type; - struct bpf_cgroup_storage_key *key; - struct net_cnt *value; -} netcnt SEC(".maps") = { - .type = BPF_MAP_TYPE_CGROUP_STORAGE, -}; + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct net_cnt); +} netcnt SEC(".maps"); SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 086b3972de7c..3d21d9497a14 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -8,15 +8,11 @@ #include "bpf_endian.h" struct { - __u32 type; - __u32 map_flags; - int *key; - struct socket_cookie *value; - __u32 max_entries; -} socket_cookies SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 1 << 8, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, (1 << 8)); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u32)); +} socket_cookies SEC(".maps"); SEC("cgroup/connect6") int set_cookie(struct bpf_sock_addr *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 28c16bb583b6..5ee3622ddebb 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -21,14 +21,11 @@ struct bpf_map_def SEC("maps") btf_map_legacy = { BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); struct { - int *key; - struct ipv_counts *value; - unsigned int type; - unsigned int max_entries; -} btf_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 4, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, int); + __type(value, struct ipv_counts); +} btf_map SEC(".maps"); struct dummy_tracepoint_args { unsigned long long pad; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index aaabf33f5fac..cf1b8a1c7f6d 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -16,26 +16,18 @@ struct stack_trace_t { }; struct { - __u32 type; - __u32 max_entries; - __u32 key_size; - __u32 value_size; -} perfmap SEC(".maps") = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .max_entries = 2, - .key_size = sizeof(int), - .value_size = sizeof(__u32), -}; + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(__u32)); +} perfmap SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct stack_trace_t *value; -} stackdata_map SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct stack_trace_t); +} stackdata_map SEC(".maps"); /* Allocate per-cpu space twice the needed. For the code below * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); @@ -56,14 +48,11 @@ struct { * This is an acceptable workaround since there is one entry here. */ struct { - __u32 type; - __u32 max_entries; - __u32 *key; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); __u64 (*value)[2 * MAX_STACK_RAWTP]; -} rawdata_map SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = 1, -}; +} rawdata_map SEC(".maps"); SEC("tracepoint/raw_syscalls/sys_enter") int bpf_prog1(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index 866cc7ddbe43..32a6073acb99 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -8,24 +8,18 @@ #include "bpf_helpers.h" struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u64 *value; -} result_number SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 11, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 11); + __type(key, __u32); + __type(value, __u64); +} result_number SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); const char (*value)[32]; -} result_string SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 5, -}; +} result_string SEC(".maps"); struct foo { __u8 a; @@ -34,14 +28,11 @@ struct foo { }; struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct foo *value; -} result_struct SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 5, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, struct foo); +} result_struct SEC(".maps"); /* Relocation tests for __u64s. */ static __u64 num0; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 848cbb90f581..1d652ee8e73d 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -170,54 +170,39 @@ struct eth_hdr { }; struct { - __u32 type; - __u32 max_entries; - struct vip *key; - struct vip_meta *value; -} vip_map SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = MAX_VIPS, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_VIPS); + __type(key, struct vip); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} ch_rings SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = CH_RINGS_SIZE, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct real_definition *value; -} reals SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = MAX_REALS, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct vip_stats *value; -} stats SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = MAX_VIPS, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_VIPS); + __type(key, __u32); + __type(value, struct vip_stats); +} stats SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct ctl_value *value; -} ctl_array SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = CTL_MAP_SIZE, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CTL_MAP_SIZE); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); static __always_inline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index c63ecf3ca573..2e4efe70b1e5 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -166,54 +166,39 @@ struct eth_hdr { }; struct { - __u32 type; - __u32 max_entries; - struct vip *key; - struct vip_meta *value; -} vip_map SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = MAX_VIPS, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_VIPS); + __type(key, struct vip); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} ch_rings SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = CH_RINGS_SIZE, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct real_definition *value; -} reals SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = MAX_REALS, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct vip_stats *value; -} stats SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = MAX_VIPS, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_VIPS); + __type(key, __u32); + __type(value, struct vip_stats); +} stats SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct ctl_value *value; -} ctl_array SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = CTL_MAP_SIZE, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CTL_MAP_SIZE); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); static __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index 40d9c2853393..bb7ce35f691b 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -12,14 +12,11 @@ struct hmap_elem { }; struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct hmap_elem *value; -} hash_map SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct hmap_elem); +} hash_map SEC(".maps"); struct array_elem { struct bpf_spin_lock lock; @@ -27,14 +24,11 @@ struct array_elem { }; struct { - __u32 type; - __u32 max_entries; - int *key; - struct array_elem *value; -} array_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct array_elem); +} array_map SEC(".maps"); SEC("map_lock_demo") int bpf_map_lock_test(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 435a9527733e..ea7d84f01235 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -22,56 +22,39 @@ int _version SEC("version") = 1; #endif struct { - __u32 type; - __u32 max_entries; - __u32 key_size; - __u32 value_size; -} outer_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .max_entries = 1, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), -}; + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} outer_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} result_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = NR_RESULTS, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, NR_RESULTS); + __type(key, __u32); + __type(value, __u32); +} result_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - int *value; -} tmp_index_ovr_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, int); +} tmp_index_ovr_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} linum_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} linum_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct data_check *value; -} data_check_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct data_check); +} data_check_map SEC(".maps"); #define GOTO_DONE(_result) ({ \ result = (_result); \ diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 6ac68be5d68b..0e6be01157e6 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -5,24 +5,18 @@ #include "bpf_helpers.h" struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u64 *value; -} info_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} info_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u64 *value; -} status_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} status_map SEC(".maps"); SEC("send_signal_demo") int bpf_send_signal_test(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index c3d383d650cb..a47b003623ef 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -28,44 +28,32 @@ enum bpf_linum_array_idx { }; struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct sockaddr_in6 *value; -} addr_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = __NR_BPF_ADDR_ARRAY_IDX, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); + __type(key, __u32); + __type(value, struct sockaddr_in6); +} addr_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct bpf_sock *value; -} sock_result_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = __NR_BPF_RESULT_ARRAY_IDX, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); + __type(key, __u32); + __type(value, struct bpf_sock); +} sock_result_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct bpf_tcp_sock *value; -} tcp_sock_result_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = __NR_BPF_RESULT_ARRAY_IDX, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); + __type(key, __u32); + __type(value, struct bpf_tcp_sock); +} tcp_sock_result_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} linum_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = __NR_BPF_LINUM_ARRAY_IDX, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); + __type(key, __u32); + __type(value, __u32); +} linum_map SEC(".maps"); struct bpf_spinlock_cnt { struct bpf_spin_lock lock; @@ -73,24 +61,18 @@ struct bpf_spinlock_cnt { }; struct { - __u32 type; - __u32 map_flags; - int *key; - struct bpf_spinlock_cnt *value; -} sk_pkt_out_cnt SEC(".maps") = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .map_flags = BPF_F_NO_PREALLOC, -}; + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct bpf_spinlock_cnt); +} sk_pkt_out_cnt SEC(".maps"); struct { - __u32 type; - __u32 map_flags; - int *key; - struct bpf_spinlock_cnt *value; -} sk_pkt_out_cnt10 SEC(".maps") = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .map_flags = BPF_F_NO_PREALLOC, -}; + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct bpf_spinlock_cnt); +} sk_pkt_out_cnt10 SEC(".maps"); static bool is_loopback6(__u32 *a6) { diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 0a77ae36d981..a43b999c8da2 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -11,14 +11,11 @@ struct hmap_elem { }; struct { - __u32 type; - __u32 max_entries; - int *key; - struct hmap_elem *value; -} hmap SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); struct cls_elem { struct bpf_spin_lock lock; @@ -26,12 +23,10 @@ struct cls_elem { }; struct { - __u32 type; - struct bpf_cgroup_storage_key *key; - struct cls_elem *value; -} cls_map SEC(".maps") = { - .type = BPF_MAP_TYPE_CGROUP_STORAGE, -}; + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct cls_elem); +} cls_map SEC(".maps"); struct bpf_vqueue { struct bpf_spin_lock lock; @@ -42,14 +37,11 @@ struct bpf_vqueue { }; struct { - __u32 type; - __u32 max_entries; - int *key; - struct bpf_vqueue *value; -} vqueue SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct bpf_vqueue); +} vqueue SEC(".maps"); #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index fcf2280bb60c..bbfc8337b6f0 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -9,51 +9,36 @@ #endif struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} control_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} control_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} stackid_hmap SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 16384, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, __u32); +} stackid_hmap SEC(".maps"); typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH]; struct { - __u32 type; - __u32 max_entries; - __u32 map_flags; - __u32 key_size; - __u32 value_size; -} stackmap SEC(".maps") = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .max_entries = 128, - .map_flags = BPF_F_STACK_BUILD_ID, - .key_size = sizeof(__u32), - .value_size = sizeof(stack_trace_t), -}; + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 128); + __uint(map_flags, BPF_F_STACK_BUILD_ID); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(stack_trace_t)); +} stackmap SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 128); + __type(key, __u32); /* there seems to be a bug in kernel not handling typedef properly */ struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH]; -} stack_amap SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 128, -}; +} stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ struct random_urandom_args { diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 7ad09adbf648..803c15dc109d 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -9,48 +9,34 @@ #endif struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} control_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 1, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} control_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} stackid_hmap SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 16384, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, __u32); +} stackid_hmap SEC(".maps"); typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; struct { - __u32 type; - __u32 max_entries; - __u32 key_size; - __u32 value_size; -} stackmap SEC(".maps") = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .max_entries = 16384, - .key_size = sizeof(__u32), - .value_size = sizeof(stack_trace_t), -}; + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(stack_trace_t)); +} stackmap SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 16384); + __type(key, __u32); __u64 (*value)[PERF_MAX_STACK_DEPTH]; -} stack_amap SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 16384, -}; +} stack_amap SEC(".maps"); /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index df98f7e32832..c8c595da38d4 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -149,14 +149,11 @@ struct tcp_estats_basic_event { }; struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct tcp_estats_basic_event *value; -} ev_record_map SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 1024, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, __u32); + __type(value, struct tcp_estats_basic_event); +} ev_record_map SEC(".maps"); struct dummy_tracepoint_args { unsigned long long pad; diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 236f7649bd9c..06033c0725e0 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -13,12 +13,12 @@ #include "bpf_endian.h" #include "test_tcpbpf.h" -struct bpf_map_def SEC("maps") global_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(struct tcpbpf_globals), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, __u32); + __type(value, struct tcpbpf_globals); +} global_map SEC(".maps"); static inline void update_event_map(int event) { diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index fc7f0c0c36b2..360264e399cc 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -15,24 +15,18 @@ #include "test_tcpnotify.h" struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct tcpnotify_globals *value; -} global_map SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 4, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, __u32); + __type(value, struct tcpnotify_globals); +} global_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 key_size; - __u32 value_size; -} perf_event_map SEC(".maps") = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .max_entries = 2, -}; + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(__u32)); +} perf_event_map SEC(".maps"); int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index ec3d2c1c8cf9..0941c655b07b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -23,24 +23,18 @@ int _version SEC("version") = 1; struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u64 *value; -} rxcnt SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = 256, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - struct vip *key; - struct iptnl_info *value; -} vip2tnl SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = MAX_IPTNL_ENTRIES, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_IPTNL_ENTRIES); + __type(key, struct vip); + __type(value, struct iptnl_info); +} vip2tnl SEC(".maps"); static __always_inline void count_tx(__u32 protocol) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index cd4ae85f050f..b5068aa1f900 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -171,66 +171,47 @@ struct lb_stats { }; struct { - __u32 type; - __u32 max_entries; - struct vip_definition *key; - struct vip_meta *value; -} vip_map SEC(".maps") = { - .type = BPF_MAP_TYPE_HASH, - .max_entries = 512, -}; + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 512); + __type(key, struct vip_definition); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 map_flags; - struct flow_key *key; - struct real_pos_lru *value; -} lru_cache SEC(".maps") = { - .type = BPF_MAP_TYPE_LRU_HASH, - .max_entries = 300, - .map_flags = 1U << 1, -}; + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 300); + __uint(map_flags, 1U << 1); + __type(key, struct flow_key); + __type(value, struct real_pos_lru); +} lru_cache SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - __u32 *value; -} ch_rings SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 12 * 655, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 12 * 655); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct real_definition *value; -} reals SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 40, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 40); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct lb_stats *value; -} stats SEC(".maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .max_entries = 515, -}; + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 515); + __type(key, __u32); + __type(value, struct lb_stats); +} stats SEC(".maps"); struct { - __u32 type; - __u32 max_entries; - __u32 *key; - struct ctl_value *value; -} ctl_array SEC(".maps") = { - .type = BPF_MAP_TYPE_ARRAY, - .max_entries = 16, -}; + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 16); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); struct eth_hdr { unsigned char eth_dest[6]; -- Gitee From c0c2f020433d93ca7be4273c2a86709aec7192c4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Jul 2019 08:50:12 -0700 Subject: [PATCH 27/27] selftests/bpf: convert legacy BPF maps to BTF-defined ones ANBZ: #5530 commit 1639b17c72fa7ad977ccd0ad6c673e3f7048723b upstream. Convert selftests that were originally left out and new ones added recently to consistently use BTF-defined maps. Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Yuanhe Shu --- .../selftests/bpf/progs/get_cgroup_id_kern.c | 26 ++--- tools/testing/selftests/bpf/progs/pyperf.h | 90 +++++++------- .../bpf/progs/sockmap_verdict_prog.c | 48 ++++---- .../testing/selftests/bpf/progs/strobemeta.h | 68 +++++------ .../selftests/bpf/progs/test_map_in_map.c | 30 ++--- .../testing/selftests/bpf/progs/test_obj_id.c | 12 +- .../selftests/bpf/progs/test_xdp_loop.c | 26 ++--- .../selftests/bpf/test_queue_stack_map.h | 30 ++--- .../testing/selftests/bpf/test_sockmap_kern.h | 110 +++++++++--------- 9 files changed, 216 insertions(+), 224 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 014dba10b8a5..16c54ade6888 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -4,19 +4,19 @@ #include #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") cg_ids = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = 1, -}; - -struct bpf_map_def SEC("maps") pidmap = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} cg_ids SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} pidmap SEC(".maps"); SEC("tracepoint/syscalls/sys_enter_nanosleep") int trace(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 6b0781391be5..694239de339b 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -58,14 +58,6 @@ typedef struct { } Event; -struct bpf_elf_map { - __u32 type; - __u32 size_key; - __u32 size_value; - __u32 max_elem; - __u32 flags; -}; - typedef int pid_t; typedef struct { @@ -119,47 +111,47 @@ get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symb return true; } -struct bpf_elf_map SEC("maps") pidmap = { - .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(int), - .size_value = sizeof(PidData), - .max_elem = 1, -}; - -struct bpf_elf_map SEC("maps") eventmap = { - .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(int), - .size_value = sizeof(Event), - .max_elem = 1, -}; - -struct bpf_elf_map SEC("maps") symbolmap = { - .type = BPF_MAP_TYPE_HASH, - .size_key = sizeof(Symbol), - .size_value = sizeof(int), - .max_elem = 1, -}; - -struct bpf_elf_map SEC("maps") statsmap = { - .type = BPF_MAP_TYPE_ARRAY, - .size_key = sizeof(Stats), - .size_value = sizeof(int), - .max_elem = 1, -}; - -struct bpf_elf_map SEC("maps") perfmap = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .size_key = sizeof(int), - .size_value = sizeof(int), - .max_elem = 32, -}; - -struct bpf_elf_map SEC("maps") stackmap = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .size_key = sizeof(int), - .size_value = sizeof(long long) * 127, - .max_elem = 1000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, PidData); +} pidmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, Event); +} eventmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, Symbol); + __type(value, int); +} symbolmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, Stats); +} statsmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 32); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perfmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 1000); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(long long) * 127); +} stackmap SEC(".maps"); static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 49c9a6cf6825..2c9f883a3c9e 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -11,33 +11,33 @@ int _version SEC("version") = 1; ##__VA_ARGS__); \ }) -struct bpf_map_def SEC("maps") sock_map_rx = { - .type = BPF_MAP_TYPE_SOCKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map_rx SEC(".maps"); -struct bpf_map_def SEC("maps") sock_map_tx = { - .type = BPF_MAP_TYPE_SOCKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map_tx SEC(".maps"); -struct bpf_map_def SEC("maps") sock_map_msg = { - .type = BPF_MAP_TYPE_SOCKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map_msg SEC(".maps"); -struct bpf_map_def SEC("maps") sock_map_break = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_break SEC(".maps"); SEC("sk_skb2") int bpf_prog2(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 82ee72bc53db..83e15a981cc2 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -204,40 +204,40 @@ struct strobelight_bpf_sample { char dummy_safeguard; }; -struct bpf_map_def SEC("maps") samples = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 32, -}; - -struct bpf_map_def SEC("maps") stacks_0 = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(uint32_t), - .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH, - .max_entries = 16, -}; - -struct bpf_map_def SEC("maps") stacks_1 = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(uint32_t), - .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH, - .max_entries = 16, -}; - -struct bpf_map_def SEC("maps") sample_heap = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(uint32_t), - .value_size = sizeof(struct strobelight_bpf_sample), - .max_entries = 1, -}; - -struct bpf_map_def SEC("maps") strobemeta_cfgs = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(pid_t), - .value_size = sizeof(struct strobemeta_cfg), - .max_entries = STROBE_MAX_CFGS, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 32); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} samples SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH); +} stacks_0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH); +} stacks_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, uint32_t); + __type(value, struct strobelight_bpf_sample); +} sample_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, STROBE_MAX_CFGS); + __type(key, pid_t); + __type(value, struct strobemeta_cfg); +} strobemeta_cfgs SEC(".maps"); /* Type for the dtv. */ /* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */ diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index ce923e67e08e..338a0656ec93 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -5,23 +5,23 @@ #include #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") mim_array = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .key_size = sizeof(int), +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(map_flags, 0); + __uint(key_size, sizeof(__u32)); /* must be sizeof(__u32) for map in map */ - .value_size = sizeof(__u32), - .max_entries = 1, - .map_flags = 0, -}; - -struct bpf_map_def SEC("maps") mim_hash = { - .type = BPF_MAP_TYPE_HASH_OF_MAPS, - .key_size = sizeof(int), + __uint(value_size, sizeof(__u32)); +} mim_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __uint(map_flags, 0); + __uint(key_size, sizeof(int)); /* must be sizeof(__u32) for map in map */ - .value_size = sizeof(__u32), - .max_entries = 1, - .map_flags = 0, -}; + __uint(value_size, sizeof(__u32)); +} mim_hash SEC(".maps"); SEC("xdp_mimtest") int xdp_mimtest0(struct xdp_md *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 880d2963b472..c071d8815f48 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -16,12 +16,12 @@ int _version SEC("version") = 1; -struct bpf_map_def SEC("maps") test_map_id = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} test_map_id SEC(".maps"); SEC("test_obj_id_dummy") int test_obj_id(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index 7fa4677df22e..97175f73c3fe 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -18,19 +18,19 @@ int _version SEC("version") = 1; -struct bpf_map_def SEC("maps") rxcnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = 256, -}; - -struct bpf_map_def SEC("maps") vip2tnl = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct vip), - .value_size = sizeof(struct iptnl_info), - .max_entries = MAX_IPTNL_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_IPTNL_ENTRIES); + __type(key, struct vip); + __type(value, struct iptnl_info); +} vip2tnl SEC(".maps"); static __always_inline void count_tx(__u32 protocol) { diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h index 295b9b3bc5c7..0e014d3b2b36 100644 --- a/tools/testing/selftests/bpf/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/test_queue_stack_map.h @@ -10,21 +10,21 @@ int _version SEC("version") = 1; -struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { - .type = MAP_TYPE, - .key_size = 0, - .value_size = sizeof(__u32), - .max_entries = 32, - .map_flags = 0, -}; - -struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { - .type = MAP_TYPE, - .key_size = 0, - .value_size = sizeof(__u32), - .max_entries = 32, - .map_flags = 0, -}; +struct { + __uint(type, MAP_TYPE); + __uint(max_entries, 32); + __uint(map_flags, 0); + __uint(key_size, 0); + __uint(value_size, sizeof(__u32)); +} map_in SEC(".maps"); + +struct { + __uint(type, MAP_TYPE); + __uint(max_entries, 32); + __uint(map_flags, 0); + __uint(key_size, 0); + __uint(value_size, sizeof(__u32)); +} map_out SEC(".maps"); SEC("test") int _test(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index e7e7fbee442d..26f2b42868cb 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -35,61 +35,61 @@ ##__VA_ARGS__); \ }) -struct bpf_map_def SEC("maps") sock_map = { - .type = TEST_MAP_TYPE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; - -struct bpf_map_def SEC("maps") sock_map_txmsg = { - .type = TEST_MAP_TYPE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; - -struct bpf_map_def SEC("maps") sock_map_redir = { - .type = TEST_MAP_TYPE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 20, -}; - -struct bpf_map_def SEC("maps") sock_apply_bytes = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1 -}; - -struct bpf_map_def SEC("maps") sock_cork_bytes = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1 -}; - -struct bpf_map_def SEC("maps") sock_bytes = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 6 -}; - -struct bpf_map_def SEC("maps") sock_redir_flags = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1 -}; - -struct bpf_map_def SEC("maps") sock_skb_opts = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1 -}; +struct { + __uint(type, TEST_MAP_TYPE); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map SEC(".maps"); + +struct { + __uint(type, TEST_MAP_TYPE); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map_txmsg SEC(".maps"); + +struct { + __uint(type, TEST_MAP_TYPE); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} sock_map_redir SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_apply_bytes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_cork_bytes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 6); + __type(key, int); + __type(value, int); +} sock_bytes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_redir_flags SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_skb_opts SEC(".maps"); SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) -- Gitee