diff --git a/Documentation/ABI/testing/sysfs-kernel-btf b/Documentation/ABI/testing/sysfs-kernel-btf new file mode 100644 index 0000000000000000000000000000000000000000..2c9744b2cd59fa5c7a054651efd7bb1edc960363 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-btf @@ -0,0 +1,17 @@ +What: /sys/kernel/btf +Date: Aug 2019 +KernelVersion: 5.5 +Contact: bpf@vger.kernel.org +Description: + Contains BTF type information and related data for kernel and + kernel modules. + +What: /sys/kernel/btf/vmlinux +Date: Aug 2019 +KernelVersion: 5.5 +Contact: bpf@vger.kernel.org +Description: + Read-only binary attribute exposing kernel's own BTF type + information with description of all internal kernel types. See + Documentation/bpf/btf.rst for detailed description of format + itself. diff --git a/Makefile b/Makefile index 676554e53e580ada503b2537e98b6153126880e9..ff8cbfb98ef8a7eb9d224bad2208c8c3e482326c 100644 --- a/Makefile +++ b/Makefile @@ -377,6 +377,7 @@ NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +PAHOLE = pahole LEX = flex YACC = bison AWK = awk @@ -433,7 +434,7 @@ GCC_PLUGINS_CFLAGS := CLANG_FLAGS := export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS +export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS export MAKE LEX YACC AWK GENKSYMS INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS diff --git a/arch/arm64/configs/anolis-debug_defconfig b/arch/arm64/configs/anolis-debug_defconfig index b25051496a2951798ad7c34f84c07025898db589..ddfd680477e19b15b3145ad693ac05dcc9585e79 100644 --- a/arch/arm64/configs/anolis-debug_defconfig +++ b/arch/arm64/configs/anolis-debug_defconfig @@ -5948,6 +5948,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_INFO_SPLIT is not set CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y # CONFIG_GDB_SCRIPTS is not set CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 diff --git a/arch/arm64/configs/anolis_defconfig b/arch/arm64/configs/anolis_defconfig index a7480ccce811575aedc63e88884c99b5b040c646..f39c4522c6ba791979a3a376a7f4f948b20e76ec 100644 --- a/arch/arm64/configs/anolis_defconfig +++ b/arch/arm64/configs/anolis_defconfig @@ -5946,6 +5946,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_INFO_SPLIT is not set CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y # CONFIG_GDB_SCRIPTS is not set CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 diff --git a/arch/x86/configs/anolis-debug_defconfig b/arch/x86/configs/anolis-debug_defconfig index d24fff8e3c9d8bbf3cf68632c809d15f22d6317d..7c2c7911e9dba9a373410246f44ac18d524e525d 100644 --- a/arch/x86/configs/anolis-debug_defconfig +++ b/arch/x86/configs/anolis-debug_defconfig @@ -7504,6 +7504,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_INFO_SPLIT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set +CONFIG_DEBUG_INFO_BTF=y # CONFIG_GDB_SCRIPTS is not set CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 diff --git a/arch/x86/configs/anolis_defconfig b/arch/x86/configs/anolis_defconfig index 8d15f3e17e3e90e187b6f66a5d1427f5bda08189..3fa3155fd8f94f1b3e5997a54f1bf8eb403dd1df 100644 --- a/arch/x86/configs/anolis_defconfig +++ b/arch/x86/configs/anolis_defconfig @@ -7499,6 +7499,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_INFO_SPLIT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set +CONFIG_DEBUG_INFO_BTF=y # CONFIG_GDB_SCRIPTS is not set CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 531d778c29ecc3dae7e9f4294effa35d6decfec4..135bb9f7889271f342926f942b2d1f356890d982 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -59,7 +59,6 @@ struct net { spinlock_t rules_mod_lock; u32 hash_mix; - atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 919d9633a67ad2fab445d6175869ed7e39164b28..df1fd82ce4c2e65f9ce5691a613b4b7b09e9d4b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1467,8 +1467,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index df3dc3b80597b89eedcd71b45257a066927c3b7c..622e912a213b5895cd06f58ea595733add0b0273 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -27,3 +27,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o ifeq ($(CONFIG_INET),y) obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o endif +ifeq ($(CONFIG_SYSFS),y) +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o +endif diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c new file mode 100644 index 0000000000000000000000000000000000000000..2cc5d258e9b9489b39a865b6916cc0f0d13922ea --- /dev/null +++ b/kernel/bpf/sysfs_btf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide kernel BTF information for introspection and use by eBPF tools. + */ +#include +#include +#include +#include +#include + +/* See scripts/link-vmlinux.sh, gen_btf() func for details */ +extern char __weak _binary__btf_vmlinux_bin_start[]; +extern char __weak _binary__btf_vmlinux_bin_end[]; + +static ssize_t +btf_vmlinux_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + memcpy(buf, _binary__btf_vmlinux_bin_start + off, len); + return len; +} + +static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { + .attr = { .name = "vmlinux", .mode = 0444, }, + .read = btf_vmlinux_read, +}; + +static struct kobject *btf_kobj; + +static int __init btf_vmlinux_init(void) +{ + bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end - + _binary__btf_vmlinux_bin_start; + + if (!_binary__btf_vmlinux_bin_start || bin_attr_btf_vmlinux.size == 0) + return 0; + + btf_kobj = kobject_create_and_add("btf", kernel_kobj); + if (!btf_kobj) + return -ENOMEM; + + return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); +} + +subsys_initcall(btf_vmlinux_init); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 157316d8f36f0f681646835ac27075f9cae71485..b8a3fa0bf4fe79b9da1e86c0c4a930504626c317 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -200,6 +200,16 @@ config DEBUG_INFO_DWARF4 But it significantly improves the success of resolving variables in gdb on optimized code. +config DEBUG_INFO_BTF + bool "Generate BTF typeinfo" + depends on DEBUG_INFO + depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED + depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST + help + Generate deduplicated BTF type information from DWARF debug info. + Turning this on expects presence of pahole tool, which will convert + DWARF type info into equivalent deduplicated BTF type info. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" depends on DEBUG_INFO diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 3312a5849a974e372a49164abca8f7067a2ed7e7..c13ffbd33d8d6ad4f591571325aaee321df5b242 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; +static atomic64_t cookie_gen; u64 sock_gen_cookie(struct sock *sk) { @@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk) if (res) return res; - res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + res = atomic64_inc_return(&cookie_gen); atomic64_cmpxchg(&sk->sk_cookie, 0, res); } } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 3117da68d1f79571ffb16050f5125e1d429a26f5..6855273381c2f589dadf1ee9eeaa1bcc21408f2d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -40,7 +40,7 @@ set -e info() { if [ "${quiet}" != "silent_" ]; then - printf " %-7s %s\n" ${1} ${2} + printf " %-7s %s\n" "${1}" "${2}" fi } @@ -79,13 +79,17 @@ modpost_link() } # Link of vmlinux -# ${1} - optional extra .o files -# ${2} - output file +# ${1} - output file +# ${2}, ${3}, ... - optional extra .o files vmlinux_link() { local lds="${objtree}/${KBUILD_LDS}" + local output=${1} local objects + # skip output file argument + shift + if [ "${SRCARCH}" != "um" ]; then objects="--whole-archive \ built-in.a \ @@ -93,9 +97,10 @@ vmlinux_link() --start-group \ ${KBUILD_VMLINUX_LIBS} \ --end-group \ - ${1}" + ${@}" - ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ + -o ${output} \ -T ${lds} ${objects} else objects="-Wl,--whole-archive \ @@ -104,9 +109,10 @@ vmlinux_link() -Wl,--start-group \ ${KBUILD_VMLINUX_LIBS} \ -Wl,--end-group \ - ${1}" + ${@}" - ${CC} ${CFLAGS_vmlinux} -o ${2} \ + ${CC} ${CFLAGS_vmlinux} \ + -o ${output} \ -Wl,-T,${lds} \ ${objects} \ -lutil -lrt -lpthread @@ -114,6 +120,40 @@ vmlinux_link() fi } +# generate .BTF typeinfo from DWARF debuginfo +# ${1} - vmlinux image +# ${2} - file to dump raw BTF data into +gen_btf() +{ + local pahole_ver + local bin_arch + + if ! [ -x "$(command -v ${PAHOLE})" ]; then + echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" + return 1 + fi + + pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') + if [ "${pahole_ver}" -lt "113" ]; then + echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" + return 1 + fi + + info "BTF" ${2} + vmlinux_link ${1} + LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + + # dump .BTF section into raw binary file to link with final vmlinux + bin_arch=$(LANG=C ${OBJDUMP} -f ${1} | grep architecture | \ + cut -d, -f1 | cut -d' ' -f2) + bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \ + awk '{print $4}') + ${OBJCOPY} --change-section-address .BTF=0 \ + --set-section-flags .BTF=alloc -O binary \ + --only-section=.BTF ${1} .btf.vmlinux.bin + ${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \ + --rename-section .data=.BTF .btf.vmlinux.bin ${2} +} # Create ${2} .o file with all symbols from the ${1} object file kallsyms() @@ -168,6 +208,7 @@ sorttable() # Delete output files in case of error cleanup() { + rm -f .btf.* rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_vmlinux* @@ -240,6 +281,17 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o # Generate weak linker script gen_weak_provide_hidden vmlinux.o +btf_vmlinux_bin_o="" +if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then + if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then + btf_vmlinux_bin_o=.btf.vmlinux.bin.o + else + echo >&2 "Failed to generate BTF for vmlinux" + echo >&2 "Try to disable CONFIG_DEBUG_INFO_BTF" + exit 1 + fi +fi + kallsymso="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then @@ -271,11 +323,11 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then kallsyms_vmlinux=.tmp_vmlinux2 # step 1 - vmlinux_link "" .tmp_vmlinux1 + vmlinux_link .tmp_vmlinux1 ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o # step 2 - vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2 + vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o # step 3 @@ -286,14 +338,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then kallsymso=.tmp_kallsyms3.o kallsyms_vmlinux=.tmp_vmlinux3 - vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3 - + vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o fi fi info LD vmlinux -vmlinux_link "${kallsymso}" vmlinux +vmlinux_link vmlinux "${kallsymso}" "${btf_vmlinux_bin_o}" info SYSMAP System.map mksysmap vmlinux System.map diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 453a9e1e8549e219682379df5875efdcf001b411..e6802dd5d9497ebe8f51eaaf7f0a099c86b9649e 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -50,14 +50,14 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf $(LIBBPF) +LIBS = -lelf -lz $(LIBBPF) INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args reallocarray -FEATURE_DISPLAY = libbfd disassembler-four-args +FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib +FEATURE_DISPLAY = libbfd disassembler-four-args zlib check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -109,6 +109,8 @@ OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< +$(OUTPUT)feature.o: | zdep + $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) @@ -147,6 +149,9 @@ doc-uninstall: FORCE: -.PHONY: all FORCE clean install uninstall +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + +.PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index d672d9086fff93a0b3660dc91c6288ac3e5a82e4..03bdc5b3ac49e11b9a2790b7c867d20178afc9d9 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -14,6 +14,7 @@ #include #include +#include #include "main.h" @@ -284,34 +285,32 @@ static void probe_jit_limit(void) } } -static char *get_kernel_config_option(FILE *fd, const char *option) +static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n, + char **value) { - size_t line_n = 0, optlen = strlen(option); - char *res, *strval, *line = NULL; - ssize_t n; + char *sep; - rewind(fd); - while ((n = getline(&line, &line_n, fd)) > 0) { - if (strncmp(line, option, optlen)) + while (gzgets(file, buf, n)) { + if (strncmp(buf, "CONFIG_", 7)) continue; - /* Check we have at least '=', value, and '\n' */ - if (strlen(line) < optlen + 3) - continue; - if (*(line + optlen) != '=') + + sep = strchr(buf, '='); + if (!sep) continue; /* Trim ending '\n' */ - line[strlen(line) - 1] = '\0'; + buf[strlen(buf) - 1] = '\0'; + + /* Split on '=' and ensure that a value is present. */ + *sep = '\0'; + if (!sep[1]) + continue; - /* Copy and return config option value */ - strval = line + optlen + 1; - res = strdup(strval); - free(line); - return res; + *value = sep + 1; + return true; } - free(line); - return NULL; + return false; } static void probe_kernel_image_config(void) @@ -386,59 +385,61 @@ static void probe_kernel_image_config(void) /* test_bpf module for BPF tests */ "CONFIG_TEST_BPF", }; - char *value, *buf = NULL; + char *values[ARRAY_SIZE(options)] = { }; struct utsname utsn; char path[PATH_MAX]; - size_t i, n; - ssize_t ret; - FILE *fd; + gzFile file = NULL; + char buf[4096]; + char *value; + size_t i; - if (uname(&utsn)) - goto no_config; + if (!uname(&utsn)) { + snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); - snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); + /* gzopen also accepts uncompressed files. */ + file = gzopen(path, "r"); + } - fd = fopen(path, "r"); - if (!fd && errno == ENOENT) { - /* Some distributions put the config file at /proc/config, give - * it a try. - * Sometimes it is also at /proc/config.gz but we do not try - * this one for now, it would require linking against libz. + if (!file) { + /* Some distributions build with CONFIG_IKCONFIG=y and put the + * config file at /proc/config.gz. */ - fd = fopen("/proc/config", "r"); + file = gzopen("/proc/config.gz", "r"); } - if (!fd) { + if (!file) { p_info("skipping kernel config, can't open file: %s", strerror(errno)); - goto no_config; + goto end_parse; } /* Sanity checks */ - ret = getline(&buf, &n, fd); - ret = getline(&buf, &n, fd); - if (!buf || !ret) { + if (!gzgets(file, buf, sizeof(buf)) || + !gzgets(file, buf, sizeof(buf))) { p_info("skipping kernel config, can't read from file: %s", strerror(errno)); - free(buf); - goto no_config; + goto end_parse; } if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) { p_info("skipping kernel config, can't find correct file"); - free(buf); - goto no_config; + goto end_parse; } - free(buf); - for (i = 0; i < ARRAY_SIZE(options); i++) { - value = get_kernel_config_option(fd, options[i]); - print_kernel_option(options[i], value); - free(value); + while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) { + for (i = 0; i < ARRAY_SIZE(options); i++) { + if (values[i] || strcmp(buf, options[i])) + continue; + + values[i] = strdup(value); + } } - fclose(fd); - return; -no_config: - for (i = 0; i < ARRAY_SIZE(options); i++) - print_kernel_option(options[i], NULL); +end_parse: + if (file) + gzclose(file); + + for (i = 0; i < ARRAY_SIZE(options); i++) { + print_kernel_option(options[i], values[i]); + free(values[i]); + } } static bool probe_bpf_syscall(const char *define_prefix) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d7217ff52ca0e84203d34ff3453309ac92101df3..d71e7d7006822ff0617798407f74ff7f7f927230 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1467,8 +1467,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7e084f1ac35b051150613e56dc85f9f42cdc679f..1e3387c96aade0e4b643f0059674b596cdfcb18e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2857,15 +2857,61 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, return 0; } +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + /* * Probe few well-known locations for vmlinux kernel image and try to load BTF * data out of it to use for target BTF. */ static struct btf *bpf_core_find_kernel_btf(void) { - const char *locations[] = { - "/lib/modules/%1$s/vmlinux-%1$s", - "/usr/lib/modules/%1$s/kernel/vmlinux", + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, }; char path[PATH_MAX + 1]; struct utsname buf; @@ -2875,14 +2921,18 @@ static struct btf *bpf_core_find_kernel_btf(void) uname(&buf); for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i], buf.release); + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); if (access(path, R_OK)) continue; - btf = btf__parse_elf(path, NULL); - pr_debug("kernel BTF load from '%s': %ld\n", - path, PTR_ERR(btf)); + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); if (IS_ERR(btf)) continue;