From a105688e8538a1a113205e48dc9bf521a40ae055 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 29 Oct 2019 15:30:27 +0100 Subject: [PATCH 01/21] selftests/bpf: Test narrow load from bpf_sysctl.write ANBZ: #5530 commit 9ffccb76062ab882e45bbfb9d370e366c27fa04b upstream. There are tests for full and narrows loads from bpf_sysctl.file_pos, but for bpf_sysctl.write only full load is tested. Add the missing test. Suggested-by: Andrey Ignatov Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20191029143027.28681-1-iii@linux.ibm.com Signed-off-by: Yuanhe Shu --- tools/testing/selftests/bpf/test_sysctl.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 7c6e5b173f33..40bd93a6e7ae 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -120,6 +120,29 @@ static struct sysctl_test tests[] = { .newval = "(none)", /* same as default, should fail anyway */ .result = OP_EPERM, }, + { + .descr = "ctx:write sysctl:write read ok narrow", + .insns = { + /* u64 w = (u16)write & 1; */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write) + 2), +#endif + BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1), + /* return 1 - w; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, { .descr = "ctx:write sysctl:read write reject", .insns = { -- Gitee From b096251e4cf14be0e5879bf7e2deab24b0e6930c Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Oct 2021 03:08:26 +0200 Subject: [PATCH 02/21] libbpf: Fix endianness detection in BPF_CORE_READ_BITFIELD_PROBED() ANBZ: #5530 commit 45f2bebc8079788f62f22d9e8b2819afb1789d7b upstream. __BYTE_ORDER is supposed to be defined by a libc, and __BYTE_ORDER__ - by a compiler. bpf_core_read.h checks __BYTE_ORDER == __LITTLE_ENDIAN, which is true if neither are defined, leading to incorrect behavior on big-endian hosts if libc headers are not included, which is often the case. Fixes: ee26dade0e3b ("libbpf: Add support for relocatable bitfields") Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211026010831.748682-2-iii@linux.ibm.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/bpf_core_read.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index fae33e0314bb..1de14c9a302b 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -22,7 +22,7 @@ enum bpf_field_info_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read((void *)dst, \ __CORE_RELO(src, fld, BYTE_SIZE), \ -- Gitee From 14a3ab1459a5db66cd934f54b013989028e12b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 2 Nov 2019 12:09:38 +0100 Subject: [PATCH 03/21] libbpf: Store map pin path and status in struct bpf_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 4580b25fcee5347327aaffcec31c615ec28a889a upstream. Support storing and setting a pin path in struct bpf_map, which can be used for automatic pinning. Also store the pin status so we can avoid attempts to re-pin a map that has already been pinned (or reused from a previous pinning). The behaviour of bpf_object__{un,}pin_maps() is changed so that if it is called with a NULL path argument (which was previously illegal), it will (un)pin only those maps that have a pin_path set. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157269297876.394725.14782206533681896279.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 164 +++++++++++++++++++++++++++++---------- tools/lib/bpf/libbpf.h | 8 ++ tools/lib/bpf/libbpf.map | 3 + 3 files changed, 134 insertions(+), 41 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 326d85f7956e..503edcf37485 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -229,6 +229,8 @@ struct bpf_map { void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + char *pin_path; + bool pinned; }; struct bpf_secdata { @@ -4195,47 +4197,119 @@ int bpf_map__pin(struct bpf_map *map, const char *path) char *cp, errmsg[STRERR_BUFSIZE]; int err; - err = check_path(path); - if (err) - return err; - if (map == NULL) { pr_warn("invalid map pointer\n"); return -EINVAL; } - if (bpf_obj_pin(map->fd, path)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); - return -errno; + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } else if (map->pinned) { + pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", + bpf_map__name(map), map->pin_path); + return 0; + } + } else { + if (!path) { + pr_warn("missing a path to pin map '%s' at\n", + bpf_map__name(map)); + return -EINVAL; + } else if (map->pinned) { + pr_warn("map '%s' already pinned\n", bpf_map__name(map)); + return -EEXIST; + } + + map->pin_path = strdup(path); + if (!map->pin_path) { + err = -errno; + goto out_err; + } } - pr_debug("pinned map '%s'\n", path); + err = check_path(map->pin_path); + if (err) + return err; + + if (bpf_obj_pin(map->fd, map->pin_path)) { + err = -errno; + goto out_err; + } + + map->pinned = true; + pr_debug("pinned map '%s'\n", map->pin_path); return 0; + +out_err: + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to pin map: %s\n", cp); + return err; } int bpf_map__unpin(struct bpf_map *map, const char *path) { int err; - err = check_path(path); - if (err) - return err; - if (map == NULL) { pr_warn("invalid map pointer\n"); return -EINVAL; } + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } + path = map->pin_path; + } else if (!path) { + pr_warn("no path to unpin map '%s' from\n", + bpf_map__name(map)); + return -EINVAL; + } + + err = check_path(path); + if (err) + return err; + err = unlink(path); if (err != 0) return -errno; - pr_debug("unpinned map '%s'\n", path); + + map->pinned = false; + pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); return 0; } +int bpf_map__set_pin_path(struct bpf_map *map, const char *path) +{ + char *new = NULL; + + if (path) { + new = strdup(path); + if (!new) + return -errno; + } + + free(map->pin_path); + map->pin_path = new; + return 0; +} + +const char *bpf_map__get_pin_path(const struct bpf_map *map) +{ + return map->pin_path; +} + +bool bpf_map__is_pinned(const struct bpf_map *map) +{ + return map->pinned; +} + int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { struct bpf_map *map; @@ -4254,20 +4328,27 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return err; bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin_maps; + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + pin_path = buf; + } else if (!map->pin_path) { + continue; } - err = bpf_map__pin(map, buf); + err = bpf_map__pin(map, pin_path); if (err) goto err_unpin_maps; } @@ -4276,17 +4357,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) err_unpin_maps: while ((map = bpf_map__prev(map, obj))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - continue; - else if (len >= PATH_MAX) + if (!map->pin_path) continue; - bpf_map__unpin(map, buf); + bpf_map__unpin(map, NULL); } return err; @@ -4301,17 +4375,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) return -ENOENT; bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + pin_path = buf; + } else if (!map->pin_path) { + continue; + } - err = bpf_map__unpin(map, buf); + err = bpf_map__unpin(map, pin_path); if (err) return err; } @@ -4436,6 +4517,7 @@ void bpf_object__close(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); + zfree(&obj->maps[i].pin_path); if (obj->maps[i].clear_priv) obj->maps[i].clear_priv(&obj->maps[i], obj->maps[i].priv); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 2b126ee5e173..e71773a6bfdf 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -124,6 +124,11 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size); int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); + +/* pin_maps and unpin_maps can both be called with a NULL path, in which case + * they will use the pin_path attribute of each map (and ignore all maps that + * don't have a pin_path set). + */ LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); @@ -387,6 +392,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7627c68c572a..1210b079e971 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -192,6 +192,9 @@ LIBBPF_0.0.5 { LIBBPF_0.0.6 { global: + bpf_map__get_pin_path; + bpf_map__is_pinned; + bpf_map__set_pin_path; bpf_object__open_file; bpf_object__open_mem; bpf_program__get_expected_attach_type; -- Gitee From 4724cef551756408605d25fac62d4141c5e699fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 2 Nov 2019 12:09:39 +0100 Subject: [PATCH 04/21] libbpf: Move directory creation into _pin() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 196f8487f51ee6e2a46f51e10ac3f4ca67574ba9 upstream. The existing pin_*() functions all try to create the parent directory before pinning. Move this check into the per-object _pin() functions instead. This ensures consistent behaviour when auto-pinning is added (which doesn't go through the top-level pin_maps() function), at the cost of a few more calls to mkdir(). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157269297985.394725.5882630952992598610.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 61 +++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 503edcf37485..1248f292e02a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3975,6 +3975,28 @@ int bpf_object__load(struct bpf_object *obj) return bpf_object__load_xattr(&attr); } +static int make_parent_dir(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + char *dname, *dir; + int err = 0; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (mkdir(dir, 0700) && errno != EEXIST) + err = -errno; + + free(dname); + if (err) { + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to mkdir %s: %s\n", path, cp); + } + return err; +} + static int check_path(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -4011,6 +4033,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, char *cp, errmsg[STRERR_BUFSIZE]; int err; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); if (err) return err; @@ -4064,25 +4090,14 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return 0; } -static int make_dir(const char *path) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err = 0; - - if (mkdir(path, 0700) && errno != EEXIST) - err = -errno; - - if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); - } - return err; -} - int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); if (err) return err; @@ -4103,10 +4118,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return bpf_program__pin_instance(prog, path, 0); } - err = make_dir(path); - if (err) - return err; - for (i = 0; i < prog->instances.nr; i++) { char buf[PATH_MAX]; int len; @@ -4229,6 +4240,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } } + err = make_parent_dir(map->pin_path); + if (err) + return err; + err = check_path(map->pin_path); if (err) return err; @@ -4323,10 +4338,6 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return -ENOENT; } - err = make_dir(path); - if (err) - return err; - bpf_object__for_each_map(map, obj) { char *pin_path = NULL; char buf[PATH_MAX]; @@ -4413,10 +4424,6 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return -ENOENT; } - err = make_dir(path); - if (err) - return err; - bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; -- Gitee From 3abe9ef57080acb0ed92a103394863cd23ec562d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 2 Nov 2019 12:09:41 +0100 Subject: [PATCH 05/21] libbpf: Add auto-pinning of maps when loading BPF objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 57a00f41644f20b11c12a27061d814655f633544 upstream. This adds support to libbpf for setting map pinning information as part of the BTF map declaration, to get automatic map pinning (and reuse) on load. The pinning type currently only supports a single PIN_BY_NAME mode, where each map will be pinned by its name in a path that can be overridden, but defaults to /sys/fs/bpf. Since auto-pinning only does something if any maps actually have a 'pinning' BTF attribute set, we default the new option to enabled, on the assumption that seamless pinning is what most callers want. When a map has a pin_path set at load time, libbpf will compare the map pinned at that location (if any), and if the attributes match, will re-use that map instead of creating a new one. If no existing map is found, the newly created map will instead be pinned at the location. Programs wanting to customise the pinning can override the pinning paths using bpf_map__set_pin_path() before calling bpf_object__load() (including setting it to NULL to disable pinning of a particular map). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157269298092.394725.3966306029218559681.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/bpf_helpers.h | 6 ++ tools/lib/bpf/libbpf.c | 146 ++++++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf.h | 13 +++- 3 files changed, 156 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 2babe121c16f..fa1fd80b8526 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -236,6 +236,12 @@ struct bpf_map_def { unsigned int map_flags; }; +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = (void *) BPF_FUNC_skb_load_bytes; static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) = diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1248f292e02a..b1cd01d2aff5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1095,10 +1095,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ + char buf[PATH_MAX]; + int err, len; + + if (!path) + path = "/sys/fs/bpf"; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_map__set_pin_path(map, buf); + if (err) + return err; + + return 0; +} + static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_type *sec, int var_idx, int sec_idx, - const Elf_Data *data, bool strict) + const Elf_Data *data, bool strict, + const char *pin_root_path) { const struct btf_type *var, *def, *t; const struct btf_var_secinfo *vi; @@ -1273,6 +1295,30 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } map->def.value_size = sz; map->btf_value_type_id = t->type; + } else if (strcmp(name, "pinning") == 0) { + __u32 val; + int err; + + if (!get_map_field_int(map_name, obj->btf, def, m, + &val)) + return -EINVAL; + pr_debug("map '%s': found pinning = %u.\n", + map_name, val); + + if (val != LIBBPF_PIN_NONE && + val != LIBBPF_PIN_BY_NAME) { + pr_warn("map '%s': invalid pinning value %u.\n", + map_name, val); + return -EINVAL; + } + if (val == LIBBPF_PIN_BY_NAME) { + err = build_map_pin_path(map, pin_root_path); + if (err) { + pr_warn("map '%s': couldn't build pin path.\n", + map_name); + return err; + } + } } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", @@ -1292,7 +1338,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return 0; } -static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, + const char *pin_root_path) { const struct btf_type *sec = NULL; int nr_types, i, vlen, err; @@ -1334,7 +1381,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, - data, strict); + data, strict, pin_root_path); if (err) return err; } @@ -1342,7 +1389,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps) +static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) { bool strict = !relaxed_maps; int err; @@ -1351,7 +1399,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps) if (err) return err; - err = bpf_object__init_user_btf_maps(obj, strict); + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); if (err) return err; @@ -1549,7 +1597,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) +static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1684,7 +1733,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) } err = bpf_object__init_btf(obj, btf_data, btf_ext_data); if (!err) - err = bpf_object__init_maps(obj, relaxed_maps); + err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); if (!err) err = bpf_object__sanitize_and_load_btf(obj); if (!err) @@ -2161,6 +2210,66 @@ bpf_object__probe_caps(struct bpf_object *obj) return 0; } +static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) +{ + struct bpf_map_info map_info = {}; + char msg[STRERR_BUFSIZE]; + __u32 map_info_len; + + map_info_len = sizeof(map_info); + + if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); + return false; + } + + return (map_info.type == map->def.type && + map_info.key_size == map->def.key_size && + map_info.value_size == map->def.value_size && + map_info.max_entries == map->def.max_entries && + map_info.map_flags == map->def.map_flags); +} + +static int +bpf_object__reuse_map(struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, pin_fd; + + pin_fd = bpf_obj_get(map->pin_path); + if (pin_fd < 0) { + err = -errno; + if (err == -ENOENT) { + pr_debug("found no pinned map to reuse at '%s'\n", + map->pin_path); + return 0; + } + + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("couldn't retrieve pinned map '%s': %s\n", + map->pin_path, cp); + return err; + } + + if (!map_is_reuse_compat(map, pin_fd)) { + pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", + map->pin_path); + close(pin_fd); + return -EINVAL; + } + + err = bpf_map__reuse_fd(map, pin_fd); + if (err) { + close(pin_fd); + return err; + } + map->pinned = true; + pr_debug("reused pinned map at '%s'\n", map->pin_path); + + return 0; +} + static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { @@ -2203,6 +2312,15 @@ bpf_object__create_maps(struct bpf_object *obj) char *cp, errmsg[STRERR_BUFSIZE]; int *pfd = &map->fd; + if (map->pin_path) { + err = bpf_object__reuse_map(map); + if (err) { + pr_warn("error reusing pinned map %s\n", + map->name); + return err; + } + } + if (map->fd >= 0) { pr_debug("skip map create (preset) %s: fd=%d\n", map->name, map->fd); @@ -2281,6 +2399,15 @@ bpf_object__create_maps(struct bpf_object *obj) } } + if (map->pin_path && !map->pinned) { + err = bpf_map__pin(map, NULL); + if (err) { + pr_warn("failed to auto-pin map name '%s' at '%s'\n", + map->name, map->pin_path); + return err; + } + } + pr_debug("created map %s: fd=%d\n", map->name, *pfd); } @@ -3782,6 +3909,7 @@ static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, struct bpf_object_open_opts *opts) { + const char *pin_root_path; struct bpf_program *prog; struct bpf_object *obj; const char *obj_name; @@ -3816,11 +3944,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps), err, out); + CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), + err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); bpf_object__elf_finish(obj); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e71773a6bfdf..6ddc0419337b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -103,8 +103,13 @@ struct bpf_object_open_opts { bool relaxed_maps; /* process CO-RE relocations non-strictly, allowing them to fail */ bool relaxed_core_relocs; + /* maps that set the 'pinning' attribute in their definition will have + * their pin_path attribute set to a file in this directory, and be + * auto-pinned to that path on load; defaults to "/sys/fs/bpf". + */ + const char *pin_root_path; }; -#define bpf_object_open_opts__last_field relaxed_core_relocs +#define bpf_object_open_opts__last_field pin_root_path LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * @@ -125,6 +130,12 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + /* pin_maps and unpin_maps can both be called with a NULL path, in which case * they will use the pin_path attribute of each map (and ignore all maps that * don't have a pin_path set). -- Gitee From d04b88dcf0aa2d08c3372bc3f606ee3f24926082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 2 Nov 2019 12:09:42 +0100 Subject: [PATCH 06/21] selftests: Add tests for automatic map pinning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 2f4a32cc83a584ac3669d44fa2aa1d7f115d44c1 upstream. This adds a new BPF selftest to exercise the new automatic map pinning code. [backport note] Introduce definition of CHECK_FAIL from (d38835b75f67 "selftests/bpf: test_progs: remove global fail/success counts"). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157269298209.394725.15420085139296213182.stgit@toke.dk Signed-off-by: Yuanhe Shu --- .../selftests/bpf/progs/test_pinning.c | 31 +++ .../bpf/progs/test_pinning_invalid.c | 16 ++ tools/testing/selftests/bpf/test_progs.c | 217 ++++++++++++++++++ 3 files changed, 264 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/test_pinning.c create mode 100644 tools/testing/selftests/bpf/progs/test_pinning_invalid.c diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c new file mode 100644 index 000000000000..f69a4a50d056 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nopinmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, LIBBPF_PIN_NONE); +} nopinmap2 SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c new file mode 100644 index 000000000000..51b38abe7ba1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, 2); /* invalid */ +} nopinmap3 SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 716dc0a3add1..f153466eab8c 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -35,6 +35,7 @@ typedef __u16 __sum16; #include #include #include +#include #include #include #include @@ -95,6 +96,17 @@ static struct { .tcp.doff = 5, }; +#define CHECK_FAIL(condition) ({ \ + int __ret = !!(condition); \ + int __save_errno = errno; \ + if (__ret) { \ + error_cnt++; \ + fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \ + } \ + errno = __save_errno; \ + __ret; \ +}) + #define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ @@ -3973,6 +3985,210 @@ static void test_kfree_skb(void) bpf_object__close(obj2); } +static __u32 get_map_id(struct bpf_object *obj, const char *name) +{ + struct bpf_map_info map_info = {}; + __u32 map_info_len, duration = 0; + struct bpf_map *map; + int err; + + map_info_len = sizeof(map_info); + + map = bpf_object__find_map_by_name(obj, name); + if (CHECK(!map, "find map", "NULL map")) + return 0; + + err = bpf_obj_get_info_by_fd(bpf_map__fd(map), + &map_info, &map_info_len); + CHECK(err, "get map info", "err %d errno %d", err, errno); + return map_info.id; +} + +static void test_pinning(void) +{ + const char *file_invalid = "./test_pinning_invalid.o"; + const char *custpinpath = "/sys/fs/bpf/custom/pinmap"; + const char *nopinpath = "/sys/fs/bpf/nopinmap"; + const char *nopinpath2 = "/sys/fs/bpf/nopinmap2"; + const char *custpath = "/sys/fs/bpf/custom"; + const char *pinpath = "/sys/fs/bpf/pinmap"; + const char *file = "./test_pinning.o"; + __u32 map_id, map_id2, duration = 0; + struct stat statbuf = {}; + struct bpf_object *obj; + struct bpf_map *map; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .pin_root_path = custpath, + ); + + /* check that opening fails with invalid pinning value in map def */ + obj = bpf_object__open_file(file_invalid, NULL); + err = libbpf_get_error(obj); + if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + /* open the valid object file */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "default load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that pinmap was pinned */ + err = stat(pinpath, &statbuf); + if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap was *not* pinned */ + err = stat(nopinpath, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath", + "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap2 was *not* pinned */ + err = stat(nopinpath2, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath2", + "err %d errno %d\n", err, errno)) + goto out; + + map_id = get_map_id(obj, "pinmap"); + if (!map_id) + goto out; + + bpf_object__close(obj); + + obj = bpf_object__open_file(file, NULL); + if (CHECK_FAIL(libbpf_get_error(obj))) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "default load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that same map ID was reused for second load */ + map_id2 = get_map_id(obj, "pinmap"); + if (CHECK(map_id != map_id2, "check reuse", + "err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2)) + goto out; + + /* should be no-op to re-pin same map */ + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto out; + + err = bpf_map__pin(map, NULL); + if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno)) + goto out; + + /* but error to pin at different location */ + err = bpf_map__pin(map, "/sys/fs/bpf/other"); + if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno)) + goto out; + + /* unpin maps with a pin_path set */ + err = bpf_object__unpin_maps(obj, NULL); + if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* and re-pin them... */ + err = bpf_object__pin_maps(obj, NULL); + if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* set pinning path of other map and re-pin all */ + map = bpf_object__find_map_by_name(obj, "nopinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto out; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto out; + + /* should only pin the one unpinned map */ + err = bpf_object__pin_maps(obj, NULL); + if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + /* remove the custom pin path to re-test it with auto-pinning below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* open the valid object file again */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + /* swap pin paths of the two maps */ + bpf_object__for_each_map(map, obj) { + if (!strcmp(bpf_map__name(map), "nopinmap")) + err = bpf_map__set_pin_path(map, pinpath); + else if (!strcmp(bpf_map__name(map), "pinmap")) + err = bpf_map__set_pin_path(map, NULL); + else + continue; + + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto out; + } + + /* should fail because of map parameter mismatch */ + err = bpf_object__load(obj); + if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test auto-pinning at custom path with open opt */ + obj = bpf_object__open_file(file, &opts); + if (CHECK_FAIL(libbpf_get_error(obj))) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto out; + +out: + unlink(pinpath); + unlink(nopinpath); + unlink(nopinpath2); + unlink(custpinpath); + rmdir(custpath); + if (obj) + bpf_object__close(obj); +} + int main(int ac, char **av) { srand(time(NULL)); @@ -4017,6 +4233,7 @@ int main(int ac, char **av) test_perf_buffer(); test_core_reloc(); test_kfree_skb(); + test_pinning(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; -- Gitee From a65f211303f9deccb4414d6013fe727b08ff9a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:27 +0100 Subject: [PATCH 07/21] libbpf: Unpin auto-pinned maps if loading fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit ec6d5f47bfe36f46aa0de707e5beb2f58d96b76d upstream. Since the automatic map-pinning happens during load, it will leave pinned maps around if the load fails at a later stage. Fix this by unpinning any pinned maps on cleanup. To avoid unpinning pinned maps that were reused rather than newly pinned, add a new boolean property on struct bpf_map to keep track of whether that map was reused or not; and only unpin those maps that were not reused. Fixes: 57a00f41644f ("libbpf: Add auto-pinning of maps when loading BPF objects") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Song Liu Link: https://lore.kernel.org/bpf/157333184731.88376.9992935027056165873.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b1cd01d2aff5..a3c9bfcba261 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -231,6 +231,7 @@ struct bpf_map { enum libbpf_map_type libbpf_type; char *pin_path; bool pinned; + bool reused; }; struct bpf_secdata { @@ -2000,6 +2001,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->def.map_flags = info.map_flags; map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; + map->reused = true; return 0; @@ -4070,7 +4072,7 @@ int bpf_object__unload(struct bpf_object *obj) int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; - int err; + int err, i; if (!attr) return -EINVAL; @@ -4091,6 +4093,11 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return 0; out: + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); + bpf_object__unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return err; -- Gitee From 0f35c91d79c09c97c056b761f306d39b4f600206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:28 +0100 Subject: [PATCH 08/21] selftests/bpf: Add tests for automatic map unpinning on load failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 9c4e395a1e8c1dc14189b88f24d111f80353b9a4 upstream. This add tests for the different variations of automatic map unpinning on load failure. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Song Liu Link: https://lore.kernel.org/bpf/157333184838.88376.8243704248624814775.stgit@toke.dk Signed-off-by: Yuanhe Shu --- .../selftests/bpf/progs/test_pinning.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 20 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c index f69a4a50d056..f20e7e00373f 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning.c +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -21,7 +21,7 @@ struct { } nopinmap SEC(".maps"); struct { - __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1); __type(key, __u32); __type(value, __u64); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index f153466eab8c..ff8095a8bdd0 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -4143,12 +4143,15 @@ static void test_pinning(void) goto out; } - /* swap pin paths of the two maps */ + /* set pin paths so that nopinmap2 will attempt to reuse the map at + * pinpath (which will fail), but not before pinmap has already been + * reused + */ bpf_object__for_each_map(map, obj) { if (!strcmp(bpf_map__name(map), "nopinmap")) + err = bpf_map__set_pin_path(map, nopinpath2); + else if (!strcmp(bpf_map__name(map), "nopinmap2")) err = bpf_map__set_pin_path(map, pinpath); - else if (!strcmp(bpf_map__name(map), "pinmap")) - err = bpf_map__set_pin_path(map, NULL); else continue; @@ -4161,6 +4164,17 @@ static void test_pinning(void) if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno)) goto out; + /* nopinmap2 should have been pinned and cleaned up again */ + err = stat(nopinpath2, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath2", + "err %d errno %d\n", err, errno)) + goto out; + + /* pinmap should still be there */ + err = stat(pinpath, &statbuf); + if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno)) + goto out; + bpf_object__close(obj); /* test auto-pinning at custom path with open opt */ -- Gitee From e21919a28db7b607914acfaee2af4c55f4eebfd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:29 +0100 Subject: [PATCH 09/21] libbpf: Propagate EPERM to caller on program load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 4f33ddb4e3e28ade924fbf05ec8bcd7d5c022fee upstream. When loading an eBPF program, libbpf overrides the return code for EPERM errors instead of returning it to the caller. This makes it hard to figure out what went wrong on load. In particular, EPERM is returned when the system rlimit is too low to lock the memory required for the BPF program. Previously, this was somewhat obscured because the rlimit error would be hit on map creation (which does return it correctly). However, since maps can now be reused, object load can proceed all the way to loading programs without hitting the error; propagating it even in this case makes it possible for the caller to react appropriately (and, e.g., attempt to raise the rlimit before retrying). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Song Liu Link: https://lore.kernel.org/bpf/157333184946.88376.11768171652794234561.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a3c9bfcba261..d545a4e81caa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3765,7 +3765,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, free(log_buf); goto retry_load; } - ret = -LIBBPF_ERRNO__LOAD; + ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); @@ -3778,23 +3778,18 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, pr_warn("Program too large (%zu insns), at most %d insns\n", load_attr.insns_cnt, BPF_MAXINSNS); ret = -LIBBPF_ERRNO__PROG2BIG; - } else { + } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { /* Wrong program type? */ - if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { - int fd; - - load_attr.prog_type = BPF_PROG_TYPE_KPROBE; - load_attr.expected_attach_type = 0; - fd = bpf_load_program_xattr(&load_attr, NULL, 0); - if (fd >= 0) { - close(fd); - ret = -LIBBPF_ERRNO__PROGTYPE; - goto out; - } - } + int fd; - if (log_buf) - ret = -LIBBPF_ERRNO__KVER; + load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + fd = bpf_load_program_xattr(&load_attr, NULL, 0); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; + goto out; + } } out: -- Gitee From a2cc6cf0e644d65ac873fd6138231ce70d72816a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:30 +0100 Subject: [PATCH 10/21] libbpf: Use pr_warn() when printing netlink errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit b6e99b010ecf829fd8453a7a77e389501bb81990 upstream. The netlink functions were using fprintf(stderr, ) directly to print out error messages, instead of going through the usual logging macros. This makes it impossible for the calling application to silence or redirect those error messages. Fix this by switching to pr_warn() in nlattr.c and netlink.c. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Song Liu Link: https://lore.kernel.org/bpf/157333185055.88376.15999360127117901443.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/netlink.c | 3 ++- tools/lib/bpf/nlattr.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index ce3ec81b71c0..a261df9cb488 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -12,6 +12,7 @@ #include "bpf.h" #include "libbpf.h" +#include "libbpf_internal.h" #include "nlattr.h" #ifndef SOL_NETLINK @@ -43,7 +44,7 @@ int libbpf_netlink_open(__u32 *nl_pid) if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)) < 0) { - fprintf(stderr, "Netlink error reporting not supported\n"); + pr_warn("Netlink error reporting not supported\n"); } if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 1e69c0c8d413..8db44bbfc66d 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -8,6 +8,7 @@ #include #include "nlattr.h" +#include "libbpf_internal.h" #include #include #include @@ -121,8 +122,8 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, } if (tb[type]) - fprintf(stderr, "Attribute of type %#x found multiple times in message, " - "previous attribute is being ignored.\n", type); + pr_warn("Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); tb[type] = nla; } @@ -181,15 +182,14 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { - fprintf(stderr, - "Failed to parse extended error attributes\n"); + pr_warn("Failed to parse extended error attributes\n"); return 0; } if (tb[NLMSGERR_ATTR_MSG]) errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); - fprintf(stderr, "Kernel error message: %s\n", errmsg); + pr_warn("Kernel error message: %s\n", errmsg); return 0; } -- Gitee From 2cf1da83bbea578a3ae8c421a1880e9cb1be83e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:31 +0100 Subject: [PATCH 11/21] libbpf: Add bpf_get_link_xdp_info() function to get more XDP information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 473f4e133a12dd083bae044ba1782be4767177c1 upstream. Currently, libbpf only provides a function to get a single ID for the XDP program attached to the interface. However, it can be useful to get the full set of program IDs attached, along with the attachment mode, in one go. Add a new getter function to support this, using an extendible structure to carry the information. Express the old bpf_get_link_id() function in terms of the new function. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: David S. Miller Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157333185164.88376.7520653040667637246.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.h | 10 +++++ tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/netlink.c | 84 ++++++++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6ddc0419337b..f0947cc949d2 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -427,8 +427,18 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); +struct xdp_link_info { + __u32 prog_id; + __u32 drv_prog_id; + __u32 hw_prog_id; + __u32 skb_prog_id; + __u8 attach_mode; +}; + LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags); struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 1210b079e971..c65a888776e9 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -192,6 +192,7 @@ LIBBPF_0.0.5 { LIBBPF_0.0.6 { global: + bpf_get_link_xdp_info; bpf_map__get_pin_path; bpf_map__is_pinned; bpf_map__set_pin_path; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index a261df9cb488..5065c1aa1061 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -25,7 +25,7 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, struct xdp_id_md { int ifindex; __u32 flags; - __u32 id; + struct xdp_link_info info; }; int libbpf_netlink_open(__u32 *nl_pid) @@ -203,26 +203,11 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } -static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) -{ - if (mode != XDP_ATTACHED_MULTI) - return IFLA_XDP_PROG_ID; - if (flags & XDP_FLAGS_DRV_MODE) - return IFLA_XDP_DRV_PROG_ID; - if (flags & XDP_FLAGS_HW_MODE) - return IFLA_XDP_HW_PROG_ID; - if (flags & XDP_FLAGS_SKB_MODE) - return IFLA_XDP_SKB_PROG_ID; - - return IFLA_XDP_UNSPEC; -} - -static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) { struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; struct xdp_id_md *xdp_id = cookie; struct ifinfomsg *ifinfo = msg; - unsigned char mode, xdp_attr; int ret; if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) @@ -238,27 +223,40 @@ static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) if (!xdp_tb[IFLA_XDP_ATTACHED]) return 0; - mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); - if (mode == XDP_ATTACHED_NONE) - return 0; + xdp_id->info.attach_mode = libbpf_nla_getattr_u8( + xdp_tb[IFLA_XDP_ATTACHED]); - xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); - if (!xdp_attr || !xdp_tb[xdp_attr]) + if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE) return 0; - xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + if (xdp_tb[IFLA_XDP_PROG_ID]) + xdp_id->info.prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_PROG_ID]); + + if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) + xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_SKB_PROG_ID]); + + if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) + xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_DRV_PROG_ID]); + + if (xdp_tb[IFLA_XDP_HW_PROG_ID]) + xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_HW_PROG_ID]); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { struct xdp_id_md xdp_id = {}; int sock, ret; __u32 nl_pid; __u32 mask; - if (flags & ~XDP_FLAGS_MASK) + if (flags & ~XDP_FLAGS_MASK || !info_size) return -EINVAL; /* Check whether the single {HW,DRV,SKB} mode is set */ @@ -274,14 +272,44 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) xdp_id.ifindex = ifindex; xdp_id.flags = flags; - ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); - if (!ret) - *prog_id = xdp_id.id; + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); + if (!ret) { + size_t sz = min(info_size, sizeof(xdp_id.info)); + + memcpy(info, &xdp_id.info, sz); + memset((void *) info + sz, 0, info_size - sz); + } close(sock); return ret; } +static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +{ + if (info->attach_mode != XDP_ATTACHED_MULTI) + return info->prog_id; + if (flags & XDP_FLAGS_DRV_MODE) + return info->drv_prog_id; + if (flags & XDP_FLAGS_HW_MODE) + return info->hw_prog_id; + if (flags & XDP_FLAGS_SKB_MODE) + return info->skb_prog_id; + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_link_info info; + int ret; + + ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); + if (!ret) + *prog_id = get_xdp_id(&info, flags); + + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { -- Gitee From b13eb5ec12402bb249595f4fbeb105d77c8517ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 9 Nov 2019 21:37:32 +0100 Subject: [PATCH 12/21] libbpf: Add getter for program size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 1a734efe06948c17122808f74f0c8cc550c10cf5 upstream. This adds a new getter for the BPF program size (in bytes). This is useful for a caller that is trying to predict how much memory will be locked by loading a BPF object into the kernel. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Song Liu Link: https://lore.kernel.org/bpf/157333185272.88376.10996937115395724683.stgit@toke.dk Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 5 +++++ tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 9 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d545a4e81caa..dfb789fdfdef 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4826,6 +4826,11 @@ int bpf_program__fd(const struct bpf_program *prog) return bpf_program__nth_fd(prog, 0); } +size_t bpf_program__size(const struct bpf_program *prog) +{ + return prog->insns_cnt * sizeof(struct bpf_insn); +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index f0947cc949d2..5aa27caad6c2 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -214,6 +214,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); +/* returns program size in bytes */ +LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c65a888776e9..b011d4601af7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -202,4 +202,5 @@ LIBBPF_0.0.6 { bpf_program__get_type; bpf_program__is_tracing; bpf_program__set_tracing; + bpf_program__size; } LIBBPF_0.0.5; -- Gitee From ec1c97507cccf13f0d69ea2b354ad1c238768260 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 6 Apr 2020 22:09:45 -0700 Subject: [PATCH 13/21] libbpf: Fix bpf_get_link_xdp_id flags handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit f07cbad29741407ace2a9688548fa93d9cb38df3 upstream. Currently if one of XDP_FLAGS_{DRV,HW,SKB}_MODE flags is passed to bpf_get_link_xdp_id() and there is a single XDP program attached to ifindex, that program's id will be returned by bpf_get_link_xdp_id() in prog_id argument no matter what mode the program is attached in, i.e. flags argument is not taken into account. For example, if there is a single program attached with XDP_FLAGS_SKB_MODE but user calls bpf_get_link_xdp_id() with flags = XDP_FLAGS_DRV_MODE, that skb program will be returned. Fix it by returning info->prog_id only if user didn't specify flags. If flags is specified then return corresponding mode-specific-field from struct xdp_link_info. The initial error was introduced in commit 50db9f073188 ("libbpf: Add a support for getting xdp prog id on ifindex") and then refactored in 473f4e133a12 so 473f4e133a12 is used in the Fixes tag. Fixes: 473f4e133a12 ("libbpf: Add bpf_get_link_xdp_info() function to get more XDP information") Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/0e9e30490b44b447bb2bebc69c7135e7fe7e4e40.1586236080.git.rdna@fb.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 5065c1aa1061..f1e60b432920 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -286,7 +286,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) { - if (info->attach_mode != XDP_ATTACHED_MULTI) + if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) return info->prog_id; if (flags & XDP_FLAGS_DRV_MODE) return info->drv_prog_id; -- Gitee From 62ebe026cc8b93c069305cbb3974c56114d2d1f9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 10:18:43 -0600 Subject: [PATCH 14/21] libbpf: Only check mode flags in get_xdp_id ANBZ: #5530 commit 257d7d4f0e69f5e8e3d38351bdcab896719dba04 upstream. The commit in the Fixes tag changed get_xdp_id to only return prog_id if flags is 0, but there are other XDP flags than the modes - e.g., XDP_FLAGS_UPDATE_IF_NOEXIST. Since the intention was only to look at MODE flags, clear other ones before checking if flags is 0. Fixes: f07cbad29741 ("libbpf: Fix bpf_get_link_xdp_id flags handling") Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Signed-off-by: Yuanhe Shu --- tools/lib/bpf/netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index f1e60b432920..1a2a4b7ff560 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -286,6 +286,8 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) { + flags &= XDP_FLAGS_MODES; + if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) return info->prog_id; if (flags & XDP_FLAGS_DRV_MODE) -- Gitee From a1d3b9fbaaf02f5265ba2bf55a34111972739cc7 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Wed, 26 Aug 2020 00:55:49 -0700 Subject: [PATCH 15/21] libbpf: Fix unintentional success return code in bpf_object__load ANBZ: #5530 commit ef05afa66c59c2031a3798916ef3ff3778232129 upstream. There are code paths where EINVAL is returned directly without setting errno. In that case, errno could be 0, which would mask the failure. For example, if a careless programmer set log_level to 10000 out of laziness, they would have to spend a long time trying to figure out why. Fixes: 4f33ddb4e3e2 ("libbpf: Propagate EPERM to caller on program load") Signed-off-by: Alex Gartrell Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200826075549.1858580-1-alexgartrell@gmail.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dfb789fdfdef..88eee4162be5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3765,7 +3765,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, free(log_buf); goto retry_load; } - ret = -errno; + ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); -- Gitee From 974b6e23d31906a6822b0fbdd07d66795955e660 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Nov 2019 12:15:00 -0800 Subject: [PATCH 16/21] libbpf: Simplify BPF_CORE_READ_BITFIELD_PROBED usage ANBZ: #5530 commit ed578021210e14f15a654c825fba6a700c9a39a7 upstream. Streamline BPF_CORE_READ_BITFIELD_PROBED interface to follow BPF_CORE_READ_BITFIELD (direct) and BPF_CORE_READ, in general, i.e., just return read result or 0, if underlying bpf_probe_read() failed. In practice, real applications rarely check bpf_probe_read() result, because it has to always work or otherwise it's a bug. So propagating internal bpf_probe_read() error from this macro hurts usability without providing real benefits in practice. This patch fixes the issue and simplifies usage, noticeable even in selftest itself. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20191106201500.2582438-1-andriin@fb.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/bpf_core_read.h | 27 ++++++++----------- .../progs/test_core_reloc_bitfields_probed.c | 19 +++++-------- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 1de14c9a302b..27e20a10da66 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -39,32 +39,27 @@ enum bpf_field_info_kind { #endif /* - * Extract bitfield, identified by src->field, and put its value into u64 - * *res. All this is done in relocatable manner, so bitfield changes such as + * Extract bitfield, identified by s->field, and return its value as u64. + * All this is done in relocatable manner, so bitfield changes such as * signedness, bit size, offset changes, this will be handled automatically. * This version of macro is using bpf_probe_read() to read underlying integer * storage. Macro functions as an expression and its return type is * bpf_probe_read()'s return value: 0, on success, <0 on error. */ -#define BPF_CORE_READ_BITFIELD_PROBED(src, field, res) ({ \ - unsigned long long val; \ +#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ + unsigned long long val = 0; \ \ - *res = 0; \ - val = __CORE_BITFIELD_PROBE_READ(res, src, field); \ - if (!val) { \ - *res <<= __CORE_RELO(src, field, LSHIFT_U64); \ - val = __CORE_RELO(src, field, RSHIFT_U64); \ - if (__CORE_RELO(src, field, SIGNED)) \ - *res = ((long long)*res) >> val; \ - else \ - *res = ((unsigned long long)*res) >> val; \ - val = 0; \ - } \ + __CORE_BITFIELD_PROBE_READ(&val, s, field); \ + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ + if (__CORE_RELO(s, field, SIGNED)) \ + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ + else \ + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ val; \ }) /* - * Extract bitfield, identified by src->field, and return its value as u64. + * Extract bitfield, identified by s->field, and return its value as u64. * This version of macro is using direct memory reads and should be used from * BPF program types that support such functionality (e.g., typed raw * tracepoints). diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c index 4e4133e96c37..7d0df1b2a334 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c @@ -37,11 +37,6 @@ struct core_reloc_bitfields_output { int64_t s32; }; -#define TRANSFER_BITFIELD(in, out, field) \ - if (BPF_CORE_READ_BITFIELD_PROBED(in, field, &res)) \ - return 1; \ - out->field = res - SEC("raw_tracepoint/sys_enter") int test_core_bitfields(void *ctx) { @@ -49,13 +44,13 @@ int test_core_bitfields(void *ctx) struct core_reloc_bitfields_output *out = (void *)&data.out; uint64_t res; - TRANSFER_BITFIELD(in, out, ub1); - TRANSFER_BITFIELD(in, out, ub2); - TRANSFER_BITFIELD(in, out, ub7); - TRANSFER_BITFIELD(in, out, sb4); - TRANSFER_BITFIELD(in, out, sb20); - TRANSFER_BITFIELD(in, out, u32); - TRANSFER_BITFIELD(in, out, s32); + out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1); + out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2); + out->ub7 = BPF_CORE_READ_BITFIELD_PROBED(in, ub7); + out->sb4 = BPF_CORE_READ_BITFIELD_PROBED(in, sb4); + out->sb20 = BPF_CORE_READ_BITFIELD_PROBED(in, sb20); + out->u32 = BPF_CORE_READ_BITFIELD_PROBED(in, u32); + out->s32 = BPF_CORE_READ_BITFIELD_PROBED(in, s32); return 0; } -- Gitee From 8bea455b5769879328ca43cfbd20dee8ba5a08f1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 7 Nov 2019 10:09:03 -0800 Subject: [PATCH 17/21] bpf: Add array support to btf_struct_access ANBZ: #5530 commit 7e3617a72df32341fea6d226cd6bb21de40c558d upstream. This patch adds array support to btf_struct_access(). It supports array of int, array of struct and multidimensional array. It also allows using u8[] as a scratch space. For example, it allows access the "char cb[48]" with size larger than the array's element "char". Another potential use case is "u64 icsk_ca_priv[]" in the tcp congestion control. btf_resolve_size() is added to resolve the size of any type. It will follow the modifier if there is any. Please see the function comment for details. This patch also adds the "off < moff" check at the beginning of the for loop. It is to reject cases when "off" is pointing to a "hole" in a struct. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191107180903.4097702-1-kafai@fb.com Signed-off-by: Yuanhe Shu --- kernel/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 166 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8b31fe9b789f..a2cf4f008d43 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1057,6 +1057,82 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) return env->top_stack ? &env->stack[env->top_stack - 1] : NULL; } +/* Resolve the size of a passed-in "type" + * + * type: is an array (e.g. u32 array[x][y]) + * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY, + * *type_size: (x * y * sizeof(u32)). Hence, *type_size always + * corresponds to the return type. + * *elem_type: u32 + * *total_nelems: (x * y). Hence, individual elem size is + * (*type_size / *total_nelems) + * + * type: is not an array (e.g. const struct X) + * return type: type "struct X" + * *type_size: sizeof(struct X) + * *elem_type: same as return type ("struct X") + * *total_nelems: 1 + */ +static const struct btf_type * +btf_resolve_size(const struct btf *btf, const struct btf_type *type, + u32 *type_size, const struct btf_type **elem_type, + u32 *total_nelems) +{ + const struct btf_type *array_type = NULL; + const struct btf_array *array; + u32 i, size, nelems = 1; + + for (i = 0; i < MAX_RESOLVE_DEPTH; i++) { + switch (BTF_INFO_KIND(type->info)) { + /* type->size can be used */ + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + size = type->size; + goto resolved; + + case BTF_KIND_PTR: + size = sizeof(void *); + goto resolved; + + /* Modifiers */ + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + type = btf_type_by_id(btf, type->type); + break; + + case BTF_KIND_ARRAY: + if (!array_type) + array_type = type; + array = btf_type_array(type); + if (nelems && array->nelems > U32_MAX / nelems) + return ERR_PTR(-EINVAL); + nelems *= array->nelems; + type = btf_type_by_id(btf, array->type); + break; + + /* type without size */ + default: + return ERR_PTR(-EINVAL); + } + } + + return ERR_PTR(-EINVAL); + +resolved: + if (nelems && size > U32_MAX / nelems) + return ERR_PTR(-EINVAL); + + *type_size = nelems * size; + *total_nelems = nelems; + *elem_type = type; + + return array_type ? : type; +} + /* The input param "type_id" must point to a needs_resolve type */ static const struct btf_type *btf_type_id_resolve(const struct btf *btf, u32 *type_id) @@ -3518,10 +3594,10 @@ int btf_struct_access(struct bpf_verifier_log *log, enum bpf_access_type atype, u32 *next_btf_id) { + u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; + const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const struct btf_type *mtype; const char *tname, *mname; - int i, moff = 0, msize; again: tname = __btf_name_by_offset(btf_vmlinux, t->name_off); @@ -3531,40 +3607,88 @@ int btf_struct_access(struct bpf_verifier_log *log, } for_each_member(i, t, member) { - /* offset of the field in bits */ - moff = btf_member_bit_offset(t, member); - if (btf_member_bitfield_size(t, member)) /* bitfields are not supported yet */ continue; - if (off + size <= moff / 8) + /* offset of the field in bytes */ + moff = btf_member_bit_offset(t, member) / 8; + if (off + size <= moff) /* won't find anything, field is already too far */ break; + /* In case of "off" is pointing to holes of a struct */ + if (off < moff) + continue; /* type of the field */ mtype = btf_type_by_id(btf_vmlinux, member->type); mname = __btf_name_by_offset(btf_vmlinux, member->name_off); - /* skip modifiers */ - while (btf_type_is_modifier(mtype)) - mtype = btf_type_by_id(btf_vmlinux, mtype->type); - - if (btf_type_is_array(mtype)) - /* array deref is not supported yet */ - continue; - - if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) { + mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, + &elem_type, &total_nelems); + if (IS_ERR(mtype)) { bpf_log(log, "field %s doesn't have size\n", mname); return -EFAULT; } - if (btf_type_is_ptr(mtype)) - msize = 8; - else - msize = mtype->size; - if (off >= moff / 8 + msize) + + mtrue_end = moff + msize; + if (off >= mtrue_end) /* no overlap with member, keep iterating */ continue; + + if (btf_type_is_array(mtype)) { + u32 elem_idx; + + /* btf_resolve_size() above helps to + * linearize a multi-dimensional array. + * + * The logic here is treating an array + * in a struct as the following way: + * + * struct outer { + * struct inner array[2][2]; + * }; + * + * looks like: + * + * struct outer { + * struct inner array_elem0; + * struct inner array_elem1; + * struct inner array_elem2; + * struct inner array_elem3; + * }; + * + * When accessing outer->array[1][0], it moves + * moff to "array_elem2", set mtype to + * "struct inner", and msize also becomes + * sizeof(struct inner). Then most of the + * remaining logic will fall through without + * caring the current member is an array or + * not. + * + * Unlike mtype/msize/moff, mtrue_end does not + * change. The naming difference ("_true") tells + * that it is not always corresponding to + * the current mtype/msize/moff. + * It is the true end of the current + * member (i.e. array in this case). That + * will allow an int array to be accessed like + * a scratch space, + * i.e. allow access beyond the size of + * the array's element as long as it is + * within the mtrue_end boundary. + */ + + /* skip empty array */ + if (moff == mtrue_end) + continue; + + msize /= total_nelems; + elem_idx = (off - moff) / msize; + moff += elem_idx * msize; + mtype = elem_type; + } + /* the 'off' we're looking for is either equal to start * of this field or inside of this struct */ @@ -3573,28 +3697,41 @@ int btf_struct_access(struct bpf_verifier_log *log, t = mtype; /* adjust offset we're looking for */ - off -= moff / 8; + off -= moff; goto again; } - if (msize != size) { - /* field access size doesn't match */ - bpf_log(log, - "cannot access %d bytes in struct %s field %s that has size %d\n", - size, tname, mname, msize); - return -EACCES; - } if (btf_type_is_ptr(mtype)) { const struct btf_type *stype; u32 id; + if (msize != size || off != moff) { + bpf_log(log, + "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n", + mname, moff, tname, off, size); + return -EACCES; + } + stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; return PTR_TO_BTF_ID; } } - /* all other fields are treated as scalars */ + + /* Allow more flexible access within an int as long as + * it is within mtrue_end. + * Since mtrue_end could be the end of an array, + * that also allows using an array of int as a scratch + * space. e.g. skb->cb[]. + */ + if (off + size > mtrue_end) { + bpf_log(log, + "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n", + mname, mtrue_end, tname, off, size); + return -EACCES; + } + return SCALAR_VALUE; } bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); -- Gitee From df40f44161d1d497e5a8e9bacb7c39ec022b1913 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 7 Nov 2019 10:09:05 -0800 Subject: [PATCH 18/21] bpf: Add cb access in kfree_skb test ANBZ: #5530 commit ed5941af3f67ba9062b98aba1a6ca398867dc0de upstream. Access the skb->cb[] in the kfree_skb test. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191107180905.4097871-1-kafai@fb.com Signed-off-by: Yuanhe Shu --- tools/testing/selftests/bpf/progs/kfree_skb.c | 25 +++++++-- tools/testing/selftests/bpf/test_progs.c | 54 +++++++++++++++---- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 26d6fe87ae15..a9bc207d254b 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -51,6 +51,7 @@ struct sk_buff { refcount_t users; unsigned char *data; char __pkt_type_offset[0]; + char cb[48]; }; /* copy arguments from @@ -65,28 +66,41 @@ struct trace_kfree_skb { void *location; }; +struct meta { + int ifindex; + __u32 cb32_0; + __u8 cb8_0; +}; + SEC("tp_btf/kfree_skb") int trace_kfree_skb(struct trace_kfree_skb *ctx) { struct sk_buff *skb = ctx->skb; struct net_device *dev; - int ifindex; struct callback_head *ptr; void *func; int users; unsigned char *data; unsigned short pkt_data; + struct meta meta = {}; char pkt_type; + __u32 *cb32; + __u8 *cb8; __builtin_preserve_access_index(({ users = skb->users.refs.counter; data = skb->data; dev = skb->dev; - ifindex = dev->ifindex; ptr = dev->ifalias->rcuhead.next; func = ptr->func; + cb8 = (__u8 *)&skb->cb; + cb32 = (__u32 *)&skb->cb; })); + meta.ifindex = _(dev->ifindex); + meta.cb8_0 = cb8[8]; + meta.cb32_0 = cb32[2]; + bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset)); pkt_type &= 7; @@ -98,14 +112,15 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx) _(skb->len), users, pkt_type); bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping)); bpf_printk("dev->ifindex %d data %llx pkt_data %x\n", - ifindex, data, pkt_data); + meta.ifindex, data, pkt_data); + bpf_printk("cb8_0:%x cb32_0:%x\n", meta.cb8_0, meta.cb32_0); - if (users != 1 || pkt_data != bpf_htons(0x86dd) || ifindex != 1) + if (users != 1 || pkt_data != bpf_htons(0x86dd) || meta.ifindex != 1) /* raw tp ignores return value */ return 0; /* send first 72 byte of the packet to user space */ bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU, - &ifindex, sizeof(ifindex)); + &meta, sizeof(meta)); return 0; } diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index ff8095a8bdd0..3fdde68a1490 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -3897,15 +3897,38 @@ struct ipv6_packet { struct tcphdr tcp; } __packed; +struct meta { + int ifindex; + __u32 cb32_0; + __u8 cb8_0; +}; + +static union { + __u32 cb32[5]; + __u8 cb8[20]; +} cb = { + .cb32[0] = 0x81828384, +}; + static void on_sample_kfree_skb(void *ctx, int cpu, void *data, __u32 size) { - int ifindex = *(int *)data, duration = 0; - struct ipv6_packet *pkt_v6 = data + 4; + struct meta *meta = (struct meta *)data; + struct ipv6_packet *pkt_v6 = data + sizeof(*meta); + int duration = 0; - if (ifindex != 1) + if (CHECK(size != 72 + sizeof(*meta), "check_size", "size %u != %zu\n", + size, 72 + sizeof(*meta))) + return; + if (CHECK(meta->ifindex != 1, "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) /* spurious kfree_skb not on loopback device */ return; - if (CHECK(size != 76, "check_size", "size %u != 76\n", size)) + if (CHECK(meta->cb8_0 != cb.cb8[0], "check_cb8_0", "cb8_0 %x != %x\n", + meta->cb8_0, cb.cb8[0])) + return; + if (CHECK(meta->cb32_0 != cb.cb32[0], "check_cb32_0", + "cb32_0 %x != %x\n", + meta->cb32_0, cb.cb32[0])) return; if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth", "h_proto %x\n", pkt_v6->eth.h_proto)) @@ -3922,6 +3945,13 @@ static void on_sample_kfree_skb(void *ctx, int cpu, void *data, __u32 size) static void test_kfree_skb(void) { + struct __sk_buff skb = {}; + struct bpf_prog_test_run_attr tattr = { + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + }; struct bpf_prog_load_attr attr = { .file = "./kfree_skb.o", }; @@ -3932,11 +3962,12 @@ static void test_kfree_skb(void) struct bpf_link *link = NULL; struct bpf_map *perf_buf_map; struct bpf_program *prog; - __u32 duration, retval; - int err, pkt_fd, kfree_skb_fd; + int err, kfree_skb_fd; bool passed = false; + __u32 duration = 0; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &pkt_fd); + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &tattr.prog_fd); if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) return; @@ -3962,11 +3993,12 @@ static void test_kfree_skb(void) if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto close_prog; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + memcpy(skb.cb, &cb, sizeof(cb)); + err = bpf_prog_test_run_xattr(&tattr); + duration = tattr.duration; + CHECK(err || tattr.retval, "ipv6", "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + err, errno, tattr.retval, duration); /* read perf buffer */ err = perf_buffer__poll(pb, 100); -- Gitee From 8f810804610ed0108088400914df0461b02d965b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Nov 2019 11:03:49 +0100 Subject: [PATCH 19/21] tools, bpf_asm: Warn when jumps are out of range ANBZ: #5530 commit 7e22077d0c73a68ff3fd8b3d2f6564fcbcf8cb23 upstream. When compiling larger programs with bpf_asm, it's possible to accidentally exceed jt/jf range, in which case it won't complain, but rather silently emit a truncated offset, leading to a "happy debugging" situation. Add a warning to help detecting such issues. It could be made an error instead, but this might break compilation of existing code (which might be working by accident). Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191107100349.88976-1-iii@linux.ibm.com Signed-off-by: Yuanhe Shu --- tools/bpf/bpf_exp.y | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y index 56ba1de50784..8d48e896be50 100644 --- a/tools/bpf/bpf_exp.y +++ b/tools/bpf/bpf_exp.y @@ -545,6 +545,16 @@ static void bpf_reduce_k_jumps(void) } } +static uint8_t bpf_encode_jt_jf_offset(int off, int i) +{ + int delta = off - i - 1; + + if (delta < 0 || delta > 255) + fprintf(stderr, "warning: insn #%d jumps to insn #%d, " + "which is out of range\n", i, off); + return (uint8_t) delta; +} + static void bpf_reduce_jt_jumps(void) { int i; @@ -552,7 +562,7 @@ static void bpf_reduce_jt_jumps(void) for (i = 0; i < curr_instr; i++) { if (labels_jt[i]) { int off = bpf_find_insns_offset(labels_jt[i]); - out[i].jt = (uint8_t) (off - i -1); + out[i].jt = bpf_encode_jt_jf_offset(off, i); } } } @@ -564,7 +574,7 @@ static void bpf_reduce_jf_jumps(void) for (i = 0; i < curr_instr; i++) { if (labels_jf[i]) { int off = bpf_find_insns_offset(labels_jf[i]); - out[i].jf = (uint8_t) (off - i - 1); + out[i].jf = bpf_encode_jt_jf_offset(off, i); } } } -- Gitee From b5a47d632f5887aa280f1a83071ab9c6b0fe49c1 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 7 Nov 2019 18:47:36 +0100 Subject: [PATCH 20/21] libbpf: Support XDP_SHARED_UMEM with external XDP program ANBZ: #5530 commit cbf07409d0c2afad7bb54be039490bffad8bc737 upstream. Add support in libbpf to create multiple sockets that share a single umem. Note that an external XDP program need to be supplied that routes the incoming traffic to the desired sockets. So you need to supply the libbpf_flag XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD and load your own XDP program. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Tested-by: William Tu Acked-by: Jonathan Lemon Link: https://lore.kernel.org/bpf/1573148860-30254-2-git-send-email-magnus.karlsson@intel.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/xsk.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 988f2b9280f9..eb77873e3e4f 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -493,15 +493,21 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, if (!umem || !xsk_ptr || !rx || !tx) return -EFAULT; - if (umem->refcount) { - pr_warn("Error: shared umems not supported by libbpf.\n"); - return -EBUSY; - } - xsk = calloc(1, sizeof(*xsk)); if (!xsk) return -ENOMEM; + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + if (umem->refcount && + !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); + err = -EBUSY; + goto out_xsk_alloc; + } + if (umem->refcount++ > 0) { xsk->fd = socket(AF_XDP, SOCK_RAW, 0); if (xsk->fd < 0) { @@ -523,10 +529,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); xsk->ifname[IFNAMSIZ - 1] = '\0'; - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_socket; - if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, &xsk->config.rx_size, @@ -593,7 +595,12 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = xsk->ifindex; sxdp.sxdp_queue_id = xsk->queue_id; - sxdp.sxdp_flags = xsk->config.bind_flags; + if (umem->refcount > 1) { + sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); if (err) { -- Gitee From 769248ecda8459e352a11ae757d8d0602b901449 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 7 Nov 2019 18:47:38 +0100 Subject: [PATCH 21/21] libbpf: Allow for creating Rx or Tx only AF_XDP sockets ANBZ: #5530 commit a68977d269dac0c7aa4a6f650b5e9191dd764861 upstream. The libbpf AF_XDP code is extended to allow for the creation of Rx only or Tx only sockets. Previously it returned an error if the socket was not initialized for both Rx and Tx. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Tested-by: William Tu Acked-by: Jonathan Lemon Link: https://lore.kernel.org/bpf/1573148860-30254-4-git-send-email-magnus.karlsson@intel.com Signed-off-by: Yuanhe Shu --- tools/lib/bpf/xsk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index eb77873e3e4f..2564f9417e9e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -468,7 +468,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } - err = xsk_set_bpf_maps(xsk); + if (xsk->rx) + err = xsk_set_bpf_maps(xsk); if (err) { xsk_delete_bpf_maps(xsk); close(xsk->prog_fd); @@ -490,7 +491,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, socklen_t optlen; int err; - if (!umem || !xsk_ptr || !rx || !tx) + if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; xsk = calloc(1, sizeof(*xsk)); -- Gitee