From de874f3eb2a86fb52fa14f691b586132d375e75d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 2 Jun 2023 10:32:26 +0800 Subject: [PATCH 01/12] bpf/verifier: display non-spill stack slot types in print_verifier_state ANBZ: #5530 commit 8efea21d333d21e1f9177579ffdc69556314f603 upstream If a stack slot does not hold a spilled register (STACK_SPILL), then each of its eight bytes could potentially have a different slot_type. This information can be important for debugging, and previously we either did not print anything for the stack slot, or just printed fp-X=0 in the case where its first byte was STACK_ZERO. Instead, print eight characters with either 0 (STACK_ZERO), m (STACK_MISC) or ? (STACK_INVALID) for any stack slot which is neither STACK_SPILL nor entirely STACK_INVALID. Signed-off-by: Edward Cree Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- kernel/bpf/verifier.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b74115452c6..5ef094796925 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -263,6 +263,13 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; +static char slot_type_char[] = { + [STACK_INVALID] = '?', + [STACK_SPILL] = 'r', + [STACK_MISC] = 'm', + [STACK_ZERO] = '0', +}; + static void print_liveness(struct bpf_verifier_env *env, enum bpf_reg_liveness live) { @@ -349,15 +356,26 @@ static void print_verifier_state(struct bpf_verifier_env *env, } } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] == STACK_SPILL) { - verbose(env, " fp%d", - (-i - 1) * BPF_REG_SIZE); - print_liveness(env, state->stack[i].spilled_ptr.live); + char types_buf[BPF_REG_SIZE + 1]; + bool valid = false; + int j; + + for (j = 0; j < BPF_REG_SIZE; j++) { + if (state->stack[i].slot_type[j] != STACK_INVALID) + valid = true; + types_buf[j] = slot_type_char[ + state->stack[i].slot_type[j]]; + } + types_buf[BPF_REG_SIZE] = 0; + if (!valid) + continue; + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, state->stack[i].spilled_ptr.live); + if (state->stack[i].slot_type[0] == STACK_SPILL) verbose(env, "=%s", reg_type_str[state->stack[i].spilled_ptr.type]); - } - if (state->stack[i].slot_type[0] == STACK_ZERO) - verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE); + else + verbose(env, "=%s", types_buf); } verbose(env, "\n"); } -- Gitee From f4c1e370fb79a36310855ee5efe4b2d7924ca341 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 16:20:00 +0800 Subject: [PATCH 02/12] tools/bpf: move bpf/lib netlink related functions into a new file ANBZ: #5530 commit f7010770fbac47b1fc9fb723b1d2019eb23c04f2 upstream There are no functionality change for this patch. In the subsequent patches, more netlink related library functions will be added and a separate file is better than cluttering bpf.c. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/bpf.c | 129 ------------------------------- tools/lib/bpf/netlink.c | 165 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 130 deletions(-) create mode 100644 tools/lib/bpf/netlink.c diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 6eb9bacd1948..7bc31c905018 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 482025b72839..eba3ad604244 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,16 +28,8 @@ #include #include "bpf.h" #include "libbpf.h" -#include "nlattr.h" -#include -#include -#include #include -#ifndef SOL_NETLINK -#define SOL_NETLINK 270 -#endif - /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -521,127 +513,6 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); } -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) -{ - struct sockaddr_nl sa; - int sock, seq = 0, len, ret = -1; - char buf[4096]; - struct nlattr *nla, *nla_xdp; - struct { - struct nlmsghdr nh; - struct ifinfomsg ifinfo; - char attrbuf[64]; - } req; - struct nlmsghdr *nh; - struct nlmsgerr *err; - socklen_t addrlen; - int one = 1; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) { - return -errno; - } - - if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, - &one, sizeof(one)) < 0) { - fprintf(stderr, "Netlink error reporting not supported\n"); - } - - if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - ret = -errno; - goto cleanup; - } - - addrlen = sizeof(sa); - if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { - ret = -errno; - goto cleanup; - } - - if (addrlen != sizeof(sa)) { - ret = -LIBBPF_ERRNO__INTERNAL; - goto cleanup; - } - - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; - req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - - /* started nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ - if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; - } - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; - } - - len = recv(sock, buf, sizeof(buf), 0); - if (len < 0) { - ret = -errno; - goto cleanup; - } - - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); - nh = NLMSG_NEXT(nh, len)) { - if (nh->nlmsg_pid != sa.nl_pid) { - ret = -LIBBPF_ERRNO__WRNGPID; - goto cleanup; - } - if (nh->nlmsg_seq != seq) { - ret = -LIBBPF_ERRNO__INVSEQ; - goto cleanup; - } - switch (nh->nlmsg_type) { - case NLMSG_ERROR: - err = (struct nlmsgerr *)NLMSG_DATA(nh); - if (!err->error) - continue; - ret = err->error; - nla_dump_errormsg(nh); - goto cleanup; - case NLMSG_DONE: - break; - default: - break; - } - } - - ret = 0; - -cleanup: - close(sock); - return ret; -} - int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) { diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c new file mode 100644 index 000000000000..ccaa991fe9d8 --- /dev/null +++ b/tools/lib/bpf/netlink.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* Copyright (c) 2018 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +static int bpf_netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq) +{ + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (1) { + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + } + } + ret = 0; +done: + return ret; +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + int sock, seq = 0, ret; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + __u32 nl_pid; + + sock = bpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + ret = bpf_netlink_recv(sock, nl_pid, seq); + +cleanup: + close(sock); + return ret; +} -- Gitee From 16726e6d68398b46b570dfb9d2690b07ab8c6fd1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 16:22:21 +0800 Subject: [PATCH 03/12] tools/bpf: add more netlink functionalities in lib/bpf ANBZ: #5530 commit 36f1678d9e0b5d2e0236046d9659e0348b4719a8 upstream This patch added a few netlink attribute parsing functions and the netlink API functions to query networking links, tc classes, tc qdiscs and tc filters. For example, the following API is to get networking links: int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg, void *cookie); Note that when the API is called, the user also provided a callback function with the following signature: int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); The "cookie" is the parameter the user passed to the API and will be available for the callback function. The "msg" is the information about the result, e.g., ifinfomsg or tcmsg. The "tb" is the parsed netlink attributes. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- tools/lib/bpf/libbpf.h | 16 ++++ tools/lib/bpf/libbpf_errno.c | 1 + tools/lib/bpf/netlink.c | 165 ++++++++++++++++++++++++++++++++++- tools/lib/bpf/nlattr.c | 33 ++++--- tools/lib/bpf/nlattr.h | 38 ++++++++ 5 files changed, 238 insertions(+), 15 deletions(-) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 96c55fac54c3..e3b00e23e181 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -46,6 +46,7 @@ enum libbpf_errno { LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ + LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ __LIBBPF_ERRNO__END, }; @@ -297,4 +298,19 @@ int bpf_perf_event_read_simple(void *mem, unsigned long size, unsigned long page_size, void **buf, size_t *buf_len, bpf_perf_event_print_t fn, void *priv); + +struct nlmsghdr; +struct nlattr; +typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, + void *cookie); +int bpf_netlink_open(unsigned int *nl_pid); +int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg, + void *cookie); +int nl_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie); +int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie); #endif diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d2d17226d9d6..451283328fa9 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -43,6 +43,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", }; int libbpf_strerror(int err, char *buf, size_t size) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index ccaa991fe9d8..469e068dd0c5 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -18,7 +18,7 @@ #define SOL_NETLINK 270 #endif -static int bpf_netlink_open(__u32 *nl_pid) +int bpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; socklen_t addrlen; @@ -61,7 +61,9 @@ static int bpf_netlink_open(__u32 *nl_pid) return ret; } -static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq) +static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, dump_nlmsg_t fn, + void *cookie) { struct nlmsgerr *err; struct nlmsghdr *nh; @@ -98,6 +100,11 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq) default: break; } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } } } ret = 0; @@ -157,9 +164,161 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) ret = -errno; goto cleanup; } - ret = bpf_netlink_recv(sock, nl_pid, seq); + ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); cleanup: close(sock); return ret; } + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, dump_nlmsg_t dump_link_nlmsg, + void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_link_nlmsg(cookie, ifi, tb); +} + +int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg, + void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +int nl_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 4719434278b2..49f514119bdb 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -26,11 +26,6 @@ static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_FLAG] = 0, }; -static int nla_len(const struct nlattr *nla) -{ - return nla->nla_len - NLA_HDRLEN; -} - static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { int totlen = NLA_ALIGN(nla->nla_len); @@ -46,11 +41,6 @@ static int nla_ok(const struct nlattr *nla, int remaining) nla->nla_len <= remaining; } -static void *nla_data(const struct nlattr *nla) -{ - return (char *) nla + NLA_HDRLEN; -} - static int nla_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; @@ -114,8 +104,8 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh) * @see nla_validate * @return 0 on success or a negative error code. */ -static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy) +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy) { struct nlattr *nla; int rem, err; @@ -146,6 +136,25 @@ static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int return err; } +/** + * Create attribute index based on nested attribute + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg nla Nested Attribute. + * @arg policy Attribute validation policy. + * + * Feeds the stream of attributes nested into the specified attribute + * to nla_parse(). + * + * @see nla_parse + * @return 0 on success or a negative error code. + */ +int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + struct nla_policy *policy) +{ + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); +} + /* dump netlink extended ack error message */ int nla_dump_errormsg(struct nlmsghdr *nlh) { diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 931a71f68f93..a6e2396bce7c 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -67,6 +67,44 @@ struct nla_policy { nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static inline uint8_t nla_getattr_u8(const struct nlattr *nla) +{ + return *(uint8_t *)nla_data(nla); +} + +static inline uint32_t nla_getattr_u32(const struct nlattr *nla) +{ + return *(uint32_t *)nla_data(nla); +} + +static inline const char *nla_getattr_str(const struct nlattr *nla) +{ + return (const char *)nla_data(nla); +} + +/** + * nla_len - length of payload + * @nla: netlink attribute + */ +static inline int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy); +int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + struct nla_policy *policy); + int nla_dump_errormsg(struct nlmsghdr *nlh); #endif /* __NLATTR_H */ -- Gitee From c99ae20083067b4b016ec64499e1f8eaf5ceacd5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 16:27:30 +0800 Subject: [PATCH 04/12] tools/bpf: bpftool: add net support ANBZ: #5530 commit f6f3bac08ff9855d803081a353a1fafaa8845739 upstream Add "bpftool net" support. Networking devices are enumerated to dump device index/name associated with xdp progs. For each networking device, tc classes and qdiscs are enumerated in order to check their bpf filters. In addition, root handle and clsact ingress/egress are also checked for bpf filters. Not all filter information is printed out. Only ifindex, kind, filter name, prog_id and tag are printed out, which are good enough to show attachment information. If the filter action is a bpf action, its bpf program id, bpf name and tag will be printed out as well. For example, $ ./bpftool net xdp [ ifindex 2 devname eth0 prog_id 198 ] tc_filters [ ifindex 2 kind qdisc_htb name prefix_matcher.o:[cls_prefix_matcher_htb] prog_id 111727 tag d08fe3b4319bc2fd act [] ifindex 2 kind qdisc_clsact_ingress name fbflow_icmp prog_id 130246 tag 3f265c7f26db62c9 act [] ifindex 2 kind qdisc_clsact_egress name prefix_matcher.o:[cls_prefix_matcher_clsact] prog_id 111726 tag 99a197826974c876 ifindex 2 kind qdisc_clsact_egress name cls_fg_dscp prog_id 108619 tag dc4630674fd72dcc act [] ifindex 2 kind qdisc_clsact_egress name fbflow_egress prog_id 130245 tag 72d2d830d6888d2c ] $ ./bpftool -jp net [{ "xdp": [{ "ifindex": 2, "devname": "eth0", "prog_id": 198 } ], "tc_filters": [{ "ifindex": 2, "kind": "qdisc_htb", "name": "prefix_matcher.o:[cls_prefix_matcher_htb]", "prog_id": 111727, "tag": "d08fe3b4319bc2fd", "act": [] },{ "ifindex": 2, "kind": "qdisc_clsact_ingress", "name": "fbflow_icmp", "prog_id": 130246, "tag": "3f265c7f26db62c9", "act": [] },{ "ifindex": 2, "kind": "qdisc_clsact_egress", "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]", "prog_id": 111726, "tag": "99a197826974c876" },{ "ifindex": 2, "kind": "qdisc_clsact_egress", "name": "cls_fg_dscp", "prog_id": 108619, "tag": "dc4630674fd72dcc", "act": [] },{ "ifindex": 2, "kind": "qdisc_clsact_egress", "name": "fbflow_egress", "prog_id": 130245, "tag": "72d2d830d6888d2c" } ] } ] Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- .../bpf/bpftool/Documentation/bpftool-net.rst | 133 ++++++++++ tools/bpf/bpftool/Documentation/bpftool.rst | 6 +- tools/bpf/bpftool/bash-completion/bpftool | 17 +- tools/bpf/bpftool/main.c | 3 +- tools/bpf/bpftool/main.h | 7 + tools/bpf/bpftool/net.c | 233 ++++++++++++++++++ tools/bpf/bpftool/netlink_dumper.c | 181 ++++++++++++++ tools/bpf/bpftool/netlink_dumper.h | 103 ++++++++ 8 files changed, 676 insertions(+), 7 deletions(-) create mode 100644 tools/bpf/bpftool/Documentation/bpftool-net.rst create mode 100644 tools/bpf/bpftool/net.c create mode 100644 tools/bpf/bpftool/netlink_dumper.c create mode 100644 tools/bpf/bpftool/netlink_dumper.h diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst new file mode 100644 index 000000000000..48a61837a264 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -0,0 +1,133 @@ +================ +bpftool-net +================ +------------------------------------------------------------------------------- +tool for inspection of netdev/tc related bpf prog attachments +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **net** *COMMAND* + + *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** } [ **dev** name ] | **help** + +NET COMMANDS +============ + +| **bpftool** **net { show | list } [ dev name ]** +| **bpftool** **net help** + +DESCRIPTION +=========== + **bpftool net { show | list } [ dev name ]** + List all networking device driver and tc attachment in the system. + + Output will start with all xdp program attachment, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. If multiple bpf + programs attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non clsact qdiscs, and finally all + bpf programs attached to root and clsact qdisc. + + **bpftool net help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== + +| **# bpftool net** + +:: + + xdp [ + ifindex 2 devname eth0 prog_id 198 + ] + tc_filters [ + ifindex 2 kind qdisc_htb name prefix_matcher.o:[cls_prefix_matcher_htb] + prog_id 111727 tag d08fe3b4319bc2fd act [] + ifindex 2 kind qdisc_clsact_ingress name fbflow_icmp + prog_id 130246 tag 3f265c7f26db62c9 act [] + ifindex 2 kind qdisc_clsact_egress name prefix_matcher.o:[cls_prefix_matcher_clsact] + prog_id 111726 tag 99a197826974c876 + ifindex 2 kind qdisc_clsact_egress name cls_fg_dscp + prog_id 108619 tag dc4630674fd72dcc act [] + ifindex 2 kind qdisc_clsact_egress name fbflow_egress + prog_id 130245 tag 72d2d830d6888d2c + ] + +| +| **# bpftool -jp net** + +:: + + [{ + "xdp": [{ + "ifindex": 2, + "devname": "eth0", + "prog_id": 198 + } + ], + "tc_filters": [{ + "ifindex": 2, + "kind": "qdisc_htb", + "name": "prefix_matcher.o:[cls_prefix_matcher_htb]", + "prog_id": 111727, + "tag": "d08fe3b4319bc2fd", + "act": [] + },{ + "ifindex": 2, + "kind": "qdisc_clsact_ingress", + "name": "fbflow_icmp", + "prog_id": 130246, + "tag": "3f265c7f26db62c9", + "act": [] + },{ + "ifindex": 2, + "kind": "qdisc_clsact_egress", + "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]", + "prog_id": 111726, + "tag": "99a197826974c876" + },{ + "ifindex": 2, + "kind": "qdisc_clsact_egress", + "name": "cls_fg_dscp", + "prog_id": 108619, + "tag": "dc4630674fd72dcc", + "act": [] + },{ + "ifindex": 2, + "kind": "qdisc_clsact_egress", + "name": "fbflow_egress", + "prog_id": 130245, + "tag": "72d2d830d6888d2c" + } + ] + } + ] + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index b6f5d560460d..8dda77daeda9 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** | **perf** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -32,6 +32,8 @@ SYNOPSIS *PERF-COMMANDS* := { **show** | **list** | **help** } + *NET-COMMANDS* := { **show** | **list** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -58,4 +60,4 @@ OPTIONS SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index c2b6b2176f3b..ec577af2e5f0 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -494,10 +494,10 @@ _bpftool() _filedir return 0 ;; - tree) - _filedir - return 0 - ;; + tree) + _filedir + return 0 + ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ @@ -552,6 +552,15 @@ _bpftool() ;; esac ;; + net) + case $command in + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d15a62be6cf0..79dc3f193547 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -85,7 +85,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf }\n" + " OBJECT := { prog | map | cgroup | perf | net }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -215,6 +215,7 @@ static const struct cmd cmds[] = { { "map", do_map }, { "cgroup", do_cgroup }, { "perf", do_perf }, + { "net", do_net }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 057a227bdb9f..0a9d2779635c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -136,6 +136,7 @@ int do_map(int argc, char **arg); int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); +int do_net(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); @@ -165,4 +166,10 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); + +struct nlattr; +struct ifinfomsg; +struct tcmsg; +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); +int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind); #endif diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c new file mode 100644 index 000000000000..77dd73dd9ade --- /dev/null +++ b/tools/bpf/bpftool/net.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "main.h" +#include "netlink_dumper.h" + +struct bpf_netdev_t { + int *ifindex_array; + int used_len; + int array_len; + int filter_idx; +}; + +struct tc_kind_handle { + char kind[64]; + int handle; +}; + +struct bpf_tcinfo_t { + struct tc_kind_handle *handle_array; + int used_len; + int array_len; + bool is_qdisc; +}; + +static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_netdev_t *netinfo = cookie; + struct ifinfomsg *ifinfo = msg; + + if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) + return 0; + + if (netinfo->used_len == netinfo->array_len) { + netinfo->ifindex_array = realloc(netinfo->ifindex_array, + (netinfo->array_len + 16) * sizeof(int)); + netinfo->array_len += 16; + } + netinfo->ifindex_array[netinfo->used_len++] = ifinfo->ifi_index; + + return do_xdp_dump(ifinfo, tb); +} + +static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_tcinfo_t *tcinfo = cookie; + struct tcmsg *info = msg; + + if (tcinfo->is_qdisc) { + /* skip clsact qdisc */ + if (tb[TCA_KIND] && + strcmp(nla_data(tb[TCA_KIND]), "clsact") == 0) + return 0; + if (info->tcm_handle == 0) + return 0; + } + + if (tcinfo->used_len == tcinfo->array_len) { + tcinfo->handle_array = realloc(tcinfo->handle_array, + (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); + tcinfo->array_len += 16; + } + tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; + snprintf(tcinfo->handle_array[tcinfo->used_len].kind, + sizeof(tcinfo->handle_array[tcinfo->used_len].kind), + "%s_%s", + tcinfo->is_qdisc ? "qdisc" : "class", + tb[TCA_KIND] ? nla_getattr_str(tb[TCA_KIND]) : "unknown"); + tcinfo->used_len++; + + return 0; +} + +static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + const char *kind = cookie; + + return do_filter_dump((struct tcmsg *)msg, tb, kind); +} + +static int show_dev_tc_bpf(int sock, unsigned int nl_pid, int ifindex) +{ + struct bpf_tcinfo_t tcinfo; + int i, handle, ret; + + tcinfo.handle_array = NULL; + tcinfo.used_len = 0; + tcinfo.array_len = 0; + + tcinfo.is_qdisc = false; + ret = nl_get_class(sock, nl_pid, ifindex, dump_class_qdisc_nlmsg, + &tcinfo); + if (ret) + return ret; + + tcinfo.is_qdisc = true; + ret = nl_get_qdisc(sock, nl_pid, ifindex, dump_class_qdisc_nlmsg, + &tcinfo); + if (ret) + return ret; + + for (i = 0; i < tcinfo.used_len; i++) { + ret = nl_get_filter(sock, nl_pid, ifindex, + tcinfo.handle_array[i].handle, + dump_filter_nlmsg, + tcinfo.handle_array[i].kind); + if (ret) + return ret; + } + + /* root, ingress and egress handle */ + handle = TC_H_ROOT; + ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, + "root"); + if (ret) + return ret; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, + "qdisc_clsact_ingress"); + if (ret) + return ret; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); + ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, + "qdisc_clsact_egress"); + if (ret) + return ret; + + return 0; +} + +static int do_show(int argc, char **argv) +{ + int i, sock, ret, filter_idx = -1; + struct bpf_netdev_t dev_array; + unsigned int nl_pid; + char err_buf[256]; + + if (argc == 2) { + if (strcmp(argv[0], "dev") != 0) + usage(); + filter_idx = if_nametoindex(argv[1]); + if (filter_idx == 0) { + fprintf(stderr, "invalid dev name %s\n", argv[1]); + return -1; + } + } else if (argc != 0) { + usage(); + } + + sock = bpf_netlink_open(&nl_pid); + if (sock < 0) { + fprintf(stderr, "failed to open netlink sock\n"); + return -1; + } + + dev_array.ifindex_array = NULL; + dev_array.used_len = 0; + dev_array.array_len = 0; + dev_array.filter_idx = filter_idx; + + if (json_output) + jsonw_start_array(json_wtr); + NET_START_OBJECT; + NET_START_ARRAY("xdp", "\n"); + ret = nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); + NET_END_ARRAY("\n"); + + if (!ret) { + NET_START_ARRAY("tc_filters", "\n"); + for (i = 0; i < dev_array.used_len; i++) { + ret = show_dev_tc_bpf(sock, nl_pid, + dev_array.ifindex_array[i]); + if (ret) + break; + } + NET_END_ARRAY("\n"); + } + NET_END_OBJECT; + if (json_output) + jsonw_end_array(json_wtr); + + if (ret) { + if (json_output) + jsonw_null(json_wtr); + libbpf_strerror(ret, err_buf, sizeof(err_buf)); + fprintf(stderr, "Error: %s\n", err_buf); + } + free(dev_array.ifindex_array); + close(sock); + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } [dev ]\n" + " %s %s help\n", + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_net(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c new file mode 100644 index 000000000000..e12494fd1d2e --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#include +#include +#include +#include +#include + +#include +#include "main.h" +#include "netlink_dumper.h" + +static void xdp_dump_prog_id(struct nlattr **tb, int attr, + const char *type) +{ + if (!tb[attr]) + return; + + NET_DUMP_UINT(type, nla_getattr_u32(tb[attr])) +} + +static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, + const char *name) +{ + struct nlattr *tb[IFLA_XDP_MAX + 1]; + unsigned char mode; + + if (nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0) + return -1; + + if (!tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = nla_getattr_u8(tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + NET_START_OBJECT; + NET_DUMP_UINT("ifindex", ifindex); + + if (name) + NET_DUMP_STR("devname", name); + + if (tb[IFLA_XDP_PROG_ID]) + NET_DUMP_UINT("prog_id", nla_getattr_u32(tb[IFLA_XDP_PROG_ID])); + + if (mode == XDP_ATTACHED_MULTI) { + xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic_prog_id"); + xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "drv_prog_id"); + xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload_prog_id"); + } + + NET_END_OBJECT_FINAL; + return 0; +} + +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb) +{ + if (!tb[IFLA_XDP]) + return 0; + + return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index, + nla_getattr_str(tb[IFLA_IFNAME])); +} + +static char *hexstring_n2a(const unsigned char *str, int len, + char *buf, int blen) +{ + char *ptr = buf; + int i; + + for (i = 0; i < len; i++) { + if (blen < 3) + break; + sprintf(ptr, "%02x", str[i]); + ptr += 2; + blen -= 2; + } + return buf; +} + +static int do_bpf_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + char buf[256]; + + if (nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (!tb[TCA_ACT_BPF_PARMS]) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_OBJECT_NESTED2; + if (tb[TCA_ACT_BPF_NAME]) + NET_DUMP_STR("name", nla_getattr_str(tb[TCA_ACT_BPF_NAME])); + if (tb[TCA_ACT_BPF_ID]) + NET_DUMP_UINT("bpf_id", nla_getattr_u32(tb[TCA_ACT_BPF_ID])); + if (tb[TCA_ACT_BPF_TAG]) + NET_DUMP_STR("tag", hexstring_n2a(nla_data(tb[TCA_ACT_BPF_TAG]), + nla_len(tb[TCA_ACT_BPF_TAG]), + buf, sizeof(buf))); + NET_END_OBJECT_NESTED; + return 0; +} + +static int do_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX + 1]; + + if (!attr) + return 0; + + if (nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_ACT_KIND] && strcmp(nla_data(tb[TCA_ACT_KIND]), "bpf") == 0) + return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]); + + return 0; +} + +static int do_bpf_act_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; + int act, ret; + + if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_ARRAY("act", ""); + for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) { + ret = do_dump_one_act(tb[act]); + if (ret) + break; + } + NET_END_ARRAY(" "); + + return ret; +} + +static int do_bpf_filter_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_BPF_MAX + 1]; + char buf[256]; + int ret; + + if (nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_BPF_NAME]) + NET_DUMP_STR("name", nla_getattr_str(tb[TCA_BPF_NAME])); + if (tb[TCA_BPF_ID]) + NET_DUMP_UINT("prog_id", nla_getattr_u32(tb[TCA_BPF_ID])); + if (tb[TCA_BPF_TAG]) + NET_DUMP_STR("tag", hexstring_n2a(nla_data(tb[TCA_BPF_TAG]), + nla_len(tb[TCA_BPF_TAG]), + buf, sizeof(buf))); + if (tb[TCA_BPF_ACT]) { + ret = do_bpf_act_dump(tb[TCA_BPF_ACT]); + if (ret) + return ret; + } + + return 0; +} + +int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind) +{ + int ret = 0; + + if (tb[TCA_OPTIONS] && strcmp(nla_data(tb[TCA_KIND]), "bpf") == 0) { + NET_START_OBJECT; + NET_DUMP_UINT("ifindex", info->tcm_ifindex); + NET_DUMP_STR("kind", kind); + ret = do_bpf_filter_dump(tb[TCA_OPTIONS]); + NET_END_OBJECT_FINAL; + } + + return ret; +} diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h new file mode 100644 index 000000000000..552d8851ac06 --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#ifndef _NETLINK_DUMPER_H_ +#define _NETLINK_DUMPER_H_ + +#define NET_START_OBJECT \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ +} + +#define NET_START_OBJECT_NESTED(name) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_object(json_wtr); \ + } else { \ + fprintf(stderr, "%s {", name); \ + } \ +} + +#define NET_START_OBJECT_NESTED2 \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ + else \ + fprintf(stderr, "{"); \ +} + +#define NET_END_OBJECT_NESTED \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stderr, "}"); \ +} + +#define NET_END_OBJECT \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ +} + +#define NET_END_OBJECT_FINAL \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stderr, "\n"); \ +} + +#define NET_START_ARRAY(name, newline) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_array(json_wtr); \ + } else { \ + fprintf(stderr, "%s [%s", name, newline);\ + } \ +} + +#define NET_END_ARRAY(endstr) \ +{ \ + if (json_output) \ + jsonw_end_array(json_wtr); \ + else \ + fprintf(stderr, "]%s", endstr); \ +} + +#define NET_DUMP_UINT(name, val) \ +{ \ + if (json_output) \ + jsonw_uint_field(json_wtr, name, val); \ + else \ + fprintf(stderr, "%s %d ", name, val); \ +} + +#define NET_DUMP_LLUINT(name, val) \ +{ \ + if (json_output) \ + jsonw_lluint_field(json_wtr, name, val);\ + else \ + fprintf(stderr, "%s %lld ", name, val); \ +} + +#define NET_DUMP_STR(name, str) \ +{ \ + if (json_output) \ + jsonw_string_field(json_wtr, name, str);\ + else \ + fprintf(stderr, "%s %s ", name, str); \ +} + +#define NET_DUMP_STR_ONLY(str) \ +{ \ + if (json_output) \ + jsonw_string(json_wtr, str); \ + else \ + fprintf(stderr, "%s ", str); \ +} + +#endif -- Gitee From 83b53756cc09d0c60882d9cd4dbb0fac17cd4732 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 16:30:50 +0800 Subject: [PATCH 05/12] tools/bpf: fix a netlink recv issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 9d0b3c1f1451d1b9a33de3c70ae3d50ccd77db1a upstream Commit f7010770fbac ("tools/bpf: move bpf/lib netlink related functions into a new file") introduced a while loop for the netlink recv path. This while loop is needed since the buffer in recv syscall may not be enough to hold all the information and in such cases multiple recv calls are needed. There is a bug introduced by the above commit as the while loop may block on recv syscall if there is no more messages are expected. The netlink message header flag NLM_F_MULTI is used to indicate that more messages are expected and this patch fixed the bug by doing further recv syscall only if multipart message is expected. The patch added another fix regarding to message length of 0. When netlink recv returns message length of 0, there will be no more messages for returning data so the while loop can end. Fixes: f7010770fbac ("tools/bpf: move bpf/lib netlink related functions into a new file") Reported-by: Björn Töpel Tested-by: Björn Töpel Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- tools/lib/bpf/netlink.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 469e068dd0c5..fde1d7bf8199 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -65,18 +65,23 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, dump_nlmsg_t fn, void *cookie) { + bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; char buf[4096]; int len, ret; - while (1) { + while (multipart) { + multipart = false; len = recv(sock, buf, sizeof(buf), 0); if (len < 0) { ret = -errno; goto done; } + if (len == 0) + break; + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { @@ -87,6 +92,8 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, ret = -LIBBPF_ERRNO__INVSEQ; goto done; } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; switch (nh->nlmsg_type) { case NLMSG_ERROR: err = (struct nlmsgerr *)NLMSG_DATA(nh); -- Gitee From 165d6f0ad582067e91c1422546628176eb927aa9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 16:34:15 +0800 Subject: [PATCH 06/12] tools/bpf: bpftool: improve output format for bpftool net ANBZ: #5530 commit 7900efc19214e326913dc0f0e8ded24adc0018f2 upstream This is a followup patch for Commit f6f3bac08ff9 ("tools/bpf: bpftool: add net support"). Some improvements are made for the bpftool net output. Specially, plain output is more concise such that per attachment should nicely fit in one line. Compared to previous output, the prog tag is removed since it can be easily obtained with program id. Similar to xdp attachments, the device name is added to tc attachments. The bpf program attached through shared block mechanism is supported as well. $ ip link add dev v1 type veth peer name v2 $ tc qdisc add dev v1 ingress_block 10 egress_block 20 clsact $ tc qdisc add dev v2 ingress_block 10 egress_block 20 clsact $ tc filter add block 10 protocol ip prio 25 bpf obj bpf_shared.o sec ingress flowid 1:1 $ tc filter add block 20 protocol ip prio 30 bpf obj bpf_cyclic.o sec classifier flowid 1:1 $ bpftool net xdp: tc: v2(7) clsact/ingress bpf_shared.o:[ingress] id 23 v2(7) clsact/egress bpf_cyclic.o:[classifier] id 24 v1(8) clsact/ingress bpf_shared.o:[ingress] id 23 v1(8) clsact/egress bpf_cyclic.o:[classifier] id 24 The documentation and "bpftool net help" are updated to make it clear that current implementation only supports xdp and tc attachments. For programs attached to cgroups, "bpftool cgroup" can be used to dump attachments. For other programs e.g. sk_{filter,skb,msg,reuseport} and lwt/seg6, iproute2 tools should be used. The new output: $ bpftool net xdp: eth0(2) driver id 198 tc: eth0(2) clsact/ingress fbflow_icmp id 335 act [{icmp_action id 336}] eth0(2) clsact/egress fbflow_egress id 334 $ bpftool -jp net [{ "xdp": [{ "devname": "eth0", "ifindex": 2, "mode": "driver", "id": 198 } ], "tc": [{ "devname": "eth0", "ifindex": 2, "kind": "clsact/ingress", "name": "fbflow_icmp", "id": 335, "act": [{ "name": "icmp_action", "id": 336 } ] },{ "devname": "eth0", "ifindex": 2, "kind": "clsact/egress", "name": "fbflow_egress", "id": 334 } ] } ] Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: zhaoxuezhi --- .../bpf/bpftool/Documentation/bpftool-net.rst | 78 +++++++------ tools/bpf/bpftool/main.h | 3 +- tools/bpf/bpftool/net.c | 103 ++++++++++++------ tools/bpf/bpftool/netlink_dumper.c | 85 +++++++-------- tools/bpf/bpftool/netlink_dumper.h | 22 ++-- 5 files changed, 161 insertions(+), 130 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 48a61837a264..408ec30d8872 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -26,9 +26,20 @@ NET COMMANDS DESCRIPTION =========== **bpftool net { show | list } [ dev name ]** - List all networking device driver and tc attachment in the system. - - Output will start with all xdp program attachment, followed by + List bpf program attachments in the kernel networking subsystem. + + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by all tc class/qdisc bpf program attachments. Both xdp programs and tc programs are ordered based on ifindex number. If multiple bpf programs attached to the same networking device through **tc filter**, @@ -61,21 +72,15 @@ EXAMPLES :: - xdp [ - ifindex 2 devname eth0 prog_id 198 - ] - tc_filters [ - ifindex 2 kind qdisc_htb name prefix_matcher.o:[cls_prefix_matcher_htb] - prog_id 111727 tag d08fe3b4319bc2fd act [] - ifindex 2 kind qdisc_clsact_ingress name fbflow_icmp - prog_id 130246 tag 3f265c7f26db62c9 act [] - ifindex 2 kind qdisc_clsact_egress name prefix_matcher.o:[cls_prefix_matcher_clsact] - prog_id 111726 tag 99a197826974c876 - ifindex 2 kind qdisc_clsact_egress name cls_fg_dscp - prog_id 108619 tag dc4630674fd72dcc act [] - ifindex 2 kind qdisc_clsact_egress name fbflow_egress - prog_id 130245 tag 72d2d830d6888d2c - ] + xdp: + eth0(2) driver id 198 + + tc: + eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act [] + eth0(2) clsact/ingress fbflow_icmp id 130246 act [] + eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726 + eth0(2) clsact/egress cls_fg_dscp id 108619 act [] + eth0(2) clsact/egress fbflow_egress id 130245 | | **# bpftool -jp net** @@ -84,44 +89,45 @@ EXAMPLES [{ "xdp": [{ - "ifindex": 2, "devname": "eth0", - "prog_id": 198 + "ifindex": 2, + "mode": "driver", + "id": 198 } ], - "tc_filters": [{ + "tc": [{ + "devname": "eth0", "ifindex": 2, - "kind": "qdisc_htb", + "kind": "htb", "name": "prefix_matcher.o:[cls_prefix_matcher_htb]", - "prog_id": 111727, - "tag": "d08fe3b4319bc2fd", + "id": 111727, "act": [] },{ + "devname": "eth0", "ifindex": 2, - "kind": "qdisc_clsact_ingress", + "kind": "clsact/ingress", "name": "fbflow_icmp", - "prog_id": 130246, - "tag": "3f265c7f26db62c9", + "id": 130246, "act": [] },{ + "devname": "eth0", "ifindex": 2, - "kind": "qdisc_clsact_egress", + "kind": "clsact/egress", "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]", - "prog_id": 111726, - "tag": "99a197826974c876" + "id": 111726, },{ + "devname": "eth0", "ifindex": 2, - "kind": "qdisc_clsact_egress", + "kind": "clsact/egress", "name": "cls_fg_dscp", - "prog_id": 108619, - "tag": "dc4630674fd72dcc", + "id": 108619, "act": [] },{ + "devname": "eth0", "ifindex": 2, - "kind": "qdisc_clsact_egress", + "kind": "clsact/egress", "name": "fbflow_egress", - "prog_id": 130245, - "tag": "72d2d830d6888d2c" + "id": 130245, } ] } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0a9d2779635c..38413a16ab92 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -171,5 +171,6 @@ struct nlattr; struct ifinfomsg; struct tcmsg; int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); -int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind); +int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, + const char *devname, int ifindex); #endif diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 77dd73dd9ade..ed205ee57655 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -2,6 +2,7 @@ // Copyright (C) 2018 Facebook #define _GNU_SOURCE +#include #include #include #include @@ -17,8 +18,13 @@ #include "main.h" #include "netlink_dumper.h" +struct ip_devname_ifindex { + char devname[64]; + int ifindex; +}; + struct bpf_netdev_t { - int *ifindex_array; + struct ip_devname_ifindex *devices; int used_len; int array_len; int filter_idx; @@ -36,6 +42,12 @@ struct bpf_tcinfo_t { bool is_qdisc; }; +struct bpf_filter_t { + const char *kind; + const char *devname; + int ifindex; +}; + static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; @@ -45,11 +57,20 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) return 0; if (netinfo->used_len == netinfo->array_len) { - netinfo->ifindex_array = realloc(netinfo->ifindex_array, - (netinfo->array_len + 16) * sizeof(int)); + netinfo->devices = realloc(netinfo->devices, + (netinfo->array_len + 16) * + sizeof(struct ip_devname_ifindex)); + if (!netinfo->devices) + return -ENOMEM; + netinfo->array_len += 16; } - netinfo->ifindex_array[netinfo->used_len++] = ifinfo->ifi_index; + netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; + snprintf(netinfo->devices[netinfo->used_len].devname, + sizeof(netinfo->devices[netinfo->used_len].devname), + "%s", + tb[IFLA_IFNAME] ? nla_getattr_str(tb[IFLA_IFNAME]) : ""); + netinfo->used_len++; return do_xdp_dump(ifinfo, tb); } @@ -71,13 +92,15 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) if (tcinfo->used_len == tcinfo->array_len) { tcinfo->handle_array = realloc(tcinfo->handle_array, (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); + if (!tcinfo->handle_array) + return -ENOMEM; + tcinfo->array_len += 16; } tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; snprintf(tcinfo->handle_array[tcinfo->used_len].kind, sizeof(tcinfo->handle_array[tcinfo->used_len].kind), - "%s_%s", - tcinfo->is_qdisc ? "qdisc" : "class", + "%s", tb[TCA_KIND] ? nla_getattr_str(tb[TCA_KIND]) : "unknown"); tcinfo->used_len++; @@ -86,60 +109,71 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb) { - const char *kind = cookie; + const struct bpf_filter_t *filter_info = cookie; - return do_filter_dump((struct tcmsg *)msg, tb, kind); + return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind, + filter_info->devname, filter_info->ifindex); } -static int show_dev_tc_bpf(int sock, unsigned int nl_pid, int ifindex) +static int show_dev_tc_bpf(int sock, unsigned int nl_pid, + struct ip_devname_ifindex *dev) { + struct bpf_filter_t filter_info; struct bpf_tcinfo_t tcinfo; - int i, handle, ret; + int i, handle, ret = 0; tcinfo.handle_array = NULL; tcinfo.used_len = 0; tcinfo.array_len = 0; tcinfo.is_qdisc = false; - ret = nl_get_class(sock, nl_pid, ifindex, dump_class_qdisc_nlmsg, + ret = nl_get_class(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, &tcinfo); if (ret) - return ret; + goto out; tcinfo.is_qdisc = true; - ret = nl_get_qdisc(sock, nl_pid, ifindex, dump_class_qdisc_nlmsg, + ret = nl_get_qdisc(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, &tcinfo); if (ret) - return ret; + goto out; + filter_info.devname = dev->devname; + filter_info.ifindex = dev->ifindex; for (i = 0; i < tcinfo.used_len; i++) { - ret = nl_get_filter(sock, nl_pid, ifindex, + filter_info.kind = tcinfo.handle_array[i].kind; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, tcinfo.handle_array[i].handle, dump_filter_nlmsg, - tcinfo.handle_array[i].kind); + &filter_info); if (ret) - return ret; + goto out; } /* root, ingress and egress handle */ handle = TC_H_ROOT; - ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, - "root"); + filter_info.kind = "root"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) - return ret; + goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); - ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, - "qdisc_clsact_ingress"); + filter_info.kind = "clsact/ingress"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) - return ret; + goto out; handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); - ret = nl_get_filter(sock, nl_pid, ifindex, handle, dump_filter_nlmsg, - "qdisc_clsact_egress"); + filter_info.kind = "clsact/egress"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); if (ret) - return ret; + goto out; +out: + free(tcinfo.handle_array); return 0; } @@ -168,7 +202,7 @@ static int do_show(int argc, char **argv) return -1; } - dev_array.ifindex_array = NULL; + dev_array.devices = NULL; dev_array.used_len = 0; dev_array.array_len = 0; dev_array.filter_idx = filter_idx; @@ -176,15 +210,15 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_start_array(json_wtr); NET_START_OBJECT; - NET_START_ARRAY("xdp", "\n"); + NET_START_ARRAY("xdp", "%s:\n"); ret = nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); NET_END_ARRAY("\n"); if (!ret) { - NET_START_ARRAY("tc_filters", "\n"); + NET_START_ARRAY("tc", "%s:\n"); for (i = 0; i < dev_array.used_len; i++) { ret = show_dev_tc_bpf(sock, nl_pid, - dev_array.ifindex_array[i]); + &dev_array.devices[i]); if (ret) break; } @@ -200,7 +234,7 @@ static int do_show(int argc, char **argv) libbpf_strerror(ret, err_buf, sizeof(err_buf)); fprintf(stderr, "Error: %s\n", err_buf); } - free(dev_array.ifindex_array); + free(dev_array.devices); close(sock); return ret; } @@ -214,7 +248,12 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [dev ]\n" - " %s %s help\n", + " %s %s help\n" + "Note: Only xdp and tc attachments are supported now.\n" + " For progs attached to cgroups, use \"bpftool cgroup\"\n" + " to dump program attachments. For program types\n" + " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n" + " consult iproute2.\n", bin_name, argv[-2], bin_name, argv[-2]); return 0; diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index e12494fd1d2e..6f5e9cc6836c 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -12,12 +12,18 @@ #include "netlink_dumper.h" static void xdp_dump_prog_id(struct nlattr **tb, int attr, - const char *type) + const char *mode, + bool new_json_object) { if (!tb[attr]) return; - NET_DUMP_UINT(type, nla_getattr_u32(tb[attr])) + if (new_json_object) + NET_START_OBJECT + NET_DUMP_STR("mode", " %s", mode); + NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[attr])) + if (new_json_object) + NET_END_OBJECT } static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, @@ -37,18 +43,26 @@ static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, return 0; NET_START_OBJECT; - NET_DUMP_UINT("ifindex", ifindex); - if (name) - NET_DUMP_STR("devname", name); - - if (tb[IFLA_XDP_PROG_ID]) - NET_DUMP_UINT("prog_id", nla_getattr_u32(tb[IFLA_XDP_PROG_ID])); + NET_DUMP_STR("devname", "%s", name); + NET_DUMP_UINT("ifindex", "(%d)", ifindex); if (mode == XDP_ATTACHED_MULTI) { - xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic_prog_id"); - xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "drv_prog_id"); - xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload_prog_id"); + if (json_output) { + jsonw_name(json_wtr, "multi_attachments"); + jsonw_start_array(json_wtr); + } + xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true); + xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true); + xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true); + if (json_output) + jsonw_end_array(json_wtr); + } else if (mode == XDP_ATTACHED_DRV) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false); + } else if (mode == XDP_ATTACHED_SKB) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false); + } else if (mode == XDP_ATTACHED_HW) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false); } NET_END_OBJECT_FINAL; @@ -64,26 +78,9 @@ int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb) nla_getattr_str(tb[IFLA_IFNAME])); } -static char *hexstring_n2a(const unsigned char *str, int len, - char *buf, int blen) -{ - char *ptr = buf; - int i; - - for (i = 0; i < len; i++) { - if (blen < 3) - break; - sprintf(ptr, "%02x", str[i]); - ptr += 2; - blen -= 2; - } - return buf; -} - static int do_bpf_dump_one_act(struct nlattr *attr) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; - char buf[256]; if (nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0) return -LIBBPF_ERRNO__NLPARSE; @@ -93,13 +90,11 @@ static int do_bpf_dump_one_act(struct nlattr *attr) NET_START_OBJECT_NESTED2; if (tb[TCA_ACT_BPF_NAME]) - NET_DUMP_STR("name", nla_getattr_str(tb[TCA_ACT_BPF_NAME])); + NET_DUMP_STR("name", "%s", + nla_getattr_str(tb[TCA_ACT_BPF_NAME])); if (tb[TCA_ACT_BPF_ID]) - NET_DUMP_UINT("bpf_id", nla_getattr_u32(tb[TCA_ACT_BPF_ID])); - if (tb[TCA_ACT_BPF_TAG]) - NET_DUMP_STR("tag", hexstring_n2a(nla_data(tb[TCA_ACT_BPF_TAG]), - nla_len(tb[TCA_ACT_BPF_TAG]), - buf, sizeof(buf))); + NET_DUMP_UINT("id", " id %u", + nla_getattr_u32(tb[TCA_ACT_BPF_ID])); NET_END_OBJECT_NESTED; return 0; } @@ -128,13 +123,13 @@ static int do_bpf_act_dump(struct nlattr *attr) if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0) return -LIBBPF_ERRNO__NLPARSE; - NET_START_ARRAY("act", ""); + NET_START_ARRAY("act", " %s ["); for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) { ret = do_dump_one_act(tb[act]); if (ret) break; } - NET_END_ARRAY(" "); + NET_END_ARRAY("] "); return ret; } @@ -142,20 +137,15 @@ static int do_bpf_act_dump(struct nlattr *attr) static int do_bpf_filter_dump(struct nlattr *attr) { struct nlattr *tb[TCA_BPF_MAX + 1]; - char buf[256]; int ret; if (nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0) return -LIBBPF_ERRNO__NLPARSE; if (tb[TCA_BPF_NAME]) - NET_DUMP_STR("name", nla_getattr_str(tb[TCA_BPF_NAME])); + NET_DUMP_STR("name", " %s", nla_getattr_str(tb[TCA_BPF_NAME])); if (tb[TCA_BPF_ID]) - NET_DUMP_UINT("prog_id", nla_getattr_u32(tb[TCA_BPF_ID])); - if (tb[TCA_BPF_TAG]) - NET_DUMP_STR("tag", hexstring_n2a(nla_data(tb[TCA_BPF_TAG]), - nla_len(tb[TCA_BPF_TAG]), - buf, sizeof(buf))); + NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[TCA_BPF_ID])); if (tb[TCA_BPF_ACT]) { ret = do_bpf_act_dump(tb[TCA_BPF_ACT]); if (ret) @@ -165,14 +155,17 @@ static int do_bpf_filter_dump(struct nlattr *attr) return 0; } -int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind) +int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind, + const char *devname, int ifindex) { int ret = 0; if (tb[TCA_OPTIONS] && strcmp(nla_data(tb[TCA_KIND]), "bpf") == 0) { NET_START_OBJECT; - NET_DUMP_UINT("ifindex", info->tcm_ifindex); - NET_DUMP_STR("kind", kind); + if (devname[0] != '\0') + NET_DUMP_STR("devname", "%s", devname); + NET_DUMP_UINT("ifindex", "(%u)", ifindex); + NET_DUMP_STR("kind", " %s", kind); ret = do_bpf_filter_dump(tb[TCA_OPTIONS]); NET_END_OBJECT_FINAL; } diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index 552d8851ac06..0788cfbbed0e 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -50,13 +50,13 @@ fprintf(stderr, "\n"); \ } -#define NET_START_ARRAY(name, newline) \ +#define NET_START_ARRAY(name, fmt_str) \ { \ if (json_output) { \ jsonw_name(json_wtr, name); \ jsonw_start_array(json_wtr); \ } else { \ - fprintf(stderr, "%s [%s", name, newline);\ + fprintf(stderr, fmt_str, name); \ } \ } @@ -65,31 +65,23 @@ if (json_output) \ jsonw_end_array(json_wtr); \ else \ - fprintf(stderr, "]%s", endstr); \ + fprintf(stderr, "%s", endstr); \ } -#define NET_DUMP_UINT(name, val) \ +#define NET_DUMP_UINT(name, fmt_str, val) \ { \ if (json_output) \ jsonw_uint_field(json_wtr, name, val); \ else \ - fprintf(stderr, "%s %d ", name, val); \ + fprintf(stderr, fmt_str, val); \ } -#define NET_DUMP_LLUINT(name, val) \ -{ \ - if (json_output) \ - jsonw_lluint_field(json_wtr, name, val);\ - else \ - fprintf(stderr, "%s %lld ", name, val); \ -} - -#define NET_DUMP_STR(name, str) \ +#define NET_DUMP_STR(name, fmt_str, str) \ { \ if (json_output) \ jsonw_string_field(json_wtr, name, str);\ else \ - fprintf(stderr, "%s %s ", name, str); \ + fprintf(stderr, fmt_str, str); \ } #define NET_DUMP_STR_ONLY(str) \ -- Gitee From 28922e3005fb85878363e0c2543fee3632d348ef Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 2 Jun 2023 16:41:26 +0800 Subject: [PATCH 07/12] bpftool: Fix bpftool net output ANBZ: #5530 commit 53d6eb08e9f185834231d5c32499b10310cce3aa upstream Print `bpftool net` output to stdout instead of stderr. Only errors should be printed to stderr. Regular output should go to stdout and this is what all other subcommands of bpftool do, including --json and --pretty formats of `bpftool net` itself. Fixes: commit f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Signed-off-by: Andrey Ignatov Acked-by: Yonghong Song Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: zhaoxuezhi --- tools/bpf/bpftool/netlink_dumper.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index 0788cfbbed0e..e3516b586a34 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -16,7 +16,7 @@ jsonw_name(json_wtr, name); \ jsonw_start_object(json_wtr); \ } else { \ - fprintf(stderr, "%s {", name); \ + fprintf(stdout, "%s {", name); \ } \ } @@ -25,7 +25,7 @@ if (json_output) \ jsonw_start_object(json_wtr); \ else \ - fprintf(stderr, "{"); \ + fprintf(stdout, "{"); \ } #define NET_END_OBJECT_NESTED \ @@ -33,7 +33,7 @@ if (json_output) \ jsonw_end_object(json_wtr); \ else \ - fprintf(stderr, "}"); \ + fprintf(stdout, "}"); \ } #define NET_END_OBJECT \ @@ -47,7 +47,7 @@ if (json_output) \ jsonw_end_object(json_wtr); \ else \ - fprintf(stderr, "\n"); \ + fprintf(stdout, "\n"); \ } #define NET_START_ARRAY(name, fmt_str) \ @@ -56,7 +56,7 @@ jsonw_name(json_wtr, name); \ jsonw_start_array(json_wtr); \ } else { \ - fprintf(stderr, fmt_str, name); \ + fprintf(stdout, fmt_str, name); \ } \ } @@ -65,7 +65,7 @@ if (json_output) \ jsonw_end_array(json_wtr); \ else \ - fprintf(stderr, "%s", endstr); \ + fprintf(stdout, "%s", endstr); \ } #define NET_DUMP_UINT(name, fmt_str, val) \ @@ -73,7 +73,7 @@ if (json_output) \ jsonw_uint_field(json_wtr, name, val); \ else \ - fprintf(stderr, fmt_str, val); \ + fprintf(stdout, fmt_str, val); \ } #define NET_DUMP_STR(name, fmt_str, str) \ @@ -81,7 +81,7 @@ if (json_output) \ jsonw_string_field(json_wtr, name, str);\ else \ - fprintf(stderr, fmt_str, str); \ + fprintf(stdout, fmt_str, str); \ } #define NET_DUMP_STR_ONLY(str) \ @@ -89,7 +89,7 @@ if (json_output) \ jsonw_string(json_wtr, str); \ else \ - fprintf(stderr, "%s ", str); \ + fprintf(stdout, "%s ", str); \ } #endif -- Gitee From 4e6879dc2a580f3817304a5c62fba1436b5e5c43 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 5 Jul 2023 14:11:36 +0800 Subject: [PATCH 08/12] tools/bpftool: copy a few net uapi headers to tools directory ANBZ: #5530 commit 49a249c387268882e8393dd1fafc51e3b21cb7a2 upstream Commit f6f3bac08ff9 ("tools/bpf: bpftool: add net support") added certain networking support to bpftool. The implementation relies on a relatively recent uapi header file linux/tc_act/tc_bpf.h on the host which contains the marco definition of TCA_ACT_BPF_ID. Unfortunately, this is not the case for all distributions. See the email message below where rhel-7.2 does not have an up-to-date linux/tc_act/tc_bpf.h. https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1799211.html Further investigation found that linux/pkt_cls.h is also needed for macro TCA_BPF_TAG. This patch fixed the issue by copying linux/tc_act/tc_bpf.h and linux/pkt_cls.h from kernel include/uapi directory to tools/include/uapi directory so building the bpftool does not depend on host system for these files. Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Reported-by: kernel test robot Cc: Li Zhijian Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: zhaoxuezhi --- tools/include/uapi/linux/pkt_cls.h | 612 +++++++++++++++++++++++ tools/include/uapi/linux/tc_act/tc_bpf.h | 37 ++ 2 files changed, 649 insertions(+) create mode 100644 tools/include/uapi/linux/pkt_cls.h create mode 100644 tools/include/uapi/linux/tc_act/tc_bpf.h diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h new file mode 100644 index 000000000000..682cda46aa1b --- /dev/null +++ b/tools/include/uapi/linux/pkt_cls.h @@ -0,0 +1,612 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_CLS_H +#define __LINUX_PKT_CLS_H + +#include +#include + +#define TC_COOKIE_MAX_SIZE 16 + +/* Action attributes */ +enum { + TCA_ACT_UNSPEC, + TCA_ACT_KIND, + TCA_ACT_OPTIONS, + TCA_ACT_INDEX, + TCA_ACT_STATS, + TCA_ACT_PAD, + TCA_ACT_COOKIE, + __TCA_ACT_MAX +}; + +#define TCA_ACT_MAX __TCA_ACT_MAX +#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) +#define TCA_ACT_MAX_PRIO 32 +#define TCA_ACT_BIND 1 +#define TCA_ACT_NOBIND 0 +#define TCA_ACT_UNBIND 1 +#define TCA_ACT_NOUNBIND 0 +#define TCA_ACT_REPLACE 1 +#define TCA_ACT_NOREPLACE 0 + +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 +#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" + * and don't further process the frame + * in hardware. For sw path, this is + * equivalent of TC_ACT_STOLEN - drop + * the skb and act like everything + * is alright. + */ +#define TC_ACT_VALUE_MAX TC_ACT_TRAP + +/* There is a special kind of actions called "extended actions", + * which need a value parameter. These have a local opcode located in + * the highest nibble, starting from 1. The rest of the bits + * are used to carry the value. These two parts together make + * a combined opcode. + */ +#define __TC_ACT_EXT_SHIFT 28 +#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) +#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) +#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK)) +#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode) + +#define TC_ACT_JUMP __TC_ACT_EXT(1) +#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) +#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN + +/* Action type identifiers*/ +enum { + TCA_ID_UNSPEC=0, + TCA_ID_POLICE=1, + /* other actions go here */ + __TCA_ID_MAX=255 +}; + +#define TCA_ID_MAX __TCA_ID_MAX + +struct tc_police { + __u32 index; + int action; +#define TC_POLICE_UNSPEC TC_ACT_UNSPEC +#define TC_POLICE_OK TC_ACT_OK +#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY +#define TC_POLICE_SHOT TC_ACT_SHOT +#define TC_POLICE_PIPE TC_ACT_PIPE + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + int refcnt; + int bindcnt; + __u32 capab; +}; + +struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; + __u64 firstuse; +}; + +struct tc_cnt { + int refcnt; + int bindcnt; +}; + +#define tc_gen \ + __u32 index; \ + __u32 capab; \ + int action; \ + int refcnt; \ + int bindcnt + +enum { + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, + TCA_POLICE_AVRATE, + TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, + __TCA_POLICE_MAX +#define TCA_POLICE_RESULT TCA_POLICE_RESULT +}; + +#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) + +/* tca flags definitions */ +#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ +#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ +#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ +#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ +#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum { + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, + TCA_U32_ACT, + TCA_U32_INDEV, + TCA_U32_PCNT, + TCA_U32_MARK, + TCA_U32_FLAGS, + TCA_U32_PAD, + __TCA_U32_MAX +}; + +#define TCA_U32_MAX (__TCA_U32_MAX - 1) + +struct tc_u32_key { + __be32 mask; + __be32 val; + int off; + int offmask; +}; + +struct tc_u32_sel { + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + struct tc_u32_key keys[0]; +}; + +struct tc_u32_mark { + __u32 val; + __u32 mask; + __u32 success; +}; + +struct tc_u32_pcnt { + __u64 rcnt; + __u64 rhit; + __u64 kcnts[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum { + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, + TCA_RSVP_ACT, + __TCA_RSVP_MAX +}; + +#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) + +struct tc_rsvp_gpi { + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo { + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; + __u8 pad; +}; + +/* ROUTE filter */ + +enum { + TCA_ROUTE4_UNSPEC, + TCA_ROUTE4_CLASSID, + TCA_ROUTE4_TO, + TCA_ROUTE4_FROM, + TCA_ROUTE4_IIF, + TCA_ROUTE4_POLICE, + TCA_ROUTE4_ACT, + __TCA_ROUTE4_MAX +}; + +#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) + + +/* FW filter */ + +enum { + TCA_FW_UNSPEC, + TCA_FW_CLASSID, + TCA_FW_POLICE, + TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ + TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, + __TCA_FW_MAX +}; + +#define TCA_FW_MAX (__TCA_FW_MAX - 1) + +/* TC index filter */ + +enum { + TCA_TCINDEX_UNSPEC, + TCA_TCINDEX_HASH, + TCA_TCINDEX_MASK, + TCA_TCINDEX_SHIFT, + TCA_TCINDEX_FALL_THROUGH, + TCA_TCINDEX_CLASSID, + TCA_TCINDEX_POLICE, + TCA_TCINDEX_ACT, + __TCA_TCINDEX_MAX +}; + +#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) + +/* Flow filter */ + +enum { + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum { + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum { + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + TCA_FLOW_PERTURB, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + +/* Basic filter */ + +enum { + TCA_BASIC_UNSPEC, + TCA_BASIC_CLASSID, + TCA_BASIC_EMATCHES, + TCA_BASIC_ACT, + TCA_BASIC_POLICE, + __TCA_BASIC_MAX +}; + +#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + + +/* Cgroup classifier */ + +enum { + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + +/* BPF classifier */ + +#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, + TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, + TCA_BPF_TAG, + TCA_BPF_ID, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + + TCA_FLOWER_KEY_ARP_SIP, /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_OP, /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */ + TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + + TCA_FLOWER_KEY_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + + TCA_FLOWER_IN_HW_COUNT, + + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), +}; + +/* Match-all classifier */ + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + +/* Extended Matches */ + +struct tcf_ematch_tree_hdr { + __u16 nmatches; + __u16 progid; +}; + +enum { + TCA_EMATCH_TREE_UNSPEC, + TCA_EMATCH_TREE_HDR, + TCA_EMATCH_TREE_LIST, + __TCA_EMATCH_TREE_MAX +}; +#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) + +struct tcf_ematch_hdr { + __u16 matchid; + __u16 kind; + __u16 flags; + __u16 pad; /* currently unused */ +}; + +/* 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-----------------------+-+-+---+ + * | Unused |S|I| R | + * +-----------------------+-+-+---+ + * + * R(2) ::= relation to next ematch + * where: 0 0 END (last ematch) + * 0 1 AND + * 1 0 OR + * 1 1 Unused (invalid) + * I(1) ::= invert result + * S(1) ::= simple payload + */ +#define TCF_EM_REL_END 0 +#define TCF_EM_REL_AND (1<<0) +#define TCF_EM_REL_OR (1<<1) +#define TCF_EM_INVERT (1<<2) +#define TCF_EM_SIMPLE (1<<3) + +#define TCF_EM_REL_MASK 3 +#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) + +enum { + TCF_LAYER_LINK, + TCF_LAYER_NETWORK, + TCF_LAYER_TRANSPORT, + __TCF_LAYER_MAX +}; +#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) + +/* Ematch type assignments + * 1..32767 Reserved for ematches inside kernel tree + * 32768..65535 Free to use, not reliable + */ +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_VLAN 6 +#define TCF_EM_CANID 7 +#define TCF_EM_IPSET 8 +#define TCF_EM_IPT 9 +#define TCF_EM_MAX 9 + +enum { + TCF_EM_PROG_TC +}; + +enum { + TCF_EM_OPND_EQ, + TCF_EM_OPND_GT, + TCF_EM_OPND_LT +}; + +#endif diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h new file mode 100644 index 000000000000..6e89a5df49a4 --- /dev/null +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, + TCA_ACT_BPF_PAD, + TCA_ACT_BPF_TAG, + TCA_ACT_BPF_ID, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif -- Gitee From d5cae250ab947930330ec184f5d750d4fefb3a33 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 5 Jul 2023 14:24:03 +0800 Subject: [PATCH 09/12] bpf: pull in pkt_sched.h header for tooling to fix bpftool build ANBZ: #5530 commit ad6dd7a9c47ba587c0aba57f73737764cd31136f upstream Dan reported that bpftool does not compile for him: $ make tools/bpf DESCEND bpf Auto-detecting system features: .. libbfd: [ on ] .. disassembler-four-args: [ OFF ] DESCEND bpftool Auto-detecting system features: .. libbfd: [ on ] .. disassembler-four-args: [ OFF ] CC /opt/linux.git/tools/bpf/bpftool/net.o In file included from /opt/linux.git/tools/include/uapi/linux/pkt_cls.h:6:0, from /opt/linux.git/tools/include/uapi/linux/tc_act/tc_bpf.h:14, from net.c:13: net.c: In function 'show_dev_tc_bpf': net.c:164:21: error: 'TC_H_CLSACT' undeclared (first use in this function) handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); [...] Fix it by importing pkt_sched.h header copy into tooling infrastructure. Fixes: 49a249c38726 ("tools/bpftool: copy a few net uapi headers to tools directory") Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Reported-by: Dan Gilson Reference: https://bugzilla.kernel.org/show_bug.cgi?id=202315 Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- tools/include/uapi/linux/pkt_sched.h | 1163 ++++++++++++++++++++++++++ 1 file changed, 1163 insertions(+) create mode 100644 tools/include/uapi/linux/pkt_sched.h diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h new file mode 100644 index 000000000000..25060205bf3f --- /dev/null +++ b/tools/include/uapi/linux/pkt_sched.h @@ -0,0 +1,1163 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#include + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats { + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; + +struct tc_estimator { + signed char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. + + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_PRIORITY 0xFFE0U +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U + +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + +struct tc_ratespec { + unsigned char cell_log; + __u8 linklayer; /* lower 4 bits */ + unsigned short overhead; + short cell_align; + unsigned short mpu; + __u32 rate; +}; + +#define TC_RTAB_SIZE 1024 + +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + +/* FIFO section */ + +struct tc_fifo_qopt { + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* SKBPRIO section */ + +/* + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1). + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able + * to map one to one the DS field of IPV4 and IPV6 headers. + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY. + */ + +#define SKBPRIO_MAX_PRIORITY 64 + +struct tc_skbprio_qopt { + __u32 limit; /* Queue length in packets. */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 + +struct tc_prio_qopt { + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + +/* PLUG section */ + +#define TCQ_PLUG_BUFFER 0 +#define TCQ_PLUG_RELEASE_ONE 1 +#define TCQ_PLUG_RELEASE_INDEFINITE 2 +#define TCQ_PLUG_LIMIT 3 + +struct tc_plug_qopt { + /* TCQ_PLUG_BUFFER: Inset a plug into the queue and + * buffer any incoming packets + * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head + * to beginning of the next plug. + * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. + * Stop buffering packets until the next TCQ_PLUG_BUFFER + * command is received (just act as a pass-thru queue). + * TCQ_PLUG_LIMIT: Increase/decrease queue size + */ + int action; + __u32 limit; +}; + +/* TBF section */ + +struct tc_tbf_qopt { + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, + TCA_TBF_PAD, + __TCA_TBF_MAX, +}; + +#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt { + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats */ + struct tc_sfqred_stats stats; +}; + + +struct tc_sfq_xstats { + __s32 allot; +}; + +/* RED section */ + +enum { + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, + TCA_RED_MAX_P, + __TCA_RED_MAX, +}; + +#define TCA_RED_MAX (__TCA_RED_MAX - 1) + +struct tc_red_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 +}; + +struct tc_red_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum { + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, + TCA_GRED_MAX_P, + TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ + __TCA_GRED_MAX, +}; + +#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) + +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + +struct tc_gred_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; + +/* gred setup */ +struct tc_gred_sopt { + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; +}; + +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) */ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt { + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob { + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shaped packets */ +}; +enum { + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, + TCA_HTB_PAD, + __TCA_HTB_MAX, +}; + +#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) + +struct tc_htb_xstats { + __u32 lends; + __u32 borrows; + __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ + __s32 tokens; + __s32 ctokens; +}; + +/* HFSC section */ + +struct tc_hfsc_qopt { + __u16 defcls; /* default class */ +}; + +struct tc_service_curve { + __u32 m1; /* slope of the first segment in bps */ + __u32 d; /* x-projection of the first segment in us */ + __u32 m2; /* slope of the second segment in bps */ +}; + +struct tc_hfsc_stats { + __u64 work; /* total work done */ + __u64 rtwork; /* work done by real-time criteria */ + __u32 period; /* current period */ + __u32 level; /* class level in hierarchy */ +}; + +enum { + TCA_HFSC_UNSPEC, + TCA_HFSC_RSC, + TCA_HFSC_FSC, + TCA_HFSC_USC, + __TCA_HFSC_MAX, +}; + +#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) + + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt { + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt { + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl { + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u16 pad; + __u32 penalty; +}; + +struct tc_cbq_police { + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt { + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats { + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, + __TCA_CBQ_MAX, +}; + +#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE, + __TCA_DSMARK_MAX, +}; + +#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ + __TCA_ATM_MAX, +}; + +#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) + +/* Network emulator */ + +enum { + TCA_NETEM_UNSPEC, + TCA_NETEM_CORR, + TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, + TCA_NETEM_RATE, + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + TCA_NETEM_PAD, + TCA_NETEM_LATENCY64, + TCA_NETEM_JITTER64, + TCA_NETEM_SLOT, + TCA_NETEM_SLOT_DIST, + __TCA_NETEM_MAX, +}; + +#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) + +struct tc_netem_qopt { + __u32 latency; /* added delay (us) */ + __u32 limit; /* fifo limit (packets) */ + __u32 loss; /* random packet loss (0=none ~0=100%) */ + __u32 gap; /* re-ordering gap (0 for none) */ + __u32 duplicate; /* random packet dup (0=none ~0=100%) */ + __u32 jitter; /* random jitter in latency (us) */ +}; + +struct tc_netem_corr { + __u32 delay_corr; /* delay correlation */ + __u32 loss_corr; /* packet loss correlation */ + __u32 dup_corr; /* duplicate correlation */ +}; + +struct tc_netem_reorder { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_rate { + __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; +}; + +struct tc_netem_slot { + __s64 min_delay; /* nsec */ + __s64 max_delay; + __s32 max_packets; + __s32 max_bytes; + __s64 dist_delay; /* nsec */ + __s64 dist_jitter; /* nsec */ +}; + +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probabilities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + +#define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 + +/* DRR */ + +enum { + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats { + __u32 deficit; +}; + +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_TARGET, + TCA_CODEL_LIMIT, + TCA_CODEL_INTERVAL, + TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) + +struct tc_codel_xstats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 count; /* how many drops we've done since the last time we + * entered dropping state + */ + __u32 lastcount; /* count at entry to dropping state */ + __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ + __s32 drop_next; /* time to drop next packet */ + __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ + __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ + __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ +}; + +/* FQ_CODEL */ + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, + TCA_FQ_CODEL_DROP_BATCH_SIZE, + TCA_FQ_CODEL_MEMORY_LIMIT, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ + __u32 memory_usage; /* in bytes */ + __u32 drop_overmemory; +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ +}; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; + +/* CBS */ +struct tc_cbs_qopt { + __u8 offload; + __u8 _pad[3]; + __s32 hicredit; + __s32 locredit; + __s32 idleslope; + __s32 sendslope; +}; + +enum { + TCA_CBS_UNSPEC, + TCA_CBS_PARMS, + __TCA_CBS_MAX, +}; + +#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) + + +/* ETF */ +struct tc_etf_qopt { + __s32 delta; + __s32 clockid; + __u32 flags; +#define TC_ETF_DEADLINE_MODE_ON BIT(0) +#define TC_ETF_OFFLOAD_ON BIT(1) +}; + +enum { + TCA_ETF_UNSPEC, + TCA_ETF_PARMS, + __TCA_ETF_MAX, +}; + +#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) + + +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, + TCA_CAKE_PAD, + TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, + TCA_CAKE_OVERHEAD, + TCA_CAKE_RTT, + TCA_CAKE_TARGET, + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, + TCA_CAKE_RAW, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, + TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + +enum { + __TCA_CAKE_STATS_INVALID, + TCA_CAKE_STATS_PAD, + TCA_CAKE_STATS_CAPACITY_ESTIMATE64, + TCA_CAKE_STATS_MEMORY_LIMIT, + TCA_CAKE_STATS_MEMORY_USED, + TCA_CAKE_STATS_AVG_NETOFF, + TCA_CAKE_STATS_MIN_NETLEN, + TCA_CAKE_STATS_MAX_NETLEN, + TCA_CAKE_STATS_MIN_ADJLEN, + TCA_CAKE_STATS_MAX_ADJLEN, + TCA_CAKE_STATS_TIN_STATS, + TCA_CAKE_STATS_DEFICIT, + TCA_CAKE_STATS_COBALT_COUNT, + TCA_CAKE_STATS_DROPPING, + TCA_CAKE_STATS_DROP_NEXT_US, + TCA_CAKE_STATS_P_DROP, + TCA_CAKE_STATS_BLUE_TIMER_US, + __TCA_CAKE_STATS_MAX +}; +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + +enum { + __TCA_CAKE_TIN_STATS_INVALID, + TCA_CAKE_TIN_STATS_PAD, + TCA_CAKE_TIN_STATS_SENT_PACKETS, + TCA_CAKE_TIN_STATS_SENT_BYTES64, + TCA_CAKE_TIN_STATS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, + TCA_CAKE_TIN_STATS_BACKLOG_BYTES, + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, + TCA_CAKE_TIN_STATS_TARGET_US, + TCA_CAKE_TIN_STATS_INTERVAL_US, + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, + TCA_CAKE_TIN_STATS_WAY_MISSES, + TCA_CAKE_TIN_STATS_WAY_COLLISIONS, + TCA_CAKE_TIN_STATS_PEAK_DELAY_US, + TCA_CAKE_TIN_STATS_AVG_DELAY_US, + TCA_CAKE_TIN_STATS_BASE_DELAY_US, + TCA_CAKE_TIN_STATS_SPARSE_FLOWS, + TCA_CAKE_TIN_STATS_BULK_FLOWS, + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, + TCA_CAKE_TIN_STATS_MAX_SKBLEN, + TCA_CAKE_TIN_STATS_FLOW_QUANTUM, + __TCA_CAKE_TIN_STATS_MAX +}; +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) + +enum { + CAKE_FLOW_NONE = 0, + CAKE_FLOW_SRC_IP, + CAKE_FLOW_DST_IP, + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ + CAKE_FLOW_FLOWS, + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ + CAKE_FLOW_MAX, +}; + +enum { + CAKE_DIFFSERV_DIFFSERV3 = 0, + CAKE_DIFFSERV_DIFFSERV4, + CAKE_DIFFSERV_DIFFSERV8, + CAKE_DIFFSERV_BESTEFFORT, + CAKE_DIFFSERV_PRECEDENCE, + CAKE_DIFFSERV_MAX +}; + +enum { + CAKE_ACK_NONE = 0, + CAKE_ACK_FILTER, + CAKE_ACK_AGGRESSIVE, + CAKE_ACK_MAX +}; + +enum { + CAKE_ATM_NONE = 0, + CAKE_ATM_ATM, + CAKE_ATM_PTM, + CAKE_ATM_MAX +}; + + +/* TAPRIO */ +enum { + TC_TAPRIO_CMD_SET_GATES = 0x00, + TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, + TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, +}; + +enum { + TCA_TAPRIO_SCHED_ENTRY_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ + TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ + __TCA_TAPRIO_SCHED_ENTRY_MAX, +}; +#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) + +/* The format for schedule entry list is: + * [TCA_TAPRIO_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_SCHED_ENTRY] + * [TCA_TAPRIO_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] + */ +enum { + TCA_TAPRIO_SCHED_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY, + __TCA_TAPRIO_SCHED_MAX, +}; + +#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) + +enum { + TCA_TAPRIO_ATTR_UNSPEC, + TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ + TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ + TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ + TCA_TAPRIO_PAD, + __TCA_TAPRIO_ATTR_MAX, +}; + +#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) + +#endif -- Gitee From bd1fd3b971dd35f1228a7092cd09f2b3fb670b4c Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 9 Jun 2023 17:25:12 +0800 Subject: [PATCH 10/12] bpftool: Fix compilation failure for net.o with older glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #5530 commit 6f02b540d7597f357bc6ee711346761045d4e108 upstream For older glibc ~2.17, #include'ing both linux/if.h and net/if.h fails due to complaints about redefinition of interface flags: CC net.o In file included from net.c:13:0: /usr/include/linux/if.h:71:2: error: redeclaration of enumerator ‘IFF_UP’ IFF_UP = 1<<0, /* sysfs */ ^ /usr/include/net/if.h:44:5: note: previous definition of ‘IFF_UP’ was here IFF_UP = 0x1, /* Interface is up. */ The issue was fixed in kernel headers in [1], but since compilation of net.c picks up system headers the problem can recur. Dropping #include resolves the issue and it is not needed for compilation anyhow. [1] https://lore.kernel.org/netdev/1461512707-23058-1-git-send-email-mikko.rapeli__34748.27880641$1462831734$gmane$org@iki.fi/ Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1609948746-15369-1-git-send-email-alan.maguire@oracle.com Signed-off-by: zhaoxuezhi --- tools/bpf/bpftool/net.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index ed205ee57655..63b47b8e5e41 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include -- Gitee From 6478c7b08ddd7512423ccad576c230a385408835 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 17:10:48 +0800 Subject: [PATCH 11/12] bpf: add bpffs pretty print for program array map ANBZ: #5530 commit a7c19db38d62fc1ce797dba19936e9f81cf2b9fb upstream Added bpffs pretty print for program array map. For a particular array index, if the program array points to a valid program, the ": " will be printed out like 0: 6 which means bpf program with id "6" is installed at index "0". Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- kernel/bpf/arraymap.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0c17aab3ce5f..1b2cccff5983 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -529,6 +529,29 @@ static void bpf_fd_array_map_clear(struct bpf_map *map) fd_array_map_delete_elem(map, &i); } +static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, + struct seq_file *m) +{ + void **elem, *ptr; + u32 prog_id; + + rcu_read_lock(); + + elem = array_map_lookup_elem(map, key); + if (elem) { + ptr = READ_ONCE(*elem); + if (ptr) { + seq_printf(m, "%u: ", *(u32 *)key); + prog_id = prog_fd_array_sys_lookup_elem(ptr); + btf_type_seq_show(map->btf, map->btf_value_type_id, + &prog_id, m); + seq_puts(m, "\n"); + } + } + + rcu_read_unlock(); +} + const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -540,7 +563,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = bpf_fd_array_map_clear, - .map_check_btf = map_check_no_btf, + .map_seq_show_elem = prog_array_map_seq_show_elem, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, -- Gitee From 13c00472b4e788d9c325cf663bcc271bd94e9220 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Jun 2023 17:13:50 +0800 Subject: [PATCH 12/12] tools/bpf: bpftool: support prog array map and map of maps ANBZ: #5530 commit ad3338d2508cd7752accdd39881deced1ec2b8a1 upstream Currently, prog array map and map of maps are not supported in bpftool. This patch added the support. Different from other map types, for prog array map and map of maps, the key returned bpf_get_next_key() may not point to a valid value. So for these two map types, no error will be printed out when such a scenario happens. The following is the plain and json dump if btf is not available: $ ./bpftool map dump id 10 key: 08 00 00 00 value: 5c 01 00 00 Found 1 element $ ./bpftool -jp map dump id 10 [{ "key": ["0x08","0x00","0x00","0x00" ], "value": ["0x5c","0x01","0x00","0x00" ] }] If the BTF is available, the dump looks below: $ ./bpftool map dump id 2 [{ "key": 0, "value": 7 } ] $ ./bpftool -jp map dump id 2 [{ "key": ["0x00","0x00","0x00","0x00" ], "value": ["0x07","0x00","0x00","0x00" ], "formatted": { "key": 0, "value": 7 } }] Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Signed-off-by: zhaoxuezhi --- tools/bpf/bpftool/map.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index ec73d83d0d31..f41e2907a336 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -660,12 +660,6 @@ static int do_dump(int argc, char **argv) if (fd < 0) return -1; - if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { - p_err("Dumping maps of maps and program maps not supported"); - close(fd); - return -1; - } - key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { @@ -719,7 +713,9 @@ static int do_dump(int argc, char **argv) } else { print_entry_plain(&info, key, value); } - } else { + num_elems++; + } else if (!map_is_map_of_maps(info.type) && + !map_is_map_of_progs(info.type)) { if (json_output) { jsonw_name(json_wtr, "key"); print_hex_data_json(key, info.key_size); @@ -736,7 +732,6 @@ static int do_dump(int argc, char **argv) } prev_key = key; - num_elems++; } if (json_output) -- Gitee