diff --git a/0001-dpdk-add-secure-compile-option-and-fPIC-option.patch b/0001-dpdk-add-secure-compile-option-and-fPIC-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ac0273b3c5c7befc392b9a2f122201a047d70a2 --- /dev/null +++ b/0001-dpdk-add-secure-compile-option-and-fPIC-option.patch @@ -0,0 +1,53 @@ +From 62729b425f3b3a9ccb53b7a57f3dcc0db76d039e Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:10:51 +0800 +Subject: [PATCH] dpdk: + add-secure-compile-option-and-compile-with-fPIC-for-static-lib + +Signed-off-by: zhuhengbo +--- + lib/librte_eal/common/include/rte_log.h | 1 + + mk/rte.lib.mk | 1 + + mk/target/generic/rte.vars.mk | 2 ++ + 3 files changed, 4 insertions(+) + +diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h +index 1bb0e66..6426ea2 100644 +--- a/lib/librte_eal/common/include/rte_log.h ++++ b/lib/librte_eal/common/include/rte_log.h +@@ -311,6 +311,7 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) + * - Negative on error. + */ + int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) ++ __attribute__((weak)) + __attribute__((format(printf,3,0))); + + /** +diff --git a/mk/rte.lib.mk b/mk/rte.lib.mk +index 655a1b1..4516d1c 100644 +--- a/mk/rte.lib.mk ++++ b/mk/rte.lib.mk +@@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/internal/rte.install-pre.mk + include $(RTE_SDK)/mk/internal/rte.clean-pre.mk + include $(RTE_SDK)/mk/internal/rte.build-pre.mk + ++CFLAGS += -fPIC + EXTLIB_BUILD ?= n + + # VPATH contains at least SRCDIR +diff --git a/mk/target/generic/rte.vars.mk b/mk/target/generic/rte.vars.mk +index 3747221..bf3f4ff 100644 +--- a/mk/target/generic/rte.vars.mk ++++ b/mk/target/generic/rte.vars.mk +@@ -75,6 +75,8 @@ ifeq ($(KERNELRELEASE),) + include $(RTE_SDK)/mk/rte.cpuflags.mk + + # merge all CFLAGS ++CPU_CFLAGS += -fPIE -pie -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall ++CPU_CFLAGS += -Wl,-z,relro,-z,now,-z,noexecstack -Wtrampolines + CFLAGS := $(CPU_CFLAGS) $(EXECENV_CFLAGS) $(TOOLCHAIN_CFLAGS) $(MACHINE_CFLAGS) + CFLAGS += $(TARGET_CFLAGS) + +-- +2.19.1 + diff --git a/0002-dpdk-add-secure-option-in-makefile.patch b/0002-dpdk-add-secure-option-in-makefile.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1e7ad65221a83fdd9b5a7296181838c3231504a --- /dev/null +++ b/0002-dpdk-add-secure-option-in-makefile.patch @@ -0,0 +1,35 @@ +From 94cc085f2890fefd1f91c38b245262c4da232e02 Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:31:31 +0800 +Subject: [PATCH] dpdk: add secure option in makefile. + +reason: add secure option in makefile. 
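+
+SEC_FLAGS below adds -fstack-protector-all (stack canaries in every
+function), -Wl,-z,relro,-z,now (read-only GOT, immediate binding),
+-Wl,-z,noexecstack (non-executable stack), -Wtrampolines and -Wall,
+plus -fPIC, to both shared and static builds. As a rough check (the
+binary path below is illustrative, not part of this patch), the
+hardening can be inspected on a built app with binutils:
+
+    readelf -d $(RTE_TARGET)/app/testpmd | grep BIND_NOW
+    readelf -lW $(RTE_TARGET)/app/testpmd | grep GNU_STACK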
+ +Signed-off-by: zhuhengbo +--- + mk/exec-env/linux/rte.vars.mk | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/mk/exec-env/linux/rte.vars.mk b/mk/exec-env/linux/rte.vars.mk +index bea3f76..6844281 100644 +--- a/mk/exec-env/linux/rte.vars.mk ++++ b/mk/exec-env/linux/rte.vars.mk +@@ -11,10 +11,13 @@ + # + # examples for RTE_EXEC_ENV: linux, freebsd + # ++ ++SEC_FLAGS = -fstack-protector-all -Wall -Wl,-z,relro,-z,now -Wl,-z,noexecstack -Wtrampolines -fPIC ++ + ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) +-EXECENV_CFLAGS = -pthread -fPIC ++EXECENV_CFLAGS = -pthread -fPIC $(SEC_FLAGS) + else +-EXECENV_CFLAGS = -pthread ++EXECENV_CFLAGS = -pthread $(SEC_FLAGS) + endif + + # include in every library to build +-- +2.19.1 + diff --git a/0003-dpdk-bugfix-the-deadlock-in-rte_eal_init.patch b/0003-dpdk-bugfix-the-deadlock-in-rte_eal_init.patch new file mode 100644 index 0000000000000000000000000000000000000000..a77728e6b86646df783719f0c472f11143070033 --- /dev/null +++ b/0003-dpdk-bugfix-the-deadlock-in-rte_eal_init.patch @@ -0,0 +1,72 @@ +From dee3ff16473b956d8cfca15baa419e5dfdf47130 Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:14:25 +0800 +Subject: [PATCH] dpdk: bugfix the deadlock in rte_eal_init when executes this + function concurrently + +Signed-off-by: zhuhengbo +--- + lib/librte_eal/linux/eal/eal.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c +index c4233ec..a3bb9c6 100644 +--- a/lib/librte_eal/linux/eal/eal.c ++++ b/lib/librte_eal/linux/eal/eal.c +@@ -1128,7 +1128,7 @@ rte_eal_init(int argc, char **argv) + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); +- return -1; ++ goto out; + } + } + +@@ -1152,7 +1152,7 @@ rte_eal_init(int argc, char **argv) + rte_eal_init_alert("Cannot init logging."); + rte_errno = ENOMEM; + rte_atomic32_clear(&run_once); +- return -1; ++ goto out; + } + + #ifdef VFIO_PRESENT +@@ -1160,7 +1160,7 @@ rte_eal_init(int argc, char **argv) + rte_eal_init_alert("Cannot init VFIO"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); +- return -1; ++ goto out; + } + #endif + /* in secondary processes, memory init may allocate additional fbarrays +@@ -1170,13 +1170,13 @@ rte_eal_init(int argc, char **argv) + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone"); + rte_errno = ENODEV; +- return -1; ++ goto out; + } + + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory"); + rte_errno = ENOMEM; +- return -1; ++ goto out; + } + + /* the directories are locked during eal_hugepage_info_init */ +@@ -1297,6 +1297,10 @@ rte_eal_init(int argc, char **argv) + rte_option_init(); + + return fctret; ++ ++out: ++ eal_hugedirs_unlock(); ++ return -1; + } + + static int +-- +2.19.1 + diff --git a/0004-dpdk-master-core-donot-set-affinity-in-libstorage.patch b/0004-dpdk-master-core-donot-set-affinity-in-libstorage.patch new file mode 100644 index 0000000000000000000000000000000000000000..ce6ef10860d639169543484365f055425ad2d87a --- /dev/null +++ b/0004-dpdk-master-core-donot-set-affinity-in-libstorage.patch @@ -0,0 +1,73 @@ +From c2d29472c3ddd1b2d66f34ae4025c9e074913eaa Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:38:13 +0800 +Subject: [PATCH] dpdk: master core donot set affinity in libstorage + +Signed-off-by: zhuhengbo +--- + lib/librte_eal/common/eal_private.h | 6 ++++++ + lib/librte_eal/linux/eal/eal.c 
| 12 ++++++++++++ + lib/librte_eal/linux/eal/eal_thread.c | 2 +- + 3 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h +index 8a9d493..597fd02 100644 +--- a/lib/librte_eal/common/eal_private.h ++++ b/lib/librte_eal/common/eal_private.h +@@ -444,4 +444,10 @@ rte_option_usage(void); + uint64_t + eal_get_baseaddr(void); + ++/** ++ * Determine whether the master core needs to set affinity. ++ * The master thread in the LibStorage application cannot set affinity. ++ **/ ++bool ++eal_is_master_set_affinity(void); + #endif /* _EAL_PRIVATE_H_ */ +diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c +index a3bb9c6..8bb1842 100644 +--- a/lib/librte_eal/linux/eal/eal.c ++++ b/lib/librte_eal/linux/eal/eal.c +@@ -103,6 +103,13 @@ static char runtime_dir[PATH_MAX]; + + static const char *default_runtime_dir = "/var/run"; + ++static bool master_set_affinity = true; ++bool ++eal_is_master_set_affinity(void) ++{ ++ return master_set_affinity; ++} ++ + int + eal_create_runtime_dir(void) + { +@@ -985,6 +992,11 @@ rte_eal_init(int argc, char **argv) + strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid)); + thread_id = pthread_self(); + ++ /* Master thread don't set affinity in LibStorage application */ ++ if (strstr(logid, "LibStorage") != NULL) { ++ master_set_affinity = false; ++ } ++ + eal_reset_internal_config(&internal_config); + + /* set log level as early as possible */ +diff --git a/lib/librte_eal/linux/eal/eal_thread.c b/lib/librte_eal/linux/eal/eal_thread.c +index 379773b..5b06108 100644 +--- a/lib/librte_eal/linux/eal/eal_thread.c ++++ b/lib/librte_eal/linux/eal/eal_thread.c +@@ -84,7 +84,7 @@ void eal_thread_init_master(unsigned lcore_id) + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ +- if (eal_thread_set_affinity() < 0) ++ if (eal_is_master_set_affinity() && eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); + } + +-- +2.19.1 + diff --git a/0005-dpdk-change-the-log-level-in-prepare_numa.patch b/0005-dpdk-change-the-log-level-in-prepare_numa.patch new file mode 100644 index 0000000000000000000000000000000000000000..6adec3b3320bd9a7afb5285ed72fde32dcfc00ef --- /dev/null +++ b/0005-dpdk-change-the-log-level-in-prepare_numa.patch @@ -0,0 +1,28 @@ +From e970ca944126de31844a323b8e9e014ee2a9e128 Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:44:24 +0800 +Subject: [PATCH] dpdk: change the log level in prepare_numa + +reason: prevent rushing logs + +Signed-off-by: zhuhengbo +--- + lib/librte_eal/linux/eal/eal_memalloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c +index af6d0d0..cad4934 100644 +--- a/lib/librte_eal/linux/eal/eal_memalloc.c ++++ b/lib/librte_eal/linux/eal/eal_memalloc.c +@@ -167,7 +167,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) + RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); + if (get_mempolicy(oldpolicy, oldmask->maskp, + oldmask->size + 1, 0, 0) < 0) { +- RTE_LOG(ERR, EAL, ++ RTE_LOG(DEBUG, EAL, + "Failed to get current mempolicy: %s. 
" + "Assuming MPOL_DEFAULT.\n", strerror(errno)); + *oldpolicy = MPOL_DEFAULT; +-- +2.19.1 + diff --git a/0006-dpdk-fix-dpdk-coredump-problem.patch b/0006-dpdk-fix-dpdk-coredump-problem.patch new file mode 100644 index 0000000000000000000000000000000000000000..57815d48bf5819646c1fb21103eec10d1137d845 --- /dev/null +++ b/0006-dpdk-fix-dpdk-coredump-problem.patch @@ -0,0 +1,35 @@ +From a78efd329d52e1adf813eb1b76352c2680b75961 Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:49:53 +0800 +Subject: [PATCH] dpdk: modification summary + +Signed-off-by: zhuhengbo +--- + lib/librte_eal/linux/eal/eal_interrupts.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/librte_eal/linux/eal/eal_interrupts.c b/lib/librte_eal/linux/eal/eal_interrupts.c +index 1955324..3d73cce 100644 +--- a/lib/librte_eal/linux/eal/eal_interrupts.c ++++ b/lib/librte_eal/linux/eal/eal_interrupts.c +@@ -1070,7 +1070,7 @@ eal_intr_thread_main(__rte_unused void *arg) + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd, + &pipe_event) < 0) { +- rte_panic("Error adding fd to %d epoll_ctl, %s\n", ++ RTE_LOG(ERR, EAL, "Error adding fd to %d epoll_ctl, %s\n", + intr_pipe.readfd, strerror(errno)); + } + numfds++; +@@ -1089,7 +1089,7 @@ eal_intr_thread_main(__rte_unused void *arg) + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, + src->intr_handle.fd, &ev) < 0){ +- rte_panic("Error adding fd %d epoll_ctl, %s\n", ++ RTE_LOG(ERR, EAL, "Error adding fd %d epoll_ctl, %s\n", + src->intr_handle.fd, strerror(errno)); + } + else +-- +2.19.1 + diff --git a/0007-dpdk-add-secure-compile-option-in-pmdinfogen-Makefil.patch b/0007-dpdk-add-secure-compile-option-in-pmdinfogen-Makefil.patch new file mode 100644 index 0000000000000000000000000000000000000000..0cd821002563e34925d106d8c67bf4f97d982d4e --- /dev/null +++ b/0007-dpdk-add-secure-compile-option-in-pmdinfogen-Makefil.patch @@ -0,0 +1,26 @@ +From e7c97339d38f9d2655ca7834a99cc95b7427dd5c Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 19 Mar 2020 17:53:22 +0800 +Subject: [PATCH] dpdk: add secure compile option in pmdinfogen Makefile + +Signed-off-by: zhuhengbo +--- + buildtools/pmdinfogen/Makefile | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/buildtools/pmdinfogen/Makefile b/buildtools/pmdinfogen/Makefile +index a97a764..af41c74 100644 +--- a/buildtools/pmdinfogen/Makefile ++++ b/buildtools/pmdinfogen/Makefile +@@ -15,6 +15,8 @@ HOSTAPP = dpdk-pmdinfogen + SRCS-y += pmdinfogen.c + + HOST_CFLAGS += $(HOST_WERROR_FLAGS) -g ++HOST_CFLAGS += -fPIE -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror + HOST_CFLAGS += -I$(RTE_OUTPUT)/include + ++HOST_LDFLAGS += -Wl,-z,relro,-z,now -pie + include $(RTE_SDK)/mk/rte.hostapp.mk +-- +2.19.1 + diff --git a/0008-dpdk-fix-cpu-flag-error-in-Intel-R-Xeon-R-CPU-E5-262.patch b/0008-dpdk-fix-cpu-flag-error-in-Intel-R-Xeon-R-CPU-E5-262.patch new file mode 100644 index 0000000000000000000000000000000000000000..170eb50b76553cd1db63c9556de7acbaab95fdee --- /dev/null +++ b/0008-dpdk-fix-cpu-flag-error-in-Intel-R-Xeon-R-CPU-E5-262.patch @@ -0,0 +1,92 @@ +From 145e9a29777cc660bd031670a7aeb8a4d3cb88a8 Mon Sep 17 00:00:00 2001 +From: zhuhengbo +Date: Thu, 30 Apr 2020 02:53:08 -0400 +Subject: [PATCH] dpdk: fix cpu flag error in Intel(R) Xeon(R) CPU E5-2620 v3 @ + 2.40GHz + +Signed-off-by: zhuhengbo +--- + config/defconfig_x86_64-cpu_v2-linux-gcc | 1 + + config/defconfig_x86_64-cpu_v2-linuxapp-gcc | 14 ++++++++ + mk/machine/cpu_v2/rte.vars.mk | 39 +++++++++++++++++++++ + 3 files changed, 54 
insertions(+) + create mode 120000 config/defconfig_x86_64-cpu_v2-linux-gcc + create mode 100644 config/defconfig_x86_64-cpu_v2-linuxapp-gcc + create mode 100644 mk/machine/cpu_v2/rte.vars.mk + +diff --git a/config/defconfig_x86_64-cpu_v2-linux-gcc b/config/defconfig_x86_64-cpu_v2-linux-gcc +new file mode 120000 +index 0000000..64f21b6 +--- /dev/null ++++ b/config/defconfig_x86_64-cpu_v2-linux-gcc +@@ -0,0 +1 @@ ++defconfig_x86_64-cpu_v2-linuxapp-gcc +\ No newline at end of file +diff --git a/config/defconfig_x86_64-cpu_v2-linuxapp-gcc b/config/defconfig_x86_64-cpu_v2-linuxapp-gcc +new file mode 100644 +index 0000000..2748e30 +--- /dev/null ++++ b/config/defconfig_x86_64-cpu_v2-linuxapp-gcc +@@ -0,0 +1,14 @@ ++# SPDX-License-Identifier: BSD-3-Clause ++# Copyright(c) 2010-2014 Intel Corporation ++ ++#include "common_linux" ++ ++CONFIG_RTE_MACHINE="cpu_v2" ++ ++CONFIG_RTE_ARCH="x86_64" ++CONFIG_RTE_ARCH_X86_64=y ++CONFIG_RTE_ARCH_X86=y ++CONFIG_RTE_ARCH_64=y ++ ++CONFIG_RTE_TOOLCHAIN="gcc" ++CONFIG_RTE_TOOLCHAIN_GCC=y +diff --git a/mk/machine/cpu_v2/rte.vars.mk b/mk/machine/cpu_v2/rte.vars.mk +new file mode 100644 +index 0000000..ffa7d3f +--- /dev/null ++++ b/mk/machine/cpu_v2/rte.vars.mk +@@ -0,0 +1,39 @@ ++# SPDX-License-Identifier: BSD-3-Clause ++# Copyright(c) 2010-2014 Intel Corporation ++ ++# ++# machine: ++# ++# - can define ARCH variable (overridden by cmdline value) ++# - can define CROSS variable (overridden by cmdline value) ++# - define MACHINE_CFLAGS variable (overridden by cmdline value) ++# - define MACHINE_LDFLAGS variable (overridden by cmdline value) ++# - define MACHINE_ASFLAGS variable (overridden by cmdline value) ++# - can define CPU_CFLAGS variable (overridden by cmdline value) that ++# overrides the one defined in arch. ++# - can define CPU_LDFLAGS variable (overridden by cmdline value) that ++# overrides the one defined in arch. ++# - can define CPU_ASFLAGS variable (overridden by cmdline value) that ++# overrides the one defined in arch. ++# - may override any previously defined variable ++# ++ ++# ARCH = ++# CROSS = ++# MACHINE_CFLAGS = ++# MACHINE_LDFLAGS = ++# MACHINE_ASFLAGS = ++# CPU_CFLAGS = ++# CPU_LDFLAGS = ++# CPU_ASFLAGS = ++ ++MACHINE_CFLAGS = -march=core-avx-i ++ ++# On FreeBSD systems, sometimes the correct CPU type is not picked up. ++# To get everything to compile, we need SSE4.2 support, so check if that is ++# reported by compiler. If not, check if the CPU actually supports it, and if ++# so, set the compilation target to be a corei7, minimum target with SSE4.2. 
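++# One way to do the CPU-side check (illustrative; assumed rather than
++# mandated here) is "grep -q sse4_2 /proc/cpuinfo". The compiler-side
++# check below dumps the predefined macros with "$(CC) -march=native
++# -dM -E -" and looks for SSE4_2.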
++SSE42_SUPPORT=$(shell $(CC) -march=native -dM -E - +Date: Sat, 6 Nov 2021 20:10:49 +0800 +Subject: [PATCH] 0009-dpdk-add-support-gazelle + +--- + config/common_base | 3 +- + config/rte_config.h | 3 +- + lib/librte_eal/common/eal_common_fbarray.c | 106 ++++++- + lib/librte_eal/common/eal_common_memory.c | 88 ++++-- + lib/librte_eal/common/eal_common_options.c | 46 ++- + lib/librte_eal/common/eal_filesystem.h | 56 +++- + lib/librte_eal/common/eal_internal_cfg.h | 2 + + lib/librte_eal/common/eal_memalloc.h | 7 + + lib/librte_eal/common/eal_options.h | 7 +- + lib/librte_eal/common/eal_private.h | 25 +- + lib/librte_eal/common/include/rte_eal.h | 10 +- + lib/librte_eal/common/include/rte_fbarray.h | 7 + + lib/librte_eal/common/include/rte_memory.h | 20 +- + lib/librte_eal/linux/eal/eal.c | 277 ++++++++++++++++--- + lib/librte_eal/linux/eal/eal_hugepage_info.c | 2 +- + lib/librte_eal/linux/eal/eal_memalloc.c | 127 +++++++-- + lib/librte_eal/linux/eal/eal_memory.c | 171 ++++++++++-- + lib/librte_ring/rte_ring.h | 75 +++++ + 18 files changed, 903 insertions(+), 129 deletions(-) + +diff --git a/config/common_base b/config/common_base +index 7dec7ed..57b1349 100644 +--- a/config/common_base ++++ b/config/common_base +@@ -95,7 +95,8 @@ CONFIG_RTE_MAX_MEMSEG_PER_TYPE=32768 + CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072 + # global maximum usable amount of VA, in megabytes + CONFIG_RTE_MAX_MEM_MB=524288 +-CONFIG_RTE_MAX_MEMZONE=2560 ++CONFIG_RTE_MAX_MEMZONE=65535 ++CONFIG_RTE_MAX_SECONDARY=256 + CONFIG_RTE_MAX_TAILQ=32 + CONFIG_RTE_ENABLE_ASSERT=n + CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO +diff --git a/config/rte_config.h b/config/rte_config.h +index d30786b..b848b1c 100644 +--- a/config/rte_config.h ++++ b/config/rte_config.h +@@ -39,7 +39,8 @@ + #define RTE_MAX_MEMSEG_PER_TYPE 32768 + #define RTE_MAX_MEM_MB_PER_TYPE 65536 + #define RTE_MAX_MEM_MB 524288 +-#define RTE_MAX_MEMZONE 2560 ++#define RTE_MAX_MEMZONE 65535 ++#define RTE_MAX_SECONDARY 256 + #define RTE_MAX_TAILQ 32 + #define RTE_LOG_DP_LEVEL RTE_LOG_INFO + #define RTE_BACKTRACE 1 +diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c +index 1312f93..b611ffa 100644 +--- a/lib/librte_eal/common/eal_common_fbarray.c ++++ b/lib/librte_eal/common/eal_common_fbarray.c +@@ -833,8 +833,9 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, + return -1; + } + +-int +-rte_fbarray_attach(struct rte_fbarray *arr) ++static int ++__rte_fbarray_attach(struct rte_fbarray *arr, const char *runtime_dir, ++ const struct internal_config *internal_cfg) + { + struct mem_area *ma = NULL, *tmp = NULL; + size_t page_sz, mmap_len; +@@ -870,13 +871,15 @@ rte_fbarray_attach(struct rte_fbarray *arr) + + mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); + +- /* check the tailq - maybe user has already mapped this address space */ +- rte_spinlock_lock(&mem_area_lock); ++ if (!internal_cfg->pri_and_sec) { ++ /* check the tailq - maybe user has already mapped this address space */ ++ rte_spinlock_lock(&mem_area_lock); + +- TAILQ_FOREACH(tmp, &mem_area_tailq, next) { +- if (overlap(tmp, arr->data, mmap_len)) { +- rte_errno = EEXIST; +- goto fail; ++ TAILQ_FOREACH(tmp, &mem_area_tailq, next) { ++ if (overlap(tmp, arr->data, mmap_len)) { ++ rte_errno = EEXIST; ++ goto fail; ++ } + } + } + +@@ -886,7 +889,7 @@ rte_fbarray_attach(struct rte_fbarray *arr) + if (data == NULL) + goto fail; + +- eal_get_fbarray_path(path, sizeof(path), arr->name); ++ eal_sec_get_fbarray_path(path, sizeof(path), arr->name, 
runtime_dir); + + fd = open(path, O_RDWR); + if (fd < 0) { +@@ -903,16 +906,27 @@ rte_fbarray_attach(struct rte_fbarray *arr) + if (resize_and_map(fd, data, mmap_len)) + goto fail; + ++ if (internal_cfg->pri_and_sec) { ++ if (flock(fd, LOCK_UN)) { ++ rte_errno = errno; ++ goto fail; ++ } ++ close(fd); ++ fd = -1; ++ } ++ + /* store our new memory area */ + ma->addr = data; + ma->fd = fd; /* keep fd until detach/destroy */ + ma->len = mmap_len; + +- TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); ++ if (!internal_cfg->pri_and_sec) { ++ TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next); + +- /* we're done */ ++ /* we're done */ + +- rte_spinlock_unlock(&mem_area_lock); ++ rte_spinlock_unlock(&mem_area_lock); ++ } + return 0; + fail: + if (data) +@@ -924,6 +938,30 @@ rte_fbarray_attach(struct rte_fbarray *arr) + return -1; + } + ++int ++rte_fbarray_attach(struct rte_fbarray *arr) ++{ ++ return __rte_fbarray_attach(arr, rte_eal_get_runtime_dir(), &internal_config); ++} ++ ++int ++rte_sec_fbarray_attach(struct rte_fbarray *arr, ++ const int switch_pri_and_sec, const int sec_idx) ++{ ++ struct internal_config *internal_cfg = NULL; ++ char *runtime_dir = NULL; ++ ++ if (!switch_pri_and_sec) { ++ runtime_dir = rte_eal_get_runtime_dir(); ++ internal_cfg = &internal_config; ++ } else { ++ runtime_dir = rte_eal_sec_get_runtime_dir(sec_idx); ++ internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ } ++ ++ return __rte_fbarray_attach(arr, runtime_dir, internal_cfg); ++} ++ + int + rte_fbarray_detach(struct rte_fbarray *arr) + { +@@ -1063,6 +1101,50 @@ rte_fbarray_destroy(struct rte_fbarray *arr) + return ret; + } + ++int ++rte_sec_fbarray_destroy(struct rte_fbarray *arr, ++ const int sec_idx) ++{ ++ int fd, ret; ++ size_t mmap_len; ++ char path[PATH_MAX]; ++ ++ if (arr == NULL) { ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ size_t page_sz = sysconf(_SC_PAGESIZE); ++ ++ if (page_sz == (size_t)-1) ++ return -1; ++ ++ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len); ++ munmap(arr->data, mmap_len); ++ ++ /* try deleting the file */ ++ eal_sec_get_fbarray_path(path, sizeof(path), arr->name, rte_eal_sec_get_runtime_dir(sec_idx)); ++ ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n", ++ strerror(errno)); ++ return -1; ++ } ++ if (flock(fd, LOCK_EX | LOCK_NB)) { ++ RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); ++ rte_errno = EBUSY; ++ ret = -1; ++ } else { ++ ret = 0; ++ unlink(path); ++ memset(arr, 0, sizeof(*arr)); ++ } ++ close(fd); ++ ++ return ret; ++} ++ + void * + rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx) + { +diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c +index 4a9cc1f..842fc9b 100644 +--- a/lib/librte_eal/common/eal_common_memory.c ++++ b/lib/librte_eal/common/eal_common_memory.c +@@ -206,9 +206,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl) + } + + static struct rte_memseg_list * +-virt2memseg_list(const void *addr) ++virt2memseg_list(const void *addr, const struct rte_config *rte_cfg) + { +- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; ++ struct rte_mem_config *mcfg = rte_cfg->mem_config; + struct rte_memseg_list *msl; + int msl_idx; + +@@ -230,7 +230,13 @@ virt2memseg_list(const void *addr) + struct rte_memseg_list * + rte_mem_virt2memseg_list(const void *addr) + { +- return virt2memseg_list(addr); ++ return virt2memseg_list(addr, rte_eal_get_configuration()); ++} ++ ++struct 
rte_memseg_list * ++rte_sec_mem_virt2memseg_list(const void *addr, const struct rte_config *rte_cfg) ++{ ++ return virt2memseg_list(addr, rte_cfg); + } + + struct virtiova { +@@ -283,11 +289,25 @@ rte_mem_iova2virt(rte_iova_t iova) + return vi.virt; + } + ++static struct rte_memseg * ++__rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl, ++ const struct rte_config *rte_cfg) ++{ ++ return virt2memseg(addr, msl != NULL ? msl : ++ rte_sec_mem_virt2memseg_list(addr, rte_cfg)); ++} ++ + struct rte_memseg * + rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl) + { +- return virt2memseg(addr, msl != NULL ? msl : +- rte_mem_virt2memseg_list(addr)); ++ return __rte_mem_virt2memseg(addr, msl, rte_eal_get_configuration()); ++} ++ ++struct rte_memseg * ++rte_sec_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl, ++ const struct rte_config *rte_cfg) ++{ ++ return __rte_mem_virt2memseg(addr, msl, rte_cfg); + } + + static int +@@ -889,10 +909,14 @@ rte_extmem_detach(void *va_addr, size_t len) + } + + /* init memory subsystem */ +-int +-rte_eal_memory_init(void) ++static int ++__rte_eal_memory_init(__attribute__((__unused__)) const char *runtime_dir, ++ const struct internal_config *internal_cfg, ++ struct rte_config *rte_cfg, ++ const int switch_pri_and_sec, ++ const int sec_idx) + { +- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; ++ struct rte_mem_config *mcfg = rte_cfg->mem_config; + int retval; + RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); + +@@ -900,25 +924,57 @@ rte_eal_memory_init(void) + return -1; + + /* lock mem hotplug here, to prevent races while we init */ +- rte_mcfg_mem_read_lock(); ++ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + +- if (rte_eal_memseg_init() < 0) ++ if (rte_eal_memseg_init(switch_pri_and_sec, sec_idx) < 0) + goto fail; + +- if (eal_memalloc_init() < 0) +- goto fail; ++ if (!internal_cfg->pri_and_sec) ++ if (eal_memalloc_init() < 0) ++ goto fail; + +- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? ++ retval = rte_cfg->process_type == RTE_PROC_PRIMARY ? 
+ rte_eal_hugepage_init() : +- rte_eal_hugepage_attach(); ++ rte_eal_hugepage_attach(switch_pri_and_sec, sec_idx); + if (retval < 0) + goto fail; + +- if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) ++ if (internal_cfg->no_shconf == 0 && rte_eal_memdevice_init() < 0) + goto fail; + + return 0; + fail: +- rte_mcfg_mem_read_unlock(); ++ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + return -1; + } ++ ++int ++rte_eal_memory_init(void) ++{ ++ const int unused_idx = -1; ++ ++ return __rte_eal_memory_init(rte_eal_get_runtime_dir(), ++ &internal_config, rte_eal_get_configuration(), ++ false, unused_idx); ++} ++ ++int ++rte_eal_sec_memory_init(const int sec_idx) ++{ ++ int ret; ++ struct rte_config *rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ ++ ret = __rte_eal_memory_init(rte_eal_sec_get_runtime_dir(sec_idx), ++ rte_eal_sec_get_internal_config(sec_idx), rte_cfg, ++ true, sec_idx); ++ ++ rte_rwlock_read_unlock(&rte_cfg->mem_config->memory_hotplug_lock); ++ ++ return ret; ++} ++ ++int ++rte_eal_sec_memory_cleanup(const int sec_idx) ++{ ++ return eal_memalloc_destroy(sec_idx); ++} +diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c +index a7f9c5f..34f4199 100644 +--- a/lib/librte_eal/common/eal_common_options.c ++++ b/lib/librte_eal/common/eal_common_options.c +@@ -82,6 +82,7 @@ eal_long_options[] = { + {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM }, + {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM}, + {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM}, ++ {OPT_MAP_PERFECT, 0, NULL, OPT_MAP_PERFECT_NUM }, + {0, 0, NULL, 0 } + }; + +@@ -221,6 +222,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) + internal_cfg->user_mbuf_pool_ops_name = NULL; + CPU_ZERO(&internal_cfg->ctrl_cpuset); + internal_cfg->init_complete = 0; ++ internal_cfg->map_perfect = 0; + } + + static int +@@ -1097,7 +1099,7 @@ eal_parse_iova_mode(const char *name) + } + + static int +-eal_parse_base_virtaddr(const char *arg) ++eal_parse_base_virtaddr(const char *arg, struct internal_config *conf) + { + char *end; + uint64_t addr; +@@ -1120,7 +1122,7 @@ eal_parse_base_virtaddr(const char *arg) + * it can align to 2MB for x86. So this alignment can also be used + * on x86 and other architectures. 
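+ * For example, with RTE_PGSIZE_16M a requested base address of
+ * 0x7f0000001000 is rounded up by RTE_PTR_ALIGN_CEIL below to
+ * 0x7f0001000000, the next 16MB boundary.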
+ */ +- internal_config.base_virtaddr = ++ conf->base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; +@@ -1440,7 +1442,7 @@ eal_parse_common_option(int opt, const char *optarg, + } + break; + case OPT_BASE_VIRTADDR_NUM: +- if (eal_parse_base_virtaddr(optarg) < 0) { ++ if (eal_parse_base_virtaddr(optarg, conf) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + return -1; +@@ -1553,11 +1555,33 @@ eal_adjust_config(struct internal_config *internal_cfg) + } + + int +-eal_check_common_options(struct internal_config *internal_cfg) ++eal_sec_adjust_config(struct internal_config *internal_cfg) + { +- struct rte_config *cfg = rte_eal_get_configuration(); ++ struct internal_config *internal_cfg_head; ++ internal_cfg->process_type = RTE_PROC_SECONDARY; ++ ++ internal_cfg_head = rte_eal_sec_get_internal_config(0); ++ for (int i = 0; i < RTE_MAX_SECONDARY; ++i) { ++ if (!internal_cfg_head[i].pri_and_sec) ++ continue; ++ if (internal_cfg == &internal_cfg_head[i]) ++ continue; ++ if (!strcmp(internal_cfg_head[i].hugefile_prefix, internal_cfg->hugefile_prefix)) ++ return -EALREADY; ++ } ++ ++ for (int i = 0; i < RTE_MAX_NUMA_NODES; i++) ++ internal_cfg->memory += internal_cfg->socket_mem[i]; ++ ++ return 0; ++} + +- if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { ++int ++eal_check_common_options(struct internal_config *internal_cfg, ++ struct rte_config *cfg) ++{ ++ if (!internal_cfg->pri_and_sec && ++ cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { + RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n"); + return -1; + } +@@ -1602,7 +1626,7 @@ eal_check_common_options(struct internal_config *internal_cfg) + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } +- if (internal_config.force_socket_limits && internal_config.legacy_mem) { ++ if (internal_cfg->force_socket_limits && internal_config.legacy_mem) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT + " is only supported in non-legacy memory mode\n"); + } +@@ -1635,6 +1659,14 @@ eal_check_common_options(struct internal_config *internal_cfg) + "-m or --"OPT_SOCKET_MEM"\n"); + } + ++ if (internal_cfg->map_perfect || internal_cfg->pri_and_sec) { ++ if (!internal_cfg->legacy_mem || internal_cfg->in_memory || internal_cfg->no_hugetlbfs) { ++ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" or "OPT_IN_MEMORY" or "OPT_NO_HUGE" " ++ "is not compatible with --"OPT_MAP_PERFECT" and "OPT_PRI_AND_SEC"\n"); ++ return -1; ++ } ++ } ++ + return 0; + } + +diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h +index 5d21f07..e65a183 100644 +--- a/lib/librte_eal/common/eal_filesystem.h ++++ b/lib/librte_eal/common/eal_filesystem.h +@@ -23,7 +23,8 @@ + + /* sets up platform-specific runtime data dir */ + int +-eal_create_runtime_dir(void); ++eal_create_runtime_dir(char *runtime_dir, const int buflen, ++ const struct internal_config *conf); + + int + eal_clean_runtime_dir(void); +@@ -34,15 +35,27 @@ eal_get_hugefile_prefix(void); + + #define RUNTIME_CONFIG_FNAME "config" + static inline const char * +-eal_runtime_config_path(void) ++__eal_runtime_config_path(const char *runtime_dir) + { + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + +- snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), ++ snprintf(buffer, sizeof(buffer), "%s/%s", runtime_dir, + RUNTIME_CONFIG_FNAME); + return buffer; + } + ++static inline const char * ++eal_runtime_config_path(void) ++{ ++ return 
__eal_runtime_config_path(rte_eal_get_runtime_dir()); ++} ++ ++static inline const char * ++eal_sec_runtime_config_path(const char *runtime_dir) ++{ ++ return __eal_runtime_config_path(runtime_dir); ++} ++ + /** Path of primary/secondary communication unix socket file. */ + #define MP_SOCKET_FNAME "mp_socket" + static inline const char * +@@ -57,12 +70,29 @@ eal_mp_socket_path(void) + + #define FBARRAY_NAME_FMT "%s/fbarray_%s" + static inline const char * +-eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) { +- snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(), ++__eal_get_fbarray_path(char *buffer, size_t buflen, const char *name, ++ const char *runtime_dir) ++{ ++ snprintf(buffer, buflen, FBARRAY_NAME_FMT, runtime_dir, + name); + return buffer; + } + ++static inline const char * ++eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) ++{ ++ return __eal_get_fbarray_path(buffer, buflen, name, ++ rte_eal_get_runtime_dir()); ++} ++ ++static inline const char * ++eal_sec_get_fbarray_path(char *buffer, size_t buflen, ++ const char *name, const char *runtime_dir) ++{ ++ return __eal_get_fbarray_path(buffer, buflen, name, ++ runtime_dir); ++} ++ + /** Path of hugepage info file. */ + #define HUGEPAGE_INFO_FNAME "hugepage_info" + static inline const char * +@@ -78,15 +108,27 @@ eal_hugepage_info_path(void) + /** Path of hugepage data file. */ + #define HUGEPAGE_DATA_FNAME "hugepage_data" + static inline const char * +-eal_hugepage_data_path(void) ++__eal_hugepage_data_path(const char *runtime_dir) + { + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + +- snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(), ++ snprintf(buffer, sizeof(buffer), "%s/%s", runtime_dir, + HUGEPAGE_DATA_FNAME); + return buffer; + } + ++static inline const char * ++eal_hugepage_data_path(void) ++{ ++ return __eal_hugepage_data_path(rte_eal_get_runtime_dir()); ++} ++ ++static inline const char * ++eal_sec_hugepage_data_path(const char *runtime_dir) ++{ ++ return __eal_hugepage_data_path(runtime_dir); ++} ++ + /** String format for hugepage map files. */ + #define HUGEFILE_FMT "%s/%smap_%d" + static inline const char * +diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h +index a42f349..50d5da1 100644 +--- a/lib/librte_eal/common/eal_internal_cfg.h ++++ b/lib/librte_eal/common/eal_internal_cfg.h +@@ -82,6 +82,8 @@ struct internal_config { + rte_cpuset_t ctrl_cpuset; /**< cpuset for ctrl threads */ + volatile unsigned int init_complete; + /**< indicates whether EAL has completed initialization */ ++ volatile unsigned pri_and_sec; ++ volatile unsigned map_perfect; + }; + extern struct internal_config internal_config; /**< Global EAL configuration. 
*/ + +diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h +index e953cd8..d5ea6e1 100644 +--- a/lib/librte_eal/common/eal_memalloc.h ++++ b/lib/librte_eal/common/eal_memalloc.h +@@ -83,6 +83,10 @@ eal_memalloc_get_seg_fd(int list_idx, int seg_idx); + int + eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd); + ++int ++eal_sec_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd, ++ const int switch_pri_and_sec, const int sec_idx); ++ + /* returns 0 or -errno */ + int + eal_memalloc_set_seg_list_fd(int list_idx, int fd); +@@ -93,4 +97,7 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset); + int + eal_memalloc_init(void); + ++int ++eal_memalloc_destroy(const int sec_idx); ++ + #endif /* EAL_MEMALLOC_H */ +diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h +index 9855429..b42d41d 100644 +--- a/lib/librte_eal/common/eal_options.h ++++ b/lib/librte_eal/common/eal_options.h +@@ -69,6 +69,10 @@ enum { + OPT_IOVA_MODE_NUM, + #define OPT_MATCH_ALLOCATIONS "match-allocations" + OPT_MATCH_ALLOCATIONS_NUM, ++#define OPT_PRI_AND_SEC "pri-and-sec" ++ OPT_PRI_AND_SEC_NUM, ++#define OPT_MAP_PERFECT "map-perfect" ++ OPT_MAP_PERFECT_NUM, + OPT_LONG_MAX_NUM + }; + +@@ -79,8 +83,9 @@ int eal_parse_common_option(int opt, const char *argv, + struct internal_config *conf); + int eal_option_device_parse(void); + int eal_adjust_config(struct internal_config *internal_cfg); ++int eal_sec_adjust_config(struct internal_config *internal_cfg); + int eal_cleanup_config(struct internal_config *internal_cfg); +-int eal_check_common_options(struct internal_config *internal_cfg); ++int eal_check_common_options(struct internal_config *internal_cfg, struct rte_config *cfg); + void eal_common_usage(void); + enum rte_proc_type_t eal_proc_type_detect(void); + int eal_plugins_init(void); +diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h +index 597fd02..1fd32a9 100644 +--- a/lib/librte_eal/common/eal_private.h ++++ b/lib/librte_eal/common/eal_private.h +@@ -113,7 +113,8 @@ int rte_eal_cpu_init(void); + * @return + * 0 on success, negative on error + */ +-int rte_eal_memseg_init(void); ++//int rte_eal_memseg_init(void); ++int rte_eal_memseg_init(const int switch_pri_and_sec, const int sec_idx); + + /** + * Map memory +@@ -127,6 +128,9 @@ int rte_eal_memseg_init(void); + */ + int rte_eal_memory_init(void); + ++int rte_eal_sec_memory_init(const int sec_idx); ++int rte_eal_sec_memory_cleanup(const int sec_idx); ++ + /** + * Configure timers + * +@@ -291,7 +295,8 @@ int rte_eal_hugepage_init(void); + * + * This function is private to the EAL. + */ +-int rte_eal_hugepage_attach(void); ++//int rte_eal_hugepage_attach(void); ++int rte_eal_hugepage_attach(const int switch_pri_and_sec, const int sec_idx); + + /** + * Find a bus capable of identifying a device. 
+@@ -450,4 +455,20 @@ eal_get_baseaddr(void); + **/ + bool + eal_is_master_set_affinity(void); ++ ++ ++/****** APIs for libnet ******/ ++#include ++ ++struct rte_memseg * ++rte_sec_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl, ++ const struct rte_config *rte_cfg); ++ ++struct rte_memseg_list * ++rte_sec_mem_virt2memseg_list(const void *addr, const struct rte_config *rte_cfg); ++ ++int ++rte_sec_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg, ++ struct rte_config *rte_cfg); ++ + #endif /* _EAL_PRIVATE_H_ */ +diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h +index 2f9ed29..ac1dc1d 100644 +--- a/lib/librte_eal/common/include/rte_eal.h ++++ b/lib/librte_eal/common/include/rte_eal.h +@@ -485,9 +485,17 @@ rte_eal_mbuf_user_pool_ops(void); + * @return + * The runtime directory path of DPDK + */ +-const char * ++char * + rte_eal_get_runtime_dir(void); + ++/****** APIs for libnet ******/ ++char *rte_eal_sec_get_runtime_dir(const int sec_idx); ++struct rte_config *rte_eal_sec_get_configuration(const int sec_idx); ++struct internal_config *rte_eal_sec_get_internal_config(const int sec_idx); ++ ++int rte_eal_sec_attach(int argc, char **argv); ++int rte_eal_sec_detach(const char *file_prefix, int length); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h +index 6dccdbe..dffee1e 100644 +--- a/lib/librte_eal/common/include/rte_fbarray.h ++++ b/lib/librte_eal/common/include/rte_fbarray.h +@@ -101,6 +101,10 @@ __rte_experimental + int + rte_fbarray_attach(struct rte_fbarray *arr); + ++int ++rte_sec_fbarray_attach(struct rte_fbarray *arr, ++ const int switch_pri_and_sec, const int sec_idx); ++ + + /** + * Deallocate resources for an already allocated and correctly set up +@@ -123,6 +127,9 @@ __rte_experimental + int + rte_fbarray_destroy(struct rte_fbarray *arr); + ++int ++rte_sec_fbarray_destroy(struct rte_fbarray *arr, ++ const int sec_idx); + + /** + * Deallocate resources for an already allocated and correctly set up +diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h +index 3d8d0bd..4dd6daa 100644 +--- a/lib/librte_eal/common/include/rte_memory.h ++++ b/lib/librte_eal/common/include/rte_memory.h +@@ -152,7 +152,12 @@ rte_mem_iova2virt(rte_iova_t iova); + __rte_experimental + struct rte_memseg * + rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl); +- ++/* ++__rte_experimental ++struct rte_memseg * ++rte_sec_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl, ++ const struct rte_config *rte_cfg); ++*/ + /** + * Get memseg list corresponding to virtual memory address. + * +@@ -164,7 +169,11 @@ rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl); + __rte_experimental + struct rte_memseg_list * + rte_mem_virt2memseg_list(const void *virt); +- ++/* ++__rte_experimental ++struct rte_memseg_list * ++rte_sec_mem_virt2memseg_list(const void *addr, const struct rte_config *rte_cfg); ++*/ + /** + * Memseg walk function prototype. 
+ * +@@ -282,7 +291,12 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); + __rte_experimental + int + rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg); +- ++/* ++__rte_experimental ++int ++rte_sec_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg, ++ struct rte_config *rte_cfg); ++*/ + /** + * Walk each VA-contiguous area without performing any locking. + * +diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c +index 8bb1842..a1f2b42 100644 +--- a/lib/librte_eal/linux/eal/eal.c ++++ b/lib/librte_eal/linux/eal/eal.c +@@ -103,6 +103,12 @@ static char runtime_dir[PATH_MAX]; + + static const char *default_runtime_dir = "/var/run"; + ++/****** APIs for libnet ******/ ++static unsigned int sec_count = 0; ++static struct rte_config sec_rte_config[RTE_MAX_SECONDARY]; ++static struct internal_config sec_internal_config[RTE_MAX_SECONDARY]; ++static char sec_runtime_dir[RTE_MAX_SECONDARY][PATH_MAX]; ++ + static bool master_set_affinity = true; + bool + eal_is_master_set_affinity(void) +@@ -111,7 +117,8 @@ eal_is_master_set_affinity(void) + } + + int +-eal_create_runtime_dir(void) ++eal_create_runtime_dir(char *runtime_dir, const int buflen, ++ const struct internal_config *conf) + { + const char *directory = default_runtime_dir; + const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR"); +@@ -134,8 +141,8 @@ eal_create_runtime_dir(void) + } + + /* create prefix-specific subdirectory under DPDK runtime dir */ +- ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", +- tmp, eal_get_hugefile_prefix()); ++ ret = snprintf(runtime_dir, buflen, "%s/%s", ++ tmp, conf->hugefile_prefix); + if (ret < 0 || ret == sizeof(runtime_dir)) { + RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); + return -1; +@@ -246,12 +253,18 @@ eal_clean_runtime_dir(void) + return -1; + } + +-const char * ++char * + rte_eal_get_runtime_dir(void) + { + return runtime_dir; + } + ++char * ++rte_eal_sec_get_runtime_dir(const int sec_idx) ++{ ++ return sec_runtime_dir[sec_idx]; ++} ++ + /* Return user provided mbuf pool ops name */ + const char * + rte_eal_mbuf_user_pool_ops(void) +@@ -266,6 +279,18 @@ rte_eal_get_configuration(void) + return &rte_config; + } + ++struct rte_config * ++rte_eal_sec_get_configuration(const int sec_idx) ++{ ++ return &sec_rte_config[sec_idx]; ++} ++ ++struct internal_config * ++rte_eal_sec_get_internal_config(const int sec_idx) ++{ ++ return &sec_internal_config[sec_idx]; ++} ++ + enum rte_iova_mode + rte_eal_iova_mode(void) + { +@@ -395,18 +420,22 @@ rte_eal_config_create(void) + + /* attach to an existing shared memory config */ + static int +-rte_eal_config_attach(void) ++__rte_eal_config_attach(const int mmap_flags, int *mem_cfg_fd, ++ const char *runtime_dir, ++ const struct internal_config *internal_cfg, ++ struct rte_config *rte_cfg) + { + struct rte_mem_config *mem_config; ++ int mcfg_fd = *mem_cfg_fd; + +- const char *pathname = eal_runtime_config_path(); ++ const char *pathname = eal_sec_runtime_config_path(runtime_dir); + +- if (internal_config.no_shconf) ++ if (internal_cfg->no_shconf) + return 0; + +- if (mem_cfg_fd < 0){ +- mem_cfg_fd = open(pathname, O_RDWR); +- if (mem_cfg_fd < 0) { ++ if (mcfg_fd < 0){ ++ mcfg_fd = open(pathname, O_RDWR); ++ if (mcfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n", + pathname); + return -1; +@@ -415,20 +444,29 @@ rte_eal_config_attach(void) + + /* map it as read-only first */ + mem_config = (struct rte_mem_config *) mmap(NULL, 
sizeof(*mem_config), +- PROT_READ, MAP_SHARED, mem_cfg_fd, 0); ++ mmap_flags, MAP_SHARED, mcfg_fd, 0); + if (mem_config == MAP_FAILED) { +- close(mem_cfg_fd); +- mem_cfg_fd = -1; ++ close(mcfg_fd); ++ mcfg_fd = -1; + RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + +- rte_config.mem_config = mem_config; ++ rte_cfg->mem_config = mem_config; ++ *mem_cfg_fd = mcfg_fd; + + return 0; + } + ++static int ++rte_eal_config_attach(void) ++{ ++ return __rte_eal_config_attach(PROT_READ, &mem_cfg_fd, ++ rte_eal_get_runtime_dir(), &internal_config, ++ rte_eal_get_configuration()); ++} ++ + /* reattach the shared config at exact memory location primary process has it */ + static int + rte_eal_config_reattach(void) +@@ -531,6 +569,45 @@ rte_config_init(void) + return 0; + } + ++static void ++rte_sec_config_init(const int sec_idx) ++{ ++ int mem_cfg_fd = -1; ++ int mmap_flags = PROT_READ | PROT_WRITE; ++ ++ struct rte_config *rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ struct internal_config *internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ ++ rte_cfg->process_type = internal_cfg->process_type; ++ ++ __rte_eal_config_attach(mmap_flags, &mem_cfg_fd, ++ rte_eal_sec_get_runtime_dir(sec_idx), ++ internal_cfg, rte_cfg); ++ ++ close(mem_cfg_fd); ++} ++ ++static int ++eal_sec_config_cleanup(const int sec_idx) ++{ ++ int ret; ++ struct rte_config *lc_rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ struct internal_config *lc_internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ char *lc_runtime_dir = rte_eal_sec_get_runtime_dir(sec_idx); ++ ++ ret = munmap(lc_rte_cfg->mem_config, sizeof(*lc_rte_cfg->mem_config)); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Failed to unmap config memory!\n"); ++ return -1; ++ } ++ ++ memset(lc_rte_cfg, 0, sizeof(*lc_rte_cfg)); ++ memset(lc_internal_cfg, 0, sizeof(*lc_internal_cfg)); ++ memset(lc_runtime_dir, 0, PATH_MAX); ++ ++ return 0; ++} ++ + /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ + static void + eal_hugedirs_unlock(void) +@@ -566,6 +643,7 @@ eal_usage(const char *prgname) + " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" + " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" + " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n" ++ " --"OPT_MAP_PERFECT" Map virtual addresses according to configured hugepage size\n" + "\n"); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { +@@ -693,7 +771,9 @@ eal_log_level_parse(int argc, char **argv) + + /* Parse the argument given in the command line of the application */ + static int +-eal_parse_args(int argc, char **argv) ++__eal_parse_args(int argc, char **argv, char *runtime_dir, const int buflen, ++ struct internal_config *internal_cfg, ++ struct rte_config *rte_cfg) + { + int opt, ret; + char **argvopt; +@@ -724,7 +804,7 @@ eal_parse_args(int argc, char **argv) + goto out; + } + +- ret = eal_parse_common_option(opt, optarg, &internal_config); ++ ret = eal_parse_common_option(opt, optarg, internal_cfg); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); +@@ -747,9 +827,9 @@ eal_parse_args(int argc, char **argv) + RTE_LOG(ERR, EAL, "Could not store hugepage directory\n"); + else { + /* free old hugepage dir */ +- if (internal_config.hugepage_dir != NULL) +- free(internal_config.hugepage_dir); +- internal_config.hugepage_dir = hdir; ++ if 
(internal_cfg->hugepage_dir != NULL) ++ free(internal_cfg->hugepage_dir); ++ internal_cfg->hugepage_dir = hdir; + } + break; + } +@@ -760,34 +840,34 @@ eal_parse_args(int argc, char **argv) + RTE_LOG(ERR, EAL, "Could not store file prefix\n"); + else { + /* free old prefix */ +- if (internal_config.hugefile_prefix != NULL) +- free(internal_config.hugefile_prefix); +- internal_config.hugefile_prefix = prefix; ++ if (internal_cfg->hugefile_prefix != NULL) ++ free(internal_cfg->hugefile_prefix); ++ internal_cfg->hugefile_prefix = prefix; + } + break; + } + case OPT_SOCKET_MEM_NUM: + if (eal_parse_socket_arg(optarg, +- internal_config.socket_mem) < 0) { ++ internal_cfg->socket_mem) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } +- internal_config.force_sockets = 1; ++ internal_cfg->force_sockets = 1; + break; + + case OPT_SOCKET_LIMIT_NUM: + if (eal_parse_socket_arg(optarg, +- internal_config.socket_limit) < 0) { ++ internal_cfg->socket_limit) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_LIMIT "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } +- internal_config.force_socket_limits = 1; ++ internal_cfg->force_socket_limits = 1; + break; + + case OPT_VFIO_INTR_NUM: +@@ -801,7 +881,7 @@ eal_parse_args(int argc, char **argv) + break; + + case OPT_CREATE_UIO_DEV_NUM: +- internal_config.create_uio_dev = 1; ++ internal_cfg->create_uio_dev = 1; + break; + + case OPT_MBUF_POOL_OPS_NAME_NUM: +@@ -811,17 +891,21 @@ eal_parse_args(int argc, char **argv) + RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); + else { + /* free old ops name */ +- if (internal_config.user_mbuf_pool_ops_name != ++ if (internal_cfg->user_mbuf_pool_ops_name != + NULL) +- free(internal_config.user_mbuf_pool_ops_name); ++ free(internal_cfg->user_mbuf_pool_ops_name); + +- internal_config.user_mbuf_pool_ops_name = ++ internal_cfg->user_mbuf_pool_ops_name = + ops_name; + } + break; + } + case OPT_MATCH_ALLOCATIONS_NUM: +- internal_config.match_allocations = 1; ++ internal_cfg->match_allocations = 1; ++ break; ++ ++ case OPT_MAP_PERFECT_NUM: ++ internal_cfg->map_perfect = 1; + break; + + default: +@@ -844,20 +928,25 @@ eal_parse_args(int argc, char **argv) + } + + /* create runtime data directory */ +- if (internal_config.no_shconf == 0 && +- eal_create_runtime_dir() < 0) { ++ if (internal_cfg->no_shconf == 0 && ++ eal_create_runtime_dir(runtime_dir, buflen, internal_cfg) < 0) { + RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); + ret = -1; + goto out; + } + +- if (eal_adjust_config(&internal_config) != 0) { +- ret = -1; +- goto out; ++ if (!internal_cfg->pri_and_sec) { ++ ret = eal_adjust_config(internal_cfg); ++ if (ret != 0) ++ goto out; ++ } else { ++ ret = eal_sec_adjust_config(internal_cfg); ++ if (ret != 0) ++ goto out; + } + + /* sanity checks */ +- if (eal_check_common_options(&internal_config) != 0) { ++ if (eal_check_common_options(internal_cfg, rte_cfg) != 0) { + eal_usage(prgname); + ret = -1; + goto out; +@@ -876,6 +965,24 @@ eal_parse_args(int argc, char **argv) + return ret; + } + ++static int ++eal_parse_args(int argc, char **argv) ++{ ++ return __eal_parse_args(argc, argv, ++ rte_eal_get_runtime_dir(), PATH_MAX, ++ &internal_config, ++ rte_eal_get_configuration()); ++} ++ ++static int ++eal_sec_parse_args(int argc, char **argv, const int sec_idx) ++{ ++ return __eal_parse_args(argc, argv, ++ rte_eal_sec_get_runtime_dir(sec_idx), PATH_MAX, ++ rte_eal_sec_get_internal_config(sec_idx), ++ 
rte_eal_sec_get_configuration(sec_idx)); ++} ++ + static int + check_socket(const struct rte_memseg_list *msl, void *arg) + { +@@ -1406,3 +1513,99 @@ rte_eal_check_module(const char *module_name) + /* Module has been found */ + return 1; + } ++ ++ ++/****** APIs for libnet ******/ ++int ++rte_eal_sec_attach(int argc, char **argv) ++{ ++ int ret; ++ int sec_idx = -1; ++ struct internal_config *lc_internal_cfg = NULL; ++ ++ if (sec_count >= RTE_MAX_SECONDARY) { ++ RTE_LOG(ERR, EAL, "Too many secondary processes: %d.\n", sec_count); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ for (int i = 0; i < RTE_MAX_SECONDARY; ++i) { ++ if (sec_internal_config[i].pri_and_sec == 0) { ++ sec_internal_config[i].pri_and_sec = 1; ++ sec_idx = i; ++ break; ++ } ++ } ++ lc_internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ ++ eal_reset_internal_config(lc_internal_cfg); ++ ++ ret = eal_sec_parse_args(argc, argv, sec_idx); ++ if (ret < 0) { ++ if (ret == -EALREADY) { ++ RTE_LOG(ERR, EAL, "file_refix %s already called initialization.\n", ++ lc_internal_cfg->hugefile_prefix); ++ rte_errno = EALREADY; ++ } else { ++ RTE_LOG(ERR, EAL, "Invalid 'command line' arguments.\n"); ++ rte_errno = EINVAL; ++ } ++ return -1; ++ } ++ ++ rte_sec_config_init(sec_idx); ++ ++ ret = rte_eal_sec_memory_init(sec_idx); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Cannot init memory\n"); ++ rte_errno = ENOMEM; ++ return -1; ++ } ++ ++ sec_count++; ++ return 0; ++} ++ ++int ++rte_eal_sec_detach(const char *file_prefix, int length) ++{ ++ int ret; ++ int sec_idx = -1; ++ ++ if (!file_prefix || length <= 0) { ++ RTE_LOG(ERR, EAL, "Invalid 'file_prefix or length' arguments.\n"); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ for (int i = 0; i < RTE_MAX_SECONDARY; ++i) { ++ if (sec_internal_config[i].pri_and_sec == 0) ++ continue; ++ if (!strncmp(sec_internal_config[i].hugefile_prefix, file_prefix, length)) { ++ sec_idx = i; ++ break; ++ } ++ } ++ if (sec_idx == -1) { ++ RTE_LOG(ERR, EAL, "Cannot find file_prefix %s.\n", file_prefix); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ ret = rte_eal_sec_memory_cleanup(sec_idx); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Cannot cleanup memory\n"); ++ rte_errno = ENOMEM; ++ return -1; ++ } ++ ++ ret = eal_sec_config_cleanup(sec_idx); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Cannot cleanup hugepage sharefile.\n"); ++ rte_errno = EACCES; ++ return -1; ++ } ++ ++ sec_count--; ++ return 0; ++} +diff --git a/lib/librte_eal/linux/eal/eal_hugepage_info.c b/lib/librte_eal/linux/eal/eal_hugepage_info.c +index 91a4fed..911acec 100644 +--- a/lib/librte_eal/linux/eal/eal_hugepage_info.c ++++ b/lib/librte_eal/linux/eal/eal_hugepage_info.c +@@ -350,7 +350,7 @@ calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent) + */ + total_pages = 0; + /* we also don't want to do this for legacy init */ +- if (!internal_config.legacy_mem) ++ if (!internal_config.legacy_mem || internal_config.map_perfect) + for (i = 0; i < rte_socket_count(); i++) { + int socket = rte_socket_id_by_idx(i); + unsigned int num_pages = +diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c +index cad4934..8e7f120 100644 +--- a/lib/librte_eal/linux/eal/eal_memalloc.c ++++ b/lib/librte_eal/linux/eal/eal_memalloc.c +@@ -95,12 +95,14 @@ static int fallocate_supported = -1; /* unknown */ + * they will be initialized at startup, and filled as we allocate/deallocate + * segments. 
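+ * With this patch each secondary instance attached through
+ * rte_eal_sec_attach() also keeps a private copy in
+ * sec_fd_list[sec_idx], torn down again in eal_memalloc_destroy().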
+ */ +-static struct { ++struct fd_list{ + int *fds; /**< dynamically allocated array of segment lock fd's */ + int memseg_list_fd; /**< memseg list fd */ + int len; /**< total length of the array */ + int count; /**< entries used in an array */ +-} fd_list[RTE_MAX_MEMSEG_LISTS]; ++}; ++static struct fd_list fd_list[RTE_MAX_MEMSEG_LISTS]; ++static struct fd_list sec_fd_list[RTE_MAX_SECONDARY][RTE_MAX_MEMSEG_LISTS]; + + /** local copy of a memory map, used to synchronize memory hotplug in MP */ + static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS]; +@@ -1391,13 +1393,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl, + } + + static int +-alloc_list(int list_idx, int len) ++__alloc_list(int list_idx, int len, struct fd_list *fd_ls) + { + int *data; + int i; + + /* single-file segments mode does not need fd list */ +- if (!internal_config.single_file_segments) { ++ if (!internal_config.single_file_segments) { // sec todo + /* ensure we have space to store fd per each possible segment */ + data = malloc(sizeof(int) * len); + if (data == NULL) { +@@ -1407,19 +1409,31 @@ alloc_list(int list_idx, int len) + /* set all fd's as invalid */ + for (i = 0; i < len; i++) + data[i] = -1; +- fd_list[list_idx].fds = data; +- fd_list[list_idx].len = len; ++ fd_ls[list_idx].fds = data; ++ fd_ls[list_idx].len = len; + } else { +- fd_list[list_idx].fds = NULL; +- fd_list[list_idx].len = 0; ++ fd_ls[list_idx].fds = NULL; ++ fd_ls[list_idx].len = 0; + } + +- fd_list[list_idx].count = 0; +- fd_list[list_idx].memseg_list_fd = -1; ++ fd_ls[list_idx].count = 0; ++ fd_ls[list_idx].memseg_list_fd = -1; + + return 0; + } + ++static int ++alloc_list(int list_idx, int len) ++{ ++ return __alloc_list(list_idx, len, fd_list); ++} ++ ++static int ++sec_alloc_list(int list_idx, int len, struct fd_list *fd_ls) ++{ ++ return __alloc_list(list_idx, len, fd_ls); ++} ++ + static int + fd_list_create_walk(const struct rte_memseg_list *msl, + void *arg __rte_unused) +@@ -1437,27 +1451,71 @@ fd_list_create_walk(const struct rte_memseg_list *msl, + return alloc_list(msl_idx, len); + } + +-int +-eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd) ++static int ++fd_list_destroy_walk(const struct rte_memseg_list *msl, const int sec_idx) + { +- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; ++ struct rte_mem_config *mcfg = rte_eal_sec_get_configuration(sec_idx)->mem_config; ++ struct fd_list *fd_ls = sec_fd_list[sec_idx]; ++ int list_idx; ++ ++ list_idx = msl - mcfg->memsegs; ++ if (fd_ls[list_idx].len != 0) { ++ free(fd_ls[list_idx].fds); ++ /* We have closed fd, seeing in function of eal_legacy_hugepage_attach. 
*/ ++ //close(fd_ls[list_idx].fds[seg_idx]); ++ } ++ memset(&fd_ls[list_idx], 0, sizeof(fd_ls[list_idx])); ++ ++ return 0; ++} ++ ++static int ++__eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd, ++ const struct rte_config *rte_cfg, struct fd_list *fd_ls) ++{ ++ struct rte_mem_config *mcfg = rte_cfg->mem_config; + + /* single file segments mode doesn't support individual segment fd's */ +- if (internal_config.single_file_segments) ++ if (internal_config.single_file_segments) // sec todo + return -ENOTSUP; + + /* if list is not allocated, allocate it */ +- if (fd_list[list_idx].len == 0) { ++ if (fd_ls[list_idx].len == 0) { + int len = mcfg->memsegs[list_idx].memseg_arr.len; + +- if (alloc_list(list_idx, len) < 0) ++ if (sec_alloc_list(list_idx, len, fd_ls) < 0) + return -ENOMEM; + } +- fd_list[list_idx].fds[seg_idx] = fd; ++ fd_ls[list_idx].fds[seg_idx] = fd; + + return 0; + } + ++int ++eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd) ++{ ++ return __eal_memalloc_set_seg_fd(list_idx, seg_idx, fd, ++ rte_eal_get_configuration(), fd_list); ++} ++ ++int ++eal_sec_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd, ++ const int switch_pri_and_sec, const int sec_idx) ++{ ++ struct rte_config *rte_cfg = NULL; ++ struct fd_list *fd_ls = NULL; ++ ++ if (!switch_pri_and_sec) { ++ rte_cfg = rte_eal_get_configuration(); ++ fd_ls = &fd_list[0]; ++ } else { ++ rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ fd_ls = &sec_fd_list[sec_idx][0]; ++ } ++ ++ return __eal_memalloc_set_seg_fd(list_idx, seg_idx, fd, rte_cfg, fd_ls); ++} ++ + int + eal_memalloc_set_seg_list_fd(int list_idx, int fd) + { +@@ -1602,3 +1660,38 @@ eal_memalloc_init(void) + return -1; + return 0; + } ++ ++int ++eal_memalloc_destroy(const int sec_idx) ++{ ++ int msl_idx = 0; ++ struct rte_memseg_list *msl; ++ struct rte_mem_config *mcfg = rte_eal_sec_get_configuration(sec_idx)->mem_config; ++ ++ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { ++ ++ msl = &mcfg->memsegs[msl_idx]; ++ ++ /* skip empty memseg lists */ ++ if (msl->memseg_arr.len == 0) ++ continue; ++ ++ if (rte_sec_fbarray_destroy(&msl->memseg_arr, sec_idx)) { ++ RTE_LOG(ERR, EAL, "Cannot clear secondary process local memseg lists\n"); ++ return -1; ++ } ++ ++ if (munmap(msl->base_va, msl->len) < 0) { ++ RTE_LOG(ERR, EAL, "Failed to unmap memseg lists\n"); ++ return -1; ++ } ++ memset(msl, 0, sizeof(*msl)); ++ ++ if (fd_list_destroy_walk(msl, sec_idx)) { ++ RTE_LOG(ERR, EAL, "Failed to clear secondary fd_list.\n"); ++ return -1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c +index 43e4ffc..ac81f43 100644 +--- a/lib/librte_eal/linux/eal/eal_memory.c ++++ b/lib/librte_eal/linux/eal/eal_memory.c +@@ -1055,10 +1055,10 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages) + * address to lower address. Here, physical addresses are in + * descending order. 
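+ * With --map-perfect the physical-contiguity test below is skipped,
+ * so a gap in physical addresses no longer forces a new memseg.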
+ */ +- else if ((prev->physaddr - cur->physaddr) != cur->size) ++ else if (!internal_config.map_perfect && (prev->physaddr - cur->physaddr) != cur->size) + new_memseg = 1; + #else +- else if ((cur->physaddr - prev->physaddr) != cur->size) ++ else if (!internal_config.map_perfect && (cur->physaddr - prev->physaddr) != cur->size) + new_memseg = 1; + #endif + +@@ -1457,6 +1457,24 @@ eal_legacy_hugepage_init(void) + /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ + used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; + ++ if (internal_config.map_perfect) { ++ int sys_num_pages = 0; ++ int need_num_pages = 0; ++ struct rte_memseg_list *msl; ++ ++ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { ++ sys_num_pages += internal_config.hugepage_info[i].num_pages[j]; ++ } ++ ++ for (j = 0; j < RTE_MAX_MEMSEG_LISTS; j++) { ++ msl = &mcfg->memsegs[j]; ++ if (internal_config.hugepage_info[i].hugepage_sz == msl->page_sz) ++ need_num_pages += msl->memseg_arr.len; ++ } ++ ++ internal_config.hugepage_info[i].num_pages[0] = RTE_MIN(sys_num_pages, need_num_pages); ++ } ++ + nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; + } + +@@ -1537,8 +1555,13 @@ eal_legacy_hugepage_init(void) + goto fail; + } + +- qsort(&tmp_hp[hp_offset], hpi->num_pages[0], +- sizeof(struct hugepage_file), cmp_physaddr); ++ /* continuous physical memory does not bring performance improvements, ++ * so no sorting is performed for quick startup. ++ */ ++ if (!internal_config.map_perfect) { ++ qsort(&tmp_hp[hp_offset], hpi->num_pages[0], ++ sizeof(struct hugepage_file), cmp_physaddr); ++ } + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += hpi->num_pages[0]; +@@ -1857,9 +1880,9 @@ getFileSize(int fd) + * in order to form a contiguous block in the virtual memory space + */ + static int +-eal_legacy_hugepage_attach(void) ++eal_legacy_hugepage_attach(const int switch_pri_and_sec, const int sec_idx) + { +- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; ++ struct rte_mem_config *mcfg = NULL; + struct hugepage_file *hp = NULL; + unsigned int num_hp = 0; + unsigned int i = 0; +@@ -1867,6 +1890,22 @@ eal_legacy_hugepage_attach(void) + off_t size = 0; + int fd, fd_hugepage = -1; + ++ struct rte_config *rte_cfg = NULL; ++ struct internal_config *internal_cfg = NULL; ++ char *runtime_dir = NULL; ++ ++ if (!switch_pri_and_sec) { ++ runtime_dir = rte_eal_get_runtime_dir(); ++ rte_cfg = rte_eal_get_configuration(); ++ internal_cfg = &internal_config; ++ } else { ++ runtime_dir = rte_eal_sec_get_runtime_dir(sec_idx); ++ rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ } ++ ++ mcfg = rte_cfg->mem_config; ++ + if (aslr_enabled() > 0) { + RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " + "(ASLR) is enabled in the kernel.\n"); +@@ -1874,10 +1913,10 @@ eal_legacy_hugepage_attach(void) + "into secondary processes\n"); + } + +- fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY); ++ fd_hugepage = open(eal_sec_hugepage_data_path(runtime_dir), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", +- eal_hugepage_data_path()); ++ eal_sec_hugepage_data_path(runtime_dir)); + goto error; + } + +@@ -1885,7 +1924,7 @@ eal_legacy_hugepage_attach(void) + hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); + if (hp == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", +- eal_hugepage_data_path()); ++ 
eal_sec_hugepage_data_path(runtime_dir)); + goto error; + } + +@@ -1932,13 +1971,13 @@ eal_legacy_hugepage_attach(void) + } + + /* find segment data */ +- msl = rte_mem_virt2memseg_list(map_addr); ++ msl = rte_sec_mem_virt2memseg_list(map_addr, rte_cfg); + if (msl == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n", + __func__); + goto fd_error; + } +- ms = rte_mem_virt2memseg(map_addr, msl); ++ ms = rte_sec_mem_virt2memseg(map_addr, msl, rte_cfg); + if (ms == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n", + __func__); +@@ -1953,8 +1992,16 @@ eal_legacy_hugepage_attach(void) + goto fd_error; + } + ++ /* No hugefile lock is required in PRI_AND_SEC mode, close it ++ * to avoid opening too much fd. ++ */ ++ if (internal_cfg->pri_and_sec) { ++ close(fd); ++ fd = -1; ++ } ++ + /* store segment fd internally */ +- if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0) ++ if (eal_sec_memalloc_set_seg_fd(msl_idx, ms_idx, fd, switch_pri_and_sec, sec_idx) < 0) + RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n", + rte_strerror(rte_errno)); + } +@@ -2003,10 +2050,17 @@ rte_eal_hugepage_init(void) + } + + int +-rte_eal_hugepage_attach(void) ++rte_eal_hugepage_attach(const int switch_pri_and_sec, const int sec_idx) + { +- return internal_config.legacy_mem ? +- eal_legacy_hugepage_attach() : ++ struct internal_config *internal_cfg; ++ ++ if (!switch_pri_and_sec) ++ internal_cfg = &internal_config; ++ else ++ internal_cfg = rte_eal_sec_get_internal_config(sec_idx); ++ ++ return internal_cfg->legacy_mem ? ++ eal_legacy_hugepage_attach(switch_pri_and_sec, sec_idx) : + eal_hugepage_attach(); + } + +@@ -2215,6 +2269,50 @@ memseg_primary_init_32(void) + return 0; + } + ++static int ++eal_sec_set_num_pages(struct internal_config *internal_cfg, ++ struct hugepage_info *used_hp) ++{ ++ int ret; ++ int hp_sz_idx; ++ uint64_t memory[RTE_MAX_NUMA_NODES]; ++ ++ if (!internal_cfg || !used_hp) { ++ return -1; ++ } ++ ++ for (hp_sz_idx = 0; ++ hp_sz_idx < (int) internal_cfg->num_hugepage_sizes; ++ hp_sz_idx++) { ++ struct hugepage_info *hpi; ++ hpi = &internal_cfg->hugepage_info[hp_sz_idx]; ++ used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz; ++ } ++ ++ for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++) ++ memory[hp_sz_idx] = internal_cfg->socket_mem[hp_sz_idx]; ++ ++ ret = calc_num_pages_per_socket(memory, ++ internal_cfg->hugepage_info, used_hp, ++ internal_cfg->num_hugepage_sizes); ++ ++ return ret; ++} ++ ++static int ++eal_sec_get_num_pages(const struct hugepage_info *used_hp, ++ uint64_t hugepage_sz, int socket) ++{ ++ int hp_sz_idx; ++ ++ for (hp_sz_idx = 0; hp_sz_idx < MAX_HUGEPAGE_SIZES; hp_sz_idx++) { ++ if (used_hp[hp_sz_idx].hugepage_sz == hugepage_sz) ++ return used_hp[hp_sz_idx].num_pages[socket]; ++ } ++ ++ return 0; ++} ++ + static int __rte_unused + memseg_primary_init(void) + { +@@ -2228,11 +2326,20 @@ memseg_primary_init(void) + uint64_t max_mem, max_mem_per_type; + unsigned int max_seglists_per_type; + unsigned int n_memtypes, cur_type; ++ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + + /* no-huge does not need this at all */ + if (internal_config.no_hugetlbfs) + return 0; + ++ if (internal_config.map_perfect) { ++ memset(used_hp, 0, sizeof(used_hp)); ++ ret = eal_sec_set_num_pages(&internal_config, used_hp); ++ if (ret == -1) { ++ RTE_LOG(ERR, EAL, "Cannot get num pages\n"); ++ } ++ } ++ + /* + * figuring out amount of memory we're going to have is a long and very + * involved process. 
the basic element we're operating with is a memory +@@ -2329,6 +2436,7 @@ memseg_primary_init(void) + struct memtype *type = &memtypes[cur_type]; + uint64_t max_mem_per_list, pagesz; + int socket_id; ++ unsigned int need_n_segs, cur_n_segs; + + pagesz = type->page_sz; + socket_id = type->socket_id; +@@ -2372,8 +2480,17 @@ memseg_primary_init(void) + "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n", + n_seglists, n_segs, socket_id, pagesz); + ++ if (internal_config.map_perfect) ++ need_n_segs = eal_sec_get_num_pages(used_hp, pagesz, socket_id); ++ else ++ need_n_segs = n_segs; ++ + /* create all segment lists */ +- for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) { ++ for (cur_seglist = 0; cur_seglist < n_seglists && need_n_segs > 0; cur_seglist++) { ++ cur_n_segs = RTE_MIN(need_n_segs, n_segs); ++ if (internal_config.map_perfect) ++ need_n_segs -= cur_n_segs; ++ + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", +@@ -2382,7 +2499,7 @@ memseg_primary_init(void) + } + msl = &mcfg->memsegs[msl_idx++]; + +- if (alloc_memseg_list(msl, pagesz, n_segs, ++ if (alloc_memseg_list(msl, pagesz, cur_n_segs, + socket_id, cur_seglist)) + goto out; + +@@ -2400,9 +2517,10 @@ memseg_primary_init(void) + } + + static int +-memseg_secondary_init(void) ++memseg_secondary_init(struct rte_config *rte_cfg, ++ const int switch_pri_and_sec, const int sec_idx) + { +- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; ++ struct rte_mem_config *mcfg = rte_cfg->mem_config; + int msl_idx = 0; + struct rte_memseg_list *msl; + +@@ -2414,7 +2532,7 @@ memseg_secondary_init(void) + if (msl->memseg_arr.len == 0) + continue; + +- if (rte_fbarray_attach(&msl->memseg_arr)) { ++ if (rte_sec_fbarray_attach(&msl->memseg_arr, switch_pri_and_sec, sec_idx)) { + RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); + return -1; + } +@@ -2430,11 +2548,18 @@ memseg_secondary_init(void) + } + + int +-rte_eal_memseg_init(void) ++rte_eal_memseg_init(const int switch_pri_and_sec, const int sec_idx) + { + /* increase rlimit to maximum */ + struct rlimit lim; + ++ struct rte_config *rte_cfg = NULL; ++ if (!switch_pri_and_sec) { ++ rte_cfg = rte_eal_get_configuration(); ++ } else { ++ rte_cfg = rte_eal_sec_get_configuration(sec_idx); ++ } ++ + if (getrlimit(RLIMIT_NOFILE, &lim) == 0) { + /* set limit to maximum */ + lim.rlim_cur = lim.rlim_max; +@@ -2458,11 +2583,11 @@ rte_eal_memseg_init(void) + } + #endif + +- return rte_eal_process_type() == RTE_PROC_PRIMARY ? ++ return rte_cfg->process_type == RTE_PROC_PRIMARY ? 
+ #ifndef RTE_ARCH_64 + memseg_primary_init_32() : + #else + memseg_primary_init() : + #endif +- memseg_secondary_init(); ++ memseg_secondary_init(rte_cfg, switch_pri_and_sec, sec_idx); + } +diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h +index 2a9f768..0eb3a48 100644 +--- a/lib/librte_ring/rte_ring.h ++++ b/lib/librte_ring/rte_ring.h +@@ -953,6 +953,81 @@ rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, + r->cons.single, available); + } + ++/****** APIs for libnet ******/ ++static __rte_always_inline unsigned ++rte_ring_cn_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n) ++{ ++ const uint32_t old_head = r->prod.tail; ++ rte_smp_rmb(); ++ ++ const uint32_t entries = r->cons.head - old_head; ++ if (n > entries) { ++ n = entries; ++ } ++ if (unlikely(n == 0)) { ++ return 0; ++ } ++ ++ r->prod.head = old_head + n; ++ rte_smp_rmb(); ++ ++ DEQUEUE_PTRS(r, &r[1], old_head, obj_table, n, void *); ++ return n; ++} ++ ++static __rte_always_inline void ++rte_ring_cn_enqueue(struct rte_ring *r) ++{ ++ rte_smp_wmb(); ++ r->prod.tail = r->prod.head; ++} ++ ++static __rte_always_inline unsigned ++rte_ring_en_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n) ++{ ++ const uint32_t old_tail = r->cons.tail; ++ rte_smp_rmb(); ++ ++ const uint32_t entries = r->prod.tail - old_tail; ++ if (n > entries) { ++ n = entries; ++ } ++ if (unlikely(n == 0)) { ++ return 0; ++ } ++ ++ const uint32_t new_tail = old_tail + n; ++ rte_smp_rmb(); ++ ++ DEQUEUE_PTRS(r, &r[1], old_tail, obj_table, n, void *); ++ rte_smp_rmb(); ++ ++ r->cons.tail = new_tail; ++ return n; ++} ++ ++static __rte_always_inline unsigned ++rte_ring_en_enqueue_bulk(struct rte_ring *r, void **obj_table, unsigned int n) ++{ ++ const uint32_t capacity = r->capacity; ++ const uint32_t old_head = r->cons.head; ++ rte_smp_rmb(); ++ ++ const uint32_t entries = capacity + r->cons.tail - old_head; ++ if (n > entries) { ++ return 0; ++ } ++ ++ const uint32_t new_head = old_head + n; ++ rte_smp_rmb(); ++ ++ ENQUEUE_PTRS(r, &r[1], old_head, obj_table, n, void *); ++ rte_smp_wmb(); ++ ++ r->cons.head = new_head; ++ return n; ++} ++ + #ifdef __cplusplus + } + #endif +-- +2.30.0 + diff --git a/0010-dpdk-fix-error-in-clearing-secondary-process-memseg-lists.patch b/0010-dpdk-fix-error-in-clearing-secondary-process-memseg-lists.patch new file mode 100644 index 0000000000000000000000000000000000000000..ddb4c023dcd5026ac5a8f056e48db5b5b29968c9 --- /dev/null +++ b/0010-dpdk-fix-error-in-clearing-secondary-process-memseg-lists.patch @@ -0,0 +1,43 @@ +From 4bda889d737ee2b1fb2381e658bcf4f2a7ca21c8 Mon Sep 17 00:00:00 2001 +From: HuangLiming +Date: Tue, 18 Aug 2020 04:58:53 -0400 +Subject: [PATCH] fix error in clearing secondary process memseg lists + +Signed-off-by: HuangLiming +--- + lib/librte_eal/common/eal_common_fbarray.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c +index b611ffa..116c695 100644 +--- a/lib/librte_eal/common/eal_common_fbarray.c ++++ b/lib/librte_eal/common/eal_common_fbarray.c +@@ -1105,7 +1105,7 @@ int + rte_sec_fbarray_destroy(struct rte_fbarray *arr, + const int sec_idx) + { +- int fd, ret; ++ int fd; + size_t mmap_len; + char path[PATH_MAX]; + +@@ -1134,15 +1134,13 @@ rte_sec_fbarray_destroy(struct rte_fbarray *arr, + if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); + rte_errno = EBUSY; +- ret 
= -1; + } else { +- ret = 0; + unlink(path); + memset(arr, 0, sizeof(*arr)); + } + close(fd); + +- return ret; ++ return 0; + } + + void * +-- +2.21.0 + diff --git a/0011-dpdk-fix-coredump-when-primary-process-attach-without-shared-file.patch b/0011-dpdk-fix-coredump-when-primary-process-attach-without-shared-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..86960449fda48e27ebaddaa54fea8d5111b54507 --- /dev/null +++ b/0011-dpdk-fix-coredump-when-primary-process-attach-without-shared-file.patch @@ -0,0 +1,62 @@ +From 561a37288d629398f976dfa4e57854b7ea484cc7 Mon Sep 17 00:00:00 2001 +From: yuanyunkang +Date: Sat, 22 Aug 2020 14:39:16 +0800 +Subject: [PATCH] dpdk:fix coredump when primary process attach without shared + file + +Signed-off-by: yuanyunkang +--- + lib/librte_eal/linux/eal/eal.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c +index a1f2b42..ff86ff9 100644 +--- a/lib/librte_eal/linux/eal/eal.c ++++ b/lib/librte_eal/linux/eal/eal.c +@@ -569,22 +569,28 @@ rte_config_init(void) + return 0; + } + +-static void ++static int + rte_sec_config_init(const int sec_idx) + { + int mem_cfg_fd = -1; + int mmap_flags = PROT_READ | PROT_WRITE; ++ int ret = -1; + + struct rte_config *rte_cfg = rte_eal_sec_get_configuration(sec_idx); + struct internal_config *internal_cfg = rte_eal_sec_get_internal_config(sec_idx); + + rte_cfg->process_type = internal_cfg->process_type; + +- __rte_eal_config_attach(mmap_flags, &mem_cfg_fd, ++ ret = __rte_eal_config_attach(mmap_flags, &mem_cfg_fd, + rte_eal_sec_get_runtime_dir(sec_idx), + internal_cfg, rte_cfg); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Cannot attach shared memory\n"); ++ return -1; ++ } + + close(mem_cfg_fd); ++ return 0; + } + + static int +@@ -1553,7 +1559,11 @@ rte_eal_sec_attach(int argc, char **argv) + return -1; + } + +- rte_sec_config_init(sec_idx); ++ ret = rte_sec_config_init(sec_idx); ++ if (ret < 0) { ++ RTE_LOG(ERR, EAL, "Cannot init sec config\n"); ++ return -1; ++ } + + ret = rte_eal_sec_memory_init(sec_idx); + if (ret < 0) { +-- +2.19.1 + diff --git a/0012-dpdk-fix-fbarray-memseg-destory-error-during-detach.patch b/0012-dpdk-fix-fbarray-memseg-destory-error-during-detach.patch new file mode 100644 index 0000000000000000000000000000000000000000..27642e51d28e777f17c81e25176a4c6c82db4927 --- /dev/null +++ b/0012-dpdk-fix-fbarray-memseg-destory-error-during-detach.patch @@ -0,0 +1,31 @@ +From e5cc58807c8d03554f2c3f0eee3b0b6d6f44278f Mon Sep 17 00:00:00 2001 +From: HuangLiming +Date: Sat, 22 Aug 2020 05:32:47 -0400 +Subject: [PATCH] fix fbarray memseg destory error during detach without shared + file + +Signed-off-by: HuangLiming +--- + lib/librte_eal/common/eal_common_fbarray.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c +index 116c695..d1aa074 100644 +--- a/lib/librte_eal/common/eal_common_fbarray.c ++++ b/lib/librte_eal/common/eal_common_fbarray.c +@@ -1127,9 +1127,9 @@ rte_sec_fbarray_destroy(struct rte_fbarray *arr, + + fd = open(path, O_RDONLY); + if (fd < 0) { +- RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n", +- strerror(errno)); +- return -1; ++ RTE_LOG(WARNING, EAL, "Could not open %s: %s, and just skip it\n", ++ path, strerror(errno)); ++ return 0; + } + if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n"); 
+-- +2.21.0 + diff --git a/0013-dpdk-optimize-the-efficiency-of-compiling-dpdk.patch b/0013-dpdk-optimize-the-efficiency-of-compiling-dpdk.patch new file mode 100644 index 0000000000000000000000000000000000000000..3a23de0e3553f148c63be5f25ef30c38bace38a1 --- /dev/null +++ b/0013-dpdk-optimize-the-efficiency-of-compiling-dpdk.patch @@ -0,0 +1,195 @@ +From 5e554c15982617a89b85aeb71592c20bfa7bdecd Mon Sep 17 00:00:00 2001 +From: Renmingshuai +Date: Tue, 13 Apr 2021 16:25:43 +0800 +Subject: [PATCH] optimize the efficiency of compiling dpdk + +--- + config/common_base | 5 +++ + mk/rte.combinedlib.mk | 10 +++++ + mk/rte.lib.mk | 102 +++++++++++++++++++++++++++++++++++++++--- + 3 files changed, 110 insertions(+), 7 deletions(-) + +diff --git a/config/common_base b/config/common_base +index 57b1349..392e6c3 100644 +--- a/config/common_base ++++ b/config/common_base +@@ -59,6 +59,11 @@ CONFIG_RTE_ENABLE_LTO=n + # + CONFIG_RTE_BUILD_SHARED_LIB=n + ++# ++# Compile to both static library and share library ++# ++CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS=n ++ + # + # Use newest code breaking previous ABI + # +diff --git a/mk/rte.combinedlib.mk b/mk/rte.combinedlib.mk +index 9d0f935..1088543 100644 +--- a/mk/rte.combinedlib.mk ++++ b/mk/rte.combinedlib.mk +@@ -15,9 +15,16 @@ RTE_LIBNAME := dpdk + COMBINEDLIB := lib$(RTE_LIBNAME)$(EXT) + + LIBS := $(filter-out $(COMBINEDLIB), $(sort $(notdir $(wildcard $(RTE_OUTPUT)/lib/*$(EXT))))) ++ifeq ($(CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS),y) ++COMBINEDLIB_SO := lib$(RTE_LIBNAME).so ++LIBS_SO := $(filter-out $(COMBINEDLIB_SO), $(sort $(notdir $(wildcard $(RTE_OUTPUT)/lib/*.so)))) ++endif + + all: FORCE + $(Q)echo "GROUP ( $(LIBS) )" > $(RTE_OUTPUT)/lib/$(COMBINEDLIB) ++ifeq ($(CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS),y) ++ $(Q)echo "GROUP ( $(LIBS_SO) )" > $(RTE_OUTPUT)/lib/$(COMBINEDLIB_SO) ++endif + + # + # Clean all generated files +@@ -25,6 +32,9 @@ all: FORCE + .PHONY: clean + clean: + $(Q)rm -f $(RTE_OUTPUT)/lib/$(COMBINEDLIB) ++ifeq ($(CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS),y) ++ $(Q)rm -f $(RTE_OUTPUT)/lib/$(COMBINEDLIB_SO) ++endif + + .PHONY: FORCE + FORCE: +diff --git a/mk/rte.lib.mk b/mk/rte.lib.mk +index 4516d1c..78f3c27 100644 +--- a/mk/rte.lib.mk ++++ b/mk/rte.lib.mk +@@ -19,13 +19,6 @@ else ifeq ($(LIBABIVER),) + LIBABIVER := 0.$(shell cat $(RTE_SRCDIR)/ABI_VERSION | tr -d '.') + endif + +-ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) +-LIB := $(patsubst %.a,%.so.$(LIBABIVER),$(LIB)) +-ifeq ($(EXTLIB_BUILD),n) +-CPU_LDFLAGS += --version-script=$(SRCDIR)/$(EXPORT_MAP) +-endif +-endif +- + + _BUILD = $(LIB) + PREINSTALL = $(SYMLINK-FILES-y) +@@ -34,6 +27,16 @@ _CLEAN = doclean + + LDLIBS += $(EXECENV_LDLIBS-y) + ++ifeq ($(CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS),y) ++LIB_SO = $(LIB) ++LIB_SO := $(patsubst %.a,%.so.$(LIBABIVER),$(LIB_SO)) ++ifeq ($(EXTLIB_BUILD),n) ++CPU_LDFLAGS += --version-script=$(SRCDIR)/$(EXPORT_MAP) ++endif ++_BUILD += $(LIB_SO) ++_INSTALL += $(INSTALL-FILES-y) $(RTE_OUTPUT)/lib/$(LIB_SO) ++endif ++ + .PHONY: all + all: install + +@@ -74,6 +77,89 @@ ifneq ($(CC_SUPPORTS_Z),false) + NO_UNDEFINED := -z defs + endif + ++ifeq ($(CONFIG_RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS),y) ++O_TO_S = $(LD) -L$(RTE_SDK_BIN)/lib $(_CPU_LDFLAGS) $(EXTRA_LDFLAGS) \ ++ -shared $(OBJS-y) $(NO_UNDEFINED) $(LDLIBS) -Wl,-soname,$(LIB_SO) -o $(LIB_SO) ++O_TO_S_STR = $(subst ','\'',$(O_TO_S)) #'# fix syntax highlight ++O_TO_S_DISP = $(if $(V),"$(O_TO_S_STR)"," LD $(@)") ++O_TO_S_DO = @set -e; \ ++ echo $(O_TO_S_DISP); \ ++ $(O_TO_S) && \ 
++ echo $(O_TO_S_CMD) > $(call exe2cmd,$(@)) ++ ++-include .$(LIB_SO).cmd ++ ++# ++# Archive objects in .a file if needed ++# ++$(LIB): $(OBJS-y) $(DEP_$(LIB)) FORCE ++ @[ -d $(dir $@) ] || mkdir -p $(dir $@) ++ $(if $(D),\ ++ @echo -n "$< -> $@ " ; \ ++ echo -n "file_missing=$(call boolean,$(file_missing)) " ; \ ++ echo -n "cmdline_changed=$(call boolean,$(call cmdline_changed,$(O_TO_A_STR))) " ; \ ++ echo -n "depfile_missing=$(call boolean,$(depfile_missing)) " ; \ ++ echo "depfile_newer=$(call boolean,$(depfile_newer)) ") ++ $(if $(or \ ++ $(file_missing),\ ++ $(call cmdline_changed,$(O_TO_A_STR)),\ ++ $(depfile_missing),\ ++ $(depfile_newer)),\ ++ $(O_TO_A_DO)) ++ ++$(LIB_SO): $(OBJS-y) $(DEP_$(LIB_SO)) FORCE ++ifeq ($(LIBABIVER),) ++ @echo "Must Specify a $(LIB_SO) ABI version" ++ @false ++endif ++ @[ -d $(dir $@) ] || mkdir -p $(dir $@) ++ $(if $(D),\ ++ @echo -n "$< -> $@ " ; \ ++ echo -n "file_missing=$(call boolean,$(file_missing)) " ; \ ++ echo -n "cmdline_changed=$(call boolean,$(call cmdline_changed,$(O_TO_S_STR))) " ; \ ++ echo -n "depfile_missing=$(call boolean,$(depfile_missing)) " ; \ ++ echo "depfile_newer=$(call boolean,$(depfile_newer)) ") ++ $(if $(or \ ++ $(file_missing),\ ++ $(call cmdline_changed,$(O_TO_S_STR)),\ ++ $(depfile_missing),\ ++ $(depfile_newer)),\ ++ $(O_TO_S_DO)) ++ ++# ++# install lib in $(RTE_OUTPUT)/lib ++# ++$(RTE_OUTPUT)/lib/$(LIB): $(LIB) ++ @echo " INSTALL-LIB $(LIB)" ++ @[ -d $(RTE_OUTPUT)/lib ] || mkdir -p $(RTE_OUTPUT)/lib ++ cp -f $(LIB) $(RTE_OUTPUT)/lib ++ ++$(RTE_OUTPUT)/lib/$(LIB_SO): $(LIB_SO) ++ @echo " INSTALL-LIB $(LIB_SO)" ++ @[ -d $(RTE_OUTPUT)/lib ] || mkdir -p $(RTE_OUTPUT)/lib ++ cp -f $(LIB_SO) $(RTE_OUTPUT)/lib ++ ln -s -f $< $(shell echo $@ | sed 's/\.so.*/.so/') ++ ++# ++# Clean all generated files ++# ++.PHONY: clean ++clean: _postclean ++ ++.PHONY: doclean ++doclean: ++ $(Q)rm -rf $(LIB) $(LIB_SO) $(OBJS-all) $(DEPS-all) $(DEPSTMP-all) \ ++ $(CMDS-all) .$(LIB).cmd $(INSTALL-FILES-all) *.pmd.c *.pmd.o ++ $(Q)rm -f $(_BUILD_TARGETS) $(_INSTALL_TARGETS) $(_CLEAN_TARGETS) ++ ++else ++ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) ++LIB := $(patsubst %.a,%.so.$(LIBABIVER),$(LIB)) ++ifeq ($(EXTLIB_BUILD),n) ++CPU_LDFLAGS += --version-script=$(SRCDIR)/$(EXPORT_MAP) ++endif ++endif ++ + O_TO_S = $(LD) -L$(RTE_SDK_BIN)/lib $(_CPU_LDFLAGS) $(EXTRA_LDFLAGS) \ + -shared $(OBJS-y) $(NO_UNDEFINED) $(LDLIBS) -Wl,-soname,$(LIB) -o $(LIB) + O_TO_S_STR = $(subst ','\'',$(O_TO_S)) #'# fix syntax highlight +@@ -148,6 +234,8 @@ doclean: + $(CMDS-all) .$(LIB).cmd $(INSTALL-FILES-all) *.pmd.c *.pmd.o + $(Q)rm -f $(_BUILD_TARGETS) $(_INSTALL_TARGETS) $(_CLEAN_TARGETS) + ++endif ++ + include $(RTE_SDK)/mk/internal/rte.compile-post.mk + include $(RTE_SDK)/mk/internal/rte.install-post.mk + include $(RTE_SDK)/mk/internal/rte.clean-post.mk +-- +2.19.1 + diff --git a/CVE-2020-10722.patch b/CVE-2020-10722.patch new file mode 100644 index 0000000000000000000000000000000000000000..15c19d9a9106d6d2bc6459b009ac4d6cb835bb15 --- /dev/null +++ b/CVE-2020-10722.patch @@ -0,0 +1,48 @@ +From 2cf9c470ebff0091e41af85f16ab906fd98cf9af Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Tue, 21 Apr 2020 11:16:56 +0200 +Subject: vhost: check log mmap offset and size overflow + +vhost_user_set_log_base() is a message handler that is +called to handle the VHOST_USER_SET_LOG_BASE message. +Its payload contains a 64 bit size and offset. Both are +added up and used as a size when calling mmap(). + +There is no integer overflow check. 
If an integer overflow
+occurs, a smaller memory map than requested would be
+created. Since the returned mapping is mapped as writable
+and used for logging, a memory corruption could occur.
+
+Fixes: fbc4d248b198 ("vhost: fix offset while mmaping log base address")
+
+This issue has been assigned CVE-2020-10722
+
+Reported-by: Ilja Van Sprundel
+Signed-off-by: Maxime Coquelin
+Reviewed-by: Xiaolong Ye
+Reviewed-by: Ilja Van Sprundel
+---
+ lib/librte_vhost/vhost_user.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
+index 40c4520..02962fc 100644
+--- a/lib/librte_vhost/vhost_user.c
++++ b/lib/librte_vhost/vhost_user.c
+@@ -2060,10 +2060,10 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
+ size = msg->payload.log.mmap_size;
+ off = msg->payload.log.mmap_offset;
+
+- /* Don't allow mmap_offset to point outside the mmap region */
+- if (off > size) {
++ /* Check for mmap size and offset overflow. */
++ if (off >= -size) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+- "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n",
++ "log offset %#"PRIx64" and log size %#"PRIx64" overflow\n",
+ off, size);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+--
+cgit v1.0
+
diff --git a/CVE-2020-10723.patch b/CVE-2020-10723.patch
new file mode 100644
index 0000000000000000000000000000000000000000..045a33084cc9bc3a931567b71b42908e9e487670
--- /dev/null
+++ b/CVE-2020-10723.patch
@@ -0,0 +1,57 @@
+From 8e9652b0b616a3704b5cb5a3dccb2c239e16ab9c Mon Sep 17 00:00:00 2001
+From: Maxime Coquelin
+Date: Tue, 21 Apr 2020 18:17:43 +0200
+Subject: vhost: fix vring index check
+
+vhost_user_check_and_alloc_queue_pair() is used to extract
+a vring index from a payload. This function validates the
+index and is called early on when performing message
+handling. Most message handlers depend on it correctly
+validating the vring index.
+
+Depending on the message type the vring index is in
+different parts of the payload. The function contains a
+switch/case for each type and copies the index. This is
+stored in a uint16. This index is then validated. Depending
+on the message, the source index is an unsigned int. If
+integer truncation occurs (uint->uint16) the top 16 bits
+of the index are never validated.
+
+When they are used later on (e.g. in
+vhost_user_set_vring_num() or vhost_user_set_vring_addr())
+it can lead to out-of-bounds indexing. The out-of-bounds
+indexed data gets written to, and hence this can cause
+memory corruption.
+
+This patch fixes this vulnerability by declaring the vring
+index as an unsigned int in
+vhost_user_check_and_alloc_queue_pair().
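+
+For illustration only, a minimal sketch of the truncation (the variable
+names and values here are hypothetical, not taken from vhost_user.c):
+
+    unsigned int payload_idx = 0x10002;  /* attacker-controlled field */
+    uint16_t vring_idx = payload_idx;    /* silently truncated to 0x0002 */
+    /* vring_idx passes the bounds check, but any handler that later
+     * indexes with the full 32-bit field reads or writes out of bounds */
+
+Widening vring_idx to 32 bits makes the checked value and the used
+value the same.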
+
+Fixes: 160cbc815b41 ("vhost: remove a hack on queue allocation")
+
+This issue has been assigned CVE-2020-10723
+
+Reported-by: Ilja Van Sprundel
+Signed-off-by: Maxime Coquelin
+Reviewed-by: Xiaolong Ye
+Reviewed-by: Ilja Van Sprundel
+---
+ lib/librte_vhost/vhost_user.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
+index 02962fc..d196142 100644
+--- a/lib/librte_vhost/vhost_user.c
++++ b/lib/librte_vhost/vhost_user.c
+@@ -2508,7 +2508,7 @@ static int
+ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
+ struct VhostUserMsg *msg)
+ {
+- uint16_t vring_idx;
++ uint32_t vring_idx;
+
+ switch (msg->request.master) {
+ case VHOST_USER_SET_VRING_KICK:
+--
+cgit v1.0
+
diff --git a/CVE-2020-10724.patch b/CVE-2020-10724.patch
new file mode 100644
index 0000000000000000000000000000000000000000..729d4db0fec3e84959ebccb02045c5fe32488dee
--- /dev/null
+++ b/CVE-2020-10724.patch
@@ -0,0 +1,76 @@
+From 963b6eea05f3ee720fcfecd110e20f61b92205d6 Mon Sep 17 00:00:00 2001
+From: Maxime Coquelin
+Date: Tue, 21 Apr 2020 19:10:09 +0200
+Subject: vhost/crypto: validate keys lengths
+
+transform_cipher_param() and transform_chain_param() handle
+the payload data for the VHOST_USER_CRYPTO_CREATE_SESS
+message. These payloads have to be validated, since they
+can come from untrusted sources.
+
+Two buffers and their lengths are defined in this payload,
+one for the auth key and one for the cipher key. But the above
+functions do not validate the key length inputs, which could
+lead to out-of-bounds reads, as the buffers have static sizes of
+64 bytes for the cipher key and 512 bytes for the auth key.
+
+This patch adds the necessary checks on the key length fields
+before they are used.
+
+Fixes: e80a98708166 ("vhost/crypto: add session message handler")
+
+This issue has been assigned CVE-2020-10724
+
+Reported-by: Ilja Van Sprundel
+Signed-off-by: Maxime Coquelin
+Reviewed-by: Xiaolong Ye
+Reviewed-by: Ilja Van Sprundel
+---
+ lib/librte_vhost/vhost_crypto.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
+index 6891197..07a4115 100644
+--- a/lib/librte_vhost/vhost_crypto.c
++++ b/lib/librte_vhost/vhost_crypto.c
+@@ -237,6 +237,11 @@ transform_cipher_param(struct rte_crypto_sym_xform *xform,
+ if (unlikely(ret < 0))
+ return ret;
+
++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) {
++ VC_LOG_DBG("Invalid cipher key length\n");
++ return -VIRTIO_CRYPTO_BADMSG;
++ }
++
+ xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ xform->cipher.key.length = param->cipher_key_len;
+ if (xform->cipher.key.length > 0)
+@@ -287,6 +292,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms,
+ &xform_cipher->cipher.algo);
+ if (unlikely(ret < 0))
+ return ret;
++
++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) {
++ VC_LOG_DBG("Invalid cipher key length\n");
++ return -VIRTIO_CRYPTO_BADMSG;
++ }
++
+ xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ xform_cipher->cipher.key.length = param->cipher_key_len;
+ xform_cipher->cipher.key.data = param->cipher_key_buf;
+@@ -301,6 +312,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms,
+ ret = auth_algo_transform(param->hash_algo, &xform_auth->auth.algo);
+ if (unlikely(ret < 0))
+ return ret;
++
++ if (param->auth_key_len > VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH) {
++ VC_LOG_DBG("Invalid auth key length\n");
++ return -VIRTIO_CRYPTO_BADMSG;
++ }
++
+
+ xform_auth->auth.digest_length = param->digest_len;
+ xform_auth->auth.key.length = param->auth_key_len;
+ xform_auth->auth.key.data = param->auth_key_buf;
+--
+cgit v1.0
+
diff --git a/CVE-2020-10725.patch b/CVE-2020-10725.patch
new file mode 100644
index 0000000000000000000000000000000000000000..a6eba33544b93824c9d0e362757c887583fd7390
--- /dev/null
+++ b/CVE-2020-10725.patch
@@ -0,0 +1,44 @@
+From cd0ea71bb6a7d1c503bf2f6f1e3c455cf246d9a1 Mon Sep 17 00:00:00 2001
+From: Marvin Liu
+Date: Wed, 8 Apr 2020 17:13:55 +0800
+Subject: vhost: fix translated address not checked
+
+A malicious guest can construct a desc with an invalid address and zero
+buffer length. That requires vhost to check both the translated address
+and the translated data length. This patch adds the missed address check.
+
+Fixes: 75ed51697820 ("vhost: add packed ring batch dequeue")
+Fixes: ef861692c398 ("vhost: add packed ring batch enqueue")
+
+This issue has been assigned CVE-2020-10725
+
+Signed-off-by: Marvin Liu
+Reviewed-by: Maxime Coquelin
+---
+ lib/librte_vhost/virtio_net.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
+index ac2842b..33f1025 100644
+--- a/lib/librte_vhost/virtio_net.c
++++ b/lib/librte_vhost/virtio_net.c
+@@ -1086,6 +1086,8 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev,
+ VHOST_ACCESS_RW);
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
++ if (unlikely(!desc_addrs[i]))
++ return -1;
+ if (unlikely(lens[i] != descs[avail_idx + i].len))
+ return -1;
+ }
+@@ -1841,6 +1843,8 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
+ }
+
+ vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
++ if (unlikely(!desc_addrs[i]))
++ return -1;
+ if (unlikely((lens[i] != descs[avail_idx + i].len)))
+ return -1;
+ }
+--
+cgit v1.0
\ No newline at end of file
diff --git a/CVE-2020-10726.patch b/CVE-2020-10726.patch
new file mode 100644
index 0000000000000000000000000000000000000000..853ef905c5282f816411f738db95470ef51a5490
--- /dev/null
+++ b/CVE-2020-10726.patch
@@ -0,0 +1,51 @@
+From 95e1f29c26777ee36456e340ed9c2b07472add28 Mon Sep 17 00:00:00 2001
+From: Xiaolong Ye
+Date: Wed, 8 Apr 2020 15:31:35 +0800
+Subject: vhost: fix potential memory space leak
+
+A malicious container which has direct access to the vhost-user socket
+can keep sending VHOST_USER_GET_INFLIGHT_FD messages, which may leak
+resources and eventually result in a DoS. Fix it by unmapping
+dev->inflight_info->addr before assigning the newly mapped addr to it.
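+
+As a minimal sketch of the pattern (simplified from the hunk below;
+map_inflight() is a hypothetical stand-in for the real mmap() call):
+
+    /* leaky: each VHOST_USER_GET_INFLIGHT_FD message overwrites the
+     * pointer, so the previous mapping can never be unmapped */
+    dev->inflight_info->addr = map_inflight(fd, mmap_size);
+
+    /* fixed: release the old mapping before storing the new one */
+    if (dev->inflight_info->addr) {
+        munmap(dev->inflight_info->addr, dev->inflight_info->size);
+        dev->inflight_info->addr = NULL;
+    }
+    dev->inflight_info->addr = map_inflight(fd, mmap_size);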
+
+Fixes: d87f1a1cb7b6 ("vhost: support inflight info sharing")
+
+This issue has been assigned CVE-2020-10726
+
+Signed-off-by: Xiaolong Ye
+Reviewed-by: Maxime Coquelin
+---
+ lib/librte_vhost/vhost_user.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
+index d196142..2a4ba20 100644
+--- a/lib/librte_vhost/vhost_user.c
++++ b/lib/librte_vhost/vhost_user.c
+@@ -1440,6 +1440,11 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
+ }
+ memset(addr, 0, mmap_size);
+
++ if (dev->inflight_info->addr) {
++ munmap(dev->inflight_info->addr, dev->inflight_info->size);
++ dev->inflight_info->addr = NULL;
++ }
++
+ dev->inflight_info->addr = addr;
+ dev->inflight_info->size = msg->payload.inflight.mmap_size = mmap_size;
+ dev->inflight_info->fd = msg->fds[0] = fd;
+@@ -1524,8 +1529,10 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
+ }
+ }
+
+- if (dev->inflight_info->addr)
++ if (dev->inflight_info->addr) {
+ munmap(dev->inflight_info->addr, dev->inflight_info->size);
++ dev->inflight_info->addr = NULL;
++ }
+
+ addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, mmap_offset);
+--
+cgit v1.0
+
diff --git a/CVE-2020-14374.patch b/CVE-2020-14374.patch
new file mode 100644
index 0000000000000000000000000000000000000000..14bc0d9ae09b6e9b672130118397406e6e44c500
--- /dev/null
+++ b/CVE-2020-14374.patch
@@ -0,0 +1,40 @@
+From 6a3a414698e45cf01bb1489ef81f4d663a88047b Mon Sep 17 00:00:00 2001
+From: Fan Zhang
+Date: Thu, 16 Apr 2020 11:29:06 +0100
+Subject: vhost/crypto: fix data length check
+
+This patch fixes the incorrect data length check in vhost crypto.
+Instead of blindly accepting the descriptor length as the data length,
+the change compares the request-provided data length and the descriptor
+length first. The security issue CVE-2020-14374 is not fixed by this
+patch alone; part of the fix is done through:
+"vhost/crypto: fix missed request check for copy mode".
+
+CVE-2020-14374
+Fixes: 3c79609fda7c ("vhost/crypto: handle virtually non-contiguous buffers")
+Cc: stable@dpdk.org
+
+Signed-off-by: Fan Zhang
+Acked-by: Chenbo Xia
+
+reference:https://git.dpdk.org/dpdk-stable/commit/?h=19.11&id=6a3a414698e4
+Signed-off-by: gaoxingwang
+---
+ lib/librte_vhost/vhost_crypto.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
+index f1cc32a..cf9aa25 100644
+--- a/lib/librte_vhost/vhost_crypto.c
++++ b/lib/librte_vhost/vhost_crypto.c
+@@ -624,7 +624,7 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
+ desc = &vc_req->head[desc->next];
+ rte_prefetch0(&vc_req->head[desc->next]);
+ to_copy = RTE_MIN(desc->len, (uint32_t)left);
+- dlen = desc->len;
++ dlen = to_copy;
+ src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !dlen)) {
+--
+cgit v1.0
diff --git a/CVE-2020-14375.patch b/CVE-2020-14375.patch
new file mode 100644
index 0000000000000000000000000000000000000000..02afbfa399239346dbc46ae260490dc6eba5e8c5
--- /dev/null
+++ b/CVE-2020-14375.patch
@@ -0,0 +1,803 @@
+From e2666ec24535c7ba0fb325d61a753bcabc8bf1ae Mon Sep 17 00:00:00 2001
+From: Fan Zhang
+Date: Wed, 9 Sep 2020 09:35:53 +0100
+Subject: vhost/crypto: fix possible TOCTOU attack
+
+This patch fixes the possible time-of-check to time-of-use (TOCTOU)
+attack problem by copying the request data and descriptor index into
+local variables prior to processing.
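+
+A minimal sketch of the copy-before-use idea (shared_req and process()
+are hypothetical names, not the actual handler code):
+
+    struct virtio_crypto_op_data_req req;   /* private local copy */
+
+    /* snapshot the guest-shared request exactly once */
+    memcpy(&req, shared_req, sizeof(req));
+    /* validate and use only the snapshot; later guest writes to the
+     * shared ring can no longer change what was checked */
+    if (req.header.opcode == VIRTIO_CRYPTO_CIPHER_ENCRYPT)
+        process(&req);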
+
+Also, the original sequential read of descriptors may lead to a TOCTOU
+attack. This patch fixes the problem by loading all descriptors of a
+request into a local buffer before processing.
+
+CVE-2020-14375
+Fixes: 3bb595ecd682 ("vhost/crypto: add request handler")
+Cc: stable@dpdk.org
+
+Signed-off-by: Fan Zhang
+Acked-by: Chenbo Xia
+
+reference:https://git.dpdk.org/dpdk-stable/commit/?h=19.11&id=e2666ec24535
+Signed-off-by: gaoxingwang
+---
+ lib/librte_vhost/rte_vhost_crypto.h | 2 +
+ lib/librte_vhost/vhost_crypto.c | 391 ++++++++++++++++++------------------
+ 2 files changed, 202 insertions(+), 191 deletions(-)
+
+diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h
+index 866a592..b54d61d 100644
+--- a/lib/librte_vhost/rte_vhost_crypto.h
++++ b/lib/librte_vhost/rte_vhost_crypto.h
+@@ -7,10 +7,12 @@
+
+ #define VHOST_CRYPTO_MBUF_POOL_SIZE (8192)
+ #define VHOST_CRYPTO_MAX_BURST_SIZE (64)
++#define VHOST_CRYPTO_MAX_DATA_SIZE (4096)
+ #define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */
+ /** max nb virtual queues in a burst for finalizing*/
+ #define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64)
+ #define VHOST_CRYPTO_MAX_IV_LEN (32)
++#define VHOST_CRYPTO_MAX_N_DESC (32)
+
+ enum rte_vhost_crypto_zero_copy {
+ RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0,
+diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
+index cf9aa25..e08f9c6 100644
+--- a/lib/librte_vhost/vhost_crypto.c
++++ b/lib/librte_vhost/vhost_crypto.c
+@@ -46,6 +46,14 @@
+ #define IOVA_TO_VVA(t, r, a, l, p) \
+ ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p))
+
++/*
++ * vhost_crypto_desc is used to copy the original vring_desc to a local
++ * buffer before processing (except the next index). The copy result is an
++ * array of vhost_crypto_desc elements arranged in the order given by the
++ * original vring_desc.next chain.
++ */ ++#define vhost_crypto_desc vring_desc ++ + static int + cipher_algo_transform(uint32_t virtio_cipher_algo, + enum rte_crypto_cipher_algorithm *algo) +@@ -479,83 +487,71 @@ vhost_crypto_msg_post_handler(int vid, void *msg) + return ret; + } + +-static __rte_always_inline struct vring_desc * +-find_write_desc(struct vring_desc *head, struct vring_desc *desc, +- uint32_t *nb_descs, uint32_t vq_size) ++static __rte_always_inline struct vhost_crypto_desc * ++find_write_desc(struct vhost_crypto_desc *head, struct vhost_crypto_desc *desc, ++ uint32_t max_n_descs) + { +- if (desc->flags & VRING_DESC_F_WRITE) +- return desc; +- +- while (desc->flags & VRING_DESC_F_NEXT) { +- if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) +- return NULL; +- (*nb_descs)--; ++ if (desc < head) ++ return NULL; + +- desc = &head[desc->next]; ++ while (desc - head < (int)max_n_descs) { + if (desc->flags & VRING_DESC_F_WRITE) + return desc; ++ desc++; + } + + return NULL; + } + +-static struct virtio_crypto_inhdr * +-reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc, +- uint32_t *nb_descs, uint32_t vq_size) ++static __rte_always_inline struct virtio_crypto_inhdr * ++reach_inhdr(struct vhost_crypto_data_req *vc_req, ++ struct vhost_crypto_desc *head, ++ uint32_t max_n_descs) + { +- uint64_t dlen; + struct virtio_crypto_inhdr *inhdr; ++ struct vhost_crypto_desc *last = head + (max_n_descs - 1); ++ uint64_t dlen = last->len; + +- while (desc->flags & VRING_DESC_F_NEXT) { +- if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) +- return NULL; +- (*nb_descs)--; +- desc = &vc_req->head[desc->next]; +- } ++ if (unlikely(dlen != sizeof(*inhdr))) ++ return NULL; + +- dlen = desc->len; +- inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr, ++ inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, last->addr, + &dlen, VHOST_ACCESS_WO); +- if (unlikely(!inhdr || dlen != desc->len)) ++ if (unlikely(!inhdr || dlen != last->len)) + return NULL; + + return inhdr; + } + + static __rte_always_inline int +-move_desc(struct vring_desc *head, struct vring_desc **cur_desc, +- uint32_t size, uint32_t *nb_descs, uint32_t vq_size) ++move_desc(struct vhost_crypto_desc *head, ++ struct vhost_crypto_desc **cur_desc, ++ uint32_t size, uint32_t max_n_descs) + { +- struct vring_desc *desc = *cur_desc; ++ struct vhost_crypto_desc *desc = *cur_desc; + int left = size - desc->len; + +- while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { +- if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) +- return -1; +- +- desc = &head[desc->next]; +- rte_prefetch0(&head[desc->next]); ++ while (desc->flags & VRING_DESC_F_NEXT && left > 0 && ++ desc >= head && ++ desc - head < (int)max_n_descs) { ++ desc++; + left -= desc->len; +- if (left > 0) +- (*nb_descs)--; + } + + if (unlikely(left > 0)) + return -1; + +- if (unlikely(*nb_descs == 0)) ++ if (unlikely(head - desc == (int)max_n_descs)) + *cur_desc = NULL; +- else { +- if (unlikely(desc->next >= vq_size)) +- return -1; +- *cur_desc = &head[desc->next]; +- } ++ else ++ *cur_desc = desc + 1; + + return 0; + } + + static __rte_always_inline void * +-get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc, ++get_data_ptr(struct vhost_crypto_data_req *vc_req, ++ struct vhost_crypto_desc *cur_desc, + uint8_t perm) + { + void *data; +@@ -570,12 +566,13 @@ get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc, + return data; + } + +-static int ++static __rte_always_inline int + copy_data(void *dst_data, 
struct vhost_crypto_data_req *vc_req, +- struct vring_desc **cur_desc, uint32_t size, +- uint32_t *nb_descs, uint32_t vq_size) ++ struct vhost_crypto_desc *head, ++ struct vhost_crypto_desc **cur_desc, ++ uint32_t size, uint32_t max_n_descs) + { +- struct vring_desc *desc = *cur_desc; ++ struct vhost_crypto_desc *desc = *cur_desc; + uint64_t remain, addr, dlen, len; + uint32_t to_copy; + uint8_t *data = dst_data; +@@ -614,15 +611,8 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, + + left -= to_copy; + +- while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { +- if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) { +- VC_LOG_ERR("Invalid descriptors"); +- return -1; +- } +- (*nb_descs)--; +- +- desc = &vc_req->head[desc->next]; +- rte_prefetch0(&vc_req->head[desc->next]); ++ while (desc >= head && desc - head < (int)max_n_descs && left) { ++ desc++; + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = to_copy; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, +@@ -663,13 +653,10 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, + return -1; + } + +- if (unlikely(*nb_descs == 0)) ++ if (unlikely(desc - head == (int)max_n_descs)) + *cur_desc = NULL; +- else { +- if (unlikely(desc->next >= vq_size)) +- return -1; +- *cur_desc = &vc_req->head[desc->next]; +- } ++ else ++ *cur_desc = desc + 1; + + return 0; + } +@@ -681,6 +668,7 @@ write_back_data(struct vhost_crypto_data_req *vc_req) + + while (wb_data) { + rte_memcpy(wb_data->dst, wb_data->src, wb_data->len); ++ memset(wb_data->src, 0, wb_data->len); + wb_last = wb_data; + wb_data = wb_data->next; + rte_mempool_put(vc_req->wb_pool, wb_last); +@@ -722,17 +710,18 @@ free_wb_data(struct vhost_crypto_writeback_data *wb_data, + * @return + * The pointer to the start of the write back data linked list. 
+ */ +-static struct vhost_crypto_writeback_data * ++static __rte_always_inline struct vhost_crypto_writeback_data * + prepare_write_back_data(struct vhost_crypto_data_req *vc_req, +- struct vring_desc **cur_desc, ++ struct vhost_crypto_desc *head_desc, ++ struct vhost_crypto_desc **cur_desc, + struct vhost_crypto_writeback_data **end_wb_data, + uint8_t *src, + uint32_t offset, + uint64_t write_back_len, +- uint32_t *nb_descs, uint32_t vq_size) ++ uint32_t max_n_descs) + { + struct vhost_crypto_writeback_data *wb_data, *head; +- struct vring_desc *desc = *cur_desc; ++ struct vhost_crypto_desc *desc = *cur_desc; + uint64_t dlen; + uint8_t *dst; + int ret; +@@ -775,14 +764,10 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, + } else + offset -= desc->len; + +- while (write_back_len) { +- if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) { +- VC_LOG_ERR("Invalid descriptors"); +- goto error_exit; +- } +- (*nb_descs)--; +- +- desc = &vc_req->head[desc->next]; ++ while (write_back_len && ++ desc >= head_desc && ++ desc - head_desc < (int)max_n_descs) { ++ desc++; + if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) { + VC_LOG_ERR("incorrect descriptor"); + goto error_exit; +@@ -821,13 +806,10 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, + wb_data->next = NULL; + } + +- if (unlikely(*nb_descs == 0)) ++ if (unlikely(desc - head_desc == (int)max_n_descs)) + *cur_desc = NULL; +- else { +- if (unlikely(desc->next >= vq_size)) +- goto error_exit; +- *cur_desc = &vc_req->head[desc->next]; +- } ++ else ++ *cur_desc = desc + 1; + + *end_wb_data = wb_data; + +@@ -851,14 +833,14 @@ vhost_crypto_check_cipher_request(struct virtio_crypto_cipher_data_req *req) + return VIRTIO_CRYPTO_BADMSG; + } + +-static uint8_t ++static __rte_always_inline uint8_t + prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_cipher_data_req *cipher, +- struct vring_desc *cur_desc, +- uint32_t *nb_descs, uint32_t vq_size) ++ struct vhost_crypto_desc *head, ++ uint32_t max_n_descs) + { +- struct vring_desc *desc = cur_desc; ++ struct vhost_crypto_desc *desc = head; + struct vhost_crypto_writeback_data *ewb = NULL; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); +@@ -869,8 +851,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + + /* prepare */ + /* iv */ +- if (unlikely(copy_data(iv_data, vc_req, &desc, cipher->para.iv_len, +- nb_descs, vq_size) < 0)) { ++ if (unlikely(copy_data(iv_data, vc_req, head, &desc, ++ cipher->para.iv_len, max_n_descs))) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -888,9 +870,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- if (unlikely(move_desc(vc_req->head, &desc, +- cipher->para.src_data_len, nb_descs, +- vq_size) < 0)) { ++ if (unlikely(move_desc(head, &desc, cipher->para.src_data_len, ++ max_n_descs) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -901,8 +882,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + vc_req->wb_pool = vcrypto->wb_pool; + m_src->data_len = cipher->para.src_data_len; + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), +- vc_req, &desc, cipher->para.src_data_len, +- nb_descs, vq_size) < 0)) { ++ vc_req, head, &desc, cipher->para.src_data_len, ++ max_n_descs) < 
0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -913,7 +894,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + } + + /* dst */ +- desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size); ++ desc = find_write_desc(head, desc, max_n_descs); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; +@@ -931,9 +912,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- if (unlikely(move_desc(vc_req->head, &desc, +- cipher->para.dst_data_len, +- nb_descs, vq_size) < 0)) { ++ if (unlikely(move_desc(head, &desc, cipher->para.dst_data_len, ++ max_n_descs) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -942,9 +922,9 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + m_dst->data_len = cipher->para.dst_data_len; + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: +- vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb, ++ vc_req->wb = prepare_write_back_data(vc_req, head, &desc, &ewb, + rte_pktmbuf_mtod(m_src, uint8_t *), 0, +- cipher->para.dst_data_len, nb_descs, vq_size); ++ cipher->para.dst_data_len, max_n_descs); + if (unlikely(vc_req->wb == NULL)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -986,33 +966,33 @@ static __rte_always_inline uint8_t + vhost_crypto_check_chain_request(struct virtio_crypto_alg_chain_data_req *req) + { + if (likely((req->para.iv_len <= VHOST_CRYPTO_MAX_IV_LEN) && +- (req->para.src_data_len <= RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.src_data_len <= VHOST_CRYPTO_MAX_DATA_SIZE) && + (req->para.dst_data_len >= req->para.src_data_len) && +- (req->para.dst_data_len <= RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.dst_data_len <= VHOST_CRYPTO_MAX_DATA_SIZE) && + (req->para.cipher_start_src_offset < +- RTE_MBUF_DEFAULT_DATAROOM) && +- (req->para.len_to_cipher < RTE_MBUF_DEFAULT_DATAROOM) && ++ VHOST_CRYPTO_MAX_DATA_SIZE) && ++ (req->para.len_to_cipher <= VHOST_CRYPTO_MAX_DATA_SIZE) && + (req->para.hash_start_src_offset < +- RTE_MBUF_DEFAULT_DATAROOM) && +- (req->para.len_to_hash < RTE_MBUF_DEFAULT_DATAROOM) && ++ VHOST_CRYPTO_MAX_DATA_SIZE) && ++ (req->para.len_to_hash <= VHOST_CRYPTO_MAX_DATA_SIZE) && + (req->para.cipher_start_src_offset + req->para.len_to_cipher <= + req->para.src_data_len) && + (req->para.hash_start_src_offset + req->para.len_to_hash <= + req->para.src_data_len) && + (req->para.dst_data_len + req->para.hash_result_len <= +- RTE_MBUF_DEFAULT_DATAROOM))) ++ VHOST_CRYPTO_MAX_DATA_SIZE))) + return VIRTIO_CRYPTO_OK; + return VIRTIO_CRYPTO_BADMSG; + } + +-static uint8_t ++static __rte_always_inline uint8_t + prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_alg_chain_data_req *chain, +- struct vring_desc *cur_desc, +- uint32_t *nb_descs, uint32_t vq_size) ++ struct vhost_crypto_desc *head, ++ uint32_t max_n_descs) + { +- struct vring_desc *desc = cur_desc, *digest_desc; ++ struct vhost_crypto_desc *desc = head, *digest_desc; + struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); +@@ -1025,8 +1005,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + + /* prepare */ + /* iv */ +- if (unlikely(copy_data(iv_data, vc_req, &desc, +- chain->para.iv_len, nb_descs, 
vq_size) < 0)) { ++ if (unlikely(copy_data(iv_data, vc_req, head, &desc, ++ chain->para.iv_len, max_n_descs) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1045,9 +1025,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- if (unlikely(move_desc(vc_req->head, &desc, +- chain->para.src_data_len, +- nb_descs, vq_size) < 0)) { ++ if (unlikely(move_desc(head, &desc, chain->para.src_data_len, ++ max_n_descs) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -1057,8 +1036,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + vc_req->wb_pool = vcrypto->wb_pool; + m_src->data_len = chain->para.src_data_len; + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), +- vc_req, &desc, chain->para.src_data_len, +- nb_descs, vq_size) < 0)) { ++ vc_req, head, &desc, chain->para.src_data_len, ++ max_n_descs) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1070,7 +1049,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + } + + /* dst */ +- desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size); ++ desc = find_write_desc(head, desc, max_n_descs); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; +@@ -1089,8 +1068,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + } + + if (unlikely(move_desc(vc_req->head, &desc, +- chain->para.dst_data_len, +- nb_descs, vq_size) < 0)) { ++ chain->para.dst_data_len, max_n_descs) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -1106,9 +1084,9 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- if (unlikely(move_desc(vc_req->head, &desc, ++ if (unlikely(move_desc(head, &desc, + chain->para.hash_result_len, +- nb_descs, vq_size) < 0)) { ++ max_n_descs) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; +@@ -1116,34 +1094,34 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: +- vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb, ++ vc_req->wb = prepare_write_back_data(vc_req, head, &desc, &ewb, + rte_pktmbuf_mtod(m_src, uint8_t *), + chain->para.cipher_start_src_offset, + chain->para.dst_data_len - +- chain->para.cipher_start_src_offset, +- nb_descs, vq_size); ++ chain->para.cipher_start_src_offset, ++ max_n_descs); + if (unlikely(vc_req->wb == NULL)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + ++ digest_desc = desc; + digest_offset = m_src->data_len; + digest_addr = rte_pktmbuf_mtod_offset(m_src, void *, + digest_offset); +- digest_desc = desc; + + /** create a wb_data for digest */ +- ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2, +- digest_addr, 0, chain->para.hash_result_len, +- nb_descs, vq_size); ++ ewb->next = prepare_write_back_data(vc_req, head, &desc, ++ &ewb2, digest_addr, 0, ++ chain->para.hash_result_len, max_n_descs); + if (unlikely(ewb->next == NULL)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + +- if (unlikely(copy_data(digest_addr, vc_req, &digest_desc, ++ if (unlikely(copy_data(digest_addr, vc_req, head, &digest_desc, + chain->para.hash_result_len, +- nb_descs, vq_size) < 0)) { ++ max_n_descs) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1193,74 +1171,103 @@ error_exit: + static 
__rte_always_inline int + vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + struct vhost_virtqueue *vq, struct rte_crypto_op *op, +- struct vring_desc *head, uint16_t desc_idx) ++ struct vring_desc *head, struct vhost_crypto_desc *descs, ++ uint16_t desc_idx) + { + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src); + struct rte_cryptodev_sym_session *session; +- struct virtio_crypto_op_data_req *req, tmp_req; ++ struct virtio_crypto_op_data_req req; + struct virtio_crypto_inhdr *inhdr; +- struct vring_desc *desc = NULL; ++ struct vhost_crypto_desc *desc = descs; ++ struct vring_desc *src_desc; + uint64_t session_id; + uint64_t dlen; +- uint32_t nb_descs = vq->size; +- int err = 0; ++ uint32_t nb_descs = 0, max_n_descs, i; ++ int err; + + vc_req->desc_idx = desc_idx; + vc_req->dev = vcrypto->dev; + vc_req->vq = vq; + +- if (likely(head->flags & VRING_DESC_F_INDIRECT)) { +- dlen = head->len; +- nb_descs = dlen / sizeof(struct vring_desc); +- /* drop invalid descriptors */ +- if (unlikely(nb_descs > vq->size)) +- return -1; +- desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr, +- &dlen, VHOST_ACCESS_RO); +- if (unlikely(!desc || dlen != head->len)) +- return -1; +- desc_idx = 0; +- head = desc; +- } else { +- desc = head; ++ if (unlikely((head->flags & VRING_DESC_F_INDIRECT) == 0)) { ++ VC_LOG_ERR("Invalid descriptor"); ++ return -1; + } + +- vc_req->head = head; +- vc_req->zero_copy = vcrypto->option; ++ dlen = head->len; ++ src_desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr, ++ &dlen, VHOST_ACCESS_RO); ++ if (unlikely(!src_desc || dlen != head->len)) { ++ VC_LOG_ERR("Invalid descriptor"); ++ return -1; ++ } ++ head = src_desc; + +- req = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO); +- if (unlikely(req == NULL)) { +- switch (vcrypto->option) { +- case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: +- err = VIRTIO_CRYPTO_BADMSG; +- VC_LOG_ERR("Invalid descriptor"); +- goto error_exit; +- case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: +- req = &tmp_req; +- if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req), +- &nb_descs, vq->size) < 0)) { +- err = VIRTIO_CRYPTO_BADMSG; +- VC_LOG_ERR("Invalid descriptor"); +- goto error_exit; ++ nb_descs = max_n_descs = dlen / sizeof(struct vring_desc); ++ if (unlikely(nb_descs > VHOST_CRYPTO_MAX_N_DESC || nb_descs == 0)) { ++ err = VIRTIO_CRYPTO_ERR; ++ VC_LOG_ERR("Cannot process num of descriptors %u", nb_descs); ++ if (nb_descs > 0) { ++ struct vring_desc *inhdr_desc = head; ++ while (inhdr_desc->flags & VRING_DESC_F_NEXT) { ++ if (inhdr_desc->next >= max_n_descs) ++ return -1; ++ inhdr_desc = &head[inhdr_desc->next]; + } +- break; +- default: +- err = VIRTIO_CRYPTO_ERR; +- VC_LOG_ERR("Invalid option"); +- goto error_exit; ++ if (inhdr_desc->len != sizeof(*inhdr)) ++ return -1; ++ inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, ++ vc_req, inhdr_desc->addr, &dlen, ++ VHOST_ACCESS_WO); ++ if (unlikely(!inhdr || dlen != inhdr_desc->len)) ++ return -1; ++ inhdr->status = VIRTIO_CRYPTO_ERR; ++ return -1; + } +- } else { +- if (unlikely(move_desc(vc_req->head, &desc, +- sizeof(*req), &nb_descs, vq->size) < 0)) { +- VC_LOG_ERR("Incorrect descriptor"); ++ } ++ ++ /* copy descriptors to local variable */ ++ for (i = 0; i < max_n_descs; i++) { ++ desc->addr = src_desc->addr; ++ desc->len = src_desc->len; ++ desc->flags = src_desc->flags; ++ desc++; ++ if (unlikely((src_desc->flags & VRING_DESC_F_NEXT) == 0)) ++ break; ++ if (unlikely(src_desc->next >= max_n_descs)) { ++ err = VIRTIO_CRYPTO_BADMSG; ++ VC_LOG_ERR("Invalid 
descriptor"); + goto error_exit; + } ++ src_desc = &head[src_desc->next]; ++ } ++ ++ vc_req->head = head; ++ vc_req->zero_copy = vcrypto->option; ++ ++ nb_descs = desc - descs; ++ desc = descs; ++ ++ if (unlikely(desc->len < sizeof(req))) { ++ err = VIRTIO_CRYPTO_BADMSG; ++ VC_LOG_ERR("Invalid descriptor"); ++ goto error_exit; + } + +- switch (req->header.opcode) { ++ if (unlikely(copy_data(&req, vc_req, descs, &desc, sizeof(req), ++ max_n_descs) < 0)) { ++ err = VIRTIO_CRYPTO_BADMSG; ++ VC_LOG_ERR("Invalid descriptor"); ++ goto error_exit; ++ } ++ ++ /* desc is advanced by 1 now */ ++ max_n_descs -= 1; ++ ++ switch (req.header.opcode) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + case VIRTIO_CRYPTO_CIPHER_DECRYPT: +- session_id = req->header.session_id; ++ session_id = req.header.session_id; + + /* one branch to avoid unnecessary table lookup */ + if (vcrypto->cache_session_id != session_id) { +@@ -1286,19 +1293,19 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + goto error_exit; + } + +- switch (req->u.sym_req.op_type) { ++ switch (req.u.sym_req.op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + err = VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + err = prepare_sym_cipher_op(vcrypto, op, vc_req, +- &req->u.sym_req.u.cipher, desc, +- &nb_descs, vq->size); ++ &req.u.sym_req.u.cipher, desc, ++ max_n_descs); + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + err = prepare_sym_chain_op(vcrypto, op, vc_req, +- &req->u.sym_req.u.chain, desc, +- &nb_descs, vq->size); ++ &req.u.sym_req.u.chain, desc, ++ max_n_descs); + break; + } + if (unlikely(err != 0)) { +@@ -1307,8 +1314,9 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + } + break; + default: ++ err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Unsupported symmetric crypto request type %u", +- req->header.opcode); ++ req.header.opcode); + goto error_exit; + } + +@@ -1316,7 +1324,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + + error_exit: + +- inhdr = reach_inhdr(vc_req, desc, &nb_descs, vq->size); ++ inhdr = reach_inhdr(vc_req, descs, max_n_descs); + if (likely(inhdr != NULL)) + inhdr->status = (uint8_t)err; + +@@ -1330,17 +1338,16 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op, + struct rte_mbuf *m_src = op->sym->m_src; + struct rte_mbuf *m_dst = op->sym->m_dst; + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src); +- uint16_t desc_idx; ++ struct vhost_virtqueue *vq = vc_req->vq; ++ uint16_t used_idx = vc_req->desc_idx, desc_idx; + + if (unlikely(!vc_req)) { + VC_LOG_ERR("Failed to retrieve vc_req"); + return NULL; + } + +- if (old_vq && (vc_req->vq != old_vq)) +- return vc_req->vq; +- +- desc_idx = vc_req->desc_idx; ++ if (old_vq && (vq != old_vq)) ++ return vq; + + if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; +@@ -1349,8 +1356,9 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op, + write_back_data(vc_req); + } + +- vc_req->vq->used->ring[desc_idx].id = desc_idx; +- vc_req->vq->used->ring[desc_idx].len = vc_req->len; ++ desc_idx = vq->avail->ring[used_idx]; ++ vq->used->ring[desc_idx].id = vq->avail->ring[desc_idx]; ++ vq->used->ring[desc_idx].len = vc_req->len; + + rte_mempool_put(m_src->pool, (void *)m_src); + +@@ -1448,7 +1456,7 @@ rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + vcrypto->mbuf_pool = rte_pktmbuf_pool_create(name, + VHOST_CRYPTO_MBUF_POOL_SIZE, 512, + sizeof(struct vhost_crypto_data_req), +- RTE_MBUF_DEFAULT_DATAROOM * 2 + RTE_PKTMBUF_HEADROOM, ++ 
VHOST_CRYPTO_MAX_DATA_SIZE + RTE_PKTMBUF_HEADROOM, + rte_socket_id()); + if (!vcrypto->mbuf_pool) { + VC_LOG_ERR("Failed to creath mbuf pool"); +@@ -1574,6 +1582,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops) + { + struct rte_mbuf *mbufs[VHOST_CRYPTO_MAX_BURST_SIZE * 2]; ++ struct vhost_crypto_desc descs[VHOST_CRYPTO_MAX_N_DESC]; + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + struct vhost_virtqueue *vq; +@@ -1632,7 +1641,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + op->sym->m_dst->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, +- op, head, desc_idx) < 0)) ++ op, head, descs, used_idx) < 0)) + break; + } + +@@ -1661,7 +1670,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + op->sym->m_src->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, +- op, head, desc_idx) < 0)) ++ op, head, descs, desc_idx) < 0)) + break; + } + +-- +cgit v1.0 diff --git a/CVE-2020-14376-CVE-2020-14377.patch b/CVE-2020-14376-CVE-2020-14377.patch new file mode 100644 index 0000000000000000000000000000000000000000..c28e251ccea18adf55a08f8660e15d6b970848f6 --- /dev/null +++ b/CVE-2020-14376-CVE-2020-14377.patch @@ -0,0 +1,163 @@ +From e4a7c14f02480a41992414afb5e011f8ff8f02f3 Mon Sep 17 00:00:00 2001 +From: Fan Zhang +Date: Tue, 14 Apr 2020 17:26:48 +0100 +Subject: vhost/crypto: fix missed request check for copy mode + +This patch fixes the missed request check to vhost crypto +copy mode. + +CVE-2020-14376 +CVE-2020-14377 +Fixes: 3bb595ecd682 ("vhost/crypto: add request handler") +Cc: stable@dpdk.org + +Signed-off-by: Fan Zhang +Acked-by: Chenbo Xia + +reference:https://git.dpdk.org/dpdk-stable/commit/?h=19.11&id=e4a7c14f0248 +Signed-off-by: gaoxingwang +--- + lib/librte_vhost/vhost_crypto.c | 68 ++++++++++++++++++++++++++++------------- + 1 file changed, 47 insertions(+), 21 deletions(-) + +diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c +index 86747dd..494f490 100644 +--- a/lib/librte_vhost/vhost_crypto.c ++++ b/lib/librte_vhost/vhost_crypto.c +@@ -756,7 +756,7 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, + } + + wb_data->dst = dst; +- wb_data->len = desc->len - offset; ++ wb_data->len = RTE_MIN(desc->len - offset, write_back_len); + write_back_len -= wb_data->len; + src += offset + wb_data->len; + offset = 0; +@@ -840,6 +840,17 @@ error_exit: + return NULL; + } + ++static __rte_always_inline uint8_t ++vhost_crypto_check_cipher_request(struct virtio_crypto_cipher_data_req *req) ++{ ++ if (likely((req->para.iv_len <= VHOST_CRYPTO_MAX_IV_LEN) && ++ (req->para.src_data_len <= RTE_MBUF_DEFAULT_BUF_SIZE) && ++ (req->para.dst_data_len >= req->para.src_data_len) && ++ (req->para.dst_data_len <= RTE_MBUF_DEFAULT_BUF_SIZE))) ++ return VIRTIO_CRYPTO_OK; ++ return VIRTIO_CRYPTO_BADMSG; ++} ++ + static uint8_t + prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, +@@ -851,7 +862,10 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_writeback_data *ewb = NULL; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); +- uint8_t ret = 0; ++ uint8_t ret = vhost_crypto_check_cipher_request(cipher); ++ ++ if (unlikely(ret != VIRTIO_CRYPTO_OK)) ++ goto error_exit; + + /* prepare */ + /* iv */ +@@ -861,10 +875,9 @@ 
prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- m_src->data_len = cipher->para.src_data_len; +- + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: ++ m_src->data_len = cipher->para.src_data_len; + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + cipher->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO); +@@ -886,13 +899,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_pool = vcrypto->wb_pool; +- +- if (unlikely(cipher->para.src_data_len > +- RTE_MBUF_DEFAULT_BUF_SIZE)) { +- VC_LOG_ERR("Not enough space to do data copy"); +- ret = VIRTIO_CRYPTO_ERR; +- goto error_exit; +- } ++ m_src->data_len = cipher->para.src_data_len; + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, cipher->para.src_data_len, + nb_descs, vq_size) < 0)) { +@@ -975,6 +982,29 @@ error_exit: + return ret; + } + ++static __rte_always_inline uint8_t ++vhost_crypto_check_chain_request(struct virtio_crypto_alg_chain_data_req *req) ++{ ++ if (likely((req->para.iv_len <= VHOST_CRYPTO_MAX_IV_LEN) && ++ (req->para.src_data_len <= RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.dst_data_len >= req->para.src_data_len) && ++ (req->para.dst_data_len <= RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.cipher_start_src_offset < ++ RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.len_to_cipher < RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.hash_start_src_offset < ++ RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.len_to_hash < RTE_MBUF_DEFAULT_DATAROOM) && ++ (req->para.cipher_start_src_offset + req->para.len_to_cipher <= ++ req->para.src_data_len) && ++ (req->para.hash_start_src_offset + req->para.len_to_hash <= ++ req->para.src_data_len) && ++ (req->para.dst_data_len + req->para.hash_result_len <= ++ RTE_MBUF_DEFAULT_DATAROOM))) ++ return VIRTIO_CRYPTO_OK; ++ return VIRTIO_CRYPTO_BADMSG; ++} ++ + static uint8_t + prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, +@@ -988,7 +1018,10 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint32_t digest_offset; + void *digest_addr; +- uint8_t ret = 0; ++ uint8_t ret = vhost_crypto_check_chain_request(chain); ++ ++ if (unlikely(ret != VIRTIO_CRYPTO_OK)) ++ goto error_exit; + + /* prepare */ + /* iv */ +@@ -998,10 +1031,9 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + goto error_exit; + } + +- m_src->data_len = chain->para.src_data_len; +- + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: ++ m_src->data_len = chain->para.src_data_len; + m_dst->data_len = chain->para.dst_data_len; + + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, +@@ -1023,13 +1055,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_pool = vcrypto->wb_pool; +- +- if (unlikely(chain->para.src_data_len > +- RTE_MBUF_DEFAULT_BUF_SIZE)) { +- VC_LOG_ERR("Not enough space to do data copy"); +- ret = VIRTIO_CRYPTO_ERR; +- goto error_exit; +- } ++ m_src->data_len = chain->para.src_data_len; + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, chain->para.src_data_len, + nb_descs, vq_size) < 0)) { +-- +cgit v1.0 diff --git a/CVE-2020-14378.patch 
b/CVE-2020-14378.patch new file mode 100644 index 0000000000000000000000000000000000000000..902dc04e5bacde16ff9bf3a1ee7aff4206afdd96 --- /dev/null +++ b/CVE-2020-14378.patch @@ -0,0 +1,42 @@ +From 81e9694830209207cbba599b62858c97c3ed5cfe Mon Sep 17 00:00:00 2001 +From: Fan Zhang +Date: Tue, 14 Apr 2020 16:52:47 +0100 +Subject: vhost/crypto: fix incorrect descriptor deduction + +This patch fixes the incorrect descriptor deduction for vhost crypto. + +CVE-2020-14378 +Fixes: 16d2e718b8ce ("vhost/crypto: fix possible out of bound access") +Cc: stable@dpdk.org + +Signed-off-by: Fan Zhang +Acked-by: Chenbo Xia + +reference:https://git.dpdk.org/dpdk-stable/commit/?h=19.11&id=81e969483020 +Signed-off-by: gaoxingwang +--- + lib/librte_vhost/vhost_crypto.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c +index 0f9df40..86747dd 100644 +--- a/lib/librte_vhost/vhost_crypto.c ++++ b/lib/librte_vhost/vhost_crypto.c +@@ -530,13 +530,14 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc, + int left = size - desc->len; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { +- (*nb_descs)--; + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) + return -1; + + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + left -= desc->len; ++ if (left > 0) ++ (*nb_descs)--; + } + + if (unlikely(left > 0)) +-- +cgit v1.0 diff --git a/README.en.md b/README.en.md deleted file mode 100644 index f04a225035a12d3d745c57c2367ac83292a7fece..0000000000000000000000000000000000000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# dpdk - -#### Description -{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**} - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md deleted file mode 100644 index c36bbd93bf7a6d1ab5ca65fe7f18feeb662aad37..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# dpdk - -#### 介绍 -{**以下是码云平台说明,您可以替换此简介** -码云是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN)。专为开发者提供稳定、高效、安全的云端软件开发协作平台 -无论是个人、团队、或是企业,都能够用码云实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)} - -#### 软件架构 -软件架构说明 - - -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx - -#### 参与贡献 - -1. Fork 本仓库 -2. 新建 Feat_xxx 分支 -3. 提交代码 -4. 新建 Pull Request - - -#### 码云特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 -5. 
码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/backport-0001-net-softnic-fix-memory-leak-as-profile-is-freed.patch b/backport-0001-net-softnic-fix-memory-leak-as-profile-is-freed.patch new file mode 100644 index 0000000000000000000000000000000000000000..4166c88a152f01317206b6d232882d4af3afa5c3 --- /dev/null +++ b/backport-0001-net-softnic-fix-memory-leak-as-profile-is-freed.patch @@ -0,0 +1,37 @@ +From b3bc560bd6bdf3c9851d25bc0a66cb24aa1fd48c Mon Sep 17 00:00:00 2001 +From: Dapeng Yu +Date: Wed, 28 Jul 2021 14:05:39 +0800 +Subject: [PATCH] net/softnic: fix memory leak as profile is freed + +In function softnic_table_action_profile_free(), the memory referenced +by pointer "ap" in the instance of "struct softnic_table_action_profile" +is not freed. + +This patch fixes it. + +Fixes: a737dd4e5863 ("net/softnic: add table action profile") +Cc: stable@dpdk.org + +Signed-off-by: Dapeng Yu +Acked-by: Jasvinder Singh +Conflict: NA +Reference: https://github.com/DPDK/dpdk/commit/b3bc560bd6bdf3c9851d25bc0a66cb24aa1fd48c +--- + drivers/net/softnic/rte_eth_softnic_action.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/softnic/rte_eth_softnic_action.c b/drivers/net/softnic/rte_eth_softnic_action.c +index 92c744dc9a..33be9552a6 100644 +--- a/drivers/net/softnic/rte_eth_softnic_action.c ++++ b/drivers/net/softnic/rte_eth_softnic_action.c +@@ -183,6 +183,7 @@ softnic_table_action_profile_free(struct pmd_internals *p) + break; + + TAILQ_REMOVE(&p->table_action_profile_list, profile, node); ++ rte_table_action_profile_free(profile->ap); + free(profile); + } + } +-- +2.23.0 + diff --git a/backport-0002-net-virtio-fix-interrupt-handle-leak.patch b/backport-0002-net-virtio-fix-interrupt-handle-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..00859297a1529a97f8428cac9e427da4549cbcfd --- /dev/null +++ b/backport-0002-net-virtio-fix-interrupt-handle-leak.patch @@ -0,0 +1,43 @@ +From 7b9195154926b808e3ae23750eaff3e81cd5f529 Mon Sep 17 00:00:00 2001 +From: Gaoxiang Liu +Date: Mon, 26 Jul 2021 22:42:05 +0800 +Subject: [PATCH] net/virtio: fix interrupt handle leak + +Free memory of interrupt handle in virtio_user_dev_uninit() to +avoid memory leak. +when virtio user dev closes, memory of interrupt handle is not freed +that is allocated in virtio_user_fill_intr_handle(). 
+ +Fixes: 3d4fb6fd2505 ("net/virtio-user: support Rx interrupt") +Cc: stable@dpdk.org + +Signed-off-by: Gaoxiang Liu +Reviewed-by: Chenbo Xia +Reviewed-by: Maxime Coquelin +Conflict: dev->hw.port_id to dev->port_id +Reference: https://github.com/DPDK/dpdk/commit/7b9195154926b808e3ae23750eaff3e81cd5f529 +--- + drivers/net/virtio/virtio_user/virtio_user_dev.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c +index ea016e8..6b91806 100644 +--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c ++++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c +@@ -528,6 +528,13 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) + { + uint32_t i; + ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id]; ++ ++ if (eth_dev->intr_handle) { ++ free(eth_dev->intr_handle); ++ eth_dev->intr_handle = NULL; ++ } ++ + virtio_user_stop_device(dev); + + rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev); +-- +2.23.0 + diff --git a/backport-0003-vhost-fix-crash-on-reconnect.patch b/backport-0003-vhost-fix-crash-on-reconnect.patch new file mode 100644 index 0000000000000000000000000000000000000000..4ba93cdab22c0d3b4cf5addfca3e73bcf0b74162 --- /dev/null +++ b/backport-0003-vhost-fix-crash-on-reconnect.patch @@ -0,0 +1,44 @@ +From 3c929a0bb3e7addc5103227bff126b8b9dd952ef Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 26 Jul 2021 09:58:14 +0200 +Subject: [PATCH] vhost: fix crash on reconnect + +When the vhost-user frontend like Virtio-user tries to +reconnect to the restarted Vhost backend, the Vhost backend +segfaults when multiqueue is enabled. + +This is caused by VHOST_USER_GET_VRING_BASE being called for +a virtqueue that has not been created before, causing a NULL +pointer dereferencing. + +This patch adds the VHOST_USER_GET_VRING_BASE requests to +the list of requests that trigger queue pair allocations. 
+ +Fixes: 160cbc815b41 ("vhost: remove a hack on queue allocation") +Cc: stable@dpdk.org + +Reported-by: Yinan Wang +Signed-off-by: Maxime Coquelin +Tested-by: Yinan Wang +Reviewed-by: Chenbo Xia +Conflict: change vhost dir to librte_vhost +Reference: https://github.com/DPDK/dpdk/commit/3c929a0bb3e7addc5103227bff126b8b9dd952ef +--- + lib/librte_vhost/vhost_user.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index 433f412fa8..29a4c9af60 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -2796,6 +2796,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + break; + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: ++ case VHOST_USER_GET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + vring_idx = msg->payload.state.index; + break; +-- +2.23.0 + diff --git a/backport-0004-net-virtio-report-maximum-MTU-in-device-info.patch b/backport-0004-net-virtio-report-maximum-MTU-in-device-info.patch new file mode 100644 index 0000000000000000000000000000000000000000..a43e5aa67e5da8bfe0bfc25610356c3f2a5d3f47 --- /dev/null +++ b/backport-0004-net-virtio-report-maximum-MTU-in-device-info.patch @@ -0,0 +1,37 @@ +From 11d7bc9ff074dc5e37dd9ab51bb365669d08c3d6 Mon Sep 17 00:00:00 2001 +From: Ivan Ilchenko +Date: Wed, 21 Jul 2021 12:22:25 +0300 +Subject: [PATCH] net/virtio: report maximum MTU in device info + +Fix the driver to report maximum MTU obtained from config if +VIRTIO_NET_F_MTU is supported or calculated based on maximum +Rx packet length. + +Fixes: ad97ceece12c ("ethdev: add min/max MTU to device info") +Cc: stable@dpdk.org + +Signed-off-by: Ivan Ilchenko +Signed-off-by: Andrew Rybchenko +Reviewed-by: Maxime Coquelin +Conflict: NA +Reference: https://github.com/DPDK/dpdk/commit/11d7bc9ff074dc5e37dd9ab51bb365669d08c3d6 + +--- + drivers/net/virtio/virtio_ethdev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c +index 044eb10..89e4c23 100644 +--- a/drivers/net/virtio/virtio_ethdev.c ++++ b/drivers/net/virtio/virtio_ethdev.c +@@ -2436,6 +2436,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; + dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; ++ dev_info->max_mtu = hw->max_mtu; + + host_features = VTPCI_OPS(hw)->get_features(hw); + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; +-- +2.23.0 + diff --git a/backport-0005-bus-dpaa-fix-freeing-in-FMAN-interface-destructor.patch b/backport-0005-bus-dpaa-fix-freeing-in-FMAN-interface-destructor.patch new file mode 100644 index 0000000000000000000000000000000000000000..91b3b4c7a56377995eb15a80b7e02f18fddd467a --- /dev/null +++ b/backport-0005-bus-dpaa-fix-freeing-in-FMAN-interface-destructor.patch @@ -0,0 +1,33 @@ +From 5ddcf3de6bc08fa7c14fd1ead86012aa575cf665 Mon Sep 17 00:00:00 2001 +From: Hemant Agrawal +Date: Mon, 19 Jul 2021 19:29:11 +0530 +Subject: [PATCH] bus/dpaa: fix freeing in FMAN interface destructor + +if was allocated with rte_malloc, free shall be equivalent. 
+ +Fixes: 4762b3d419c3 ("bus/dpaa: delay fman device list to bus probe") +Cc: stable@dpdk.org + +Signed-off-by: Hemant Agrawal +Conflict: NA +Reference: https://github.com/DPDK/dpdk/commit/5ddcf3de6bc08fa7c14fd1ead86012aa575cf665 +--- + drivers/bus/dpaa/base/fman/fman.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/bus/dpaa/base/fman/fman.c b/drivers/bus/dpaa/base/fman/fman.c +index 692071b4b0..a14004d7fc 100644 +--- a/drivers/bus/dpaa/base/fman/fman.c ++++ b/drivers/bus/dpaa/base/fman/fman.c +@@ -50,7 +50,7 @@ if_destructor(struct __fman_if *__if) + free(bp); + } + cleanup: +- free(__if); ++ rte_free(__if); + } + + static int +-- +2.23.0 + diff --git a/backport-0006-distributor-fix-128-bit-write-alignment.patch b/backport-0006-distributor-fix-128-bit-write-alignment.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d6ec8b6d5616985b172bbb9ff4cb6271dea3f2a --- /dev/null +++ b/backport-0006-distributor-fix-128-bit-write-alignment.patch @@ -0,0 +1,38 @@ +From de8606bf73323dfa8395f2dc0a93dc6194ff21b7 Mon Sep 17 00:00:00 2001 +From: David Hunt +Date: Fri, 16 Jul 2021 14:32:37 +0100 +Subject: [PATCH] distributor: fix 128-bit write alignment + +When the distributor sample app is built as a 32-bit app, +the data buffer passed to find_match_vec can be unaligned, +causing a segmentation fault due to writing a 128-bit value +using _mm_store_si128(). 128-bit align the data being +passed in so this does not happen. + +Fixes: 775003ad2f96 ("distributor: add new burst-capable library") +Cc: stable@dpdk.org + +Signed-off-by: David Hunt +Conflict: mv distributor dir to librte_distributor +Reference: https://github.com/DPDK/dpdk/commit/de8606bf73323dfa8395f2dc0a93dc6194ff21b7 + +--- + lib/librte_distributor/rte_distributor.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c +index 6c5b0c8..f6a0107 100644 +--- a/lib/librte_distributor/rte_distributor.c ++++ b/lib/librte_distributor/rte_distributor.c +@@ -373,7 +373,7 @@ rte_distributor_process(struct rte_distributor *d, + } + + while (next_idx < num_mbufs) { +- uint16_t matches[RTE_DIST_BURST_SIZE]; ++ uint16_t matches[RTE_DIST_BURST_SIZE] __rte_aligned(128); + unsigned int pkts; + + /* Sync with worker on GET_BUF flag. 
*/ +-- +2.23.0 + diff --git a/dpdk.spec b/dpdk.spec index a8206246d5afe49f779c322f351084c7eb6bc536..7620503b85748d221b7a427c33ded9132e57e524 100644 --- a/dpdk.spec +++ b/dpdk.spec @@ -1,10 +1,41 @@ Name: dpdk Version: 19.11 -Release: 0 +Release: 15 Packager: packaging@6wind.com URL: http://dpdk.org %global source_version 19.11 -Source: %{name}-%{version}.tar.xz +Source: https://git.dpdk.org/dpdk/snapshot/%{name}-%{version}.tar.xz + +Patch0: CVE-2020-10725.patch +Patch1: CVE-2020-10722.patch +Patch2: CVE-2020-10723.patch +Patch3: CVE-2020-10724.patch +Patch4: CVE-2020-10726.patch +Patch5: CVE-2020-14378.patch +Patch6: CVE-2020-14376-CVE-2020-14377.patch +Patch7: fix-pool-allocation.patch +Patch8: CVE-2020-14374.patch +Patch9: CVE-2020-14375.patch +Patch10: fix-populate-with-small-virtual-chunks.patch +Patch11: 0001-dpdk-add-secure-compile-option-and-fPIC-option.patch +Patch12: 0002-dpdk-add-secure-option-in-makefile.patch +Patch13: 0003-dpdk-bugfix-the-deadlock-in-rte_eal_init.patch +Patch14: 0004-dpdk-master-core-donot-set-affinity-in-libstorage.patch +Patch15: 0005-dpdk-change-the-log-level-in-prepare_numa.patch +Patch16: 0006-dpdk-fix-dpdk-coredump-problem.patch +Patch17: 0007-dpdk-add-secure-compile-option-in-pmdinfogen-Makefil.patch +Patch18: 0008-dpdk-fix-cpu-flag-error-in-Intel-R-Xeon-R-CPU-E5-262.patch +Patch19: 0009-dpdk-add-support-gazelle.patch +Patch20: 0010-dpdk-fix-error-in-clearing-secondary-process-memseg-lists.patch +Patch21: 0011-dpdk-fix-coredump-when-primary-process-attach-without-shared-file.patch +Patch22: 0012-dpdk-fix-fbarray-memseg-destory-error-during-detach.patch +Patch23: 0013-dpdk-optimize-the-efficiency-of-compiling-dpdk.patch +Patch24: backport-0001-net-softnic-fix-memory-leak-as-profile-is-freed.patch +Patch25: backport-0002-net-virtio-fix-interrupt-handle-leak.patch +Patch26: backport-0003-vhost-fix-crash-on-reconnect.patch +Patch27: backport-0004-net-virtio-report-maximum-MTU-in-device-info.patch +Patch28: backport-0005-bus-dpaa-fix-freeing-in-FMAN-interface-destructor.patch +Patch29: backport-0006-distributor-fix-128-bit-write-alignment.patch Summary: Data Plane Development Kit core Group: System Environment/Libraries @@ -21,12 +52,14 @@ ExclusiveArch: i686 x86_64 aarch64 %global config x86_64-%{machine}-linux-gcc %endif -BuildRequires: kernel-devel, kernel, libpcap-devel -BuildRequires: kernel-source +BuildRequires: gcc +BuildRequires: kernel-devel, libpcap-devel BuildRequires: numactl-devel libconfig-devel BuildRequires: module-init-tools uname-build-checks libnl3 libmnl BuildRequires: glibc glibc-devel libibverbs libibverbs-devel libmnl-devel -BuildRequires: texlive + +Requires: python3-pyelftools + %define kern_devel_ver %(uname -r) %description DPDK core includes kernel modules, core libraries and tools. @@ -57,16 +90,22 @@ Requires: dpdk = %{version} This package contains the pdump tool for capture the dpdk network packets. 
%prep -%setup -q +%autosetup -n %{name}-%{version} -p1 %build namer=%{kern_devel_ver} export RTE_KERNELDIR=/lib/modules/${namer}/build +export EXTRA_CFLAGS="-fstack-protector-strong" make O=%{target} T=%{config} config -#make .so libraries for spdk -sed -ri 's,(CONFIG_RTE_BUILD_SHARED_LIB=).*,\1y,' %{target}/.config +#make .a and .so libraries for spdk +sed -ri 's,(RTE_BUILD_BOTH_STATIC_AND_SHARED_LIBS=).*,\1y,' %{target}/.config sed -ri 's,(CONFIG_RTE_LIB_LIBOS=).*,\1n,' %{target}/.config -make O=%{target} %{?_smp_mflags} +sed -ri 's,(RTE_MACHINE=).*,\1%{machine},' %{target}/.config +sed -ri 's,(RTE_APP_TEST=).*,\1n,' %{target}/.config +sed -ri 's,(RTE_NEXT_ABI=).*,\1n,' %{target}/.config +sed -ri 's,(LIBRTE_VHOST=).*,\1y,' %{target}/.config +#sed -ri 's,(LIBRTE_PMD_PCAP=).*,\1y,' %{target}/.config +make O=%{target} -j16 %install namer=%{kern_devel_ver} @@ -92,22 +131,8 @@ cp -ar ./%{target}/lib/librte_cmdline.so* $RPM_BUILD_ROOT/lib64/ cp -ar ./%{target}/lib/librte_net.so* $RPM_BUILD_ROOT/lib64/ cp -ar ./%{target}/lib/librte_meter.so* $RPM_BUILD_ROOT/lib64/ -sed -ri 's,(RTE_MACHINE=).*,\1%{machine},' %{target}/.config -sed -ri 's,(RTE_APP_TEST=).*,\1n,' %{target}/.config -sed -ri 's,(RTE_BUILD_SHARED_LIB=).*,\1n,' %{target}/.config -sed -ri 's,(RTE_NEXT_ABI=).*,\1n,' %{target}/.config -sed -ri 's,(LIBRTE_VHOST=).*,\1y,' %{target}/.config -#sed -ri 's,(LIBRTE_PMD_PCAP=).*,\1y,' %{target}/.config -sed -ri 's,(CONFIG_RTE_BUILD_SHARED_LIB=).*,\1n,' %{target}/.config -make O=%{target} %{?_smp_mflags} #make O=%{target} doc -make install O=%{target} RTE_KERNELDIR=/lib/modules/${namer}/build \ - kerneldir=/lib/modules/${namer}/extra/dpdk DESTDIR=%{buildroot} \ - prefix=%{_prefix} bindir=%{_bindir} sbindir=%{_sbindir} \ - includedir=%{_includedir}/dpdk libdir=%{_libdir} \ - datadir=%{_datadir}/dpdk docdir=%{_docdir}/dpdk - mkdir -p $RPM_BUILD_ROOT/usr/include/%{name}-%{version}/ ln -s /usr/share/dpdk/mk $RPM_BUILD_ROOT/usr/include/%{name}-%{version}/ ln -s /usr/share/dpdk/%{target} $RPM_BUILD_ROOT/usr/include/%{name}-%{version}/ @@ -127,6 +152,7 @@ strip -g $RPM_BUILD_ROOT/lib/modules/${namer}/extra/dpdk/rte_kni.ko %dir %{_datadir}/dpdk %{_datadir}/dpdk/usertools/*.py %{_datadir}/dpdk/usertools/*.sh +%{_sbindir}/dpdk-devbind /lib/modules/%{kern_devel_ver}/extra/dpdk/* /lib64/librte*.so* @@ -136,13 +162,13 @@ strip -g $RPM_BUILD_ROOT/lib/modules/${namer}/extra/dpdk/rte_kni.ko %{_datadir}/dpdk/buildtools %{_datadir}/dpdk/%{target} %{_datadir}/dpdk/examples -%{_sbindir}/* %{_bindir}/* %{_libdir}/* %dir /usr/include/%{name}-%{version}/ /usr/include/%{name}-%{version}/* %dir /usr/include/dpdk/ /usr/include/dpdk/* +%exclude /usr/bin/dpdk-pdump %files doc #%doc %{_docdir}/dpdk @@ -159,5 +185,57 @@ strip -g $RPM_BUILD_ROOT/lib/modules/${namer}/extra/dpdk/rte_kni.ko /usr/sbin/depmod %changelog +* Mon Nov 15 2021 wuchangsheng - 19.11-15 +- backport patches from community +- net/softnic fix memory leak as profile is freed +- net/virtio fix interrupt handle leak +- vhost fix crash on reconnect +- net/virtio report maximum MTU in device info +- bus/dpaa fix freeing in FMAN interface destructor +- distributor fix 128-bit write alignment + + +* Sat Nov 6 2021 wuchangsheng - 19.11-14 +- merge patches that add gazelle support into one + +* Tue Jul 13 2021 huangliming - 19.11-13 +- remove redundant README files + +* Mon Jul 12 2021 huangliming - 19.11-12 +- change the patch installation to autosetup + +* Tue Jun 08 2021 huangliming - 19.11-11 +- add gcc BuildRequires + +* Mon May 24 2021 renmingshuai - 19.11-10 +- 
optimize the efficiency of compiling dpdk + +* Mon May 24 2021 wutao - 19.11-9 +- add -fstack-protector-strong gcc flag + +* Mon Apr 5 2021 wu-changsheng <851744572@qq.com> - 19.11-8 +- add support for gazelle + +* Thu Jan 28 2021 huangliming - 19.11-7 +- fix populate with small virtual chunks + +* Thu Jan 28 2021 huangliming - 19.11-6 +- fix yum update dpdk-tools conflict with dpdk-devel + +* Tue Oct 20 2020 zhaowei - 19.11-5 +- fix CVE-2020-14374 CVE-2020-14375 + +* Tue Oct 20 2020 chenxiang - 19.11-4 +- fix CVE-2020-14376 CVE-2020-14377 CVE-2020-14378 + +* Wed Sep 23 2020 hubble_zhu - 19.11-3 +- Add requires for dpdk-pmdinfo + +* Thu Sep 3 2020 zhaowei - 19.11-2 +- update source URL + +* Wed May 27 2020 chenxiang - 19.11-1 +- fix CVE-2020-10722 CVE-2020-10723 CVE-2020-10724 CVE-2020-10725 + * Wed May 27 2020 openEuler dpdk version-release -first package diff --git a/fix-pool-allocation.patch b/fix-pool-allocation.patch new file mode 100644 index 0000000000000000000000000000000000000000..648bbee4a84532aa7e42bddb05064e69e5881bb7 --- /dev/null +++ b/fix-pool-allocation.patch @@ -0,0 +1,45 @@ +From 3f2635c5a9c3df4ba7cc0d6598a2023569ca3d39 Mon Sep 17 00:00:00 2001 +From: Fan Zhang +Date: Tue, 14 Apr 2020 16:19:51 +0100 +Subject: vhost/crypto: fix pool allocation + +This patch fixes the missing iv space allocation in crypto +operation mempool. + +Fixes: 709521f4c2cd ("examples/vhost_crypto: support multi-core") +Cc: stable@dpdk.org + +Signed-off-by: Fan Zhang +Acked-by: Chenbo Xia +--- + examples/vhost_crypto/main.c | 2 +- + lib/librte_vhost/rte_vhost_crypto.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/examples/vhost_crypto/main.c b/examples/vhost_crypto/main.c +index 1d7ba94..11b022e 100644 +--- a/examples/vhost_crypto/main.c ++++ b/examples/vhost_crypto/main.c +@@ -544,7 +544,7 @@ main(int argc, char *argv[]) + snprintf(name, 127, "COPPOOL_%u", lo->lcore_id); + info->cop_pool = rte_crypto_op_pool_create(name, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MEMPOOL_OBJS, +- NB_CACHE_OBJS, 0, ++ NB_CACHE_OBJS, VHOST_CRYPTO_MAX_IV_LEN, + rte_lcore_to_socket_id(lo->lcore_id)); + + if (!info->cop_pool) { +diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h +index d29871c..866a592 100644 +--- a/lib/librte_vhost/rte_vhost_crypto.h ++++ b/lib/librte_vhost/rte_vhost_crypto.h +@@ -10,6 +10,7 @@ + #define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */ + /** max nb virtual queues in a burst for finalizing*/ + #define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64) ++#define VHOST_CRYPTO_MAX_IV_LEN (32) + + enum rte_vhost_crypto_zero_copy { + RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0, +-- +cgit v1.0 diff --git a/fix-populate-with-small-virtual-chunks.patch b/fix-populate-with-small-virtual-chunks.patch new file mode 100644 index 0000000000000000000000000000000000000000..95933a30cddd21770fa1bb4b3ba47c18ee6a2d50 --- /dev/null +++ b/fix-populate-with-small-virtual-chunks.patch @@ -0,0 +1,112 @@ +From 43503c59adee6cae7069da23e105c24e044bf72c Mon Sep 17 00:00:00 2001 +From: Olivier Matz +Date: Fri, 17 Jan 2020 15:57:52 +0100 +Subject: mempool: fix populate with small virtual chunks + +To populate a mempool with a virtual area, the mempool code calls +rte_mempool_populate_iova() for each iova-contiguous area. It happens +(rarely) that this area is too small to store one object. In this case, +rte_mempool_populate_iova() returns an error, which is forwarded by +rte_mempool_populate_virt(). + +This case should not throw an error in rte_mempool_populate_virt(). 
+Instead, the area that is too small should just be ignored. + +To fix this issue, change the return value of +rte_mempool_populate_iova() to 0 when no object can be populated, +so it can be ignored by the caller. As this would be an API/ABI change, +only do this modification internally for now. + +Fixes: 354788b60cfd ("mempool: allow populating with unaligned virtual area") +Cc: stable@dpdk.org + +Signed-off-by: Olivier Matz +Tested-by: Anatoly Burakov +Tested-by: Alvin Zhang + +Conflict:NA +Reference:http://git.dpdk.org/dpdk/patch/?id=43503c59adee6cae7069da23e105c24e044bf72c +Signed-off-by:wuchangsheng +--- + lib/librte_mempool/rte_mempool.c | 30 +++++++++++++++++++++++++----- + 1 file changed, 25 insertions(+), 5 deletions(-) + +diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c +index aea5972..08906df 100644 +--- a/lib/librte_mempool/rte_mempool.c ++++ b/lib/librte_mempool/rte_mempool.c +@@ -297,8 +297,8 @@ mempool_ops_alloc_once(struct rte_mempool *mp) + * zone. Return the number of objects added, or a negative value + * on error. + */ +-int +-rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, ++static int ++__rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, + void *opaque) + { +@@ -332,7 +332,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_MEMPOOL_ALIGN) - vaddr; + + if (off > len) { +- ret = -EINVAL; ++ ret = 0; + goto fail; + } + +@@ -343,7 +343,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + + /* not enough room to store one object */ + if (i == 0) { +- ret = -EINVAL; ++ ret = 0; + goto fail; + } + +@@ -356,6 +356,21 @@ fail: + return ret; + } + ++int ++rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, ++ rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, ++ void *opaque) ++{ ++ int ret; ++ ++ ret = __rte_mempool_populate_iova(mp, vaddr, iova, len, free_cb, ++ opaque); ++ if (ret == 0) ++ ret = -EINVAL; ++ ++ return ret; ++} ++ + static rte_iova_t + get_iova(void *addr) + { +@@ -406,8 +421,10 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, + break; + } + +- ret = rte_mempool_populate_iova(mp, addr + off, iova, ++ ret = __rte_mempool_populate_iova(mp, addr + off, iova, + phys_len, free_cb, opaque); ++ if (ret == 0) ++ continue; + if (ret < 0) + goto fail; + /* no need to call the free callback for next chunks */ +@@ -415,6 +432,9 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, + cnt += ret; + } + ++ if (cnt == 0) ++ return -EINVAL; ++ + return cnt; + + fail: +-- +cgit v1.0