From b4c80ff85bfe22e054caca5fc0d3f451eff32791 Mon Sep 17 00:00:00 2001 From: huangshangcheng Date: Thu, 4 Sep 2025 08:59:27 +0800 Subject: [PATCH] Use direct unaligned memory access (LZ4_FORCE_MEMORY_ACCESS=2) as the preferred approach on RISC-V, similar to ARMv6. GCC's predefined __riscv_zicclsm macro is used to detect, at compile time, that the target supports unaligned memory access; the Zicclsm extension is mandatory per the RVA20U64 specification. Builds for targets without Zicclsm extension support keep the original selection criteria unchanged. Supported compilers: GCC 14.1.0 and above. Testing: Compression speed was measured on RISC-V platforms with and without Zicclsm extension support and the results were compared. Results: Configuration Compression Speed (MB/s) Improvement Ratio Without Zicclsm extension 160 - (Baseline) With Zicclsm extension 218 +34.38% All tests show a compression speed improvement when the extension is supported. Signed-off-by: liuqingtao --- lz4.spec | 7 +++- riscv_zicclsm_lz4_force_mem_access_2.patch | 48 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 riscv_zicclsm_lz4_force_mem_access_2.patch diff --git a/lz4.spec b/lz4.spec index f8c7a11..0778583 100644 --- a/lz4.spec +++ b/lz4.spec @@ -1,12 +1,14 @@ Name: lz4 Version: 1.9.4 -Release: 2 +Release: 3 Summary: Extremely fast compression algorithm License: GPLv2+ and BSD URL: https://lz4.github.io/lz4/ Source0: https://github.com/lz4/lz4/archive/v%{version}/%{name}-%{version}.tar.gz +Patch0: riscv_zicclsm_lz4_force_mem_access_2.patch + Provides: %{name}-libs = %{version}-%{release} Obsoletes: %{name} < 1.7.5-3 Obsoletes: %{name}-libs @@ -70,6 +72,9 @@ make check %{_mandir}/man1/unlz4.1* %changelog +* Wed Sep 03 2025 Liu Qingtao - 1.9.4-3 +- Add patch riscv_zicclsm_lz4_force_mem_access_2.patch to configure LZ4_FORCE_MEMORY_ACCESS=2 for RISC-V with Zicclsm extension + * Thu Feb 16 2023 zhangnan - 1.9.4-2 - add make check in spec diff --git 
a/riscv_zicclsm_lz4_force_mem_access_2.patch b/riscv_zicclsm_lz4_force_mem_access_2.patch new file mode 100644 index 0000000..63f4e1f --- /dev/null +++ b/riscv_zicclsm_lz4_force_mem_access_2.patch @@ -0,0 +1,48 @@ +From ccecb60e09c4da4b6df0d978bd50588246131025 Mon Sep 17 00:00:00 2001 +From: huangshangcheng +Date: Thu, 4 Sep 2025 20:25:19 +0800 +Subject: [PATCH] Add unaligned memory access optimization for RISC-V. + +The modification includes using direct unaligned memory access as the optimal approach on RISC-V, +similar to ARMv6. We utilize GCC's Zicclsm macro to detect hardware support for unaligned memory + access, which is mandatory per the RVA20U64 specification. Additionally, we ensure that this + change does not affect original judgment criteria on servers without Zicclsm extension support. +Supported versions: GCC14.1.0 and above. + +Testing: +The verification involves testing performance with and without Zicclsm extension support on RISC-V platforms, + comparing compression speed improvements. + +Results: +Configuration Compression Speed (MB/s) Improvement Ratio +Without Zicclsm extension 160 - (Baseline) +With Zicclsm extension 218 +34.38% + +All tests show successful performance improvements when supported. +Signed-off-by: liuqingtao +--- + lz4-1.9.4/lib/lz4.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/lib/lz4.c b/lib/lz4.c +index 654bfdf..1de9604 100644 +--- a/lib/lz4.c ++++ b/lib/lz4.c +@@ -72,11 +72,13 @@ + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) ++ * Note: RISC-V systems with the Zicclsm extension support efficient unaligned memory access. 
+ */ + #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ + # if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ +- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) ++ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ ++ || (defined(__riscv) && defined(__riscv_zicclsm)) ) + # define LZ4_FORCE_MEMORY_ACCESS 2 + # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) + # define LZ4_FORCE_MEMORY_ACCESS 1 +-- +2.45.2.windows.1 + -- Gitee