From 2a6d053deb6843bf46bffca2b30fe7beac2c4ad4 Mon Sep 17 00:00:00 2001 From: dongji Date: Wed, 10 Sep 2025 14:50:51 +0800 Subject: [PATCH] Optimize CRC calculation for the RISC-V64 architecture by replacing time-consuming vredxor.vv instructions with better performance combination of vslidexx.vi and vxor.vv instructions. Dependencies on the Zvbb extension have been removed. Testing The verification method involves running autogen.sh and configure to generate the Makefile, followed by executing the make checks command. The verification results below were obtained on a RISC-V64 OpenEuler 24.03 virtual machine on QEMU 9.0.1 All tests successful. make --no-print-directory check-TESTS PASS: raid/pq_gen_test PASS: crc/crc16_t10dif_test PASS: crc/crc16_t10dif_copy_test PASS: raid/xor_gen_test PASS: crc/crc64_funcs_test PASS: crc/crc32_funcs_test PASS: igzip/igzip_wrapper_hdr_test PASS: igzip/checksum32_funcs_test PASS: erasure_code/gf_inverse_test PASS: erasure_code/gf_vect_mul_test PASS: raid/xor_check_test PASS: raid/pq_check_test PASS: igzip/igzip_rand_test PASS: mem/mem_zero_detect_test PASS: erasure_code/erasure_code_test PASS: erasure_code/erasure_code_update_test ============================================================================ Testsuite summary for libisal 2.30.0 ============================================================================ # TOTAL: 16 # PASS: 16 # SKIP: 0 # XFAIL: 0 # FAIL: 0 # XPASS: 0 # ERROR: 0 ============================================================================ Signed-off-by: liuqingtao --- ...imize-CRC-calculation-for-the-RISC-V.patch | 2173 +++++++++++++++++ isa-l.spec | 8 +- 2 files changed, 2179 insertions(+), 2 deletions(-) create mode 100644 Feature-Optimize-CRC-calculation-for-the-RISC-V.patch diff --git a/Feature-Optimize-CRC-calculation-for-the-RISC-V.patch b/Feature-Optimize-CRC-calculation-for-the-RISC-V.patch new file mode 100644 index 0000000..7328cc5 --- /dev/null +++ b/Feature-Optimize-CRC-calculation-for-the-RISC-V.patch @@ -0,0 +1,2173 @@ +From 069f340f77240f62cdab8c8c9701e4191be5f1ad Mon Sep 17 00:00:00 2001 +From: dongji +Date: Tue, 9 Sep 2025 20:23:50 +0800 +Subject: [PATCH] Optimize CRC calculation for the RISC-V64 architecture by replacing + time-consuming vredxor.vv instructions with better performance + combination of vslidexx.vi and vxor.vv instructions. Dependencies on + the Zvbb extension have been removed. + + Testing + The verification method involves + running autogen.sh and configure to generate the Makefile, followed by + executing the make checks command. The verification results below were + obtained on a RISC-V64 OpenEuler 24.03 virtual machine on QEMU 9.0.1 + + All tests successful. 
+ make --no-print-directory check-TESTS + PASS: raid/pq_gen_test + PASS: crc/crc16_t10dif_test + PASS: crc/crc16_t10dif_copy_test + PASS: raid/xor_gen_test + PASS: crc/crc64_funcs_test + PASS: crc/crc32_funcs_test + PASS: igzip/igzip_wrapper_hdr_test + PASS: igzip/checksum32_funcs_test + PASS: erasure_code/gf_inverse_test + PASS: erasure_code/gf_vect_mul_test + PASS: raid/xor_check_test + PASS: raid/pq_check_test + PASS: igzip/igzip_rand_test + PASS: mem/mem_zero_detect_test + PASS: erasure_code/erasure_code_test + PASS: erasure_code/erasure_code_update_test + ============================================================================ + Testsuite summary for libisal 2.30.0 + ============================================================================ + # TOTAL: 16 + # PASS: 16 + # SKIP: 0 + # XFAIL: 0 + # FAIL: 0 + # XPASS: 0 + # ERROR: 0 + ============================================================================ + +Signed-off-by: liuqingtao +--- + Makefile.am | 7 +- + configure.ac | 98 +++++++++-- + crc/riscv64/Makefile.am | 1 - + crc/riscv64/crc16_t10dif_copy_vclmul.S | 175 +++++++++----------- + crc/riscv64/crc16_t10dif_vclmul.S | 157 +++++++++--------- + crc/riscv64/crc32_gzip_refl_vclmul.S | 5 +- + crc/riscv64/crc32_gzip_refl_vclmul.h | 12 +- + crc/riscv64/crc32_ieee_norm_vclmul.S | 5 +- + crc/riscv64/crc32_ieee_norm_vclmul.h | 12 +- + crc/riscv64/crc32_iscsi_refl_vclmul.S | 11 +- + crc/riscv64/crc32_iscsi_refl_vclmul.h | 12 +- + crc/riscv64/crc32_norm_common_vclmul.h | 12 +- + crc/riscv64/crc32_refl_common_vclmul.h | 5 +- + crc/riscv64/crc64_ecma_norm_vclmul.S | 5 +- + crc/riscv64/crc64_ecma_norm_vclmul.h | 10 +- + crc/riscv64/crc64_ecma_refl_vclmul.S | 5 +- + crc/riscv64/crc64_ecma_refl_vclmul.h | 12 +- + crc/riscv64/crc64_iso_norm_vclmul.S | 5 +- + crc/riscv64/crc64_iso_norm_vclmul.h | 12 +- + crc/riscv64/crc64_iso_refl_vclmul.S | 5 +- + crc/riscv64/crc64_iso_refl_vclmul.h | 12 +- + crc/riscv64/crc64_jones_norm_vclmul.S | 5 +- + crc/riscv64/crc64_jones_norm_vclmul.h | 12 +- + crc/riscv64/crc64_jones_refl_vclmul.S | 5 +- + crc/riscv64/crc64_jones_refl_vclmul.h | 12 +- + crc/riscv64/crc64_norm_common_vclmul.h | 12 +- + crc/riscv64/crc64_refl_common_vclmul.h | 6 +- + crc/riscv64/crc_common_vclmul.h | 217 +++++++++++++------------ + crc/riscv64/crc_multibinary_riscv.S | 3 +- + crc/riscv64/crc_riscv64_dispatcher.c | 164 ++++++++++--------- + include/riscv64_multibinary.h | 186 ++++++++++----------- + 31 files changed, 637 insertions(+), 563 deletions(-) + +diff --git a/Makefile.am b/Makefile.am +index 7602018..3f2e212 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -158,15 +158,10 @@ AM_CFLAGS = ${my_CFLAGS} ${INCLUDE} $(src_include) ${D} + if CPU_AARCH64 + AM_CCASFLAGS = ${AM_CFLAGS} + else +-if CPU_RISCV64 +-AM_CCASFLAGS = ${AM_CFLAGS} +-else + AM_CCASFLAGS = ${yasm_args} ${INCLUDE} ${src_include} ${DEFS} ${D} + endif +-endif +- + if CPU_RISCV64 +-AM_CFLAGS += -march=rv64gcv_zbc_zvbc_zvbb ++AM_CCASFLAGS = ${AM_CFLAGS} + endif + + .asm.s: +diff --git a/configure.ac b/configure.ac +index f69ae10..2208ad6 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -40,16 +40,10 @@ AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"]) + AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"]) + AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"]) + AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"]) +- +-if test "$CPU" = "x86_64"; then +- is_x86=yes +-else +- if test "$CPU" = "x86_32"; then +- is_x86=yes +- else +- is_x86=no +- fi +-fi ++AM_CONDITIONAL([HAVE_RVV], [false]) 
++AM_CONDITIONAL([HAVE_ZBC], [false]) ++AM_CONDITIONAL([HAVE_ZVBC], [false]) ++AM_CONDITIONAL([HAVE_HWPROBE_H], [false]) + + # Check for programs + AC_PROG_CC_STDC +@@ -61,6 +55,90 @@ AC_PREFIX_DEFAULT([/usr]) + AC_PROG_SED + AC_PROG_MKDIR_P + ++case "${CPU}" in ++ ++ x86_64) ++ is_x86=yes ++ ;; ++ ++ x86_32) ++ is_x86=yes ++ ;; ++ ++ riscv64) ++ AC_CHECK_HEADER([asm/hwprobe.h], ++ [AC_DEFINE([HAVE_HWPROBE_H], [1], [Define if asm/hwprobe.h exists]) ++ AM_CONDITIONAL([HAVE_HWPROBE_H], [true]) hwprobe_h=yes], ++ [AC_DEFINE([HAVE_HWPROBE_H], [0], [Define if asm/hwprobe.h not exists]) ++ AM_CONDITIONAL([HAVE_HWPROBE_H], [false]) hwprobe_h=no] ++ ) ++ AC_MSG_CHECKING([RVV support]) ++ AC_COMPILE_IFELSE( ++ [AC_LANG_PROGRAM([], [ ++ __asm__ volatile( ++ ".option arch, +v\n" ++ "vsetivli zero, 0, e8, m1, ta, ma\n" ++ ); ++ ])], ++ [AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions]) ++ AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes], ++ [AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions]) ++ AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no] ++ ) ++ AC_MSG_RESULT([$rvv]) ++ if test "x$hwprobe_h" = "xyes"; then ++ AC_MSG_CHECKING([ZBC support]) ++ AC_COMPILE_IFELSE( ++ [AC_LANG_PROGRAM([#include ], [ ++ int a = RISCV_HWPROBE_EXT_ZBC; ++ __asm__ volatile( ++ ".option arch, +zbc\n" ++ "clmul zero, zero, zero\n" ++ "clmulh zero, zero, zero\n" ++ ); ++ ])], ++ [AC_DEFINE([HAVE_ZBC], [1], [Enable ZBC instructions]) ++ AM_CONDITIONAL([HAVE_ZBC], [true]) zbc=yes], ++ [AC_DEFINE([HAVE_ZBC], [0], [Disable ZBC instructions]) ++ AM_CONDITIONAL([HAVE_ZBC], [false]) zbc=no] ++ ) ++ AC_MSG_RESULT([$zbc]) ++ AC_MSG_CHECKING([ZVBC support]) ++ AC_COMPILE_IFELSE( ++ [AC_LANG_PROGRAM([#include ], [ ++ int a = RISCV_HWPROBE_EXT_ZVBC; ++ __asm__ volatile( ++ ".option arch, +v, +zvbc\n" ++ "vsetivli zero, 2, e64, m1, ta, ma\n" ++ "vmv.s.x v0, zero\n" ++ "vclmul.vv v0, v0, v0\n" ++ "vclmulh.vv v0, v0, v0\n" ++ ); ++ ])], ++ [AC_DEFINE([HAVE_ZVBC], [1], [Enable ZVBC instructions]) ++ AM_CONDITIONAL([HAVE_ZVBC], [true]) zvbc=yes], ++ [AC_DEFINE([HAVE_ZVBC], [0], [Disable ZVBC instructions]) ++ AM_CONDITIONAL([HAVE_ZVBC], [false]) zvbc=no] ++ ) ++ AC_MSG_RESULT([$zvbc]) ++ fi ++ if test "x$rvv" = "xyes"; then ++ rvv_arch="rv64gcv" ++ AS_IF([test "x$hwprobe_h" = "xyes"], ++ [AS_IF([test "x$zbc" = "xyes" && test "x$zvbc" = "xyes"], ++ [rvv_arch="rv64gcv_zbc_zvbc"] ++ )] ++ ) ++ CFLAGS+=" -march=$rvv_arch" ++ CCASFLAGS+=" -march=$rvv_arch" ++ fi ++ ;; ++ ++ *) ++ is_x86=no ++ ++esac ++ + # Options + AC_ARG_ENABLE([debug], + AS_HELP_STRING([--enable-debug], [enable debug messages @<:@default=disabled@:>@]), +diff --git a/crc/riscv64/Makefile.am b/crc/riscv64/Makefile.am +index b78dbe1..5c6b134 100644 +--- a/crc/riscv64/Makefile.am ++++ b/crc/riscv64/Makefile.am +@@ -26,7 +26,6 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- + lsrc_riscv64 += \ + crc/riscv64/crc_multibinary_riscv.S \ + crc/riscv64/crc_riscv64_dispatcher.c +diff --git a/crc/riscv64/crc16_t10dif_copy_vclmul.S b/crc/riscv64/crc16_t10dif_copy_vclmul.S +index 331e9ce..d05eaf3 100644 +--- a/crc/riscv64/crc16_t10dif_copy_vclmul.S ++++ b/crc/riscv64/crc16_t10dif_copy_vclmul.S +@@ -1,5 +1,5 @@ + ######################################################################## +-# Copyright(c) 2025 ZTE Corporation All rights reserved. ++# Copyright (c) 2025 ZTE Corporation. 
+ # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions +@@ -26,7 +26,7 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + .section .text + .align 2 + .global crc16_t10dif_copy_vclmul +@@ -74,47 +74,29 @@ crc16_t10dif_copy_vclmul: + .crc_fold: + # Initialize vector registers + vsetivli zero, 2, e64, m1, ta, ma +- vle64.v v6, 0(a2) +- addi a2, a2, 16 +- vle64.v v7, 0(a2) +- addi a2, a2, 16 +- vle64.v v8, 0(a2) +- addi a2, a2, 16 +- vle64.v v9, 0(a2) +- addi a2, a2, 16 ++ vl4re64.v v4, 0(a2) ++ addi a2, a2, 64 + addi a3, a3, -64 + +- vse64.v v6, (a1) +- addi a1, a1, 16 +- vse64.v v7, (a1) +- addi a1, a1, 16 +- vse64.v v8, (a1) +- addi a1, a1, 16 +- vse64.v v9, (a1) +- addi a1, a1, 16 ++ vs4r.v v4, (a1) ++ addi a1, a1, 64 + + # Prepare initial vector ++ la t0, .shuffle_data_mask ++ vsetivli zero, 16, e8, m1, ta, ma ++ vle8.v v13, 0(t0) + slli a0, a0, 32 +- vmv.s.x v4, zero +- vrev8.v v6, v6 +- vrev8.v v7, v7 +- vrev8.v v8, v8 +- vrev8.v v9, v9 +- vslidedown.vi v0, v6, 1 +- vslidedown.vi v1, v7, 1 +- vslidedown.vi v2, v8, 1 +- vslidedown.vi v3, v9, 1 +- vslideup.vi v0, v6, 1 +- vslideup.vi v1, v7, 1 +- vslideup.vi v2, v8, 1 +- vslideup.vi v3, v9, 1 ++ vrgather.vv v0, v4, v13 ++ vrgather.vv v1, v5, v13 ++ vrgather.vv v2, v6, v13 ++ vrgather.vv v3, v7, v13 ++ vsetivli zero, 2, e64, m1, ta, ma + + vmv.v.x v5, a0 ++ vmv.s.x v4, zero + vslideup.vi v4, v5, 1 + + vxor.vv v0, v0, v4 +- +- vmv.v.x v8, zero + bltu a3, t1, final_fold + + # Load constants +@@ -125,64 +107,55 @@ crc16_t10dif_copy_vclmul: + + # Main processing loop + loop_start: +- vle64.v v9, (a2) +- addi a2, a2, 16 +- vle64.v v10, (a2) +- addi a2, a2, 16 +- vle64.v v11, (a2) +- addi a2, a2, 16 +- vle64.v v12, (a2) +- addi a2, a2, 16 +- vse64.v v9, (a1) +- addi a1, a1, 16 +- vse64.v v10, (a1) +- addi a1, a1, 16 +- vse64.v v11, (a1) +- addi a1, a1, 16 +- vse64.v v12, (a1) +- addi a1, a1, 16 ++ vl4re64.v v8, (a2) ++ addi a2, a2, 64 ++ vs4r.v v8, (a1) ++ addi a1, a1, 64 + + vclmul.vv v4, v0, v5 + vclmulh.vv v0, v0, v5 +- vredxor.vs v0, v0, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v0, 1 +- vrev8.v v9, v9 +- vslidedown.vi v6, v9, 1 +- vslideup.vi v6, v9, 1 +- vxor.vv v0, v4, v6 ++ vslidedown.vi v15, v4, 1 ++ vslidedown.vi v14, v0, 1 ++ vxor.vv v15, v15, v4 ++ vxor.vv v14, v14, v0 ++ vslideup.vi v15, v14, 1 + + # Process v1-v3 (similar to v0) + + vclmul.vv v4, v1, v5 + vclmulh.vv v1, v1, v5 +- vredxor.vs v1, v1, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v1, 1 +- vrev8.v v10, v10 +- vslidedown.vi v6, v10, 1 +- vslideup.vi v6, v10, 1 +- vxor.vv v1, v4, v6 ++ vslidedown.vi v16, v4, 1 ++ vslidedown.vi v14, v1, 1 ++ vxor.vv v16, v16, v4 ++ vxor.vv v14, v14, v1 ++ vslideup.vi v16, v14, 1 + + vclmul.vv v4, v2, v5 + vclmulh.vv v2, v2, v5 +- vredxor.vs v2, v2, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v2, 1 +- vrev8.v v11, v11 +- vslidedown.vi v6, v11, 1 +- vslideup.vi v6, v11, 1 +- vxor.vv v2, v4, v6 ++ vslidedown.vi v17, v4, 1 ++ vslidedown.vi v14, v2, 1 ++ vxor.vv v17, v17, v4 ++ vxor.vv v14, v14, v2 ++ vslideup.vi v17, v14, 1 + + vclmul.vv v4, v3, v5 + vclmulh.vv v3, v3, v5 +- vredxor.vs v3, v3, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v3, 1 +- vrev8.v v12, v12 +- vslidedown.vi v6, v12, 1 +- vslideup.vi v6, v12, 1 +- vxor.vv 
v3, v4, v6 ++ vslidedown.vi v18, v4, 1 ++ vslidedown.vi v14, v3, 1 ++ vxor.vv v18, v18, v4 ++ vxor.vv v14, v14, v3 ++ vslideup.vi v18, v14, 1 ++ ++ vsetivli zero, 16, e8, m1, ta, ma ++ vrgather.vv v0, v8, v13 ++ vrgather.vv v1, v9, v13 ++ vrgather.vv v2, v10, v13 ++ vrgather.vv v3, v11, v13 ++ vsetivli zero, 2, e64, m1, ta, ma ++ vxor.vv v0, v0, v15 ++ vxor.vv v1, v1, v16 ++ vxor.vv v2, v2, v17 ++ vxor.vv v3, v3, v18 + + addi a3, a3, -64 + bge a3, t0, loop_start +@@ -192,22 +165,30 @@ final_fold: + vle64.v v5, 0(t0) + vclmul.vv v6, v0, v5 + vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v1 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v1 ++ + vclmul.vv v6, v0, v5 + vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v2 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v2 ++ + vclmul.vv v6, v0, v5 + vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v3 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v3 + + # Store result + addi sp, sp, -16 +@@ -229,10 +210,10 @@ final_fold: + + # Barrett reduction + srli a5, a4, 32 +- li t2, 0x1f65a57f9 # x_quo ++ li t2, 0x1f65a57f9 + clmul a5, t2, a5 + srli a5, a5, 32 +- li t3, 0x18bb70000 # x_poly ++ li t3, 0x18bb70000 + clmul a5, a5, t3 + xor a0, a5, a4 + addi sp, sp, 16 +@@ -241,7 +222,6 @@ tail_processing: + # Process remaining bytes + beqz a3, .end + +- # Call crc16_t10dif_generic equivalent for remaining bytes + jal x0, .crc_table_loop_pre + + .section .rodata +@@ -259,8 +239,8 @@ k_const2: + .quad 0x4c1a0000 + + .LANCHOR0 = . + 0 +- .type crc16tab, %object +- .size crc16tab, 1024 ++ .type crc16tab, %object ++ .size crc16tab, 1024 + crc16tab: + .word 0x00000000, 0x8bb70000, 0x9cd90000, 0x176e0000, 0xb2050000, 0x39b20000, 0x2edc0000, 0xa56b0000 + .word 0xefbd0000, 0x640a0000, 0x73640000, 0xf8d30000, 0x5db80000, 0xd60f0000, 0xc1610000, 0x4ad60000 +@@ -295,4 +275,11 @@ crc16tab: + .word 0x1f650000, 0x94d20000, 0x83bc0000, 0x080b0000, 0xad600000, 0x26d70000, 0x31b90000, 0xba0e0000 + .word 0xf0d80000, 0x7b6f0000, 0x6c010000, 0xe7b60000, 0x42dd0000, 0xc96a0000, 0xde040000, 0x55b30000 + ++.shuffle_data_mask = . + 0 ++ .type shuffle_data, %object ++ .size shuffle_data, 16 ++shuffle_data: ++ .byte 15, 14, 13, 12, 11, 10, 9, 8 ++ .byte 7, 6, 5, 4, 3, 2, 1, 0 + ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc16_t10dif_vclmul.S b/crc/riscv64/crc16_t10dif_vclmul.S +index 42a9f2f..c8eaef8 100644 +--- a/crc/riscv64/crc16_t10dif_vclmul.S ++++ b/crc/riscv64/crc16_t10dif_vclmul.S +@@ -1,5 +1,5 @@ + ######################################################################## +-# Copyright(c) 2025 ZTE Corporation All rights reserved. ++# Copyright (c) 2025 ZTE Corporation. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions +@@ -26,7 +26,7 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + .section .text + .align 2 + .global crc16_t10dif_vclmul +@@ -40,7 +40,7 @@ + + crc16_t10dif_vclmul: + # Initialize state +- slli a0, a0, 16 # state = crc << 16 ++ slli a0, a0, 16 + + # Check if length >= 64 + li t1, 64 +@@ -72,38 +72,27 @@ crc16_t10dif_vclmul: + .crc_fold: + # Initialize vector registers + vsetivli zero, 2, e64, m1, ta, ma +- vle64.v v6, 0(a1) +- addi a1, a1, 16 +- vle64.v v7, 0(a1) +- addi a1, a1, 16 +- vle64.v v8, 0(a1) +- addi a1, a1, 16 +- vle64.v v9, 0(a1) +- addi a1, a1, 16 ++ ++ vl4re64.v v4, (a1) ++ addi a1, a1, 64 + addi a2, a2, -64 + + # Prepare initial vector ++ la t0, .shuffle_data_mask ++ vsetivli zero, 16, e8, m1, ta, ma ++ vle8.v v13, 0(t0) + slli a0, a0, 32 +- vmv.s.x v4, zero +- vrev8.v v6, v6 +- vrev8.v v7, v7 +- vrev8.v v8, v8 +- vrev8.v v9, v9 +- vslidedown.vi v0, v6, 1 +- vslidedown.vi v1, v7, 1 +- vslidedown.vi v2, v8, 1 +- vslidedown.vi v3, v9, 1 +- vslideup.vi v0, v6, 1 +- vslideup.vi v1, v7, 1 +- vslideup.vi v2, v8, 1 +- vslideup.vi v3, v9, 1 ++ vrgather.vv v0, v4, v13 ++ vrgather.vv v1, v5, v13 ++ vrgather.vv v2, v6, v13 ++ vrgather.vv v3, v7, v13 ++ vsetivli zero, 2, e64, m1, ta, ma + + vmv.v.x v5, a0 ++ vmv.s.x v4, zero + vslideup.vi v4, v5, 1 + + vxor.vv v0, v0, v4 +- +- vmv.v.x v8, zero + bltu a2, t1, final_fold + + # Load constants +@@ -112,56 +101,54 @@ crc16_t10dif_vclmul: + + # Main processing loop + loop_start: +- vle64.v v9, (a1) +- addi a1, a1, 16 +- vle64.v v10, (a1) +- addi a1, a1, 16 +- vle64.v v11, (a1) +- addi a1, a1, 16 +- vle64.v v12, (a1) +- addi a1, a1, 16 ++ ++ vl4re64.v v8, (a1) ++ addi a1, a1, 64 + + vclmul.vv v4, v0, v5 + vclmulh.vv v0, v0, v5 +- vredxor.vs v0, v0, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v0, 1 +- vrev8.v v9, v9 +- vslidedown.vi v6, v9, 1 +- vslideup.vi v6, v9, 1 +- vxor.vv v0, v4, v6 ++ vslidedown.vi v15, v4, 1 ++ vslidedown.vi v14, v0, 1 ++ vxor.vv v15, v15, v4 ++ vxor.vv v14, v14, v0 ++ vslideup.vi v15, v14, 1 + + # Process v1-v3 (similar to v0) + + vclmul.vv v4, v1, v5 + vclmulh.vv v1, v1, v5 +- vredxor.vs v1, v1, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v1, 1 +- vrev8.v v10, v10 +- vslidedown.vi v6, v10, 1 +- vslideup.vi v6, v10, 1 +- vxor.vv v1, v4, v6 ++ vslidedown.vi v16, v4, 1 ++ vslidedown.vi v14, v1, 1 ++ vxor.vv v16, v16, v4 ++ vxor.vv v14, v14, v1 ++ vslideup.vi v16, v14, 1 + + vclmul.vv v4, v2, v5 + vclmulh.vv v2, v2, v5 +- vredxor.vs v2, v2, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v2, 1 +- vrev8.v v11, v11 +- vslidedown.vi v6, v11, 1 +- vslideup.vi v6, v11, 1 +- vxor.vv v2, v4, v6 ++ vslidedown.vi v17, v4, 1 ++ vslidedown.vi v14, v2, 1 ++ vxor.vv v17, v17, v4 ++ vxor.vv v14, v14, v2 ++ vslideup.vi v17, v14, 1 + + vclmul.vv v4, v3, v5 + vclmulh.vv v3, v3, v5 +- vredxor.vs v3, v3, v8 +- vredxor.vs v4, v4, v8 +- vslideup.vi v4, v3, 1 +- vrev8.v v12, v12 +- vslidedown.vi v6, v12, 1 +- vslideup.vi v6, v12, 1 +- vxor.vv v3, v4, v6 ++ vslidedown.vi v18, v4, 1 ++ vslidedown.vi v14, v3, 1 ++ vxor.vv v18, v18, v4 ++ vxor.vv v14, v14, v3 ++ vslideup.vi v18, v14, 1 ++ ++ vsetivli zero, 16, e8, m1, ta, ma ++ vrgather.vv v0, v8, v13 ++ vrgather.vv v1, v9, v13 ++ vrgather.vv v2, v10, v13 ++ vrgather.vv v3, v11, v13 ++ vsetivli zero, 2, e64, m1, ta, ma ++ vxor.vv v0, v0, v15 ++ vxor.vv v1, v1, v16 ++ vxor.vv v2, v2, v17 ++ vxor.vv v3, v3, v18 + + addi a2, a2, -64 + bge a2, t1, loop_start +@@ -171,24 +158,30 @@ final_fold: + vle64.v v5, 0(t0) + vclmul.vv v6, v0, v5 + 
vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v1 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v1 + + vclmul.vv v6, v0, v5 + vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v2 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v2 + + vclmul.vv v6, v0, v5 + vclmulh.vv v7, v0, v5 +- vredxor.vs v6, v6, v8 +- vredxor.vs v7, v7, v8 +- vslideup.vi v6, v7, 1 +- vxor.vv v0, v6, v3 ++ vslidedown.vi v8, v6, 1 ++ vslidedown.vi v9, v7, 1 ++ vxor.vv v8, v8, v6 ++ vxor.vv v9, v9, v7 ++ vslideup.vi v8, v9, 1 ++ vxor.vv v0, v8, v3 + + # Store result + addi sp, sp, -16 +@@ -211,10 +204,10 @@ final_fold: + + # Barrett reduction + srli a5, a4, 32 +- li t2, 0x1f65a57f9 # x_quo ++ li t2, 0x1f65a57f9 + clmul a5, t2, a5 + srli a5, a5, 32 +- li t4, 0x18bb70000 # x_poly ++ li t4, 0x18bb70000 + clmul a5, a5, t4 + xor a0, a5, a4 + +@@ -222,7 +215,6 @@ tail_processing: + # Process remaining bytes + beqz a2, .end + +- # Call crc16_t10dif_generic equivalent for remaining bytes + jal x0, .crc_table_loop_pre + + .section .rodata +@@ -240,8 +232,8 @@ k_const2: + .quad 0x4c1a0000 + + .LANCHOR0 = . + 0 +- .type crc16tab, %object +- .size crc16tab, 1024 ++ .type crc16tab, %object ++ .size crc16tab, 1024 + crc16tab: + .word 0x00000000, 0x8bb70000, 0x9cd90000, 0x176e0000, 0xb2050000, 0x39b20000, 0x2edc0000, 0xa56b0000 + .word 0xefbd0000, 0x640a0000, 0x73640000, 0xf8d30000, 0x5db80000, 0xd60f0000, 0xc1610000, 0x4ad60000 +@@ -276,4 +268,11 @@ crc16tab: + .word 0x1f650000, 0x94d20000, 0x83bc0000, 0x080b0000, 0xad600000, 0x26d70000, 0x31b90000, 0xba0e0000 + .word 0xf0d80000, 0x7b6f0000, 0x6c010000, 0xe7b60000, 0x42dd0000, 0xc96a0000, 0xde040000, 0x55b30000 + ++.shuffle_data_mask = . + 0 ++ .type shuffle_data, %object ++ .size shuffle_data, 16 ++shuffle_data: ++ .byte 15, 14, 13, 12, 11, 10, 9, 8 ++ .byte 7, 6, 5, 4, 3, 2, 1, 0 + ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc32_gzip_refl_vclmul.S b/crc/riscv64/crc32_gzip_refl_vclmul.S +index 4a32b7c..c327816 100644 +--- a/crc/riscv64/crc32_gzip_refl_vclmul.S ++++ b/crc/riscv64/crc32_gzip_refl_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc32_gzip_refl_vclmul.h" + #include "crc32_refl_common_vclmul.h" + +-crc32_refl_func crc32_gzip_refl_vclmul +\ No newline at end of file ++crc32_refl_func crc32_gzip_refl_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc32_gzip_refl_vclmul.h b/crc/riscv64/crc32_gzip_refl_vclmul.h +index 3fee53e..d91dab6 100644 +--- a/crc/riscv64/crc32_gzip_refl_vclmul.h ++++ b/crc/riscv64/crc32_gzip_refl_vclmul.h +@@ -35,7 +35,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 16 + const_2: +@@ -45,10 +45,10 @@ const_2: + .quad 0xccaa009e + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc32_table_gzip_refl, %object +- .size crc32_table_gzip_refl, 1024 ++ .align 4 ++ .set .lanchor_crc_tab,. 
+ 0 ++ .type crc32_table_gzip_refl, %object ++ .size crc32_table_gzip_refl, 1024 + crc32_table_gzip_refl: + .word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3 + .word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91 +@@ -81,4 +81,4 @@ crc32_table_gzip_refl: + .word 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db + .word 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9 + .word 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf +- .word 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d ++ .word 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +\ No newline at end of file +diff --git a/crc/riscv64/crc32_ieee_norm_vclmul.S b/crc/riscv64/crc32_ieee_norm_vclmul.S +index a909c7b..6005f04 100644 +--- a/crc/riscv64/crc32_ieee_norm_vclmul.S ++++ b/crc/riscv64/crc32_ieee_norm_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc32_ieee_norm_vclmul.h" + #include "crc32_norm_common_vclmul.h" + +-crc32_norm_func crc32_ieee_norm_vclmul +\ No newline at end of file ++crc32_norm_func crc32_ieee_norm_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc32_ieee_norm_vclmul.h b/crc/riscv64/crc32_ieee_norm_vclmul.h +index 93443bf..71a6058 100644 +--- a/crc/riscv64/crc32_ieee_norm_vclmul.h ++++ b/crc/riscv64/crc32_ieee_norm_vclmul.h +@@ -35,7 +35,7 @@ + .section .rodata + .text + .align 4 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 32 + const_2: +@@ -45,10 +45,10 @@ const_2: + .quad 0xc5b9cd4c + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc32_table_ieee_norm, %object +- .size crc32_table_ieee_norm, 1024 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc32_table_ieee_norm, %object ++ .size crc32_table_ieee_norm, 1024 + crc32_table_ieee_norm: + .word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005 + .word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd +@@ -81,4 +81,4 @@ crc32_table_ieee_norm: + .word 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c + .word 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4 + .word 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c +- .word 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 ++ .word 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +\ No newline at end of file +diff --git a/crc/riscv64/crc32_iscsi_refl_vclmul.S b/crc/riscv64/crc32_iscsi_refl_vclmul.S +index 3b5b355..1534238 100644 +--- a/crc/riscv64/crc32_iscsi_refl_vclmul.S ++++ b/crc/riscv64/crc32_iscsi_refl_vclmul.S +@@ -26,16 +26,16 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc32_iscsi_refl_vclmul.h" + #include "crc32_refl_common_vclmul.h" + + crc32_refl_func crc32_iscsi_refl_vclmul_internal + + .text +- .align 3 +- .global crc32_iscsi_refl_vclmul +- .type crc32_iscsi_refl_vclmul, %function ++ .align 3 ++ .global crc32_iscsi_refl_vclmul ++ .type crc32_iscsi_refl_vclmul, %function + crc32_iscsi_refl_vclmul: + mv a7, a2 + sext.w a2, a1 +@@ -53,4 +53,5 @@ crc32_iscsi_refl_vclmul: + addi sp, sp, 8 + xori a0, a0, -1 + and a0, a0, t5 +- ret +\ No newline at end of file ++ ret ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc32_iscsi_refl_vclmul.h b/crc/riscv64/crc32_iscsi_refl_vclmul.h +index df87fdb..54247e9 100644 +--- a/crc/riscv64/crc32_iscsi_refl_vclmul.h ++++ b/crc/riscv64/crc32_iscsi_refl_vclmul.h +@@ -35,7 +35,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 16 + const_2: +@@ -45,10 +45,10 @@ const_2: + .quad 0x493c7d27 + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc32_table_iscsi_refl, %object +- .size crc32_table_iscsi_refl, 1024 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc32_table_iscsi_refl, %object ++ .size crc32_table_iscsi_refl, 1024 + crc32_table_iscsi_refl: + .word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB + .word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24 +@@ -81,4 +81,4 @@ crc32_table_iscsi_refl: + .word 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1 + .word 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E + .word 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E +- .word 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 ++ .word 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 +\ No newline at end of file +diff --git a/crc/riscv64/crc32_norm_common_vclmul.h b/crc/riscv64/crc32_norm_common_vclmul.h +index fe021a7..07ed3d3 100644 +--- a/crc/riscv64/crc32_norm_common_vclmul.h ++++ b/crc/riscv64/crc32_norm_common_vclmul.h +@@ -31,7 +31,7 @@ + + .macro crc32_norm_func name + .text +- .align 3 ++ .align 3 + .type \name, @function + .global \name + \name: +@@ -67,11 +67,8 @@ + + .align 2 + .crc_clmul_pre: +- vsetivli zero, 2, e64, m1, ta, ma + slli seed, seed, 32 +- + crc_norm_load_first_block +- vmv.s.x vec_zero, zero + crc_load_p4 + addi tmp_0, len, -64 + bltu tmp_0, tmp_1, .clmul_loop_end +@@ -107,5 +104,10 @@ + xor seed, tmp_1, tmp_4 + + j .crc_tab_pre +- .size \name, .-\name ++ .size \name, .-\name ++ .section .rodata.cst16,"aM",@progbits,16 ++ .align 4 ++.shuffle_data: ++ .byte 15, 14, 13, 12, 11, 10, 9, 8 ++ .byte 7, 6, 5, 4, 3, 2, 1, 0 + .endm +\ No newline at end of file +diff --git a/crc/riscv64/crc32_refl_common_vclmul.h b/crc/riscv64/crc32_refl_common_vclmul.h +index 2e2461d..fd64a16 100644 +--- a/crc/riscv64/crc32_refl_common_vclmul.h ++++ b/crc/riscv64/crc32_refl_common_vclmul.h +@@ -31,7 +31,7 @@ + + .macro crc32_refl_func name + .text +- .align 3 ++ .align 3 + .type \name, @function + .global \name + \name: +@@ -71,7 +71,6 @@ + .crc_clmul_pre: + vsetivli zero, 2, e64, m1, ta, ma + crc_refl_load_first_block +- vmv.s.x vec_zero, zero + 
crc_load_p4 + addi tmp_0, len, -64 + bltu tmp_0, tmp_1, .clmul_loop_end +@@ -108,5 +107,5 @@ + srai seed, tmp_4, 0x20 + + j .crc_tab_pre +- .size \name, .-\name ++ .size \name, .-\name + .endm +\ No newline at end of file +diff --git a/crc/riscv64/crc64_ecma_norm_vclmul.S b/crc/riscv64/crc64_ecma_norm_vclmul.S +index b0ec7ce..a7c13d0 100644 +--- a/crc/riscv64/crc64_ecma_norm_vclmul.S ++++ b/crc/riscv64/crc64_ecma_norm_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_ecma_norm_vclmul.h" + #include "crc64_norm_common_vclmul.h" + +-crc64_norm_func crc64_ecma_norm_vclmul +\ No newline at end of file ++crc64_norm_func crc64_ecma_norm_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_ecma_norm_vclmul.h b/crc/riscv64/crc64_ecma_norm_vclmul.h +index 7e4fe07..579b1af 100644 +--- a/crc/riscv64/crc64_ecma_norm_vclmul.h ++++ b/crc/riscv64/crc64_ecma_norm_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 4 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0x4eb938a7d257740e + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc64_table_ecma_norm, %object +- .size crc64_table_ecma_norm, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc64_table_ecma_norm, %object ++ .size crc64_table_ecma_norm, 2048 + crc64_table_ecma_norm: + .dword 0x0000000000000000, 0x42f0e1eba9ea3693 + .dword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 +diff --git a/crc/riscv64/crc64_ecma_refl_vclmul.S b/crc/riscv64/crc64_ecma_refl_vclmul.S +index 3dc92a5..7d352cc 100644 +--- a/crc/riscv64/crc64_ecma_refl_vclmul.S ++++ b/crc/riscv64/crc64_ecma_refl_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_ecma_refl_vclmul.h" + #include "crc64_refl_common_vclmul.h" + +-crc64_refl_func crc64_ecma_refl_vclmul +\ No newline at end of file ++crc64_refl_func crc64_ecma_refl_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_ecma_refl_vclmul.h b/crc/riscv64/crc64_ecma_refl_vclmul.h +index a3d7632..25a0a2a 100644 +--- a/crc/riscv64/crc64_ecma_refl_vclmul.h ++++ b/crc/riscv64/crc64_ecma_refl_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0xdabe95afc7875f40 + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc64_table_ecma_refl, %object +- .size crc64_table_ecma_refl, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. 
+ 0 ++ .type crc64_table_ecma_refl, %object ++ .size crc64_table_ecma_refl, 2048 + crc64_table_ecma_refl: + .dword 0x0000000000000000, 0xb32e4cbe03a75f6f + .dword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 +@@ -176,4 +176,4 @@ crc64_table_ecma_refl: + .dword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131 + .dword 0x28532e49984f3e05, 0x9b7d62f79be8616a + .dword 0xa707db9acf80c06d, 0x14299724cc279f02 +- .dword 0x5383edcd67c06036, 0xe0ada17364673f59 ++ .dword 0x5383edcd67c06036, 0xe0ada17364673f59 +\ No newline at end of file +diff --git a/crc/riscv64/crc64_iso_norm_vclmul.S b/crc/riscv64/crc64_iso_norm_vclmul.S +index 93f1290..4a53d44 100644 +--- a/crc/riscv64/crc64_iso_norm_vclmul.S ++++ b/crc/riscv64/crc64_iso_norm_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_iso_norm_vclmul.h" + #include "crc64_norm_common_vclmul.h" + +-crc64_norm_func crc64_iso_norm_vclmul +\ No newline at end of file ++crc64_norm_func crc64_iso_norm_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_iso_norm_vclmul.h b/crc/riscv64/crc64_iso_norm_vclmul.h +index 3fe52ef..eca31cd 100644 +--- a/crc/riscv64/crc64_iso_norm_vclmul.h ++++ b/crc/riscv64/crc64_iso_norm_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0x0000000000001db7 + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc64_table_iso_norm, %object +- .size crc64_table_iso_norm, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc64_table_iso_norm, %object ++ .size crc64_table_iso_norm, 2048 + crc64_table_iso_norm: + .dword 0x0000000000000000, 0x000000000000001b + .dword 0x0000000000000036, 0x000000000000002d +@@ -176,4 +176,4 @@ crc64_table_iso_norm: + .dword 0x0000000000000948, 0x0000000000000953 + .dword 0x000000000000097e, 0x0000000000000965 + .dword 0x0000000000000924, 0x000000000000093f +- .dword 0x0000000000000912, 0x0000000000000909 ++ .dword 0x0000000000000912, 0x0000000000000909 +\ No newline at end of file +diff --git a/crc/riscv64/crc64_iso_refl_vclmul.S b/crc/riscv64/crc64_iso_refl_vclmul.S +index 9e3a9b8..4f3c18f 100644 +--- a/crc/riscv64/crc64_iso_refl_vclmul.S ++++ b/crc/riscv64/crc64_iso_refl_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_iso_refl_vclmul.h" + #include "crc64_refl_common_vclmul.h" + +-crc64_refl_func crc64_iso_refl_vclmul +\ No newline at end of file ++crc64_refl_func crc64_iso_refl_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_iso_refl_vclmul.h b/crc/riscv64/crc64_iso_refl_vclmul.h +index 5b2ad8c..cae418e 100644 +--- a/crc/riscv64/crc64_iso_refl_vclmul.h ++++ b/crc/riscv64/crc64_iso_refl_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0xf500000000000001 + + .text +- .align 4 +- .set .lanchor_crc_tab,. 
+ 0 +- .type crc64_table_iso_refl, %object +- .size crc64_table_iso_refl, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc64_table_iso_refl, %object ++ .size crc64_table_iso_refl, 2048 + crc64_table_iso_refl: + .dword 0x0000000000000000, 0x01b0000000000000 + .dword 0x0360000000000000, 0x02d0000000000000 +@@ -176,4 +176,4 @@ crc64_table_iso_refl: + .dword 0x9480000000000000, 0x9530000000000000 + .dword 0x97e0000000000000, 0x9650000000000000 + .dword 0x9240000000000000, 0x93f0000000000000 +- .dword 0x9120000000000000, 0x9090000000000000 ++ .dword 0x9120000000000000, 0x9090000000000000 +\ No newline at end of file +diff --git a/crc/riscv64/crc64_jones_norm_vclmul.S b/crc/riscv64/crc64_jones_norm_vclmul.S +index 17630f7..fdfd799 100644 +--- a/crc/riscv64/crc64_jones_norm_vclmul.S ++++ b/crc/riscv64/crc64_jones_norm_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_jones_norm_vclmul.h" + #include "crc64_norm_common_vclmul.h" + +-crc64_norm_func crc64_jones_norm_vclmul +\ No newline at end of file ++crc64_norm_func crc64_jones_norm_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_jones_norm_vclmul.h b/crc/riscv64/crc64_jones_norm_vclmul.h +index 9fe71bc..0e252c7 100644 +--- a/crc/riscv64/crc64_jones_norm_vclmul.h ++++ b/crc/riscv64/crc64_jones_norm_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 4 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. + 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0x698b74157cfbd736 + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc64_table_jones_norm, %object +- .size crc64_table_jones_norm, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc64_table_jones_norm, %object ++ .size crc64_table_jones_norm, 2048 + crc64_table_jones_norm: + .dword 0x0000000000000000, 0xad93d23594c935a9 + .dword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52 +@@ -176,4 +176,4 @@ crc64_table_jones_norm: + .dword 0x0f3dad1425e60e99, 0xa2ae7f21b12f3b30 + .dword 0xf989db4a98bd5062, 0x541a097f0c7465cb + .dword 0x4fc6939ccb9986c6, 0xe25541a95f50b36f +- .dword 0xb972e5c276c2d83d, 0x14e137f7e20bed94 ++ .dword 0xb972e5c276c2d83d, 0x14e137f7e20bed94 +\ No newline at end of file +diff --git a/crc/riscv64/crc64_jones_refl_vclmul.S b/crc/riscv64/crc64_jones_refl_vclmul.S +index aeb91cf..af4bcff 100644 +--- a/crc/riscv64/crc64_jones_refl_vclmul.S ++++ b/crc/riscv64/crc64_jones_refl_vclmul.S +@@ -26,8 +26,9 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################### +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + #include "crc64_jones_refl_vclmul.h" + #include "crc64_refl_common_vclmul.h" + +-crc64_refl_func crc64_jones_refl_vclmul +\ No newline at end of file ++crc64_refl_func crc64_jones_refl_vclmul ++#endif +\ No newline at end of file +diff --git a/crc/riscv64/crc64_jones_refl_vclmul.h b/crc/riscv64/crc64_jones_refl_vclmul.h +index feb949d..faea6a9 100644 +--- a/crc/riscv64/crc64_jones_refl_vclmul.h ++++ b/crc/riscv64/crc64_jones_refl_vclmul.h +@@ -34,7 +34,7 @@ + .section .rodata + .text + .align 3 +- .set .crc_loop_const,. + 0 ++ .set .crc_loop_const,. 
+ 0 + .type const_2, %object + .size const_2, 64 + const_2: +@@ -44,10 +44,10 @@ const_2: + .quad 0x381d0015c96f4444 + + .text +- .align 4 +- .set .lanchor_crc_tab,. + 0 +- .type crc64_table_jones_refl, %object +- .size crc64_table_jones_refl, 2048 ++ .align 4 ++ .set .lanchor_crc_tab,. + 0 ++ .type crc64_table_jones_refl, %object ++ .size crc64_table_jones_refl, 2048 + crc64_table_jones_refl: + .dword 0x0000000000000000, 0x7ad870c830358979 + .dword 0xf5b0e190606b12f2, 0x8f689158505e9b8b +@@ -176,4 +176,4 @@ crc64_table_jones_refl: + .dword 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355 + .dword 0x935745fc4798b8de, 0xe98f353477ad31a7 + .dword 0xa6df411fbfb21ca3, 0xdc0731d78f8795da +- .dword 0x536fa08fdfd90e51, 0x29b7d047efec8728 ++ .dword 0x536fa08fdfd90e51, 0x29b7d047efec8728 +\ No newline at end of file +diff --git a/crc/riscv64/crc64_norm_common_vclmul.h b/crc/riscv64/crc64_norm_common_vclmul.h +index 2e6e46e..cfc9338 100644 +--- a/crc/riscv64/crc64_norm_common_vclmul.h ++++ b/crc/riscv64/crc64_norm_common_vclmul.h +@@ -31,7 +31,7 @@ + + .macro crc64_norm_func name + .text +- .align 3 ++ .align 3 + .type \name, @function + .global \name + \name: +@@ -65,10 +65,7 @@ + + .align 2 + .crc_clmul_pre: +- vsetivli zero, 2, e64, m1, ta, ma +- vmv.s.x vec_zero, zero + crc_norm_load_first_block +- vmv.s.x vec_zero, zero + crc_load_p4 + addi tmp_0, len, -64 + bltu tmp_0, tmp_1, .clmul_loop_end +@@ -98,5 +95,10 @@ + xor seed, tmp_1, tmp_4 + + j .crc_tab_pre +- .size \name, .-\name ++ .size \name, .-\name ++ .section .rodata.cst16,"aM",@progbits,16 ++ .align 4 ++.shuffle_data: ++ .byte 15, 14, 13, 12, 11, 10, 9, 8 ++ .byte 7, 6, 5, 4, 3, 2, 1, 0 + .endm +\ No newline at end of file +diff --git a/crc/riscv64/crc64_refl_common_vclmul.h b/crc/riscv64/crc64_refl_common_vclmul.h +index 6664518..17b3472 100644 +--- a/crc/riscv64/crc64_refl_common_vclmul.h ++++ b/crc/riscv64/crc64_refl_common_vclmul.h +@@ -31,7 +31,7 @@ + + .macro crc64_refl_func name + .text +- .align 3 ++ .align 3 + .type \name, @function + .global \name + \name: +@@ -66,9 +66,7 @@ + .align 2 + .crc_clmul_pre: + vsetivli zero, 2, e64, m1, ta, ma +- vmv.s.x vec_zero, zero + crc_refl_load_first_block +- vmv.s.x vec_zero, zero + crc_load_p4 + addi tmp_0, len, -64 + bltu tmp_0, tmp_1, .clmul_loop_end +@@ -97,5 +95,5 @@ + xor seed, tmp_4, tmp_5 + + j .crc_tab_pre +- .size \name, .-\name ++ .size \name, .-\name + .endm +\ No newline at end of file +diff --git a/crc/riscv64/crc_common_vclmul.h b/crc/riscv64/crc_common_vclmul.h +index f0c8be7..cc7256d 100644 +--- a/crc/riscv64/crc_common_vclmul.h ++++ b/crc/riscv64/crc_common_vclmul.h +@@ -26,7 +26,6 @@ + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ######################################################################### +- + // parameters + #define seed a0 + #define buf a1 +@@ -56,20 +55,22 @@ + #define vec_5 v5 + #define vec_6 v6 + #define vec_7 v7 +-#define vec_zero v8 + #define vec_8 v8 + #define vec_9 v9 ++#define vec_10 v10 ++#define vec_11 v11 ++#define vec_12 v12 ++#define vec_shuffle v13 ++#define vec_14 v14 ++#define vec_15 v15 ++#define vec_16 v16 ++#define vec_17 v17 ++#define vec_18 v18 + + .macro crc_refl_load_first_block + mv buf_iter, buf +- vle64.v vec_0, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_1, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_2, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_3, 0(buf_iter) +- addi buf_iter, buf_iter, 16 ++ vl4re64.v vec_0, 0(buf_iter) ++ addi buf_iter, buf_iter, 64 + andi counter, len, ~63 + addi tmp_0, counter, -64 + +@@ -80,33 +81,25 @@ + .endm + + .macro crc_norm_load_first_block ++ la tmp_0, .shuffle_data + mv buf_iter, buf +- vle64.v vec_6, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_8, 0(buf_iter) +- addi buf_iter, buf_iter, 16 +- vle64.v vec_9, 0(buf_iter) +- addi buf_iter, buf_iter, 16 ++ vl4re64.v vec_4, (buf_iter) ++ ++ vsetivli zero, 16, e8, m1, ta, ma ++ vle8.v vec_shuffle, 0(tmp_0) ++ vrgather.vv vec_0, vec_4, vec_shuffle ++ vrgather.vv vec_1, vec_5, vec_shuffle ++ vrgather.vv vec_2, vec_6, vec_shuffle ++ vrgather.vv vec_3, vec_7, vec_shuffle ++ vsetivli zero, 2, e64, m1, ta, ma ++ ++ addi buf_iter, buf_iter, 64 + andi counter, len, ~63 + addi tmp_0, counter, -64 + + vmv.s.x vec_4, zero + vmv.s.x vec_5, seed + vslideup.vi vec_4, vec_5, 1 +- vrev8.v vec_6, vec_6 +- vrev8.v vec_7, vec_7 +- vrev8.v vec_8, vec_8 +- vrev8.v vec_9, vec_9 +- vslidedown.vi vec_0, vec_6, 1 +- vslidedown.vi vec_1, vec_7, 1 +- vslidedown.vi vec_2, vec_8, 1 +- vslidedown.vi vec_3, vec_9, 1 +- vslideup.vi vec_0, vec_6, 1 +- vslideup.vi vec_1, vec_7, 1 +- vslideup.vi vec_2, vec_8, 1 +- vslideup.vi vec_3, vec_9, 1 + vxor.vv vec_0, vec_0, vec_4 + .endm + +@@ -119,96 +112,98 @@ + .macro crc_refl_loop + .align 3 + .clmul_loop: +- vle64.v vec_7, 0(buf_iter) ++ vl4re64.v vec_8, (buf_iter) ++ + vclmul.vv vec_4, vec_0, vec_5 + vclmulh.vv vec_0, vec_0, vec_5 +- vredxor.vs vec_0, vec_0, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_0, 1 +- vxor.vv vec_0, vec_4, vec_7 ++ vslidedown.vi vec_15, vec_4, 1 ++ vslidedown.vi vec_14, vec_0, 1 ++ vxor.vv vec_15, vec_15, vec_4 ++ vxor.vv vec_14, vec_14, vec_0 ++ vslideup.vi vec_15, vec_14, 1 + +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) + vclmul.vv vec_4, vec_1, vec_5 + vclmulh.vv vec_1, vec_1, vec_5 +- vredxor.vs vec_1, vec_1, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_1, 1 +- vxor.vv vec_1, vec_4, vec_7 ++ vslidedown.vi vec_16, vec_4, 1 ++ vslidedown.vi vec_14, vec_1, 1 ++ vxor.vv vec_16, vec_16, vec_4 ++ vxor.vv vec_14, vec_14, vec_1 ++ vslideup.vi vec_16, vec_14, 1 + +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) + vclmul.vv vec_4, vec_2, vec_5 + vclmulh.vv vec_2, vec_2, vec_5 +- vredxor.vs vec_2, vec_2, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_2, 1 +- vxor.vv vec_2, vec_4, vec_7 ++ vslidedown.vi vec_17, vec_4, 1 ++ vslidedown.vi vec_14, vec_2, 1 ++ vxor.vv vec_17, vec_17, vec_4 ++ vxor.vv vec_14, vec_14, vec_2 ++ vslideup.vi vec_17, vec_14, 1 + +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) + vclmul.vv vec_4, vec_3, vec_5 
+ vclmulh.vv vec_3, vec_3, vec_5 +- vredxor.vs vec_3, vec_3, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_3, 1 +- vxor.vv vec_3, vec_4, vec_7 +- +- addi buf_iter, buf_iter, 16 ++ vslidedown.vi vec_18, vec_4, 1 ++ vslidedown.vi vec_14, vec_3, 1 ++ vxor.vv vec_18, vec_18, vec_4 ++ vxor.vv vec_14, vec_14, vec_3 ++ vslideup.vi vec_18, vec_14, 1 ++ ++ vxor.vv vec_0, vec_8, vec_15 ++ vxor.vv vec_1, vec_9, vec_16 ++ vxor.vv vec_2, vec_10, vec_17 ++ vxor.vv vec_3, vec_11, vec_18 ++ ++ addi buf_iter, buf_iter, 64 + bne buf_iter, buf_end, .clmul_loop + .endm + + .macro crc_norm_loop +- .align 3 ++ .align 3 + .clmul_loop: +- vle64.v vec_7, 0(buf_iter) ++ vl4re64.v vec_8, (buf_iter) ++ + vclmul.vv vec_4, vec_0, vec_5 + vclmulh.vv vec_0, vec_0, vec_5 +- vredxor.vs vec_0, vec_0, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_0, 1 +- vrev8.v vec_7, vec_7 +- vslidedown.vi vec_6, vec_7, 1 +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_0, vec_4, vec_6 +- +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) ++ vslidedown.vi vec_15, vec_4, 1 ++ vslidedown.vi vec_14, vec_0, 1 ++ vxor.vv vec_15, vec_15, vec_4 ++ vxor.vv vec_14, vec_14, vec_0 ++ vslideup.vi vec_15, vec_14, 1 ++ + vclmul.vv vec_4, vec_1, vec_5 + vclmulh.vv vec_1, vec_1, vec_5 +- vredxor.vs vec_1, vec_1, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_1, 1 +- vrev8.v vec_7, vec_7 +- vslidedown.vi vec_6, vec_7, 1 +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_1, vec_4, vec_6 +- +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) ++ vslidedown.vi vec_16, vec_4, 1 ++ vslidedown.vi vec_14, vec_1, 1 ++ vxor.vv vec_16, vec_16, vec_4 ++ vxor.vv vec_14, vec_14, vec_1 ++ vslideup.vi vec_16, vec_14, 1 ++ + vclmul.vv vec_4, vec_2, vec_5 + vclmulh.vv vec_2, vec_2, vec_5 +- vredxor.vs vec_2, vec_2, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_2, 1 +- vrev8.v vec_7, vec_7 +- vslidedown.vi vec_6, vec_7, 1 +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_2, vec_4, vec_6 +- +- addi buf_iter, buf_iter, 16 +- vle64.v vec_7, 0(buf_iter) ++ vslidedown.vi vec_17, vec_4, 1 ++ vslidedown.vi vec_14, vec_2, 1 ++ vxor.vv vec_17, vec_17, vec_4 ++ vxor.vv vec_14, vec_14, vec_2 ++ vslideup.vi vec_17, vec_14, 1 ++ + vclmul.vv vec_4, vec_3, vec_5 + vclmulh.vv vec_3, vec_3, vec_5 +- vredxor.vs vec_3, vec_3, vec_zero +- vredxor.vs vec_4, vec_4, vec_zero +- vslideup.vi vec_4, vec_3, 1 +- vrev8.v vec_7, vec_7 +- vslidedown.vi vec_6, vec_7, 1 +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_3, vec_4, vec_6 +- +- addi buf_iter, buf_iter, 16 ++ vslidedown.vi vec_18, vec_4, 1 ++ vslidedown.vi vec_14, vec_3, 1 ++ vxor.vv vec_18, vec_18, vec_4 ++ vxor.vv vec_14, vec_14, vec_3 ++ vslideup.vi vec_18, vec_14, 1 ++ ++ vsetivli zero, 16, e8, m1, ta, ma ++ vrgather.vv vec_0, vec_8, vec_shuffle ++ vrgather.vv vec_1, vec_9, vec_shuffle ++ vrgather.vv vec_2, vec_10, vec_shuffle ++ vrgather.vv vec_3, vec_11, vec_shuffle ++ vsetivli zero, 2, e64, m1, ta, ma ++ vxor.vv vec_0, vec_0, vec_15 ++ vxor.vv vec_1, vec_1, vec_16 ++ vxor.vv vec_2, vec_2, vec_17 ++ vxor.vv vec_3, vec_3, vec_18 ++ ++ addi buf_iter, buf_iter, 64 + bne buf_iter, buf_end, .clmul_loop + .endm + +@@ -216,20 +211,26 @@ + vle64.v vec_5, 0(tmp_4) + vclmul.vv vec_6, vec_0, vec_5 + vclmulh.vv vec_7, vec_0, vec_5 +- vredxor.vs vec_6, vec_6, vec_zero +- vredxor.vs vec_7, vec_7, vec_zero +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_0, vec_6, vec_1 ++ vslidedown.vi vec_8, vec_6, 1 ++ vslidedown.vi vec_9, vec_7, 1 ++ vxor.vv vec_8, 
vec_8, vec_6 ++ vxor.vv vec_9, vec_9, vec_7 ++ vslideup.vi vec_8, vec_9, 1 ++ vxor.vv vec_0, vec_8, vec_1 + vclmul.vv vec_6, vec_0, vec_5 + vclmulh.vv vec_7, vec_0, vec_5 +- vredxor.vs vec_6, vec_6, vec_zero +- vredxor.vs vec_7, vec_7, vec_zero +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_0, vec_6, vec_2 ++ vslidedown.vi vec_8, vec_6, 1 ++ vslidedown.vi vec_9, vec_7, 1 ++ vxor.vv vec_8, vec_8, vec_6 ++ vxor.vv vec_9, vec_9, vec_7 ++ vslideup.vi vec_8, vec_9, 1 ++ vxor.vv vec_0, vec_8, vec_2 + vclmul.vv vec_6, vec_0, vec_5 + vclmulh.vv vec_7, vec_0, vec_5 +- vredxor.vs vec_6, vec_6, vec_zero +- vredxor.vs vec_7, vec_7, vec_zero +- vslideup.vi vec_6, vec_7, 1 +- vxor.vv vec_0, vec_6, vec_3 ++ vslidedown.vi vec_8, vec_6, 1 ++ vslidedown.vi vec_9, vec_7, 1 ++ vxor.vv vec_8, vec_8, vec_6 ++ vxor.vv vec_9, vec_9, vec_7 ++ vslideup.vi vec_8, vec_9, 1 ++ vxor.vv vec_0, vec_8, vec_3 + .endm +\ No newline at end of file +diff --git a/crc/riscv64/crc_multibinary_riscv.S b/crc/riscv64/crc_multibinary_riscv.S +index ef36681..eab6b85 100644 +--- a/crc/riscv64/crc_multibinary_riscv.S ++++ b/crc/riscv64/crc_multibinary_riscv.S +@@ -1,5 +1,5 @@ + ######################################################################## +-# Copyright(c) 2025 ZTE Corporation All rights reserved. ++# Copyright (c) 2025 ZTE Corporation. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions +@@ -28,7 +28,6 @@ + ######################################################################### + #include + +- + mbin_interface crc32_iscsi + mbin_interface crc16_t10dif + mbin_interface crc16_t10dif_copy +diff --git a/crc/riscv64/crc_riscv64_dispatcher.c b/crc/riscv64/crc_riscv64_dispatcher.c +index eec3cd7..bb3b470 100644 +--- a/crc/riscv64/crc_riscv64_dispatcher.c ++++ b/crc/riscv64/crc_riscv64_dispatcher.c +@@ -27,143 +27,159 @@ + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ **********************************************************************/ + #include ++#include "crc.h" ++#include "crc64.h" ++ ++extern uint16_t ++crc16_t10dif_vclmul(uint16_t, uint8_t *, uint64_t); ++ ++extern uint16_t ++crc16_t10dif_copy_vclmul(uint16_t, uint8_t *, uint8_t *, uint64_t); ++ ++extern uint32_t ++crc32_ieee_norm_vclmul(uint32_t, uint8_t *, uint64_t); ++ ++extern unsigned int ++crc32_iscsi_refl_vclmul(unsigned char *, int, unsigned int); ++ ++extern uint32_t ++crc32_gzip_refl_vclmul(uint32_t, uint8_t *, uint64_t); ++ ++extern uint64_t ++crc64_ecma_refl_vclmul(uint64_t, const unsigned char *, uint64_t); ++ ++extern uint64_t ++crc64_ecma_norm_vclmul(uint64_t, const unsigned char *, uint64_t); ++ ++extern uint64_t ++crc64_iso_refl_vclmul(uint64_t, const unsigned char *, uint64_t); ++ ++extern uint64_t ++crc64_iso_norm_vclmul(uint64_t, const unsigned char *, uint64_t); ++ ++extern uint64_t ++crc64_jones_refl_vclmul(uint64_t, const unsigned char *, uint64_t); ++ ++extern uint64_t ++crc64_jones_norm_vclmul(uint64_t, const unsigned char *, uint64_t); + + DEFINE_INTERFACE_DISPATCHER(crc16_t10dif) + { ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc16_t10dif_vclmul); ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc16_t10dif_vclmul; + } +- +- return PROVIDER_BASIC(crc16_t10dif); ++#endif ++ return crc16_t10dif_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc16_t10dif_copy_vclmul); ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc16_t10dif_copy_vclmul; + } +- +- return PROVIDER_BASIC(crc16_t10dif_copy); ++#endif ++ return crc16_t10dif_copy_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc32_ieee) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc32_ieee_norm_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc32_ieee_norm_vclmul; + } +- +- return PROVIDER_BASIC(crc32_ieee); ++#endif ++ return crc32_ieee_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc32_iscsi_refl_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc32_iscsi_refl_vclmul; + } +- +- return PROVIDER_BASIC(crc32_iscsi); ++#endif ++ return crc32_iscsi_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc32_gzip_refl_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc32_gzip_refl_vclmul; + } +- +- return PROVIDER_BASIC(crc32_gzip_refl); ++#endif ++ return 
crc32_gzip_refl_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_ecma_refl_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_ecma_refl_vclmul; + } +- +- return PROVIDER_BASIC(crc64_ecma_refl); ++#endif ++ return crc64_ecma_refl_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_ecma_norm_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_ecma_norm_vclmul; + } +- +- return PROVIDER_BASIC(crc64_ecma_norm); ++#endif ++ return crc64_ecma_norm_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_iso_refl_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_iso_refl_vclmul; + } +- +- return PROVIDER_BASIC(crc64_iso_refl); ++#endif ++ return crc64_iso_refl_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_iso_norm_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_iso_norm_vclmul; + } +- +- return PROVIDER_BASIC(crc64_iso_norm); ++#endif ++ return crc64_iso_norm_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_jones_refl_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_jones_refl_vclmul; + } +- +- return PROVIDER_BASIC(crc64_jones_refl); ++#endif ++ return crc64_jones_refl_base; + } + + DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm) + { +- ++#if HAVE_RVV && HAVE_ZBC && HAVE_ZVBC + unsigned long auxval = getauxval(AT_HWCAP); +- if (auxval & HWCAP_RV('V')) { +- if (has_riscv_ext("zvbc") && has_riscv_ext("zvbb") && has_riscv_ext("zbc")) +- return PROVIDER_INFO(crc64_jones_norm_vclmul); +- ++ if (auxval & HWCAP_RV('V') && CHECK_RISCV_EXTENSIONS("ZVBC", "ZBC")) { ++ return crc64_jones_norm_vclmul; + } +- +- return PROVIDER_BASIC(crc64_jones_norm); ++#endif ++ return crc64_jones_norm_base; + } +\ No newline at end of file +diff --git a/include/riscv64_multibinary.h b/include/riscv64_multibinary.h +index ad66970..8222a94 100644 +--- a/include/riscv64_multibinary.h ++++ b/include/riscv64_multibinary.h +@@ -1,5 +1,5 @@ + /********************************************************************** +- Copyright(c) 2024 All rights reserved. ++ Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). 
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+@@ -10,6 +10,9 @@
+       notice, this list of conditions and the following disclaimer in
+       the documentation and/or other materials provided with the
+       distribution.
++    * Neither the name of ISCAS nor the names of its
++      contributors may be used to endorse or promote products derived
++      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@@ -23,12 +26,10 @@
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **********************************************************************/
+-
+-#ifndef __RISCV64_MULTIBINARY_H__
+-#define __RISCV64_MULTIBINARY_H__
+-
++#ifndef __RISCV_MULTIBINARY_H__
++#define __RISCV_MULTIBINARY_H__
+ #ifndef __riscv
+-#error "This file is for RISC-V only"
++#error "This file is for riscv only"
+ #endif
+
+ #ifdef __ASSEMBLY__
+@@ -45,119 +46,108 @@
+  * 3. Prototype should be *"void * \name\()_dispatcher"*
+  * 4. The dispather should return the right function pointer , revision and a string information .
+  **/
+-        .macro mbin_interface name:req
+-        .section .data
+-        .align 3
+-        .global \name\()_dispatcher_info
+-        .type \name\()_dispatcher_info, @object
++.macro mbin_interface name:req
++        .section .data
++        .align 3
++        .global \name\()_dispatcher_info
++        .type \name\()_dispatcher_info, @object
+ \name\()_dispatcher_info:
+-        .quad \name\()_mbinit
+-        .section .text
+-        .global \name\()_mbinit
++        .quad \name\()_mbinit
++        .section .text
++        .global \name\()_mbinit
+ \name\()_mbinit:
+-        addi sp, sp, -56
+-        sd ra, 48(sp)
+-        sd a0, 0(sp)
+-        sd a1, 8(sp)
+-        sd a2, 16(sp)
+-        sd a3, 24(sp)
+-        sd a4, 32(sp)
+-        sd a5, 40(sp)
+-        call \name\()_dispatcher
+-        mv t2, a0
+-        la t0, \name\()_dispatcher_info
+-        sd a0, 0(t0)
+-        ld ra, 48(sp)
+-        ld a0, 0(sp)
+-        ld a1, 8(sp)
+-        ld a2, 16(sp)
+-        ld a3, 24(sp)
+-        ld a4, 32(sp)
+-        ld a5, 40(sp)
+-        addi sp, sp, 56
+-        jr t2
++        addi sp, sp, -56
++        sd ra, 48(sp)
++        sd a0, 0(sp)
++        sd a1, 8(sp)
++        sd a2, 16(sp)
++        sd a3, 24(sp)
++        sd a4, 32(sp)
++        sd a5, 40(sp)
++        call \name\()_dispatcher
++        mv t2, a0
++        la t0, \name\()_dispatcher_info
++        sd a0, 0(t0)
++        ld ra, 48(sp)
++        ld a0, 0(sp)
++        ld a1, 8(sp)
++        ld a2, 16(sp)
++        ld a3, 24(sp)
++        ld a4, 32(sp)
++        ld a5, 40(sp)
++        addi sp, sp, 56
++        jr t2
+ .global \name\()
+ .type \name,%function
+ \name\():
+-        la t0, \name\()_dispatcher_info
+-        ld t1, 0(t0)
+-        jr t1
++        la t0, \name\()_dispatcher_info
++        ld t1, 0(t0)
++        jr t1
+ .size \name,.-\name
+ .endm
+
+ /**
+-* mbin_interface_base is used for the interfaces which have only
+-* noarch implementation
+-*/
++ * mbin_interface_base is used for the interfaces which have only
++ * noarch implementation
++ */
+ .macro mbin_interface_base name:req, base:req
+-        .extern \base
+-        .data
+-        .align 3
+-        .global \name\()_dispatcher_info
+-        .type \name\()_dispatcher_info, @object
++        .extern \base
++        .data
++        .align 3
++        .global \name\()_dispatcher_info
++        .type \name\()_dispatcher_info, @object
+ \name\()_dispatcher_info:
+-        .dword \base
+-        .text
+-        .global \name
+-        .type \name, @function
++        .dword \base
++        .text
++        .global \name
++        .type \name, @function
+ \name:
+-        la t0, \name\()_dispatcher_info
+-        ld t0, (t0)
+-        jr t0
++        la t0, \name\()_dispatcher_info
++        ld t0, (t0)
++        jr t0
+ .endm
+-
+ #else /* __ASSEMBLY__ */
+ #include
+-#include
++#if HAVE_HWPROBE_H
++#include
++#endif
++#include
++#include
+ #include
+ #define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+
+-/* Define interface dispatcher macro */
+-#define DEFINE_INTERFACE_DISPATCHER(name) \
+-        void * name##_dispatcher(void)
+-
+-/* Define basic provider macro */
+-#define PROVIDER_BASIC(name) \
+-        PROVIDER_INFO(name##_base)
++#if HAVE_ZBC && HAVE_ZVBC
++#define EXT_CODE(ext) ( \
++        strcmp(ext, "ZBC") == 0 ? RISCV_HWPROBE_EXT_ZBC : \
++        strcmp(ext, "ZVBC") == 0 ? RISCV_HWPROBE_EXT_ZVBC : \
++        -1)
++#endif
+
+-#define DO_DIGNOSTIC(x) _Pragma GCC diagnostic ignored "-W"#x
+-#define DO_PRAGMA(x) _Pragma (#x)
+-#define DIGNOSTIC_IGNORE(x) DO_PRAGMA(GCC diagnostic ignored #x)
+-#define DIGNOSTIC_PUSH() DO_PRAGMA(GCC diagnostic push)
+-#define DIGNOSTIC_POP() DO_PRAGMA(GCC diagnostic pop)
++#define INIT_PROBE_STRUCT() \
++        (struct riscv_hwprobe){ \
++                .key = RISCV_HWPROBE_KEY_IMA_EXT_0 \
++        }
+
+-#define PROVIDER_INFO(_func_entry) \
+-        ({ DIGNOSTIC_PUSH() \
+-        DIGNOSTIC_IGNORE(-Wnested-externs) \
+-        extern void _func_entry(void); \
+-        DIGNOSTIC_POP() \
+-        _func_entry; \
+-        })
++#ifdef EXT_CODE
++static inline int check_riscv_extensions(const char **extensions, size_t count)
++{
++        struct riscv_hwprobe _probe = INIT_PROBE_STRUCT();
++        syscall(__NR_riscv_hwprobe, &_probe, 1, 0, NULL, 0);
++        for (size_t i = 0; i < count; i++) {
++                if (!(_probe.value & EXT_CODE(extensions[i]))) {
++                        return 0;
++                }
++        }
++        return 1;
++}
+
+-/* RISC-V extension detection */
+-static inline int has_riscv_ext(const char *ext) {
+-        FILE *fp = fopen("/proc/cpuinfo", "r");
+-        if (!fp) return 0;
++#define CHECK_RISCV_EXTENSIONS(...) \
++        check_riscv_extensions((const char*[]){ __VA_ARGS__ }, \
++                sizeof((const char*[]){ __VA_ARGS__ })/sizeof(const char*))
++#endif
+
+-        char line[1024];
+-        int found = 0;
+-        while (fgets(line, sizeof(line), fp)) {
+-                char *isa = strstr(line, "isa");
+-                if (isa) {
+-                        char *colon = strchr(isa, ':');
+-                        if (colon) {
+-                                char *isa_str = colon + 1;
+-                                while (*isa_str == ' ' || *isa_str == '\t') isa_str++;
+-                                if (strstr(isa_str, ext)) {
+-                                        found = 1;
+-                                        break;
+-                                }
+-                        }
+-                }
+-        }
+-        fclose(fp);
+-        return found;
+-}
++#define DEFINE_INTERFACE_DISPATCHER(name) \
++        void * name##_dispatcher(void)
+
+ #endif /* __ASSEMBLY__ */
+-#endif /* __RISCV64_MULTIBINARY_H__ */
+\ No newline at end of file
++#endif /* __RISCV_MULTIBINARY_H__ */
+\ No newline at end of file
+-- 
+2.27.0
+
diff --git a/isa-l.spec b/isa-l.spec
index 0ecbea6..cdf1760 100644
--- a/isa-l.spec
+++ b/isa-l.spec
@@ -2,13 +2,14 @@
 %define isal_devname libisa-l-devel
 
 Name: isa-l
 Version: 2.30.0
-Release: 3
+Release: 4
 Summary: Intelligent Storage Acceleration Library
 License: BSD-3-Clause
 URL: https://github.com/intel/isa-l
 Source0: https://github.com/intel/isa-l/archive/refs/tags/v%{version}.tar.gz
 Patch1: Feature-Add-Basic-RISC-V-And-CRC-Vector-support.patch
+Patch2: Feature-Optimize-CRC-calculation-for-the-RISC-V.patch
 
 BuildRequires: yasm gcc
 BuildRequires: autoconf automake libtool
@@ -70,7 +71,10 @@ find %{?buildroot} -name *.la -print0 | xargs -r0 rm -f
 %{_libdir}/pkgconfig/libisal.pc
 
 %changelog
-* Fri Jul 18 2025 liuqingtao - 2.30.0-3
+* Tue Sep 09 2025 liuqingtao - 2.30.0-4
+- Optimize CRC calculation for the RISC-V
+
+* Fri Jul 18 2025 liuqingtao - 2.30.0-3
 - Add basic RISC-V and CRC Vector support
 
 * Sat Feb 25 2023 yaoxin - 2.30.0-2
-- 
Gitee