From 81e30808f57eb335a94a6dc46433f81e9ffb16b0 Mon Sep 17 00:00:00 2001 From: Srangrang Date: Tue, 3 Jun 2025 19:05:47 +0800 Subject: [PATCH 1/7] chore: add support of riscv for sha2 - add sha2_256_riscv.S with zbb instruction - add asm_type config for riscv Related to #40 Co-authored-by: DA Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- config/json/feature.json | 6 +- crypto/sha2/src/asm/sha2_256_riscv.S | 382 +++++++++++++++++++++++++++ crypto/sha2/src/asm/sha2_512_riscv.S | 353 +++++++++++++++++++++++++ 3 files changed, 740 insertions(+), 1 deletion(-) create mode 100644 crypto/sha2/src/asm/sha2_256_riscv.S create mode 100644 crypto/sha2/src/asm/sha2_512_riscv.S diff --git a/config/json/feature.json b/config/json/feature.json index c4474fcf..47adda37 100644 --- a/config/json/feature.json +++ b/config/json/feature.json @@ -134,6 +134,9 @@ "modes": null, "sm4": null, "ecc": null + }, + "riscv": { + "sha2": null } } }, @@ -579,7 +582,8 @@ "x8664":{ "x8664":["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_x86_64.S"], "avx512":["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_x86_64.S"] - } + }, + "riscv": ["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_riscv.S"] }, ".deps": ["platform::Secure_C", "bsl::sal"] }, diff --git a/crypto/sha2/src/asm/sha2_256_riscv.S b/crypto/sha2/src/asm/sha2_256_riscv.S new file mode 100644 index 00000000..f1daaa88 --- /dev/null +++ b/crypto/sha2/src/asm/sha2_256_riscv.S @@ -0,0 +1,382 @@ +/* + * This file is part of the openHiTLS project. + * + * openHiTLS is licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PSL v2 for more details.
+ */
+
+#include "hitls_build.h"
+#ifdef HITLS_CRYPTO_SHA256
+
+.section .rodata
+.balign 64
+.K256:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+.size .K256, .-.K256
+
+/*
+ * Macro description: Prepares the message schedule w for i = 0 to 15.
+ * Input register:
+ *     INDEX: Int, current round index
+ *     w_i: Temporary register for W[INDEX]
+ * Modify the register: w_i
+ * Output register:
+ *     w_i: Latest W[i] value, W[i] = M(i)
+ * Function/Macro Call:None
+ */
+ .macro MSGSCHEDULE_W_16 INDEX, w_i
+ lw \w_i, (4*\INDEX)(a1) # Load the message block M(i) into w_i
+ rev8 \w_i, \w_i # Reverse the byte order of w_i
+ srli \w_i, \w_i, 32 # Shift right by 32 bits to align the bits
+ sw \w_i, (4*\INDEX)(sp) # Store the latest W[i] value
+ .endm
+
+/*
+ * Macro description: Prepares the message schedule w for i = 16 to 63. 
+ * Input register: + * INDEX: Int, current round index + * Modify the register: t1, t2, t3, t4, t5, t6 + * Output register: + * W[INDEX & 0x0f]: Latest W[i] value, W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16] + * Function/Macro Call:None + */ + .macro MSGSCHEDULE_W_64 INDEX + lw t1, (((\INDEX-2)&0x0f)*4)(sp) # Load W[i-2] + lw t2, (((\INDEX-15)&0x0f)*4)(sp) # Load W[i-15] + lw t3, (((\INDEX-7)&0x0f)*4)(sp) # Load W[i-7] + lw t4, ((\INDEX&0x0f)*4)(sp) # Load W[i-16] + + roriw t5, t1, 17 + roriw t6, t1, 19 + + srliw t1, t1, 10 + xor t1, t1, t5 + xor t1, t1, t6 + addw t1, t1, t3 # t1 = sigma1(W[i-2]) + W[i-7] + + roriw t5, t2, 7 + roriw t6, t2, 18 + + srliw t2, t2, 3 + xor t2, t2, t5 + xor t2, t2, t6 + addw t1, t1, t2 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + addw t1, t1, t4 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16] + sw t1, (4*(\INDEX&0x0f))(sp) + .endm + +/* + * Macro description: Caculate SHA-256 T1 value and update t1 register. + * Input register: + * INDEX: Int, current round index + * e, f, g, h: SHA-256 registers for T1 calculation + * K: Base address register for the constant table (= t0 = storing .K256 address) + * Modify the register: t1, t2, t3, t4, h + * Output register: + * h: Updated value after adding W[i], K[i], and sigma1(e) + * t1: T1 result (intermediate value for SHA-256 round function) + * Function/Macro Call:None + */ + .macro SHA256_T1 INDEX, e, f, g, h, K + lw t4, 4*\INDEX(\K) + addw \h, \h, t1 # h += W[i] + addw \h, \h, t4 # h += K[i] + + roriw t2, \e, 6 # t2 = e ror 6 + roriw t3, \e, 11 # t3 = e ror 11 + roriw t4, \e, 25 # t4 = e ror 25 + + xor t2, t2, t3 # t2 = t2 ^ t3 + xor t1, \f, \g # t1 = f ^ g + xor t2, t2, t4 # t2 = t2 ^ t4 + and t1, t1, \e # t1 = (f ^ g) & e + addw \h, \h, t2 # h += (e ror 6) ^ (e ror 11) ^ (e ror 25) + xor t1, t1, \g # t1 = (f ^ g) & e ^ g + addw t1, t1, \h # t1 = (f ^ g) & e ^ g + h + .endm + +/* + * Macro description: Calculate SHA-256 T2 value and update t2 register. 
+ * Input register: + * INDEX: Int, current round index + * a, b, c: SHA-256 working registers + * Modify the register: t2, t3, t4 + * Output register: + * t2: T2 result (intermediate value for SHA-256 round function) + * Function/Macro Call: None + */ + .macro SHA256_T2 INDEX, a, b, c + roriw t2, \a, 2 # t2 = a ror 2 + roriw t3, \a, 13 # t3 = a ror 13 + roriw t4, \a, 22 # t4 = a ror 22 + + xor t2, t2, t3 # t2 = t2 ^ t3 + xor t2, t2, t4 # t2 = t2 ^ t4 + xor t4, \b, \c # t4 = b ^ c + and t3, \b, \c # t3 = b & c + and t4, t4, \a # t4 = (b ^ c) & a + xor t4, t4, t3 # t4 = (b ^ c) & a ^ (b & c) + addw t2, t2, t4 # t2 = (b ^ c) & a ^ (b & c) + (a ror 2) ^ (a ror 13) ^ (a ror 22) + .endm + +/* + * Macro description: Perform one SHA-256 round calculation. + * Input register: + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-256 working registers + * Modify the register: t1, t2, t3, t4, d, h + * Output register: + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: SHA256_T1, SHA256_T2 + */ + .macro ROUND INDEX, a, b, c, d, e, f, g, h + SHA256_T1 \INDEX, \e, \f, \g, \h, t0 + SHA256_T2 \INDEX, \a, \b, \c + addw \d, \d, t1 # d += t1 + addw \h, t2, t1 # h = t1 + t2 + .endm + +/* + * Macro description: Perform one SHA-256 round for i = 0 to 15 (message schedule from input block). + * Input register: + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-256 working registers + * w_i: Temporary register for W[INDEX] + * Modify the register: t1, t2, t3, t4, t5, t6, w_i, d, h + * Output register: + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: MSGSCHEDULE_W_16, ROUND + */ + .macro ROUND_16 INDEX, a, b, c, d, e, f, g, h, w_i + MSGSCHEDULE_W_16 \INDEX, \w_i + ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h + .endm + +/* + * Macro description: Perform one SHA-256 round for i = 16 to 63 (message schedule from previous W). 
+ * Input register: + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-256 working registers + * Modify the register: t1, t2, t3, t4, t5, t6, d, h + * Output register: + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: MSGSCHEDULE_W_64, ROUND + */ + .macro ROUND_64 INDEX, a, b, c, d, e, f, g, h + MSGSCHEDULE_W_64 \INDEX + ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h + .endm + +/* + * Function Description:Performs 64 rounds of compression calculation based on the input plaintext data + * and updates the hash value. + * Function prototype:void SHA256CompressMultiBlocks(uint32_t hash[8], const uint8_t *in, uint32_t num); + * Input register: + * a0: Storage address of the hash value + * a1: Pointer to the input data address + * a2: Number of 64 rounds of cycles + * Modify the register: t0-t6, s0-s11, a0-a2, sp, ra + * Output register: None + * Function/Macro Call: ROUND_16, ROUND_64, MSGSCHEDULE_W_16, MSGSCHEDULE_W_64, SHA256_T1, SHA256_T2 + * + */ + .text + .align 2 + .global SHA256CompressMultiBlocks + .type SHA256CompressMultiBlocks, @function +SHA256CompressMultiBlocks: + # 保存现场 + addi sp, sp, -96 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + sd s9, 72(sp) + sd s10, 80(sp) + sd s11, 88(sp) + + addi sp, sp, -64 + + la t0, .K256 # Load the address of the K constants + + lw s2, 0(a0) #A load hash[0] + lw s3, 4(a0) #B load hash[1] + lw s4, 8(a0) #C load hash[2] + lw s5, 12(a0) #D load hash[3] + lw s6, 16(a0) #E load hash[4] + lw s7, 20(a0) #F load hash[5] + lw s8, 24(a0) #G load hash[6] + lw s9, 28(a0) #H load hash[7] + +Lloop_compress_64: + + addi a2, a2, -1 + + ROUND_16 0, s2, s3, s4, s5, s6, s7, s8, s9, t1 + ROUND_16 1, s9, s2, s3, s4, s5, s6, s7, s8, t1 + ROUND_16 2, s8, s9, s2, s3, s4, s5, s6, s7, t1 + ROUND_16 3, s7, s8, s9, s2, s3, s4, s5, s6, t1 + + ROUND_16 4, s6, s7, s8, s9, s2, s3, s4, s5, 
t1 + ROUND_16 5, s5, s6, s7, s8, s9, s2, s3, s4, t1 + ROUND_16 6, s4, s5, s6, s7, s8, s9, s2, s3, t1 + ROUND_16 7, s3, s4, s5, s6, s7, s8, s9, s2, t1 + + ROUND_16 8, s2, s3, s4, s5, s6, s7, s8, s9, t1 + ROUND_16 9, s9, s2, s3, s4, s5, s6, s7, s8, t1 + ROUND_16 10, s8, s9, s2, s3, s4, s5, s6, s7, t1 + ROUND_16 11, s7, s8, s9, s2, s3, s4, s5, s6, t1 + + ROUND_16 12, s6, s7, s8, s9, s2, s3, s4, s5, t1 + ROUND_16 13, s5, s6, s7, s8, s9, s2, s3, s4, t1 + ROUND_16 14, s4, s5, s6, s7, s8, s9, s2, s3, t1 + ROUND_16 15, s3, s4, s5, s6, s7, s8, s9, s2, t1 + + ROUND_64 16, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 17, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 18, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 19, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 20, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 21, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 22, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 23, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 24, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 25, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 26, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 27, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 28, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 29, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 30, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 31, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 32, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 33, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 34, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 35, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 36, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 37, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 38, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 39, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 40, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 41, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 42, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 43, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 44, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 45, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 46, s4, s5, s6, s7, s8, s9, s2, s3 + 
ROUND_64 47, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 48, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 49, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 50, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 51, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 52, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 53, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 54, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 55, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 56, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 57, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 58, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 59, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 60, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 61, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 62, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 63, s3, s4, s5, s6, s7, s8, s9, s2 + + lw t1, 0(a0) # Load hash[0] + lw t2, 4(a0) # Load hash[1] + lw t3, 8(a0) # Load hash[2] + lw t4, 12(a0) # Load hash[3] + + addw s2, s2, t1 # Update hash[0] + addw s3, s3, t2 # Update hash[1] + addw s4, s4, t3 # Update hash[2] + addw s5, s5, t4 # Update hash[3] + + sw s2, 0(a0) # Store updated hash[0] + sw s3, 4(a0) # Store updated hash[1] + sw s4, 8(a0) # Store updated hash[2] + sw s5, 12(a0) # Store updated hash[3] + + lw t1, 16(a0) # Load hash[4] + lw t2, 20(a0) # Load hash[5] + lw t3, 24(a0) # Load hash[6] + lw t4, 28(a0) # Load hash[7] + + addw s6, s6, t1 # Update hash[4] + addw s7, s7, t2 # Update hash[5] + addw s8, s8, t3 # Update hash[6] + addw s9, s9, t4 # Update hash[7] + + sw s6, 16(a0) # Store updated hash[4] + sw s7, 20(a0) # Store updated hash[5] + sw s8, 24(a0) # Store updated hash[6] + sw s9, 28(a0) # Store updated hash[7] + + addi a1, a1, 64 # Move to the next block of input data + + bnez a2, Lloop_compress_64 + + + addi sp, sp, 64 + + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + ld s9, 72(sp) + ld s10, 80(sp) + ld s11, 88(sp) + + addi sp, sp, 96 + + ret + + .size 
SHA256CompressMultiBlocks, .-SHA256CompressMultiBlocks + +#endif // HITLS_CRYPTO_SHA256 \ No newline at end of file diff --git a/crypto/sha2/src/asm/sha2_512_riscv.S b/crypto/sha2/src/asm/sha2_512_riscv.S new file mode 100644 index 00000000..336e8b42 --- /dev/null +++ b/crypto/sha2/src/asm/sha2_512_riscv.S @@ -0,0 +1,353 @@ +/* + * This file is part of the openHiTLS project. + * + * openHiTLS is licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ + +#include "hitls_build.h" +#ifdef HITLS_CRYPTO_SHA512 + +.section .rodata +.balign 64 +.K512: + .dword 0x428a2f98d728ae22, 0x7137449123ef65cd + .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc + .dword 0x3956c25bf348b538, 0x59f111f1b605d019 + .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 + .dword 0xd807aa98a3030242, 0x12835b0145706fbe + .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 + .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 + .dword 0x9bdc06a725c71235, 0xc19bf174cf692694 + .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 + .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 + .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 + .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 + .dword 0x983e5152ee66dfab, 0xa831c66d2db43210 + .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4 + .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725 + .dword 0x06ca6351e003826f, 0x142929670a0e6e70 + .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926 + .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df + .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8 + .dword 0x81c2c92e47edaee6, 0x92722c851482353b + .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001 + .dword 
0xc24b8b70d0f89791, 0xc76c51a30654be30 + .dword 0xd192e819d6ef5218, 0xd69906245565a910 + .dword 0xf40e35855771202a, 0x106aa07032bbd1b8 + .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 + .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 + .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb + .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 + .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60 + .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec + .dword 0x90befffa23631e28, 0xa4506cebde82bde9 + .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b + .dword 0xca273eceea26619c, 0xd186b8c721c0c207 + .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 + .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6 + .dword 0x113f9804bef90dae, 0x1b710b35131c471b + .dword 0x28db77f523047d84, 0x32caab7b40c72493 + .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c + .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a + .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 +.size K512,.-K512 + +/* + * Macro description: prepares the message schedule w for i = 0 to 15. + * Input register: + * INDEX: Int + w_i: W[INDEX] + * Modify the register: w_i + * Output register: + * w_i: Latest W[i] value, W[i] = M(i) + * Function/Macro Call:None + */ + .macro MSGSCHEDULE_W_16 INDEX, w_i + lw \w_i, (4*\INDEX)(a1) # Load the message block M(i) into w_i + rev8 \w_i, \w_i # Reverse the byte order of w_i + srli \w_i, \w_i, 32 # Shift right by 32 bits to align the bits + sw \w_i, (4*\INDEX)(sp) # Store the latest W[i] value + .endm + + .macro MSGSCHEDULE_W_64 INDEX + lw t1, (((\INDEX-2)&0x0f)*4)(sp) # Load W[i-2] + lw t2, (((\INDEX-15)&0x0f)*4)(sp) # Load W[i-15] + lw t3, (((\INDEX-7)&0x0f)*4)(sp) # Load W[i-7] + lw t4, ((\INDEX&0x0f)*4)(sp) # Load W[i-16] + + roriw t5, t1, 17 + roriw t6, t1, 19 + + srliw t1, t1, 10 + xor t1, t1, t5 + xor t1, t1, t6 + addw t1, t1, t3 # t1 = sigma1(W[i-2]) + W[i-7] + + roriw t5, t2, 7 + roriw t6, t2, 18 + + srliw t2, t2, 3 + xor t2, t2, t5 + xor t2, t2, t6 + addw t1, t1, t2 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + addw t1, 
t1, t4 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16] + sw t1, (4*(\INDEX&0x0f))(sp) + .endm + + + .macro SHA512_T1 INDEX, e, f, g, h, K + lw t4, 4*\INDEX(\K) + addw \h, \h, t1 # h += W[i] + addw \h, \h, t4 # h += K[i] + + roriw t2, \e, 6 # t2 = e ror 6 + roriw t3, \e, 11 # t3 = e ror 11 + roriw t4, \e, 25 # t4 = e ror 25 + + xor t2, t2, t3 # t2 = t2 ^ t3 + xor t1, \f, \g # t1 = f ^ g + xor t2, t2, t4 # t2 = t2 ^ t4 + and t1, t1, \e # t1 = (f ^ g) & e + addw \h, \h, t2 # h += (e ror 6) ^ (e ror 11) ^ (e ror 25) + xor t1, t1, \g # t1 = (f ^ g) & e ^ g + addw t1, t1, \h # t1 = (f ^ g) & e ^ g + h + .endm + + .macro SHA512_T2 INDEX, a, b, c + roriw t2, \a, 2 # t2 = a ror 2 + roriw t3, \a, 13 # t3 = a ror 13 + roriw t4, \a, 22 # t4 = a ror 22 + + xor t2, t2, t3 # t2 = t2 ^ t3 + xor t2, t2, t4 # t2 = t2 ^ t4 + xor t4, \b, \c # t4 = b ^ c + and t3, \b, \c # t3 = b & c + and t4, t4, \a # t4 = (b ^ c) & a + xor t4, t4, t3 # t4 = (b ^ c) & a ^ (b & c) + addw t2, t2, t4 # t2 = (b ^ c) & a ^ (b & c) + (a ror 2) ^ (a ror 13) ^ (a ror 22) + .endm + +/* + * Macro description: prepares the message schedule w for i = 0 to 15. + * Input register: + * INDEX: Int + w_i: W[INDEX] + * Modify the register: w_i + * Output register: + * w_i: Latest W[i] value, W[i] = M(i) + * Function/Macro Call:None + */ + .macro ROUND INDEX, a, b, c, d, e, f, g, h + SHA512_T1 \INDEX, \e, \f, \g, \h, t0 + SHA512_T2 \INDEX, \a, \b, \c + addw \d, \d, t1 # d += t1 + addw \h, t2, t1 # h = t1 + t2 + .endm + + + .macro ROUND_16 INDEX, a, b, c, d, e, f, g, h, w_i + MSGSCHEDULE_W_16 \INDEX, \w_i + ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h + .endm + + .macro ROUND_64 INDEX, a, b, c, d, e, f, g, h + MSGSCHEDULE_W_64 \INDEX + ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h + .endm + +/* + * Function Description:Performs 64 rounds of compression calculation based on the input plaintext data + * and updates the hash value. 
+ * Function prototype:void SHA512CompressMultiBlocks(uint32_t hash[8], const uint8_t *in, uint32_t num); + * Input register: + * x0: Storage address of the hash value + * x1: Pointer to the input data address + * x2: Number of 64 rounds of cycles + * Modify the register: x0-x17 + * Output register: None + * Function/Macro Call: None + * + */ + .text + .align 2 + .global SHA512CompressMultiBlocks + .type SHA512CompressMultiBlocks, @function +SHA512CompressMultiBlocks: + # 保存现场 + addi sp, sp, -96 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + sd s9, 72(sp) + sd s10, 80(sp) + sd s11, 88(sp) + + addi sp, sp, -64 + + la t0, .K512 # Load the address of the K constants + + lw s2, 0(a0) #A load hash[0] + lw s3, 4(a0) #B load hash[1] + lw s4, 8(a0) #C load hash[2] + lw s5, 12(a0) #D load hash[3] + lw s6, 16(a0) #E load hash[4] + lw s7, 20(a0) #F load hash[5] + lw s8, 24(a0) #G load hash[6] + lw s9, 28(a0) #H load hash[7] + +Lloop_compress_64: + + addi a2, a2, -1 + + ROUND_16 0, s2, s3, s4, s5, s6, s7, s8, s9, t1 + ROUND_16 1, s9, s2, s3, s4, s5, s6, s7, s8, t1 + ROUND_16 2, s8, s9, s2, s3, s4, s5, s6, s7, t1 + ROUND_16 3, s7, s8, s9, s2, s3, s4, s5, s6, t1 + + ROUND_16 4, s6, s7, s8, s9, s2, s3, s4, s5, t1 + ROUND_16 5, s5, s6, s7, s8, s9, s2, s3, s4, t1 + ROUND_16 6, s4, s5, s6, s7, s8, s9, s2, s3, t1 + ROUND_16 7, s3, s4, s5, s6, s7, s8, s9, s2, t1 + + ROUND_16 8, s2, s3, s4, s5, s6, s7, s8, s9, t1 + ROUND_16 9, s9, s2, s3, s4, s5, s6, s7, s8, t1 + ROUND_16 10, s8, s9, s2, s3, s4, s5, s6, s7, t1 + ROUND_16 11, s7, s8, s9, s2, s3, s4, s5, s6, t1 + + ROUND_16 12, s6, s7, s8, s9, s2, s3, s4, s5, t1 + ROUND_16 13, s5, s6, s7, s8, s9, s2, s3, s4, t1 + ROUND_16 14, s4, s5, s6, s7, s8, s9, s2, s3, t1 + ROUND_16 15, s3, s4, s5, s6, s7, s8, s9, s2, t1 + + ROUND_64 16, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 17, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 18, s8, s9, s2, s3, s4, s5, 
s6, s7 + ROUND_64 19, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 20, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 21, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 22, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 23, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 24, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 25, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 26, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 27, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 28, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 29, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 30, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 31, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 32, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 33, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 34, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 35, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 36, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 37, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 38, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 39, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 40, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 41, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 42, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 43, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 44, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 45, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 46, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 47, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 48, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 49, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 50, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 51, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 52, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 53, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_64 54, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 55, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_64 56, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_64 57, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_64 58, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_64 59, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_64 60, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_64 61, s5, s6, s7, s8, s9, s2, 
s3, s4 + ROUND_64 62, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_64 63, s3, s4, s5, s6, s7, s8, s9, s2 + + lw t1, 0(a0) # Load hash[0] + lw t2, 4(a0) # Load hash[1] + lw t3, 8(a0) # Load hash[2] + lw t4, 12(a0) # Load hash[3] + + addw s2, s2, t1 # Update hash[0] + addw s3, s3, t2 # Update hash[1] + addw s4, s4, t3 # Update hash[2] + addw s5, s5, t4 # Update hash[3] + + sw s2, 0(a0) # Store updated hash[0] + sw s3, 4(a0) # Store updated hash[1] + sw s4, 8(a0) # Store updated hash[2] + sw s5, 12(a0) # Store updated hash[3] + + lw t1, 16(a0) # Load hash[4] + lw t2, 20(a0) # Load hash[5] + lw t3, 24(a0) # Load hash[6] + lw t4, 28(a0) # Load hash[7] + + addw s6, s6, t1 # Update hash[4] + addw s7, s7, t2 # Update hash[5] + addw s8, s8, t3 # Update hash[6] + addw s9, s9, t4 # Update hash[7] + + sw s6, 16(a0) # Store updated hash[4] + sw s7, 20(a0) # Store updated hash[5] + sw s8, 24(a0) # Store updated hash[6] + sw s9, 28(a0) # Store updated hash[7] + + addi a1, a1, 64 # Move to the next block of input data + + bnez a2, Lloop_compress_64 + + + addi sp, sp, 64 + + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + ld s9, 72(sp) + ld s10, 80(sp) + ld s11, 88(sp) + + addi sp, sp, 96 + + ret + + .size SHA512CompressMultiBlocks, .-SHA512CompressMultiBlocks + +#endif // HITLS_CRYPTO_SHA512 \ No newline at end of file -- Gitee From bdfaf7e807971a4732e661fa496cd243ba87041d Mon Sep 17 00:00:00 2001 From: Srangrang Date: Wed, 24 Sep 2025 19:22:26 +0800 Subject: [PATCH 2/7] feat: add support of riscv for sha512 - add sha2_512_riscv.S with zbb instruction Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- crypto/sha2/src/asm/sha2_512_riscv.S | 308 ++++++++++++++------------- 1 file changed, 163 insertions(+), 145 deletions(-) diff --git a/crypto/sha2/src/asm/sha2_512_riscv.S b/crypto/sha2/src/asm/sha2_512_riscv.S index 336e8b42..2e2aeb34 100644 --- 
a/crypto/sha2/src/asm/sha2_512_riscv.S
+++ b/crypto/sha2/src/asm/sha2_512_riscv.S
@@ -72,68 +72,67 @@
 * Function/Macro Call:None
 */
 .macro MSGSCHEDULE_W_16 INDEX, w_i
- lw \w_i, (4*\INDEX)(a1) # Load the message block M(i) into w_i
+ ld \w_i, (8*\INDEX)(a1) # Load the message block M(i) into w_i
 rev8 \w_i, \w_i # Reverse the byte order of w_i
- srli \w_i, \w_i, 32 # Shift right by 32 bits to align the bits
- sw \w_i, (4*\INDEX)(sp) # Store the latest W[i] value
+ sd \w_i, (8*\INDEX)(sp) # Store the full 64-bit W[i] value (sw would truncate to 32 bits)
 .endm

- .macro MSGSCHEDULE_W_64 INDEX
- lw t1, (((\INDEX-2)&0x0f)*4)(sp) # Load W[i-2]
- lw t2, (((\INDEX-15)&0x0f)*4)(sp) # Load W[i-15]
- lw t3, (((\INDEX-7)&0x0f)*4)(sp) # Load W[i-7]
- lw t4, ((\INDEX&0x0f)*4)(sp) # Load W[i-16]
+ .macro MSGSCHEDULE_W_80 INDEX
+ ld t1, (((\INDEX-2)&0x0f)*8)(sp) # Load W[i-2]
+ ld t2, (((\INDEX-15)&0x0f)*8)(sp) # Load W[i-15]
+ ld t3, (((\INDEX-7)&0x0f)*8)(sp) # Load W[i-7]
+ ld t4, ((\INDEX&0x0f)*8)(sp) # Load W[i-16]

- roriw t5, t1, 17
- roriw t6, t1, 19
+ rori t5, t1, 19
+ rori t6, t1, 61

- srliw t1, t1, 10
+ srli t1, t1, 6
 xor t1, t1, t5
 xor t1, t1, t6
- addw t1, t1, t3 # t1 = sigma1(W[i-2]) + W[i-7]
+ add t1, t1, t3 # t1 = sigma1(W[i-2]) + W[i-7]

- roriw t5, t2, 7
- roriw t6, t2, 18
+ rori t5, t2, 1
+ rori t6, t2, 8

- srliw t2, t2, 3
+ srli t2, t2, 7
 xor t2, t2, t5
 xor t2, t2, t6
- addw t1, t1, t2 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15])
- addw t1, t1, t4 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
- sw t1, (4*(\INDEX&0x0f))(sp)
+ add t1, t1, t2 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15])
+ add t1, t1, t4 # t1 = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
+ sd t1, (8*(\INDEX&0x0f))(sp)
 .endm


 .macro SHA512_T1 INDEX, e, f, g, h, K
- lw t4, 4*\INDEX(\K)
- addw \h, \h, t1 # h += W[i]
- addw \h, \h, t4 # h += K[i]
+ ld t4, 8*\INDEX(\K)
+ add \h, \h, t1 # h += W[i]
+ add \h, \h, t4 # h += K[i]

- roriw t2, \e, 6 # t2 = e ror 6
- roriw t3, \e, 11 # t3 = e ror 11
- roriw t4, \e, 25 # 
t4 = e ror 25 + rori t2, \e, 14 # t2 = e ror 6 + rori t3, \e, 18 # t3 = e ror 11 + rori t4, \e, 41 # t4 = e ror 25 xor t2, t2, t3 # t2 = t2 ^ t3 xor t1, \f, \g # t1 = f ^ g xor t2, t2, t4 # t2 = t2 ^ t4 and t1, t1, \e # t1 = (f ^ g) & e - addw \h, \h, t2 # h += (e ror 6) ^ (e ror 11) ^ (e ror 25) + add \h, \h, t2 # h += (e ror 6) ^ (e ror 11) ^ (e ror 25) xor t1, t1, \g # t1 = (f ^ g) & e ^ g - addw t1, t1, \h # t1 = (f ^ g) & e ^ g + h + add t1, t1, \h # t1 = (f ^ g) & e ^ g + h .endm .macro SHA512_T2 INDEX, a, b, c - roriw t2, \a, 2 # t2 = a ror 2 - roriw t3, \a, 13 # t3 = a ror 13 - roriw t4, \a, 22 # t4 = a ror 22 + rori t2, \a, 28 # t2 = a ror 2 + rori t3, \a, 34 # t3 = a ror 13 + rori t4, \a, 39 # t4 = a ror 22 xor t2, t2, t3 # t2 = t2 ^ t3 - xor t2, t2, t4 # t2 = t2 ^ t4 - xor t4, \b, \c # t4 = b ^ c + xor t5, \b, \c and t3, \b, \c # t3 = b & c - and t4, t4, \a # t4 = (b ^ c) & a - xor t4, t4, t3 # t4 = (b ^ c) & a ^ (b & c) - addw t2, t2, t4 # t2 = (b ^ c) & a ^ (b & c) + (a ror 2) ^ (a ror 13) ^ (a ror 22) + and t5, t5, \a # t4 = (b ^ c) & a + xor t2, t2, t4 # t4 = (b ^ c) & a ^ (b & c) + xor t3, t3, t5 + add t2, t2, t3 # t2 = (b ^ c) & a ^ (b & c) + (a ror 2) ^ (a ror 13) ^ (a ror 22) .endm /* @@ -149,8 +148,8 @@ .macro ROUND INDEX, a, b, c, d, e, f, g, h SHA512_T1 \INDEX, \e, \f, \g, \h, t0 SHA512_T2 \INDEX, \a, \b, \c - addw \d, \d, t1 # d += t1 - addw \h, t2, t1 # h = t1 + t2 + add \d, \d, t1 # d += t1 + add \h, t2, t1 # h = t1 + t2 .endm @@ -159,8 +158,8 @@ ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h .endm - .macro ROUND_64 INDEX, a, b, c, d, e, f, g, h - MSGSCHEDULE_W_64 \INDEX + .macro ROUND_80 INDEX, a, b, c, d, e, f, g, h + MSGSCHEDULE_W_80 \INDEX ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h .endm @@ -178,11 +177,10 @@ * */ .text - .align 2 + .align 3 .global SHA512CompressMultiBlocks .type SHA512CompressMultiBlocks, @function SHA512CompressMultiBlocks: - # 保存现场 addi sp, sp, -96 sd s0, 0(sp) sd s1, 8(sp) @@ -197,20 +195,20 @@ 
SHA512CompressMultiBlocks:
 sd s10, 80(sp)
 sd s11, 88(sp)

- addi sp, sp, -64
+ addi sp, sp, -128

 la t0, .K512 # Load the address of the K constants

- lw s2, 0(a0) #A load hash[0]
- lw s3, 4(a0) #B load hash[1]
- lw s4, 8(a0) #C load hash[2]
- lw s5, 12(a0) #D load hash[3]
- lw s6, 16(a0) #E load hash[4]
- lw s7, 20(a0) #F load hash[5]
- lw s8, 24(a0) #G load hash[6]
- lw s9, 28(a0) #H load hash[7]
+ ld s2, 0(a0) #A load hash[0]
+ ld s3, 8(a0) #B load hash[1]
+ ld s4, 16(a0) #C load hash[2]
+ ld s5, 24(a0) #D load hash[3]
+ ld s6, 32(a0) #E load hash[4]
+ ld s7, 40(a0) #F load hash[5]
+ ld s8, 48(a0) #G load hash[6]
+ ld s9, 56(a0) #H load hash[7]

-Lloop_compress_64:
+Lloop_compress_80:

 addi a2, a2, -1

@@ -234,102 +232,122 @@ Lloop_compress_64:
 ROUND_16 14, s4, s5, s6, s7, s8, s9, s2, s3, t1
 ROUND_16 15, s3, s4, s5, s6, s7, s8, s9, s2, t1

- ROUND_64 16, s2, s3, s4, s5, s6, s7, s8, s9
- ROUND_64 17, s9, s2, s3, s4, s5, s6, s7, s8
- ROUND_64 18, s8, s9, s2, s3, s4, s5, s6, s7
- ROUND_64 19, s7, s8, s9, s2, s3, s4, s5, s6
-
- ROUND_64 20, s6, s7, s8, s9, s2, s3, s4, s5
- ROUND_64 21, s5, s6, s7, s8, s9, s2, s3, s4
- ROUND_64 22, s4, s5, s6, s7, s8, s9, s2, s3
- ROUND_64 23, s3, s4, s5, s6, s7, s8, s9, s2
-
- ROUND_64 24, s2, s3, s4, s5, s6, s7, s8, s9
- ROUND_64 25, s9, s2, s3, s4, s5, s6, s7, s8
- ROUND_64 26, s8, s9, s2, s3, s4, s5, s6, s7
- ROUND_64 27, s7, s8, s9, s2, s3, s4, s5, s6
-
- ROUND_64 28, s6, s7, s8, s9, s2, s3, s4, s5
- ROUND_64 29, s5, s6, s7, s8, s9, s2, s3, s4
- ROUND_64 30, s4, s5, s6, s7, s8, s9, s2, s3
- ROUND_64 31, s3, s4, s5, s6, s7, s8, s9, s2
-
- ROUND_64 32, s2, s3, s4, s5, s6, s7, s8, s9
- ROUND_64 33, s9, s2, s3, s4, s5, s6, s7, s8
- ROUND_64 34, s8, s9, s2, s3, s4, s5, s6, s7
- ROUND_64 35, s7, s8, s9, s2, s3, s4, s5, s6
-
- ROUND_64 36, s6, s7, s8, s9, s2, s3, s4, s5
- ROUND_64 37, s5, s6, s7, s8, s9, s2, s3, s4
- ROUND_64 38, s4, s5, s6, s7, s8, s9, s2, s3
- ROUND_64 39, s3, s4, s5, s6, s7, s8, s9, s2
-
- ROUND_64 40, s2, s3, s4, s5, s6, s7, s8, s9
- ROUND_64 41, s9, 
s2, s3, s4, s5, s6, s7, s8 - ROUND_64 42, s8, s9, s2, s3, s4, s5, s6, s7 - ROUND_64 43, s7, s8, s9, s2, s3, s4, s5, s6 - - ROUND_64 44, s6, s7, s8, s9, s2, s3, s4, s5 - ROUND_64 45, s5, s6, s7, s8, s9, s2, s3, s4 - ROUND_64 46, s4, s5, s6, s7, s8, s9, s2, s3 - ROUND_64 47, s3, s4, s5, s6, s7, s8, s9, s2 - - ROUND_64 48, s2, s3, s4, s5, s6, s7, s8, s9 - ROUND_64 49, s9, s2, s3, s4, s5, s6, s7, s8 - ROUND_64 50, s8, s9, s2, s3, s4, s5, s6, s7 - ROUND_64 51, s7, s8, s9, s2, s3, s4, s5, s6 - - ROUND_64 52, s6, s7, s8, s9, s2, s3, s4, s5 - ROUND_64 53, s5, s6, s7, s8, s9, s2, s3, s4 - ROUND_64 54, s4, s5, s6, s7, s8, s9, s2, s3 - ROUND_64 55, s3, s4, s5, s6, s7, s8, s9, s2 - - ROUND_64 56, s2, s3, s4, s5, s6, s7, s8, s9 - ROUND_64 57, s9, s2, s3, s4, s5, s6, s7, s8 - ROUND_64 58, s8, s9, s2, s3, s4, s5, s6, s7 - ROUND_64 59, s7, s8, s9, s2, s3, s4, s5, s6 - - ROUND_64 60, s6, s7, s8, s9, s2, s3, s4, s5 - ROUND_64 61, s5, s6, s7, s8, s9, s2, s3, s4 - ROUND_64 62, s4, s5, s6, s7, s8, s9, s2, s3 - ROUND_64 63, s3, s4, s5, s6, s7, s8, s9, s2 - - lw t1, 0(a0) # Load hash[0] - lw t2, 4(a0) # Load hash[1] - lw t3, 8(a0) # Load hash[2] - lw t4, 12(a0) # Load hash[3] - - addw s2, s2, t1 # Update hash[0] - addw s3, s3, t2 # Update hash[1] - addw s4, s4, t3 # Update hash[2] - addw s5, s5, t4 # Update hash[3] - - sw s2, 0(a0) # Store updated hash[0] - sw s3, 4(a0) # Store updated hash[1] - sw s4, 8(a0) # Store updated hash[2] - sw s5, 12(a0) # Store updated hash[3] - - lw t1, 16(a0) # Load hash[4] - lw t2, 20(a0) # Load hash[5] - lw t3, 24(a0) # Load hash[6] - lw t4, 28(a0) # Load hash[7] - - addw s6, s6, t1 # Update hash[4] - addw s7, s7, t2 # Update hash[5] - addw s8, s8, t3 # Update hash[6] - addw s9, s9, t4 # Update hash[7] - - sw s6, 16(a0) # Store updated hash[4] - sw s7, 20(a0) # Store updated hash[5] - sw s8, 24(a0) # Store updated hash[6] - sw s9, 28(a0) # Store updated hash[7] - - addi a1, a1, 64 # Move to the next block of input data - - bnez a2, Lloop_compress_64 - - - 
addi sp, sp, 64 + ROUND_80 16, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 17, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 18, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 19, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 20, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 21, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 22, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 23, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 24, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 25, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 26, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 27, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 28, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 29, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 30, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 31, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 32, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 33, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 34, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 35, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 36, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 37, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 38, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 39, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 40, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 41, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 42, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 43, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 44, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 45, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 46, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 47, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 48, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 49, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 50, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 51, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 52, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 53, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 54, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 55, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 56, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 57, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 58, s8, s9, s2, s3, 
s4, s5, s6, s7 + ROUND_80 59, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 60, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 61, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 62, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 63, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 64, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 65, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 66, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 67, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 68, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 69, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 70, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 71, s3, s4, s5, s6, s7, s8, s9, s2 + + ROUND_80 72, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_80 73, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_80 74, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_80 75, s7, s8, s9, s2, s3, s4, s5, s6 + + ROUND_80 76, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_80 77, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_80 78, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_80 79, s3, s4, s5, s6, s7, s8, s9, s2 + + ld t1, 0(a0) # Load hash[0] + ld t2, 8(a0) # Load hash[1] + ld t3, 16(a0) # Load hash[2] + ld t4, 24(a0) # Load hash[3] + + add s2, s2, t1 # Update hash[0] + add s3, s3, t2 # Update hash[1] + add s4, s4, t3 # Update hash[2] + add s5, s5, t4 # Update hash[3] + + sd s2, 0(a0) # Store updated hash[0] + sd s3, 8(a0) # Store updated hash[1] + sd s4, 16(a0) # Store updated hash[2] + sd s5, 24(a0) # Store updated hash[3] + + ld t1, 32(a0) # Load hash[4] + ld t2, 40(a0) # Load hash[5] + ld t3, 48(a0) # Load hash[6] + ld t4, 56(a0) # Load hash[7] + + add s6, s6, t1 # Update hash[4] + add s7, s7, t2 # Update hash[5] + add s8, s8, t3 # Update hash[6] + add s9, s9, t4 # Update hash[7] + + sd s6, 32(a0) # Store updated hash[4] + sd s7, 40(a0) # Store updated hash[5] + sd s8, 48(a0) # Store updated hash[6] + sd s9, 56(a0) # Store updated hash[7] + + addi a1, a1, 128 # Move to the next block of input data + + bnez a2, Lloop_compress_80 + + + addi sp, sp, 128 ld s0, 0(sp) ld s1, 8(sp) -- Gitee From 
3ee25da3b733dfa11f22051d5fce74a7da0795bb Mon Sep 17 00:00:00 2001 From: Srangrang Date: Tue, 3 Jun 2025 23:22:06 +0800 Subject: [PATCH 3/7] fix: resolve sha512 load and store instructions - Modify some load/store instructions in sha2_512_riscv.S - sha2_512_riscv.S is now correct Related to issue #40 Co-authored-by: D_AO Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- crypto/sha2/src/asm/sha2_256_riscv.S | 1 - crypto/sha2/src/asm/sha2_512_riscv.S | 39 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/crypto/sha2/src/asm/sha2_256_riscv.S b/crypto/sha2/src/asm/sha2_256_riscv.S index f1daaa88..fc433227 100644 --- a/crypto/sha2/src/asm/sha2_256_riscv.S +++ b/crypto/sha2/src/asm/sha2_256_riscv.S @@ -35,7 +35,6 @@ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -.size K256, .-K256 /* * Macro description: Prepares the message schedule w for i = 0 to 15. diff --git a/crypto/sha2/src/asm/sha2_512_riscv.S b/crypto/sha2/src/asm/sha2_512_riscv.S index 2e2aeb34..bd741a07 100644 --- a/crypto/sha2/src/asm/sha2_512_riscv.S +++ b/crypto/sha2/src/asm/sha2_512_riscv.S @@ -59,7 +59,6 @@ .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 -.size K512,.-K512 /* * Macro description: prepares the message schedule w for i = 0 to 15. 
@@ -74,7 +73,7 @@ .macro MSGSCHEDULE_W_16 INDEX, w_i ld \w_i, (8*\INDEX)(a1) # Load the message block M(i) into w_i rev8 \w_i, \w_i # Reverse the byte order of w_i - sw \w_i, (8*\INDEX)(sp) # Store the latest W[i] value + sd \w_i, (8*\INDEX)(sp) # Store the latest W[i] value .endm .macro MSGSCHEDULE_W_80 INDEX @@ -122,17 +121,17 @@ .endm .macro SHA512_T2 INDEX, a, b, c - rori t2, \a, 28 # t2 = a ror 2 - rori t3, \a, 34 # t3 = a ror 13 - rori t4, \a, 39 # t4 = a ror 22 + rori t2, \a, 28 # t2 = a ror 34 + rori t3, \a, 34 # t3 = a ror 30 + rori t4, \a, 39 # t4 = a ror 25 - xor t2, t2, t3 # t2 = t2 ^ t3 - xor t5, \b, \c + xor t2, t2, t3 # t2 = a ror 34 ^ a ror 30 + xor t5, \b, \c # t5 = b ^ c and t3, \b, \c # t3 = b & c - and t5, t5, \a # t4 = (b ^ c) & a - xor t2, t2, t4 # t4 = (b ^ c) & a ^ (b & c) - xor t3, t3, t5 - add t2, t2, t3 # t2 = (b ^ c) & a ^ (b & c) + (a ror 2) ^ (a ror 13) ^ (a ror 22) + and t5, t5, \a # t5 = (b ^ c) & a + xor t2, t2, t4 # t2 = a ror 34 ^ a ror 30 ^ a ror 25 + xor t3, t3, t5 # t3 = (b ^ c) & a ^ (b & c) + add t2, t2, t3 # t2 = a ror 34 ^ a ror 30 ^ a ror 25 + (b ^ c) & a ^ (b & c) .endm /* @@ -199,14 +198,14 @@ SHA512CompressMultiBlocks: la t0, .K512 # Load the address of the K constants - lw s2, 0(a0) #A load hash[0] - lw s3, 8(a0) #B load hash[1] - lw s4, 16(a0) #C load hash[2] - lw s5, 24(a0) #D load hash[3] - lw s6, 32(a0) #E load hash[4] - lw s7, 40(a0) #F load hash[5] - lw s8, 48(a0) #G load hash[6] - lw s9, 56(a0) #H load hash[7] + ld s2, 0(a0) #A load hash[0] + ld s3, 8(a0) #B load hash[1] + ld s4, 16(a0) #C load hash[2] + ld s5, 24(a0) #D load hash[3] + ld s6, 32(a0) #E load hash[4] + ld s7, 40(a0) #F load hash[5] + ld s8, 48(a0) #G load hash[6] + ld s9, 56(a0) #H load hash[7] Lloop_compress_80: @@ -314,7 +313,7 @@ Lloop_compress_80: ld t1, 0(a0) # Load hash[0] ld t2, 8(a0) # Load hash[1] - ld t3, 16(a0) # Load hash[2] + ld t3, 16(a0) # Load hash[2] ld t4, 24(a0) # Load hash[3] add s2, s2, t1 # Update hash[0] -- Gitee From 
f8379eac88b35e2dfdcb3e86a623ea58ae2bcd09 Mon Sep 17 00:00:00 2001 From: Srangrang Date: Tue, 3 Jun 2025 23:44:49 +0800 Subject: [PATCH 4/7] style: update sha512 alg annotation of RISCV - update file sha2_512_riscv.S annotation Related to issue #40 Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- crypto/sha2/src/asm/sha2_512_riscv.S | 124 ++++++++++++++++++++------- 1 file changed, 93 insertions(+), 31 deletions(-) diff --git a/crypto/sha2/src/asm/sha2_512_riscv.S b/crypto/sha2/src/asm/sha2_512_riscv.S index bd741a07..c6ef9268 100644 --- a/crypto/sha2/src/asm/sha2_512_riscv.S +++ b/crypto/sha2/src/asm/sha2_512_riscv.S @@ -69,6 +69,7 @@ * Output register: * w_i: Latest W[i] value, W[i] = M(i) * Function/Macro Call:None + * */ .macro MSGSCHEDULE_W_16 INDEX, w_i ld \w_i, (8*\INDEX)(a1) # Load the message block M(i) into w_i @@ -76,6 +77,17 @@ sd \w_i, (8*\INDEX)(sp) # Store the latest W[i] value .endm + +/* + * Macro description: Prepares the message schedule w for i = 16 to 79. + * Input register: + * INDEX: Int, current round index + * Modify the register: t1, t2, t3, t4, t5, t6 + * Output register: + * W[INDEX & 0x0f]: Latest W[i] value, W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16] + * Function/Macro Call: None + * + */ .macro MSGSCHEDULE_W_80 INDEX ld t1, (((\INDEX-2)&0x0f)*8)(sp) # Load W[i-2] ld t2, (((\INDEX-15)&0x0f)*8)(sp) # Load W[i-15] @@ -101,49 +113,74 @@ sd t1, (8*(\INDEX&0x0f))(sp) .endm - +/* + * Macro description: Calculate SHA-512 T1 value and update t1 register. 
+ * Input register: + * INDEX: Int, current round index + * e, f, g, h: SHA-512 registers for T1 calculation + * K: Base address register for the constant table (e.g., t0, storing .K512 address) + * Modify the register: t1, t2, t3, t4, h + * Output register: + * h: Updated value after adding W[i], K[i], and sigma1(e) + * t1: T1 result (intermediate value for SHA-512 round function) + * Function/Macro Call: None + * + */ .macro SHA512_T1 INDEX, e, f, g, h, K ld t4, 8*\INDEX(\K) add \h, \h, t1 # h += W[i] add \h, \h, t4 # h += K[i] - rori t2, \e, 14 # t2 = e ror 6 - rori t3, \e, 18 # t3 = e ror 11 - rori t4, \e, 41 # t4 = e ror 25 + rori t2, \e, 14 # t2 = e ror 14 + rori t3, \e, 18 # t3 = e ror 18 + rori t4, \e, 41 # t4 = e ror 41 - xor t2, t2, t3 # t2 = t2 ^ t3 + xor t2, t2, t3 # t2 = (e ror 14) ^ (e ror 18) xor t1, \f, \g # t1 = f ^ g - xor t2, t2, t4 # t2 = t2 ^ t4 + xor t2, t2, t4 # t2 = (e ror 14) ^ (e ror 18) ^ (e ror 41) and t1, t1, \e # t1 = (f ^ g) & e - add \h, \h, t2 # h += (e ror 6) ^ (e ror 11) ^ (e ror 25) + add \h, \h, t2 # h += (e ror 14) ^ (e ror 18) ^ (e ror 41) xor t1, t1, \g # t1 = (f ^ g) & e ^ g - add t1, t1, \h # t1 = (f ^ g) & e ^ g + h + add t1, t1, \h # t1 = (f ^ g) & e ^ g + h .endm +/* + * Macro description: Calculate SHA-512 T2 value and update t2 register. 
+ * Input register: + * INDEX: Int, current round index + * a, b, c: SHA-512 working registers + * Modify the register: t2, t3, t4, t5 + * Output register: + * t2: T2 result (intermediate value for SHA-512 round function) + * Function/Macro Call: None + * + */ .macro SHA512_T2 INDEX, a, b, c - rori t2, \a, 28 # t2 = a ror 34 - rori t3, \a, 34 # t3 = a ror 30 - rori t4, \a, 39 # t4 = a ror 25 + rori t2, \a, 28 # t2 = a ror 28 + rori t3, \a, 34 # t3 = a ror 34 + rori t4, \a, 39 # t4 = a ror 39 - xor t2, t2, t3 # t2 = a ror 34 ^ a ror 30 + xor t2, t2, t3 # t2 = (a ror 28) ^ (a ror 34) xor t5, \b, \c # t5 = b ^ c and t3, \b, \c # t3 = b & c and t5, t5, \a # t5 = (b ^ c) & a - xor t2, t2, t4 # t2 = a ror 34 ^ a ror 30 ^ a ror 25 - xor t3, t3, t5 # t3 = (b ^ c) & a ^ (b & c) - add t2, t2, t3 # t2 = a ror 34 ^ a ror 30 ^ a ror 25 + (b ^ c) & a ^ (b & c) + xor t2, t2, t4 # t2 = (a ror 28) ^ (a ror 34) ^ (a ror 39) + xor t3, t3, t5 # t3 = (b & c) ^ ((b ^ c) & a) + add t2, t2, t3 # t2 = (a ror 28) ^ (a ror 34) ^ (a ror 39) + ((b & c) ^ ((b ^ c) & a)) .endm /* - * Macro description: prepares the message schedule w for i = 0 to 15. + * Macro description: Perform one SHA-512 round calculation. * Input register: - * INDEX: Int - w_i: W[INDEX] - * Modify the register: w_i + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-512 working registers + * Modify the register: t1, t2, t3, t4, d, h * Output register: - * w_i: Latest W[i] value, W[i] = M(i) - * Function/Macro Call:None - */ + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: SHA512_T1, SHA512_T2 + * + */ .macro ROUND INDEX, a, b, c, d, e, f, g, h SHA512_T1 \INDEX, \e, \f, \g, \h, t0 SHA512_T2 \INDEX, \a, \b, \c @@ -151,28 +188,53 @@ add \h, t2, t1 # h = t1 + t2 .endm - +/* + * Macro description: Perform one SHA-512 round for i = 0 to 15 (message schedule from input block). 
+ * Input register: + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-512 working registers + * w_i: Temporary register for W[INDEX] + * Modify the register: t1, t2, t3, t4, w_i, d, h + * Output register: + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: MSGSCHEDULE_W_16, ROUND + * + */ .macro ROUND_16 INDEX, a, b, c, d, e, f, g, h, w_i MSGSCHEDULE_W_16 \INDEX, \w_i ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h .endm +/* + * Macro description: Perform one SHA-512 round for i = 16 to 79 (message schedule from previous W). + * Input register: + * INDEX: Int, current round index + * a, b, c, d, e, f, g, h: SHA-512 working registers + * Modify the register: t1, t2, t3, t4, t5, t6, d, h + * Output register: + * d: Updated value after adding T1 + * h: Updated value after adding T1 and T2 + * Function/Macro Call: MSGSCHEDULE_W_80, ROUND + * + */ .macro ROUND_80 INDEX, a, b, c, d, e, f, g, h MSGSCHEDULE_W_80 \INDEX ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h .endm /* - * Function Description:Performs 64 rounds of compression calculation based on the input plaintext data + * Function Description: Performs 80 rounds of compression calculation based on the input plaintext data * and updates the hash value. 
- * Function prototype:void SHA512CompressMultiBlocks(uint32_t hash[8], const uint8_t *in, uint32_t num); + * Function prototype: void SHA512CompressMultiBlocks(uint64_t hash[8], const uint8_t *in, uint64_t num); * Input register: - * x0: Storage address of the hash value - * x1: Pointer to the input data address - * x2: Number of 64 rounds of cycles - * Modify the register: x0-x17 - * Output register: None - * Function/Macro Call: None + * a0: Storage address of the hash value (hash[8]) + * a1: Pointer to the input data address + * a2: Number of 80-round cycles (number of message blocks) + * Modify the register: + * t0-t6, s0-s11, a0-a2, sp, ra + * Output register: None + * Function/Macro Call: ROUND_16, ROUND_80, MSGSCHEDULE_W_16, MSGSCHEDULE_W_80, SHA512_T1, SHA512_T2 * */ .text -- Gitee From c43ad0b6e70e11d69ae081fa202e53a9a8cb9d8c Mon Sep 17 00:00:00 2001 From: Srangrang Date: Sun, 24 Aug 2025 21:05:51 +0800 Subject: [PATCH 5/7] feat: add detection of riscv cpu capabilities - Add v, zbb, zknd, zkne, zknh, zksed, zksh extended instruction detection - Implement sha2 assembly code checking under riscv cpu Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- .gitignore | 5 ++ .../macro_config/hitls_config_layer_crypto.h | 2 +- crypto/ealinit/src/asmcap_alg_asm.c | 5 ++ crypto/ealinit/src/cpucap.c | 63 ++++++++++++++++++- crypto/include/crypt_riscv.h | 38 +++++++++++ crypto/include/crypt_utils.h | 12 +++- crypto/sha2/src/asm/sha2_256_riscv.S | 1 - 7 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 crypto/include/crypt_riscv.h diff --git a/.gitignore b/.gitignore index 88b82b9f..c177d605 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,8 @@ testcode/framework/tls/lib/ testcode/output/ platform/* .DS_Store +output +test* +.gitignore +riscv.toolchain.cmake + diff --git a/config/macro_config/hitls_config_layer_crypto.h b/config/macro_config/hitls_config_layer_crypto.h index cb04a017..ea949270 100644 --- 
a/config/macro_config/hitls_config_layer_crypto.h +++ b/config/macro_config/hitls_config_layer_crypto.h @@ -416,7 +416,7 @@ #define HITLS_CRYPTO_SHA1_ASM #endif -#if defined(HITLS_CRYPTO_SHA2_X8664) && !defined(HITLS_CRYPTO_SHA2_ASM) +#if (defined(HITLS_CRYPTO_SHA2_X8664) || defined(HITLS_CRYPTO_SHA2_RISCV)) && !defined(HITLS_CRYPTO_SHA2_ASM) #define HITLS_CRYPTO_SHA2_ASM #endif diff --git a/crypto/ealinit/src/asmcap_alg_asm.c b/crypto/ealinit/src/asmcap_alg_asm.c index e9502cc2..27e19852 100644 --- a/crypto/ealinit/src/asmcap_alg_asm.c +++ b/crypto/ealinit/src/asmcap_alg_asm.c @@ -124,6 +124,11 @@ int32_t CRYPT_SHA2_AsmCheck(void) BSL_ERR_PUSH_ERROR(CRYPT_EAL_ALG_ASM_NOT_SUPPORT); return CRYPT_EAL_ALG_ASM_NOT_SUPPORT; } +#elif defined(HITLS_CRYPTO_SHA2_RISCV) + if(!IsSupportZBB()) { + BSL_ERR_PUSH_ERROR(CRYPT_EAL_ALG_ASM_NOT_SUPPORT); + return CRYPT_EAL_ALG_ASM_NOT_SUPPORT; + } #endif return CRYPT_SUCCESS; } diff --git a/crypto/ealinit/src/cpucap.c b/crypto/ealinit/src/cpucap.c index af587cd2..fbc66848 100644 --- a/crypto/ealinit/src/cpucap.c +++ b/crypto/ealinit/src/cpucap.c @@ -232,7 +232,64 @@ void getarmcap(void) } #endif // HITLS_CRYPTO_NO_AUXVAL -#endif // x86_64 || __arm__ || __arm || __aarch64__ +#elif defined(__riscv) || defined (__riscv64) +#include "crypt_riscv.h" +#include <sys/syscall.h> +#include <asm/hwprobe.h> + +uint64_t g_cryptRiscvCpuInfo = 0; + +bool IsSupportZBB(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZBB; +} + +bool IsSupportAESD(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZKND; +} + +bool IsSupportAESE(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZKNE; +} + +bool IsSupportSHA2(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZKNH; +} + +bool IsSupportSM4(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZKSED; +} + +bool IsSupportSM3(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_ZKSH; +} + +bool IsSupportV(void) +{ + return g_cryptRiscvCpuInfo & CRYPT_RISCV_V; +} + + +void getriscvcap(void) +{ + struct riscv_hwprobe pairs[] = { + 
{RISCV_HWPROBE_KEY_IMA_EXT_0, 0}, + }; + + int ret = syscall(__NR_riscv_hwprobe, pairs, 1, 0, NULL, 0); + + if(ret == 0) { + g_cryptRiscvCpuInfo = pairs[0].value; + } +} +#endif // x86_64 || __arm__ || __arm || __aarch64__ || __riscv + + void GetCpuInstrSupportState(void) { @@ -269,5 +326,7 @@ void GetCpuInstrSupportState(void) g_cryptArmCpuInfo = getauxval(CRYPT_CE); } #endif // HITLS_CRYPTO_NO_AUXVAL -#endif // defined(__arm__) || defined (__arm) || defined(__aarch64__) +#elif defined(__riscv) || defined (__riscv64) + getriscvcap(); +#endif // defined(__riscv) || defined (__riscv64) } \ No newline at end of file diff --git a/crypto/include/crypt_riscv.h b/crypto/include/crypt_riscv.h new file mode 100644 index 00000000..04233fd2 --- /dev/null +++ b/crypto/include/crypt_riscv.h @@ -0,0 +1,38 @@ +/* + * This file is part of the openHiTLS project. + * + * openHiTLS is licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ + +#ifndef CRYPT_RISCV_H +#define CRYPT_RISCV_H + +#ifndef __NR_riscv_hwprobe +#define __NR_riscv_hwprobe 258 +#endif + +#if defined(__riscv) || defined(__riscv64) +#define CRYPT_CAP __NR_riscv_hwprobe +#define CRYPT_RISCV_V (1 << 2) +#define CRYPT_RISCV_ZBB (1 << 4) +#define CRYPT_RISCV_ZKND (1 << 11) +#define CRYPT_RISCV_ZKNE (1 << 12) +#define CRYPT_RISCV_ZKNH (1 << 13) +#define CRYPT_RISCV_ZKSED (1 << 14) +#define CRYPT_RISCV_ZKSH (1 << 15) +#endif + +#ifndef __ASSEMBLER__ +extern uint64_t g_cryptRiscvCpuInfo; +#endif + +#endif \ No newline at end of file diff --git a/crypto/include/crypt_utils.h b/crypto/include/crypt_utils.h index d7d26a3f..b36f4c71 100644 --- a/crypto/include/crypt_utils.h +++ b/crypto/include/crypt_utils.h @@ -365,8 +365,16 @@ bool IsSupportNEON(void); #if defined(__aarch64__) bool IsSupportSHA512(void); #endif // __aarch64__ - -#endif // __arm__ || __arm || __aarch64__ +#elif defined(__riscv) || defined (__riscv64) + +bool IsSupportZBB(void); +bool IsSupportAESD(void); +bool IsSupportAESE(void); +bool IsSupportSHA2(void); +bool IsSupportSM4(void); +bool IsSupportSM3(void); +bool IsSupportV(void); +#endif // __riscv || __riscv64 #ifdef __cplusplus } diff --git a/crypto/sha2/src/asm/sha2_256_riscv.S b/crypto/sha2/src/asm/sha2_256_riscv.S index fc433227..3446f3cb 100644 --- a/crypto/sha2/src/asm/sha2_256_riscv.S +++ b/crypto/sha2/src/asm/sha2_256_riscv.S @@ -210,7 +210,6 @@ .global SHA256CompressMultiBlocks .type SHA256CompressMultiBlocks, @function SHA256CompressMultiBlocks: - # 保存现场 addi sp, sp, -96 sd s0, 0(sp) sd s1, 8(sp) -- Gitee From a23c03f8fd7a6dc9834bb139c2b5bad441504086 Mon Sep 17 00:00:00 2001 From: Srangrang Date: Thu, 18 Sep 2025 15:32:26 +0800 Subject: [PATCH 6/7] fix: fix sha512 test case failure and modify symbol visibility, etc. - add 0 len judgment for sha512. - remove some self-test content in .gitignore. - modify symbol visibility in assembly code. - add definition to distinguish rv32 and rv64. 
Related to #40 Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- .gitignore | 4 -- config/json/feature.json | 4 +- .../macro_config/hitls_config_layer_crypto.h | 2 +- crypto/ealinit/src/asmcap_alg_asm.c | 2 +- .../{sha2_256_riscv.S => sha2_256_riscv64.S} | 10 +-- .../{sha2_512_riscv.S => sha2_512_riscv64.S} | 71 ++++++++++--------- 6 files changed, 46 insertions(+), 47 deletions(-) rename crypto/sha2/src/asm/{sha2_256_riscv.S => sha2_256_riscv64.S} (98%) rename crypto/sha2/src/asm/{sha2_512_riscv.S => sha2_512_riscv64.S} (89%) diff --git a/.gitignore b/.gitignore index c177d605..c59a79b3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,8 +6,4 @@ testcode/framework/tls/lib/ testcode/output/ platform/* .DS_Store -output -test* -.gitignore -riscv.toolchain.cmake diff --git a/config/json/feature.json b/config/json/feature.json index 47adda37..3c59c4fd 100644 --- a/config/json/feature.json +++ b/config/json/feature.json @@ -135,7 +135,7 @@ "sm4": null, "ecc": null }, - "riscv": { + "riscv64": { "sha2": null } } @@ -583,7 +583,7 @@ "x8664":["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_x86_64.S"], "avx512":["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_x86_64.S"] }, - "riscv": ["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_riscv.S"] + "riscv64": ["crypto/sha2/src/asm_*.c", "crypto/sha2/src/asm/*_riscv64.S"] }, ".deps": ["platform::Secure_C", "bsl::sal"] }, diff --git a/config/macro_config/hitls_config_layer_crypto.h b/config/macro_config/hitls_config_layer_crypto.h index ea949270..e0e9701d 100644 --- a/config/macro_config/hitls_config_layer_crypto.h +++ b/config/macro_config/hitls_config_layer_crypto.h @@ -416,7 +416,7 @@ #define HITLS_CRYPTO_SHA1_ASM #endif -#if (defined(HITLS_CRYPTO_SHA2_X8664) || defined(HITLS_CRYPTO_SHA2_RISCV)) && !defined(HITLS_CRYPTO_SHA2_ASM) +#if (defined(HITLS_CRYPTO_SHA2_X8664) || defined(HITLS_CRYPTO_SHA2_RISCV64)) && !defined(HITLS_CRYPTO_SHA2_ASM) #define HITLS_CRYPTO_SHA2_ASM #endif diff --git 
a/crypto/ealinit/src/asmcap_alg_asm.c b/crypto/ealinit/src/asmcap_alg_asm.c index 27e19852..35f5d655 100644 --- a/crypto/ealinit/src/asmcap_alg_asm.c +++ b/crypto/ealinit/src/asmcap_alg_asm.c @@ -124,7 +124,7 @@ int32_t CRYPT_SHA2_AsmCheck(void) BSL_ERR_PUSH_ERROR(CRYPT_EAL_ALG_ASM_NOT_SUPPORT); return CRYPT_EAL_ALG_ASM_NOT_SUPPORT; } -#elif defined(HITLS_CRYPTO_SHA2_RISCV) +#elif defined(HITLS_CRYPTO_SHA2_RISCV64) if(!IsSupportZBB()) { BSL_ERR_PUSH_ERROR(CRYPT_EAL_ALG_ASM_NOT_SUPPORT); return CRYPT_EAL_ALG_ASM_NOT_SUPPORT; diff --git a/crypto/sha2/src/asm/sha2_256_riscv.S b/crypto/sha2/src/asm/sha2_256_riscv64.S similarity index 98% rename from crypto/sha2/src/asm/sha2_256_riscv.S rename to crypto/sha2/src/asm/sha2_256_riscv64.S index 3446f3cb..35034d0c 100644 --- a/crypto/sha2/src/asm/sha2_256_riscv.S +++ b/crypto/sha2/src/asm/sha2_256_riscv64.S @@ -18,7 +18,7 @@ .section .rodata .balign 64 -.K256: +.LK256: .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 @@ -92,7 +92,7 @@ * Input register: * INDEX: Int, current round index * e, f, g, h: SHA-256 registers for T1 calculation - * K: Base address register for the constant table (= t0 = storing .K256 address) + * K: Base address register for the constant table (= t0 = storing .LK256 address) * Modify the register: t1, t2, t3, t4, h * Output register: * h: Updated value after adding W[i], K[i], and sigma1(e) @@ -226,7 +226,7 @@ SHA256CompressMultiBlocks: addi sp, sp, -64 - la t0, .K256 # Load the address of the K constants + la t0, .LK256 # Load the address of the K constants lw s2, 0(a0) #A load hash[0] lw s3, 4(a0) #B load hash[1] @@ -237,7 +237,7 @@ SHA256CompressMultiBlocks: lw s8, 24(a0) #G load hash[6] lw s9, 28(a0) #H load hash[7] -Lloop_compress_64: +.Lloop_compress_64: addi a2, a2, -1 @@ -353,7 +353,7 @@ Lloop_compress_64: addi a1, a1, 64 # Move to the next block of input data - bnez a2, 
Lloop_compress_64 + bnez a2, .Lloop_compress_64 addi sp, sp, 64 diff --git a/crypto/sha2/src/asm/sha2_512_riscv.S b/crypto/sha2/src/asm/sha2_512_riscv64.S similarity index 89% rename from crypto/sha2/src/asm/sha2_512_riscv.S rename to crypto/sha2/src/asm/sha2_512_riscv64.S index c6ef9268..3d11f24f 100644 --- a/crypto/sha2/src/asm/sha2_512_riscv.S +++ b/crypto/sha2/src/asm/sha2_512_riscv64.S @@ -17,8 +17,8 @@ #ifdef HITLS_CRYPTO_SHA512 .section .rodata -.balign 64 -.K512: +.align 3 +.LK512: .dword 0x428a2f98d728ae22, 0x7137449123ef65cd .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc .dword 0x3956c25bf348b538, 0x59f111f1b605d019 @@ -64,17 +64,17 @@ * Macro description: prepares the message schedule w for i = 0 to 15. * Input register: * INDEX: Int - w_i: W[INDEX] - * Modify the register: w_i + t1: W[INDEX] + * Modify the register: t1 * Output register: - * w_i: Latest W[i] value, W[i] = M(i) + * t1: Latest W[i] value, W[i] = M(i) * Function/Macro Call:None * */ - .macro MSGSCHEDULE_W_16 INDEX, w_i - ld \w_i, (8*\INDEX)(a1) # Load the message block M(i) into w_i - rev8 \w_i, \w_i # Reverse the byte order of w_i - sd \w_i, (8*\INDEX)(sp) # Store the latest W[i] value + .macro MSGSCHEDULE_W_16 INDEX + ld t1, (8*\INDEX)(a1) # Load the message block M(i) into t1 + rev8 t1, t1 # Reverse the byte order of t1 + sd t1, (8*\INDEX)(sp) # Store the latest W[i] value .endm @@ -118,7 +118,7 @@ * Input register: * INDEX: Int, current round index * e, f, g, h: SHA-512 registers for T1 calculation - * K: Base address register for the constant table (e.g., t0, storing .K512 address) + * K: Base address register for the constant table (e.g., t0, storing .LK512 address) * Modify the register: t1, t2, t3, t4, h * Output register: * h: Updated value after adding W[i], K[i], and sigma1(e) @@ -147,7 +147,6 @@ /* * Macro description: Calculate SHA-512 T2 value and update t2 register. 
* Input register: - * INDEX: Int, current round index * a, b, c: SHA-512 working registers * Modify the register: t2, t3, t4, t5 * Output register: @@ -155,7 +154,7 @@ * Function/Macro Call: None * */ - .macro SHA512_T2 INDEX, a, b, c + .macro SHA512_T2 a, b, c rori t2, \a, 28 # t2 = a ror 28 rori t3, \a, 34 # t3 = a ror 34 rori t4, \a, 39 # t4 = a ror 39 @@ -183,7 +182,7 @@ */ .macro ROUND INDEX, a, b, c, d, e, f, g, h SHA512_T1 \INDEX, \e, \f, \g, \h, t0 - SHA512_T2 \INDEX, \a, \b, \c + SHA512_T2 \a, \b, \c add \d, \d, t1 # d += t1 add \h, t2, t1 # h = t1 + t2 .endm @@ -201,8 +200,8 @@ * Function/Macro Call: MSGSCHEDULE_W_16, ROUND * */ - .macro ROUND_16 INDEX, a, b, c, d, e, f, g, h, w_i - MSGSCHEDULE_W_16 \INDEX, \w_i + .macro ROUND_16 INDEX, a, b, c, d, e, f, g, h + MSGSCHEDULE_W_16 \INDEX ROUND \INDEX, \a, \b, \c, \d, \e, \f, \g, \h .endm @@ -242,6 +241,9 @@ .global SHA512CompressMultiBlocks .type SHA512CompressMultiBlocks, @function SHA512CompressMultiBlocks: + + beqz a2, .Lend_sha512 + addi sp, sp, -96 sd s0, 0(sp) sd s1, 8(sp) @@ -258,7 +260,7 @@ SHA512CompressMultiBlocks: addi sp, sp, -128 - la t0, .K512 # Load the address of the K constants + la t0, .LK512 # Load the address of the K constants ld s2, 0(a0) #A load hash[0] ld s3, 8(a0) #B load hash[1] @@ -269,29 +271,29 @@ SHA512CompressMultiBlocks: ld s8, 48(a0) #G load hash[6] ld s9, 56(a0) #H load hash[7] -Lloop_compress_80: +.Lloop_compress_80: addi a2, a2, -1 - ROUND_16 0, s2, s3, s4, s5, s6, s7, s8, s9, t1 - ROUND_16 1, s9, s2, s3, s4, s5, s6, s7, s8, t1 - ROUND_16 2, s8, s9, s2, s3, s4, s5, s6, s7, t1 - ROUND_16 3, s7, s8, s9, s2, s3, s4, s5, s6, t1 + ROUND_16 0, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_16 1, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_16 2, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_16 3, s7, s8, s9, s2, s3, s4, s5, s6 - ROUND_16 4, s6, s7, s8, s9, s2, s3, s4, s5, t1 - ROUND_16 5, s5, s6, s7, s8, s9, s2, s3, s4, t1 - ROUND_16 6, s4, s5, s6, s7, s8, s9, s2, s3, t1 - ROUND_16 7, s3, s4, s5, 
s6, s7, s8, s9, s2, t1 + ROUND_16 4, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_16 5, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_16 6, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_16 7, s3, s4, s5, s6, s7, s8, s9, s2 - ROUND_16 8, s2, s3, s4, s5, s6, s7, s8, s9, t1 - ROUND_16 9, s9, s2, s3, s4, s5, s6, s7, s8, t1 - ROUND_16 10, s8, s9, s2, s3, s4, s5, s6, s7, t1 - ROUND_16 11, s7, s8, s9, s2, s3, s4, s5, s6, t1 + ROUND_16 8, s2, s3, s4, s5, s6, s7, s8, s9 + ROUND_16 9, s9, s2, s3, s4, s5, s6, s7, s8 + ROUND_16 10, s8, s9, s2, s3, s4, s5, s6, s7 + ROUND_16 11, s7, s8, s9, s2, s3, s4, s5, s6 - ROUND_16 12, s6, s7, s8, s9, s2, s3, s4, s5, t1 - ROUND_16 13, s5, s6, s7, s8, s9, s2, s3, s4, t1 - ROUND_16 14, s4, s5, s6, s7, s8, s9, s2, s3, t1 - ROUND_16 15, s3, s4, s5, s6, s7, s8, s9, s2, t1 + ROUND_16 12, s6, s7, s8, s9, s2, s3, s4, s5 + ROUND_16 13, s5, s6, s7, s8, s9, s2, s3, s4 + ROUND_16 14, s4, s5, s6, s7, s8, s9, s2, s3 + ROUND_16 15, s3, s4, s5, s6, s7, s8, s9, s2 ROUND_80 16, s2, s3, s4, s5, s6, s7, s8, s9 ROUND_80 17, s9, s2, s3, s4, s5, s6, s7, s8 @@ -405,7 +407,7 @@ Lloop_compress_80: addi a1, a1, 128 # Move to the next block of input data - bnez a2, Lloop_compress_80 + bnez a2, .Lloop_compress_80 addi sp, sp, 128 @@ -425,6 +427,7 @@ Lloop_compress_80: addi sp, sp, 96 +.Lend_sha512: ret .size SHA512CompressMultiBlocks, .-SHA512CompressMultiBlocks -- Gitee From 68da7460fafa01d5c45d9e6c541147bd9adc2089 Mon Sep 17 00:00:00 2001 From: Srangrang Date: Wed, 24 Sep 2025 20:13:38 +0800 Subject: [PATCH 7/7] style: restore .gitignore file - restore .gitignore Cherry-picked from: https://gitcode.com/openHiTLS/openhitls/merge_requests/317 --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index c59a79b3..88b82b9f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,3 @@ testcode/framework/tls/lib/ testcode/output/ platform/* .DS_Store - -- Gitee