diff --git a/0003-isa-l-update.patch b/0003-isa-l-update.patch new file mode 100644 index 0000000000000000000000000000000000000000..3be66ec0a338ec91771f4b4d52f2323c884433db --- /dev/null +++ b/0003-isa-l-update.patch @@ -0,0 +1,17749 @@ +From 794413ddd24c26852a44ce35e9b87e78c9d4d8b4 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Fri, 6 Mar 2020 13:45:59 -0700 +Subject: [PATCH 01/42] ec: Remove arch-specific redundant gf_nvect tests + +The gf_{2-6}vect_dot_prod tests were kept in other_tests since the 5,6vect +functions were not strictly called by the higher level ec_encode_data() and +needed independent testing. As this has now changed the extra tests can be +removed as redundant. + +Change-Id: I8a95e31487b150a2a8f929c5586785524d951fde +Signed-off-by: Greg Tucker +--- + erasure_code/Makefile.am | 14 - + erasure_code/gf_2vect_dot_prod_sse_test.c | 480 ------------ + erasure_code/gf_3vect_dot_prod_sse_test.c | 586 -------------- + erasure_code/gf_4vect_dot_prod_sse_test.c | 695 ----------------- + erasure_code/gf_5vect_dot_prod_sse_test.c | 805 ------------------- + erasure_code/gf_6vect_dot_prod_sse_test.c | 911 ---------------------- + 6 files changed, 3491 deletions(-) + delete mode 100644 erasure_code/gf_2vect_dot_prod_sse_test.c + delete mode 100644 erasure_code/gf_3vect_dot_prod_sse_test.c + delete mode 100644 erasure_code/gf_4vect_dot_prod_sse_test.c + delete mode 100644 erasure_code/gf_5vect_dot_prod_sse_test.c + delete mode 100644 erasure_code/gf_6vect_dot_prod_sse_test.c + +diff --git a/src/isa-l/erasure_code/Makefile.am b/src/isa-l/erasure_code/Makefile.am +index 12a3185..f1d0d1d 100644 +--- a/src/isa-l/erasure_code/Makefile.am ++++ b/src/isa-l/erasure_code/Makefile.am +@@ -149,19 +149,5 @@ perf_tests += erasure_code/gf_vect_mul_perf \ + + other_tests += erasure_code/gen_rs_matrix_limits + +-other_tests_x86_64 += \ +- erasure_code/gf_2vect_dot_prod_sse_test \ +- erasure_code/gf_3vect_dot_prod_sse_test \ +- erasure_code/gf_4vect_dot_prod_sse_test \ +- 
erasure_code/gf_5vect_dot_prod_sse_test \ +- erasure_code/gf_6vect_dot_prod_sse_test +- +-other_tests_x86_32 += \ +- erasure_code/gf_2vect_dot_prod_sse_test \ +- erasure_code/gf_3vect_dot_prod_sse_test \ +- erasure_code/gf_4vect_dot_prod_sse_test \ +- erasure_code/gf_5vect_dot_prod_sse_test \ +- erasure_code/gf_6vect_dot_prod_sse_test +- + other_src += include/test.h \ + include/types.h +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c +deleted file mode 100644 +index f4fd9d0..0000000 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c ++++ /dev/null +@@ -1,480 +0,0 @@ +-/********************************************************************** +- Copyright(c) 2011-2015 Intel Corporation All rights reserved. +- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-**********************************************************************/ +- +-#include +-#include +-#include // for memset, memcmp +-#include "erasure_code.h" +-#include "types.h" +- +-#ifndef FUNCTION_UNDER_TEST +-# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse +-#endif +-#ifndef TEST_MIN_SIZE +-# define TEST_MIN_SIZE 16 +-#endif +- +-#define str(s) #s +-#define xstr(s) str(s) +- +-#define TEST_LEN 8192 +-#define TEST_SIZE (TEST_LEN/2) +-#define TEST_MEM TEST_SIZE +-#define TEST_LOOPS 10000 +-#define TEST_TYPE_STR "" +- +-#ifndef TEST_SOURCES +-# define TEST_SOURCES 16 +-#endif +-#ifndef RANDOMS +-# define RANDOMS 20 +-#endif +- +-#ifdef EC_ALIGNED_ADDR +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 0 +-# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +-#else +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 32 +-# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +-#endif +- +-typedef unsigned char u8; +- +-extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, +- unsigned char **src, unsigned char **dest); +- +-void dump(unsigned char *buf, int len) +-{ +- int i; +- for (i = 0; i < len;) { +- printf(" %2x", 0xff & buf[i++]); +- if (i % 32 == 0) +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_matrix(unsigned char **s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", s[i][j]); +- } 
+- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_u8xu8(unsigned char *s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", 0xff & s[j + (i * m)]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-int main(int argc, char *argv[]) +-{ +- int i, j, rtest, srcs; +- void *buf; +- u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32]; +- u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; +- u8 *buffs[TEST_SOURCES]; +- +- int align, size; +- unsigned char *efence_buffs[TEST_SOURCES]; +- unsigned int offset; +- u8 *ubuffs[TEST_SOURCES]; +- u8 *udest_ptrs[2]; +- +- printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); +- +- // Allocate the arrays +- for (i = 0; i < TEST_SOURCES; i++) { +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- buffs[i] = buf; +- } +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref2 = buf; +- +- dest_ptrs[0] = dest1; +- dest_ptrs[1] = dest2; +- +- // Test of all zeros +- for (i = 0; i < TEST_SOURCES; i++) +- memset(buffs[i], 0, TEST_LEN); +- +- memset(dest1, 0, TEST_LEN); +- memset(dest2, 0, TEST_LEN); +- memset(dest_ref1, 0, TEST_LEN); +- memset(dest_ref2, 0, TEST_LEN); +- memset(g1, 2, TEST_SOURCES); +- memset(g2, 1, TEST_SOURCES); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- 
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, +- dest_ref2); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- +- putchar('.'); +- +- // Rand data test +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- buffs, dest_ref2); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Rand 
data test with varied parameters +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (srcs = TEST_SOURCES; srcs > 0; srcs--) { +- for (i = 0; i < srcs; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, +- dest_ref2); +- +- FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test1 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test2 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- +- putchar('.'); +- } +- } +- +- // Run tests at end of buffer for Electric Fence +- align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; +- for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end +- efence_buffs[i] = buffs[i] + TEST_LEN - size; +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- efence_buffs, dest_ref2); +- +- FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, align); +- printf("dprod_dut:"); +- dump(dest1, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref2, dest2, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, align); +- printf("dprod_dut:"); +- dump(dest2, align); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test rand ptr alignment if available +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); +- srcs = rand() % TEST_SOURCES; +- if (srcs == 0) +- continue; +- +- offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; +- // Add random offsets +- for (i = 0; i < srcs; i++) +- ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- memset(dest1, 0, TEST_LEN); // zero pad to check write-over +- memset(dest2, 0, TEST_LEN); +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- ubuffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); +- +- if (memcmp(dest_ref1, udest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, udest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[1], 25); +- return -1; +- } +- // Confirm that padding around dests is unchanged +- memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff +- offset = udest_ptrs[0] - dest1; +- +- if (memcmp(dest1, dest_ref1, offset)) { +- printf("Fail rand ualign pad1 start\n"); +- return -1; +- } +- if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad1 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[1] - dest2; +- if (memcmp(dest2, dest_ref1, offset)) { +- 
printf("Fail rand ualign pad2 start\n"); +- return -1; +- } +- if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad2 end\n"); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test all size alignment +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; +- +- for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { +- srcs = TEST_SOURCES; +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); +- +- if (memcmp(dest_ref1, dest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, dest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[1], 25); +- return -1; +- } +- } +- +- printf("Pass\n"); +- return 0; +- +-} +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c +deleted file mode 100644 +index 32609c7..0000000 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c ++++ /dev/null +@@ -1,586 +0,0 @@ +-/********************************************************************** +- Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+-**********************************************************************/ +- +-#include +-#include +-#include // for memset, memcmp +-#include "erasure_code.h" +-#include "types.h" +- +-#ifndef FUNCTION_UNDER_TEST +-# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse +-#endif +-#ifndef TEST_MIN_SIZE +-# define TEST_MIN_SIZE 16 +-#endif +- +-#define str(s) #s +-#define xstr(s) str(s) +- +-#define TEST_LEN 8192 +-#define TEST_SIZE (TEST_LEN/2) +-#define TEST_MEM TEST_SIZE +-#define TEST_LOOPS 10000 +-#define TEST_TYPE_STR "" +- +-#ifndef TEST_SOURCES +-# define TEST_SOURCES 16 +-#endif +-#ifndef RANDOMS +-# define RANDOMS 20 +-#endif +- +-#ifdef EC_ALIGNED_ADDR +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 0 +-# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +-#else +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 32 +-# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +-#endif +- +-typedef unsigned char u8; +- +-extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, +- unsigned char **src, unsigned char **dest); +- +-void dump(unsigned char *buf, int len) +-{ +- int i; +- for (i = 0; i < len;) { +- printf(" %2x", 0xff & buf[i++]); +- if (i % 32 == 0) +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_matrix(unsigned char **s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", s[i][j]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_u8xu8(unsigned char *s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", 0xff & s[j + (i * m)]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-int main(int argc, char *argv[]) +-{ +- int i, j, rtest, srcs; +- void *buf; +- u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; +- u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES]; +- u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, 
*dest_ref3; +- +- int align, size; +- unsigned char *efence_buffs[TEST_SOURCES]; +- unsigned int offset; +- u8 *ubuffs[TEST_SOURCES]; +- u8 *udest_ptrs[3]; +- printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN); +- +- // Allocate the arrays +- for (i = 0; i < TEST_SOURCES; i++) { +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- buffs[i] = buf; +- } +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail");; +- return -1; +- } +- dest_ref2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref3 = buf; +- +- dest_ptrs[0] = dest1; +- dest_ptrs[1] = dest2; +- dest_ptrs[2] = dest3; +- +- // Test of all zeros +- for (i = 0; i < TEST_SOURCES; i++) +- memset(buffs[i], 0, TEST_LEN); +- +- memset(dest1, 0, TEST_LEN); +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest_ref1, 0, TEST_LEN); +- memset(dest_ref2, 0, TEST_LEN); +- memset(dest_ref3, 0, TEST_LEN); +- memset(g1, 2, TEST_SOURCES); +- memset(g2, 1, TEST_SOURCES); +- memset(g3, 7, TEST_SOURCES); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, +- 
dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, +- dest_ref3); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- +- putchar('.'); +- +- // Rand data test +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- buffs, dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- buffs, dest_ref3); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); 
+- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Rand data test with varied parameters +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (srcs = TEST_SOURCES; srcs > 0; srcs--) { +- for (i = 0; i < srcs; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, +- dest_ref3); +- +- FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test1 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test2 srcs=%d\n", srcs); +- 
dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test3 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- +- putchar('.'); +- } +- } +- +- // Run tests at end of buffer for Electric Fence +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; +- for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end +- efence_buffs[i] = buffs[i] + TEST_LEN - size; +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- efence_buffs, dest_ref2); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- efence_buffs, dest_ref3); +- +- FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, align); +- printf("dprod_dut:"); +- dump(dest1, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref2, dest2, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(efence_buffs, 5, 
TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, align); +- printf("dprod_dut:"); +- dump(dest2, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref3, dest3, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, align); +- printf("dprod_dut:"); +- dump(dest3, align); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test rand ptr alignment if available +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); +- srcs = rand() % TEST_SOURCES; +- if (srcs == 0) +- continue; +- +- offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; +- // Add random offsets +- for (i = 0; i < srcs; i++) +- ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- memset(dest1, 0, TEST_LEN); // zero pad to check write-over +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- ubuffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); +- +- if (memcmp(dest_ref1, udest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- 
srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, udest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, udest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[2], 25); +- return -1; +- } +- // Confirm that padding around dests is unchanged +- memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff +- offset = udest_ptrs[0] - dest1; +- +- if (memcmp(dest1, dest_ref1, offset)) { +- printf("Fail rand ualign pad1 start\n"); +- return -1; +- } +- if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad1 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[1] - dest2; +- if (memcmp(dest2, dest_ref1, offset)) { +- printf("Fail rand ualign pad2 start\n"); +- return -1; +- } +- if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad2 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[2] - dest3; +- if (memcmp(dest3, dest_ref1, offset)) { +- printf("Fail rand ualign pad3 start\n"); +- return -1; +- } +- if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad3 end\n");; +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test all size alignment +- align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; +- +- for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { +- srcs = TEST_SOURCES; +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); +- +- if (memcmp(dest_ref1, dest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, dest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, dest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[2], 25); +- return -1; +- } +- } +- +- printf("Pass\n"); +- return 0; +- +-} +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c +deleted file mode 100644 +index 0352eef..0000000 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c ++++ /dev/null +@@ -1,695 +0,0 @@ 
+-/********************************************************************** +- Copyright(c) 2011-2015 Intel Corporation All rights reserved. +- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+-**********************************************************************/ +- +-#include +-#include +-#include // for memset, memcmp +-#include "erasure_code.h" +-#include "types.h" +- +-#ifndef FUNCTION_UNDER_TEST +-# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse +-#endif +-#ifndef TEST_MIN_SIZE +-# define TEST_MIN_SIZE 16 +-#endif +- +-#define str(s) #s +-#define xstr(s) str(s) +- +-#define TEST_LEN 8192 +-#define TEST_SIZE (TEST_LEN/2) +-#define TEST_MEM TEST_SIZE +-#define TEST_LOOPS 10000 +-#define TEST_TYPE_STR "" +- +-#ifndef TEST_SOURCES +-# define TEST_SOURCES 16 +-#endif +-#ifndef RANDOMS +-# define RANDOMS 20 +-#endif +- +-#ifdef EC_ALIGNED_ADDR +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 0 +-# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +-#else +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 32 +-# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +-#endif +- +-typedef unsigned char u8; +- +-extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, +- unsigned char **src, unsigned char **dest); +- +-void dump(unsigned char *buf, int len) +-{ +- int i; +- for (i = 0; i < len;) { +- printf(" %2x", 0xff & buf[i++]); +- if (i % 32 == 0) +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_matrix(unsigned char **s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", s[i][j]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_u8xu8(unsigned char *s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", 0xff & s[j + (i * m)]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-int main(int argc, char *argv[]) +-{ +- int i, j, rtest, srcs; +- void *buf; +- u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; +- u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES]; +- u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, 
*dest_ref2, *dest_ref3; +- u8 *dest_ref4, *dest_ptrs[4]; +- +- int align, size; +- unsigned char *efence_buffs[TEST_SOURCES]; +- unsigned int offset; +- u8 *ubuffs[TEST_SOURCES]; +- u8 *udest_ptrs[4]; +- printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); +- +- // Allocate the arrays +- for (i = 0; i < TEST_SOURCES; i++) { +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- buffs[i] = buf; +- } +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest4 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref4 = buf; +- +- dest_ptrs[0] = dest1; +- dest_ptrs[1] = dest2; +- dest_ptrs[2] = dest3; +- dest_ptrs[3] = dest4; +- +- // Test of all zeros +- for (i = 0; i < TEST_SOURCES; i++) +- memset(buffs[i], 0, TEST_LEN); +- +- memset(dest1, 0, TEST_LEN); +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest4, 0, TEST_LEN); +- memset(dest_ref1, 0, TEST_LEN); +- memset(dest_ref2, 0, TEST_LEN); +- memset(dest_ref3, 0, TEST_LEN); +- memset(dest_ref4, 0, TEST_LEN); +- memset(g1, 2, TEST_SOURCES); +- memset(g2, 1, TEST_SOURCES); +- memset(g3, 7, TEST_SOURCES); +- memset(g4, 3, TEST_SOURCES); +- 
+- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, +- dest_ref4); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- +- putchar('.'); +- +- // Rand data test +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); 
+- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- buffs, dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- buffs, dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- buffs, dest_ref4); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- +- 
putchar('.'); +- } +- +- // Rand data test with varied parameters +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (srcs = TEST_SOURCES; srcs > 0; srcs--) { +- for (i = 0; i < srcs; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, +- dest_ref4); +- +- FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test1 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test2 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test3 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test4 
srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- +- putchar('.'); +- } +- } +- +- // Run tests at end of buffer for Electric Fence +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; +- for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end +- efence_buffs[i] = buffs[i] + TEST_LEN - size; +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- efence_buffs, dest_ref2); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- efence_buffs, dest_ref3); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- efence_buffs, dest_ref4); +- +- FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, align); +- printf("dprod_dut:"); +- dump(dest1, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref2, dest2, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, align); +- 
printf("dprod_dut:"); +- dump(dest2, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref3, dest3, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, align); +- printf("dprod_dut:"); +- dump(dest3, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref4, dest4, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, align); +- printf("dprod_dut:"); +- dump(dest4, align); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test rand ptr alignment if available +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); +- srcs = rand() % TEST_SOURCES; +- if (srcs == 0) +- continue; +- +- offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; +- // Add random offsets +- for (i = 0; i < srcs; i++) +- ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- memset(dest1, 0, TEST_LEN); // zero pad to check write-over +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest4, 0, TEST_LEN); +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- ubuffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- } +- +- 
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); +- +- if (memcmp(dest_ref1, udest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, udest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, udest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, udest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[3], 25); +- return -1; +- } +- // Confirm that padding around dests is unchanged +- memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff +- offset = udest_ptrs[0] - dest1; +- +- if (memcmp(dest1, dest_ref1, offset)) { +- printf("Fail rand ualign pad1 start\n"); +- return -1; +- } +- if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad1 end\n"); +- printf("size=%d offset=%d srcs=%d\n", size, offset, 
srcs); +- return -1; +- } +- +- offset = udest_ptrs[1] - dest2; +- if (memcmp(dest2, dest_ref1, offset)) { +- printf("Fail rand ualign pad2 start\n"); +- return -1; +- } +- if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad2 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[2] - dest3; +- if (memcmp(dest3, dest_ref1, offset)) { +- printf("Fail rand ualign pad3 start\n"); +- return -1; +- } +- if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad3 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[3] - dest4; +- if (memcmp(dest4, dest_ref1, offset)) { +- printf("Fail rand ualign pad4 start\n"); +- return -1; +- } +- if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad4 end\n"); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test all size alignment +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; +- +- for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { +- srcs = TEST_SOURCES; +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); +- +- if (memcmp(dest_ref1, dest_ptrs[0], size)) { +- printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, dest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, dest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, dest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[3], 25); +- return -1; +- } +- } +- +- printf("Pass\n"); +- return 0; +- +-} +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c +deleted file mode 100644 +index 977054c..0000000 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c ++++ /dev/null +@@ -1,805 +0,0 @@ +-/********************************************************************** +- Copyright(c) 2011-2015 Intel Corporation All rights reserved. +- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. 
+- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+-**********************************************************************/ +- +-#include +-#include +-#include // for memset, memcmp +-#include "erasure_code.h" +-#include "types.h" +- +-#ifndef FUNCTION_UNDER_TEST +-# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse +-#endif +-#ifndef TEST_MIN_SIZE +-# define TEST_MIN_SIZE 16 +-#endif +- +-#define str(s) #s +-#define xstr(s) str(s) +- +-#define TEST_LEN 8192 +-#define TEST_SIZE (TEST_LEN/2) +-#define TEST_MEM TEST_SIZE +-#define TEST_LOOPS 20000 +-#define TEST_TYPE_STR "" +- +-#ifndef TEST_SOURCES +-# define TEST_SOURCES 16 +-#endif +-#ifndef RANDOMS +-# define RANDOMS 20 +-#endif +- +-#ifdef EC_ALIGNED_ADDR +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 0 +-# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +-#else +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 32 +-# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +-#endif +- +-typedef unsigned char u8; +- +-void dump(unsigned char *buf, int len) +-{ +- int i; +- for (i = 0; i < len;) { +- printf(" %2x", 0xff & buf[i++]); +- if (i % 32 == 0) +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_matrix(unsigned char **s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", s[i][j]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_u8xu8(unsigned char *s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", 0xff & s[j + (i * m)]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-int main(int argc, char *argv[]) +-{ +- int i, j, rtest, srcs; +- void *buf; +- u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; +- u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls; +- u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES]; +- u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5; +- u8 *dest_ptrs[5]; +- +- int align, size; +- unsigned char 
*efence_buffs[TEST_SOURCES]; +- unsigned int offset; +- u8 *ubuffs[TEST_SOURCES]; +- u8 *udest_ptrs[5]; +- printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); +- +- // Allocate the arrays +- for (i = 0; i < TEST_SOURCES; i++) { +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- buffs[i] = buf; +- } +- +- if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { +- printf("alloc error: Fail"); +- return -1; +- } +- g_tbls = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest4 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest5 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref4 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref5 = buf; +- +- dest_ptrs[0] = dest1; +- dest_ptrs[1] = dest2; +- dest_ptrs[2] = dest3; +- dest_ptrs[3] = dest4; +- dest_ptrs[4] = dest5; +- +- // Test of all zeros +- for (i = 0; i < TEST_SOURCES; i++) +- memset(buffs[i], 0, TEST_LEN); +- +- memset(dest1, 0, TEST_LEN); +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- 
memset(dest4, 0, TEST_LEN); +- memset(dest5, 0, TEST_LEN); +- memset(dest_ref1, 0, TEST_LEN); +- memset(dest_ref2, 0, TEST_LEN); +- memset(dest_ref3, 0, TEST_LEN); +- memset(dest_ref4, 0, TEST_LEN); +- memset(dest_ref5, 0, TEST_LEN); +- memset(g1, 2, TEST_SOURCES); +- memset(g2, 1, TEST_SOURCES); +- memset(g3, 7, TEST_SOURCES); +- memset(g4, 9, TEST_SOURCES); +- memset(g5, 4, TEST_SOURCES); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, +- dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, +- dest_ref5); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- 
printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- putchar('.'); +- +- // Rand data test +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- buffs, dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- buffs, dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- buffs, dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], +- buffs, dest_ref5); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Rand data test with varied parameters +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (srcs = TEST_SOURCES; srcs > 0; srcs--) { +- for (i = 0; i < srcs; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], 
&g_tbls[(128 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, +- dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, +- dest_ref5); +- +- FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test1 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test2 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test3 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test4 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test5 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- +- putchar('.'); +- } +- } +- +- // Run tests at end of 
buffer for Electric Fence +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; +- for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end +- efence_buffs[i] = buffs[i] + TEST_LEN - size; +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- efence_buffs, dest_ref2); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- efence_buffs, dest_ref3); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- efence_buffs, dest_ref4); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], +- efence_buffs, dest_ref5); +- +- FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, align); +- printf("dprod_dut:"); +- dump(dest1, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref2, dest2, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, align); +- printf("dprod_dut:"); +- dump(dest2, align); 
+- return -1; +- } +- +- if (0 != memcmp(dest_ref3, dest3, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, align); +- printf("dprod_dut:"); +- dump(dest3, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref4, dest4, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, align); +- printf("dprod_dut:"); +- dump(dest4, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref5, dest5, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, align); +- printf("dprod_dut:"); +- dump(dest5, align); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test rand ptr alignment if available +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); +- srcs = rand() % TEST_SOURCES; +- if (srcs == 0) +- continue; +- +- offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; +- // Add random offsets +- for (i = 0; i < srcs; i++) +- ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- memset(dest1, 0, TEST_LEN); // zero pad to check write-over +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest4, 0, TEST_LEN); +- memset(dest5, 0, TEST_LEN); +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- ubuffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); +- +- if (memcmp(dest_ref1, udest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, 
udest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, udest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, udest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[3], 25); +- return -1; +- } +- if (memcmp(dest_ref5, udest_ptrs[4], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[4], 25); +- return -1; +- } +- // Confirm that padding around dests is unchanged +- memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff +- offset = udest_ptrs[0] - dest1; +- +- if (memcmp(dest1, dest_ref1, offset)) { +- printf("Fail rand ualign pad1 start\n"); +- return -1; +- } +- if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad1 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[1] - dest2; +- if (memcmp(dest2, dest_ref1, offset)) { +- printf("Fail rand ualign pad2 start\n"); +- return -1; +- } +- if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad2 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[2] - dest3; +- if (memcmp(dest3, dest_ref1, offset)) { +- printf("Fail rand ualign pad3 start\n"); +- return -1; +- } +- if 
(memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad3 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[3] - dest4; +- if (memcmp(dest4, dest_ref1, offset)) { +- printf("Fail rand ualign pad4 start\n"); +- return -1; +- } +- if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad4 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[4] - dest5; +- if (memcmp(dest5, dest_ref1, offset)) { +- printf("Fail rand ualign pad5 start\n"); +- return -1; +- } +- if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad5 end\n"); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test all size alignment +- align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; +- +- for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { +- srcs = TEST_SOURCES; +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); +- +- if (memcmp(dest_ref1, dest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign 
len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[0], 25); +- +- return -1; +- } +- if (memcmp(dest_ref2, dest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, dest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, dest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[3], 25); +- return -1; +- } +- if (memcmp(dest_ref5, dest_ptrs[4], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[4], 25); +- return -1; +- } +- } +- +- printf("Pass\n"); +- return 0; +- +-} +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c +deleted file mode 100644 +index 96f67f1..0000000 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c ++++ /dev/null +@@ -1,911 +0,0 @@ +-/********************************************************************** +- Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+-**********************************************************************/ +- +-#include +-#include +-#include // for memset, memcmp +-#include "erasure_code.h" +-#include "types.h" +- +-#ifndef FUNCTION_UNDER_TEST +-# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse +-#endif +-#ifndef TEST_MIN_SIZE +-# define TEST_MIN_SIZE 16 +-#endif +- +-#define str(s) #s +-#define xstr(s) str(s) +- +-#define TEST_LEN 8192 +-#define TEST_SIZE (TEST_LEN/2) +-#define TEST_MEM TEST_SIZE +-#define TEST_LOOPS 20000 +-#define TEST_TYPE_STR "" +- +-#ifndef TEST_SOURCES +-# define TEST_SOURCES 16 +-#endif +-#ifndef RANDOMS +-# define RANDOMS 20 +-#endif +- +-#ifdef EC_ALIGNED_ADDR +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 0 +-# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +-#else +-// Define power of 2 range to check ptr, len alignment +-# define PTR_ALIGN_CHK_B 32 +-# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +-#endif +- +-typedef unsigned char u8; +- +-void dump(unsigned char *buf, int len) +-{ +- int i; +- for (i = 0; i < len;) { +- printf(" %2x", 0xff & buf[i++]); +- if (i % 32 == 0) +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_matrix(unsigned char **s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", s[i][j]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-void dump_u8xu8(unsigned char *s, int k, int m) +-{ +- int i, j; +- for (i = 0; i < k; i++) { +- for (j = 0; j < m; j++) { +- printf(" %2x", 0xff & s[j + (i * m)]); +- } +- printf("\n"); +- } +- printf("\n"); +-} +- +-int main(int argc, char *argv[]) +-{ +- int i, j, rtest, srcs; +- void *buf; +- u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; +- u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls; +- u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1; +- u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6; +- u8 *dest_ptrs[6], *buffs[TEST_SOURCES]; +- +- int 
align, size; +- unsigned char *efence_buffs[TEST_SOURCES]; +- unsigned int offset; +- u8 *ubuffs[TEST_SOURCES]; +- u8 *udest_ptrs[6]; +- printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); +- +- // Allocate the arrays +- for (i = 0; i < TEST_SOURCES; i++) { +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- buffs[i] = buf; +- } +- +- if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { +- printf("alloc error: Fail"); +- return -1; +- } +- g_tbls = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest4 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest5 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest6 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref1 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref2 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref3 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref4 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref5 = buf; +- +- if (posix_memalign(&buf, 64, TEST_LEN)) { +- printf("alloc error: Fail"); +- return -1; +- } +- dest_ref6 = buf; +- +- dest_ptrs[0] = dest1; +- dest_ptrs[1] = dest2; +- 
dest_ptrs[2] = dest3; +- dest_ptrs[3] = dest4; +- dest_ptrs[4] = dest5; +- dest_ptrs[5] = dest6; +- +- // Test of all zeros +- for (i = 0; i < TEST_SOURCES; i++) +- memset(buffs[i], 0, TEST_LEN); +- +- memset(dest1, 0, TEST_LEN); +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest4, 0, TEST_LEN); +- memset(dest5, 0, TEST_LEN); +- memset(dest6, 0, TEST_LEN); +- memset(dest_ref1, 0, TEST_LEN); +- memset(dest_ref2, 0, TEST_LEN); +- memset(dest_ref3, 0, TEST_LEN); +- memset(dest_ref4, 0, TEST_LEN); +- memset(dest_ref5, 0, TEST_LEN); +- memset(dest_ref6, 0, TEST_LEN); +- memset(g1, 2, TEST_SOURCES); +- memset(g2, 1, TEST_SOURCES); +- memset(g3, 7, TEST_SOURCES); +- memset(g4, 9, TEST_SOURCES); +- memset(g5, 4, TEST_SOURCES); +- memset(g6, 0xe6, TEST_SOURCES); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); +- gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, +- dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, +- dest_ref5); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs, +- dest_ref6); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); +- 
dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { +- printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n"); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, 25); +- printf("dprod_dut:"); +- dump(dest6, 25); +- return -1; +- } +- putchar('.'); +- +- // Rand data test +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- g6[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- 
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- buffs, dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- buffs, dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- buffs, dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], +- buffs, dest_ref5); +- gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], +- buffs, dest_ref6); +- +- FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- 
printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, 25); +- printf("dprod_dut:"); +- dump(dest6, 25); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Rand data test with varied parameters +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- for (srcs = TEST_SOURCES; srcs > 0; srcs--) { +- for (i = 0; i < srcs; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- g6[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); +- gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, +- dest_ref2); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, +- dest_ref3); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, +- dest_ref4); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, +- dest_ref5); +- gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs, +- dest_ref6); +- +- FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); +- +- if (0 != 
memcmp(dest_ref1, dest1, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test1 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest1, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test2 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest2, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test3 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest3, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test4 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest4, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test5 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest5, 25); +- return -1; +- } +- if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) +- " test6 srcs=%d\n", srcs); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, 25); +- printf("dprod_dut:"); +- dump(dest6, 25); +- return -1; +- } +- +- putchar('.'); +- } +- } +- +- // Run tests at end of buffer for Electric Fence +- align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; +- for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { +- for (i = 0; i < TEST_SOURCES; i++) +- for (j = 0; j < TEST_LEN; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end +- efence_buffs[i] = buffs[i] + TEST_LEN - size; +- +- for (i = 0; i < TEST_SOURCES; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- g6[i] = rand(); +- } +- +- for (i = 0; i < TEST_SOURCES; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); +- gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], +- efence_buffs, dest_ref2); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], +- efence_buffs, dest_ref3); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], +- efence_buffs, dest_ref4); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], +- efence_buffs, dest_ref5); +- gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], +- efence_buffs, dest_ref6); +- +- FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); +- +- if (0 != memcmp(dest_ref1, dest1, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, align); +- printf("dprod_dut:"); +- dump(dest1, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref2, dest2, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); +- 
dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, align); +- printf("dprod_dut:"); +- dump(dest2, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref3, dest3, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, align); +- printf("dprod_dut:"); +- dump(dest3, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref4, dest4, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, align); +- printf("dprod_dut:"); +- dump(dest4, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref5, dest5, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, align); +- printf("dprod_dut:"); +- dump(dest5, align); +- return -1; +- } +- +- if (0 != memcmp(dest_ref6, dest6, size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); +- dump_matrix(efence_buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, align); +- printf("dprod_dut:"); +- dump(dest6, align); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test rand ptr alignment if available +- +- for (rtest = 0; rtest < RANDOMS; rtest++) { +- size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); +- srcs = rand() % TEST_SOURCES; +- if (srcs == 0) +- continue; +- +- offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; +- // Add random offsets +- for (i = 0; i < srcs; i++) +- ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset)); +- +- memset(dest1, 0, TEST_LEN); // zero pad to check write-over +- memset(dest2, 0, TEST_LEN); +- memset(dest3, 0, TEST_LEN); +- memset(dest4, 0, TEST_LEN); +- memset(dest5, 0, TEST_LEN); +- memset(dest6, 0, TEST_LEN); +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- ubuffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- g6[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); +- gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); +- +- if (memcmp(dest_ref1, udest_ptrs[0], size)) { +- printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, udest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, udest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, udest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[3], 25); +- return -1; +- } +- if (memcmp(dest_ref5, udest_ptrs[4], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[4], 25); +- return -1; +- } +- if (memcmp(dest_ref6, udest_ptrs[5], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", +- srcs); +- dump_matrix(ubuffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, 25); +- printf("dprod_dut:"); +- dump(udest_ptrs[5], 25); +- return -1; +- } +- // Confirm that padding around dests is unchanged +- memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff +- offset = udest_ptrs[0] - dest1; +- +- if (memcmp(dest1, dest_ref1, offset)) { +- printf("Fail rand ualign pad1 start\n"); +- return -1; +- } +- if (memcmp(dest1 + 
offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad1 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[1] - dest2; +- if (memcmp(dest2, dest_ref1, offset)) { +- printf("Fail rand ualign pad2 start\n"); +- return -1; +- } +- if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad2 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[2] - dest3; +- if (memcmp(dest3, dest_ref1, offset)) { +- printf("Fail rand ualign pad3 start\n"); +- return -1; +- } +- if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad3 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[3] - dest4; +- if (memcmp(dest4, dest_ref1, offset)) { +- printf("Fail rand ualign pad4 start\n"); +- return -1; +- } +- if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad4 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[4] - dest5; +- if (memcmp(dest5, dest_ref1, offset)) { +- printf("Fail rand ualign pad5 start\n"); +- return -1; +- } +- if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad5 end\n"); +- return -1; +- } +- +- offset = udest_ptrs[5] - dest6; +- if (memcmp(dest6, dest_ref1, offset)) { +- printf("Fail rand ualign pad6 start\n"); +- return -1; +- } +- if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { +- printf("Fail rand ualign pad6 end\n"); +- return -1; +- } +- +- putchar('.'); +- } +- +- // Test all size alignment +- align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; +- +- for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { +- srcs = TEST_SOURCES; +- +- for (i = 0; i < srcs; i++) +- for (j = 0; j < size; j++) +- buffs[i][j] = rand(); +- +- for (i = 0; i < srcs; i++) { +- g1[i] = rand(); +- g2[i] = rand(); +- g3[i] = rand(); +- g4[i] = rand(); +- g5[i] = rand(); +- g6[i] = rand(); +- } +- +- for (i = 0; i < srcs; i++) { +- gf_vect_mul_init(g1[i], &g_tbls[i * 32]); +- gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); +- gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); +- gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); +- gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); +- gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); +- } +- +- gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); +- gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6); +- +- FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); +- +- if (memcmp(dest_ref1, dest_ptrs[0], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref1, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[0], 25); +- return -1; +- } +- if (memcmp(dest_ref2, dest_ptrs[1], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref2, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[1], 25); +- return -1; +- } +- if (memcmp(dest_ref3, dest_ptrs[2], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, 
TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref3, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[2], 25); +- return -1; +- } +- if (memcmp(dest_ref4, dest_ptrs[3], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref4, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[3], 25); +- return -1; +- } +- if (memcmp(dest_ref5, dest_ptrs[4], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref5, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[4], 25); +- return -1; +- } +- if (memcmp(dest_ref6, dest_ptrs[5], size)) { +- printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", +- size); +- dump_matrix(buffs, 5, TEST_SOURCES); +- printf("dprod_base:"); +- dump(dest_ref6, 25); +- printf("dprod_dut:"); +- dump(dest_ptrs[5], 25); +- return -1; +- } +- } +- +- printf("Pass\n"); +- return 0; +- +-} +-- +2.20.1.windows.1 + + +From 7c0ab1d459c2bd43bd900432d721acc4aaf448ad Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Fri, 6 Mar 2020 17:43:39 -0700 +Subject: [PATCH 02/42] build: Add auto regenerate of nmake file + +Change-Id: Icaa64aa35697c87779df18c3941d3df0f3256546 +Signed-off-by: Greg Tucker +--- + Makefile.nmake | 209 +++++++++++++++++++++++++++------------------ + Makefile.unx | 1 + + tools/gen_nmake.mk | 109 +++++++++++++++++++++++ + 3 files changed, 234 insertions(+), 85 deletions(-) + create mode 100644 tools/gen_nmake.mk + +diff --git a/src/isa-l/Makefile.nmake b/src/isa-l/Makefile.nmake +index ac3fcbf..8ae4223 100644 +--- a/src/isa-l/Makefile.nmake ++++ b/src/isa-l/Makefile.nmake +@@ -1,5 +1,5 @@ + ######################################################################## +-# Copyright(c) 2011-2016 Intel Corporation All rights reserved. ++# Copyright(c) 2011-2017 Intel Corporation All rights reserved. 
+ # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions +@@ -27,126 +27,141 @@ + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ######################################################################## + +-objs = \ ++# This file can be auto-regenerated with $make -f Makefile.unx Makefile.nmake ++ ++objs = \ + bin\ec_base.obj \ ++ bin\raid_base.obj \ ++ bin\crc_base.obj \ ++ bin\crc64_base.obj \ ++ bin\igzip.obj \ ++ bin\hufftables_c.obj \ ++ bin\igzip_base.obj \ ++ bin\igzip_icf_base.obj \ ++ bin\adler32_base.obj \ ++ bin\flatten_ll.obj \ ++ bin\encode_df.obj \ ++ bin\igzip_icf_body.obj \ ++ bin\huff_codes.obj \ ++ bin\igzip_inflate.obj \ ++ bin\mem_zero_detect_base.obj \ + bin\ec_highlevel_func.obj \ +- bin\ec_multibinary.obj \ +- bin\gf_2vect_dot_prod_avx.obj \ +- bin\gf_2vect_dot_prod_avx2.obj \ +- bin\gf_2vect_dot_prod_avx512.obj \ ++ bin\gf_vect_mul_sse.obj \ ++ bin\gf_vect_mul_avx.obj \ ++ bin\gf_vect_dot_prod_sse.obj \ ++ bin\gf_vect_dot_prod_avx.obj \ ++ bin\gf_vect_dot_prod_avx2.obj \ + bin\gf_2vect_dot_prod_sse.obj \ +- bin\gf_2vect_mad_avx.obj \ +- bin\gf_2vect_mad_avx2.obj \ +- bin\gf_2vect_mad_avx512.obj \ +- bin\gf_2vect_mad_sse.obj \ +- bin\gf_3vect_dot_prod_avx.obj \ +- bin\gf_3vect_dot_prod_avx2.obj \ +- bin\gf_3vect_dot_prod_avx512.obj \ + bin\gf_3vect_dot_prod_sse.obj \ +- bin\gf_3vect_mad_avx.obj \ +- bin\gf_3vect_mad_avx2.obj \ +- bin\gf_3vect_mad_avx512.obj \ +- bin\gf_3vect_mad_sse.obj \ +- bin\gf_4vect_dot_prod_avx.obj \ +- bin\gf_4vect_dot_prod_avx2.obj \ +- bin\gf_4vect_dot_prod_avx512.obj \ + bin\gf_4vect_dot_prod_sse.obj \ +- bin\gf_4vect_mad_avx.obj \ +- bin\gf_4vect_mad_avx2.obj \ +- bin\gf_4vect_mad_avx512.obj \ +- bin\gf_4vect_mad_sse.obj \ +- bin\gf_5vect_dot_prod_avx.obj \ +- bin\gf_5vect_dot_prod_avx2.obj \ + bin\gf_5vect_dot_prod_sse.obj \ +- bin\gf_5vect_mad_avx.obj \ +- bin\gf_5vect_mad_avx2.obj \ +- 
bin\gf_5vect_mad_sse.obj \ ++ bin\gf_6vect_dot_prod_sse.obj \ ++ bin\gf_2vect_dot_prod_avx.obj \ ++ bin\gf_3vect_dot_prod_avx.obj \ ++ bin\gf_4vect_dot_prod_avx.obj \ ++ bin\gf_5vect_dot_prod_avx.obj \ + bin\gf_6vect_dot_prod_avx.obj \ ++ bin\gf_2vect_dot_prod_avx2.obj \ ++ bin\gf_3vect_dot_prod_avx2.obj \ ++ bin\gf_4vect_dot_prod_avx2.obj \ ++ bin\gf_5vect_dot_prod_avx2.obj \ + bin\gf_6vect_dot_prod_avx2.obj \ +- bin\gf_6vect_dot_prod_sse.obj \ +- bin\gf_6vect_mad_avx.obj \ +- bin\gf_6vect_mad_avx2.obj \ ++ bin\gf_vect_mad_sse.obj \ ++ bin\gf_2vect_mad_sse.obj \ ++ bin\gf_3vect_mad_sse.obj \ ++ bin\gf_4vect_mad_sse.obj \ ++ bin\gf_5vect_mad_sse.obj \ + bin\gf_6vect_mad_sse.obj \ +- bin\gf_vect_dot_prod_avx.obj \ +- bin\gf_vect_dot_prod_avx2.obj \ +- bin\gf_vect_dot_prod_avx512.obj \ +- bin\gf_vect_dot_prod_sse.obj \ + bin\gf_vect_mad_avx.obj \ ++ bin\gf_2vect_mad_avx.obj \ ++ bin\gf_3vect_mad_avx.obj \ ++ bin\gf_4vect_mad_avx.obj \ ++ bin\gf_5vect_mad_avx.obj \ ++ bin\gf_6vect_mad_avx.obj \ + bin\gf_vect_mad_avx2.obj \ ++ bin\gf_2vect_mad_avx2.obj \ ++ bin\gf_3vect_mad_avx2.obj \ ++ bin\gf_4vect_mad_avx2.obj \ ++ bin\gf_5vect_mad_avx2.obj \ ++ bin\gf_6vect_mad_avx2.obj \ ++ bin\ec_multibinary.obj \ ++ bin\gf_vect_dot_prod_avx512.obj \ ++ bin\gf_2vect_dot_prod_avx512.obj \ ++ bin\gf_3vect_dot_prod_avx512.obj \ ++ bin\gf_4vect_dot_prod_avx512.obj \ ++ bin\gf_5vect_dot_prod_avx512.obj \ ++ bin\gf_6vect_dot_prod_avx512.obj \ + bin\gf_vect_mad_avx512.obj \ +- bin\gf_vect_mad_sse.obj \ +- bin\gf_vect_mul_avx.obj \ +- bin\gf_vect_mul_sse.obj \ ++ bin\gf_2vect_mad_avx512.obj \ ++ bin\gf_3vect_mad_avx512.obj \ ++ bin\gf_4vect_mad_avx512.obj \ ++ bin\gf_5vect_mad_avx512.obj \ ++ bin\gf_6vect_mad_avx512.obj \ ++ bin\xor_gen_sse.obj \ ++ bin\pq_gen_sse.obj \ ++ bin\xor_check_sse.obj \ + bin\pq_check_sse.obj \ + bin\pq_gen_avx.obj \ ++ bin\xor_gen_avx.obj \ + bin\pq_gen_avx2.obj \ ++ bin\xor_gen_avx512.obj \ + bin\pq_gen_avx512.obj \ +- bin\pq_gen_sse.obj \ +- 
bin\raid_base.obj \ + bin\raid_multibinary.obj \ +- bin\xor_check_sse.obj \ +- bin\xor_gen_avx.obj \ +- bin\xor_gen_avx512.obj \ +- bin\xor_gen_sse.obj \ + bin\crc16_t10dif_01.obj \ + bin\crc16_t10dif_by4.obj \ + bin\crc16_t10dif_02.obj \ ++ bin\crc16_t10dif_by16_10.obj \ + bin\crc16_t10dif_copy_by4.obj \ + bin\crc16_t10dif_copy_by4_02.obj \ + bin\crc32_ieee_01.obj \ + bin\crc32_ieee_02.obj \ + bin\crc32_ieee_by4.obj \ +- bin\crc32_iscsi_00.obj \ ++ bin\crc32_ieee_by16_10.obj \ + bin\crc32_iscsi_01.obj \ +- bin\crc64_base.obj \ +- bin\crc64_ecma_norm_by8.obj \ ++ bin\crc32_iscsi_00.obj \ ++ bin\crc_multibinary.obj \ ++ bin\crc64_multibinary.obj \ + bin\crc64_ecma_refl_by8.obj \ +- bin\crc64_iso_norm_by8.obj \ ++ bin\crc64_ecma_refl_by16_10.obj \ ++ bin\crc64_ecma_norm_by8.obj \ ++ bin\crc64_ecma_norm_by16_10.obj \ + bin\crc64_iso_refl_by8.obj \ +- bin\crc64_jones_norm_by8.obj \ ++ bin\crc64_iso_refl_by16_10.obj \ ++ bin\crc64_iso_norm_by8.obj \ ++ bin\crc64_iso_norm_by16_10.obj \ + bin\crc64_jones_refl_by8.obj \ +- bin\crc64_multibinary.obj \ +- bin\crc_base.obj \ +- bin\adler32_base.obj \ +- bin\crc_multibinary.obj \ +- bin\huff_codes.obj \ +- bin\hufftables_c.obj \ +- bin\igzip.obj \ +- bin\igzip_base.obj \ ++ bin\crc64_jones_refl_by16_10.obj \ ++ bin\crc64_jones_norm_by8.obj \ ++ bin\crc64_jones_norm_by16_10.obj \ ++ bin\crc32_gzip_refl_by8.obj \ ++ bin\crc32_gzip_refl_by8_02.obj \ ++ bin\crc32_gzip_refl_by16_10.obj \ + bin\igzip_body.obj \ +- bin\igzip_decode_block_stateless_01.obj \ +- bin\igzip_decode_block_stateless_04.obj \ + bin\igzip_finish.obj \ +- bin\flatten_ll.obj \ +- bin\encode_df.obj \ +- bin\encode_df_04.obj \ +- bin\proc_heap.obj \ + bin\igzip_icf_body_h1_gr_bt.obj \ + bin\igzip_icf_finish.obj \ +- bin\igzip_icf_base.obj \ +- bin\igzip_inflate.obj \ +- bin\igzip_inflate_multibinary.obj \ +- bin\igzip_multibinary.obj \ +- bin\igzip_update_histogram_01.obj \ +- bin\igzip_update_histogram_04.obj \ + bin\rfc1951_lookup.obj \ +- 
bin\crc32_gzip_refl_by8.obj \ +- bin\crc32_gzip_refl_by8_02.obj \ +- bin\crc32_gzip_refl_by16_10.obj \ + bin\adler32_sse.obj \ + bin\adler32_avx2_4.obj \ ++ bin\igzip_multibinary.obj \ ++ bin\igzip_update_histogram_01.obj \ ++ bin\igzip_update_histogram_04.obj \ ++ bin\igzip_decode_block_stateless_01.obj \ ++ bin\igzip_decode_block_stateless_04.obj \ ++ bin\igzip_inflate_multibinary.obj \ ++ bin\encode_df_04.obj \ ++ bin\encode_df_06.obj \ ++ bin\proc_heap.obj \ + bin\igzip_deflate_hash.obj \ +- bin\igzip_gen_icf_map_lh1_04.obj \ + bin\igzip_gen_icf_map_lh1_06.obj \ ++ bin\igzip_gen_icf_map_lh1_04.obj \ + bin\igzip_set_long_icf_fg_04.obj \ + bin\igzip_set_long_icf_fg_06.obj \ +- bin\igzip_icf_body.obj \ + bin\mem_zero_detect_avx.obj \ +- bin\mem_zero_detect_base.obj \ +- bin\mem_multibinary.obj \ +- bin\mem_zero_detect_sse.obj ++ bin\mem_zero_detect_sse.obj \ ++ bin\mem_multibinary.obj + +-INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/ ++INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/ + LINKFLAGS = /nologo + CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) + AFLAGS = -f win64 $(INCLUDES) $(D) +@@ -189,13 +204,27 @@ $? + {igzip}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + ++{programs}.c.obj: ++ $(CC) $(CFLAGS) /c -Fo$@ $? ++{programs}.asm.obj: ++ $(AS) $(AFLAGS) -o $@ $? ++ + {mem}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? + {mem}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? 
+ ++ + # Examples +-ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe ++ex = \ ++ xor_example.exe \ ++ crc_simple_test.exe \ ++ crc64_example.exe \ ++ igzip_example.exe \ ++ igzip_sync_flush_example.exe \ ++ ec_simple_example.exe \ ++ ec_piggyback_example.exe ++ + ex: lib $(ex) + + $(ex): $(@B).obj +@@ -215,10 +244,11 @@ checks = \ + pq_check_test.exe \ + crc16_t10dif_test.exe \ + crc16_t10dif_copy_test.exe \ +- crc32_funcs_test.exe \ + crc64_funcs_test.exe \ +- igzip_wrapper_hdr_test.exe \ ++ crc32_funcs_test.exe \ + igzip_rand_test.exe \ ++ igzip_wrapper_hdr_test.exe \ ++ checksum32_funcs_test.exe \ + mem_zero_detect_test.exe + + checks: lib $(checks) +@@ -244,21 +274,29 @@ perfs = \ + gf_vect_dot_prod_1tbl.exe \ + erasure_code_perf.exe \ + erasure_code_base_perf.exe \ +- erasure_code_sse_perf.exe \ + erasure_code_update_perf.exe \ + xor_gen_perf.exe \ + pq_gen_perf.exe \ + crc16_t10dif_perf.exe \ ++ crc16_t10dif_copy_perf.exe \ ++ crc16_t10dif_op_perf.exe \ + crc32_ieee_perf.exe \ + crc32_iscsi_perf.exe \ +- igzip_perf.exe \ +- igzip_sync_flush_perf.exe \ ++ crc64_funcs_perf.exe \ + crc32_gzip_refl_perf.exe \ ++ adler32_perf.exe \ + mem_zero_detect_perf.exe + + perfs: lib $(perfs) + $(perfs): $(@B).obj + ++progs = \ ++ igzip.exe ++ ++progs: lib $(progs) ++igzip.exe: programs\igzip_cli.obj ++ link /out:$@ $(LINKFLAGS) isa-l.lib $? ++ + clean: + -if exist *.obj del *.obj + -if exist bin\*.obj del bin\*.obj +@@ -268,4 +306,5 @@ clean: + -if exist isa-l.dll del isa-l.dll + + zlib.lib: ++igzip_perf.exe: zlib.lib + igzip_inflate_test.exe: zlib.lib +diff --git a/src/isa-l/Makefile.unx b/src/isa-l/Makefile.unx +index 16ad1ff..ada6eb1 100644 +--- a/src/isa-l/Makefile.unx ++++ b/src/isa-l/Makefile.unx +@@ -51,5 +51,6 @@ endif + lib_name := bin/isa-l.a + + include make.inc ++include tools/gen_nmake.mk + + VPATH = . 
$(units) include tests/fuzz examples/ec +diff --git a/src/isa-l/tools/gen_nmake.mk b/src/isa-l/tools/gen_nmake.mk +new file mode 100644 +index 0000000..b998ee5 +--- /dev/null ++++ b/src/isa-l/tools/gen_nmake.mk +@@ -0,0 +1,109 @@ ++FORCE: ++Makefile.nmake: FORCE ++ @echo Regenerating $@ ++ @echo '########################################################################' > $@ ++ @cat LICENSE | sed -e 's/^/#/ ' >> $@ ++ @echo '########################################################################' >> $@ ++ @echo '' >> $@ ++ @echo '# This file can be auto-regenerated with $$make -f Makefile.unx $@' >> $@ ++ @echo '' >> $@ ++ @echo -n 'objs =' >> $@ ++ @$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'INCLUDES = $(INCLUDE)' >> $@ ++ @echo 'LINKFLAGS = /nologo' >> $@ ++ @echo 'CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $$(INCLUDES) $$(D)' >> $@ ++ @echo 'AFLAGS = -f win64 $$(INCLUDES) $$(D)' >> $@ ++ @echo 'CC = icl' >> $@ ++ @echo 'AS = yasm' >> $@ ++ @echo '' >> $@ ++ @echo 'lib: bin static dll' >> $@ ++ @echo 'static: bin isa-l_static.lib' >> $@ ++ @echo 'dll: bin isa-l.dll' >> $@ ++ @echo '' >> $@ ++ @echo 'bin: ; -mkdir $$@' >> $@ ++ @echo '' >> $@ ++ @echo 'isa-l_static.lib: $$(objs)' >> $@ ++ @echo ' lib -out:$$@ @<<' >> $@ ++ @echo '$$?' >> $@ ++ @echo '<<' >> $@ ++ @echo '' >> $@ ++ @echo 'isa-l.dll: $$(objs)' >> $@ ++ @echo ' link -out:$$@ -dll -def:isa-l.def @<<' >> $@ ++ @echo '$$?' 
>> $@ ++ @echo '<<' >> $@ ++ @echo '' >> $@ ++ @$(foreach b, $(units), \ ++ printf "{%s}.c.obj:\n\t\$$(CC) \$$(CFLAGS) /c -Fo\$$@ \$$?\n{%s}.asm.obj:\n\t\$$(AS) \$$(AFLAGS) -o \$$@ \$$?\n\n" $(b) $(b) >> $@; ) ++ @echo '' >> $@ ++ifneq (,$(examples)) ++ @echo "# Examples" >> $@ ++ @echo -n 'ex =' >> $@ ++ @$(foreach ex, $(notdir $(examples)), printf " %s\n\t%s.exe" \\ $(ex) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'ex: lib $$(ex)' >> $@ ++ @echo '' >> $@ ++ @echo '$$(ex): $$(@B).obj' >> $@ ++endif ++ @echo '' >> $@ ++ @echo '.obj.exe:' >> $@ ++ @echo ' link /out:$$@ $$(LINKFLAGS) isa-l.lib $$?' >> $@ ++ @echo '' >> $@ ++ @echo '# Check tests' >> $@ ++ @echo -n 'checks =' >> $@ ++ @$(foreach check, $(notdir $(check_tests)), printf " %s\n\t%s.exe" \\ $(check) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'checks: lib $$(checks)' >> $@ ++ @echo '$$(checks): $$(@B).obj' >> $@ ++ @echo 'check: $$(checks)' >> $@ ++ @echo ' !$$?' >> $@ ++ @echo '' >> $@ ++ @echo '# Unit tests' >> $@ ++ @echo -n 'tests =' >> $@ ++ @$(foreach test, $(notdir $(unit_tests)), printf " %s\n\t%s.exe" \\ $(test) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'tests: lib $$(tests)' >> $@ ++ @echo '$$(tests): $$(@B).obj' >> $@ ++ @echo '' >> $@ ++ @echo '# Performance tests' >> $@ ++ @echo -n 'perfs =' >> $@ ++ @$(foreach perf, $(notdir $(perf_tests)), printf " %s\n\t%s.exe" \\ $(perf) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'perfs: lib $$(perfs)' >> $@ ++ @echo '$$(perfs): $$(@B).obj' >> $@ ++ @echo '' >> $@ ++ @echo -n 'progs =' >> $@ ++ @$(foreach prog, $(notdir $(bin_PROGRAMS)), printf " %s\n\t%s.exe" \\ $(prog) >> $@; ) ++ @echo '' >> $@ ++ @echo '' >> $@ ++ @echo 'progs: lib $$(progs)' >> $@ ++ @$(foreach p, $(notdir $(bin_PROGRAMS)), \ ++ printf "%s.exe: %s\n\tlink /out:\$$@ \$$(LINKFLAGS) isa-l.lib \$$?\n" $(p) $(subst /,\\,$(programs_$(p)_SOURCES:.c=.obj)) >> $@; ) ++ @echo '' >> $@ ++ @echo 'clean:' >> $@ ++ @echo ' -if exist *.obj del *.obj' >> 
$@ ++ @echo ' -if exist bin\*.obj del bin\*.obj' >> $@ ++ @echo ' -if exist isa-l_static.lib del isa-l_static.lib' >> $@ ++ @echo ' -if exist *.exe del *.exe' >> $@ ++ @echo ' -if exist isa-l.lib del isa-l.lib' >> $@ ++ @echo ' -if exist isa-l.dll del isa-l.dll' >> $@ ++ @echo '' >> $@ ++ $(if $(findstring igzip,$(units)),@echo 'zlib.lib:' >> $@ ) ++ @cat $(foreach unit,$(units), $(unit)/Makefile.am) | sed \ ++ -e '/: /!d' \ ++ -e 's/\([^ :]*\)[ ]*/\1.exe /g' \ ++ -e :c -e 's/:\(.*\).exe/:\1/;tc' \ ++ -e 's/\.o[ $$]/.obj /g' \ ++ -e 's/\.o\.exe[ ]:/.obj:/g' \ ++ -e '/CFLAGS_.*+=/d' \ ++ -e '/:.*\%.*:/d' \ ++ -e 's/ :/:/' \ ++ -e 's/LDLIBS *+=//' \ ++ -e 's/-lz/zlib.lib/' \ ++ -e 's/ $$//' \ ++ >> $@ +-- +2.20.1.windows.1 + + +From 472e7011e8f670ace5464c68fc55ae20a24ceea5 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Mon, 16 Mar 2020 16:12:54 -0700 +Subject: [PATCH 03/42] ec: Change use of windows macro save_xmm128 to vec + +For builds under windows this could emit a non-vec mov that's not optimal for +AVX versions. 
+ +Change-Id: I31e6ea3b62d48c5a13f6e83f8d684f0b5551087b +Signed-off-by: Greg Tucker +--- + erasure_code/gf_2vect_dot_prod_avx.asm | 6 ++--- + erasure_code/gf_3vect_dot_prod_avx.asm | 12 +++++----- + erasure_code/gf_4vect_dot_prod_avx.asm | 18 +++++++-------- + erasure_code/gf_5vect_dot_prod_avx.asm | 20 ++++++++-------- + erasure_code/gf_6vect_dot_prod_avx.asm | 20 ++++++++-------- + erasure_code/gf_vect_mul_avx.asm | 10 ++++---- + raid/pq_gen_avx.asm | 32 +++++++++++++------------- + 7 files changed, 59 insertions(+), 59 deletions(-) + +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +index 6b68d93..99bc95a 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +@@ -84,9 +84,9 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r13, 3*16 + 1*8 + save_reg r14, 3*16 + 2*8 +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +index b006cf1..a8b46e8 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +@@ -87,12 +87,12 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 +- save_xmm128 xmm9, 3*16 +- save_xmm128 xmm10, 4*16 +- save_xmm128 xmm11, 5*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 ++ vmovdqa [rsp + 3*16], xmm9 ++ vmovdqa [rsp + 4*16], xmm10 ++ vmovdqa [rsp + 5*16], xmm11 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm 
b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +index ccfc8ce..fbb58cc 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +@@ -95,15 +95,15 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 +- save_xmm128 xmm9, 3*16 +- save_xmm128 xmm10, 4*16 +- save_xmm128 xmm11, 5*16 +- save_xmm128 xmm12, 6*16 +- save_xmm128 xmm13, 7*16 +- save_xmm128 xmm14, 8*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 ++ vmovdqa [rsp + 3*16], xmm9 ++ vmovdqa [rsp + 4*16], xmm10 ++ vmovdqa [rsp + 5*16], xmm11 ++ vmovdqa [rsp + 6*16], xmm12 ++ vmovdqa [rsp + 7*16], xmm13 ++ vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +index eb1c15e..d955fc4 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +@@ -89,16 +89,16 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 +- save_xmm128 xmm9, 3*16 +- save_xmm128 xmm10, 4*16 +- save_xmm128 xmm11, 5*16 +- save_xmm128 xmm12, 6*16 +- save_xmm128 xmm13, 7*16 +- save_xmm128 xmm14, 8*16 +- save_xmm128 xmm15, 9*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 ++ vmovdqa [rsp + 3*16], xmm9 ++ vmovdqa [rsp + 4*16], xmm10 ++ vmovdqa [rsp + 5*16], xmm11 ++ vmovdqa [rsp + 6*16], xmm12 ++ vmovdqa [rsp + 7*16], xmm13 ++ vmovdqa [rsp + 8*16], xmm14 ++ vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm 
b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +index a519d52..f64e9ef 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +@@ -89,16 +89,16 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 +- save_xmm128 xmm9, 3*16 +- save_xmm128 xmm10, 4*16 +- save_xmm128 xmm11, 5*16 +- save_xmm128 xmm12, 6*16 +- save_xmm128 xmm13, 7*16 +- save_xmm128 xmm14, 8*16 +- save_xmm128 xmm15, 9*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 ++ vmovdqa [rsp + 3*16], xmm9 ++ vmovdqa [rsp + 4*16], xmm10 ++ vmovdqa [rsp + 5*16], xmm11 ++ vmovdqa [rsp + 6*16], xmm12 ++ vmovdqa [rsp + 7*16], xmm13 ++ vmovdqa [rsp + 8*16], xmm14 ++ vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 +diff --git a/src/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +index c1a9b97..ec6a64f 100644 +--- a/src/isa-l/erasure_code/gf_vect_mul_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +@@ -56,11 +56,11 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm13, 2*16 +- save_xmm128 xmm14, 3*16 +- save_xmm128 xmm15, 4*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm13 ++ vmovdqa [rsp + 3*16], xmm14 ++ vmovdqa [rsp + 4*16], xmm15 + end_prolog + %endmacro + +diff --git a/src/isa-l/raid/pq_gen_avx.asm b/src/isa-l/raid/pq_gen_avx.asm +index 54c0ded..513530c 100644 +--- a/src/isa-l/raid/pq_gen_avx.asm ++++ b/src/isa-l/raid/pq_gen_avx.asm +@@ -63,26 +63,26 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_xmm128 xmm6, 0*16 +- save_xmm128 xmm7, 1*16 +- save_xmm128 xmm8, 2*16 +- save_xmm128 xmm9, 3*16 +- save_xmm128 xmm10, 
4*16 +- save_xmm128 xmm11, 5*16 +- save_xmm128 xmm14, 6*16 +- save_xmm128 xmm15, 7*16 ++ vmovdqa [rsp + 0*16], xmm6 ++ vmovdqa [rsp + 1*16], xmm7 ++ vmovdqa [rsp + 2*16], xmm8 ++ vmovdqa [rsp + 3*16], xmm9 ++ vmovdqa [rsp + 4*16], xmm10 ++ vmovdqa [rsp + 5*16], xmm11 ++ vmovdqa [rsp + 6*16], xmm14 ++ vmovdqa [rsp + 7*16], xmm15 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 +- movdqa xmm6, [rsp + 0*16] +- movdqa xmm7, [rsp + 1*16] +- movdqa xmm8, [rsp + 2*16] +- movdqa xmm9, [rsp + 3*16] +- movdqa xmm10, [rsp + 4*16] +- movdqa xmm11, [rsp + 5*16] +- movdqa xmm14, [rsp + 6*16] +- movdqa xmm15, [rsp + 7*16] ++ vmovdqa xmm6, [rsp + 0*16] ++ vmovdqa xmm7, [rsp + 1*16] ++ vmovdqa xmm8, [rsp + 2*16] ++ vmovdqa xmm9, [rsp + 3*16] ++ vmovdqa xmm10, [rsp + 4*16] ++ vmovdqa xmm11, [rsp + 5*16] ++ vmovdqa xmm14, [rsp + 6*16] ++ vmovdqa xmm15, [rsp + 7*16] + add rsp, stack_size + %endmacro + %endif +-- +2.20.1.windows.1 + + +From 5ab40c79cc2ef7f8dd3ca5fdd1726dee80ff259a Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Tue, 17 Mar 2020 17:52:06 -0700 +Subject: [PATCH 04/42] ec: Fix windows reg push for avx512 + +Push of registers overlapped xmm push. Error was not reachable without windows +nasm support and so went undetected. 
+ +Change-Id: I0ffd66f6d32ac37ea03fe9b11924968aa50f8fa7 +Signed-off-by: Greg Tucker +--- + erasure_code/gf_5vect_dot_prod_avx512.asm | 32 +++++++++++------------ + erasure_code/gf_6vect_dot_prod_avx512.asm | 32 +++++++++++------------ + 2 files changed, 32 insertions(+), 32 deletions(-) + +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +index 8b80d2b..41e266b 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +@@ -113,14 +113,14 @@ + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 +- save_reg r12, 9*16 + 0*8 +- save_reg r13, 9*16 + 1*8 +- save_reg r14, 9*16 + 2*8 +- save_reg r15, 9*16 + 3*8 +- save_reg rdi, 9*16 + 4*8 +- save_reg rsi, 9*16 + 5*8 +- save_reg rbp, 9*16 + 6*8 +- save_reg rbx, 9*16 + 7*8 ++ save_reg r12, 10*16 + 0*8 ++ save_reg r13, 10*16 + 1*8 ++ save_reg r14, 10*16 + 2*8 ++ save_reg r15, 10*16 + 3*8 ++ save_reg rdi, 10*16 + 4*8 ++ save_reg rsi, 10*16 + 5*8 ++ save_reg rbp, 10*16 + 6*8 ++ save_reg rbx, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro +@@ -136,14 +136,14 @@ + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] +- mov r12, [rsp + 9*16 + 0*8] +- mov r13, [rsp + 9*16 + 1*8] +- mov r14, [rsp + 9*16 + 2*8] +- mov r15, [rsp + 9*16 + 3*8] +- mov rdi, [rsp + 9*16 + 4*8] +- mov rsi, [rsp + 9*16 + 5*8] +- mov rbp, [rsp + 9*16 + 6*8] +- mov rbx, [rsp + 9*16 + 7*8] ++ mov r12, [rsp + 10*16 + 0*8] ++ mov r13, [rsp + 10*16 + 1*8] ++ mov r14, [rsp + 10*16 + 2*8] ++ mov r15, [rsp + 10*16 + 3*8] ++ mov rdi, [rsp + 10*16 + 4*8] ++ mov rsi, [rsp + 10*16 + 5*8] ++ mov rbp, [rsp + 10*16 + 6*8] ++ mov rbx, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro + %endif +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +index bfcfacb..b2bd002 100644 +--- 
a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +@@ -113,14 +113,14 @@ + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 +- save_reg r12, 9*16 + 0*8 +- save_reg r13, 9*16 + 1*8 +- save_reg r14, 9*16 + 2*8 +- save_reg r15, 9*16 + 3*8 +- save_reg rdi, 9*16 + 4*8 +- save_reg rsi, 9*16 + 5*8 +- save_reg rbp, 9*16 + 6*8 +- save_reg rbx, 9*16 + 7*8 ++ save_reg r12, 10*16 + 0*8 ++ save_reg r13, 10*16 + 1*8 ++ save_reg r14, 10*16 + 2*8 ++ save_reg r15, 10*16 + 3*8 ++ save_reg rdi, 10*16 + 4*8 ++ save_reg rsi, 10*16 + 5*8 ++ save_reg rbp, 10*16 + 6*8 ++ save_reg rbx, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro +@@ -136,14 +136,14 @@ + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] +- mov r12, [rsp + 9*16 + 0*8] +- mov r13, [rsp + 9*16 + 1*8] +- mov r14, [rsp + 9*16 + 2*8] +- mov r15, [rsp + 9*16 + 3*8] +- mov rdi, [rsp + 9*16 + 4*8] +- mov rsi, [rsp + 9*16 + 5*8] +- mov rbp, [rsp + 9*16 + 6*8] +- mov rbx, [rsp + 9*16 + 7*8] ++ mov r12, [rsp + 10*16 + 0*8] ++ mov r13, [rsp + 10*16 + 1*8] ++ mov r14, [rsp + 10*16 + 2*8] ++ mov r15, [rsp + 10*16 + 3*8] ++ mov rdi, [rsp + 10*16 + 4*8] ++ mov rsi, [rsp + 10*16 + 5*8] ++ mov rbp, [rsp + 10*16 + 6*8] ++ mov rbx, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro + %endif +-- +2.20.1.windows.1 + + +From ede04f0a1f4d7ea00cbd13d33c9db35d17575a38 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Mon, 16 Mar 2020 16:23:55 -0700 +Subject: [PATCH 05/42] build: Fix for windows to allow nasm use + +Previously windows build could only use yasm because some procedural items such +as proc_start were not supported by nasm. This adds a few macros and fixes so +nasm can be used to build on windows. 
+ +Change-Id: Ia05dc3ff482f33b0f915bb1be3c7df5e4a753b3a +Signed-off-by: Greg Tucker +--- + crc/crc16_t10dif_01.asm | 2 +- + crc/crc16_t10dif_02.asm | 2 +- + crc/crc16_t10dif_by16_10.asm | 2 +- + crc/crc16_t10dif_by4.asm | 2 +- + crc/crc16_t10dif_copy_by4.asm | 2 +- + crc/crc16_t10dif_copy_by4_02.asm | 2 +- + crc/crc32_gzip_refl_by16_10.asm | 2 +- + crc/crc32_gzip_refl_by8.asm | 2 +- + crc/crc32_gzip_refl_by8_02.asm | 2 +- + crc/crc32_ieee_01.asm | 2 +- + crc/crc32_ieee_02.asm | 2 +- + crc/crc32_ieee_by16_10.asm | 2 +- + crc/crc32_ieee_by4.asm | 2 +- + crc/crc32_iscsi_00.asm | 2 +- + crc/crc32_iscsi_01.asm | 2 +- + crc/crc64_ecma_norm_by8.asm | 2 +- + crc/crc64_ecma_refl_by8.asm | 2 +- + crc/crc64_iso_norm_by16_10.asm | 2 +- + crc/crc64_iso_norm_by8.asm | 2 +- + crc/crc64_iso_refl_by16_10.asm | 2 +- + crc/crc64_iso_refl_by8.asm | 2 +- + crc/crc64_jones_norm_by8.asm | 2 +- + crc/crc64_jones_refl_by8.asm | 2 +- + crc/crc_multibinary.asm | 6 +-- + erasure_code/gf_2vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_2vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_2vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_2vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_2vect_mad_avx.asm | 2 +- + erasure_code/gf_2vect_mad_avx2.asm | 2 +- + erasure_code/gf_2vect_mad_avx512.asm | 2 +- + erasure_code/gf_2vect_mad_sse.asm | 2 +- + erasure_code/gf_3vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_3vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_3vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_3vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_3vect_mad_avx.asm | 2 +- + erasure_code/gf_3vect_mad_avx2.asm | 2 +- + erasure_code/gf_3vect_mad_avx512.asm | 2 +- + erasure_code/gf_3vect_mad_sse.asm | 2 +- + erasure_code/gf_4vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_4vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_4vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_4vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_4vect_mad_avx.asm | 2 +- + erasure_code/gf_4vect_mad_avx2.asm | 2 +- + 
erasure_code/gf_4vect_mad_avx512.asm | 2 +- + erasure_code/gf_4vect_mad_sse.asm | 2 +- + erasure_code/gf_5vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_5vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_5vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_5vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_5vect_mad_avx.asm | 2 +- + erasure_code/gf_5vect_mad_avx2.asm | 2 +- + erasure_code/gf_5vect_mad_avx512.asm | 2 +- + erasure_code/gf_5vect_mad_sse.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_6vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_6vect_mad_avx.asm | 2 +- + erasure_code/gf_6vect_mad_avx2.asm | 2 +- + erasure_code/gf_6vect_mad_avx512.asm | 2 +- + erasure_code/gf_6vect_mad_sse.asm | 2 +- + erasure_code/gf_vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_vect_mad_avx.asm | 2 +- + erasure_code/gf_vect_mad_avx2.asm | 2 +- + erasure_code/gf_vect_mad_avx512.asm | 2 +- + erasure_code/gf_vect_mad_sse.asm | 2 +- + erasure_code/gf_vect_mul_avx.asm | 2 +- + erasure_code/gf_vect_mul_sse.asm | 2 +- + igzip/adler32_avx2_4.asm | 5 +- + igzip/adler32_sse.asm | 6 ++- + igzip/encode_df_04.asm | 3 ++ + igzip/encode_df_06.asm | 3 ++ + igzip/igzip_body.asm | 5 ++ + igzip/igzip_decode_block_stateless.asm | 4 ++ + igzip/igzip_deflate_hash.asm | 4 ++ + igzip/igzip_finish.asm | 5 ++ + igzip/igzip_gen_icf_map_lh1_04.asm | 4 ++ + igzip/igzip_gen_icf_map_lh1_06.asm | 4 ++ + igzip/igzip_icf_body_h1_gr_bt.asm | 4 ++ + igzip/igzip_icf_finish.asm | 4 ++ + igzip/igzip_set_long_icf_fg_04.asm | 4 ++ + igzip/igzip_set_long_icf_fg_06.asm | 4 ++ + igzip/igzip_update_histogram.asm | 4 ++ + igzip/proc_heap.asm | 4 ++ + igzip/rfc1951_lookup.asm | 2 +- + include/multibinary.asm | 2 +- + include/reg_sizes.asm | 66 
++++++++++++++++------- + mem/mem_zero_detect_avx.asm | 2 +- + mem/mem_zero_detect_sse.asm | 2 +- + raid/pq_check_sse.asm | 2 +- + raid/pq_check_sse_i32.asm | 2 +- + raid/pq_gen_avx.asm | 2 +- + raid/pq_gen_avx2.asm | 2 +- + raid/pq_gen_avx512.asm | 2 +- + raid/pq_gen_sse.asm | 2 +- + raid/pq_gen_sse_i32.asm | 2 +- + raid/raid_multibinary.asm | 4 +- + raid/xor_check_sse.asm | 2 +- + raid/xor_gen_avx.asm | 2 +- + raid/xor_gen_avx512.asm | 2 +- + raid/xor_gen_sse.asm | 2 +- + 107 files changed, 206 insertions(+), 113 deletions(-) + +diff --git a/src/isa-l/crc/crc16_t10dif_01.asm b/src/isa-l/crc/crc16_t10dif_01.asm +index f79cd3f..33f4555 100644 +--- a/src/isa-l/crc/crc16_t10dif_01.asm ++++ b/src/isa-l/crc/crc16_t10dif_01.asm +@@ -73,7 +73,7 @@ section .text + %endif + + align 16 +-global crc16_t10dif_01:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif_01, function + crc16_t10dif_01: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc16_t10dif_02.asm b/src/isa-l/crc/crc16_t10dif_02.asm +index 97fa5e4..157ac53 100644 +--- a/src/isa-l/crc/crc16_t10dif_02.asm ++++ b/src/isa-l/crc/crc16_t10dif_02.asm +@@ -73,7 +73,7 @@ section .text + %endif + + align 16 +-global crc16_t10dif_02:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif_02, function + crc16_t10dif_02: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc16_t10dif_by16_10.asm b/src/isa-l/crc/crc16_t10dif_by16_10.asm +index 9f39307..479b635 100644 +--- a/src/isa-l/crc/crc16_t10dif_by16_10.asm ++++ b/src/isa-l/crc/crc16_t10dif_by16_10.asm +@@ -82,7 +82,7 @@ section .text + %endif + + align 16 +-global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION ++mk_global FUNCTION_NAME, function + FUNCTION_NAME: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc16_t10dif_by4.asm b/src/isa-l/crc/crc16_t10dif_by4.asm +index 722ed95..bde071a 100644 +--- a/src/isa-l/crc/crc16_t10dif_by4.asm ++++ 
b/src/isa-l/crc/crc16_t10dif_by4.asm +@@ -66,7 +66,7 @@ section .text + %endif + + align 16 +-global crc16_t10dif_by4:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif_by4, function + crc16_t10dif_by4: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4.asm b/src/isa-l/crc/crc16_t10dif_copy_by4.asm +index fd9b754..0f82d69 100644 +--- a/src/isa-l/crc/crc16_t10dif_copy_by4.asm ++++ b/src/isa-l/crc/crc16_t10dif_copy_by4.asm +@@ -69,7 +69,7 @@ section .text + %endif + + align 16 +-global crc16_t10dif_copy_by4:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif_copy_by4, function + crc16_t10dif_copy_by4: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm +index e12d81f..1a7338f 100644 +--- a/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm ++++ b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm +@@ -69,7 +69,7 @@ section .text + %endif + + align 16 +-global crc16_t10dif_copy_by4_02:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif_copy_by4_02, function + crc16_t10dif_copy_by4_02: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits +diff --git a/src/isa-l/crc/crc32_gzip_refl_by16_10.asm b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm +index 40236f6..69cb366 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by16_10.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm +@@ -92,7 +92,7 @@ section .text + %endif + + align 16 +-global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION ++mk_global FUNCTION_NAME, function + FUNCTION_NAME: + + not arg1_low32 +diff --git a/src/isa-l/crc/crc32_gzip_refl_by8.asm b/src/isa-l/crc/crc32_gzip_refl_by8.asm +index 62f7e7d..780ae35 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by8.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by8.asm +@@ -86,7 +86,7 @@ section .text + %endif + + align 16 +-global crc32_gzip_refl_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_gzip_refl_by8, function + crc32_gzip_refl_by8: + + 
; unsigned long c = crc ^ 0xffffffffL; +diff --git a/src/isa-l/crc/crc32_gzip_refl_by8_02.asm b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm +index 80d849e..bba5ae6 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by8_02.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm +@@ -86,7 +86,7 @@ section .text + %endif + + align 16 +-global crc32_gzip_refl_by8_02:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_gzip_refl_by8_02, function + crc32_gzip_refl_by8_02: + not arg1_low32 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc32_ieee_01.asm b/src/isa-l/crc/crc32_ieee_01.asm +index 32495ed..5b9d465 100644 +--- a/src/isa-l/crc/crc32_ieee_01.asm ++++ b/src/isa-l/crc/crc32_ieee_01.asm +@@ -72,7 +72,7 @@ section .text + %define VARIABLE_OFFSET 16*2+8 + %endif + align 16 +-global crc32_ieee_01:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_ieee_01, function + crc32_ieee_01: + + not arg1_low32 ;~init_crc +diff --git a/src/isa-l/crc/crc32_ieee_02.asm b/src/isa-l/crc/crc32_ieee_02.asm +index 8a472b0..411e923 100644 +--- a/src/isa-l/crc/crc32_ieee_02.asm ++++ b/src/isa-l/crc/crc32_ieee_02.asm +@@ -72,7 +72,7 @@ section .text + %define VARIABLE_OFFSET 16*2+8 + %endif + align 16 +-global crc32_ieee_02:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_ieee_02, function + crc32_ieee_02: + + not arg1_low32 ;~init_crc +diff --git a/src/isa-l/crc/crc32_ieee_by16_10.asm b/src/isa-l/crc/crc32_ieee_by16_10.asm +index 200fd93..c6aa741 100644 +--- a/src/isa-l/crc/crc32_ieee_by16_10.asm ++++ b/src/isa-l/crc/crc32_ieee_by16_10.asm +@@ -82,7 +82,7 @@ section .text + %endif + + align 16 +-global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION ++mk_global FUNCTION_NAME, function + FUNCTION_NAME: + + not arg1_low32 +diff --git a/src/isa-l/crc/crc32_ieee_by4.asm b/src/isa-l/crc/crc32_ieee_by4.asm +index 39bed5a..2ce2289 100644 +--- a/src/isa-l/crc/crc32_ieee_by4.asm ++++ b/src/isa-l/crc/crc32_ieee_by4.asm +@@ -74,7 +74,7 @@ section .text + %endif + + align 16 +-global crc32_ieee_by4:ISAL_SYM_TYPE_FUNCTION ++mk_global 
crc32_ieee_by4, function + crc32_ieee_by4: + + not arg1_low32 +diff --git a/src/isa-l/crc/crc32_iscsi_00.asm b/src/isa-l/crc/crc32_iscsi_00.asm +index 4f81e3a..e1ad903 100644 +--- a/src/isa-l/crc/crc32_iscsi_00.asm ++++ b/src/isa-l/crc/crc32_iscsi_00.asm +@@ -153,7 +153,7 @@ default rel + ;;; crc_init = r8 + ;;; + +-global crc32_iscsi_00:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_iscsi_00, function + crc32_iscsi_00: + + %ifidn __OUTPUT_FORMAT__, elf64 +diff --git a/src/isa-l/crc/crc32_iscsi_01.asm b/src/isa-l/crc/crc32_iscsi_01.asm +index 2a81517..30adb04 100644 +--- a/src/isa-l/crc/crc32_iscsi_01.asm ++++ b/src/isa-l/crc/crc32_iscsi_01.asm +@@ -50,7 +50,7 @@ default rel + ;;; len = rdx + ;;; crc_init = r8 + +-global crc32_iscsi_01:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_iscsi_01, function + crc32_iscsi_01: + + %ifidn __OUTPUT_FORMAT__, elf64 +diff --git a/src/isa-l/crc/crc64_ecma_norm_by8.asm b/src/isa-l/crc/crc64_ecma_norm_by8.asm +index 6770e34..5599d98 100644 +--- a/src/isa-l/crc/crc64_ecma_norm_by8.asm ++++ b/src/isa-l/crc/crc64_ecma_norm_by8.asm +@@ -62,7 +62,7 @@ section .text + %define VARIABLE_OFFSET 16*2+8 + %endif + align 16 +-global crc64_ecma_norm_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_ecma_norm_by8, function + crc64_ecma_norm_by8: + + not arg1 ;~init_crc +diff --git a/src/isa-l/crc/crc64_ecma_refl_by8.asm b/src/isa-l/crc/crc64_ecma_refl_by8.asm +index e6518f4..b641934 100644 +--- a/src/isa-l/crc/crc64_ecma_refl_by8.asm ++++ b/src/isa-l/crc/crc64_ecma_refl_by8.asm +@@ -68,7 +68,7 @@ section .text + + + align 16 +-global crc64_ecma_refl_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_ecma_refl_by8, function + crc64_ecma_refl_by8: + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 +diff --git a/src/isa-l/crc/crc64_iso_norm_by16_10.asm b/src/isa-l/crc/crc64_iso_norm_by16_10.asm +index c9f38b3..28630a1 100644 +--- a/src/isa-l/crc/crc64_iso_norm_by16_10.asm ++++ b/src/isa-l/crc/crc64_iso_norm_by16_10.asm +@@ -69,7 +69,7 @@ section .text + %endif + + 
align 16 +-global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION ++mk_global FUNCTION_NAME, function + FUNCTION_NAME: + not arg1 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc64_iso_norm_by8.asm b/src/isa-l/crc/crc64_iso_norm_by8.asm +index 9bc38ec..887fca8 100644 +--- a/src/isa-l/crc/crc64_iso_norm_by8.asm ++++ b/src/isa-l/crc/crc64_iso_norm_by8.asm +@@ -61,7 +61,7 @@ section .text + %define VARIABLE_OFFSET 16*2+8 + %endif + align 16 +-global crc64_iso_norm_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_iso_norm_by8, function + crc64_iso_norm_by8: + + not arg1 ;~init_crc +diff --git a/src/isa-l/crc/crc64_iso_refl_by16_10.asm b/src/isa-l/crc/crc64_iso_refl_by16_10.asm +index 0ee9b93..d58ac0a 100644 +--- a/src/isa-l/crc/crc64_iso_refl_by16_10.asm ++++ b/src/isa-l/crc/crc64_iso_refl_by16_10.asm +@@ -70,7 +70,7 @@ section .text + %endif + + align 16 +-global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION ++mk_global FUNCTION_NAME, function + FUNCTION_NAME: + not arg1 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc64_iso_refl_by8.asm b/src/isa-l/crc/crc64_iso_refl_by8.asm +index 564a510..3abc5da 100644 +--- a/src/isa-l/crc/crc64_iso_refl_by8.asm ++++ b/src/isa-l/crc/crc64_iso_refl_by8.asm +@@ -65,7 +65,7 @@ section .text + + + align 16 +-global crc64_iso_refl_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_iso_refl_by8, function + crc64_iso_refl_by8: + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 +diff --git a/src/isa-l/crc/crc64_jones_norm_by8.asm b/src/isa-l/crc/crc64_jones_norm_by8.asm +index 44ad726..bc3b521 100644 +--- a/src/isa-l/crc/crc64_jones_norm_by8.asm ++++ b/src/isa-l/crc/crc64_jones_norm_by8.asm +@@ -61,7 +61,7 @@ section .text + %define VARIABLE_OFFSET 16*2+8 + %endif + align 16 +-global crc64_jones_norm_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_jones_norm_by8, function + crc64_jones_norm_by8: + + not arg1 ;~init_crc +diff --git a/src/isa-l/crc/crc64_jones_refl_by8.asm b/src/isa-l/crc/crc64_jones_refl_by8.asm +index 7081f54..a9ea19a 100644 +--- 
a/src/isa-l/crc/crc64_jones_refl_by8.asm ++++ b/src/isa-l/crc/crc64_jones_refl_by8.asm +@@ -65,7 +65,7 @@ section .text + + + align 16 +-global crc64_jones_refl_by8:ISAL_SYM_TYPE_FUNCTION ++mk_global crc64_jones_refl_by8, function + crc64_jones_refl_by8: + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 +diff --git a/src/isa-l/crc/crc_multibinary.asm b/src/isa-l/crc/crc_multibinary.asm +index 9628b16..b1f425a 100644 +--- a/src/isa-l/crc/crc_multibinary.asm ++++ b/src/isa-l/crc/crc_multibinary.asm +@@ -79,7 +79,7 @@ section .text + ;;;; + ; crc32_iscsi multibinary function + ;;;; +-global crc32_iscsi:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_iscsi, function + crc32_iscsi_mbinit: + call crc32_iscsi_dispatch_init + crc32_iscsi: +@@ -113,7 +113,7 @@ crc32_iscsi_dispatch_init: + ;;;; + ; crc32_ieee multibinary function + ;;;; +-global crc32_ieee:ISAL_SYM_TYPE_FUNCTION ++mk_global crc32_ieee, function + crc32_ieee_mbinit: + call crc32_ieee_dispatch_init + crc32_ieee: +@@ -192,7 +192,7 @@ crc32_ieee_dispatch_init: + ;;;; + ; crc16_t10dif multibinary function + ;;;; +-global crc16_t10dif:ISAL_SYM_TYPE_FUNCTION ++mk_global crc16_t10dif, function + crc16_t10dif_mbinit: + call crc16_t10dif_dispatch_init + crc16_t10dif: +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +index 99bc95a..f512d7d 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +@@ -238,7 +238,7 @@ section .text + %endif + + align 16 +-global gf_2vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_dot_prod_avx, function + + func(gf_2vect_dot_prod_avx) + FUNC_SAVE +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm +index db37b0e..ba704d0 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm +@@ -248,7 +248,7 @@ section .text + %endif + + 
align 16 +-global gf_2vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_dot_prod_avx2, function + + func(gf_2vect_dot_prod_avx2) + FUNC_SAVE +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm +index 470051d..2444216 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm +@@ -160,7 +160,7 @@ default rel + section .text + + align 16 +-global gf_2vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_dot_prod_avx512, function + func(gf_2vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm +index 05a0c28..7e1006b 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm +@@ -238,7 +238,7 @@ section .text + %endif + + align 16 +-global gf_2vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_dot_prod_sse, function + + func(gf_2vect_dot_prod_sse) + FUNC_SAVE +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm +index fcf3a75..65af8b0 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm +@@ -155,7 +155,7 @@ section .text + + + align 16 +-global gf_2vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_mad_avx, function + + func(gf_2vect_mad_avx) + FUNC_SAVE +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm +index 0e77ebe..f4c1cae 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm +@@ -163,7 +163,7 @@ section .text + %define xtmpd2 ymm9 + + align 16 +-global gf_2vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_mad_avx2, function + + func(gf_2vect_mad_avx2) + FUNC_SAVE +diff --git 
a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm +index 6d972bb..5a35a89 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm +@@ -149,7 +149,7 @@ section .text + %define xmask0f zmm14 + + align 16 +-global gf_2vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_mad_avx512, function + func(gf_2vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm +index 7ee1b24..c85b431 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm +@@ -154,7 +154,7 @@ section .text + + + align 16 +-global gf_2vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_2vect_mad_sse, function + func(gf_2vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +index a8b46e8..deb44d0 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +@@ -261,7 +261,7 @@ section .text + %endif + + align 16 +-global gf_3vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_dot_prod_avx, function + func(gf_3vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm +index 38dddcf..fa55dd6 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm +@@ -269,7 +269,7 @@ section .text + %endif + + align 16 +-global gf_3vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_dot_prod_avx2, function + func(gf_3vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm +index 057cd37..eecde81 100644 +--- 
a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm +@@ -173,7 +173,7 @@ default rel + section .text + + align 16 +-global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_dot_prod_avx512, function + func(gf_3vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm +index da0bdf9..2b13e71 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm +@@ -261,7 +261,7 @@ section .text + %endif + + align 16 +-global gf_3vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_dot_prod_sse, function + func(gf_3vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm +index 1f40eb7..4aea710 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm +@@ -158,7 +158,7 @@ section .text + %define xd3 xtmph1 + + align 16 +-global gf_3vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_mad_avx, function + func(gf_3vect_mad_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm +index 0b36661..e8071dd 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm +@@ -165,7 +165,7 @@ section .text + %define xd3 ymm10 + + align 16 +-global gf_3vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_mad_avx2, function + func(gf_3vect_mad_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm +index dcafbc7..b8b8d9b 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm +@@ -152,7 +152,7 @@ section .text + %define 
xmask0f zmm17 + + align 16 +-global gf_3vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_mad_avx512, function + func(gf_3vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm +index 0d9028b..10744ec 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm +@@ -156,7 +156,7 @@ section .text + %define xd3 xtmph1 + + align 16 +-global gf_3vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_3vect_mad_sse, function + func(gf_3vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +index fbb58cc..f436048 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +@@ -294,7 +294,7 @@ section .text + %define xp4 xmm5 + %endif + align 16 +-global gf_4vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_dot_prod_avx, function + func(gf_4vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm +index 181a18d..0c7ae4e 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm +@@ -302,7 +302,7 @@ section .text + %define xp4 ymm5 + %endif + align 16 +-global gf_4vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_dot_prod_avx2, function + func(gf_4vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm +index 9288678..6d67426 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm +@@ -191,7 +191,7 @@ default rel + section .text + + align 16 +-global gf_4vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION 
++mk_global gf_4vect_dot_prod_avx512, function + func(gf_4vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm +index b329624..25134c7 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm +@@ -294,7 +294,7 @@ section .text + %define xp4 xmm5 + %endif + align 16 +-global gf_4vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_dot_prod_sse, function + func(gf_4vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm +index 62441c1..284c76b 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm +@@ -169,7 +169,7 @@ section .text + %define xd4 xtmpl1 + + align 16 +-global gf_4vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_mad_avx, function + func(gf_4vect_mad_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm +index 9a7b7d9..bf6cc7e 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm +@@ -165,7 +165,7 @@ section .text + %define xd4 ymm10 + + align 16 +-global gf_4vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_mad_avx2, function + func(gf_4vect_mad_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm +index bc836af..3948ab1 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm +@@ -159,7 +159,7 @@ section .text + %define xtmpl5 zmm23 + + align 16 +-global gf_4vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_mad_avx512, function + func(gf_4vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git 
a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm +index c3d4c5d..377b31f 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm +@@ -168,7 +168,7 @@ section .text + %define xd4 xtmpl1 + + align 16 +-global gf_4vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_4vect_mad_sse, function + func(gf_4vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +index d955fc4..3226dde 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +@@ -184,7 +184,7 @@ section .text + %define xp5 xmm6 + + align 16 +-global gf_5vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_dot_prod_avx, function + func(gf_5vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm +index dfafd8a..4bee087 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm +@@ -189,7 +189,7 @@ section .text + %define xp5 ymm6 + + align 16 +-global gf_5vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_dot_prod_avx2, function + func(gf_5vect_dot_prod_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +index 41e266b..e955ea5 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +@@ -211,7 +211,7 @@ default rel + section .text + + align 16 +-global gf_5vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_dot_prod_avx512, function + func(gf_5vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm 
b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm +index 59b0ac2..5ff9460 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm +@@ -184,7 +184,7 @@ section .text + %define xp5 xmm14 + + align 16 +-global gf_5vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_dot_prod_sse, function + func(gf_5vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm +index 696b6a0..ccdbc6e 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm +@@ -178,7 +178,7 @@ section .text + + + align 16 +-global gf_5vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_mad_avx, function + func(gf_5vect_mad_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm +index 3c65c05..ac61437 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm +@@ -166,7 +166,7 @@ section .text + %define xd5 ymm9 + + align 16 +-global gf_5vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_mad_avx2, function + func(gf_5vect_mad_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm +index 96b498c..5de47d1 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm +@@ -167,7 +167,7 @@ section .text + %define xtmph5 zmm27 + + align 16 +-global gf_5vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_mad_avx512, function + func(gf_5vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm +index b16f405..fc99aaf 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm 
+@@ -177,7 +177,7 @@ section .text + + + align 16 +-global gf_5vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_5vect_mad_sse, function + func(gf_5vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +index f64e9ef..1f9df8d 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +@@ -182,7 +182,7 @@ section .text + %define xp6 xmm7 + + align 16 +-global gf_6vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_dot_prod_avx, function + func(gf_6vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm +index a57c52a..ccb4e77 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm +@@ -187,7 +187,7 @@ section .text + %define xp6 ymm7 + + align 16 +-global gf_6vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_dot_prod_avx2, function + func(gf_6vect_dot_prod_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +index b2bd002..6ebfd26 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +@@ -215,7 +215,7 @@ default rel + section .text + + align 16 +-global gf_6vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_dot_prod_avx512, function + func(gf_6vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm +index b628811..51bd116 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm +@@ -182,7 +182,7 @@ section .text + %define xp6 xmm13 + + align 16 +-global 
gf_6vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_dot_prod_sse, function + func(gf_6vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm +index f2e04cd..4e20dbb 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm +@@ -184,7 +184,7 @@ section .text + + + align 16 +-global gf_6vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_mad_avx, function + func(gf_6vect_mad_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm +index b344532..45d750e 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm +@@ -177,7 +177,7 @@ section .text + %define xd6 xd1 + + align 16 +-global gf_6vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_mad_avx2, function + func(gf_6vect_mad_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm +index 5f31bf1..6ae11f3 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm +@@ -181,7 +181,7 @@ section .text + %define xtmph6 zmm31 + + align 16 +-global gf_6vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_mad_avx512, function + func(gf_6vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm +index 4fed2aa..695fd6b 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm +@@ -185,7 +185,7 @@ section .text + + + align 16 +-global gf_6vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_6vect_mad_sse, function + func(gf_6vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm 
b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm +index c123a3d..179e985 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm +@@ -194,7 +194,7 @@ section .text + %define xp xmm2 + + align 16 +-global gf_vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_dot_prod_avx, function + func(gf_vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm +index f84dd47..2cfa0f0 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm +@@ -202,7 +202,7 @@ section .text + %define xp ymm2 + + align 16 +-global gf_vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_dot_prod_avx2, function + func(gf_vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +index ad01fcf..203e95d 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +@@ -128,7 +128,7 @@ default rel + section .text + + align 16 +-global gf_vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_dot_prod_avx512, function + func(gf_vect_dot_prod_avx512) + FUNC_SAVE + xor pos, pos +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm +index 108fa36..602bd89 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm +@@ -194,7 +194,7 @@ section .text + %define xp xmm2 + + align 16 +-global gf_vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_dot_prod_sse, function + func(gf_vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx.asm b/src/isa-l/erasure_code/gf_vect_mad_avx.asm +index f444d11..2b0e623 100644 +--- 
a/src/isa-l/erasure_code/gf_vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx.asm +@@ -131,7 +131,7 @@ section .text + %define xtmpd xmm5 + + align 16 +-global gf_vect_mad_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_mad_avx, function + func(gf_vect_mad_avx) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm +index b65d0aa..9941fca 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm +@@ -139,7 +139,7 @@ section .text + %define xtmpd ymm5 + + align 16 +-global gf_vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_mad_avx2, function + func(gf_vect_mad_avx2) + FUNC_SAVE + sub len, 32 +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm +index 44fb653..931e0cc 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm +@@ -127,7 +127,7 @@ section .text + %define xmask0f zmm8 + + align 16 +-global gf_vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_mad_avx512, function + func(gf_vect_mad_avx512) + FUNC_SAVE + sub len, 64 +diff --git a/src/isa-l/erasure_code/gf_vect_mad_sse.asm b/src/isa-l/erasure_code/gf_vect_mad_sse.asm +index 8d7e5ee..1ea69fe 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_sse.asm +@@ -131,7 +131,7 @@ section .text + + + align 16 +-global gf_vect_mad_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_mad_sse, function + func(gf_vect_mad_sse) + FUNC_SAVE + sub len, 16 +diff --git a/src/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +index ec6a64f..0186bbc 100644 +--- a/src/isa-l/erasure_code/gf_vect_mul_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +@@ -111,7 +111,7 @@ section .text + %define xtmp2c xmm7 + + align 16 +-global gf_vect_mul_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global 
gf_vect_mul_avx, function + func(gf_vect_mul_avx) + FUNC_SAVE + mov pos, 0 +diff --git a/src/isa-l/erasure_code/gf_vect_mul_sse.asm b/src/isa-l/erasure_code/gf_vect_mul_sse.asm +index 36323d6..bad257a 100644 +--- a/src/isa-l/erasure_code/gf_vect_mul_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_mul_sse.asm +@@ -112,7 +112,7 @@ section .text + + + align 16 +-global gf_vect_mul_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global gf_vect_mul_sse, function + func(gf_vect_mul_sse) + FUNC_SAVE + mov pos, 0 +diff --git a/src/isa-l/igzip/adler32_avx2_4.asm b/src/isa-l/igzip/adler32_avx2_4.asm +index 8f9d6d5..62c62bb 100644 +--- a/src/isa-l/igzip/adler32_avx2_4.asm ++++ b/src/isa-l/igzip/adler32_avx2_4.asm +@@ -123,8 +123,11 @@ default rel + %define yshuf0 ymm6 + %define yshuf1 ymm7 + ++[bits 64] ++default rel ++section .text + +-global adler32_avx2_4:ISAL_SYM_TYPE_FUNCTION ++mk_global adler32_avx2_4, function + func(adler32_avx2_4) + FUNC_SAVE + +diff --git a/src/isa-l/igzip/adler32_sse.asm b/src/isa-l/igzip/adler32_sse.asm +index 83f577d..6aea7cb 100644 +--- a/src/isa-l/igzip/adler32_sse.asm ++++ b/src/isa-l/igzip/adler32_sse.asm +@@ -104,7 +104,11 @@ default rel + %define xdata1 xmm3 + %define xsa xmm4 + +-global adler32_sse:ISAL_SYM_TYPE_FUNCTION ++[bits 64] ++default rel ++section .text ++ ++mk_global adler32_sse, function + func(adler32_sse) + FUNC_SAVE + +diff --git a/src/isa-l/igzip/encode_df_04.asm b/src/isa-l/igzip/encode_df_04.asm +index 81287cc..2c52af8 100644 +--- a/src/isa-l/igzip/encode_df_04.asm ++++ b/src/isa-l/igzip/encode_df_04.asm +@@ -172,6 +172,9 @@ stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size + + %endmacro + ++default rel ++section .text ++ + global encode_deflate_icf_ %+ ARCH + encode_deflate_icf_ %+ ARCH: + FUNC_SAVE +diff --git a/src/isa-l/igzip/encode_df_06.asm b/src/isa-l/igzip/encode_df_06.asm +index 9fa5163..aaec754 100644 +--- a/src/isa-l/igzip/encode_df_06.asm ++++ b/src/isa-l/igzip/encode_df_06.asm +@@ -185,6 +185,9 @@ 
stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size + + %endmacro + ++default rel ++section .text ++ + global encode_deflate_icf_ %+ ARCH + encode_deflate_icf_ %+ ARCH: + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_body.asm b/src/isa-l/igzip/igzip_body.asm +index 43de234..d69b27c 100644 +--- a/src/isa-l/igzip/igzip_body.asm ++++ b/src/isa-l/igzip/igzip_body.asm +@@ -134,6 +134,11 @@ stack_size equ 4*8 + 8*8 + 4*16 + 8 + %if ARCH == 04 + %define USE_HSWNI + %endif ++ ++[bits 64] ++default rel ++section .text ++ + ; void isal_deflate_body ( isal_zstream *stream ) + ; arg 1: rcx: addr of stream + global isal_deflate_body_ %+ ARCH +diff --git a/src/isa-l/igzip/igzip_decode_block_stateless.asm b/src/isa-l/igzip/igzip_decode_block_stateless.asm +index f5e35cd..733194b 100644 +--- a/src/isa-l/igzip/igzip_decode_block_stateless.asm ++++ b/src/isa-l/igzip/igzip_decode_block_stateless.asm +@@ -459,6 +459,10 @@ stack_size equ 4 * 8 + 8 * 8 + decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits + %endm + ++[bits 64] ++default rel ++section .text ++ + global decode_huffman_code_block_stateless_ %+ ARCH + decode_huffman_code_block_stateless_ %+ ARCH %+ : + +diff --git a/src/isa-l/igzip/igzip_deflate_hash.asm b/src/isa-l/igzip/igzip_deflate_hash.asm +index b61c4be..bcb0d5d 100644 +--- a/src/isa-l/igzip/igzip_deflate_hash.asm ++++ b/src/isa-l/igzip/igzip_deflate_hash.asm +@@ -98,6 +98,10 @@ + %endif + %endm + ++[bits 64] ++default rel ++section .text ++ + global isal_deflate_hash_crc_01 + isal_deflate_hash_crc_01: + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_finish.asm b/src/isa-l/igzip/igzip_finish.asm +index 36823e1..fbf8839 100644 +--- a/src/isa-l/igzip/igzip_finish.asm ++++ b/src/isa-l/igzip/igzip_finish.asm +@@ -85,6 +85,11 @@ + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + f_end_i_mem_offset equ 0 ; local variable (8 bytes) + stack_size equ 8 ++ 
++[bits 64] ++default rel ++section .text ++ + ; void isal_deflate_finish ( isal_zstream *stream ) + ; arg 1: rcx: addr of stream + global isal_deflate_finish_01 +diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm +index 9b5e85a..077f56c 100644 +--- a/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm ++++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm +@@ -169,6 +169,10 @@ + %define VECT_SIZE 8 + %define HASH_BYTES 2 + ++[bits 64] ++default rel ++section .text ++ + global gen_icf_map_lh1_04 + func(gen_icf_map_lh1_04) + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm +index 69af940..d134357 100644 +--- a/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm ++++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm +@@ -160,6 +160,10 @@ + %define VECT_SIZE 16 + %define HASH_BYTES 2 + ++[bits 64] ++default rel ++section .text ++ + global gen_icf_map_lh1_06 + func(gen_icf_map_lh1_06) + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm +index c059178..51871c5 100644 +--- a/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm ++++ b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm +@@ -155,6 +155,10 @@ stack_size equ 11*8 + 8*8 + 4*16 + %define USE_HSWNI + %endif + ++[bits 64] ++default rel ++section .text ++ + ; void isal_deflate_icf_body ( isal_zstream *stream ) + ; we make 6 different versions of this function + ; arg 1: rcx: addr of stream +diff --git a/src/isa-l/igzip/igzip_icf_finish.asm b/src/isa-l/igzip/igzip_icf_finish.asm +index ccff445..b9f88a9 100644 +--- a/src/isa-l/igzip/igzip_icf_finish.asm ++++ b/src/isa-l/igzip/igzip_icf_finish.asm +@@ -94,6 +94,10 @@ stack_size equ 5*8 + + %xdefine METHOD hash_hist + ++[bits 64] ++default rel ++section .text ++ + ; void isal_deflate_icf_finish ( isal_zstream *stream ) + ; arg 1: rcx: addr of stream + global isal_deflate_icf_finish_ %+ METHOD %+ _01 +diff --git 
a/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm +index f5c2b98..070e614 100644 +--- a/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm ++++ b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm +@@ -135,6 +135,10 @@ default rel + %endif + %define VECT_SIZE 8 + ++[bits 64] ++default rel ++section .text ++ + global set_long_icf_fg_04 + func(set_long_icf_fg_04) + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm +index 39708ed..b36871c 100644 +--- a/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm ++++ b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm +@@ -142,6 +142,10 @@ + %endif + %define VECT_SIZE 16 + ++[bits 64] ++default rel ++section .text ++ + global set_long_icf_fg_06 + func(set_long_icf_fg_06) + FUNC_SAVE +diff --git a/src/isa-l/igzip/igzip_update_histogram.asm b/src/isa-l/igzip/igzip_update_histogram.asm +index 34ecaf1..e1939ad 100644 +--- a/src/isa-l/igzip/igzip_update_histogram.asm ++++ b/src/isa-l/igzip/igzip_update_histogram.asm +@@ -249,6 +249,10 @@ _hash_offset equ (_dist_offset + 8 * DIST_LEN) + cmovle %%dist_coded, %%dist + %endm + ++[bits 64] ++default rel ++section .text ++ + ; void isal_update_histogram + global isal_update_histogram_ %+ ARCH + isal_update_histogram_ %+ ARCH %+ : +diff --git a/src/isa-l/igzip/proc_heap.asm b/src/isa-l/igzip/proc_heap.asm +index 40b18ab..5ed9c8e 100644 +--- a/src/isa-l/igzip/proc_heap.asm ++++ b/src/isa-l/igzip/proc_heap.asm +@@ -54,6 +54,10 @@ + %define i r11 + %define tmp2 r12 + ++[bits 64] ++default rel ++section .text ++ + global build_huff_tree + build_huff_tree: + %ifidn __OUTPUT_FORMAT__, win64 +diff --git a/src/isa-l/igzip/rfc1951_lookup.asm b/src/isa-l/igzip/rfc1951_lookup.asm +index ebf81aa..0701402 100644 +--- a/src/isa-l/igzip/rfc1951_lookup.asm ++++ b/src/isa-l/igzip/rfc1951_lookup.asm +@@ -45,7 +45,7 @@ section .data + ;; uint16_t len_start[32]; + ;; }; + +-global 
rfc1951_lookup_table:ISAL_SYM_TYPE_DATA_INTERNAL ++mk_global rfc1951_lookup_table, data, internal + rfc1951_lookup_table: + len_to_code: + db 0x00, 0x00, 0x00 +diff --git a/src/isa-l/include/multibinary.asm b/src/isa-l/include/multibinary.asm +index bd3a529..16838cb 100644 +--- a/src/isa-l/include/multibinary.asm ++++ b/src/isa-l/include/multibinary.asm +@@ -69,7 +69,7 @@ + mbin_def_ptr %1_mbinit + + section .text +- global %1:ISAL_SYM_TYPE_FUNCTION ++ mk_global %1, function + %1_mbinit: + ;;; only called the first time to setup hardware match + call %1_dispatch_init +diff --git a/src/isa-l/include/reg_sizes.asm b/src/isa-l/include/reg_sizes.asm +index fec6a8a..37d61f8 100644 +--- a/src/isa-l/include/reg_sizes.asm ++++ b/src/isa-l/include/reg_sizes.asm +@@ -30,14 +30,6 @@ + %ifndef _REG_SIZES_ASM_ + %define _REG_SIZES_ASM_ + +-%ifdef __NASM_VER__ +-%ifidn __OUTPUT_FORMAT__, win64 +-%error nasm not supported in windows +-%else +-%define endproc_frame +-%endif +-%endif +- + %ifndef AS_FEATURE_LEVEL + %define AS_FEATURE_LEVEL 4 + %endif +@@ -220,19 +212,57 @@ section .text + %define WRT_OPT + %endif + ++%macro mk_global 1-3 ++ %ifdef __NASM_VER__ ++ %ifidn __OUTPUT_FORMAT__, macho64 ++ global %1 ++ %elifidn __OUTPUT_FORMAT__, win64 ++ global %1 ++ %else ++ global %1:%2 %3 ++ %endif ++ %else ++ global %1:%2 %3 ++ %endif ++%endmacro ++ ++ ++; Fixes for nasm lack of MS proc helpers ++%ifdef __NASM_VER__ ++ %ifidn __OUTPUT_FORMAT__, win64 ++ %macro alloc_stack 1 ++ sub rsp, %1 ++ %endmacro ++ ++ %macro proc_frame 1 ++ %1: ++ %endmacro ++ ++ %macro save_xmm128 2 ++ movdqa [rsp + %2], %1 ++ %endmacro ++ ++ %macro save_reg 2 ++ mov [rsp + %2], %1 ++ %endmacro ++ ++ %macro rex_push_reg 1 ++ push %1 ++ %endmacro ++ ++ %macro push_reg 1 ++ push %1 ++ %endmacro ++ ++ %define end_prolog ++ %endif ++ ++ %define endproc_frame ++%endif ++ + %ifidn __OUTPUT_FORMAT__, macho64 + %define elf64 macho64 + mac_equ equ 1 +- %ifdef __NASM_VER__ +- %define ISAL_SYM_TYPE_FUNCTION +- %define 
ISAL_SYM_TYPE_DATA_INTERNAL +- %else +- %define ISAL_SYM_TYPE_FUNCTION function +- %define ISAL_SYM_TYPE_DATA_INTERNAL data internal +- %endif +-%else +- %define ISAL_SYM_TYPE_FUNCTION function +- %define ISAL_SYM_TYPE_DATA_INTERNAL data internal + %endif + + %macro slversion 4 +diff --git a/src/isa-l/mem/mem_zero_detect_avx.asm b/src/isa-l/mem/mem_zero_detect_avx.asm +index 871b652..e85e08d 100644 +--- a/src/isa-l/mem/mem_zero_detect_avx.asm ++++ b/src/isa-l/mem/mem_zero_detect_avx.asm +@@ -73,7 +73,7 @@ default rel + section .text + + align 16 +-global mem_zero_detect_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global mem_zero_detect_avx, function + func(mem_zero_detect_avx) + FUNC_SAVE + mov pos, 0 +diff --git a/src/isa-l/mem/mem_zero_detect_sse.asm b/src/isa-l/mem/mem_zero_detect_sse.asm +index 63dad4f..78350aa 100644 +--- a/src/isa-l/mem/mem_zero_detect_sse.asm ++++ b/src/isa-l/mem/mem_zero_detect_sse.asm +@@ -73,7 +73,7 @@ default rel + section .text + + align 16 +-global mem_zero_detect_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global mem_zero_detect_sse, function + func(mem_zero_detect_sse) + FUNC_SAVE + mov pos, 0 +diff --git a/src/isa-l/raid/pq_check_sse.asm b/src/isa-l/raid/pq_check_sse.asm +index 57cab3a..ca32051 100644 +--- a/src/isa-l/raid/pq_check_sse.asm ++++ b/src/isa-l/raid/pq_check_sse.asm +@@ -122,7 +122,7 @@ default rel + section .text + + align 16 +-global pq_check_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_check_sse, function + func(pq_check_sse) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_check_sse_i32.asm b/src/isa-l/raid/pq_check_sse_i32.asm +index 1c3b95d..f05d43a 100644 +--- a/src/isa-l/raid/pq_check_sse_i32.asm ++++ b/src/isa-l/raid/pq_check_sse_i32.asm +@@ -141,7 +141,7 @@ + section .text + + align 16 +-global pq_check_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_check_sse, function + func(pq_check_sse) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_gen_avx.asm 
b/src/isa-l/raid/pq_gen_avx.asm +index 513530c..57d2b22 100644 +--- a/src/isa-l/raid/pq_gen_avx.asm ++++ b/src/isa-l/raid/pq_gen_avx.asm +@@ -125,7 +125,7 @@ default rel + section .text + + align 16 +-global pq_gen_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_gen_avx, function + func(pq_gen_avx) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_gen_avx2.asm b/src/isa-l/raid/pq_gen_avx2.asm +index 2222151..7def9ea 100644 +--- a/src/isa-l/raid/pq_gen_avx2.asm ++++ b/src/isa-l/raid/pq_gen_avx2.asm +@@ -126,7 +126,7 @@ default rel + section .text + + align 16 +-global pq_gen_avx2:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_gen_avx2, function + func(pq_gen_avx2) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_gen_avx512.asm b/src/isa-l/raid/pq_gen_avx512.asm +index 639cef2..9ec6584 100644 +--- a/src/isa-l/raid/pq_gen_avx512.asm ++++ b/src/isa-l/raid/pq_gen_avx512.asm +@@ -123,7 +123,7 @@ default rel + section .text + + align 16 +-global pq_gen_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_gen_avx512, function + func(pq_gen_avx512) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_gen_sse.asm b/src/isa-l/raid/pq_gen_sse.asm +index f95e75a..4c5a349 100644 +--- a/src/isa-l/raid/pq_gen_sse.asm ++++ b/src/isa-l/raid/pq_gen_sse.asm +@@ -122,7 +122,7 @@ default rel + section .text + + align 16 +-global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_gen_sse, function + func(pq_gen_sse) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git a/src/isa-l/raid/pq_gen_sse_i32.asm b/src/isa-l/raid/pq_gen_sse_i32.asm +index 57064b7..7a918f4 100644 +--- a/src/isa-l/raid/pq_gen_sse_i32.asm ++++ b/src/isa-l/raid/pq_gen_sse_i32.asm +@@ -140,7 +140,7 @@ + section .text + + align 16 +-global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_gen_sse, function + func(pq_gen_sse) + FUNC_SAVE + sub vec, 3 ;Keep as offset to last source +diff --git 
a/src/isa-l/raid/raid_multibinary.asm b/src/isa-l/raid/raid_multibinary.asm +index 3180627..c84e5ef 100644 +--- a/src/isa-l/raid/raid_multibinary.asm ++++ b/src/isa-l/raid/raid_multibinary.asm +@@ -72,7 +72,7 @@ section .text + ;;;; + ; pq_check multibinary function + ;;;; +-global pq_check:ISAL_SYM_TYPE_FUNCTION ++mk_global pq_check, function + pq_check_mbinit: + call pq_check_dispatch_init + pq_check: +@@ -104,7 +104,7 @@ pq_check_dispatch_init: + ;;;; + ; xor_check multibinary function + ;;;; +-global xor_check:ISAL_SYM_TYPE_FUNCTION ++mk_global xor_check, function + xor_check_mbinit: + call xor_check_dispatch_init + xor_check: +diff --git a/src/isa-l/raid/xor_check_sse.asm b/src/isa-l/raid/xor_check_sse.asm +index 395ee20..9620412 100644 +--- a/src/isa-l/raid/xor_check_sse.asm ++++ b/src/isa-l/raid/xor_check_sse.asm +@@ -137,7 +137,7 @@ + section .text + + align 16 +-global xor_check_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global xor_check_sse, function + func(xor_check_sse) + FUNC_SAVE + %ifidn PS,8 ;64-bit code +diff --git a/src/isa-l/raid/xor_gen_avx.asm b/src/isa-l/raid/xor_gen_avx.asm +index c1e5be0..cddd539 100644 +--- a/src/isa-l/raid/xor_gen_avx.asm ++++ b/src/isa-l/raid/xor_gen_avx.asm +@@ -100,7 +100,7 @@ default rel + section .text + + align 16 +-global xor_gen_avx:ISAL_SYM_TYPE_FUNCTION ++mk_global xor_gen_avx, function + func(xor_gen_avx) + + FUNC_SAVE +diff --git a/src/isa-l/raid/xor_gen_avx512.asm b/src/isa-l/raid/xor_gen_avx512.asm +index 4beae09..552c590 100644 +--- a/src/isa-l/raid/xor_gen_avx512.asm ++++ b/src/isa-l/raid/xor_gen_avx512.asm +@@ -103,7 +103,7 @@ default rel + section .text + + align 16 +-global xor_gen_avx512:ISAL_SYM_TYPE_FUNCTION ++mk_global xor_gen_avx512, function + func(xor_gen_avx512) + FUNC_SAVE + sub vec, 2 ;Keep as offset to last source +diff --git a/src/isa-l/raid/xor_gen_sse.asm b/src/isa-l/raid/xor_gen_sse.asm +index a1c16c7..7509548 100644 +--- a/src/isa-l/raid/xor_gen_sse.asm ++++ b/src/isa-l/raid/xor_gen_sse.asm +@@ 
-137,7 +137,7 @@ + section .text + + align 16 +-global xor_gen_sse:ISAL_SYM_TYPE_FUNCTION ++mk_global xor_gen_sse, function + func(xor_gen_sse) + FUNC_SAVE + %ifidn PS,8 ;64-bit code +-- +2.20.1.windows.1 + + +From af13ed6136c530f44a1047b7e95a06cbb545fb3d Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Fri, 20 Mar 2020 12:26:30 -0700 +Subject: [PATCH 06/42] ec: Fix second windows reg push for avx512 + +Change improper stack push in windows prolog. Error was not reachable without +windows nasm support and so went undetected. + +Change-Id: I8b715195d1c8efd173843c043d42fc610ddebd17 +Signed-off-by: Greg Tucker +--- + erasure_code/gf_vect_dot_prod_avx512.asm | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +index 203e95d..8a02fd8 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +@@ -73,15 +73,15 @@ + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size +- save_reg r12, 9*16 + 0*8 +- save_reg r15, 9*16 + 3*8 ++ save_reg r12, 0*8 ++ save_reg r15, 1*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 +- mov r12, [rsp + 9*16 + 0*8] +- mov r15, [rsp + 9*16 + 3*8] ++ mov r12, [rsp + 0*8] ++ mov r15, [rsp + 1*8] + add rsp, stack_size + %endmacro + %endif +-- +2.20.1.windows.1 + + +From 85f947e1202558926357876166a889c8bf53ccf5 Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Tue, 17 Mar 2020 13:18:56 +0800 +Subject: [PATCH 07/42] ci: remove unused drone configuration + +Change-Id: I20bded8111deb122757dbf259d17cd80010c2bb6 +Signed-off-by: Jerry Yu +--- + .drone.yml | 89 ------------------------------------------------------ + 1 file changed, 89 deletions(-) + delete mode 100644 .drone.yml + +diff --git a/src/isa-l/.drone.yml b/src/isa-l/.drone.yml +deleted file mode 100644 +index 4a30e90..0000000 +--- a/src/isa-l/.drone.yml ++++ 
/dev/null +@@ -1,89 +0,0 @@ +-kind: pipeline +-name: arm64-linux-gcc-5.4 +- +-platform: +- os: linux +- arch: arm64 +- +-steps: +-- name: arm64-linux-gcc-5.4 +- image: ubuntu:xenial +- environment: +- C_COMPILER: gcc +- commands: +- - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi +- - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi +- - apt-get -qq update +- - apt-get install -qq -y build-essential git indent libtool libz-dev yasm autoconf +- - if [ -n "$CC" ]; then $CC --version; fi +- - if [ -n "$AS" ]; then $AS --version; fi +- - ./tools/test_autorun.sh "$TEST_TYPE" +- +---- +-kind: pipeline +-name: arm64-linux-gcc-4.7 +- +-platform: +- os: linux +- arch: arm64 +- +-steps: +-- name: arm64-linux-gcc-4.7 +- image: ubuntu:xenial +- environment: +- C_COMPILER: gcc-4.7 +- commands: +- - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi +- - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi +- - apt-get -qq update +- - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf +- - add-apt-repository -y ppa:ubuntu-toolchain-r/test +- - apt-get -qq update +- - apt-get install -qq -y g++-4.7 +- - if [ -n "$CC" ]; then $CC --version; fi +- - if [ -n "$AS" ]; then $AS --version; fi +- - ./tools/test_autorun.sh "$TEST_TYPE" +- +---- +-kind: pipeline +-name: arm64-linux-gcc-6 +- +-platform: +- os: linux +- arch: arm64 +- +-steps: +-- name: arm64-linux-gcc-6 +- image: debian:9 +- environment: +- C_COMPILER: gcc +- commands: +- - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi +- - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi +- - apt-get -q update +- - apt-get install -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf +- - if [ -n "$CC" ]; then $CC --version; fi +- - if [ -n "$AS" ]; then $AS --version; fi +- - ./tools/test_autorun.sh "$TEST_TYPE" +- +---- +-kind: pipeline +-name: arm64-linux-extended-tests +- +-platform: +- os: 
linux +- arch: arm64 +- +-steps: +-- name: arm64-linux-extended-tests +- image: ubuntu:xenial +- environment: +- TEST_TYPE: ext +- commands: +- - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi +- - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi +- - apt-get -qq update +- - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf +- - if [ -n "$CC" ]; then $CC --version; fi +- - if [ -n "$AS" ]; then $AS --version; fi +- - ./tools/test_autorun.sh "$TEST_TYPE" +-- +2.20.1.windows.1 + + +From f2cf2609cd07c383524d9aecd0a7f668f6b7eafc Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Mon, 2 Mar 2020 13:09:08 +0800 +Subject: [PATCH 08/42] multi-binary:Add microarchitecture id reader + +This patch provides microarchitecture information +and make microarchitecture optimization possible. It +will trap into kernel due to mrs instruction. So it +should be called only in dispatcher, that will be +called only once in program lifecycle. And HWCAP must +be match,That will make sure there are no illegal +instruction errors. + +Change-Id: I393ec742010bf3f10ce335482c0350aa4202c788 +Signed-off-by: Jerry Yu +--- + include/aarch64_multibinary.h | 92 ++++++++++++++++++++++++++++++++++- + 1 file changed, 91 insertions(+), 1 deletion(-) + +diff --git a/src/isa-l/include/aarch64_multibinary.h b/src/isa-l/include/aarch64_multibinary.h +index 6db05d7..e31451b 100644 +--- a/src/isa-l/include/aarch64_multibinary.h ++++ b/src/isa-l/include/aarch64_multibinary.h +@@ -1,5 +1,5 @@ + /********************************************************************** +- Copyright(c) 2019 Arm Corporation All rights reserved. ++ Copyright(c) 2020 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions +@@ -217,5 +217,95 @@ + _func_entry; \ + }) + ++/** ++ * Micro-architecture definitions ++ * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1 ++ */ ++ ++#define CPU_IMPLEMENTER_RESERVE 0x00 ++#define CPU_IMPLEMENTER_ARM 0x41 ++ ++ ++#define CPU_PART_CORTEX_A57 0xD07 ++#define CPU_PART_CORTEX_A72 0xD08 ++#define CPU_PART_NEOVERSE_N1 0xD0C ++ ++#define MICRO_ARCH_ID(imp,part) \ ++ (((CPU_IMPLEMENTER_##imp&0xff)<<24)|((CPU_PART_##part&0xfff)<<4)) ++ ++#ifndef HWCAP_CPUID ++#define HWCAP_CPUID (1<<11) ++#endif ++ ++/** ++ * @brief get_micro_arch_id ++ * ++ * Read the micro-architecture ID register if possible. This function ++ * provides microarchitecture information and makes microarchitecture optimization ++ * possible. ++ * ++ * Reading system registers (MRS) is forbidden in userspace. If executed, it ++ * will raise an illegal instruction error. The kernel provides a solution for ++ * this issue. The solution depends on the HWCAP_CPUID flag. Reference(1) ++ * describes how to use it. It provides an "illegal instruction" handler ++ * in kernel space; the handler will execute MRS and return the correct ++ * value to userspace. ++ * ++ * To avoid too many kernel traps, this function MUST only be called in the ++ * dispatcher. And HWCAP must match; that will make sure there are no ++ * illegal instruction errors. HWCAP_CPUID should be available to get the ++ * best performance. ++ * ++ * NOTICE: ++ * - HWCAP_CPUID should be available. Otherwise it returns the reserve value ++ * - It MUST be called inside the dispatcher. ++ * - It MUST meet the HWCAP requirements ++ * ++ * Example: ++ * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) ++ * { ++ * unsigned long auxval = getauxval(AT_HWCAP); ++ * // The HWCAP check below is mandatory.
++ * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { ++ * switch (get_micro_arch_id()) { ++ * case MICRO_ARCH_ID(ARM, CORTEX_A57): ++ * return PROVIDER_INFO(crc32_pmull_crc_for_a57); ++ * case MICRO_ARCH_ID(ARM, CORTEX_A72): ++ * return PROVIDER_INFO(crc32_pmull_crc_for_a72); ++ * case MICRO_ARCH_ID(ARM, NEOVERSE_N1): ++ * return PROVIDER_INFO(crc32_pmull_crc_for_n1); ++ * default: ++ * return PROVIDER_INFO(crc32_pmull_crc_for_others); ++ * } ++ * } ++ * return PROVIDER_BASIC(crc32_iscsi); ++ * } ++ * KNOWN ISSUE: ++ * On a heterogeneous system (big.LITTLE), it will work but the performance ++ * might not be the best one as expected. ++ * ++ * If this function is called on the big core, it will return the function ++ * optimized for the big core. ++ * ++ * If execution is then scheduled to the little core, it will still work (1), ++ * but the function won't be optimized for the little core, thus the performance ++ * won't be as expected. ++ * ++ * References: ++ * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5) ++ * ++ */ ++static inline uint32_t get_micro_arch_id(void) ++{ ++ uint32_t id=CPU_IMPLEMENTER_RESERVE; ++ if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) { ++ /** Here will trap into kernel space */ ++ asm("mrs %0, MIDR_EL1 " : "=r" (id)); ++ } ++ return id&0xff00fff0; ++} ++ ++ ++ + #endif /* __ASSEMBLY__ */ + #endif +-- +2.20.1.windows.1 + + +From a2fc2c000d2dd6872b330554506eafb20bb99561 Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Mon, 2 Mar 2020 13:34:44 +0800 +Subject: [PATCH 09/42] crc32:Add optimization implementation for Neoverse N1 + +This patch is base on reference(1) algorithm with some changes. +- Redefine the block number to two. + - That's due to only two pipe-line can be used in CRC32 calculate.
+- Redefine the block size: + - The block size of CRC is 1536B and PMULL is 512B +- Interleave CRC and PMULL instructions. +The optimization parameters are calculated base on reference(2) + +References: +- https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +- https://developer.arm.com/docs/swog309707/a + +Change-Id: I1c9e593d59b521f56e4b3c807b396c083c181636 +Signed-off-by: Jerry Yu +--- + crc/aarch64/Makefile.am | 6 +- + crc/aarch64/crc32_common_mix_neoverse_n1.S | 434 +++++++++++++++++++++ + crc/aarch64/crc32_mix_neoverse_n1.S | 66 ++++ + crc/aarch64/crc32c_mix_neoverse_n1.S | 64 +++ + crc/aarch64/crc_aarch64_dispatcher.c | 14 +- + 5 files changed, 581 insertions(+), 3 deletions(-) + create mode 100644 crc/aarch64/crc32_common_mix_neoverse_n1.S + create mode 100644 crc/aarch64/crc32_mix_neoverse_n1.S + create mode 100644 crc/aarch64/crc32c_mix_neoverse_n1.S + +diff --git a/src/isa-l/crc/aarch64/Makefile.am b/src/isa-l/crc/aarch64/Makefile.am +index 57061f0..9fbb019 100644 +--- a/src/isa-l/crc/aarch64/Makefile.am ++++ b/src/isa-l/crc/aarch64/Makefile.am +@@ -1,5 +1,5 @@ + ######################################################################## +-# Copyright(c) 2019 Arm Corporation All rights reserved. ++# Copyright(c) 2020 Arm Corporation All rights reserved. 
+ # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions +@@ -44,4 +44,6 @@ lsrc_aarch64 += \ + crc/aarch64/crc64_iso_refl_pmull.S \ + crc/aarch64/crc64_iso_norm_pmull.S \ + crc/aarch64/crc64_jones_refl_pmull.S \ +- crc/aarch64/crc64_jones_norm_pmull.S ++ crc/aarch64/crc64_jones_norm_pmull.S \ ++ crc/aarch64/crc32_mix_neoverse_n1.S \ ++ crc/aarch64/crc32c_mix_neoverse_n1.S +diff --git a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +new file mode 100644 +index 0000000..91b331d +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +@@ -0,0 +1,434 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++**********************************************************************/ ++ ++ ++.macro declare_var_vector_reg name:req,reg:req ++ \name\()_q .req q\reg ++ \name\()_v .req v\reg ++ \name\()_s .req s\reg ++ \name\()_d .req d\reg ++.endm ++ declare_var_vector_reg k1k2,20 ++ declare_var_vector_reg k3k4,21 ++ declare_var_vector_reg poly,22 ++ declare_var_vector_reg k5k0,23 ++ declare_var_vector_reg mask,24 ++ declare_var_vector_reg fold_poly,25 ++ ++ declare_var_vector_reg tmp0,0 ++ declare_var_vector_reg tmp1,1 ++ declare_var_vector_reg tmp2,2 ++ declare_var_vector_reg tmp3,3 ++ declare_var_vector_reg tmp4,4 ++ declare_var_vector_reg tmp5,5 ++ declare_var_vector_reg tmp6,6 ++ declare_var_vector_reg tmp7,7 ++ declare_var_vector_reg pmull_data0,16 ++ declare_var_vector_reg pmull_data1,17 ++ declare_var_vector_reg pmull_data2,18 ++ declare_var_vector_reg pmull_data3,19 ++ ++ vzr .req v26 ++ ++ BUF .req x0 ++ LEN .req x1 ++ CRC .req x2 ++ wCRC .req w2 ++ const_addr .req x3 ++ crc_blk_ptr .req x4 ++ pmull_blk_ptr .req x5 ++ crc_data0 .req x6 ++ crc_data1 .req x7 ++ crc_data2 .req x9 /* x9-x11 are caller-saved temporaries (AAPCS64); callee-saved x19-x28 must not be used without save/restore */ ++ crc_data3 .req x10 ++ wPmull .req w11 ++ ++ data0 .req x4 ++ data1 .req x5 ++ data2 .req x6 ++ data3 .req x7 ++ wdata .req w4 ++ ++.macro pmull_fold ++ ++ pmull2 tmp4_v.1q, tmp0_v.2d, k1k2_v.2d ++ pmull2 tmp5_v.1q, tmp1_v.2d, k1k2_v.2d ++ pmull2 tmp6_v.1q, tmp2_v.2d, k1k2_v.2d ++ pmull2 tmp7_v.1q, tmp3_v.2d, k1k2_v.2d ++ ++ pmull tmp0_v.1q, tmp0_v.1d, k1k2_v.1d ++
pmull tmp1_v.1q, tmp1_v.1d, k1k2_v.1d ++ pmull tmp2_v.1q, tmp2_v.1d, k1k2_v.1d ++ pmull tmp3_v.1q, tmp3_v.1d, k1k2_v.1d ++ ld1 {pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ++ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b ++ eor tmp1_v.16b, tmp1_v.16b, tmp5_v.16b ++ eor tmp2_v.16b, tmp2_v.16b, tmp6_v.16b ++ eor tmp3_v.16b, tmp3_v.16b, tmp7_v.16b ++ ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, v16.16b ++ eor tmp1_v.16b, tmp1_v.16b, v17.16b ++ eor tmp2_v.16b, tmp2_v.16b, v18.16b ++ eor tmp3_v.16b, tmp3_v.16b, v19.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++.endm ++ ++ ++ ++.macro crc32_common_mix poly_type ++ .set MIX_BLK_SIZE,2048 ++ add pmull_blk_ptr,BUF,MIX_BLK_SIZE-512 ++.ifc \poly_type,crc32 ++ mvn wCRC,wCRC ++.endif ++ cmp LEN,MIX_BLK_SIZE-1 ++ mov pmull_blk_ptr,BUF ++ bls start_final ++ adr const_addr, .Lconstants ++ ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48 ++ mov crc_blk_ptr,BUF ++ movi vzr.16b, #0 ++ ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr] ++ ++loop_2048: ++ mov crc_blk_ptr,BUF ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ add pmull_blk_ptr,pmull_blk_ptr,MIX_BLK_SIZE-512 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ld1 {tmp0_v.16b-tmp3_v.16b}, [pmull_blk_ptr], #0x40 ++ sub LEN,LEN,MIX_BLK_SIZE ++ cmp LEN,MIX_BLK_SIZE ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp 
crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ++ pmull_fold ++ pmull_fold ++ pmull_fold ++ pmull_fold ++ pmull_fold ++ pmull_fold ++ pmull_fold ++ ++ /* Folding cache line into 128bit */ ++ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp 
crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp2_v.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp3_v.16b ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ ++ ++ /** ++ * perform the last 64 bit fold, also ++ * adds 32 zeroes to the input stream ++ */ ++ ext tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ pmull2 tmp1_v.1q, tmp1_v.2d, k3k4_v.2d ++ crc32_u64 
wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ext tmp0_v.16b, tmp0_v.16b, vzr.16b, #8 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ++ /* final 32-bit fold */ ++ ext tmp1_v.16b, tmp0_v.16b, vzr.16b, #4 ++ and tmp0_v.16b, tmp0_v.16b, mask_v.16b ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ pmull tmp0_v.1q, tmp0_v.1d, k5k0_v.1d ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b ++ ++ /** ++ * Finish up with the bit-reversed barrett ++ * reduction 64 ==> 32 bits ++ */ ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ and tmp1_v.16b, tmp0_v.16b, mask_v.16b ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ ext tmp1_v.16b, vzr.16b, tmp1_v.16b, #8 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ pmull2 tmp1_v.1q, tmp1_v.2d, poly_v.2d ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp 
crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ and tmp1_v.16b, tmp1_v.16b, mask_v.16b ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ pmull tmp1_v.1q, tmp1_v.1d, poly_v.1d ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ mov wPmull, tmp0_v.s[1] ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ ldp crc_data0,crc_data1,[crc_blk_ptr],16 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 ++ ++ crc32_u64 wCRC,wCRC,crc_data0 ++ crc32_u64 wCRC,wCRC,crc_data1 ++ crc32_u64 wCRC,wCRC,crc_data2 ++ crc32_u64 wCRC,wCRC,crc_data3 ++ ++ fmov d0, CRC ++ mov w6, 0 ++ pmull v0.1q, v0.1d, fold_poly_v.1d ++ fmov CRC, d0 ++ add BUF,BUF,MIX_BLK_SIZE ++ crc32_u64 w6, w6, CRC ++ eor wCRC, w6, wPmull ++ bge loop_2048 ++start_final: ++ cmp LEN, 63 ++ bls .loop_16B ++.loop_64B: ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#64 ++ ldp data2, data3, [BUF],#16 ++ cmp LEN,#64 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ ldp data0, data1, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ ldp data2, data3, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ bge .loop_64B ++ ++.loop_16B: ++ cmp 
x1, 15 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 //MUST less than 16B ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++.less_16B: ++ cmp LEN, 7 ++ bls .less_8B ++ ldr data0, [BUF], 8 ++ sub LEN, LEN, #8 ++ crc32_u64 wCRC, wCRC, data0 ++.less_8B: ++ cmp LEN, 3 ++ bls .less_4B ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++.less_4B: ++ cmp LEN, 1 ++ bls .less_2B ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, #2 ++ crc32_u16 wCRC, wCRC, wdata ++.less_2B: ++ cbz LEN, .finish_exit ++ ldrb wdata, [BUF] ++ crc32_u8 wCRC, wCRC, wdata ++.finish_exit: ++.ifc \poly_type,crc32 ++ mvn w0, wCRC ++.else ++ mov w0, wCRC ++.endif ++ ret ++.endm ++ +diff --git a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +new file mode 100644 +index 0000000..fa29770 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +@@ -0,0 +1,66 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. 
++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++**********************************************************************/ ++ ++ .text ++ .align 6 ++ .arch armv8-a+crypto+crc ++ ++#include "crc32_common_mix_neoverse_n1.S" ++.Lconstants: ++ .octa 0x00000001c6e415960000000154442bd4 ++ .octa 0x00000000ccaa009e00000001751997d0 ++ .octa 0x00000001F701164100000001DB710641 ++ .quad 0x0000000163cd6124 ++ .quad 0x00000000FFFFFFFF ++ .quad 0x000000000c30f51d ++.macro crc32_u64 dst,src,data ++ crc32x \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32w \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32h \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32b \dst,\src,\data ++.endm ++ ++ ++/** ++ * uint32_t crc32_mix_neoverse_n1(uint32_t * BUF, ++ * size_t LEN, uint CRC) ++ */ ++ .align 6 ++ .global crc32_mix_neoverse_n1 ++ .type crc32_mix_neoverse_n1, %function ++crc32_mix_neoverse_n1: ++ crc32_common_mix crc32 ++ .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1 ++ +diff --git 
a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +new file mode 100644 +index 0000000..6982b39 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +@@ -0,0 +1,64 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++ .text ++ .align 6 ++ .arch armv8-a+crypto+crc ++ ++#include "crc32_common_mix_neoverse_n1.S" ++.Lconstants: ++ .octa 0x000000009e4addf800000000740eef02 ++ .octa 0x000000014cd00bd600000000f20c0dfe ++ .octa 0x00000000dea713f10000000105ec76f0 ++ .quad 0x00000000dd45aab8 ++ .quad 0x00000000FFFFFFFF ++ .quad 0x00000000dd7e3b0c ++ ++.macro crc32_u64 dst,src,data ++ crc32cx \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32cw \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32ch \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32cb \dst,\src,\data ++.endm ++/** ++ * uint32_t crc32c_mix_neoverse_n1(uint32_t * BUF, ++ * size_t LEN, uint CRC) ++ */ ++ .align 6 ++ .global crc32c_mix_neoverse_n1 ++ .type crc32c_mix_neoverse_n1, %function ++crc32c_mix_neoverse_n1: ++ crc32_common_mix crc32c ++ .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1 +diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +index bac9eeb..2df0f28 100644 +--- a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c ++++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +@@ -1,5 +1,5 @@ + /********************************************************************** +- Copyright(c) 2019 Arm Corporation All rights reserved. ++ Copyright(c) 2019-2020 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions +@@ -62,6 +62,12 @@ DEFINE_INTERFACE_DISPATCHER(crc32_ieee) + DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + { + unsigned long auxval = getauxval(AT_HWCAP); ++ if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { ++ switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): ++ return PROVIDER_INFO(crc32c_mix_neoverse_n1); ++ } ++ } + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(crc32_iscsi_refl_hw_fold); + if (auxval & HWCAP_PMULL) { +@@ -74,6 +80,12 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + { + unsigned long auxval = getauxval(AT_HWCAP); ++ if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { ++ switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): ++ return PROVIDER_INFO(crc32_mix_neoverse_n1); ++ } ++ } + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(crc32_gzip_refl_hw_fold); + if (auxval & HWCAP_PMULL) +-- +2.20.1.windows.1 + + +From 5e586843ebcc072c638894b3c099ec617a852fe1 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Mon, 30 Mar 2020 15:41:31 -0700 +Subject: [PATCH 10/42] build: Change ms nmake default to nasm and add pdb gen + +The nmake default is changed for a modern nasm. Older nasm and yasm versions +will still work with windows but the nmake options must be changed appropriately +for max AS_FEATURE_LEVEL to match. Also now generates debug symbol pdb files. 
+ +Change-Id: I94a2dd7ecf541c6564ccbd4a184c33995d7b31ad +Signed-off-by: Poornima Kumar +Signed-off-by: Greg Tucker +--- + Makefile.nmake | 21 ++++++++++++++------- + tools/gen_nmake.mk | 21 ++++++++++++++------- + 2 files changed, 28 insertions(+), 14 deletions(-) + +diff --git a/src/isa-l/Makefile.nmake b/src/isa-l/Makefile.nmake +index 8ae4223..660722a 100644 +--- a/src/isa-l/Makefile.nmake ++++ b/src/isa-l/Makefile.nmake +@@ -161,12 +161,17 @@ objs = \ + bin\mem_zero_detect_sse.obj \ + bin\mem_multibinary.obj + +-INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/ +-LINKFLAGS = /nologo +-CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) +-AFLAGS = -f win64 $(INCLUDES) $(D) +-CC = icl +-AS = yasm ++INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/ ++# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level ++FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10 ++CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy ++CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd ++LINKFLAGS = -nologo -incremental:no -debug ++CFLAGS = $(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $(FEAT_FLAGS) $(INCLUDES) $(D) ++AFLAGS = -f win64 $(FEAT_FLAGS) $(INCLUDES) $(D) ++CC = cl ++# or CC = icl -Qstd=c99 ++AS = nasm + + lib: bin static dll + static: bin isa-l_static.lib +@@ -180,7 +185,7 @@ $? + << + + isa-l.dll: $(objs) +- link -out:$@ -dll -def:isa-l.def @<< ++ link -out:$@ -dll -def:isa-l.def $(LINKFLAGS) @<< + $? 
+ << + +@@ -302,8 +307,10 @@ clean: + -if exist bin\*.obj del bin\*.obj + -if exist isa-l_static.lib del isa-l_static.lib + -if exist *.exe del *.exe ++ -if exist *.pdb del *.pdb + -if exist isa-l.lib del isa-l.lib + -if exist isa-l.dll del isa-l.dll ++ -if exist isa-l.exp del isa-l.exp + + zlib.lib: + igzip_perf.exe: zlib.lib +diff --git a/src/isa-l/tools/gen_nmake.mk b/src/isa-l/tools/gen_nmake.mk +index b998ee5..dedea90 100644 +--- a/src/isa-l/tools/gen_nmake.mk ++++ b/src/isa-l/tools/gen_nmake.mk +@@ -11,12 +11,17 @@ Makefile.nmake: FORCE + @$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; ) + @echo '' >> $@ + @echo '' >> $@ +- @echo 'INCLUDES = $(INCLUDE)' >> $@ +- @echo 'LINKFLAGS = /nologo' >> $@ +- @echo 'CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $$(INCLUDES) $$(D)' >> $@ +- @echo 'AFLAGS = -f win64 $$(INCLUDES) $$(D)' >> $@ +- @echo 'CC = icl' >> $@ +- @echo 'AS = yasm' >> $@ ++ @echo 'INCLUDES = $(INCLUDE)' >> $@ ++ @echo '# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level' >> $@ ++ @echo 'FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10' >> $@ ++ @echo 'CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy' >> $@ ++ @echo 'CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd' >> $@ ++ @echo 'LINKFLAGS = -nologo -incremental:no -debug' >> $@ ++ @echo 'CFLAGS = $$(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@ ++ @echo 'AFLAGS = -f win64 $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@ ++ @echo 'CC = cl' >> $@ ++ @echo '# or CC = icl -Qstd=c99' >> $@ ++ @echo 'AS = nasm' >> $@ + @echo '' >> $@ + @echo 'lib: bin static dll' >> $@ + @echo 'static: bin isa-l_static.lib' >> $@ +@@ -30,7 +35,7 @@ Makefile.nmake: FORCE + @echo '<<' >> $@ + @echo '' >> $@ + @echo 'isa-l.dll: $$(objs)' >> $@ +- @echo ' link -out:$$@ -dll -def:isa-l.def @<<' >> $@ ++ @echo ' link -out:$$@ -dll -def:isa-l.def $$(LINKFLAGS) @<<' >> $@ + @echo '$$?' 
>> $@ + @echo '<<' >> $@ + @echo '' >> $@ +@@ -90,8 +95,10 @@ endif + @echo ' -if exist bin\*.obj del bin\*.obj' >> $@ + @echo ' -if exist isa-l_static.lib del isa-l_static.lib' >> $@ + @echo ' -if exist *.exe del *.exe' >> $@ ++ @echo ' -if exist *.pdb del *.pdb' >> $@ + @echo ' -if exist isa-l.lib del isa-l.lib' >> $@ + @echo ' -if exist isa-l.dll del isa-l.dll' >> $@ ++ @echo ' -if exist isa-l.exp del isa-l.exp' >> $@ + @echo '' >> $@ + $(if $(findstring igzip,$(units)),@echo 'zlib.lib:' >> $@ ) + @cat $(foreach unit,$(units), $(unit)/Makefile.am) | sed \ +-- +2.20.1.windows.1 + + +From 0033f4218936756441124dde36f2491c6b04c496 Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Thu, 2 Apr 2020 22:05:29 +0800 +Subject: [PATCH 11/42] crc32:Optimize crc32/c for cortex-a72 + +Change-Id: Ib1658fd4b87b31d8ea6c93f697b50d9b409c186e +Signed-off-by: Jerry Yu +--- + crc/aarch64/Makefile.am | 4 +- + crc/aarch64/crc32_common_crc_ext_cortex_a72.S | 135 ++++++++++++++++++ + crc/aarch64/crc32_crc_ext_cortex_a72.S | 69 +++++++++ + crc/aarch64/crc32c_crc_ext_cortex_a72.S | 68 +++++++++ + crc/aarch64/crc_aarch64_dispatcher.c | 13 ++ + 5 files changed, 288 insertions(+), 1 deletion(-) + create mode 100644 crc/aarch64/crc32_common_crc_ext_cortex_a72.S + create mode 100644 crc/aarch64/crc32_crc_ext_cortex_a72.S + create mode 100644 crc/aarch64/crc32c_crc_ext_cortex_a72.S + +diff --git a/src/isa-l/crc/aarch64/Makefile.am b/src/isa-l/crc/aarch64/Makefile.am +index 9fbb019..d99e12b 100644 +--- a/src/isa-l/crc/aarch64/Makefile.am ++++ b/src/isa-l/crc/aarch64/Makefile.am +@@ -46,4 +46,6 @@ lsrc_aarch64 += \ + crc/aarch64/crc64_jones_refl_pmull.S \ + crc/aarch64/crc64_jones_norm_pmull.S \ + crc/aarch64/crc32_mix_neoverse_n1.S \ +- crc/aarch64/crc32c_mix_neoverse_n1.S ++ crc/aarch64/crc32c_mix_neoverse_n1.S \ ++ crc/aarch64/crc32_crc_ext_cortex_a72.S \ ++ crc/aarch64/crc32c_crc_ext_cortex_a72.S +diff --git a/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S 
b/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S +new file mode 100644 +index 0000000..7c9ca35 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S +@@ -0,0 +1,135 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++ ++ ++ ++.macro crc32_hw_common poly_type ++ cbz LEN, .zero_length_ret ++.ifc \poly_type,crc32 ++ mvn wCRC,wCRC ++.endif ++ tbz BUF, 0, .align_short ++ ldrb wdata,[BUF],1 ++ sub LEN,LEN,1 ++ crc32_u8 wCRC,wCRC,wdata ++.align_short: ++ tst BUF,2 ++ ccmp LEN,1,0,ne ++ bhi .align_short_2 ++ tst BUF,4 ++ ccmp LEN,3,0,ne ++ bhi .align_word ++ ++.align_finish: ++ ++ cmp LEN, 63 ++ bls .loop_16B ++.loop_64B: ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#64 ++ ldp data2, data3, [BUF],#16 ++ cmp LEN,#64 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ ldp data0, data1, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ ldp data2, data3, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ bge .loop_64B ++ ++.loop_16B: ++ cmp LEN, 15 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 //MUST less than 16B ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++.less_16B: ++ cmp LEN, 7 ++ bls .less_8B ++ ldr data0, [BUF], 8 ++ sub LEN, LEN, #8 ++ crc32_u64 wCRC, wCRC, data0 ++.less_8B: ++ cmp LEN, 3 ++ bls .less_4B ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++.less_4B: ++ cmp LEN, 1 ++ bls .less_2B ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, #2 ++ crc32_u16 wCRC, wCRC, wdata ++.less_2B: ++ cbz LEN, .finish_exit ++ ldrb wdata, [BUF] ++ crc32_u8 wCRC, wCRC, wdata ++.finish_exit: ++.ifc \poly_type,crc32 ++ mvn w0, wCRC ++.else ++ mov w0, wCRC ++.endif ++ ret ++.zero_length_ret: ++ mov w0, wCRC ++ ret ++.align_short_2: ++ ldrh wdata, [BUF], 2 ++ sub 
LEN, LEN, 2 ++ tst BUF, 4 ++ crc32_u16 wCRC, wCRC, wdata ++ ccmp LEN, 3, 0, ne ++ bls .align_finish ++.align_word: ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++ b .align_finish ++ ++.endm +diff --git a/src/isa-l/crc/aarch64/crc32_crc_ext_cortex_a72.S b/src/isa-l/crc/aarch64/crc32_crc_ext_cortex_a72.S +new file mode 100644 +index 0000000..4335bf2 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_crc_ext_cortex_a72.S +@@ -0,0 +1,69 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++**********************************************************************/ ++ ++ .text ++ .align 6 ++ .arch armv8-a+crc ++ ++ ++#include "crc32_common_crc_ext_cortex_a72.S" ++ ++ BUF .req x1 ++ LEN .req x2 ++ wCRC .req w0 ++ data0 .req x4 ++ data1 .req x5 ++ data2 .req x6 ++ data3 .req x7 ++ wdata .req w3 ++.macro crc32_u64 dst,src,data ++ crc32x \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32w \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32h \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32b \dst,\src,\data ++.endm ++ ++ /** ++ * uint32_t crc32_crc_ext_cortex_a72( ++ * uint32_t init_crc, ++ * const unsigned char *buf, ++ * uint64_t len); ++ */ ++ .global crc32_crc_ext_cortex_a72 ++ .type crc32_crc_ext_cortex_a72, %function ++crc32_crc_ext_cortex_a72: ++ crc32_hw_common crc32 ++ ret ++ .size crc32_crc_ext_cortex_a72, .-crc32_crc_ext_cortex_a72 +diff --git a/src/isa-l/crc/aarch64/crc32c_crc_ext_cortex_a72.S b/src/isa-l/crc/aarch64/crc32c_crc_ext_cortex_a72.S +new file mode 100644 +index 0000000..64ccf69 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32c_crc_ext_cortex_a72.S +@@ -0,0 +1,68 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. 
++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++ .text ++ .align 6 ++ .arch armv8-a+crc ++ ++ ++#include "crc32_common_crc_ext_cortex_a72.S" ++ BUF .req x0 ++ LEN .req x1 ++ wCRC .req w2 ++ data0 .req x4 ++ data1 .req x5 ++ data2 .req x6 ++ data3 .req x7 ++ wdata .req w3 ++.macro crc32_u64 dst,src,data ++ crc32cx \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32cw \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32ch \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32cb \dst,\src,\data ++.endm ++ ++ /** ++ * uint32_t crc32c_crc_ext_cortex_a72( ++ * unsigned char const *buffer, ++ * size_t len, ++ * uint crc32 ) ++ */ ++ .global crc32c_crc_ext_cortex_a72 ++ .type crc32c_crc_ext_cortex_a72, %function ++crc32c_crc_ext_cortex_a72: ++ crc32_hw_common crc32c ++ ret ++ .size crc32c_crc_ext_cortex_a72, .-crc32c_crc_ext_cortex_a72 +diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +index 2df0f28..3fada02 100644 +--- a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c ++++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +@@ -62,6 +62,12 @@ DEFINE_INTERFACE_DISPATCHER(crc32_ieee) + DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + { + unsigned long auxval = getauxval(AT_HWCAP); ++ if (auxval & HWCAP_CRC32) { ++ switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, CORTEX_A72): ++ return PROVIDER_INFO(crc32c_crc_ext_cortex_a72); ++ } ++ } + if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): +@@ -80,6 +86,13 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + { + unsigned long auxval = getauxval(AT_HWCAP); ++ ++ if (auxval & HWCAP_CRC32) { ++ switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, CORTEX_A72): ++ return PROVIDER_INFO(crc32_crc_ext_cortex_a72); ++ } ++ } + if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & 
(HWCAP_CRC32 | HWCAP_PMULL))) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): +-- +2.20.1.windows.1 + + +From 9bcd6768fd907b2172330d4897c8330fb12ea02e Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Fri, 3 Apr 2020 16:52:47 +0800 +Subject: [PATCH 12/42] crc32:Adjust hardware folding algorithm flags + +Hardware folding algorithm depend on CRC32 and PMULL instruction. +And it should match both flags . + +Change-Id: I361068402db1fe6d7c0bd8d2c7048f1d94880233 +Signed-off-by: Jerry Yu +--- + crc/aarch64/crc_aarch64_dispatcher.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +index 3fada02..1df2833 100644 +--- a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c ++++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +@@ -73,9 +73,9 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): + return PROVIDER_INFO(crc32c_mix_neoverse_n1); + } +- } +- if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(crc32_iscsi_refl_hw_fold); ++ } ++ + if (auxval & HWCAP_PMULL) { + return PROVIDER_INFO(crc32_iscsi_refl_pmull); + } +@@ -98,9 +98,9 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): + return PROVIDER_INFO(crc32_mix_neoverse_n1); + } +- } +- if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(crc32_gzip_refl_hw_fold); ++ } ++ + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc32_gzip_refl_pmull); + +-- +2.20.1.windows.1 + + +From 92fc8733fabd6be625c57e2ce441fd7851dff38a Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Fri, 3 Apr 2020 12:31:31 +0800 +Subject: [PATCH 13/42] crc32: Fix prototype mismatch bug + +Change-Id: I7c8a2348441f32a43ff386122612405e418d9947 +Signed-off-by: Jerry Yu +--- + crc/aarch64/crc32_common_mix_neoverse_n1.S | 12 ++++-------- + crc/aarch64/crc32_mix_neoverse_n1.S | 8 ++++++-- + crc/aarch64/crc32c_mix_neoverse_n1.S | 6 +++++- + 3 files changed, 15 
insertions(+), 11 deletions(-) + +diff --git a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +index 91b331d..c8524a3 100644 +--- a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +@@ -56,18 +56,14 @@ + + vzr .req v26 + +- BUF .req x0 +- LEN .req x1 +- CRC .req x2 +- wCRC .req w2 + const_addr .req x3 + crc_blk_ptr .req x4 + pmull_blk_ptr .req x5 + crc_data0 .req x6 + crc_data1 .req x7 +- crc_data2 .req x19 +- crc_data3 .req x20 +- wPmull .req w21 ++ crc_data2 .req x9 ++ crc_data3 .req x10 ++ wPmull .req w11 + + data0 .req x4 + data1 .req x5 +@@ -383,7 +379,7 @@ start_final: + bge .loop_64B + + .loop_16B: +- cmp x1, 15 ++ cmp LEN, 15 + bls .less_16B + ldp data0, data1, [BUF],#16 + sub LEN,LEN,#16 +diff --git a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +index fa29770..2713e62 100644 +--- a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +@@ -54,9 +54,13 @@ + + + /** +- * uint32_t crc32_mix_neoverse_n1(uint32_t * BUF, +- * size_t LEN, uint CRC) ++ * uint32_t crc32_mix_neoverse_n1(uint CRC ,uint8_t * BUF, ++ * size_t LEN) + */ ++ BUF .req x1 ++ LEN .req x2 ++ CRC .req x0 ++ wCRC .req w0 + .align 6 + .global crc32_mix_neoverse_n1 + .type crc32_mix_neoverse_n1, %function +diff --git a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +index 6982b39..c1b3835 100644 +--- a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +@@ -53,9 +53,13 @@ + crc32cb \dst,\src,\data + .endm + /** +- * uint32_t crc32c_mix_neoverse_n1(uint32_t * BUF, ++ * uint32_t crc32c_mix_neoverse_n1(uint8_t * BUF, + * size_t LEN, uint CRC) + */ ++ BUF .req x0 ++ LEN .req x1 ++ CRC .req x2 ++ wCRC .req w2 + .align 6 + .global crc32c_mix_neoverse_n1 + .type crc32c_mix_neoverse_n1, 
%function +-- +2.20.1.windows.1 + + +From 6c4d3dbf6cb994addf4233c7e8918b94db7fbd65 Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Tue, 7 Apr 2020 16:31:18 +0800 +Subject: [PATCH 14/42] crc32:NeoverseN1: Change CRC32/PMULL order to PMULL + first + +To reduce the cache missing events, the mix layout is changed +to PMULL+CRC. It also relaxes the final delay caused by data +dependency. +As results, the cold perf was improved about 20% and warm perf +was improved about 4%. + +Change-Id: I7756f846edcb4f1665b4643a5a0e02283938cfdf +Signed-off-by: Jerry Yu +--- + crc/aarch64/crc32_common_mix_neoverse_n1.S | 32 ++++++++++++---------- + crc/aarch64/crc32_mix_neoverse_n1.S | 2 +- + crc/aarch64/crc32c_mix_neoverse_n1.S | 2 +- + 3 files changed, 19 insertions(+), 17 deletions(-) + +diff --git a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +index c8524a3..4911a30 100644 +--- a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S +@@ -64,6 +64,7 @@ + crc_data2 .req x9 + crc_data3 .req x10 + wPmull .req w11 ++ xPmull .req x11 + + data0 .req x4 + data1 .req x5 +@@ -117,27 +118,29 @@ + + .macro crc32_common_mix poly_type + .set MIX_BLK_SIZE,2048 +- add pmull_blk_ptr,BUF,MIX_BLK_SIZE-512 ++ + .ifc \poly_type,crc32 + mvn wCRC,wCRC + .endif + cmp LEN,MIX_BLK_SIZE-1 +- mov pmull_blk_ptr,BUF +- bls start_final + adr const_addr, .Lconstants ++ bls start_final + ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48 +- mov crc_blk_ptr,BUF + movi vzr.16b, #0 + ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr] + + loop_2048: +- mov crc_blk_ptr,BUF ++ ld1 {tmp0_v.16b-tmp3_v.16b}, [BUF] ++ add pmull_blk_ptr,BUF,0x40 ++ add crc_blk_ptr, BUF,512 ++ mov tmp4_v.16b,vzr.16b ++ fmov tmp4_s, wCRC + ldp crc_data0,crc_data1,[crc_blk_ptr],16 +- add pmull_blk_ptr,pmull_blk_ptr,MIX_BLK_SIZE-512 +- ldp crc_data2,crc_data3,[crc_blk_ptr],16 +- ld1 {tmp0_v.16b-tmp3_v.16b}, 
[pmull_blk_ptr], #0x40 ++ eor tmp0_v.16b,tmp0_v.16b,tmp4_v.16b ++ mov wCRC, 0 + sub LEN,LEN,MIX_BLK_SIZE + cmp LEN,MIX_BLK_SIZE ++ ldp crc_data2,crc_data3,[crc_blk_ptr],16 + crc32_u64 wCRC,wCRC,crc_data0 + crc32_u64 wCRC,wCRC,crc_data1 + ldp crc_data0,crc_data1,[crc_blk_ptr],16 +@@ -324,7 +327,8 @@ loop_2048: + eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b + crc32_u64 wCRC,wCRC,crc_data2 + crc32_u64 wCRC,wCRC,crc_data3 +- mov wPmull, tmp0_v.s[1] ++ mov tmp4_v.16b,vzr.16b ++ mov tmp4_v.s[0], tmp0_v.s[1] + ldp crc_data2,crc_data3,[crc_blk_ptr],16 + crc32_u64 wCRC,wCRC,crc_data0 + crc32_u64 wCRC,wCRC,crc_data1 +@@ -350,13 +354,11 @@ loop_2048: + crc32_u64 wCRC,wCRC,crc_data2 + crc32_u64 wCRC,wCRC,crc_data3 + +- fmov d0, CRC +- mov w6, 0 +- pmull v0.1q, v0.1d, fold_poly_v.1d +- fmov CRC, d0 ++ pmull tmp4_v.1q, tmp4_v.1d, fold_poly_v.1d + add BUF,BUF,MIX_BLK_SIZE +- crc32_u64 w6, w6, CRC +- eor wCRC, w6, wPmull ++ fmov xPmull, tmp4_d ++ crc32_u64 wPmull, wzr, xPmull ++ eor wCRC, wPmull, wCRC + bge loop_2048 + start_final: + cmp LEN, 63 +diff --git a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +index 2713e62..62b40e1 100644 +--- a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S +@@ -38,7 +38,7 @@ + .octa 0x00000001F701164100000001DB710641 + .quad 0x0000000163cd6124 + .quad 0x00000000FFFFFFFF +- .quad 0x000000000c30f51d ++ .quad 0x000000001753ab84 + .macro crc32_u64 dst,src,data + crc32x \dst,\src,\data + .endm +diff --git a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +index c1b3835..a98511a 100644 +--- a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S ++++ b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S +@@ -38,7 +38,7 @@ + .octa 0x00000000dea713f10000000105ec76f0 + .quad 0x00000000dd45aab8 + .quad 0x00000000FFFFFFFF +- .quad 0x00000000dd7e3b0c ++ .quad 0x000000009ef68d35 + + .macro crc32_u64 dst,src,data + crc32cx 
\dst,\src,\data +-- +2.20.1.windows.1 + + +From 031450f6977243159dd227c0505cbed0c1666270 Mon Sep 17 00:00:00 2001 +From: Zhiyuan Zhu +Date: Sun, 26 Apr 2020 13:26:05 +0000 +Subject: [PATCH 15/42] crc32: Implement default mix mode optimization + +Change-Id: Ib3bf04215cca491db522ec33905fe48df173cc2f +Signed-off-by: Zhiyuan Zhu +--- + crc/aarch64/Makefile.am | 2 + + crc/aarch64/crc32_mix_default.S | 107 +++++ + crc/aarch64/crc32_mix_default_common.S | 563 +++++++++++++++++++++++++ + crc/aarch64/crc32c_mix_default.S | 109 +++++ + crc/aarch64/crc_aarch64_dispatcher.c | 4 +- + 5 files changed, 783 insertions(+), 2 deletions(-) + create mode 100644 crc/aarch64/crc32_mix_default.S + create mode 100644 crc/aarch64/crc32_mix_default_common.S + create mode 100644 crc/aarch64/crc32c_mix_default.S + +diff --git a/src/isa-l/crc/aarch64/Makefile.am b/src/isa-l/crc/aarch64/Makefile.am +index d99e12b..a43ca30 100644 +--- a/src/isa-l/crc/aarch64/Makefile.am ++++ b/src/isa-l/crc/aarch64/Makefile.am +@@ -45,6 +45,8 @@ lsrc_aarch64 += \ + crc/aarch64/crc64_iso_norm_pmull.S \ + crc/aarch64/crc64_jones_refl_pmull.S \ + crc/aarch64/crc64_jones_norm_pmull.S \ ++ crc/aarch64/crc32_mix_default.S \ ++ crc/aarch64/crc32c_mix_default.S \ + crc/aarch64/crc32_mix_neoverse_n1.S \ + crc/aarch64/crc32c_mix_neoverse_n1.S \ + crc/aarch64/crc32_crc_ext_cortex_a72.S \ +diff --git a/src/isa-l/crc/aarch64/crc32_mix_default.S b/src/isa-l/crc/aarch64/crc32_mix_default.S +new file mode 100644 +index 0000000..05c3407 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_mix_default.S +@@ -0,0 +1,107 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. 
++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++ .arch armv8-a+crypto+crc ++ .text ++ .align 6 ++ ++#define CRC32 ++ ++.macro crc32_u64 dst,src,data ++ crc32x \dst,\src,\data ++.endm ++ ++.macro crc32_u32 dst,src,data ++ crc32w \dst,\src,\data ++.endm ++ ++.macro crc32_u16 dst,src,data ++ crc32h \dst,\src,\data ++.endm ++ ++.macro crc32_u8 dst,src,data ++ crc32b \dst,\src,\data ++.endm ++ ++#include "crc32_mix_default_common.S" ++ ++ .global crc32_mix_default ++ .type crc32_mix_default, %function ++crc32_mix_default: ++ crc32_mix_main_default ++ .size crc32_mix_default, .-crc32_mix_default ++ ++ .section .rodata ++ .align 4 ++ .set lanchor_crc32,. + 0 ++ ++ .type k1k2, %object ++ .size k1k2, 16 ++k1k2: ++ .xword 0x0154442bd4 ++ .xword 0x01c6e41596 ++ ++ .type k3k4, %object ++ .size k3k4, 16 ++k3k4: ++ .xword 0x01751997d0 ++ .xword 0x00ccaa009e ++ ++ .type k5k0, %object ++ .size k5k0, 16 ++k5k0: ++ .xword 0x0163cd6124 ++ .xword 0 ++ ++ .type poly, %object ++ .size poly, 16 ++poly: ++ .xword 0x01db710641 ++ .xword 0x01f7011641 ++ ++ .type crc32_const, %object ++ .size crc32_const, 48 ++crc32_const: ++ .xword 0x1753ab84 ++ .xword 0 ++ .xword 0xbbf2f6d6 ++ .xword 0 ++ .xword 0x0c30f51d ++ .xword 0 ++ ++ .align 4 ++ .set .lanchor_mask,. + 0 ++ ++ .type mask, %object ++ .size mask, 16 ++mask: ++ .word -1 ++ .word 0 ++ .word -1 ++ .word 0 +diff --git a/src/isa-l/crc/aarch64/crc32_mix_default_common.S b/src/isa-l/crc/aarch64/crc32_mix_default_common.S +new file mode 100644 +index 0000000..106da20 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_mix_default_common.S +@@ -0,0 +1,563 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. 
++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++.macro declare_generic_reg name:req, reg:req, default:req ++ \name .req \default\reg ++ w_\name .req w\reg ++ x_\name .req x\reg ++.endm ++ ++.macro declare_neon_reg name:req, reg:req, default:req ++ \name .req \default\reg ++ v_\name .req v\reg ++ q_\name .req q\reg ++ d_\name .req d\reg ++ s_\name .req s\reg ++.endm ++ ++/********************************************************************** ++ variables ++**********************************************************************/ ++ declare_generic_reg crc, 0,w ++ declare_generic_reg buf, 1,x ++ declare_generic_reg len, 2,x ++ declare_generic_reg buf_saved, 3,x ++ declare_generic_reg buf_iter, 4,x ++ declare_generic_reg len_saved, 5,x ++ declare_generic_reg buf_tmp, 6,x ++ ++ declare_generic_reg crc0, 7,x ++ declare_generic_reg crc1, 8,x ++ declare_generic_reg crc2, 9,x ++ declare_generic_reg pconst, 10,x ++ declare_generic_reg data_crc0, 11,x ++ declare_generic_reg data_crc1, 12,x ++ declare_generic_reg data_crc2, 13,x ++ ++ declare_generic_reg size, 9,x ++ declare_generic_reg crc_tmp, 10,w ++ declare_generic_reg size_tmp, 11,x ++ declare_generic_reg data_tmp1, 11,x ++ declare_generic_reg data_tmp2, 12,x ++ declare_generic_reg data_tmp3, 13,x ++ ++ declare_generic_reg tmp, 14,x ++ declare_generic_reg tmp1, 15,x ++ ++// return ++ declare_generic_reg ret_crc, 0,w ++ ++/********************************************************************** ++ simd variables ++**********************************************************************/ ++ declare_neon_reg a0, 0,v ++ declare_neon_reg a1, 1,v ++ declare_neon_reg a2, 2,v ++ declare_neon_reg a3, 3,v ++ declare_neon_reg a4, 4,v ++ ++ declare_neon_reg a5, 16,v ++ declare_neon_reg a6, 17,v ++ declare_neon_reg a7, 18,v ++ declare_neon_reg a8, 19,v ++ ++ declare_neon_reg y5, 20,v ++ declare_neon_reg y6, 21,v ++ declare_neon_reg y7, 22,v ++ declare_neon_reg y8, 23,v ++ ++ declare_neon_reg neon_zero, 24,v 
++ declare_neon_reg neon_tmp, 24,v ++ ++ declare_neon_reg k5k0, 25,v ++ declare_neon_reg neon_tmp1, 26,v ++ declare_neon_reg neon_tmp2, 27,v ++ declare_neon_reg neon_tmp3, 28,v ++ ++ declare_neon_reg crc_pmull, 29,v ++ declare_neon_reg neon_crc0, 30,v ++ declare_neon_reg neon_crc1, 31,v ++ ++ declare_neon_reg neon_const0, 5,v ++ declare_neon_reg neon_const1, 6,v ++ declare_neon_reg neon_const2, 7,v ++ ++// constants ++ .equ offset_k3k4, 16 ++ .equ offset_k5k0, 32 ++ .equ offset_poly, 48 ++ .equ offset_crc32_const, 64 ++ ++// pmull fold ++.macro pmull_fold ++ ldr x_data_crc0, [x_buf_tmp, 464] ++ ldr x_data_crc1, [x_buf_tmp, 976] ++ ldr x_data_crc2, [x_buf_tmp, 1488] ++ ++ pmull v_a5.1q, v_a1.1d, v_a0.1d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ldr x_data_crc0, [x_buf_tmp, 472] ++ ldr x_data_crc1, [x_buf_tmp, 984] ++ ldr x_data_crc2, [x_buf_tmp, 1496] ++ ++ pmull v_a6.1q, v_a2.1d, v_a0.1d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ldr x_data_crc0, [x_buf_tmp, 480] ++ ldr x_data_crc1, [x_buf_tmp, 992] ++ ldr x_data_crc2, [x_buf_tmp, 1504] ++ ++ pmull v_a7.1q, v_a3.1d, v_a0.1d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ldr x_data_crc0, [x_buf_tmp, 488] ++ ldr x_data_crc1, [x_buf_tmp, 1000] ++ ldr x_data_crc2, [x_buf_tmp, 1512] ++ ++ pmull v_a8.1q, v_a4.1d, v_a0.1d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ldr x_data_crc0, [x_buf_tmp, 496] ++ ldr x_data_crc1, [x_buf_tmp, 1008] ++ ldr x_data_crc2, [x_buf_tmp, 1520] ++ ++ pmull2 v_a1.1q, v_a1.2d, v_a0.2d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ld1 {v_y5.4s, v_y6.4s, v_y7.4s, v_y8.4s}, 
[x_buf_tmp] ++ ++ ldr x_data_crc0, [x_buf_tmp, 504] ++ ldr x_data_crc1, [x_buf_tmp, 1016] ++ ldr x_data_crc2, [x_buf_tmp, 1528] ++ ++ pmull2 v_a2.1q, v_a2.2d, v_a0.2d ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ pmull2 v_a3.1q, v_a3.2d, v_a0.2d ++ pmull2 v_a4.1q, v_a4.2d, v_a0.2d ++ ++ eor v_y5.16b, v_y5.16b, v_a5.16b ++ eor v_y6.16b, v_y6.16b, v_a6.16b ++ eor v_y7.16b, v_y7.16b, v_a7.16b ++ eor v_y8.16b, v_y8.16b, v_a8.16b ++ ++ ldr x_data_crc0, [x_buf_tmp, 512] ++ ldr x_data_crc1, [x_buf_tmp, 1024] ++ ldr x_data_crc2, [x_buf_tmp, 1536] ++ ++ eor v_a1.16b, v_y5.16b, v_a1.16b ++ eor v_a2.16b, v_y6.16b, v_a2.16b ++ eor v_a3.16b, v_y7.16b, v_a3.16b ++ eor v_a4.16b, v_y8.16b, v_a4.16b ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ ldr x_data_crc0, [x_buf_tmp, 520] ++ ldr x_data_crc1, [x_buf_tmp, 1032] ++ ldr x_data_crc2, [x_buf_tmp, 1544] ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++.endm ++ ++// crc32 mix for 2048 byte input data ++.macro crc32_mix2048 ++ fmov s_a1, w_crc ++ movi v_neon_tmp.4s, 0 ++ ++ adrp x_pconst, lanchor_crc32 ++ add x_buf_tmp, x_buf, 64 ++ ++ ldr x_data_crc0, [x_buf, 512] ++ ldr x_data_crc1, [x_buf, 1024] ++ ldr x_data_crc2, [x_buf, 1536] ++ ++ crc32_u64 w_crc0, wzr, x_data_crc0 ++ crc32_u64 w_crc1, wzr, x_data_crc1 ++ crc32_u64 w_crc2, wzr, x_data_crc2 ++ ++#ifdef CRC32 ++ mvn v_a1.8b, v_a1.8b ++#endif ++ ++ ins v_neon_tmp.s[0], v_a1.s[0] ++ ++ ld1 {v_a1.4s, v_a2.4s, v_a3.4s, v_a4.4s}, [x_buf] ++ ++ ldr x_data_crc0, [x_buf, 520] ++ ldr x_data_crc1, [x_buf, 1032] ++ ldr x_data_crc2, [x_buf, 1544] ++ ++ eor v_a1.16b, v_a1.16b, v_neon_tmp.16b ++ ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2 ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, 
w_crc2, x_data_crc2 ++ ++// loop start, unroll the loop ++ .align 4 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++ ++ add x_buf_tmp, x_buf_tmp, 64 ++ pmull_fold ++// loop end ++ ++// PMULL: fold into 128-bits ++ add x_pconst, x_pconst, :lo12:lanchor_crc32 ++ ++ ldr x_data_crc0, [x_buf, 976] ++ ldr x_data_crc1, [x_buf, 1488] ++ ldr x_data_crc2, [x_buf, 2000] ++ ++ ldr q_a0, [x_pconst, offset_k3k4] // k3k4 ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ pmull v_a5.1q, v_a1.1d, v_a0.1d ++ pmull2 v_a1.1q, v_a1.2d, v_a0.2d ++ ++ eor v_a1.16b, v_a5.16b, v_a1.16b ++ eor v_a1.16b, v_a1.16b, v_a2.16b ++ ++ ldr x_data_crc0, [x_buf, 984] ++ ldr x_data_crc1, [x_buf, 1496] ++ ldr x_data_crc2, [x_buf, 2008] ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ pmull v_a5.1q, v_a1.1d, v_a0.1d ++ pmull2 v_a1.1q, v_a1.2d, v_a0.2d ++ ++ ldr x_data_crc0, [x_buf, 992] ++ ldr x_data_crc1, [x_buf, 1504] ++ ldr x_data_crc2, [x_buf, 2016] ++ ++ eor v_a1.16b, v_a5.16b, v_a1.16b ++ eor v_a1.16b, v_a1.16b, v_a3.16b ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ pmull v_a5.1q, v_a1.1d, v_a0.1d ++ pmull2 v_a1.1q, v_a1.2d, v_a0.2d ++ ++ ldr x_data_crc0, [x_buf, 1000] ++ ldr x_data_crc1, [x_buf, 1512] ++ ldr x_data_crc2, [x_buf, 2024] ++ ++ eor v_a1.16b, v_a5.16b, v_a1.16b ++ eor v_a1.16b, v_a1.16b, v_a4.16b ++ ++// PMULL: fold 128-bits to 64-bits ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ dup d_a0, v_a0.d[1] ++ pmull v_a2.1q, v_a1.1d, v_a0.1d ++ ++ movi 
v_neon_zero.4s, 0 ++ ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0 ++ adrp x_tmp, .lanchor_mask ++ ++ ldr x_data_crc0, [x_buf, 1008] ++ ldr x_data_crc1, [x_buf, 1520] ++ ldr x_data_crc2, [x_buf, 2032] ++ ++ ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8 ++ eor v_a1.16b, v_a2.16b, v_a1.16b ++ ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask] ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ dup d_a0, v_k5k0.d[1] ++ pmull v_a3.1q, v_a2.1d, v_a0.1d ++ ++ ext v_a2.16b, v_a1.16b, v_neon_zero.16b, #4 ++ and v_a1.16b, v_a1.16b, v_neon_tmp3.16b ++ pmull v_a1.1q, v_a1.1d, v_k5k0.1d ++ eor v_a1.16b, v_a2.16b, v_a1.16b ++ ++// PMULL: barret reduce to 32-bits ++ ldr q_neon_tmp1, [x_pconst, offset_poly] // poly ++ ++ ldr x_data_crc0, [x_buf, 1016] ++ ldr x_data_crc1, [x_buf, 1528] ++ ldr x_data_crc2, [x_buf, 2040] ++ ++ dup d_neon_tmp2, v_neon_tmp1.d[1] ++ ++ crc32_u64 w_crc0, w_crc0, x_data_crc0 ++ crc32_u64 w_crc1, w_crc1, x_data_crc1 ++ crc32_u64 w_crc2, w_crc2, x_data_crc2 ++ ++ and v_a2.16b, v_a1.16b, v_neon_tmp3.16b ++ pmull v_a2.1q, v_a2.1d, v_neon_tmp2.1d ++ and v_a2.16b, v_neon_tmp3.16b, v_a2.16b ++ pmull v_a2.1q, v_a2.1d, v_neon_tmp1.1d ++ ++// crc_pmull result ++ eor v_a1.16b, v_a1.16b, v_a2.16b ++ dup s_crc_pmull, v_a1.s[1] ++ ++// merge crc_pmull, crc0, crc1, crc2 using pmull instruction ++ fmov s_neon_crc0, w_crc0 ++ fmov s_neon_crc1, w_crc1 ++ ++ ldr q_neon_const0, [x_pconst, offset_crc32_const] ++ ldr q_neon_const1, [x_pconst, offset_crc32_const+16] ++ ldr q_neon_const2, [x_pconst, offset_crc32_const+32] ++ ++ pmull v_crc_pmull.1q, v_crc_pmull.1d, v_neon_const0.1d ++ pmull v_neon_crc0.1q, v_neon_crc0.1d, v_neon_const1.1d ++ pmull v_neon_crc1.1q, v_neon_crc1.1d, v_neon_const2.1d ++ ++ fmov x_tmp1, d_neon_crc0 ++ crc32_u64 w_crc0, wzr, x_tmp1 ++ ++ fmov x_tmp1, d_neon_crc1 ++ crc32_u64 w_crc1, wzr, x_tmp1 ++ ++ eor w_ret_crc, w_crc1, w_crc0 ++ ++ fmov x_tmp1, d_crc_pmull ++ crc32_u64 w_tmp, 
wzr, x_tmp1 ++ ++ eor w_crc2, w_tmp, w_crc2 ++ ++// handle crc32/crc32c ++#ifdef CRC32 ++ eon w_ret_crc, w_crc2, w_ret_crc ++#else ++ eor w_ret_crc, w_crc2, w_ret_crc ++#endif ++.endm ++ ++// crc32 mix main default: expands crc32_mix2048 once per full 2048-byte block, then folds the remaining bytes with the crc32_u64/u32/u16/u8 wrappers; labels are not made unique, so expand this macro only once per assembly unit ++.macro crc32_mix_main_default ++ cmp x_len, 2047 ++ mov x_len_saved, x_len ++ mov x_buf_saved, x_buf ++ bls .less_than_2048 /* unsigned compare: length at most 2047, no full block */ ++ ++ sub x_buf_iter, x_len, #2048 ++ stp x29, x30, [sp, -16]! /* push x29/x30; both exits of the block path pop them again */ ++ ++ mov x29, sp ++ and x_buf_iter, x_buf_iter, -2048 ++ add x_buf_iter, x_buf_iter, 2048 /* x_buf_iter = length rounded down to a multiple of 2048 */ ++ add x_buf_iter, x_buf, x_buf_iter /* address just past the last full block */ ++ ++ .align 4 ++.loop_mix: ++ mov x_buf, x_buf_saved /* point x_buf at the current block */ ++ crc32_mix2048 ++ ++ add x_buf_saved, x_buf_saved, 2048 ++ cmp x_buf_saved, x_buf_iter ++ bne .loop_mix /* repeat until every full block is consumed */ ++ ++ and x_len_saved, x_len_saved, 2047 /* bytes left after the full blocks */ ++ cbnz x_len_saved, .remain_ldp ++ ++ ldp x29, x30, [sp], 16 ++ ret /* length was a multiple of 2048 */ ++ ++ .align 4 ++.remain_ldp: /* tail that follows at least one full block */ ++ mov w_crc_tmp, crc /* NOTE(review): presumably w_crc_tmp is the register read as crc_tmp below - confirm against the aliases */ ++ ldp x29, x30, [sp], 16 ++ mov size, x_len_saved ++ mov buf, x_buf_iter ++ b .crc32_hw_handle ++ ++.remain: /* whole input is shorter than 2048 bytes */ ++ mov w_crc_tmp, crc ++ mov size, x_len_saved ++ mov buf, x_buf_saved ++ b .crc32_hw_handle ++ ++ .align 4 ++.less_than_2048: ++ cbnz x_len, .remain ++ ret /* length is zero: nothing to process */ ++ ++.crc32_hw_handle: /* tail path: folds size bytes at buf into crc_tmp */ ++ cmp size, 63 ++ ++#ifdef CRC32 ++ mvn crc_tmp, crc_tmp /* CRC32 build only: invert the running crc; mvn leaves the flags from the cmp intact for the bls */ ++#endif ++ ++ bls .less_than_64 ++ sub buf_saved, size, #64 ++ and buf_saved, buf_saved, -64 ++ add buf_saved, buf_saved, 64 ++ add buf_saved, buf, buf_saved /* buf_saved = buf plus size rounded down to a multiple of 64 */ ++ ++ .align 4 ++.loop_64: /* eight crc32_u64 steps per 64-byte chunk, loads interleaved with the crc steps */ ++ ldp data_tmp1, data_tmp2, [buf] ++ ldr data_tmp3, [buf, 16] ++ crc32_u64 crc_tmp, crc_tmp, data_tmp1 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ ++ ldp data_tmp1, data_tmp2, [buf, 24] ++ add buf, buf, 64 /* buf now points past the chunk, so the loads below use negative offsets */ ++ ++ crc32_u64 crc_tmp, crc_tmp, data_tmp3 ++ ldr data_tmp3, [buf, -24] /* chunk bytes 40..47 */ ++ ++ crc32_u64 crc_tmp, crc_tmp, data_tmp1 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ ++ ldp data_tmp1, data_tmp2, [buf, -16] /* chunk bytes 48..63 */ ++ cmp buf_saved, buf ++ crc32_u64 crc_tmp, crc_tmp, data_tmp3 ++ ++ crc32_u64 crc_tmp, crc_tmp, data_tmp1 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bne .loop_64 ++ ++ and size, size, 63 /* bytes left after the 64-byte chunks */
++.less_than_64: /* at most 63 bytes left: up to seven 8-byte steps */ ++ cmp size, 7 ++ bls .crc32_hw_w /* size at most 7: skip the 8-byte steps */ ++ ++ ldr data_tmp2, [buf] ++ sub size_tmp, size, #8 /* size_tmp = size - 8, read again at .crc32_hw_w_pre */ ++ cmp size_tmp, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 8] ++ sub data_tmp3, size, #16 /* data_tmp3 is used only as a scratch for the remaining count in these steps */ ++ cmp data_tmp3, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 16] ++ sub data_tmp3, size, #24 ++ cmp data_tmp3, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 24] ++ sub data_tmp3, size, #32 ++ cmp data_tmp3, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 32] ++ sub data_tmp3, size, #40 ++ cmp data_tmp3, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 40] ++ sub data_tmp3, size, #48 ++ cmp data_tmp3, 7 ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ bls .crc32_hw_w_pre ++ ++ ldr data_tmp2, [buf, 48] ++ crc32_u64 crc_tmp, crc_tmp, data_tmp2 ++ ++.crc32_hw_w_pre: /* advance buf by ((size - 8) & -8) + 8 and keep size & 7; NOTE(review): relies on size_tmp still holding size - 8, so size_tmp must not share a register with data_tmp3 - confirm against the aliases */ ++ and size_tmp, size_tmp, -8 ++ and size, size, 7 ++ add size_tmp, size_tmp, 8 ++ add buf, buf, size_tmp ++ ++.crc32_hw_w: /* one 4-byte step if at least 4 bytes remain */ ++ cmp size, 3 ++ bls .crc32_hw_h ++ ldr w_data_tmp2, [buf], 4 /* post-index load: buf advances by 4 */ ++ sub size, size, #4 ++ crc32_u32 crc_tmp, crc_tmp, w_data_tmp2 ++ ++.crc32_hw_h: /* one 2-byte step if at least 2 bytes remain */ ++ cmp size, 1 ++ bls .crc32_hw_b ++ ldrh w_data_tmp2, [buf], 2 /* post-index load: buf advances by 2 */ ++ sub size, size, #2 ++ crc32_u16 crc_tmp, crc_tmp, w_data_tmp2 ++ ++.crc32_hw_b: /* final byte, if any */ ++ cbz size, .crc32_hw_done ++ ldrb w_data_tmp2, [buf] ++ crc32_u8 crc_tmp, crc_tmp, w_data_tmp2 ++ ++.crc32_hw_done: ++#ifdef CRC32 ++ mvn ret_crc, crc_tmp /* CRC32 build: invert again on the way out, mirroring the mvn at .crc32_hw_handle */ ++#else ++ mov ret_crc, crc_tmp /* otherwise return crc_tmp unchanged */ ++#endif ++ ret ++.endm +diff --git a/src/isa-l/crc/aarch64/crc32c_mix_default.S b/src/isa-l/crc/aarch64/crc32c_mix_default.S +new file mode 100644 +index 0000000..87b8ce3 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32c_mix_default.S +@@ -0,0 +1,109 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved.
++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++**********************************************************************/ ++ ++ .text ++ .arch armv8-a+crypto+crc ++ .align 6 ++ ++.macro crc32_u64 dst,src,data ++ crc32cx \dst,\src,\data /* 64-bit step; these four wrappers bind the names used by the included common code to the CRC32C instructions */ ++.endm ++ ++.macro crc32_u32 dst,src,data ++ crc32cw \dst,\src,\data /* 32-bit step */ ++.endm ++ ++.macro crc32_u16 dst,src,data ++ crc32ch \dst,\src,\data /* 16-bit step */ ++.endm ++ ++.macro crc32_u8 dst,src,data ++ crc32cb \dst,\src,\data /* 8-bit step */ ++.endm ++ ++#include "crc32_mix_default_common.S" ++ ++ .global crc32c_mix_default ++ .type crc32c_mix_default, %function ++crc32c_mix_default: /* arguments are read from x0, w1, w2; presumably (buffer, len, crc_init) as for crc32_iscsi, whose dispatcher returns this function - confirm against the C prototype */ ++ mov w3, w2 /* w3 = third argument */ ++ sxtw x2, w1 /* x2 = second argument sign-extended to 64 bits; NOTE(review): the common code compares the length unsigned, so a negative value would act as a huge length if x_len aliases x2 - confirm */ ++ mov x1, x0 /* x1 = first argument */ ++ mov w0, w3 /* w0 = third argument */ ++ crc32_mix_main_default /* shared body; every path through it ends in ret */ ++ .size crc32c_mix_default, .-crc32c_mix_default ++ ++ .section .rodata /* constant tables; NOTE(review): presumably addressed through the x_pconst offsets used in crc32_mix2048 - confirm */ ++ .align 4 ++ .set lanchor_crc32,. + 0 ++ ++ .type k1k2, %object ++ .size k1k2, 16 ++k1k2: ++ .xword 0x00740eef02 ++ .xword 0x009e4addf8 ++ ++ .type k3k4, %object ++ .size k3k4, 16 ++k3k4: ++ .xword 0x00f20c0dfe ++ .xword 0x014cd00bd6 ++ ++ .type k5k0, %object ++ .size k5k0, 16 ++k5k0: ++ .xword 0x00dd45aab8 ++ .xword 0 ++ ++ .type poly, %object ++ .size poly, 16 ++poly: ++ .xword 0x0105ec76f0 ++ .xword 0x00dea713f1 ++ ++ .type crc32_const, %object ++ .size crc32_const, 48 ++crc32_const: /* three 16-byte entries, value in the low xword and zero in the high xword */ ++ .xword 0x9ef68d35 ++ .xword 0 ++ .xword 0x170076fa ++ .xword 0 ++ .xword 0xdd7e3b0c ++ .xword 0 ++ ++ .align 4 ++ .set .lanchor_mask,.
+ 0 ++ ++ .type mask, %object ++ .size mask, 16 ++mask: ++ .word -1 ++ .word 0 ++ .word -1 ++ .word 0 +diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +index 1df2833..b28a3a1 100644 +--- a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c ++++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +@@ -73,7 +73,7 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): + return PROVIDER_INFO(crc32c_mix_neoverse_n1); + } +- return PROVIDER_INFO(crc32_iscsi_refl_hw_fold); ++ return PROVIDER_INFO(crc32c_mix_default); + } + + if (auxval & HWCAP_PMULL) { +@@ -98,7 +98,7 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): + return PROVIDER_INFO(crc32_mix_neoverse_n1); + } +- return PROVIDER_INFO(crc32_gzip_refl_hw_fold); ++ return PROVIDER_INFO(crc32_mix_default); + } + + if (auxval & HWCAP_PMULL) +-- +2.20.1.windows.1 + + +From cd888f01a447dd04c3a8b50362079648d432d2ca Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Fri, 22 May 2020 10:46:50 -0700 +Subject: [PATCH 16/42] x86: Add ENDBR32/ENDBR64 at function entries for Intel + CET + +To support Intel CET, all indirect branch targets must start with +ENDBR32/ENDBR64. Here is a patch to define endbranch and add it to +function entries in x86 assembly codes which are indirect branch +targets as discovered by running testsuite on Intel CET machine and +visual inspection. + +Verified with + +$ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure x86_64-linux +$ make -j8 +$ make -j8 check + +with both nasm and yasm on both CET and non-CET machines. + +Change-Id: I9822578e7294fb5043a64ab7de5c41de81a7d337 +Signed-off-by: H.J. 
Lu +--- + crc/crc16_t10dif_01.asm | 1 + + crc/crc16_t10dif_02.asm | 1 + + crc/crc16_t10dif_by16_10.asm | 1 + + crc/crc16_t10dif_by4.asm | 1 + + crc/crc16_t10dif_copy_by4.asm | 1 + + crc/crc16_t10dif_copy_by4_02.asm | 1 + + crc/crc32_gzip_refl_by16_10.asm | 1 + + crc/crc32_gzip_refl_by8.asm | 1 + + crc/crc32_gzip_refl_by8_02.asm | 1 + + crc/crc32_ieee_01.asm | 1 + + crc/crc32_ieee_02.asm | 1 + + crc/crc32_ieee_by16_10.asm | 1 + + crc/crc32_ieee_by4.asm | 1 + + crc/crc32_iscsi_00.asm | 1 + + crc/crc32_iscsi_01.asm | 2 ++ + crc/crc64_ecma_norm_by8.asm | 1 + + crc/crc64_ecma_refl_by8.asm | 1 + + crc/crc64_iso_norm_by16_10.asm | 1 + + crc/crc64_iso_norm_by8.asm | 1 + + crc/crc64_iso_refl_by16_10.asm | 1 + + crc/crc64_iso_refl_by8.asm | 1 + + crc/crc64_jones_norm_by8.asm | 1 + + crc/crc64_jones_refl_by8.asm | 1 + + crc/crc_multibinary.asm | 6 ++++++ + erasure_code/gf_2vect_dot_prod_avx.asm | 4 ++-- + erasure_code/gf_2vect_dot_prod_avx2.asm | 4 ++-- + erasure_code/gf_2vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_2vect_dot_prod_sse.asm | 4 ++-- + erasure_code/gf_2vect_mad_avx.asm | 2 +- + erasure_code/gf_2vect_mad_avx2.asm | 2 +- + erasure_code/gf_2vect_mad_avx512.asm | 2 +- + erasure_code/gf_2vect_mad_sse.asm | 2 +- + erasure_code/gf_3vect_dot_prod_avx.asm | 4 ++-- + erasure_code/gf_3vect_dot_prod_avx2.asm | 4 ++-- + erasure_code/gf_3vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_3vect_dot_prod_sse.asm | 4 ++-- + erasure_code/gf_3vect_mad_avx.asm | 2 +- + erasure_code/gf_3vect_mad_avx2.asm | 2 +- + erasure_code/gf_3vect_mad_avx512.asm | 2 +- + erasure_code/gf_3vect_mad_sse.asm | 2 +- + erasure_code/gf_4vect_dot_prod_avx.asm | 4 ++-- + erasure_code/gf_4vect_dot_prod_avx2.asm | 4 ++-- + erasure_code/gf_4vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_4vect_dot_prod_sse.asm | 4 ++-- + erasure_code/gf_4vect_mad_avx.asm | 2 +- + erasure_code/gf_4vect_mad_avx2.asm | 2 +- + erasure_code/gf_4vect_mad_avx512.asm | 2 +- + erasure_code/gf_4vect_mad_sse.asm | 2 +- + 
erasure_code/gf_5vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_5vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_5vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_5vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_5vect_mad_avx.asm | 2 +- + erasure_code/gf_5vect_mad_avx2.asm | 2 +- + erasure_code/gf_5vect_mad_avx512.asm | 2 +- + erasure_code/gf_5vect_mad_sse.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx2.asm | 2 +- + erasure_code/gf_6vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_6vect_dot_prod_sse.asm | 2 +- + erasure_code/gf_6vect_mad_avx.asm | 2 +- + erasure_code/gf_6vect_mad_avx2.asm | 2 +- + erasure_code/gf_6vect_mad_avx512.asm | 2 +- + erasure_code/gf_6vect_mad_sse.asm | 2 +- + erasure_code/gf_vect_dot_prod_avx.asm | 4 ++-- + erasure_code/gf_vect_dot_prod_avx2.asm | 4 ++-- + erasure_code/gf_vect_dot_prod_avx512.asm | 2 +- + erasure_code/gf_vect_dot_prod_sse.asm | 4 ++-- + erasure_code/gf_vect_mad_avx.asm | 2 +- + erasure_code/gf_vect_mad_avx2.asm | 2 +- + erasure_code/gf_vect_mad_avx512.asm | 2 +- + erasure_code/gf_vect_mad_sse.asm | 2 +- + erasure_code/gf_vect_mul_avx.asm | 2 +- + erasure_code/gf_vect_mul_sse.asm | 2 +- + igzip/adler32_avx2_4.asm | 2 +- + igzip/adler32_sse.asm | 2 +- + igzip/encode_df_04.asm | 1 + + igzip/encode_df_06.asm | 1 + + igzip/igzip_body.asm | 1 + + igzip/igzip_decode_block_stateless.asm | 1 + + igzip/igzip_deflate_hash.asm | 1 + + igzip/igzip_finish.asm | 1 + + igzip/igzip_gen_icf_map_lh1_04.asm | 3 ++- + igzip/igzip_gen_icf_map_lh1_06.asm | 3 ++- + igzip/igzip_icf_body_h1_gr_bt.asm | 1 + + igzip/igzip_icf_finish.asm | 1 + + igzip/igzip_set_long_icf_fg_04.asm | 3 ++- + igzip/igzip_set_long_icf_fg_06.asm | 3 ++- + igzip/igzip_update_histogram.asm | 1 + + igzip/proc_heap.asm | 2 ++ + include/multibinary.asm | 2 ++ + include/reg_sizes.asm | 13 +++++++++++++ + mem/mem_zero_detect_avx.asm | 2 +- + mem/mem_zero_detect_sse.asm | 2 +- + raid/pq_check_sse.asm | 2 +- + 
raid/pq_check_sse_i32.asm | 4 ++-- + raid/pq_gen_avx.asm | 2 +- + raid/pq_gen_avx2.asm | 2 +- + raid/pq_gen_avx512.asm | 2 +- + raid/pq_gen_sse.asm | 2 +- + raid/pq_gen_sse_i32.asm | 4 ++-- + raid/raid_multibinary.asm | 4 ++++ + raid/xor_check_sse.asm | 4 ++-- + raid/xor_gen_avx.asm | 2 +- + raid/xor_gen_avx512.asm | 2 +- + raid/xor_gen_sse.asm | 4 ++-- + 106 files changed, 149 insertions(+), 85 deletions(-) + +diff --git a/src/isa-l/crc/crc16_t10dif_01.asm b/src/isa-l/crc/crc16_t10dif_01.asm +index 33f4555..536b6f3 100644 +--- a/src/isa-l/crc/crc16_t10dif_01.asm ++++ b/src/isa-l/crc/crc16_t10dif_01.asm +@@ -75,6 +75,7 @@ section .text + align 16 + mk_global crc16_t10dif_01, function + crc16_t10dif_01: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc16_t10dif_02.asm b/src/isa-l/crc/crc16_t10dif_02.asm +index 157ac53..0e392af 100644 +--- a/src/isa-l/crc/crc16_t10dif_02.asm ++++ b/src/isa-l/crc/crc16_t10dif_02.asm +@@ -75,6 +75,7 @@ section .text + align 16 + mk_global crc16_t10dif_02, function + crc16_t10dif_02: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc16_t10dif_by16_10.asm b/src/isa-l/crc/crc16_t10dif_by16_10.asm +index 479b635..27a2e02 100644 +--- a/src/isa-l/crc/crc16_t10dif_by16_10.asm ++++ b/src/isa-l/crc/crc16_t10dif_by16_10.asm +@@ -84,6 +84,7 @@ section .text + align 16 + mk_global FUNCTION_NAME, function + FUNCTION_NAME: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc16_t10dif_by4.asm b/src/isa-l/crc/crc16_t10dif_by4.asm +index bde071a..1326eb2 100644 +--- a/src/isa-l/crc/crc16_t10dif_by4.asm ++++ b/src/isa-l/crc/crc16_t10dif_by4.asm +@@ -68,6 +68,7 @@ section .text + align 16 + mk_global crc16_t10dif_by4, function + crc16_t10dif_by4: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + 
shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4.asm b/src/isa-l/crc/crc16_t10dif_copy_by4.asm +index 0f82d69..b8a6838 100644 +--- a/src/isa-l/crc/crc16_t10dif_copy_by4.asm ++++ b/src/isa-l/crc/crc16_t10dif_copy_by4.asm +@@ -71,6 +71,7 @@ section .text + align 16 + mk_global crc16_t10dif_copy_by4, function + crc16_t10dif_copy_by4: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm +index 1a7338f..254a187 100644 +--- a/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm ++++ b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm +@@ -71,6 +71,7 @@ section .text + align 16 + mk_global crc16_t10dif_copy_by4_02, function + crc16_t10dif_copy_by4_02: ++ endbranch + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 +diff --git a/src/isa-l/crc/crc32_gzip_refl_by16_10.asm b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm +index 69cb366..15280b8 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by16_10.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm +@@ -94,6 +94,7 @@ section .text + align 16 + mk_global FUNCTION_NAME, function + FUNCTION_NAME: ++ endbranch + + not arg1_low32 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc32_gzip_refl_by8.asm b/src/isa-l/crc/crc32_gzip_refl_by8.asm +index 780ae35..4384024 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by8.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by8.asm +@@ -88,6 +88,7 @@ section .text + align 16 + mk_global crc32_gzip_refl_by8, function + crc32_gzip_refl_by8: ++ endbranch + + ; unsigned long c = crc ^ 0xffffffffL; + not arg1_low32 ; +diff --git a/src/isa-l/crc/crc32_gzip_refl_by8_02.asm b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm +index bba5ae6..712fe87 100644 +--- a/src/isa-l/crc/crc32_gzip_refl_by8_02.asm ++++ b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm +@@ -88,6 +88,7 @@ section .text + align 16 + mk_global crc32_gzip_refl_by8_02, function + 
crc32_gzip_refl_by8_02: ++ endbranch + not arg1_low32 + sub rsp, VARIABLE_OFFSET + +diff --git a/src/isa-l/crc/crc32_ieee_01.asm b/src/isa-l/crc/crc32_ieee_01.asm +index 5b9d465..368261d 100644 +--- a/src/isa-l/crc/crc32_ieee_01.asm ++++ b/src/isa-l/crc/crc32_ieee_01.asm +@@ -74,6 +74,7 @@ section .text + align 16 + mk_global crc32_ieee_01, function + crc32_ieee_01: ++ endbranch + + not arg1_low32 ;~init_crc + +diff --git a/src/isa-l/crc/crc32_ieee_02.asm b/src/isa-l/crc/crc32_ieee_02.asm +index 411e923..95d53e8 100644 +--- a/src/isa-l/crc/crc32_ieee_02.asm ++++ b/src/isa-l/crc/crc32_ieee_02.asm +@@ -74,6 +74,7 @@ section .text + align 16 + mk_global crc32_ieee_02, function + crc32_ieee_02: ++ endbranch + + not arg1_low32 ;~init_crc + +diff --git a/src/isa-l/crc/crc32_ieee_by16_10.asm b/src/isa-l/crc/crc32_ieee_by16_10.asm +index c6aa741..5c3f52a 100644 +--- a/src/isa-l/crc/crc32_ieee_by16_10.asm ++++ b/src/isa-l/crc/crc32_ieee_by16_10.asm +@@ -84,6 +84,7 @@ section .text + align 16 + mk_global FUNCTION_NAME, function + FUNCTION_NAME: ++ endbranch + + not arg1_low32 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc32_ieee_by4.asm b/src/isa-l/crc/crc32_ieee_by4.asm +index 2ce2289..f432640 100644 +--- a/src/isa-l/crc/crc32_ieee_by4.asm ++++ b/src/isa-l/crc/crc32_ieee_by4.asm +@@ -76,6 +76,7 @@ section .text + align 16 + mk_global crc32_ieee_by4, function + crc32_ieee_by4: ++ endbranch + + not arg1_low32 + +diff --git a/src/isa-l/crc/crc32_iscsi_00.asm b/src/isa-l/crc/crc32_iscsi_00.asm +index e1ad903..1a5e029 100644 +--- a/src/isa-l/crc/crc32_iscsi_00.asm ++++ b/src/isa-l/crc/crc32_iscsi_00.asm +@@ -155,6 +155,7 @@ default rel + + mk_global crc32_iscsi_00, function + crc32_iscsi_00: ++ endbranch + + %ifidn __OUTPUT_FORMAT__, elf64 + %define bufp rdi +diff --git a/src/isa-l/crc/crc32_iscsi_01.asm b/src/isa-l/crc/crc32_iscsi_01.asm +index 30adb04..e0f2b5e 100644 +--- a/src/isa-l/crc/crc32_iscsi_01.asm ++++ b/src/isa-l/crc/crc32_iscsi_01.asm +@@ -52,6 +52,7 @@ 
default rel + + mk_global crc32_iscsi_01, function + crc32_iscsi_01: ++ endbranch + + %ifidn __OUTPUT_FORMAT__, elf64 + %define bufp rdi +@@ -214,6 +215,7 @@ non_prefetch: + %rep 128-1 + + CONCAT(crc_,i,:) ++ endbranch + crc32 crc_init, qword [block_0 - i*8] + crc32 crc1, qword [block_1 - i*8] + crc32 crc2, qword [block_2 - i*8] +diff --git a/src/isa-l/crc/crc64_ecma_norm_by8.asm b/src/isa-l/crc/crc64_ecma_norm_by8.asm +index 5599d98..ca99e34 100644 +--- a/src/isa-l/crc/crc64_ecma_norm_by8.asm ++++ b/src/isa-l/crc/crc64_ecma_norm_by8.asm +@@ -64,6 +64,7 @@ section .text + align 16 + mk_global crc64_ecma_norm_by8, function + crc64_ecma_norm_by8: ++ endbranch + + not arg1 ;~init_crc + +diff --git a/src/isa-l/crc/crc64_ecma_refl_by8.asm b/src/isa-l/crc/crc64_ecma_refl_by8.asm +index b641934..c09ddfa 100644 +--- a/src/isa-l/crc/crc64_ecma_refl_by8.asm ++++ b/src/isa-l/crc/crc64_ecma_refl_by8.asm +@@ -70,6 +70,7 @@ section .text + align 16 + mk_global crc64_ecma_refl_by8, function + crc64_ecma_refl_by8: ++ endbranch + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc64_iso_norm_by16_10.asm b/src/isa-l/crc/crc64_iso_norm_by16_10.asm +index 28630a1..4eefbd3 100644 +--- a/src/isa-l/crc/crc64_iso_norm_by16_10.asm ++++ b/src/isa-l/crc/crc64_iso_norm_by16_10.asm +@@ -71,6 +71,7 @@ section .text + align 16 + mk_global FUNCTION_NAME, function + FUNCTION_NAME: ++ endbranch + not arg1 + sub rsp, VARIABLE_OFFSET + +diff --git a/src/isa-l/crc/crc64_iso_norm_by8.asm b/src/isa-l/crc/crc64_iso_norm_by8.asm +index 887fca8..16147d5 100644 +--- a/src/isa-l/crc/crc64_iso_norm_by8.asm ++++ b/src/isa-l/crc/crc64_iso_norm_by8.asm +@@ -63,6 +63,7 @@ section .text + align 16 + mk_global crc64_iso_norm_by8, function + crc64_iso_norm_by8: ++ endbranch + + not arg1 ;~init_crc + +diff --git a/src/isa-l/crc/crc64_iso_refl_by16_10.asm b/src/isa-l/crc/crc64_iso_refl_by16_10.asm +index d58ac0a..e5d5a08 100644 +--- 
a/src/isa-l/crc/crc64_iso_refl_by16_10.asm ++++ b/src/isa-l/crc/crc64_iso_refl_by16_10.asm +@@ -72,6 +72,7 @@ section .text + align 16 + mk_global FUNCTION_NAME, function + FUNCTION_NAME: ++ endbranch + not arg1 + sub rsp, VARIABLE_OFFSET + +diff --git a/src/isa-l/crc/crc64_iso_refl_by8.asm b/src/isa-l/crc/crc64_iso_refl_by8.asm +index 3abc5da..b6dfcf0 100644 +--- a/src/isa-l/crc/crc64_iso_refl_by8.asm ++++ b/src/isa-l/crc/crc64_iso_refl_by8.asm +@@ -67,6 +67,7 @@ section .text + align 16 + mk_global crc64_iso_refl_by8, function + crc64_iso_refl_by8: ++ endbranch + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc64_jones_norm_by8.asm b/src/isa-l/crc/crc64_jones_norm_by8.asm +index bc3b521..0cf8b4a 100644 +--- a/src/isa-l/crc/crc64_jones_norm_by8.asm ++++ b/src/isa-l/crc/crc64_jones_norm_by8.asm +@@ -63,6 +63,7 @@ section .text + align 16 + mk_global crc64_jones_norm_by8, function + crc64_jones_norm_by8: ++ endbranch + + not arg1 ;~init_crc + +diff --git a/src/isa-l/crc/crc64_jones_refl_by8.asm b/src/isa-l/crc/crc64_jones_refl_by8.asm +index a9ea19a..eea9c8d 100644 +--- a/src/isa-l/crc/crc64_jones_refl_by8.asm ++++ b/src/isa-l/crc/crc64_jones_refl_by8.asm +@@ -67,6 +67,7 @@ section .text + align 16 + mk_global crc64_jones_refl_by8, function + crc64_jones_refl_by8: ++ endbranch + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 + sub rsp, VARIABLE_OFFSET +diff --git a/src/isa-l/crc/crc_multibinary.asm b/src/isa-l/crc/crc_multibinary.asm +index b1f425a..8b9d7bd 100644 +--- a/src/isa-l/crc/crc_multibinary.asm ++++ b/src/isa-l/crc/crc_multibinary.asm +@@ -81,8 +81,10 @@ section .text + ;;;; + mk_global crc32_iscsi, function + crc32_iscsi_mbinit: ++ endbranch + call crc32_iscsi_dispatch_init + crc32_iscsi: ++ endbranch + jmp qword [crc32_iscsi_dispatched] + + crc32_iscsi_dispatch_init: +@@ -115,8 +117,10 @@ crc32_iscsi_dispatch_init: + ;;;; + mk_global crc32_ieee, function + crc32_ieee_mbinit: ++ 
endbranch + call crc32_ieee_dispatch_init + crc32_ieee: ++ endbranch + jmp qword [crc32_ieee_dispatched] + + crc32_ieee_dispatch_init: +@@ -194,8 +198,10 @@ crc32_ieee_dispatch_init: + ;;;; + mk_global crc16_t10dif, function + crc16_t10dif_mbinit: ++ endbranch + call crc16_t10dif_dispatch_init + crc16_t10dif: ++ endbranch + jmp qword [crc16_t10dif_dispatched] + + crc16_t10dif_dispatch_init: +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +index f512d7d..cfbc2eb 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm +@@ -52,7 +52,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +@@ -127,7 +127,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm +index ba704d0..a06f67a 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm +@@ -54,7 +54,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +@@ -131,7 +131,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm +index 2444216..92d7e9d 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm +@@ -50,7 +50,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + 
%endmacro +diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm +index 7e1006b..f7e44e7 100644 +--- a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm +@@ -52,7 +52,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +@@ -127,7 +127,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm +index 65af8b0..995c36b 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm +@@ -97,7 +97,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm +index f4c1cae..751677d 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm +@@ -104,7 +104,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm +index 5a35a89..ce37248 100644 +--- a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm +@@ -45,7 +45,7 @@ + %define tmp r11 + %define tmp2 r10 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm +index c85b431..2bff82f 100644 +--- 
a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm +@@ -97,7 +97,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +index deb44d0..79c7ed4 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm +@@ -52,7 +52,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -139,7 +139,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm +index fa55dd6..606c3a1 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm +@@ -54,7 +54,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -143,7 +143,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm +index eecde81..81e96f2 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm +@@ -53,7 +53,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm 
+index 2b13e71..d52c72b 100644 +--- a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm +@@ -52,7 +52,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -139,7 +139,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm +index 4aea710..13963f6 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm +@@ -97,7 +97,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm +index e8071dd..797d954 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm +@@ -103,7 +103,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm +index b8b8d9b..bc61900 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm +@@ -44,7 +44,7 @@ + %define arg5 r9 + %define tmp r11 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm +index 10744ec..c0fd0b9 100644 +--- a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm +@@ -96,7 +96,7 @@ + %define 
return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +index f436048..bad8692 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm +@@ -54,7 +54,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -159,7 +159,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm +index 0c7ae4e..e422e28 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm +@@ -56,7 +56,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -163,7 +163,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm +index 6d67426..9d32973 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm +@@ -55,7 +55,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm +index 25134c7..25b5cff 100644 +--- a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm ++++ 
b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm +@@ -54,7 +54,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -159,7 +159,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm +index 284c76b..3a00623 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm +@@ -103,7 +103,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm +index bf6cc7e..e1cf910 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm +@@ -101,7 +101,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm +index 3948ab1..77dc76b 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm +@@ -44,7 +44,7 @@ + %define arg5 r9 + %define tmp r11 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm +index 377b31f..d5efc97 100644 +--- a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm +@@ -103,7 +103,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + 
%macro FUNC_SAVE 0 + push r12 + %endmacro +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +index 3226dde..a5bdb2a 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm +@@ -51,7 +51,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm +index 4bee087..d019e97 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm +@@ -53,7 +53,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +index e955ea5..1cca65b 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm +@@ -57,7 +57,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm +index 5ff9460..c96bed5 100644 +--- a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm +@@ -51,7 +51,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm +index ccdbc6e..e9e246c 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm +@@ -107,7 +107,7 @@ + %define return rax + %define return.w eax + 
+- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm +index ac61437..87038a7 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm +@@ -103,7 +103,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm +index 5de47d1..e2a1455 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm +@@ -45,7 +45,7 @@ + %define tmp r11 + %define tmp2 r10 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm +index fc99aaf..17760d0 100644 +--- a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm +@@ -107,7 +107,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +index 1f9df8d..7604711 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm +@@ -51,7 +51,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm +index ccb4e77..5885d97 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm ++++ 
b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm +@@ -53,7 +53,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +index 6ebfd26..bb25e67 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm +@@ -57,7 +57,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm +index 51bd116..41176bb 100644 +--- a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm +@@ -51,7 +51,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm +index 4e20dbb..c9ce490 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm +@@ -111,7 +111,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm +index 45d750e..8f94c6a 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm +@@ -107,7 +107,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm +index 6ae11f3..c2383a2 100644 +--- 
a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm +@@ -46,7 +46,7 @@ + %define tmp2 r10 + %define tmp3 r12 ;must be saved and restored + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + %endmacro +diff --git a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm +index 695fd6b..f33ec06 100644 +--- a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm +@@ -113,7 +113,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm +index 179e985..7bd8700 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm +@@ -48,7 +48,7 @@ + %endmacro + %define SSTR SLDR + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +@@ -106,7 +106,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm +index 2cfa0f0..c385e3b 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm +@@ -51,7 +51,7 @@ + %endmacro + %define SSTR SLDR + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +@@ -111,7 +111,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack +diff --git 
a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +index 8a02fd8..37fe082 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm +@@ -49,7 +49,7 @@ + %define PS 8 + %define LOG_PS 3 + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm +index 602bd89..9b0a440 100644 +--- a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm +@@ -48,7 +48,7 @@ + %endmacro + %define SSTR SLDR + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +@@ -106,7 +106,7 @@ + + %define PS 4 + %define LOG_PS 2 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx.asm b/src/isa-l/erasure_code/gf_vect_mad_avx.asm +index 2b0e623..448fbd7 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx.asm +@@ -82,7 +82,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm +index 9941fca..097d8fa 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm +@@ -88,7 +88,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm +index 931e0cc..43982e3 100644 
+--- a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm +@@ -44,7 +44,7 @@ + %define arg5 r9 + %define tmp r11 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_vect_mad_sse.asm b/src/isa-l/erasure_code/gf_vect_mad_sse.asm +index 1ea69fe..1044404 100644 +--- a/src/isa-l/erasure_code/gf_vect_mad_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_mad_sse.asm +@@ -82,7 +82,7 @@ + %define return rax + %define return.w eax + +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +index 0186bbc..91f6d6d 100644 +--- a/src/isa-l/erasure_code/gf_vect_mul_avx.asm ++++ b/src/isa-l/erasure_code/gf_vect_mul_avx.asm +@@ -42,7 +42,7 @@ + %define arg5 r9 + %define tmp r11 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + +diff --git a/src/isa-l/erasure_code/gf_vect_mul_sse.asm b/src/isa-l/erasure_code/gf_vect_mul_sse.asm +index bad257a..fefe7ef 100644 +--- a/src/isa-l/erasure_code/gf_vect_mul_sse.asm ++++ b/src/isa-l/erasure_code/gf_vect_mul_sse.asm +@@ -42,7 +42,7 @@ + %define arg5 r9 + %define tmp r11 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + +diff --git a/src/isa-l/igzip/adler32_avx2_4.asm b/src/isa-l/igzip/adler32_avx2_4.asm +index 62c62bb..798310f 100644 +--- a/src/isa-l/igzip/adler32_avx2_4.asm ++++ b/src/isa-l/igzip/adler32_avx2_4.asm +@@ -55,7 +55,7 @@ default rel + %define b_d r8d + %define end r13 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/igzip/adler32_sse.asm b/src/isa-l/igzip/adler32_sse.asm +index 6aea7cb..fc986cb 100644 +--- 
a/src/isa-l/igzip/adler32_sse.asm ++++ b/src/isa-l/igzip/adler32_sse.asm +@@ -52,7 +52,7 @@ default rel + %define b_d r8d + %define end r13 + +- %define func(x) x: ++ %define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +diff --git a/src/isa-l/igzip/encode_df_04.asm b/src/isa-l/igzip/encode_df_04.asm +index 2c52af8..5b913ae 100644 +--- a/src/isa-l/igzip/encode_df_04.asm ++++ b/src/isa-l/igzip/encode_df_04.asm +@@ -177,6 +177,7 @@ section .text + + global encode_deflate_icf_ %+ ARCH + encode_deflate_icf_ %+ ARCH: ++ endbranch + FUNC_SAVE + + %ifnidn ptr, arg1 +diff --git a/src/isa-l/igzip/encode_df_06.asm b/src/isa-l/igzip/encode_df_06.asm +index aaec754..9e74795 100644 +--- a/src/isa-l/igzip/encode_df_06.asm ++++ b/src/isa-l/igzip/encode_df_06.asm +@@ -190,6 +190,7 @@ section .text + + global encode_deflate_icf_ %+ ARCH + encode_deflate_icf_ %+ ARCH: ++ endbranch + FUNC_SAVE + + %ifnidn ptr, arg1 +diff --git a/src/isa-l/igzip/igzip_body.asm b/src/isa-l/igzip/igzip_body.asm +index d69b27c..b9620d6 100644 +--- a/src/isa-l/igzip/igzip_body.asm ++++ b/src/isa-l/igzip/igzip_body.asm +@@ -143,6 +143,7 @@ section .text + ; arg 1: rcx: addr of stream + global isal_deflate_body_ %+ ARCH + isal_deflate_body_ %+ ARCH %+ : ++ endbranch + %ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi + %endif +diff --git a/src/isa-l/igzip/igzip_decode_block_stateless.asm b/src/isa-l/igzip/igzip_decode_block_stateless.asm +index 733194b..22f3bf2 100644 +--- a/src/isa-l/igzip/igzip_decode_block_stateless.asm ++++ b/src/isa-l/igzip/igzip_decode_block_stateless.asm +@@ -465,6 +465,7 @@ section .text + + global decode_huffman_code_block_stateless_ %+ ARCH + decode_huffman_code_block_stateless_ %+ ARCH %+ : ++ endbranch + + FUNC_SAVE + +diff --git a/src/isa-l/igzip/igzip_deflate_hash.asm b/src/isa-l/igzip/igzip_deflate_hash.asm +index bcb0d5d..32a1482 100644 +--- a/src/isa-l/igzip/igzip_deflate_hash.asm ++++ b/src/isa-l/igzip/igzip_deflate_hash.asm +@@ -104,6 +104,7 @@ section 
.text + + global isal_deflate_hash_crc_01 + isal_deflate_hash_crc_01: ++ endbranch + FUNC_SAVE + + neg f_i +diff --git a/src/isa-l/igzip/igzip_finish.asm b/src/isa-l/igzip/igzip_finish.asm +index fbf8839..2b539dd 100644 +--- a/src/isa-l/igzip/igzip_finish.asm ++++ b/src/isa-l/igzip/igzip_finish.asm +@@ -94,6 +94,7 @@ section .text + ; arg 1: rcx: addr of stream + global isal_deflate_finish_01 + isal_deflate_finish_01: ++ endbranch + PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 + sub rsp, stack_size + +diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm +index 077f56c..d188846 100644 +--- a/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm ++++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm +@@ -147,7 +147,7 @@ + %define stack_size 16 + %define local_storage_offset 0 + +-%define func(x) x: ++%define func(x) x: endbranch + %macro FUNC_SAVE 0 + push rbp + push r12 +@@ -175,6 +175,7 @@ section .text + + global gen_icf_map_lh1_04 + func(gen_icf_map_lh1_04) ++ endbranch + FUNC_SAVE + + mov file_start, [stream + _next_in] +diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm +index d134357..7985ab5 100644 +--- a/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm ++++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm +@@ -143,7 +143,7 @@ + add rsp, stack_size + %endm + %else +-%define func(x) x: ++%define func(x) x: endbranch + %macro FUNC_SAVE 0 + push rbp + push r12 +@@ -166,6 +166,7 @@ section .text + + global gen_icf_map_lh1_06 + func(gen_icf_map_lh1_06) ++ endbranch + FUNC_SAVE + + mov file_start, [stream + _next_in] +diff --git a/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm +index 51871c5..c74a24d 100644 +--- a/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm ++++ b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm +@@ -164,6 +164,7 @@ section .text + ; arg 1: rcx: addr of stream + global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH + 
isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : ++ endbranch + %ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi + %endif +diff --git a/src/isa-l/igzip/igzip_icf_finish.asm b/src/isa-l/igzip/igzip_icf_finish.asm +index b9f88a9..231ac06 100644 +--- a/src/isa-l/igzip/igzip_icf_finish.asm ++++ b/src/isa-l/igzip/igzip_icf_finish.asm +@@ -102,6 +102,7 @@ section .text + ; arg 1: rcx: addr of stream + global isal_deflate_icf_finish_ %+ METHOD %+ _01 + isal_deflate_icf_finish_ %+ METHOD %+ _01: ++ endbranch + PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 + sub rsp, stack_size + +diff --git a/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm +index 070e614..09fcb64 100644 +--- a/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm ++++ b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm +@@ -122,7 +122,7 @@ default rel + add rsp, stack_size + %endm + %else +-%define func(x) x: ++%define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -141,6 +141,7 @@ section .text + + global set_long_icf_fg_04 + func(set_long_icf_fg_04) ++ endbranch + FUNC_SAVE + + lea end_in, [next_in + arg3] +diff --git a/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm +index b36871c..3152ef4 100644 +--- a/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm ++++ b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm +@@ -129,7 +129,7 @@ + add rsp, stack_size + %endm + %else +-%define func(x) x: ++%define func(x) x: endbranch + %macro FUNC_SAVE 0 + push r12 + push r13 +@@ -148,6 +148,7 @@ section .text + + global set_long_icf_fg_06 + func(set_long_icf_fg_06) ++ endbranch + FUNC_SAVE + + lea end_in, [next_in + arg3] +diff --git a/src/isa-l/igzip/igzip_update_histogram.asm b/src/isa-l/igzip/igzip_update_histogram.asm +index e1939ad..698c8be 100644 +--- a/src/isa-l/igzip/igzip_update_histogram.asm ++++ b/src/isa-l/igzip/igzip_update_histogram.asm +@@ -256,6 +256,7 @@ section .text + ; void isal_update_histogram + global 
isal_update_histogram_ %+ ARCH + isal_update_histogram_ %+ ARCH %+ : ++ endbranch + FUNC_SAVE + + %ifnidn file_start, arg0 +diff --git a/src/isa-l/igzip/proc_heap.asm b/src/isa-l/igzip/proc_heap.asm +index 5ed9c8e..ea9365a 100644 +--- a/src/isa-l/igzip/proc_heap.asm ++++ b/src/isa-l/igzip/proc_heap.asm +@@ -60,6 +60,7 @@ section .text + + global build_huff_tree + build_huff_tree: ++ endbranch + %ifidn __OUTPUT_FORMAT__, win64 + push rsi + push rdi +@@ -108,6 +109,7 @@ build_huff_tree: + align 32 + global build_heap + build_heap: ++ endbranch + %ifidn __OUTPUT_FORMAT__, win64 + push rsi + push rdi +diff --git a/src/isa-l/include/multibinary.asm b/src/isa-l/include/multibinary.asm +index 16838cb..588352a 100644 +--- a/src/isa-l/include/multibinary.asm ++++ b/src/isa-l/include/multibinary.asm +@@ -71,10 +71,12 @@ + section .text + mk_global %1, function + %1_mbinit: ++ endbranch + ;;; only called the first time to setup hardware match + call %1_dispatch_init + ;;; falls thru to execute the hw optimized code + %1: ++ endbranch + jmp mbin_ptr_sz [%1_dispatched] + %endmacro + +diff --git a/src/isa-l/include/reg_sizes.asm b/src/isa-l/include/reg_sizes.asm +index 37d61f8..b7ad842 100644 +--- a/src/isa-l/include/reg_sizes.asm ++++ b/src/isa-l/include/reg_sizes.asm +@@ -200,9 +200,22 @@ section .note.GNU-stack noalloc noexec nowrite progbits + section .text + %endif + %ifidn __OUTPUT_FORMAT__,elf64 ++ %define __x86_64__ + section .note.GNU-stack noalloc noexec nowrite progbits + section .text + %endif ++%ifidn __OUTPUT_FORMAT__,win64 ++ %define __x86_64__ ++%endif ++%ifidn __OUTPUT_FORMAT__,macho64 ++ %define __x86_64__ ++%endif ++ ++%ifdef __x86_64__ ++ %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa ++%else ++ %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb ++%endif + + %ifdef REL_TEXT + %define WRT_OPT +diff --git a/src/isa-l/mem/mem_zero_detect_avx.asm b/src/isa-l/mem/mem_zero_detect_avx.asm +index e85e08d..1b5de84 100644 +--- a/src/isa-l/mem/mem_zero_detect_avx.asm ++++ 
b/src/isa-l/mem/mem_zero_detect_avx.asm +@@ -40,7 +40,7 @@ + %define tmpb r11b + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/mem/mem_zero_detect_sse.asm b/src/isa-l/mem/mem_zero_detect_sse.asm +index 78350aa..c84f0f0 100644 +--- a/src/isa-l/mem/mem_zero_detect_sse.asm ++++ b/src/isa-l/mem/mem_zero_detect_sse.asm +@@ -40,7 +40,7 @@ + %define tmpb r11b + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_check_sse.asm b/src/isa-l/raid/pq_check_sse.asm +index ca32051..f2bc8a6 100644 +--- a/src/isa-l/raid/pq_check_sse.asm ++++ b/src/isa-l/raid/pq_check_sse.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_check_sse_i32.asm b/src/isa-l/raid/pq_check_sse_i32.asm +index f05d43a..3271c03 100644 +--- a/src/isa-l/raid/pq_check_sse_i32.asm ++++ b/src/isa-l/raid/pq_check_sse_i32.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define return rax + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + +@@ -79,7 +79,7 @@ + %define arg1 ecx + %define return eax + %define PS 4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp+8+PS*x] + %define arg2 edi ; must sav/restore + %define arg3 esi +diff --git a/src/isa-l/raid/pq_gen_avx.asm b/src/isa-l/raid/pq_gen_avx.asm +index 57d2b22..db4bcfb 100644 +--- a/src/isa-l/raid/pq_gen_avx.asm ++++ b/src/isa-l/raid/pq_gen_avx.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_gen_avx2.asm 
b/src/isa-l/raid/pq_gen_avx2.asm +index 7def9ea..a0bf0cc 100644 +--- a/src/isa-l/raid/pq_gen_avx2.asm ++++ b/src/isa-l/raid/pq_gen_avx2.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_gen_avx512.asm b/src/isa-l/raid/pq_gen_avx512.asm +index 9ec6584..179ad5c 100644 +--- a/src/isa-l/raid/pq_gen_avx512.asm ++++ b/src/isa-l/raid/pq_gen_avx512.asm +@@ -49,7 +49,7 @@ + %define tmp r11 + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_gen_sse.asm b/src/isa-l/raid/pq_gen_sse.asm +index 4c5a349..b6d5148 100644 +--- a/src/isa-l/raid/pq_gen_sse.asm ++++ b/src/isa-l/raid/pq_gen_sse.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define tmp3 arg4 + %define return rax +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + %endif +diff --git a/src/isa-l/raid/pq_gen_sse_i32.asm b/src/isa-l/raid/pq_gen_sse_i32.asm +index 7a918f4..8dabb78 100644 +--- a/src/isa-l/raid/pq_gen_sse_i32.asm ++++ b/src/isa-l/raid/pq_gen_sse_i32.asm +@@ -46,7 +46,7 @@ + %define tmp r11 + %define return rax + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + +@@ -78,7 +78,7 @@ + %define arg1 ecx + %define return eax + %define PS 4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp+8+PS*x] + %define arg2 edi ; must sav/restore + %define arg3 esi +diff --git a/src/isa-l/raid/raid_multibinary.asm b/src/isa-l/raid/raid_multibinary.asm +index c84e5ef..47ef1e3 100644 +--- a/src/isa-l/raid/raid_multibinary.asm ++++ b/src/isa-l/raid/raid_multibinary.asm +@@ -74,8 +74,10 @@ section .text + ;;;; + mk_global pq_check, function + pq_check_mbinit: ++ endbranch + call pq_check_dispatch_init + 
pq_check: ++ endbranch + jmp qword [pq_check_dispatched] + + pq_check_dispatch_init: +@@ -106,8 +108,10 @@ pq_check_dispatch_init: + ;;;; + mk_global xor_check, function + xor_check_mbinit: ++ endbranch + call xor_check_dispatch_init + xor_check: ++ endbranch + jmp qword [xor_check_dispatched] + + xor_check_dispatch_init: +diff --git a/src/isa-l/raid/xor_check_sse.asm b/src/isa-l/raid/xor_check_sse.asm +index 9620412..a5fe0b2 100644 +--- a/src/isa-l/raid/xor_check_sse.asm ++++ b/src/isa-l/raid/xor_check_sse.asm +@@ -49,7 +49,7 @@ + %define tmp3 arg4 + %define return rax + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE + +@@ -88,7 +88,7 @@ + %define tmp3 edx + %define return eax + %define PS 4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp+8+PS*x] + %define arg2 edi ; must sav/restore + %define arg3 esi +diff --git a/src/isa-l/raid/xor_gen_avx.asm b/src/isa-l/raid/xor_gen_avx.asm +index cddd539..b5527b2 100644 +--- a/src/isa-l/raid/xor_gen_avx.asm ++++ b/src/isa-l/raid/xor_gen_avx.asm +@@ -45,7 +45,7 @@ + %define arg5 r9 + %define tmp r11 + %define tmp3 arg4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define return rax + %define FUNC_SAVE + %define FUNC_RESTORE +diff --git a/src/isa-l/raid/xor_gen_avx512.asm b/src/isa-l/raid/xor_gen_avx512.asm +index 552c590..5b07868 100644 +--- a/src/isa-l/raid/xor_gen_avx512.asm ++++ b/src/isa-l/raid/xor_gen_avx512.asm +@@ -47,7 +47,7 @@ + %define arg5 r9 + %define tmp r11 + %define tmp3 arg4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define return rax + %define FUNC_SAVE + %define FUNC_RESTORE +diff --git a/src/isa-l/raid/xor_gen_sse.asm b/src/isa-l/raid/xor_gen_sse.asm +index 7509548..f31ae63 100644 +--- a/src/isa-l/raid/xor_gen_sse.asm ++++ b/src/isa-l/raid/xor_gen_sse.asm +@@ -49,7 +49,7 @@ + %define tmp3 arg4 + %define return rax + %define PS 8 +- %define func(x) x: ++ %define func(x) x: endbranch + %define 
FUNC_SAVE + %define FUNC_RESTORE + +@@ -88,7 +88,7 @@ + %define tmp3 edx + %define return eax + %define PS 4 +- %define func(x) x: ++ %define func(x) x: endbranch + %define arg(x) [ebp+8+PS*x] + %define arg2 edi ; must sav/restore + %define arg3 esi +-- +2.20.1.windows.1 + + +From 8074e3fe1b9398a9d3b717267790050fc5041594 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Fri, 22 May 2020 10:17:59 -0700 +Subject: [PATCH 17/42] x86: Generate .note.gnu.property section for ELF output + +We should generate .note.gnu.property section with x86 assembly codes +for ELF outputs to mark Intel CET support when Intel CET is enabled +since all input files must be marked with Intel CET support in order +for linker to mark output with Intel CET support. Since nasm and yasm +can't generate the proper .note.gnu.property section, yasm-cet-filter.sh +and yasm-filter.sh are added to generate the proper .note.gnu.property +with linker help. + +Verified with + +$ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure x86_64-linux +$ make -j8 + +on Linux/x86-64. + +Change-Id: I14e03a8a9031c8397dc36939a528cf5a827d775a +Signed-off-by: H.J. 
Lu +--- + Makefile.am | 11 ++++++++ + configure.ac | 12 +++++++++ + tools/nasm-cet-filter.sh | 56 ++++++++++++++++++++++++++++++++++++++++ + tools/yasm-cet-filter.sh | 47 +++++++++++++++++++++++++++++++++ + 4 files changed, 126 insertions(+) + create mode 100755 tools/nasm-cet-filter.sh + create mode 100755 tools/yasm-cet-filter.sh + +diff --git a/src/isa-l/Makefile.am b/src/isa-l/Makefile.am +index 15c9cf0..51f7f4c 100644 +--- a/src/isa-l/Makefile.am ++++ b/src/isa-l/Makefile.am +@@ -117,12 +117,23 @@ test: $(addsuffix .run,$(unit_tests)) + @echo Completed run: $< + + # Support for yasm/nasm/gas ++if INTEL_CET_ENABLED ++export CET_LD=$(LD) ++endif + if USE_YASM ++if INTEL_CET_ENABLED ++ as_filter = ${srcdir}/tools/yasm-cet-filter.sh ++else + as_filter = ${srcdir}/tools/yasm-filter.sh + endif ++endif + if USE_NASM ++if INTEL_CET_ENABLED ++ as_filter = ${srcdir}/tools/nasm-cet-filter.sh ++else + as_filter = ${srcdir}/tools/nasm-filter.sh + endif ++endif + if CPU_AARCH64 + as_filter = $(CC) -D__ASSEMBLY__ + endif +diff --git a/src/isa-l/configure.ac b/src/isa-l/configure.ac +index 3834d64..8c1f042 100644 +--- a/src/isa-l/configure.ac ++++ b/src/isa-l/configure.ac +@@ -51,6 +51,7 @@ fi + + # Check for programs + AC_PROG_CC_STDC ++AC_PROG_LD + AC_USE_SYSTEM_EXTENSIONS + AM_SILENT_RULES([yes]) + LT_INIT +@@ -68,6 +69,17 @@ AS_IF([test "x$enable_debug" = "xyes"], [ + + # If this build is for x86, look for yasm and nasm + if test x"$is_x86" = x"yes"; then ++ AC_MSG_CHECKING([whether Intel CET is enabled]) ++ AC_TRY_COMPILE([],[ ++#ifndef __CET__ ++# error CET is not enabled ++#endif], ++ [AC_MSG_RESULT([yes]) ++ intel_cet_enabled=yes], ++ [AC_MSG_RESULT([no]) ++ intel_cet_enabled=no]) ++ AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"]) ++ + # Pick an assembler yasm or nasm + if test x"$AS" = x""; then + # Check for yasm and yasm features +diff --git a/src/isa-l/tools/nasm-cet-filter.sh b/src/isa-l/tools/nasm-cet-filter.sh +new file mode 100755 
+index 0000000..19e0385 +--- /dev/null ++++ b/src/isa-l/tools/nasm-cet-filter.sh +@@ -0,0 +1,56 @@ ++#/bin/sh ++ ++# Filter out unnecessary options added by automake ++ ++while [ -n "$*" ]; do ++ case "$1" in ++ -o ) ++ # Supported options with arg ++ options="$options $1 $2" ++ shift ++ object="$1" ++ shift ++ ;; ++ -f | -D ) ++ # Supported options with arg ++ options="$options $1 $2" ++ shift ++ shift ++ ;; ++ -I | -i ) ++ options="$options $1 $2/" ++ shift ++ shift ++ ;; ++ --prefix* ) ++ # Supported options without arg ++ options="$options $1" ++ shift ++ ;; ++ -I* | -i* ) ++ options="$options $1/" ++ shift ++ ;; ++ -D* ) # For defines we need to remove spaces ++ case "$1" in ++ *' '* ) ;; ++ *) options="$options $1" ;; ++ esac ++ shift ++ ;; ++ #-blah ) ++ # Unsupported options with args - none known ++ -* ) ++ # Unsupported options with no args ++ shift ++ ;; ++ * ) ++ args="$args $1" ++ shift ++ ;; ++ esac ++done ++ ++nasm $options $args ++$CET_LD -r -z ibt -z shstk -o $object.tmp $object ++mv $object.tmp $object +diff --git a/src/isa-l/tools/yasm-cet-filter.sh b/src/isa-l/tools/yasm-cet-filter.sh +new file mode 100755 +index 0000000..d7b3e97 +--- /dev/null ++++ b/src/isa-l/tools/yasm-cet-filter.sh +@@ -0,0 +1,47 @@ ++#/bin/sh ++ ++# Filter out unnecessary options added by automake ++ ++while [ -n "$*" ]; do ++ case "$1" in ++ -o ) ++ # Supported options with arg ++ options="$options $1 $2" ++ shift ++ object="$1" ++ shift ++ ;; ++ -f | -I | -i | -D ) ++ # Supported options with arg ++ options="$options $1 $2" ++ shift ++ shift ++ ;; ++ -I* | -i* | --prefix* ) ++ # Supported options without arg ++ options="$options $1" ++ shift ++ ;; ++ -D* ) # For defines we need to remove spaces ++ case "$1" in ++ *' '* ) ;; ++ *) options="$options $1" ;; ++ esac ++ shift ++ ;; ++ #-blah ) ++ # Unsupported options with args - none known ++ -* ) ++ # Unsupported options with no args ++ shift ++ ;; ++ * ) ++ args="$args $1" ++ shift ++ ;; ++ esac ++done ++ ++yasm $options 
$args ++$CET_LD -r -z ibt -z shstk -o $object.tmp $object ++mv $object.tmp $object +-- +2.20.1.windows.1 + + +From 14e0081bef4032bb232e57e2e646021cdf1d86bd Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Wed, 3 Jun 2020 02:51:25 +0000 +Subject: [PATCH 18/42] build: fix build break on non-x86 platform + +Arm64 and ppc64 build reports below error: +"configure: error: conditional "INTEL_CET_ENABLED" was never defined." +And the error should be report in all non-x86 platform. + +Change-Id: I4c1b2fc99091424cfd5c62cf4d6536222b66712d +Signed-off-by: Jerry Yu +--- + configure.ac | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/isa-l/configure.ac b/src/isa-l/configure.ac +index 8c1f042..09e491f 100644 +--- a/src/isa-l/configure.ac ++++ b/src/isa-l/configure.ac +@@ -78,7 +78,7 @@ if test x"$is_x86" = x"yes"; then + intel_cet_enabled=yes], + [AC_MSG_RESULT([no]) + intel_cet_enabled=no]) +- AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"]) ++ + + # Pick an assembler yasm or nasm + if test x"$AS" = x""; then +@@ -261,6 +261,7 @@ else + AM_CONDITIONAL(DARWIN, test "x" = "y") + fi + ++AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"]) + + # Check for header files + AC_CHECK_HEADERS([limits.h stdint.h stdlib.h string.h]) +-- +2.20.1.windows.1 + + +From 1c71f9c0aec8fdb95ea7b7468591cd46cb111c2f Mon Sep 17 00:00:00 2001 +From: Jerry Yu +Date: Fri, 3 Jul 2020 18:15:56 +0800 +Subject: [PATCH 19/42] crc32: tweak performance of crc32/crc32c + +Tweak performances with prefetch instructions. 
+ +Below is the test results: +- Neoverse N1: ~30% +- Cortex-A72: ~3% +- Cortex-A57: ~90% +- Others: 50% - 5x + +Change-Id: I3ab292a953043dbaea98af3c66778f57da3a1331 +Signed-off-by: Jerry Yu +--- + crc/aarch64/Makefile.am | 20 +- + crc/aarch64/crc32_aarch64_common.h | 321 ++++++++++++++++++ + crc/aarch64/crc32_gzip_refl_3crc_fold.S | 95 ++++++ + ...cortex_a72.S => crc32_gzip_refl_crc_ext.S} | 17 +- + crc/aarch64/crc32_gzip_refl_hw_fold.S | 176 ---------- + crc/aarch64/crc32_iscsi_3crc_fold.S | 97 ++++++ + ...ext_cortex_a72.S => crc32_iscsi_crc_ext.S} | 17 +- + crc/aarch64/crc32_iscsi_refl_hw_fold.S | 172 ---------- + crc/aarch64/crc_aarch64_dispatcher.c | 20 +- + 9 files changed, 547 insertions(+), 388 deletions(-) + create mode 100644 crc/aarch64/crc32_aarch64_common.h + create mode 100644 crc/aarch64/crc32_gzip_refl_3crc_fold.S + rename crc/aarch64/{crc32_crc_ext_cortex_a72.S => crc32_gzip_refl_crc_ext.S} (85%) + delete mode 100644 crc/aarch64/crc32_gzip_refl_hw_fold.S + create mode 100644 crc/aarch64/crc32_iscsi_3crc_fold.S + rename crc/aarch64/{crc32c_crc_ext_cortex_a72.S => crc32_iscsi_crc_ext.S} (84%) + delete mode 100644 crc/aarch64/crc32_iscsi_refl_hw_fold.S + +diff --git a/src/isa-l/crc/aarch64/Makefile.am b/src/isa-l/crc/aarch64/Makefile.am +index a43ca30..5113b77 100644 +--- a/src/isa-l/crc/aarch64/Makefile.am ++++ b/src/isa-l/crc/aarch64/Makefile.am +@@ -34,20 +34,24 @@ lsrc_aarch64 += \ + lsrc_aarch64 += \ + crc/aarch64/crc16_t10dif_pmull.S \ + crc/aarch64/crc16_t10dif_copy_pmull.S \ +- crc/aarch64/crc32_iscsi_refl_pmull.S \ +- crc/aarch64/crc32_iscsi_refl_hw_fold.S \ + crc/aarch64/crc32_ieee_norm_pmull.S \ +- crc/aarch64/crc32_gzip_refl_pmull.S \ +- crc/aarch64/crc32_gzip_refl_hw_fold.S \ + crc/aarch64/crc64_ecma_refl_pmull.S \ + crc/aarch64/crc64_ecma_norm_pmull.S \ + crc/aarch64/crc64_iso_refl_pmull.S \ + crc/aarch64/crc64_iso_norm_pmull.S \ + crc/aarch64/crc64_jones_refl_pmull.S \ +- crc/aarch64/crc64_jones_norm_pmull.S \ ++ 
crc/aarch64/crc64_jones_norm_pmull.S ++ ++#CRC32/CRC32C for micro-architecture ++lsrc_aarch64 += \ ++ crc/aarch64/crc32_iscsi_refl_pmull.S \ ++ crc/aarch64/crc32_gzip_refl_pmull.S \ ++ crc/aarch64/crc32_iscsi_3crc_fold.S \ ++ crc/aarch64/crc32_gzip_refl_3crc_fold.S \ ++ crc/aarch64/crc32_iscsi_crc_ext.S \ ++ crc/aarch64/crc32_gzip_refl_crc_ext.S \ + crc/aarch64/crc32_mix_default.S \ + crc/aarch64/crc32c_mix_default.S \ + crc/aarch64/crc32_mix_neoverse_n1.S \ +- crc/aarch64/crc32c_mix_neoverse_n1.S \ +- crc/aarch64/crc32_crc_ext_cortex_a72.S \ +- crc/aarch64/crc32c_crc_ext_cortex_a72.S ++ crc/aarch64/crc32c_mix_neoverse_n1.S ++ +diff --git a/src/isa-l/crc/aarch64/crc32_aarch64_common.h b/src/isa-l/crc/aarch64/crc32_aarch64_common.h +new file mode 100644 +index 0000000..a2ef22a +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_aarch64_common.h +@@ -0,0 +1,321 @@ ++/********************************************************************** ++ Copyright(c) 2020 Arm Corporation All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in ++ the documentation and/or other materials provided with the ++ distribution. ++ * Neither the name of Arm Corporation nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++**********************************************************************/ ++ ++ ++ ++ ++.macro crc32_hw_common poly_type ++ ++.ifc \poly_type,crc32 ++ mvn wCRC,wCRC ++.endif ++ cbz LEN, .zero_length_ret ++ tbz BUF, 0, .align_short ++ ldrb wdata,[BUF],1 ++ sub LEN,LEN,1 ++ crc32_u8 wCRC,wCRC,wdata ++.align_short: ++ tst BUF,2 ++ ccmp LEN,1,0,ne ++ bhi .align_short_2 ++ tst BUF,4 ++ ccmp LEN,3,0,ne ++ bhi .align_word ++ ++.align_finish: ++ ++ cmp LEN, 63 ++ bls .loop_16B ++.loop_64B: ++ ldp data0, data1, [BUF],#16 ++ prfm pldl2keep,[BUF,2048] ++ sub LEN,LEN,#64 ++ ldp data2, data3, [BUF],#16 ++ prfm pldl1keep,[BUF,256] ++ cmp LEN,#64 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ ldp data0, data1, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ ldp data2, data3, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ bge .loop_64B ++ ++.loop_16B: ++ cmp LEN, 15 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 //MUST less than 16B ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++.less_16B: ++ cmp LEN, 7 ++ 
bls .less_8B ++ ldr data0, [BUF], 8 ++ sub LEN, LEN, #8 ++ crc32_u64 wCRC, wCRC, data0 ++.less_8B: ++ cmp LEN, 3 ++ bls .less_4B ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++.less_4B: ++ cmp LEN, 1 ++ bls .less_2B ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, #2 ++ crc32_u16 wCRC, wCRC, wdata ++.less_2B: ++ cbz LEN, .zero_length_ret ++ ldrb wdata, [BUF] ++ crc32_u8 wCRC, wCRC, wdata ++.zero_length_ret: ++.ifc \poly_type,crc32 ++ mvn w0, wCRC ++.else ++ mov w0, wCRC ++.endif ++ ret ++.align_short_2: ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, 2 ++ tst BUF, 4 ++ crc32_u16 wCRC, wCRC, wdata ++ ccmp LEN, 3, 0, ne ++ bls .align_finish ++.align_word: ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++ b .align_finish ++.endm ++ ++.macro crc32_3crc_fold poly_type ++.ifc \poly_type,crc32 ++ mvn wCRC,wCRC ++.endif ++ cbz LEN, .zero_length_ret ++ tbz BUF, 0, .align_short ++ ldrb wdata,[BUF],1 ++ sub LEN,LEN,1 ++ crc32_u8 wCRC,wCRC,wdata ++.align_short: ++ tst BUF,2 ++ ccmp LEN,1,0,ne ++ bhi .align_short_2 ++ tst BUF,4 ++ ccmp LEN,3,0,ne ++ bhi .align_word ++ ++.align_finish: ++ cmp LEN,1023 ++ adr const_adr, .Lconstants ++ bls 1f ++ ldp dconst0,dconst1,[const_adr] ++2: ++ ldr crc0_data0,[ptr_crc0],8 ++ prfm pldl2keep,[ptr_crc0,3*1024-8] ++ mov crc1,0 ++ mov crc2,0 ++ add ptr_crc1,ptr_crc0,336 ++ add ptr_crc2,ptr_crc0,336*2 ++ crc32_u64 crc0,crc0,crc0_data0 ++ .set offset,0 ++ .set ptr_offset,8 ++ .rept 5 ++ ldp crc0_data0,crc0_data1,[ptr_crc0],16 ++ ldp crc1_data0,crc1_data1,[ptr_crc1],16 ++ .set offset,offset+64 ++ .set ptr_offset,ptr_offset+16 ++ prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset] ++ crc32_u64 crc0,crc0,crc0_data0 ++ crc32_u64 crc0,crc0,crc0_data1 ++ ldp crc2_data0,crc2_data1,[ptr_crc2],16 ++ crc32_u64 crc1,crc1,crc1_data0 ++ crc32_u64 crc1,crc1,crc1_data1 ++ crc32_u64 crc2,crc2,crc2_data0 ++ crc32_u64 crc2,crc2,crc2_data1 ++ .endr ++ .set l1_offset,0 ++ .rept 10 ++ ldp crc0_data0,crc0_data1,[ptr_crc0],16 ++ 
ldp crc1_data0,crc1_data1,[ptr_crc1],16 ++ .set offset,offset+64 ++ .set ptr_offset,ptr_offset+16 ++ prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset] ++ prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset] ++ .set l1_offset,l1_offset+64 ++ crc32_u64 crc0,crc0,crc0_data0 ++ crc32_u64 crc0,crc0,crc0_data1 ++ ldp crc2_data0,crc2_data1,[ptr_crc2],16 ++ crc32_u64 crc1,crc1,crc1_data0 ++ crc32_u64 crc1,crc1,crc1_data1 ++ crc32_u64 crc2,crc2,crc2_data0 ++ crc32_u64 crc2,crc2,crc2_data1 ++ .endr ++ ++ .rept 6 ++ ldp crc0_data0,crc0_data1,[ptr_crc0],16 ++ ldp crc1_data0,crc1_data1,[ptr_crc1],16 ++ .set ptr_offset,ptr_offset+16 ++ prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset] ++ .set l1_offset,l1_offset+64 ++ crc32_u64 crc0,crc0,crc0_data0 ++ crc32_u64 crc0,crc0,crc0_data1 ++ ldp crc2_data0,crc2_data1,[ptr_crc2],16 ++ crc32_u64 crc1,crc1,crc1_data0 ++ crc32_u64 crc1,crc1,crc1_data1 ++ crc32_u64 crc2,crc2,crc2_data0 ++ crc32_u64 crc2,crc2,crc2_data1 ++ .endr ++ ldr crc2_data0,[ptr_crc2] ++ fmov dtmp0,xcrc0 ++ fmov dtmp1,xcrc1 ++ crc32_u64 crc2,crc2,crc2_data0 ++ add ptr_crc0,ptr_crc0,1024-(336+8) ++ pmull vtmp0.1q,vtmp0.1d,vconst0.1d ++ sub LEN,LEN,1024 ++ pmull vtmp1.1q,vtmp1.1d,vconst1.1d ++ cmp LEN,1024 ++ fmov xcrc0,dtmp0 ++ fmov xcrc1,dtmp1 ++ crc32_u64 crc0,wzr,xcrc0 ++ crc32_u64 crc1,wzr,xcrc1 ++ ++ eor crc0,crc0,crc2 ++ eor crc0,crc0,crc1 ++ ++ bhs 2b ++1: ++ cmp LEN, 63 ++ bls .loop_16B ++.loop_64B: ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#64 ++ ldp data2, data3, [BUF],#16 ++ cmp LEN,#64 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ ldp data0, data1, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ ldp data2, data3, [BUF],#16 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ crc32_u64 wCRC, wCRC, data2 ++ crc32_u64 wCRC, wCRC, data3 ++ bge .loop_64B ++ ++.loop_16B: ++ cmp LEN, 15 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 
++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 ++ cmp LEN,15 ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++ bls .less_16B ++ ldp data0, data1, [BUF],#16 ++ sub LEN,LEN,#16 //MUST less than 16B ++ crc32_u64 wCRC, wCRC, data0 ++ crc32_u64 wCRC, wCRC, data1 ++.less_16B: ++ cmp LEN, 7 ++ bls .less_8B ++ ldr data0, [BUF], 8 ++ sub LEN, LEN, #8 ++ crc32_u64 wCRC, wCRC, data0 ++.less_8B: ++ cmp LEN, 3 ++ bls .less_4B ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++.less_4B: ++ cmp LEN, 1 ++ bls .less_2B ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, #2 ++ crc32_u16 wCRC, wCRC, wdata ++.less_2B: ++ cbz LEN, .zero_length_ret ++ ldrb wdata, [BUF] ++ crc32_u8 wCRC, wCRC, wdata ++.zero_length_ret: ++.ifc \poly_type,crc32 ++ mvn w0, wCRC ++.else ++ mov w0, wCRC ++.endif ++ ret ++.align_short_2: ++ ldrh wdata, [BUF], 2 ++ sub LEN, LEN, 2 ++ tst BUF, 4 ++ crc32_u16 wCRC, wCRC, wdata ++ ccmp LEN, 3, 0, ne ++ bls .align_finish ++.align_word: ++ ldr wdata, [BUF], 4 ++ sub LEN, LEN, #4 ++ crc32_u32 wCRC, wCRC, wdata ++ b .align_finish ++.Lconstants: ++.ifc \poly_type,crc32 ++ .quad 0xb486819b ++ .quad 0x76278617 ++.else ++ .quad 0xe417f38a ++ .quad 0x8f158014 ++.endif ++ ++.endm +diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S +new file mode 100644 +index 0000000..116d62c +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S +@@ -0,0 +1,95 @@ ++######################################################################## ++# Copyright(c) 2020 Arm Corporation All rights reserved. ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions ++# are met: ++# * Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. 
++# * Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in ++# the documentation and/or other materials provided with the ++# distribution. ++# * Neither the name of Arm Corporation nor the names of its ++# contributors may be used to endorse or promote products derived ++# from this software without specific prior written permission. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++######################################################################### ++#include "crc32_aarch64_common.h" ++ .text ++ .align 6 ++ .arch armv8-a+crc+crypto ++.macro crc32_u64 dst,src,data ++ crc32x \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32w \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32h \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32b \dst,\src,\data ++.endm ++.macro declare_var_vector_reg name:req,reg:req ++ q\name .req q\reg ++ v\name .req v\reg ++ s\name .req s\reg ++ d\name .req d\reg ++.endm ++ ++ BUF .req x1 ++ ptr_crc0 .req x1 ++ LEN .req x2 ++ wCRC .req w0 ++ crc0 .req w0 ++ xcrc0 .req x0 ++ ++ crc1 .req w3 ++ crc2 .req w4 ++ xcrc1 .req x3 ++ const_adr .req x3 ++ ptr_crc1 .req x6 ++ ptr_crc2 .req x7 ++ crc0_data0 .req x9 ++ crc0_data1 .req x10 ++ crc1_data0 .req x11 ++ crc1_data1 .req x12 ++ crc2_data0 .req x13 ++ crc2_data1 .req x14 ++ ++ wdata .req w3 ++ data0 .req x3 ++ data1 .req x4 ++ data2 .req x5 ++ data3 .req x6 ++ ++ declare_var_vector_reg tmp0,0 ++ declare_var_vector_reg tmp1,1 ++ declare_var_vector_reg const0,2 ++ declare_var_vector_reg const1,3 ++ ++/** ++ uint32_t crc32_gzip_refl( ++ uint32_t wCRC, ++ const unsigned char *BUF, ++ uint64_t LEN ++ ); ++*/ ++ ++ .global crc32_gzip_refl_3crc_fold ++ .type crc32_gzip_refl_3crc_fold, %function ++crc32_gzip_refl_3crc_fold: ++ crc32_3crc_fold crc32 ++ .size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold +diff --git a/src/isa-l/crc/aarch64/crc32_crc_ext_cortex_a72.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S +similarity index 85% +rename from crc/aarch64/crc32_crc_ext_cortex_a72.S +rename to crc/aarch64/crc32_gzip_refl_crc_ext.S +index 4335bf2..8e3d227 100644 +--- a/src/isa-l/crc/aarch64/crc32_crc_ext_cortex_a72.S ++++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S +@@ -32,7 +32,7 @@ + .arch armv8-a+crc + + +-#include "crc32_common_crc_ext_cortex_a72.S" ++#include "crc32_aarch64_common.h" + + BUF .req x1 + LEN .req x2 +@@ 
-56,14 +56,11 @@ + .endm + + /** +- * uint32_t crc32_crc_ext_cortex_a72( +- * uint32_t init_crc, +- * const unsigned char *buf, +- * uint64_t len); ++ * uint32_t crc32_gzip_refl_crc_ext(uint32_t wCRC, ++ * const unsigned char *BUF, uint64_t LEN); + */ +- .global crc32_crc_ext_cortex_a72 +- .type crc32_crc_ext_cortex_a72, %function +-crc32_crc_ext_cortex_a72: ++ .global crc32_gzip_refl_crc_ext ++ .type crc32_gzip_refl_crc_ext, %function ++crc32_gzip_refl_crc_ext: + crc32_hw_common crc32 +- ret +- .size crc32_crc_ext_cortex_a72, .-crc32_crc_ext_cortex_a72 ++ .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext +diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S +deleted file mode 100644 +index 98cf129..0000000 +--- a/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S ++++ /dev/null +@@ -1,176 +0,0 @@ +-######################################################################## +-# Copyright(c) 2019 Arm Corporation All rights reserved. +-# +-# Redistribution and use in source and binary forms, with or without +-# modification, are permitted provided that the following conditions +-# are met: +-# * Redistributions of source code must retain the above copyright +-# notice, this list of conditions and the following disclaimer. +-# * Redistributions in binary form must reproduce the above copyright +-# notice, this list of conditions and the following disclaimer in +-# the documentation and/or other materials provided with the +-# distribution. +-# * Neither the name of Arm Corporation nor the names of its +-# contributors may be used to endorse or promote products derived +-# from this software without specific prior written permission. +-# +-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-######################################################################### +- +- .arch armv8-a+crc+crypto +- .text +- .align 3 +- .global crc32_gzip_refl_hw_fold +- .type crc32_gzip_refl_hw_fold, %function +- +-/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */ +- +-w_seed .req w0 +-w_crc .req w0 +-x_buf .req x1 +-x_len .req x2 +- +-x_buf_loop_end .req x10 +-x_buf_iter .req x10 +- +-x_tmp .req x15 +-w_tmp .req w15 +- +-d_c0 .req d3 +-d_c1 .req d1 +-v_c0 .req v3 +-v_c1 .req v1 +-crc32_gzip_refl_hw_fold: +- mvn w_seed, w_seed +- cmp x_len, 1023 +- mov x_buf_iter, x_buf +- bls .loop_fold_end +- +- sub x_buf_loop_end, x_len, #1024 +- and x_buf_loop_end, x_buf_loop_end, -1024 +- add x_buf_loop_end, x_buf_loop_end, 1024 +- add x_buf_loop_end, x_buf, x_buf_loop_end +- +- mov x_tmp, 0x819b +- movk x_tmp, 0xb486, lsl 16 +- fmov d_c0, x_tmp +- +- mov x_tmp, 0x8617 +- movk x_tmp, 0x7627, lsl 16 +- fmov d_c1, x_tmp +- +-x_in64 .req x3 +-w_crc0 .req w0 +-w_crc1 .req w4 +-w_crc2 .req w5 +- +-d_crc0 .req d4 +-d_crc1 .req d5 +-v_crc0 .req v4 +-v_crc1 .req v5 +- .align 3 +-.loop_fold: +- add x9, x_buf, 336 +- mov x_in64, x_buf +- mov w_crc1, 0 +- mov w_crc2, 0 +- +- .align 3 +-.loop_for: +- ldr x8, [x_in64] +- ldr x7, [x_in64, 336] +- ldr x6, [x_in64, 672] +- +- add x_in64, x_in64, 8 +- cmp x_in64, x9 +- +- crc32x w_crc0, w_crc0, x8 +- crc32x w_crc1, w_crc1, x7 +- crc32x w_crc2, w_crc2, x6 +- bne .loop_for 
+- +- uxtw x_tmp, w_crc0 +- fmov d_crc0, x_tmp +- pmull v_crc0.1q, v_crc0.1d, v_c0.1d +- +- uxtw x_tmp, w_crc1 +- fmov d_crc1, x_tmp +- pmull v_crc1.1q, v_crc1.1d, v_c1.1d +- +- ldr x_tmp, [x_buf, 1008] +- crc32x w_crc2, w_crc2, x_tmp +- +- fmov x_tmp, d_crc0 +- crc32x w_crc0, wzr, x_tmp +- +- fmov x_tmp, d_crc1 +- crc32x w_crc1, wzr, x_tmp +- +- eor w_crc0, w_crc0, w_crc1 +- eor w_crc0, w_crc0, w_crc2 +- +- ldr x_tmp, [x_buf, 1016] +- crc32x w_crc0, w_crc0, x_tmp +- +- add x_buf, x_buf, 1024 +- cmp x_buf_loop_end, x_buf +- bne .loop_fold +- +- and x_len, x_len, 1023 +- +-x_buf_loop_size8_end .req x3 +-.loop_fold_end: +- cmp x_len, 7 +- bls .size_4 +- +- sub x_buf_loop_size8_end, x_len, #8 +- and x_buf_loop_size8_end, x_buf_loop_size8_end, -8 +- add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 +- add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end +- +- .align 3 +-.loop_size_8: +- ldr x_tmp, [x_buf_iter], 8 +- crc32x w_crc, w_crc, x_tmp +- +- cmp x_buf_iter, x_buf_loop_size8_end +- bne .loop_size_8 +- +- and x_len, x_len, 7 +-.size_4: +- cmp x_len, 3 +- bls .size_2 +- +- ldr w_tmp, [x_buf_iter], 4 +- crc32w w_crc, w_crc, w_tmp +- +- sub x_len, x_len, #4 +-.size_2: +- cmp x_len, 1 +- bls .size_1 +- +- ldrh w_tmp, [x_buf_iter], 2 +- crc32h w_crc, w_crc, w_tmp +- +- sub x_len, x_len, #2 +-.size_1: +- cbz x_len, .done +- +- ldrb w_tmp, [x_buf_iter] +- crc32b w_crc, w_crc, w_tmp +- +-.done: +- mvn w_crc, w_crc +- ret +- +- .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold +diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S b/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S +new file mode 100644 +index 0000000..2beaa80 +--- /dev/null ++++ b/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S +@@ -0,0 +1,97 @@ ++######################################################################## ++# Copyright(c) 2020 Arm Corporation All rights reserved. 
++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions ++# are met: ++# * Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# * Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in ++# the documentation and/or other materials provided with the ++# distribution. ++# * Neither the name of Arm Corporation nor the names of its ++# contributors may be used to endorse or promote products derived ++# from this software without specific prior written permission. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++######################################################################### ++ ++ ++ .text ++ .align 6 ++ .arch armv8-a+crc+crypto ++#include "crc32_aarch64_common.h" ++.macro crc32_u64 dst,src,data ++ crc32cx \dst,\src,\data ++.endm ++.macro crc32_u32 dst,src,data ++ crc32cw \dst,\src,\data ++.endm ++.macro crc32_u16 dst,src,data ++ crc32ch \dst,\src,\data ++.endm ++.macro crc32_u8 dst,src,data ++ crc32cb \dst,\src,\data ++.endm ++.macro declare_var_vector_reg name:req,reg:req ++ q\name .req q\reg ++ v\name .req v\reg ++ s\name .req s\reg ++ d\name .req d\reg ++.endm ++ ++ BUF .req x0 ++ LEN .req x1 ++ wCRC .req w2 ++ crc0 .req w2 ++ crc1 .req w3 ++ crc2 .req w4 ++ xcrc0 .req x2 ++ xcrc1 .req x3 ++ const_adr .req x3 ++ ptr_crc0 .req x0 ++ ptr_crc1 .req x6 ++ ptr_crc2 .req x7 ++ crc0_data0 .req x9 ++ crc0_data1 .req x10 ++ crc1_data0 .req x11 ++ crc1_data1 .req x12 ++ crc2_data0 .req x13 ++ crc2_data1 .req x14 ++ ++ wdata .req w3 ++ data0 .req x3 ++ data1 .req x4 ++ data2 .req x5 ++ data3 .req x6 ++ ++ declare_var_vector_reg tmp0,0 ++ declare_var_vector_reg tmp1,1 ++ declare_var_vector_reg const0,2 ++ declare_var_vector_reg const1,3 ++ ++/** ++ unsigned int crc32_iscsi( ++ unsigned char *BUF, ++ int LEN, ++ unsigned int wCRC ++ ); ++ ++*/ ++ ++ .global crc32_iscsi_3crc_fold ++ .type crc32_iscsi_3crc_fold, %function ++crc32_iscsi_3crc_fold: ++ crc32_3crc_fold crc32c ++ .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold +diff --git a/src/isa-l/crc/aarch64/crc32c_crc_ext_cortex_a72.S b/src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S +similarity index 84% +rename from crc/aarch64/crc32c_crc_ext_cortex_a72.S +rename to crc/aarch64/crc32_iscsi_crc_ext.S +index 64ccf69..359401a 100644 +--- a/src/isa-l/crc/aarch64/crc32c_crc_ext_cortex_a72.S ++++ b/src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S +@@ -32,7 +32,7 @@ + .arch armv8-a+crc + + +-#include "crc32_common_crc_ext_cortex_a72.S" ++#include "crc32_aarch64_common.h" + BUF .req x0 + LEN .req x1 + wCRC .req w2 +@@ -55,14 +55,11 
@@ + .endm + + /** +- * uint32_t crc32c_crc_ext_cortex_a72( +- * unsigned char const *buffer, +- * size_t len, +- * uint crc32 ) ++ * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF, ++ * uint64_t LEN,uint32_t wCRC); + */ +- .global crc32c_crc_ext_cortex_a72 +- .type crc32c_crc_ext_cortex_a72, %function +-crc32c_crc_ext_cortex_a72: ++ .global crc32_iscsi_crc_ext ++ .type crc32_iscsi_crc_ext, %function ++crc32_iscsi_crc_ext: + crc32_hw_common crc32c +- ret +- .size crc32c_crc_ext_cortex_a72, .-crc32c_crc_ext_cortex_a72 ++ .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext +diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S b/src/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S +deleted file mode 100644 +index 85527c2..0000000 +--- a/src/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S ++++ /dev/null +@@ -1,172 +0,0 @@ +-######################################################################## +-# Copyright(c) 2019 Arm Corporation All rights reserved. +-# +-# Redistribution and use in source and binary forms, with or without +-# modification, are permitted provided that the following conditions +-# are met: +-# * Redistributions of source code must retain the above copyright +-# notice, this list of conditions and the following disclaimer. +-# * Redistributions in binary form must reproduce the above copyright +-# notice, this list of conditions and the following disclaimer in +-# the documentation and/or other materials provided with the +-# distribution. +-# * Neither the name of Arm Corporation nor the names of its +-# contributors may be used to endorse or promote products derived +-# from this software without specific prior written permission. +-# +-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-######################################################################### +- +- .arch armv8-a+crc+crypto +- .text +- .align 3 +- .global crc32_iscsi_refl_hw_fold +- .type crc32_iscsi_refl_hw_fold, %function +- +-/* unsigned int crc32_iscsi_refl_hw_fold(unsigned char *buffer, int len, unsigned int crc_init) */ +- +-x_buffer .req x0 +-w_len .req w1 +-w_crc_init .req w2 +-w_crc .req w2 +- +-w_len_loop_end .req w9 +-x_buf_loop_end .req x9 +-x_buf_iter .req x9 +- +-x_tmp .req x15 +-w_tmp .req w15 +- +-w_crc_ret .req w0 +-crc32_iscsi_refl_hw_fold: +- cmp w_len, 1023 +- mov x_buf_iter, x_buffer +- ble .loop_fold_end +- +- sub w10, w_len, #1024 +- lsr w12, w10, 10 +- lsl w_len_loop_end, w12, 10 +- +- add x_buf_loop_end, x_buf_loop_end, 1024 +- add x_buf_loop_end, x_buffer, x_buf_loop_end +- +- mov x_tmp, 0xf38a +- movk x_tmp, 0xe417, lsl 16 +- fmov d3, x_tmp +- +- mov x_tmp, 0x8014 +- movk x_tmp, 0x8f15, lsl 16 +- fmov d1, x_tmp +- +-x_in64 .req x1 +-w_crc0 .req w2 +-w_crc1 .req w3 +-w_crc2 .req w4 +- .align 3 +-.loop_fold: +- add x8, x_buffer, 336 +- mov x_in64, x_buffer +- mov w_crc1, 0 +- mov w_crc2, 0 +- +- .align 3 +-.loop_for: +- ldr x7, [x_in64] +- ldr x6, [x_in64, 336] +- ldr x5, [x_in64, 672] +- +- add x_in64, x_in64, 8 +- cmp x_in64, x8 +- +- crc32cx w_crc0, w_crc0, x7 +- crc32cx w_crc1, w_crc1, x6 +- crc32cx w_crc2, w_crc2, x5 +- bne .loop_for +- +- uxtw x_tmp, w_crc0 +- fmov d4, x_tmp +- pmull v2.1q, v4.1d, v3.1d +- +- uxtw 
x_tmp, w_crc1 +- fmov d5, x_tmp +- pmull v5.1q, v5.1d, v1.1d +- +- fmov x_tmp, d2 +- crc32cx w_crc0, wzr, x_tmp +- +- fmov x_tmp, d5 +- crc32cx w_crc1, wzr, x_tmp +- +- ldr x_tmp, [x_buffer, 1008] +- crc32cx w_crc2, w_crc2, x_tmp +- +- eor w_crc1, w_crc1, w_crc0 +- eor w_crc1, w_crc1, w_crc2 +- +- ldr x_tmp, [x_buffer, 1016] +- crc32cx w_crc0, w_crc1, x_tmp +- +- add x_buffer, x_buffer, 1024 +- cmp x_buf_loop_end, x_buffer +- bne .loop_fold +- +- sub w_len, w10, w12, lsl 10 +- +-x_buf_loop_size8_end .req x3 +-.loop_fold_end: +- cmp w_len, 7 +- ble .size_4 +- +- sub w_len, w_len, #8 +- lsr w4, w_len, 3 +- lsl w3, w4, 3 +- add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 +- add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end +- +- .align 3 +-.loop_size_8: +- ldr x_tmp, [x_buf_iter], 8 +- crc32cx w_crc, w_crc, x_tmp +- +- cmp x_buf_iter, x_buf_loop_size8_end +- bne .loop_size_8 +- +- sub w_len, w_len, w4, lsl 3 +-.size_4: +- cmp w_len, 3 +- ble .size_2 +- +- ldr w_tmp, [x_buf_iter], 4 +- crc32cw w_crc, w_crc, w_tmp +- sub w_len, w_len, #4 +- +-.size_2: +- cmp w_len, 1 +- ble .size_1 +- +- ldrh w_tmp, [x_buf_iter], 2 +- crc32ch w_crc, w_crc, w_tmp +- sub w_len, w_len, #2 +- +-.size_1: +- mov w_crc_ret, w_crc +- cmp w_len, 1 +- bne .done +- +- ldrb w_tmp, [x_buf_iter] +- crc32cb w_crc_ret, w_crc, w_tmp +- +-.done: +- ret +- +- .size crc32_iscsi_refl_hw_fold, .-crc32_iscsi_refl_hw_fold +diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +index b28a3a1..22ea72b 100644 +--- a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c ++++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c +@@ -64,16 +64,14 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) { + switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): ++ case MICRO_ARCH_ID(ARM, CORTEX_A57): + case MICRO_ARCH_ID(ARM, CORTEX_A72): +- return 
PROVIDER_INFO(crc32c_crc_ext_cortex_a72); ++ return PROVIDER_INFO(crc32_iscsi_crc_ext); + } + } + if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { +- switch (get_micro_arch_id()) { +- case MICRO_ARCH_ID(ARM, NEOVERSE_N1): +- return PROVIDER_INFO(crc32c_mix_neoverse_n1); +- } +- return PROVIDER_INFO(crc32c_mix_default); ++ return PROVIDER_INFO(crc32_iscsi_3crc_fold); + } + + if (auxval & HWCAP_PMULL) { +@@ -89,16 +87,14 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) + + if (auxval & HWCAP_CRC32) { + switch (get_micro_arch_id()) { ++ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): ++ case MICRO_ARCH_ID(ARM, CORTEX_A57): + case MICRO_ARCH_ID(ARM, CORTEX_A72): +- return PROVIDER_INFO(crc32_crc_ext_cortex_a72); ++ return PROVIDER_INFO(crc32_gzip_refl_crc_ext); + } + } + if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { +- switch (get_micro_arch_id()) { +- case MICRO_ARCH_ID(ARM, NEOVERSE_N1): +- return PROVIDER_INFO(crc32_mix_neoverse_n1); +- } +- return PROVIDER_INFO(crc32_mix_default); ++ return PROVIDER_INFO(crc32_gzip_refl_3crc_fold); + } + + if (auxval & HWCAP_PMULL) +-- +2.20.1.windows.1 + + +From 2049d8dc816154392868e271e3857666cc704a60 Mon Sep 17 00:00:00 2001 +From: Ruben Vorderman +Date: Wed, 22 Jul 2020 11:26:38 +0200 +Subject: [PATCH 20/42] Add conda shield to readme + +This will make it easier for users to get the latest version. Installing with conda is easier than compiling it yourself. Distro packages (such as Debian's) do not always ship the latest version while conda-forge can. This badge will advertise this install method. 
+ +Change-Id: I99a1853a00e55fdf0c574c9906675738ac278121 +Signed-off-by: Ruben Vorderman +--- + README.md | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/isa-l/README.md b/src/isa-l/README.md +index b8fd220..8190784 100644 +--- a/src/isa-l/README.md ++++ b/src/isa-l/README.md +@@ -2,6 +2,7 @@ Intel(R) Intelligent Storage Acceleration Library + ================================================= + + [![Build Status](https://travis-ci.org/intel/isa-l.svg?branch=master)](https://travis-ci.org/intel/isa-l) ++[![Package on conda-forge](https://img.shields.io/conda/v/conda-forge/isa-l.svg)](https://anaconda.org/conda-forge/isa-l) + + ISA-L is a collection of optimized low-level functions targeting storage + applications. ISA-L includes: +-- +2.20.1.windows.1 + + +From 93049d0d1f712c907863ab785d014e825917ae77 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Wed, 8 Jul 2020 19:13:33 -0700 +Subject: [PATCH 21/42] igzip: Fix read header for correct null checking and + init + +Issue with reading header only appears when combined with new feature in cli of +multiple concatenated gzip files.
+ +Change-Id: Id8df9150c6f27d8b22e810b511291f3fcf136723 +Signed-off-by: Greg Tucker +--- + igzip/igzip_inflate.c | 43 +++++++++++++++++++++++++------------------ + 1 file changed, 25 insertions(+), 18 deletions(-) + +diff --git a/src/isa-l/igzip/igzip_inflate.c b/src/isa-l/igzip/igzip_inflate.c +index 775e8f3..3e302a9 100644 +--- a/src/isa-l/igzip/igzip_inflate.c ++++ b/src/isa-l/igzip/igzip_inflate.c +@@ -1749,6 +1749,7 @@ void isal_inflate_reset(struct inflate_state *state) + state->write_overflow_len = 0; + state->copy_overflow_length = 0; + state->copy_overflow_distance = 0; ++ state->wrapper_flag = 0; + state->tmp_in_size = 0; + state->tmp_out_processed = 0; + state->tmp_out_valid = 0; +@@ -1786,21 +1787,24 @@ static inline uint32_t fixed_size_read(struct inflate_state *state, + } + + static inline uint32_t buffer_header_copy(struct inflate_state *state, uint32_t in_len, +- uint8_t * buf, uint32_t buf_len, uint32_t buf_error) ++ uint8_t * buf, uint32_t buffer_len, uint32_t offset, ++ uint32_t buf_error) + { + uint32_t len = in_len; ++ uint32_t buf_len = buffer_len - offset; ++ + if (len > state->avail_in) + len = state->avail_in; + + if (buf != NULL && buf_len < len) { +- memcpy(buf, state->next_in, buf_len); ++ memcpy(&buf[offset], state->next_in, buf_len); + state->next_in += buf_len; + state->avail_in -= buf_len; + state->count = in_len - buf_len; + return buf_error; + } else { + if (buf != NULL) +- memcpy(buf, state->next_in, len); ++ memcpy(&buf[offset], state->next_in, len); + state->next_in += len; + state->avail_in -= len; + state->count = in_len - len; +@@ -1813,9 +1817,10 @@ static inline uint32_t buffer_header_copy(struct inflate_state *state, uint32_t + } + + static inline uint32_t string_header_copy(struct inflate_state *state, +- char *str_buf, uint32_t str_len, uint32_t str_error) ++ char *str_buf, uint32_t str_len, ++ uint32_t offset, uint32_t str_error) + { +- uint32_t len, max_len = str_len; ++ uint32_t len, max_len = str_len - offset; + + if 
(max_len > state->avail_in || str_buf == NULL) + max_len = state->avail_in; +@@ -1823,13 +1828,13 @@ static inline uint32_t string_header_copy(struct inflate_state *state, + len = strnlen((char *)state->next_in, max_len); + + if (str_buf != NULL) +- memcpy(str_buf, state->next_in, len); ++ memcpy(&str_buf[offset], state->next_in, len); + + state->next_in += len; + state->avail_in -= len; + state->count += len; + +- if (str_buf != NULL && len == str_len) ++ if (str_buf != NULL && len == (str_len - offset)) + return str_error; + else if (state->avail_in <= 0) + return ISAL_END_INPUT; +@@ -2002,9 +2007,9 @@ int isal_read_gzip_header(struct inflate_state *state, struct isal_gzip_header * + case ISAL_GZIP_EXTRA: + offset = gz_hdr->extra_len - count; + ret = +- buffer_header_copy(state, count, gz_hdr->extra + offset, +- gz_hdr->extra_buf_len - offset, +- ISAL_EXTRA_OVERFLOW); ++ buffer_header_copy(state, count, gz_hdr->extra, ++ gz_hdr->extra_buf_len, ++ offset, ISAL_EXTRA_OVERFLOW); + + if (ret) { + state->block_state = ISAL_GZIP_EXTRA; +@@ -2017,9 +2022,9 @@ int isal_read_gzip_header(struct inflate_state *state, struct isal_gzip_header * + if (flags & NAME_FLAG) { + case ISAL_GZIP_NAME: + offset = state->count; +- ret = string_header_copy(state, gz_hdr->name + offset, +- gz_hdr->name_buf_len - offset, +- ISAL_NAME_OVERFLOW); ++ ret = string_header_copy(state, gz_hdr->name, ++ gz_hdr->name_buf_len, ++ offset, ISAL_NAME_OVERFLOW); + if (ret) { + state->block_state = ISAL_GZIP_NAME; + break; +@@ -2029,9 +2034,9 @@ int isal_read_gzip_header(struct inflate_state *state, struct isal_gzip_header * + if (flags & COMMENT_FLAG) { + case ISAL_GZIP_COMMENT: + offset = state->count; +- ret = string_header_copy(state, gz_hdr->comment + offset, +- gz_hdr->comment_buf_len - offset, +- ISAL_COMMENT_OVERFLOW); ++ ret = string_header_copy(state, gz_hdr->comment, ++ gz_hdr->comment_buf_len, ++ offset, ISAL_COMMENT_OVERFLOW); + if (ret) { + state->block_state = ISAL_GZIP_COMMENT; + break; 
+@@ -2147,11 +2152,12 @@ int isal_inflate_stateless(struct inflate_state *state) + + if (state->crc_flag == IGZIP_GZIP) { + struct isal_gzip_header gz_hdr; ++ isal_gzip_header_init(&gz_hdr); + ret = isal_read_gzip_header(state, &gz_hdr); + if (ret) + return ret; + } else if (state->crc_flag == IGZIP_ZLIB) { +- struct isal_zlib_header z_hdr; ++ struct isal_zlib_header z_hdr = { 0 }; + ret = isal_read_zlib_header(state, &z_hdr); + if (ret) + return ret; +@@ -2219,13 +2225,14 @@ int isal_inflate(struct inflate_state *state) + + if (!state->wrapper_flag && state->crc_flag == IGZIP_GZIP) { + struct isal_gzip_header gz_hdr; ++ isal_gzip_header_init(&gz_hdr); + ret = isal_read_gzip_header(state, &gz_hdr); + if (ret < 0) + return ret; + else if (ret > 0) + return ISAL_DECOMP_OK; + } else if (!state->wrapper_flag && state->crc_flag == IGZIP_ZLIB) { +- struct isal_zlib_header z_hdr; ++ struct isal_zlib_header z_hdr = { 0 }; + ret = isal_read_zlib_header(state, &z_hdr); + if (ret < 0) + return ret; +-- +2.20.1.windows.1 + + +From ae45f60e780ccc614bb2bd1c4e7116bd10eb70bb Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Mon, 6 Jul 2020 19:36:19 -0700 +Subject: [PATCH 22/42] igzip: Add cli feature to inflate concatenated gz files + +Change-Id: I2beade6682e78fda30a18228a8660201ae7bf718 +Signed-off-by: Greg Tucker +--- + programs/igzip_cli.c | 53 ++++++++++++++++++++++++++++++++++++- + programs/igzip_cli_check.sh | 16 +++++++++++ + 2 files changed, 68 insertions(+), 1 deletion(-) + +diff --git a/src/isa-l/programs/igzip_cli.c b/src/isa-l/programs/igzip_cli.c +index 9a20b9b..53124af 100644 +--- a/src/isa-l/programs/igzip_cli.c ++++ b/src/isa-l/programs/igzip_cli.c +@@ -817,6 +817,7 @@ int decompress_file(void) + int suffix_index = 0; + uint32_t file_time; + ++ // Allocate mem and setup to hold gzip header info + if (infile_name_len == stdin_file_name_len && + infile_name != NULL && + memcmp(infile_name, stdin_file_name, infile_name_len) == 0) { +@@ -884,6 +885,7 @@ int 
decompress_file(void) + state.next_in = inbuf; + state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name); + ++ // Actually read and save the header info + ret = isal_read_gzip_header(&state, &gz_hdr); + if (ret != ISAL_DECOMP_OK) { + log_print(ERROR, "igzip: Error invalid gzip header found for file %s\n", +@@ -915,6 +917,7 @@ int decompress_file(void) + goto decompress_file_cleanup; + } + ++ // Start reading in compressed data and decompress + do { + if (state.avail_in == 0) { + state.next_in = inbuf; +@@ -936,7 +939,55 @@ int decompress_file(void) + if (out != NULL) + fwrite_safe(outbuf, 1, state.next_out - outbuf, out, outfile_name); + +- } while (!feof(in) || state.avail_out == 0); ++ } while (state.block_state != ISAL_BLOCK_FINISH // while not done ++ && (!feof(in) || state.avail_out == 0) // and work to do ++ ); ++ ++ // Add the following to look for and decode additional concatenated files ++ if (!feof(in) && state.avail_in == 0) { ++ state.next_in = inbuf; ++ state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name); ++ } ++ ++ while (state.avail_in > 0 && state.next_in[0] == 31) { ++ // Look for magic numbers for gzip header. 
Follows the gzread() decision ++ // whether to treat as trailing junk ++ if (state.avail_in > 1 && state.next_in[1] != 139) ++ break; ++ ++ isal_inflate_reset(&state); ++ state.crc_flag = ISAL_GZIP; // Let isal_inflate() process extra headers ++ do { ++ if (state.avail_in == 0 && !feof(in)) { ++ state.next_in = inbuf; ++ state.avail_in = ++ fread_safe(state.next_in, 1, inbuf_size, in, infile_name); ++ } ++ ++ state.next_out = outbuf; ++ state.avail_out = outbuf_size; ++ ++ ret = isal_inflate(&state); ++ if (ret != ISAL_DECOMP_OK) { ++ log_print(ERROR, ++ "igzip: Error while decompressing extra concatenated" ++ "gzip files on %s\n", infile_name); ++ goto decompress_file_cleanup; ++ } ++ ++ if (out != NULL) ++ fwrite_safe(outbuf, 1, state.next_out - outbuf, out, ++ outfile_name); ++ ++ } while (state.block_state != ISAL_BLOCK_FINISH ++ && (!feof(in) || state.avail_out == 0)); ++ ++ if (!feof(in) && state.avail_in == 0) { ++ state.next_in = inbuf; ++ state.avail_in = ++ fread_safe(state.next_in, 1, inbuf_size, in, infile_name); ++ } ++ } + + if (state.block_state != ISAL_BLOCK_FINISH) + log_print(ERROR, "igzip: Error %s does not contain a complete gzip file\n", +diff --git a/src/isa-l/programs/igzip_cli_check.sh b/src/isa-l/programs/igzip_cli_check.sh +index da83578..5d97763 100755 +--- a/src/isa-l/programs/igzip_cli_check.sh ++++ b/src/isa-l/programs/igzip_cli_check.sh +@@ -77,6 +77,22 @@ cat $TEST_FILE | $IGZIP | $IGZIP -d | $DIFF $TEST_FILE - || ret=1 + cat $TEST_FILE | $IGZIP - | $IGZIP -d - | $DIFF $TEST_FILE - || ret=1 + pass_check $ret "Piping compression and decompression" + ++# Test multiple concatenated gzip files ++ret=0 ++(for i in `seq 3`; do $IGZIP -c $TEST_FILE ; done) | $IGZIP -t || ret=1 ++pass_check $ret "Multiple gzip concatenated files" ++ ++if command -V md5sum >/dev/null 2>&1; then ++ sum1=$((for i in `seq 15`; do $IGZIP -c $TEST_FILE; done) | $IGZIP -cd | md5sum) ++ sum2=$((for i in `seq 15`; do cat $TEST_FILE; done) | md5sum) ++ [[ "$sum1" == 
"$sum2" ]] && ret=0 || ret=1 ++ pass_check $ret "Multiple large gzip concat test" ++ clear_dir ++else ++ echo "Skip: Multiple large gzip concat test" ++fi ++ ++ + #Test outifle options + $IGZIP $TEST_FILE -o $file2$ds && $IGZIP $file2$ds -d -o $file1 && \ + test -f $file2$ds && test -f $file1 && $DIFF $TEST_FILE $file1 +-- +2.20.1.windows.1 + + +From ec73d39086d78649e0d23c8ade039a41e2ead1ff Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Fri, 21 Aug 2020 17:15:58 -0700 +Subject: [PATCH 23/42] crc: Add new vclmul version of crc32_iscsi + +Change-Id: I1c509c6ea312b6eb4e1c2c1c8bb7044f7b043e0d +Signed-off-by: Greg Tucker +--- + crc/Makefile.am | 1 + + crc/crc32_iscsi_by16_10.asm | 556 ++++++++++++++++++++++++++++++++++++ + crc/crc_multibinary.asm | 53 +++- + 3 files changed, 604 insertions(+), 6 deletions(-) + create mode 100644 crc/crc32_iscsi_by16_10.asm + +diff --git a/src/isa-l/crc/Makefile.am b/src/isa-l/crc/Makefile.am +index 64bef85..f12441c 100644 +--- a/src/isa-l/crc/Makefile.am ++++ b/src/isa-l/crc/Makefile.am +@@ -50,6 +50,7 @@ lsrc_x86_64 += \ + crc/crc32_ieee_by16_10.asm \ + crc/crc32_iscsi_01.asm \ + crc/crc32_iscsi_00.asm \ ++ crc/crc32_iscsi_by16_10.asm \ + crc/crc_multibinary.asm \ + crc/crc64_multibinary.asm \ + crc/crc64_ecma_refl_by8.asm \ +diff --git a/src/isa-l/crc/crc32_iscsi_by16_10.asm b/src/isa-l/crc/crc32_iscsi_by16_10.asm +new file mode 100644 +index 0000000..4c63bab +--- /dev/null ++++ b/src/isa-l/crc/crc32_iscsi_by16_10.asm +@@ -0,0 +1,556 @@ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++; Copyright(c) 2011-2020 Intel Corporation All rights reserved. ++; ++; Redistribution and use in source and binary forms, with or without ++; modification, are permitted provided that the following conditions ++; are met: ++; * Redistributions of source code must retain the above copyright ++; notice, this list of conditions and the following disclaimer. 
++; * Redistributions in binary form must reproduce the above copyright ++; notice, this list of conditions and the following disclaimer in ++; the documentation and/or other materials provided with the ++; distribution. ++; * Neither the name of Intel Corporation nor the names of its ++; contributors may be used to endorse or promote products derived ++; from this software without specific prior written permission. ++; ++; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++; Function API: ++; UINT32 crc32_iscsi_by16_10( ++; UINT32 init_crc, //initial CRC value, 32 bits ++; const unsigned char *buf, //buffer pointer to calculate CRC on ++; UINT64 len //buffer length in bytes (64-bit data) ++; ); ++; ++; Authors: ++; Erdinc Ozturk ++; Vinodh Gopal ++; James Guilford ++; ++; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" ++; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf ++; ++; ++ ++%include "reg_sizes.asm" ++ ++%ifndef FUNCTION_NAME ++%define FUNCTION_NAME crc32_iscsi_by16_10 ++%endif ++ ++%if (AS_FEATURE_LEVEL) >= 10 ++ ++[bits 64] ++default rel ++ ++section .text ++ ++ ++%ifidn __OUTPUT_FORMAT__, win64 ++ %xdefine arg1 r8 ++ %xdefine arg2 rcx ++ %xdefine arg3 rdx ++ ++ %xdefine arg1_low32 r8d ++%else ++ %xdefine arg1 rdx ++ %xdefine arg2 rdi ++ %xdefine arg3 rsi ++ ++ %xdefine arg1_low32 edx ++%endif ++ ++%define TMP 16*0 ++%ifidn __OUTPUT_FORMAT__, win64 ++ %define XMM_SAVE 16*2 ++ %define VARIABLE_OFFSET 16*12+8 ++%else ++ %define VARIABLE_OFFSET 16*2+8 ++%endif ++ ++align 16 ++mk_global FUNCTION_NAME, function ++FUNCTION_NAME: ++ endbranch ++ sub rsp, VARIABLE_OFFSET ++ ++%ifidn __OUTPUT_FORMAT__, win64 ++ ; push the xmm registers into the stack to maintain ++ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6 ++ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7 ++ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8 ++ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9 ++ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10 ++ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11 ++ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12 ++ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13 ++ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14 ++ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15 ++%endif ++ ++ ; check if smaller than 256B ++ cmp arg3, 256 ++ jl 
.less_than_256 ++ ++ ; load the initial crc value ++ vmovd xmm10, arg1_low32 ; initial crc ++ ++ ; receive the initial 64B data, xor the initial crc value ++ vmovdqu8 zmm0, [arg2+16*0] ++ vmovdqu8 zmm4, [arg2+16*4] ++ vpxorq zmm0, zmm10 ++ vbroadcasti32x4 zmm10, [rk3] ;xmm10 has rk3 and rk4 ++ ;imm value of pclmulqdq instruction will determine which constant to use ++ ++ sub arg3, 256 ++ cmp arg3, 256 ++ jl .fold_128_B_loop ++ ++ vmovdqu8 zmm7, [arg2+16*8] ++ vmovdqu8 zmm8, [arg2+16*12] ++ vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2 ++ sub arg3, 256 ++ ++.fold_256_B_loop: ++ add arg2, 256 ++ vmovdqu8 zmm3, [arg2+16*0] ++ vpclmulqdq zmm1, zmm0, zmm16, 0x10 ++ vpclmulqdq zmm2, zmm0, zmm16, 0x01 ++ vpxorq zmm0, zmm1, zmm2 ++ vpxorq zmm0, zmm0, zmm3 ++ ++ vmovdqu8 zmm9, [arg2+16*4] ++ vpclmulqdq zmm5, zmm4, zmm16, 0x10 ++ vpclmulqdq zmm6, zmm4, zmm16, 0x01 ++ vpxorq zmm4, zmm5, zmm6 ++ vpxorq zmm4, zmm4, zmm9 ++ ++ vmovdqu8 zmm11, [arg2+16*8] ++ vpclmulqdq zmm12, zmm7, zmm16, 0x10 ++ vpclmulqdq zmm13, zmm7, zmm16, 0x01 ++ vpxorq zmm7, zmm12, zmm13 ++ vpxorq zmm7, zmm7, zmm11 ++ ++ vmovdqu8 zmm17, [arg2+16*12] ++ vpclmulqdq zmm14, zmm8, zmm16, 0x10 ++ vpclmulqdq zmm15, zmm8, zmm16, 0x01 ++ vpxorq zmm8, zmm14, zmm15 ++ vpxorq zmm8, zmm8, zmm17 ++ ++ sub arg3, 256 ++ jge .fold_256_B_loop ++ ++ ;; Fold 256 into 128 ++ add arg2, 256 ++ vpclmulqdq zmm1, zmm0, zmm10, 0x01 ++ vpclmulqdq zmm2, zmm0, zmm10, 0x10 ++ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC ++ ++ vpclmulqdq zmm5, zmm4, zmm10, 0x01 ++ vpclmulqdq zmm6, zmm4, zmm10, 0x10 ++ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC ++ ++ vmovdqa32 zmm0, zmm7 ++ vmovdqa32 zmm4, zmm8 ++ ++ add arg3, 128 ++ jmp .fold_128_B_register ++ ++ ++ ++ ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop ++ ; loop will fold 128B at a time until we have 128+y Bytes of buffer ++ ++ ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel ++.fold_128_B_loop: ++ add arg2, 128 ++ vmovdqu8 zmm8, [arg2+16*0] ++ vpclmulqdq zmm2, zmm0, zmm10, 0x10 ++ vpclmulqdq zmm1, zmm0, zmm10, 0x01 ++ vpxorq zmm0, zmm2, zmm1 ++ vpxorq zmm0, zmm0, zmm8 ++ ++ vmovdqu8 zmm9, [arg2+16*4] ++ vpclmulqdq zmm5, zmm4, zmm10, 0x10 ++ vpclmulqdq zmm6, zmm4, zmm10, 0x01 ++ vpxorq zmm4, zmm5, zmm6 ++ vpxorq zmm4, zmm4, zmm9 ++ ++ sub arg3, 128 ++ jge .fold_128_B_loop ++ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++ add arg2, 128 ++ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 ++ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ++ ++.fold_128_B_register: ++ ; fold the 8 128b parts into 1 xmm register with different constants ++ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16 ++ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0 ++ vpclmulqdq zmm1, zmm0, zmm16, 0x01 ++ vpclmulqdq zmm2, zmm0, zmm16, 0x10 ++ vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand ++ ++ vpclmulqdq zmm5, zmm4, zmm11, 0x01 ++ vpclmulqdq zmm6, zmm4, zmm11, 0x10 ++ vmovdqa xmm10, [rk1] ; Needed later in reduction loop ++ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC ++ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC ++ ++ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10 ++ vpxorq ymm8, ymm8, ymm1 ++ vextracti64x2 xmm5, ymm8, 1 ++ vpxorq xmm7, xmm5, xmm8 ++ ++ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop ++ ; instead of a cmp instruction, we use the negative flag with the jl instruction ++ add arg3, 128-16 ++ jl .final_reduction_for_128 ++ ++ ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory ++ ; we can fold 16 bytes at a time if y>=16 ++ ; continue folding 16B at a time ++ ++.16B_reduction_loop: ++ vpclmulqdq xmm8, xmm7, xmm10, 0x1 ++ vpclmulqdq xmm7, xmm7, xmm10, 0x10 ++ vpxor xmm7, xmm8 ++ vmovdqu xmm0, [arg2] ++ vpxor xmm7, xmm0 ++ add arg2, 16 ++ sub arg3, 16 ++ ; instead of a cmp instruction, we utilize the flags with the jge instruction ++ ; equivalent of: cmp arg3, 16-16 ++ ; check if there is any more 16B in the buffer to be able to fold ++ jge .16B_reduction_loop ++ ++ ;now we have 16+z bytes left to reduce, where 0<= z < 16. ++ ;first, we reduce the data in the xmm7 register ++ ++ ++.final_reduction_for_128: ++ add arg3, 16 ++ je .128_done ++ ++ ; here we are getting data that is less than 16 bytes. ++ ; since we know that there was data before the pointer, we can offset ++ ; the input pointer before the actual point, to receive exactly 16 bytes. ++ ; after that the registers need to be adjusted. ++.get_last_two_xmms: ++ ++ vmovdqa xmm2, xmm7 ++ vmovdqu xmm1, [arg2 - 16 + arg3] ++ ++ ; get rid of the extra data that was loaded before ++ ; load the shift constant ++ lea rax, [pshufb_shf_table] ++ add rax, arg3 ++ vmovdqu xmm0, [rax] ++ ++ vpshufb xmm7, xmm0 ++ vpxor xmm0, [mask3] ++ vpshufb xmm2, xmm0 ++ ++ vpblendvb xmm2, xmm2, xmm1, xmm0 ++ ;;;;;;;;;; ++ vpclmulqdq xmm8, xmm7, xmm10, 0x1 ++ vpclmulqdq xmm7, xmm7, xmm10, 0x10 ++ vpxor xmm7, xmm8 ++ vpxor xmm7, xmm2 ++ ++.128_done: ++ ; compute crc of a 128-bit value ++ vmovdqa xmm10, [rk5] ++ vmovdqa xmm0, xmm7 ++ ++ ;64b fold ++ vpclmulqdq xmm7, xmm10, 0 ++ vpsrldq xmm0, 8 ++ vpxor xmm7, xmm0 ++ ++ ;32b fold ++ vmovdqa xmm0, xmm7 ++ vpslldq xmm7, 4 ++ vpclmulqdq xmm7, xmm10, 0x10 ++ vpxor xmm7, xmm0 ++ ++ ++ ;barrett reduction ++.barrett: ++ vpand xmm7, [mask2] ++ vmovdqa xmm1, xmm7 ++ vmovdqa xmm2, xmm7 ++ vmovdqa xmm10, [rk7] ++ ++ vpclmulqdq xmm7, xmm10, 0 ++ vpxor xmm7, xmm2 ++ vpand xmm7, [mask] ++ vmovdqa xmm2, xmm7 ++ vpclmulqdq 
xmm7, xmm10, 0x10 ++ vpxor xmm7, xmm2 ++ vpxor xmm7, xmm1 ++ vpextrd eax, xmm7, 2 ++ ++.cleanup: ++ ++%ifidn __OUTPUT_FORMAT__, win64 ++ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0] ++ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1] ++ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2] ++ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3] ++ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4] ++ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5] ++ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6] ++ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7] ++ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8] ++ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9] ++%endif ++ add rsp, VARIABLE_OFFSET ++ ret ++ ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++align 16 ++.less_than_256: ++ ++ ; check if there is enough buffer to be able to fold 16B at a time ++ cmp arg3, 32 ++ jl .less_than_32 ++ ++ ; if there is, load the constants ++ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 ++ ++ vmovd xmm0, arg1_low32 ; get the initial crc value ++ vmovdqu xmm7, [arg2] ; load the plaintext ++ vpxor xmm7, xmm0 ++ ++ ; update the buffer pointer ++ add arg2, 16 ++ ++ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop ++ sub arg3, 32 ++ ++ jmp .16B_reduction_loop ++ ++ ++align 16 ++.less_than_32: ++ ; mov initial crc to the return value. this is necessary for zero-length buffers. 
++ mov eax, arg1_low32 ++ test arg3, arg3 ++ je .cleanup ++ ++ vmovd xmm0, arg1_low32 ; get the initial crc value ++ ++ cmp arg3, 16 ++ je .exact_16_left ++ jl .less_than_16_left ++ ++ vmovdqu xmm7, [arg2] ; load the plaintext ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ add arg2, 16 ++ sub arg3, 16 ++ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 ++ jmp .get_last_two_xmms ++ ++align 16 ++.less_than_16_left: ++ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. ++ ++ vpxor xmm1, xmm1 ++ mov r11, rsp ++ vmovdqa [r11], xmm1 ++ ++ cmp arg3, 4 ++ jl .only_less_than_4 ++ ++ ; backup the counter value ++ mov r9, arg3 ++ cmp arg3, 8 ++ jl .less_than_8_left ++ ++ ; load 8 Bytes ++ mov rax, [arg2] ++ mov [r11], rax ++ add r11, 8 ++ sub arg3, 8 ++ add arg2, 8 ++.less_than_8_left: ++ ++ cmp arg3, 4 ++ jl .less_than_4_left ++ ++ ; load 4 Bytes ++ mov eax, [arg2] ++ mov [r11], eax ++ add r11, 4 ++ sub arg3, 4 ++ add arg2, 4 ++.less_than_4_left: ++ ++ cmp arg3, 2 ++ jl .less_than_2_left ++ ++ ; load 2 Bytes ++ mov ax, [arg2] ++ mov [r11], ax ++ add r11, 2 ++ sub arg3, 2 ++ add arg2, 2 ++.less_than_2_left: ++ cmp arg3, 1 ++ jl .zero_left ++ ++ ; load 1 Byte ++ mov al, [arg2] ++ mov [r11], al ++ ++.zero_left: ++ vmovdqa xmm7, [rsp] ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ ++ lea rax,[pshufb_shf_table] ++ vmovdqu xmm0, [rax + r9] ++ vpshufb xmm7,xmm0 ++ jmp .128_done ++ ++align 16 ++.exact_16_left: ++ vmovdqu xmm7, [arg2] ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ jmp .128_done ++ ++.only_less_than_4: ++ cmp arg3, 3 ++ jl .only_less_than_3 ++ ++ ; load 3 Bytes ++ mov al, [arg2] ++ mov [r11], al ++ ++ mov al, [arg2+1] ++ mov [r11+1], al ++ ++ mov al, [arg2+2] ++ mov [r11+2], al ++ ++ vmovdqa xmm7, [rsp] ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ ++ vpslldq xmm7, 5 ++ jmp .barrett ++ ++.only_less_than_3: ++ cmp arg3, 2 ++ jl .only_less_than_2 ++ ++ ; load 2 Bytes ++ mov al, [arg2] ++ mov [r11], al ++ ++ mov al, [arg2+1] 
++ mov [r11+1], al ++ ++ vmovdqa xmm7, [rsp] ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ ++ vpslldq xmm7, 6 ++ jmp .barrett ++ ++.only_less_than_2: ++ ; load 1 Byte ++ mov al, [arg2] ++ mov [r11], al ++ ++ vmovdqa xmm7, [rsp] ++ vpxor xmm7, xmm0 ; xor the initial crc value ++ ++ vpslldq xmm7, 7 ++ jmp .barrett ++ ++section .data ++align 32 ++ ++%ifndef USE_CONSTS ++; precomputed constants ++rk_1: dq 0x00000000b9e02b86 ++rk_2: dq 0x00000000dcb17aa4 ++rk1: dq 0x00000000493c7d27 ++rk2: dq 0x0000000ec1068c50 ++rk3: dq 0x0000000206e38d70 ++rk4: dq 0x000000006992cea2 ++rk5: dq 0x00000000493c7d27 ++rk6: dq 0x00000000dd45aab8 ++rk7: dq 0x00000000dea713f0 ++rk8: dq 0x0000000105ec76f0 ++rk9: dq 0x0000000047db8317 ++rk10: dq 0x000000002ad91c30 ++rk11: dq 0x000000000715ce53 ++rk12: dq 0x00000000c49f4f67 ++rk13: dq 0x0000000039d3b296 ++rk14: dq 0x00000000083a6eec ++rk15: dq 0x000000009e4addf8 ++rk16: dq 0x00000000740eef02 ++rk17: dq 0x00000000ddc0152b ++rk18: dq 0x000000001c291d04 ++rk19: dq 0x00000000ba4fc28e ++rk20: dq 0x000000003da6d0cb ++ ++rk_1b: dq 0x00000000493c7d27 ++rk_2b: dq 0x0000000ec1068c50 ++ dq 0x0000000000000000 ++ dq 0x0000000000000000 ++ ++%else ++INCLUDE_CONSTS ++%endif ++ ++pshufb_shf_table: ++; use these values for shift constants for the pshufb instruction ++; different alignments result in values as shown: ++; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 ++; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-3) / shr2 ++; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-4) / shr3 ++; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 ++; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 ++; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 ++; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 ++; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 ++; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 ++; dq 0x01008f8e8d8c8b8a, 
0x0908070605040302 ; shl 6 (16-10) / shr10 ++; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 ++; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 ++; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 ++; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 ++; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 ++dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 ++dq 0x0706050403020100, 0x000e0d0c0b0a0908 ++ ++mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 ++mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF ++mask3: dq 0x8080808080808080, 0x8080808080808080 ++ ++%else ; Assembler doesn't understand these opcodes. Add empty symbol for windows. ++%ifidn __OUTPUT_FORMAT__, win64 ++global no_ %+ FUNCTION_NAME ++no_ %+ FUNCTION_NAME %+ : ++%endif ++%endif ; (AS_FEATURE_LEVEL) >= 10 +diff --git a/src/isa-l/crc/crc_multibinary.asm b/src/isa-l/crc/crc_multibinary.asm +index 8b9d7bd..a28a468 100644 +--- a/src/isa-l/crc/crc_multibinary.asm ++++ b/src/isa-l/crc/crc_multibinary.asm +@@ -57,6 +57,7 @@ extern crc16_t10dif_copy_base + %if (AS_FEATURE_LEVEL) >= 10 + extern crc32_gzip_refl_by16_10 + extern crc32_ieee_by16_10 ++extern crc32_iscsi_by16_10 + extern crc16_t10dif_by16_10 + %endif + +@@ -93,18 +94,58 @@ crc32_iscsi_dispatch_init: + push rcx + push rdx + push rsi ++ push rdi + lea rsi, [crc32_iscsi_base WRT_OPT] ; Default + + mov eax, 1 + cpuid +- lea rbx, [crc32_iscsi_00 WRT_OPT] +- lea rax, [crc32_iscsi_01 WRT_OPT] ++ mov ebx, ecx ; save cpuid1.ecx ++ test ecx, FLAG_CPUID1_ECX_SSE4_2 ++ jz .crc_iscsi_init_done ; use iscsi_base ++ lea rsi, [crc32_iscsi_00 WRT_OPT] ++ test ecx, FLAG_CPUID1_ECX_CLMUL ++ jz .crc_iscsi_init_done ; use ieee_base ++ lea rsi, [crc32_iscsi_01 WRT_OPT] ++ ++ ;; Test for XMM_YMM support/AVX ++ test ecx, FLAG_CPUID1_ECX_OSXSAVE ++ je .crc_iscsi_init_done ++ xor ecx, ecx ++ xgetbv ; xcr -> edx:eax ++ mov edi, eax ; save xgetvb.eax ++ ++ and eax, FLAG_XGETBV_EAX_XMM_YMM ++ cmp 
eax, FLAG_XGETBV_EAX_XMM_YMM ++ jne .crc_iscsi_init_done ++ test ebx, FLAG_CPUID1_ECX_AVX ++ je .crc_iscsi_init_done ++ ;; AVX/02 opt if available ++ ++%if AS_FEATURE_LEVEL >= 10 ++ ;; Test for AVX2 ++ xor ecx, ecx ++ mov eax, 7 ++ cpuid ++ test ebx, FLAG_CPUID7_EBX_AVX2 ++ je .crc_iscsi_init_done ; No AVX2 possible ++ ++ ;; Test for AVX512 ++ and edi, FLAG_XGETBV_EAX_ZMM_OPM ++ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM ++ jne .crc_iscsi_init_done ; No AVX512 possible ++ and ebx, FLAGS_CPUID7_EBX_AVX512_G1 ++ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 ++ jne .crc_iscsi_init_done ++ ++ and ecx, FLAGS_CPUID7_ECX_AVX512_G2 ++ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 ++ lea rbx, [crc32_iscsi_by16_10 WRT_OPT] ; AVX512/10 opt ++ cmove rsi, rbx ++%endif + +- test ecx, FLAG_CPUID1_ECX_SSE4_2 +- cmovne rsi, rbx +- test ecx, FLAG_CPUID1_ECX_CLMUL +- cmovne rsi, rax ++.crc_iscsi_init_done: + mov [crc32_iscsi_dispatched], rsi ++ pop rdi + pop rsi + pop rdx + pop rcx +-- +2.20.1.windows.1 + + +From 24623b8b8202ed626274a5481fd2619939cec759 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Wed, 26 Aug 2020 09:49:23 -0700 +Subject: [PATCH 24/42] crc: Fix missing object omitted from nmake file + +Previous new crc version missed the update for nmake. 
+ +Change-Id: Ie529ee9d70d8d0ab8a8af3bd2720405802180d1e +Signed-off-by: Greg Tucker +--- + Makefile.nmake | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/isa-l/Makefile.nmake b/src/isa-l/Makefile.nmake +index 660722a..6360d1f 100644 +--- a/src/isa-l/Makefile.nmake ++++ b/src/isa-l/Makefile.nmake +@@ -119,6 +119,7 @@ objs = \ + bin\crc32_ieee_by16_10.obj \ + bin\crc32_iscsi_01.obj \ + bin\crc32_iscsi_00.obj \ ++ bin\crc32_iscsi_by16_10.obj \ + bin\crc_multibinary.obj \ + bin\crc64_multibinary.obj \ + bin\crc64_ecma_refl_by8.obj \ +-- +2.20.1.windows.1 + + +From 794b8b60c12933ff45effdf94580de1fb5e62d4e Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Wed, 26 Aug 2020 11:41:03 -0700 +Subject: [PATCH 25/42] build: Add test to check for nmake consistency + +Change-Id: I1180ba749d54e7ef433b01b33450e52ac5dbb2bb +Signed-off-by: Greg Tucker +--- + tools/gen_nmake.mk | 11 +++++++++-- + tools/test_extended.sh | 5 +++++ + 2 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/src/isa-l/tools/gen_nmake.mk b/src/isa-l/tools/gen_nmake.mk +index dedea90..8e6330d 100644 +--- a/src/isa-l/tools/gen_nmake.mk ++++ b/src/isa-l/tools/gen_nmake.mk +@@ -1,11 +1,18 @@ ++# Regenerate nmake file from makefiles or check its consistency ++ ++test_nmake_file: tst.nmake ++ @diff -u Makefile.nmake tst.nmake || (echo Potential nmake consistency issue; $(RM) tst.nmake; false;) ++ @echo No nmake consistency issues ++ @$(RM) tst.nmake ++ + FORCE: +-Makefile.nmake: FORCE ++Makefile.nmake tst.nmake: FORCE + @echo Regenerating $@ + @echo '########################################################################' > $@ + @cat LICENSE | sed -e 's/^/#/ ' >> $@ + @echo '########################################################################' >> $@ + @echo '' >> $@ +- @echo '# This file can be auto-regenerated with $$make -f Makefile.unx $@' >> $@ ++ @echo '# This file can be auto-regenerated with $$make -f Makefile.unx Makefile.nmake' >> $@ + @echo '' >> $@ + @echo -n 'objs =' >> $@ + 
@$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; ) +diff --git a/src/isa-l/tools/test_extended.sh b/src/isa-l/tools/test_extended.sh +index 8f46a1f..1c05d05 100755 +--- a/src/isa-l/tools/test_extended.sh ++++ b/src/isa-l/tools/test_extended.sh +@@ -170,6 +170,11 @@ msg+=$'Custom hufftable build: Pass\n' + + $MAKE -f Makefile.unx clean + ++test_start "nmake_file_consistency" ++$MAKE -f Makefile.unx test_nmake_file ++test_end "nmake_file_consistency" $? ++msg+=$'Nmake file consistency: Pass\n' ++ + # noarch build + test_start "noarch_build" + time $MAKE -f Makefile.unx -j $cpus arch=noarch $build_opt +-- +2.20.1.windows.1 + + +From cc9ed539725b4277d76701415983b83f4e6de3d6 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Thu, 27 Aug 2020 11:16:30 -0700 +Subject: [PATCH 26/42] build: Fix nmake check for multiple arch + +Change-Id: I36c3616163f6fec61dda9cf8b35ca561e59477c9 +Signed-off-by: Greg Tucker +--- + tools/test_extended.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/isa-l/tools/test_extended.sh b/src/isa-l/tools/test_extended.sh +index 1c05d05..4527416 100755 +--- a/src/isa-l/tools/test_extended.sh ++++ b/src/isa-l/tools/test_extended.sh +@@ -171,7 +171,7 @@ msg+=$'Custom hufftable build: Pass\n' + $MAKE -f Makefile.unx clean + + test_start "nmake_file_consistency" +-$MAKE -f Makefile.unx test_nmake_file ++$MAKE -f Makefile.unx host_cpu="x86_64" test_nmake_file + test_end "nmake_file_consistency" $? + msg+=$'Nmake file consistency: Pass\n' + +-- +2.20.1.windows.1 + + +From d7927673ba9908178aecf5d4084ac6e681688d0d Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Tue, 20 Oct 2020 09:55:24 -0700 +Subject: [PATCH 27/42] igzip: Inflate detect pre-gen header and use + pre-expanded + +Performance improvement for inflate to skip the time-consuming process of decode +table expansion when the header matches a known common dynamic one such as +produced by level 0 compression.
+ +Change-Id: Ia2550b812a062b7cc2eb1b72bcb609f1a631e40b +Signed-off-by: Greg Tucker +--- + igzip/generate_static_inflate.c | 33 + + igzip/igzip_inflate.c | 76 ++ + igzip/static_inflate.h | 1332 +++++++++++++++++++++++++++++++ + tools/test_extended.sh | 4 +- + 4 files changed, 1442 insertions(+), 3 deletions(-) + +diff --git a/src/isa-l/igzip/generate_static_inflate.c b/src/isa-l/igzip/generate_static_inflate.c +index f4bf5ac..4bf9a6a 100644 +--- a/src/isa-l/igzip/generate_static_inflate.c ++++ b/src/isa-l/igzip/generate_static_inflate.c +@@ -36,6 +36,8 @@ + + #define STATIC_INFLATE_FILE "static_inflate.h" + ++extern struct isal_hufftables hufftables_default; ++ + /** + * @brief Prints a table of uint16_t elements to a file. + * @param outfile: the file the table is printed to. +@@ -133,6 +135,7 @@ int main(int argc, char *argv[]) + printf("Error creating file hufftables_c.c\n"); + return 1; + } ++ // Add decode tables describing a type 2 static (fixed) header + + fprintf(file, "#ifndef STATIC_HEADER_H\n" "#define STATIC_HEADER_H\n\n"); + +@@ -157,6 +160,36 @@ int main(int argc, char *argv[]) + fprintf(file, "};\n\n"); + + fprintf(file, "#endif\n"); ++ ++ // Add other tables for known dynamic headers - level 0 ++ ++ isal_inflate_init(&state); ++ ++ state.next_in = (uint8_t *) & hufftables_default.deflate_hdr; ++ state.avail_in = sizeof(hufftables_default.deflate_hdr); ++ state.next_out = tmp_space; ++ state.avail_out = sizeof(tmp_space); ++ ++ isal_inflate(&state); ++ ++ fprintf(file, "struct inflate_huff_code_large pregen_lit_huff_code = {\n"); ++ fprint_uint32_table(file, state.lit_huff_code.short_code_lookup, ++ sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t), ++ "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); ++ fprint_uint16_table(file, state.lit_huff_code.long_code_lookup, ++ sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t), ++ "\t.long_code_lookup = {", "\t}\n", "\t\t"); ++ fprintf(file, "};\n\n"); ++ ++ fprintf(file, "struct 
inflate_huff_code_small pregen_dist_huff_code = {\n"); ++ fprint_uint16_table(file, state.dist_huff_code.short_code_lookup, ++ sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t), ++ "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); ++ fprint_uint16_table(file, state.dist_huff_code.long_code_lookup, ++ sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t), ++ "\t.long_code_lookup = {", "\t}\n", "\t\t"); ++ fprintf(file, "};\n\n"); ++ + fclose(file); + + return 0; +diff --git a/src/isa-l/igzip/igzip_inflate.c b/src/isa-l/igzip/igzip_inflate.c +index 3e302a9..aa602a4 100644 +--- a/src/isa-l/igzip/igzip_inflate.c ++++ b/src/isa-l/igzip/igzip_inflate.c +@@ -53,6 +53,7 @@ + #endif + + extern int decode_huffman_code_block_stateless(struct inflate_state *, uint8_t * start_out); ++extern struct isal_hufftables hufftables_default; /* For known header detection */ + + #define LARGE_SHORT_SYM_LEN 25 + #define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1) +@@ -932,6 +933,76 @@ static void inline make_inflate_huff_code_header(struct inflate_huff_code_small + } + } + ++static int header_matches_pregen(struct inflate_state *state) ++{ ++#ifndef ISAL_STATIC_INFLATE_TABLE ++ return 0; ++#else ++ uint8_t *in, *hdr; ++ uint32_t in_end_bits, hdr_end_bits; ++ uint32_t bytes_read_in, header_len, last_bits, last_bit_mask; ++ uint64_t bits_read_mask; ++ uint64_t hdr_stash, in_stash; ++ const uint32_t bits_read_prior = 3; // Have read bfinal(1) and btype(2) ++ ++ /* Check if stashed read_in_bytes match header */ ++ hdr = &(hufftables_default.deflate_hdr[0]); ++ bits_read_mask = (1ull << state->read_in_length) - 1; ++ hdr_stash = (*((uint64_t *) hdr) >> bits_read_prior) & bits_read_mask; ++ in_stash = state->read_in & bits_read_mask; ++ ++ if (hdr_stash != in_stash) ++ return 0; ++ ++ /* Check if input is byte aligned */ ++ if ((state->read_in_length + bits_read_prior) % 8) ++ return 0; ++ ++ /* Check if header bulk is the same */ ++ in = state->next_in; ++ 
bytes_read_in = (state->read_in_length + bits_read_prior) / 8; ++ header_len = hufftables_default.deflate_hdr_count; ++ ++ if (memcmp(in, &hdr[bytes_read_in], header_len - bytes_read_in)) ++ return 0; ++ ++ /* If there are any last/end bits to the header check them too */ ++ last_bits = hufftables_default.deflate_hdr_extra_bits; ++ last_bit_mask = (1 << last_bits) - 1; ++ ++ if (0 == last_bits) { ++ state->next_in += header_len - bytes_read_in; ++ state->avail_in -= header_len - bytes_read_in; ++ state->read_in_length = 0; ++ state->read_in = 0; ++ return 1; ++ } ++ ++ in_end_bits = in[header_len - bytes_read_in] & last_bit_mask; ++ hdr_end_bits = hdr[header_len] & last_bit_mask; ++ if (in_end_bits == hdr_end_bits) { ++ state->next_in += header_len - bytes_read_in; ++ state->avail_in -= header_len - bytes_read_in; ++ state->read_in_length = 0; ++ state->read_in = 0; ++ inflate_in_read_bits(state, last_bits); ++ return 1; ++ } ++ ++ return 0; ++#endif // ISAL_STATIC_INFLATE_TABLE ++} ++ ++static int setup_pregen_header(struct inflate_state *state) ++{ ++#ifdef ISAL_STATIC_INFLATE_TABLE ++ memcpy(&state->lit_huff_code, &pregen_lit_huff_code, sizeof(pregen_lit_huff_code)); ++ memcpy(&state->dist_huff_code, &pregen_dist_huff_code, sizeof(pregen_dist_huff_code)); ++ state->block_state = ISAL_BLOCK_CODED; ++#endif // ISAL_STATIC_INFLATE_TABLE ++ return 0; ++} ++ + /* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the + * deflate static header */ + static int inline setup_static_header(struct inflate_state *state) +@@ -1190,6 +1261,11 @@ static int inline setup_dynamic_header(struct inflate_state *state) + 0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02, 0x0e, 0x01, 0x0f + }; + ++ /* If you are given a whole header and it matches the pregen header */ ++ if (state->avail_in > (hufftables_default.deflate_hdr_count + sizeof(uint64_t)) ++ && header_matches_pregen(state)) ++ return setup_pregen_header(state); ++ + if (state->bfinal && state->avail_in 
<= SINGLE_SYM_THRESH) { + multisym = SINGLE_SYM_FLAG; + } else if (state->bfinal && state->avail_in <= DOUBLE_SYM_THRESH) { +diff --git a/src/isa-l/igzip/static_inflate.h b/src/isa-l/igzip/static_inflate.h +index 33542fe..eb6fd1b 100644 +--- a/src/isa-l/igzip/static_inflate.h ++++ b/src/isa-l/igzip/static_inflate.h +@@ -1344,3 +1344,1335 @@ struct inflate_huff_code_small static_dist_huff_code = { + }; + + #endif ++struct inflate_huff_code_large pregen_lit_huff_code = { ++ .short_code_lookup = { ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000059, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000ad, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000087, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d7, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb4000017, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007c, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c4, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000111, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009a, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 
0x74000107, 0x54000105, 0xa40000e3, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000eb, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000008, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000060, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b7, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000092, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000df, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb400007b, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000cd, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a3, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000f6, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011d, ++ 0x24000102, 
0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005c, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b2, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000db, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000112, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000115, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000018, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000071, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bb, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 
0x8400000d, 0x6400010a, 0xb4000096, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e5, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000083, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x3e000120, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xc4000119, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005b, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000af, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000089, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d9, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 
0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007e, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c6, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000113, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009c, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ed, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000010, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400006a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b9, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000094, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e2, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000081, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa4000090, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d1, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 
0x7400006e, 0x54000020, 0xa4000056, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a7, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000fa, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000000, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005f, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b5, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000dd, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008b, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000114, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, ++ 0x24000102, 
0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000116, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0xb4000015, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400007a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000be, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e9, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000085, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d5, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x42000130, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000059, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 
0x84000030, 0x64000000, 0xb40000ae, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000088, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d8, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb4000019, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007c, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c5, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000111, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009b, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ec, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600000a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000060, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b8, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000093, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, ++ 0x24000102, 
0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e1, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb400007d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000ce, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a5, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000f7, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011e, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005c, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b3, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000dc, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 
0x74000074, 0x54000105, 0xa400007f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000112, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000117, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600001a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000071, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bc, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e6, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000084, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d4, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa, ++ 0x24000102, 
0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0xb4000200, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005b, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b1, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000da, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001b, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007e, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000113, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 
0x74000108, 0x64000109, 0x36000012, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400006a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000ba, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e4, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000082, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa4000090, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d2, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000002, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005f, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b6, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 
0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000de, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005e, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008b, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000cc, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000114, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000118, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0xb4000016, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400007a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bf, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000ea, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 
0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000086, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d6, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x46000140, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000059, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000ad, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000087, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d7, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb4000017, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007c, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c4, ++ 0x24000102, 
0x64000069, 0x44000103, 0xa4000111, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009a, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000eb, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600000c, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000060, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b7, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000092, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000df, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb400007b, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000cd, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a3, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 
0x84000067, 0x64000061, 0xb40000f6, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011f, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005c, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b2, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000db, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000112, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000115, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600001c, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000071, ++ 0x24000102, 
0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bb, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e5, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000083, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x3e000128, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011b, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005b, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000af, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000089, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 
0x74000075, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d9, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007e, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c6, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000113, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009c, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ed, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000014, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400006a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b9, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000094, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e2, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000081, ++ 0x24000102, 
0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa4000090, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d1, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a7, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000fa, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000004, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005f, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b5, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000dd, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008b, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000114, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 
0x8400002d, 0x6400010a, 0xb40000a1, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000116, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0xb4000015, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400007a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000be, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e9, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000085, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d5, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x46000160, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 
0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000059, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000ae, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000088, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000d8, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb4000019, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007c, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c5, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000111, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009b, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, ++ 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ec, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600000e, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000060, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000b8, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 
0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000093, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e1, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb400007d, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000ce, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a5, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000f7, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0xc4000120, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005c, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b3, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000dc, ++ 0x24000102, 
0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005a, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007f, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000112, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000117, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0x3600001e, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa4000071, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bc, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e6, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000084, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 
0x84000041, 0x64000000, 0xb40000d4, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0xb4000200, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000e0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x74000108, 0x64000109, 0xc400011c, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000006, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400005b, ++ 0x24000102, 0x7400000a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b1, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010d, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x64000106, 0x44000104, 0x9400003f, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000010, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000da, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000039, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400001b, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400007e, ++ 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000113, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000045, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, ++ 0x24000102, 
0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000fe, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000016, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400006a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000ba, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000043, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000076, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000e4, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010b, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003b, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000082, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, ++ 0x24000102, 0x74000074, 0x54000105, 0xa4000090, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000052, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d2, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000047, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c2, ++ 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x54000020, 0x940000f0, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000108, 0x64000109, 0x36000006, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 
0x74000070, 0x54000105, 0xa400005f, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x64000000, 0xb40000b6, ++ 0x24000102, 0x64000069, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, ++ 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, ++ 0x24000102, 0x74000063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x64000061, 0xb40000de, ++ 0x24000102, 0x64000065, 0x44000103, 0x840000ff, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0x84000001, 0x64000109, 0xb400005e, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, ++ 0x24000102, 0x74000074, 0x54000105, 0xa400008b, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x84000032, 0x64000000, 0xb40000cc, ++ 0x24000102, 0x64000069, 0x44000103, 0xa4000114, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, ++ 0x24000102, 0x64000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x54000020, 0xb4000118, ++ 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0x74000108, 0x64000109, 0xb4000016, ++ 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xa400007a, ++ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, ++ 0x24000102, 0x84000031, 0x64000000, 0xb40000bf, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, ++ 0x24000102, 
0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, ++ 0x24000102, 0x74000063, 0x44000104, 0x94000078, ++ 0x24000102, 0x84000062, 0x64000061, 0xb40000ea, ++ 0x24000102, 0x64000065, 0x44000103, 0x9400010c, ++ 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x84000003, 0x64000109, 0xb4000086, ++ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, ++ 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x84000041, 0x64000000, 0xb40000d6, ++ 0x24000102, 0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, ++ 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac, ++ 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, ++ 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x84000067, 0x64000061, 0x52000020 }, ++ ++ .long_code_lookup = { ++ 0x3521, 0x3525, 0x3522, 0x3526, 0x3523, 0x3527, 0x3524, 0x3528, ++ 0x3529, 0x352d, 0x352a, 0x352e, 0x352b, 0x352f, 0x352c, 0x3530, ++ 0x3531, 0x3535, 0x3532, 0x3536, 0x3533, 0x3537, 0x3534, 0x3538, ++ 0x3539, 0x353d, 0x353a, 0x353e, 0x353b, 0x353f, 0x353c, 0x3540, ++ 0x49a1, 0x3d00, 0x49a2, 0x51c1, 0x49a3, 0x3d01, 0x49a4, 0x51e1, ++ 0x49a5, 0x3d00, 0x49a6, 0x51c2, 0x49a7, 0x3d01, 0x49a8, 0x51e2, ++ 0x49a9, 0x3d00, 0x49aa, 0x51c3, 0x49ab, 0x3d01, 0x49ac, 0x51e3, ++ 0x49ad, 0x3d00, 0x49ae, 0x51c4, 0x49af, 0x3d01, 0x49b0, 0x51e4, ++ 0x49b1, 0x3d00, 0x49b2, 0x51c5, 0x49b3, 0x3d01, 0x49b4, 0x51e5, ++ 0x49b5, 0x3d00, 0x49b6, 0x51c6, 0x49b7, 0x3d01, 0x49b8, 0x51e6, ++ 0x49b9, 0x3d00, 0x49ba, 0x51c7, 0x49bb, 0x3d01, 0x49bc, 0x51e7, ++ 0x49bd, 0x3d00, 0x49be, 0x51c8, 0x49bf, 0x3d01, 0x49c0, 0x51e8, ++ 0x49a1, 0x3d00, 0x49a2, 0x51c9, 0x49a3, 0x3d01, 0x49a4, 0x51e9, ++ 0x49a5, 0x3d00, 0x49a6, 0x51ca, 0x49a7, 0x3d01, 0x49a8, 0x51ea, ++ 0x49a9, 0x3d00, 0x49aa, 
0x51cb, 0x49ab, 0x3d01, 0x49ac, 0x51eb, ++ 0x49ad, 0x3d00, 0x49ae, 0x51cc, 0x49af, 0x3d01, 0x49b0, 0x51ec, ++ 0x49b1, 0x3d00, 0x49b2, 0x51cd, 0x49b3, 0x3d01, 0x49b4, 0x51ed, ++ 0x49b5, 0x3d00, 0x49b6, 0x51ce, 0x49b7, 0x3d01, 0x49b8, 0x51ee, ++ 0x49b9, 0x3d00, 0x49ba, 0x51cf, 0x49bb, 0x3d01, 0x49bc, 0x51ef, ++ 0x49bd, 0x3d00, 0x49be, 0x51d0, 0x49bf, 0x3d01, 0x49c0, 0x51f0, ++ 0x49a1, 0x3d00, 0x49a2, 0x51d1, 0x49a3, 0x3d01, 0x49a4, 0x51f1, ++ 0x49a5, 0x3d00, 0x49a6, 0x51d2, 0x49a7, 0x3d01, 0x49a8, 0x51f2, ++ 0x49a9, 0x3d00, 0x49aa, 0x51d3, 0x49ab, 0x3d01, 0x49ac, 0x51f3, ++ 0x49ad, 0x3d00, 0x49ae, 0x51d4, 0x49af, 0x3d01, 0x49b0, 0x51f4, ++ 0x49b1, 0x3d00, 0x49b2, 0x51d5, 0x49b3, 0x3d01, 0x49b4, 0x51f5, ++ 0x49b5, 0x3d00, 0x49b6, 0x51d6, 0x49b7, 0x3d01, 0x49b8, 0x51f6, ++ 0x49b9, 0x3d00, 0x49ba, 0x51d7, 0x49bb, 0x3d01, 0x49bc, 0x51f7, ++ 0x49bd, 0x3d00, 0x49be, 0x51d8, 0x49bf, 0x3d01, 0x49c0, 0x51f8, ++ 0x49a1, 0x3d00, 0x49a2, 0x51d9, 0x49a3, 0x3d01, 0x49a4, 0x51f9, ++ 0x49a5, 0x3d00, 0x49a6, 0x51da, 0x49a7, 0x3d01, 0x49a8, 0x51fa, ++ 0x49a9, 0x3d00, 0x49aa, 0x51db, 0x49ab, 0x3d01, 0x49ac, 0x51fb, ++ 0x49ad, 0x3d00, 0x49ae, 0x51dc, 0x49af, 0x3d01, 0x49b0, 0x51fc, ++ 0x49b1, 0x3d00, 0x49b2, 0x51dd, 0x49b3, 0x3d01, 0x49b4, 0x51fd, ++ 0x49b5, 0x3d00, 0x49b6, 0x51de, 0x49b7, 0x3d01, 0x49b8, 0x51fe, ++ 0x49b9, 0x3d00, 0x49ba, 0x51df, 0x49bb, 0x3d01, 0x49bc, 0x51ff, ++ 0x49bd, 0x3d00, 0x49be, 0x51e0, 0x49bf, 0x3d01, 0x49c0, 0x5200, ++ 0x3d41, 0x3d43, 0x3d45, 0x3d47, 0x3d49, 0x3d4b, 0x3d4d, 0x3d4f, ++ 0x3d42, 0x3d44, 0x3d46, 0x3d48, 0x3d4a, 0x3d4c, 0x3d4e, 0x3d50, ++ 0x4151, 0x4152, 0x4153, 0x4154, 0x4155, 0x4156, 0x4157, 0x4158, ++ 0x4159, 0x415a, 0x415b, 0x415c, 0x415d, 0x415e, 0x415f, 0x4160, ++ 0x4561, 0x4571, 0x4562, 0x4572, 0x4563, 0x4573, 0x4564, 0x4574, ++ 0x4565, 0x4575, 0x4566, 0x4576, 0x4567, 0x4577, 0x4568, 0x4578, ++ 0x4569, 0x4579, 0x456a, 0x457a, 0x456b, 0x457b, 0x456c, 0x457c, ++ 0x456d, 0x457d, 0x456e, 0x457e, 0x456f, 0x457f, 0x4570, 0x4580, ++ 0x4581, 
0x4582, 0x4583, 0x4584, 0x4585, 0x4586, 0x4587, 0x4588, ++ 0x4589, 0x458a, 0x458b, 0x458c, 0x458d, 0x458e, 0x458f, 0x4590, ++ 0x4591, 0x4592, 0x4593, 0x4594, 0x4595, 0x4596, 0x4597, 0x4598, ++ 0x4599, 0x459a, 0x459b, 0x459c, 0x459d, 0x459e, 0x459f, 0x45a0, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } ++}; ++ ++struct inflate_huff_code_small pregen_dist_huff_code = { ++ .short_code_lookup = { ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 
0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 
0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 
0x299b, 0x20cf, 0x2913, 0x2178, 0x4803, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 
0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, 
++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, ++ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, ++ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, ++ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, ++ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803 }, ++ ++ .long_code_lookup = { ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } ++}; ++ +diff --git a/src/isa-l/tools/test_extended.sh b/src/isa-l/tools/test_extended.sh +index 4527416..a5f1e96 100755 +--- a/src/isa-l/tools/test_extended.sh ++++ b/src/isa-l/tools/test_extended.sh +@@ -158,11 +158,9 @@ msg+=$'Examples run: Pass\n' + # Test custom hufftables + test_start "generate_custom_hufftables" + ./generate_custom_hufftables $in_file ++$MAKE -f Makefile.unx clean + $MAKE -f Makefile.unx -j $cpus D="NO_STATIC_INFLATE_H" checks + ./igzip_rand_test $in_file +-./generate_static_inflate +-diff -q static_inflate.h igzip/static_inflate.h +-rm -rf static_inflate.h + rm -rf hufftables_c.c + test_end "generate_custom_hufftables" $? 
+ +-- +2.20.1.windows.1 + + +From 63dffab948cf8918d085e9cd7c1ab2127acd534e Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Tue, 20 Oct 2020 09:55:40 -0700 +Subject: [PATCH 28/42] igzip: Change pre-gen inflate table to multi-symbol + +Change-Id: I4b0dac1e5aa2796be17644b893e3b6c7aed05876 +Signed-off-by: Greg Tucker +--- + igzip/generate_static_inflate.c | 21 +- + igzip/static_inflate.h | 1190 +++++++++++++++---------------- + 2 files changed, 610 insertions(+), 601 deletions(-) + +diff --git a/src/isa-l/igzip/generate_static_inflate.c b/src/isa-l/igzip/generate_static_inflate.c +index 4bf9a6a..3917677 100644 +--- a/src/isa-l/igzip/generate_static_inflate.c ++++ b/src/isa-l/igzip/generate_static_inflate.c +@@ -35,6 +35,7 @@ + #include "igzip_lib.h" + + #define STATIC_INFLATE_FILE "static_inflate.h" ++#define DOUBLE_SYM_THRESH (4 * 1024) + + extern struct isal_hufftables hufftables_default; + +@@ -118,12 +119,18 @@ int main(int argc, char *argv[]) + struct inflate_state state; + FILE *file; + uint8_t static_deflate_hdr = 3; +- uint8_t tmp_space[8]; ++ uint8_t tmp_space[8], *in_buf; ++ ++ if (NULL == (in_buf = malloc(DOUBLE_SYM_THRESH + 1))) { ++ printf("Can not allocote memory\n"); ++ return 1; ++ } + + isal_inflate_init(&state); + +- state.next_in = &static_deflate_hdr; +- state.avail_in = sizeof(static_deflate_hdr); ++ memcpy(in_buf, &static_deflate_hdr, sizeof(static_deflate_hdr)); ++ state.next_in = in_buf; ++ state.avail_in = DOUBLE_SYM_THRESH + 1; + state.next_out = tmp_space; + state.avail_out = sizeof(tmp_space); + +@@ -165,8 +172,10 @@ int main(int argc, char *argv[]) + + isal_inflate_init(&state); + +- state.next_in = (uint8_t *) & hufftables_default.deflate_hdr; +- state.avail_in = sizeof(hufftables_default.deflate_hdr); ++ memcpy(in_buf, &hufftables_default.deflate_hdr, ++ sizeof(hufftables_default.deflate_hdr)); ++ state.next_in = in_buf; ++ state.avail_in = DOUBLE_SYM_THRESH + 1; + state.next_out = tmp_space; + state.avail_out = sizeof(tmp_space); + 
+@@ -191,6 +200,6 @@ int main(int argc, char *argv[]) + fprintf(file, "};\n\n"); + + fclose(file); +- ++ free(in_buf); + return 0; + } +diff --git a/src/isa-l/igzip/static_inflate.h b/src/isa-l/igzip/static_inflate.h +index eb6fd1b..6ed8d30 100644 +--- a/src/isa-l/igzip/static_inflate.h ++++ b/src/isa-l/igzip/static_inflate.h +@@ -1346,376 +1346,376 @@ struct inflate_huff_code_small static_dist_huff_code = { + #endif + struct inflate_huff_code_large pregen_lit_huff_code = { + .short_code_lookup = { +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, +- 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000059, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000ad, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, +- 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000087, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d7, +- 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, +- 0x24000102, 0x64000073, 0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb4000017, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007c, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 0xb40000c4, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000111, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, +- 0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009a, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, +- 0x24000102, 
0x74000107, 0x54000105, 0xa40000e3, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000eb, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x88010265, 0x44000103, 0xa8010277, ++ 0x24000102, 0x98010268, 0x78010220, 0xb80102e0, ++ 0x24000102, 0x88010273, 0x44000104, 0xb8010235, ++ 0x24000102, 0x74000108, 0x64000109, 0xc80102fd, ++ 0x24000102, 0x8801026f, 0x44000103, 0xb8010206, ++ 0x24000102, 0x98010270, 0x54000105, 0xc8010259, ++ 0x24000102, 0x9801020a, 0x44000104, 0xb8010249, ++ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006520, 0xc8010223, ++ 0x24000102, 0x64000106, 0x44000104, 0xb801023f, ++ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087, ++ 0x24000102, 0x88010272, 0x44000103, 0xb8010210, ++ 0x24000102, 0x98010275, 0x54000105, 0xc80102a6, ++ 0x24000102, 0x98010263, 0x44000104, 0xb8010254, ++ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7, ++ 0x24000102, 0xc8006565, 0x44000103, 0xa80102ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xc8010211, ++ 0x24000102, 0xc8006573, 0x44000104, 0xb8010239, ++ 0x24000102, 0xa8010201, 0x64000109, 0xb4000017, ++ 0x24000102, 0xc800656f, 0x44000103, 0xb801020b, ++ 0x24000102, 0x98010274, 0x54000105, 0xc801027c, ++ 0x24000102, 0x9801022c, 0x44000104, 0xb801024f, ++ 0x24000102, 0xa8010232, 0xc8006500, 0xb40000c4, ++ 0x24000102, 0xc8006569, 0x44000103, 0xa4000111, ++ 0x24000102, 0x9801026e, 0x54000020, 0xc801023d, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010245, ++ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a, ++ 0x24000102, 0xc8006572, 0x44000103, 0xb8010228, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102e3, ++ 0x24000102, 0x98010264, 0x44000104, 0xb8010280, ++ 0x24000102, 0xa8010266, 0xc8006561, 0xb40000eb, ++ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, ++ 0x24000102, 
0x74000068, 0x78010220, 0xb80102fe, ++ 0x24000102, 0xa8010373, 0x44000104, 0xb8010237, + 0x24000102, 0x74000108, 0x64000109, 0x36000008, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000060, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b7, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, +- 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000092, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, +- 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000df, ++ 0x24000102, 0xa801036f, 0x44000103, 0xb8010208, ++ 0x24000102, 0x74000070, 0x54000105, 0xc8010260, ++ 0x24000102, 0x7400000a, 0x44000104, 0xb801024d, ++ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7, ++ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8006820, 0xc801022a, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010243, ++ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092, ++ 0x24000102, 0xa8010372, 0x44000103, 0xb8010222, ++ 0x24000102, 0x74000075, 0x54000105, 0xc80102c1, ++ 0x24000102, 0x74000063, 0x44000104, 0xb8010276, ++ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb400007b, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, +- 0x24000102, 0x74000074, 0x54000105, 0xa400008f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000cd, +- 0x24000102, 0x64000069, 0x44000103, 0x94000002, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, +- 0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a3, +- 0x24000102, 0x64000072, 
0x44000103, 0x94000033, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000f6, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x7400006c, 0xa8002020, 0xc8010218, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023b, ++ 0x24000102, 0xa8010203, 0x64000109, 0xb400007b, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020e, ++ 0x24000102, 0x74000074, 0x54000105, 0xc801028f, ++ 0x24000102, 0x7400002c, 0x44000104, 0xb8010252, ++ 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd, ++ 0x24000102, 0x64000069, 0x44000103, 0xb8010202, ++ 0x24000102, 0x7400006e, 0x54000020, 0xc8010255, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010247, ++ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010233, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102f3, ++ 0x24000102, 0x74000064, 0x44000104, 0xb80102c2, ++ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0, ++ 0x24000102, 0x88010273, 0x44000104, 0xb8010236, + 0x24000102, 0x74000108, 0x64000109, 0xc400011d, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005c, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b2, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, +- 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x8801026f, 0x44000103, 0xb8010207, ++ 0x24000102, 0xb8010370, 0x54000105, 0xc801025c, ++ 0x24000102, 0xb801030a, 0x44000104, 0xb801024c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b2, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xb801036d, 0xb8007320, 0xc8010225, ++ 0x24000102, 0x64000106, 0x44000104, 
0xb8010240, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000db, ++ 0x24000102, 0x88010272, 0x44000103, 0xb801021f, ++ 0x24000102, 0xb8010375, 0x54000105, 0xc80102b4, ++ 0x24000102, 0xb8010363, 0x44000104, 0xb801026b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000db, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0xb801036c, 0x98010420, 0xc8010213, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023a, + 0x24000102, 0x84000001, 0x64000109, 0xb400001d, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020c, ++ 0x24000102, 0xb8010374, 0x54000105, 0xc801027f, ++ 0x24000102, 0xb801032c, 0x44000104, 0xb8010250, + 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, + 0x24000102, 0x64000069, 0x44000103, 0xa4000112, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, +- 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0xb801036e, 0x54000020, 0xc801024b, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010246, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, +- 0x24000102, 0x64000072, 0x44000103, 0x94000029, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010229, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102e8, ++ 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000115, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xb8002065, 0x44000103, 0x84000079, ++ 
0x24000102, 0x74000068, 0x78010220, 0xb4000115, ++ 0x24000102, 0xb8002073, 0x44000104, 0xb8010238, + 0x24000102, 0x74000108, 0x64000109, 0x36000018, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000071, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000bb, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, +- 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0xb800206f, 0x44000103, 0xb8010209, ++ 0x24000102, 0x74000070, 0x54000105, 0xc8010271, ++ 0x24000102, 0x7400000a, 0x44000104, 0xb801024e, ++ 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb, ++ 0x24000102, 0xb8002069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0xc8010820, 0xc801022f, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010244, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, +- 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e5, ++ 0x24000102, 0xb8002072, 0x44000103, 0xb8010227, ++ 0x24000102, 0x74000075, 0x54000105, 0xc80102cf, ++ 0x24000102, 0x74000063, 0x44000104, 0xb8010278, ++ 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x7400006c, 0xb8010920, 0xc801021e, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000083, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +- 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020f, ++ 0x24000102, 0x74000074, 0x54000105, 0xc80102a0, ++ 0x24000102, 0x7400002c, 0x44000104, 0xb8010253, + 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, +- 0x24000102, 
0x64000069, 0x44000103, 0x94000005, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, +- 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x64000069, 0x44000103, 0xb8010205, ++ 0x24000102, 0x7400006e, 0x54000020, 0xc8010257, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010248, + 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, +- 0x24000102, 0x64000072, 0x44000103, 0x94000034, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010234, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102f9, ++ 0x24000102, 0x74000064, 0x44000104, 0xb80102c3, + 0x24000102, 0x84000067, 0x64000061, 0x3e000120, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0xc8010377, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xc4000119, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005b, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000af, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xc801025b, ++ 0x24000102, 0x9801020a, 0x44000104, 0x94000049, ++ 0x24000102, 0xc8010330, 0x88010200, 0xb40000af, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006f20, 0xc8010224, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000089, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d9, +- 0x24000102, 0x64000065, 
0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, +- 0x24000102, 0x64000073, 0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb400001a, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007e, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 0xb40000c6, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000113, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0xc8010304, 0x6400010a, 0xb4000089, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xc80102b0, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0xc8010342, 0x88010261, 0xb40000d9, ++ 0x24000102, 0xc8007365, 0x44000103, 0xc80103ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xc8010212, ++ 0x24000102, 0xc8007373, 0x44000104, 0x94000039, ++ 0x24000102, 0xc8010301, 0x64000109, 0xb400001a, ++ 0x24000102, 0xc800736f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x98010274, 0x54000105, 0xc801027e, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, ++ 0x24000102, 0xc8010332, 0xc8007300, 0xb40000c6, ++ 0x24000102, 0xc8007369, 0x44000103, 0xa4000113, ++ 0x24000102, 0x9801026e, 0x54000020, 0xc801024a, + 0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009c, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000ed, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0xc801032d, 0x6400010a, 0xb400009c, ++ 0x24000102, 0xc8007372, 0x44000103, 0x94000028, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102e7, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, ++ 0x24000102, 0xc8010366, 0xc8007361, 0xb40000ed, ++ 0x24000102, 0xa8010465, 0x44000103, 
0xc8010379, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 0x24000102, 0xa8010473, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 0x36000010, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, +- 0x24000102, 0x74000070, 0x54000105, 0xa400006a, ++ 0x24000102, 0xa801046f, 0x44000103, 0x94000008, ++ 0x24000102, 0x74000070, 0x54000105, 0xc801026a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b9, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0xc8010331, 0xa8010400, 0xb40000b9, ++ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8007020, 0xc801022b, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000094, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, ++ 0x24000102, 0xc801030d, 0x6400010a, 0xb4000094, ++ 0x24000102, 0xa8010472, 0x44000103, 0x94000022, ++ 0x24000102, 0x74000075, 0x54000105, 0xc80102c7, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e2, ++ 0x24000102, 0xc8010362, 0xa8010461, 0xb40000e2, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x7400006c, 0xa8010520, 0xc801021c, + 0x24000102, 0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb4000081, ++ 0x24000102, 0xc8010303, 0x64000109, 0xb4000081, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, +- 0x24000102, 0x74000074, 0x54000105, 0xa4000090, ++ 0x24000102, 0x74000074, 0x54000105, 0xc8010290, + 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000d1, ++ 0x24000102, 0xc8010341, 0x64000000, 0xb40000d1, + 0x24000102, 0x64000069, 0x44000103, 0x94000002, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, ++ 0x24000102, 0x7400006e, 0x54000020, 0xc8010256, + 
0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a7, ++ 0x24000102, 0xc801032e, 0x6400010a, 0xb40000a7, + 0x24000102, 0x64000072, 0x44000103, 0x94000033, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102f8, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000fa, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0xc8010367, 0x64000061, 0xb40000fa, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xc8002068, 0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0x36000000, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005f, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b5, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000007, ++ 0x24000102, 0xc8002070, 0x54000105, 0xc801025f, ++ 0x24000102, 0xc800200a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b5, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xc800206d, 0xc8000a20, 0xc8010226, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000dd, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, ++ 0x24000102, 0xc8002075, 0x54000105, 0xc80102bd, ++ 0x24000102, 0xc8002063, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000dd, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 
0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0xc800206c, 0x98010420, 0xc8010214, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005d, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400008b, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0xc8002074, 0x54000105, 0xc801028b, ++ 0x24000102, 0xc800202c, 0x44000104, 0x94000050, + 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, + 0x24000102, 0x64000069, 0x44000103, 0xa4000114, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0xc800206e, 0x54000020, 0xc8010251, + 0x24000102, 0x64000106, 0x44000104, 0x94000046, + 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1, + 0x24000102, 0x64000072, 0x44000103, 0x94000029, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102f1, ++ 0x24000102, 0xc8002064, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000116, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xc8010965, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000116, ++ 0x24000102, 0xc8010973, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0xb4000015, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, +- 0x24000102, 0x74000070, 0x54000105, 0xa400007a, ++ 0x24000102, 0xc801096f, 0x44000103, 0x94000009, ++ 0x24000102, 0x74000070, 0x54000105, 0xc801027a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000be, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, ++ 0x24000102, 0x84000031, 0xc8010900, 0xb40000be, ++ 0x24000102, 0xc8010969, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400006d, 0x54000020, 0xc801023c, + 0x24000102, 0x64000106, 
0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, ++ 0x24000102, 0xc8010972, 0x44000103, 0x94000027, ++ 0x24000102, 0x74000075, 0x54000105, 0xc80102d0, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e9, ++ 0x24000102, 0x84000062, 0xc8010961, 0xb40000e9, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x7400006c, 0xb8000020, 0xc8010221, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000085, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +- 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, ++ 0x24000102, 0x74000074, 0x54000105, 0xc80102a4, + 0x24000102, 0x7400002c, 0x44000104, 0x94000053, + 0x24000102, 0x84000041, 0x64000000, 0xb40000d5, + 0x24000102, 0x64000069, 0x44000103, 0x94000005, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, ++ 0x24000102, 0x7400006e, 0x54000020, 0xc8010258, + 0x24000102, 0x64000106, 0x44000104, 0x94000048, + 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab, + 0x24000102, 0x64000072, 0x44000103, 0x94000034, +- 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, ++ 0x24000102, 0x74000107, 0x54000105, 0xc80102fc, + 0x24000102, 0x74000064, 0x44000104, 0x940000c3, + 0x24000102, 0x84000067, 0x64000061, 0x42000130, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0xa8010277, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000059, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000ae, 
+- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xa4000059, ++ 0x24000102, 0x9801020a, 0x44000104, 0x94000049, ++ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000088, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d8, +- 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, +- 0x24000102, 0x64000073, 0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb4000019, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007c, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 0xb40000c5, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000111, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8, ++ 0x24000102, 0xc8006f65, 0x44000103, 0xa80102ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, ++ 0x24000102, 0xc8006f73, 0x44000104, 0x94000039, ++ 0x24000102, 0xa8010201, 0x64000109, 0xb4000019, ++ 0x24000102, 0xc8006f6f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x98010274, 0x54000105, 0xa400007c, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, ++ 0x24000102, 0xa8010232, 0xc8006f00, 0xb40000c5, ++ 0x24000102, 0xc8006f69, 0x44000103, 0xa4000111, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, + 
0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009b, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b, ++ 0x24000102, 0xc8006f72, 0x44000103, 0x94000028, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000ec, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, ++ 0x24000102, 0xa8010266, 0xc8006f61, 0xb40000ec, ++ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 0x24000102, 0xa8010373, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 0x3600000a, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801036f, 0x44000103, 0x94000008, + 0x24000102, 0x74000070, 0x54000105, 0xa4000060, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b8, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8, ++ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8006d20, 0xa400002a, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000093, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093, ++ 0x24000102, 0xa8010372, 0x44000103, 0x94000022, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e1, ++ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, + 0x24000102, 
0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb400007d, ++ 0x24000102, 0xa8010203, 0x64000109, 0xb400007d, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, + 0x24000102, 0x74000074, 0x54000105, 0xa400008f, + 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000ce, ++ 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce, + 0x24000102, 0x64000069, 0x44000103, 0x94000002, + 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, + 0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a5, ++ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5, + 0x24000102, 0x64000072, 0x44000103, 0x94000033, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000f7, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xb8010468, 0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0xc400011e, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005c, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b3, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000007, ++ 0x24000102, 0xb8010470, 0x54000105, 0xa400005c, ++ 0x24000102, 0xb801040a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b3, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, +- 0x24000102, 0x64000072, 
0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000dc, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, ++ 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4, ++ 0x24000102, 0xb8010463, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000dc, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0xb801046c, 0x98010420, 0xa4000013, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005a, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0xb8010474, 0x54000105, 0xa400007f, ++ 0x24000102, 0xb801042c, 0x44000104, 0x94000050, + 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, + 0x24000102, 0x64000069, 0x44000103, 0xa4000112, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0xb801046e, 0x54000020, 0xa400004b, + 0x24000102, 0x64000106, 0x44000104, 0x94000046, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, + 0x24000102, 0x64000072, 0x44000103, 0x94000029, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0xb8010464, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000117, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xb8010565, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000117, ++ 0x24000102, 0xb8010573, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0x3600001a, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xb801056f, 0x44000103, 0x94000009, + 0x24000102, 0x74000070, 0x54000105, 0xa4000071, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, 
+- 0x24000102, 0x84000031, 0x64000000, 0xb40000bc, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc, ++ 0x24000102, 0xb8010569, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, + 0x24000102, 0x64000106, 0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xb8010572, 0x44000103, 0x94000027, + 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e6, ++ 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000084, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +@@ -1730,56 +1730,56 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, + 0x24000102, 0x74000064, 0x44000104, 0x940000c3, + 0x24000102, 0x84000067, 0x64000061, 0xb4000200, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xc400011a, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005b, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b1, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xa400005b, ++ 0x24000102, 0x9801020a, 0x44000104, 
0x94000049, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b1, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000da, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000da, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, + 0x24000102, 0x64000073, 0x44000104, 0x94000039, + 0x24000102, 0x84000001, 0x64000109, 0xb400001b, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007e, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x98010274, 0x54000105, 0xa400007e, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, + 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, + 0x24000102, 0x64000069, 0x44000103, 0xa4000113, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, + 0x24000102, 0x64000106, 0x44000104, 0x94000045, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, + 0x24000102, 0x64000072, 0x44000103, 0x94000028, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, + 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0xa8010465, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 
0x24000102, 0xa8010473, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 0x36000012, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801046f, 0x44000103, 0x94000008, + 0x24000102, 0x74000070, 0x54000105, 0xa400006a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000ba, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba, ++ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8007520, 0xa400002b, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0xa8010472, 0x44000103, 0x94000022, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e4, ++ 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, + 0x24000102, 0x64000073, 0x44000104, 0x9400003b, + 0x24000102, 0x84000003, 0x64000109, 0xb4000082, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, +@@ -1794,24 +1794,24 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, + 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0x36000002, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x8801026f, 0x44000103, 
0x94000007, + 0x24000102, 0x74000070, 0x54000105, 0xa400005f, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b6, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b6, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0xc8006320, 0xa4000026, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, + 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, + 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000de, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000de, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x7400006c, 0x98010420, 0xa4000014, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005e, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +@@ -1826,24 +1826,24 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, + 0x24000102, 0x74000064, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000118, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xc8000065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000118, ++ 0x24000102, 0xc8000073, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0xb4000016, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xc800006f, 0x44000103, 0x94000009, + 0x24000102, 0x74000070, 0x54000105, 0xa400007a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000bf, +- 0x24000102, 0x64000069, 
0x44000103, 0x94000110, ++ 0x24000102, 0x84000031, 0xc8000000, 0xb40000bf, ++ 0x24000102, 0xc8000069, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, + 0x24000102, 0x64000106, 0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xc8000072, 0x44000103, 0x94000027, + 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000ea, ++ 0x24000102, 0x84000062, 0xc8000061, 0xb40000ea, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000086, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +@@ -1858,248 +1858,248 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, + 0x24000102, 0x74000064, 0x44000104, 0x940000c3, + 0x24000102, 0x84000067, 0x64000061, 0x46000140, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0xa8010277, ++ 0x24000102, 0x98010268, 0x78010220, 0xb80102e0, ++ 0x24000102, 0x88010273, 0x44000104, 0xb8010235, + 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000059, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000ad, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, +- 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000087, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, +- 0x24000102, 
0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d7, +- 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, +- 0x24000102, 0x64000073, 0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb4000017, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007c, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 0xb40000c4, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000111, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, +- 0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009a, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0x8801026f, 0x44000103, 0xb8010206, ++ 0x24000102, 0x98010270, 0x54000105, 0xa4000059, ++ 0x24000102, 0x9801020a, 0x44000104, 0xb8010249, ++ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006520, 0xa4000023, ++ 0x24000102, 0x64000106, 0x44000104, 0xb801023f, ++ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087, ++ 0x24000102, 0x88010272, 0x44000103, 0xb8010210, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x98010263, 0x44000104, 0xb8010254, ++ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7, ++ 0x24000102, 0xc8006965, 0x44000103, 0xa80102ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, ++ 0x24000102, 0xc8006973, 0x44000104, 0xb8010239, ++ 0x24000102, 0xa8010201, 0x64000109, 0xb4000017, ++ 0x24000102, 0xc800696f, 0x44000103, 0xb801020b, ++ 0x24000102, 0x98010274, 0x54000105, 0xa400007c, ++ 0x24000102, 0x9801022c, 0x44000104, 0xb801024f, ++ 0x24000102, 0xa8010232, 0xc8006900, 0xb40000c4, ++ 0x24000102, 0xc8006969, 0x44000103, 0xa4000111, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010245, ++ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a, ++ 0x24000102, 
0xc8006972, 0x44000103, 0xb8010228, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000eb, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x98010264, 0x44000104, 0xb8010280, ++ 0x24000102, 0xa8010266, 0xc8006961, 0xb40000eb, ++ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, ++ 0x24000102, 0x74000068, 0x78010220, 0xb80102fe, ++ 0x24000102, 0xa8010373, 0x44000104, 0xb8010237, + 0x24000102, 0x74000108, 0x64000109, 0x3600000c, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801036f, 0x44000103, 0xb8010208, + 0x24000102, 0x74000070, 0x54000105, 0xa4000060, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b7, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, +- 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000092, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0x7400000a, 0x44000104, 0xb801024d, ++ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7, ++ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8006c20, 0xa400002a, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010243, ++ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092, ++ 0x24000102, 0xa8010372, 0x44000103, 0xb8010222, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, +- 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000df, ++ 0x24000102, 0x74000063, 0x44000104, 0xb8010276, ++ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb400007b, +- 0x24000102, 0x6400006f, 
0x44000103, 0x9400000e, ++ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023b, ++ 0x24000102, 0xa8010203, 0x64000109, 0xb400007b, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020e, + 0x24000102, 0x74000074, 0x54000105, 0xa400008f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000cd, +- 0x24000102, 0x64000069, 0x44000103, 0x94000002, ++ 0x24000102, 0x7400002c, 0x44000104, 0xb8010252, ++ 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd, ++ 0x24000102, 0x64000069, 0x44000103, 0xb8010202, + 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, +- 0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a3, +- 0x24000102, 0x64000072, 0x44000103, 0x94000033, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010247, ++ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010233, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000f6, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x74000064, 0x44000104, 0xb80102c2, ++ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0, ++ 0x24000102, 0x88010273, 0x44000104, 0xb8010236, + 0x24000102, 0x74000108, 0x64000109, 0xc400011f, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005c, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b2, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, +- 0x24000102, 0x64000106, 0x44000104, 0x94000040, ++ 0x24000102, 0x8801026f, 0x44000103, 0xb8010207, ++ 0x24000102, 0xb8010370, 0x54000105, 
0xa400005c, ++ 0x24000102, 0xb801030a, 0x44000104, 0xb801024c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b2, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xb801036d, 0xb8007320, 0xa4000025, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010240, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000db, ++ 0x24000102, 0x88010272, 0x44000103, 0xb801021f, ++ 0x24000102, 0xb8010375, 0x54000105, 0xa40000b4, ++ 0x24000102, 0xb8010363, 0x44000104, 0xb801026b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000db, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003a, ++ 0x24000102, 0xb801036c, 0x98010420, 0xa4000013, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023a, + 0x24000102, 0x84000001, 0x64000109, 0xb400001d, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020c, ++ 0x24000102, 0xb8010374, 0x54000105, 0xa400007f, ++ 0x24000102, 0xb801032c, 0x44000104, 0xb8010250, + 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, + 0x24000102, 0x64000069, 0x44000103, 0xa4000112, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, +- 0x24000102, 0x64000106, 0x44000104, 0x94000046, ++ 0x24000102, 0xb801036e, 0x54000020, 0xa400004b, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010246, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, +- 0x24000102, 0x64000072, 0x44000103, 0x94000029, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010229, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, +- 
0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000115, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xb8002065, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000115, ++ 0x24000102, 0xb8002073, 0x44000104, 0xb8010238, + 0x24000102, 0x74000108, 0x64000109, 0x3600001c, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xb800206f, 0x44000103, 0xb8010209, + 0x24000102, 0x74000070, 0x54000105, 0xa4000071, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000bb, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x7400000a, 0x44000104, 0xb801024e, ++ 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb, ++ 0x24000102, 0xb8002069, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, +- 0x24000102, 0x64000106, 0x44000104, 0x94000044, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010244, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xb8002072, 0x44000103, 0xb8010227, + 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, +- 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e5, ++ 0x24000102, 0x74000063, 0x44000104, 0xb8010278, ++ 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, +- 0x24000102, 0x64000073, 0x44000104, 0x9400003e, ++ 0x24000102, 0x7400006c, 0xb8010920, 0xa400001e, ++ 0x24000102, 0x64000073, 0x44000104, 0xb801023e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000083, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, ++ 0x24000102, 0x6400006f, 0x44000103, 0xb801020f, + 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000053, ++ 0x24000102, 0x7400002c, 0x44000104, 0xb8010253, + 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, +- 0x24000102, 
0x64000069, 0x44000103, 0x94000005, ++ 0x24000102, 0x64000069, 0x44000103, 0xb8010205, + 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, +- 0x24000102, 0x64000106, 0x44000104, 0x94000048, ++ 0x24000102, 0x64000106, 0x44000104, 0xb8010248, + 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, +- 0x24000102, 0x64000072, 0x44000103, 0x94000034, ++ 0x24000102, 0x64000072, 0x44000103, 0xb8010234, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c3, ++ 0x24000102, 0x74000064, 0x44000104, 0xb80102c3, + 0x24000102, 0x84000067, 0x64000061, 0x3e000128, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0xc8010477, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xc400011b, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005b, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000af, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xa400005b, ++ 0x24000102, 0x9801020a, 0x44000104, 0x94000049, ++ 0x24000102, 0xc8010430, 0x88010200, 0xb40000af, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006f20, 0xa4000024, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000089, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d9, +- 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, +- 0x24000102, 0x64000073, 
0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb400001a, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007e, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 0xb40000c6, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000113, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0xc8010404, 0x6400010a, 0xb4000089, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0xc8010442, 0x88010261, 0xb40000d9, ++ 0x24000102, 0xc8010665, 0x44000103, 0xc80104ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, ++ 0x24000102, 0xc8010673, 0x44000104, 0x94000039, ++ 0x24000102, 0xc8010401, 0x64000109, 0xb400001a, ++ 0x24000102, 0xc801066f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x98010274, 0x54000105, 0xa400007e, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, ++ 0x24000102, 0xc8010432, 0xc8010600, 0xb40000c6, ++ 0x24000102, 0xc8010669, 0x44000103, 0xa4000113, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, + 0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009c, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0xc801042d, 0x6400010a, 0xb400009c, ++ 0x24000102, 0xc8010672, 0x44000103, 0x94000028, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000ed, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, ++ 0x24000102, 0xc8010466, 0xc8010661, 0xb40000ed, ++ 0x24000102, 0xa8010465, 0x44000103, 0xc8010479, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 0x24000102, 0xa8010473, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 
0x36000014, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801046f, 0x44000103, 0x94000008, + 0x24000102, 0x74000070, 0x54000105, 0xa400006a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b9, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0xc8010431, 0xa8010400, 0xb40000b9, ++ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8007420, 0xa400002b, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000094, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0xc801040d, 0x6400010a, 0xb4000094, ++ 0x24000102, 0xa8010472, 0x44000103, 0x94000022, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e2, ++ 0x24000102, 0xc8010462, 0xa8010461, 0xb40000e2, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, + 0x24000102, 0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb4000081, ++ 0x24000102, 0xc8010403, 0x64000109, 0xb4000081, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, + 0x24000102, 0x74000074, 0x54000105, 0xa4000090, + 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000d1, ++ 0x24000102, 0xc8010441, 0x64000000, 0xb40000d1, + 0x24000102, 0x64000069, 0x44000103, 0x94000002, + 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, + 0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a7, ++ 0x24000102, 0xc801042e, 0x6400010a, 0xb40000a7, + 0x24000102, 0x64000072, 0x44000103, 0x94000033, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000fa, +- 0x24000102, 
0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0xc8010467, 0x64000061, 0xb40000fa, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xc8010568, 0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0x36000004, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005f, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b5, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000007, ++ 0x24000102, 0xc8010570, 0x54000105, 0xa400005f, ++ 0x24000102, 0xc801050a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b5, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xc801056d, 0xc8002c20, 0xa4000026, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000dd, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, ++ 0x24000102, 0xc8010575, 0x54000105, 0xa40000bd, ++ 0x24000102, 0xc8010563, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000dd, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0xc801056c, 0x98010420, 0xa4000014, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005d, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400008b, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0xc8010574, 0x54000105, 0xa400008b, ++ 0x24000102, 0xc801052c, 
0x44000104, 0x94000050, + 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, + 0x24000102, 0x64000069, 0x44000103, 0xa4000114, +- 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, ++ 0x24000102, 0xc801056e, 0x54000020, 0xa4000051, + 0x24000102, 0x64000106, 0x44000104, 0x94000046, + 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1, + 0x24000102, 0x64000072, 0x44000103, 0x94000029, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0xc8010564, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000116, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xc8010a65, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000116, ++ 0x24000102, 0xc8010a73, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0xb4000015, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xc8010a6f, 0x44000103, 0x94000009, + 0x24000102, 0x74000070, 0x54000105, 0xa400007a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000be, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x84000031, 0xc8010a00, 0xb40000be, ++ 0x24000102, 0xc8010a69, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, + 0x24000102, 0x64000106, 0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xc8010a72, 0x44000103, 0x94000027, + 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e9, ++ 0x24000102, 0x84000062, 0xc8010a61, 0xb40000e9, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x7400006c, 0xb8000020, 0xa4000021, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 
0x24000102, 0x84000003, 0x64000109, 0xb4000085, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +@@ -2114,120 +2114,120 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, + 0x24000102, 0x74000064, 0x44000104, 0x940000c3, + 0x24000102, 0x84000067, 0x64000061, 0x46000160, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0xa8010277, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa4000059, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000ae, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000023, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xa4000059, ++ 0x24000102, 0x9801020a, 0x44000104, 0x94000049, ++ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, +- 0x24000102, 0x84000004, 0x6400010a, 0xb4000088, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000a6, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000d8, +- 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000011, +- 0x24000102, 0x64000073, 0x44000104, 0x94000039, +- 0x24000102, 0x84000001, 0x64000109, 0xb4000019, +- 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007c, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, +- 0x24000102, 0x84000032, 0x64000000, 
0xb40000c5, +- 0x24000102, 0x64000069, 0x44000103, 0xa4000111, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400003d, ++ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8, ++ 0x24000102, 0xc8007265, 0x44000103, 0xa80102ff, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, ++ 0x24000102, 0xc8007273, 0x44000104, 0x94000039, ++ 0x24000102, 0xa8010201, 0x64000109, 0xb4000019, ++ 0x24000102, 0xc800726f, 0x44000103, 0x9400000b, ++ 0x24000102, 0x98010274, 0x54000105, 0xa400007c, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, ++ 0x24000102, 0xa8010232, 0xc8007200, 0xb40000c5, ++ 0x24000102, 0xc8007269, 0x44000103, 0xa4000111, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, + 0x24000102, 0x64000106, 0x44000104, 0x94000045, +- 0x24000102, 0x8400002d, 0x6400010a, 0xb400009b, +- 0x24000102, 0x64000072, 0x44000103, 0x94000028, ++ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b, ++ 0x24000102, 0xc8007272, 0x44000103, 0x94000028, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, +- 0x24000102, 0x84000066, 0x64000061, 0xb40000ec, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, ++ 0x24000102, 0xa8010266, 0xc8007261, 0xb40000ec, ++ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 0x24000102, 0xa8010373, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 0x3600000e, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801036f, 0x44000103, 0x94000008, + 0x24000102, 0x74000070, 0x54000105, 0xa4000060, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000b8, +- 
0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002a, ++ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8, ++ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8006e20, 0xa400002a, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, +- 0x24000102, 0x8400000d, 0x6400010a, 0xb4000093, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093, ++ 0x24000102, 0xa8010372, 0x44000103, 0x94000022, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e1, ++ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000018, ++ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, + 0x24000102, 0x64000073, 0x44000104, 0x9400003b, +- 0x24000102, 0x84000003, 0x64000109, 0xb400007d, ++ 0x24000102, 0xa8010203, 0x64000109, 0xb400007d, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, + 0x24000102, 0x74000074, 0x54000105, 0xa400008f, + 0x24000102, 0x7400002c, 0x44000104, 0x94000052, +- 0x24000102, 0x84000041, 0x64000000, 0xb40000ce, ++ 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce, + 0x24000102, 0x64000069, 0x44000103, 0x94000002, + 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, + 0x24000102, 0x64000106, 0x44000104, 0x94000047, +- 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a5, ++ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5, + 0x24000102, 0x64000072, 0x44000103, 0x94000033, + 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, +- 0x24000102, 0x84000067, 0x64000061, 0xb40000f7, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0xb8010468, 
0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0xc4000120, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005c, +- 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b3, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000025, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000007, ++ 0x24000102, 0xb8010470, 0x54000105, 0xa400005c, ++ 0x24000102, 0xb801040a, 0x44000104, 0x9400004c, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b3, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b4, +- 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000dc, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, ++ 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4, ++ 0x24000102, 0xb8010463, 0x44000104, 0x9400006b, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000dc, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000013, ++ 0x24000102, 0xb801046c, 0x98010420, 0xa4000013, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005a, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007f, +- 0x24000102, 0x7400002c, 0x44000104, 0x94000050, ++ 0x24000102, 0xb8010474, 0x54000105, 0xa400007f, ++ 0x24000102, 0xb801042c, 0x44000104, 0x94000050, + 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, + 0x24000102, 0x64000069, 0x44000103, 0xa4000112, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004b, ++ 0x24000102, 0xb801046e, 0x54000020, 0xa400004b, + 0x24000102, 0x64000106, 0x44000104, 
0x94000046, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, + 0x24000102, 0x64000072, 0x44000103, 0x94000029, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, +- 0x24000102, 0x74000064, 0x44000104, 0x940000c0, ++ 0x24000102, 0xb8010464, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000117, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xb8010565, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000117, ++ 0x24000102, 0xb8010573, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0x3600001e, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xb801056f, 0x44000103, 0x94000009, + 0x24000102, 0x74000070, 0x54000105, 0xa4000071, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000bc, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc, ++ 0x24000102, 0xb8010569, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, + 0x24000102, 0x64000106, 0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xb8010572, 0x44000103, 0x94000027, + 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e6, ++ 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001e, ++ 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000084, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +@@ -2242,56 +2242,56 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, + 0x24000102, 0x74000064, 
0x44000104, 0x940000c3, + 0x24000102, 0x84000067, 0x64000061, 0xb4000200, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000e0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000035, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0x98010268, 0x78010220, 0x940000e0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000035, + 0x24000102, 0x74000108, 0x64000109, 0xc400011c, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000006, +- 0x24000102, 0x74000070, 0x54000105, 0xa400005b, +- 0x24000102, 0x7400000a, 0x44000104, 0x94000049, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b1, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010d, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000024, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000006, ++ 0x24000102, 0x98010270, 0x54000105, 0xa400005b, ++ 0x24000102, 0x9801020a, 0x44000104, 0x94000049, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b1, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010d, ++ 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024, + 0x24000102, 0x64000106, 0x44000104, 0x9400003f, + 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, +- 0x24000102, 0x64000072, 0x44000103, 0x94000010, +- 0x24000102, 0x74000075, 0x54000105, 0xa40000b0, +- 0x24000102, 0x74000063, 0x44000104, 0x94000054, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000da, ++ 0x24000102, 0x88010272, 0x44000103, 0x94000010, ++ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, ++ 0x24000102, 0x98010263, 0x44000104, 0x94000054, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000da, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000012, ++ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, + 0x24000102, 0x64000073, 0x44000104, 0x94000039, + 0x24000102, 0x84000001, 0x64000109, 0xb400001b, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, +- 0x24000102, 0x74000074, 0x54000105, 0xa400007e, +- 0x24000102, 0x7400002c, 0x44000104, 0x9400004f, ++ 0x24000102, 0x98010274, 0x54000105, 
0xa400007e, ++ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, + 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, + 0x24000102, 0x64000069, 0x44000103, 0xa4000113, +- 0x24000102, 0x7400006e, 0x54000020, 0xa400004a, ++ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, + 0x24000102, 0x64000106, 0x44000104, 0x94000045, + 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, + 0x24000102, 0x64000072, 0x44000103, 0x94000028, + 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, +- 0x24000102, 0x74000064, 0x44000104, 0x94000080, ++ 0x24000102, 0x98010264, 0x44000104, 0x94000080, + 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0x940000fe, +- 0x24000102, 0x64000073, 0x44000104, 0x94000037, ++ 0x24000102, 0xa8010465, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000fe, ++ 0x24000102, 0xa8010473, 0x44000104, 0x94000037, + 0x24000102, 0x74000108, 0x64000109, 0x36000016, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000008, ++ 0x24000102, 0xa801046f, 0x44000103, 0x94000008, + 0x24000102, 0x74000070, 0x54000105, 0xa400006a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000ba, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010f, +- 0x24000102, 0x7400006d, 0x54000020, 0xa400002b, ++ 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba, ++ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, ++ 0x24000102, 0x7400006d, 0xc8010720, 0xa400002b, + 0x24000102, 0x64000106, 0x44000104, 0x94000043, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, +- 0x24000102, 0x64000072, 0x44000103, 0x94000022, ++ 0x24000102, 0xa8010472, 0x44000103, 0x94000022, + 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, + 0x24000102, 0x74000063, 0x44000104, 0x94000076, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000e4, ++ 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4, + 0x24000102, 0x64000065, 0x44000103, 0x9400010b, +- 0x24000102, 0x7400006c, 0x54000020, 0xa400001c, ++ 
0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, + 0x24000102, 0x64000073, 0x44000104, 0x9400003b, + 0x24000102, 0x84000003, 0x64000109, 0xb4000082, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, +@@ -2306,24 +2306,24 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, + 0x24000102, 0x74000064, 0x44000104, 0x940000c2, + 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, +- 0x24000102, 0x64000065, 0x44000103, 0x84000077, +- 0x24000102, 0x74000068, 0x54000020, 0x940000f0, +- 0x24000102, 0x64000073, 0x44000104, 0x94000036, ++ 0x24000102, 0x88010265, 0x44000103, 0x84000077, ++ 0x24000102, 0x74000068, 0x78010220, 0x940000f0, ++ 0x24000102, 0x88010273, 0x44000104, 0x94000036, + 0x24000102, 0x74000108, 0x64000109, 0x36000006, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000007, ++ 0x24000102, 0x8801026f, 0x44000103, 0x94000007, + 0x24000102, 0x74000070, 0x54000105, 0xa400005f, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, +- 0x24000102, 0x84000030, 0x64000000, 0xb40000b6, +- 0x24000102, 0x64000069, 0x44000103, 0x9400010e, +- 0x24000102, 0x7400006d, 0x54000020, 0xa4000026, ++ 0x24000102, 0x84000030, 0x88010200, 0xb40000b6, ++ 0x24000102, 0x88010269, 0x44000103, 0x9400010e, ++ 0x24000102, 0x7400006d, 0xc8006420, 0xa4000026, + 0x24000102, 0x64000106, 0x44000104, 0x94000040, + 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, +- 0x24000102, 0x64000072, 0x44000103, 0x9400001f, ++ 0x24000102, 0x88010272, 0x44000103, 0x9400001f, + 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, + 0x24000102, 0x74000063, 0x44000104, 0x9400006b, +- 0x24000102, 0x84000042, 0x64000061, 0xb40000de, ++ 0x24000102, 0x84000042, 0x88010261, 0xb40000de, + 0x24000102, 0x64000065, 0x44000103, 0x840000ff, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000014, ++ 0x24000102, 0x7400006c, 0x98010420, 0xa4000014, + 0x24000102, 0x64000073, 0x44000104, 0x9400003a, + 0x24000102, 0x84000001, 0x64000109, 0xb400005e, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, 
+@@ -2338,24 +2338,24 @@ struct inflate_huff_code_large pregen_lit_huff_code = { + 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, + 0x24000102, 0x74000064, 0x44000104, 0x940000c0, + 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, +- 0x24000102, 0x64000065, 0x44000103, 0x84000079, +- 0x24000102, 0x74000068, 0x54000020, 0xb4000118, +- 0x24000102, 0x64000073, 0x44000104, 0x94000038, ++ 0x24000102, 0xc8006165, 0x44000103, 0x84000079, ++ 0x24000102, 0x74000068, 0x78010220, 0xb4000118, ++ 0x24000102, 0xc8006173, 0x44000104, 0x94000038, + 0x24000102, 0x74000108, 0x64000109, 0xb4000016, +- 0x24000102, 0x6400006f, 0x44000103, 0x94000009, ++ 0x24000102, 0xc800616f, 0x44000103, 0x94000009, + 0x24000102, 0x74000070, 0x54000105, 0xa400007a, + 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, +- 0x24000102, 0x84000031, 0x64000000, 0xb40000bf, +- 0x24000102, 0x64000069, 0x44000103, 0x94000110, ++ 0x24000102, 0x84000031, 0xc8006100, 0xb40000bf, ++ 0x24000102, 0xc8006169, 0x44000103, 0x94000110, + 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, + 0x24000102, 0x64000106, 0x44000104, 0x94000044, + 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, +- 0x24000102, 0x64000072, 0x44000103, 0x94000027, ++ 0x24000102, 0xc8006172, 0x44000103, 0x94000027, + 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, + 0x24000102, 0x74000063, 0x44000104, 0x94000078, +- 0x24000102, 0x84000062, 0x64000061, 0xb40000ea, ++ 0x24000102, 0x84000062, 0xc8006161, 0xb40000ea, + 0x24000102, 0x64000065, 0x44000103, 0x9400010c, +- 0x24000102, 0x7400006c, 0x54000020, 0xa4000021, ++ 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021, + 0x24000102, 0x64000073, 0x44000104, 0x9400003e, + 0x24000102, 0x84000003, 0x64000109, 0xb4000086, + 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, +-- +2.20.1.windows.1 + + +From 9968e7a032212aa66826414cac560e4a4b2a2cc5 Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Tue, 20 Oct 2020 09:55:47 -0700 +Subject: [PATCH 29/42] Change gen cust hufftables to accept dictionary + +Change-Id: 
I4eed03bdb91030b16b3ecfd8076adc890e4f59a2 +Signed-off-by: Greg Tucker +--- + igzip/generate_custom_hufftables.c | 160 +++++++++++++++++++++++++++-- + 1 file changed, 152 insertions(+), 8 deletions(-) + +diff --git a/src/isa-l/igzip/generate_custom_hufftables.c b/src/isa-l/igzip/generate_custom_hufftables.c +index 60df1b0..4815faf 100644 +--- a/src/isa-l/igzip/generate_custom_hufftables.c ++++ b/src/isa-l/igzip/generate_custom_hufftables.c +@@ -64,6 +64,9 @@ + #include + #include "igzip_lib.h" + ++#include "huff_codes.h" ++#include "huffman.h" ++ + /*These max code lengths are limited by how the data is stored in + * hufftables.asm. The deflate standard max is 15.*/ + +@@ -233,25 +236,159 @@ void fprint_header(FILE * output_file) + fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE); + } + ++static uint32_t convert_dist_to_dist_sym(uint32_t dist) ++{ ++ assert(dist <= 32768 && dist > 0); ++ if (dist <= 32768) { ++ uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0; ++ return (msb * 2) + ((dist - 1) >> msb); ++ } else { ++ return ~0; ++ } ++} ++ ++/** ++ * @brief Returns the deflate symbol value for a repeat length. 
++ */ ++static uint32_t convert_length_to_len_sym(uint32_t length) ++{ ++ assert(length > 2 && length < 259); ++ ++ /* Based on tables on page 11 in RFC 1951 */ ++ if (length < 11) ++ return 257 + length - 3; ++ else if (length < 19) ++ return 261 + (length - 3) / 2; ++ else if (length < 35) ++ return 265 + (length - 3) / 4; ++ else if (length < 67) ++ return 269 + (length - 3) / 8; ++ else if (length < 131) ++ return 273 + (length - 3) / 16; ++ else if (length < 258) ++ return 277 + (length - 3) / 32; ++ else ++ return 285; ++} ++ ++void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length, ++ struct isal_huff_histogram *histogram) ++{ ++ uint32_t literal = 0, hash; ++ uint16_t seen, *last_seen = histogram->hash_table; ++ uint8_t *current, *end_stream, *next_hash, *end, *end_dict; ++ uint32_t match_length; ++ uint32_t dist; ++ uint64_t *lit_len_histogram = histogram->lit_len_histogram; ++ uint64_t *dist_histogram = histogram->dist_histogram; ++ ++ if (length <= 0) ++ return; ++ ++ end_stream = start_stream + dict_length + length; ++ end_dict = start_stream + dict_length; ++ ++ memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. 
*/ ++ ++ for (current = start_stream; current < end_dict - 4; current++) { ++ literal = load_u32(current); ++ hash = compute_hash(literal) & LVL0_HASH_MASK; ++ last_seen[hash] = (current - start_stream) & 0xFFFF; ++ } ++ ++ for (current = start_stream + dict_length; current < end_stream - 3; current++) { ++ literal = load_u32(current); ++ hash = compute_hash(literal) & LVL0_HASH_MASK; ++ seen = last_seen[hash]; ++ last_seen[hash] = (current - start_stream) & 0xFFFF; ++ dist = (current - start_stream - seen) & 0xFFFF; ++ if (dist - 1 < D - 1) { ++ assert(start_stream <= current - dist); ++ match_length = ++ compare258(current - dist, current, end_stream - current); ++ if (match_length >= SHORTEST_MATCH) { ++ next_hash = current; ++#ifdef ISAL_LIMIT_HASH_UPDATE ++ end = next_hash + 3; ++#else ++ end = next_hash + match_length; ++#endif ++ if (end > end_stream - 3) ++ end = end_stream - 3; ++ next_hash++; ++ for (; next_hash < end; next_hash++) { ++ literal = load_u32(next_hash); ++ hash = compute_hash(literal) & LVL0_HASH_MASK; ++ last_seen[hash] = (next_hash - start_stream) & 0xFFFF; ++ } ++ ++ dist_histogram[convert_dist_to_dist_sym(dist)] += 1; ++ lit_len_histogram[convert_length_to_len_sym(match_length)] += ++ 1; ++ current += match_length - 1; ++ continue; ++ } ++ } ++ lit_len_histogram[literal & 0xFF] += 1; ++ } ++ ++ for (; current < end_stream; current++) ++ lit_len_histogram[*current] += 1; ++ ++ lit_len_histogram[256] += 1; ++ return; ++} ++ + int main(int argc, char *argv[]) + { + long int file_length; ++ int argi = 1; + uint8_t *stream = NULL; + struct isal_hufftables hufftables; + struct isal_huff_histogram histogram; + struct isal_zstream tmp_stream; +- FILE *file; ++ FILE *file = NULL; ++ FILE *dict_file = NULL; ++ long int dict_file_length = 0; ++ uint8_t *dict_stream = NULL; + + if (argc == 1) { + printf("Error, no input file.\n"); + return 1; + } + ++ if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') { ++ dict_file = fopen(argv[2], "r"); ++ ++ 
fseek(dict_file, 0, SEEK_END); ++ dict_file_length = ftell(dict_file); ++ fseek(dict_file, 0, SEEK_SET); ++ dict_file_length -= ftell(dict_file); ++ dict_stream = malloc(dict_file_length); ++ if (dict_stream == NULL) { ++ printf("Failed to allocate memory to read in dictionary file\n"); ++ fclose(dict_file); ++ return 1; ++ } ++ if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) { ++ printf("Error occurred when reading dictionary file"); ++ fclose(dict_file); ++ free(dict_stream); ++ return 1; ++ } ++ isal_update_histogram(dict_stream, dict_file_length, &histogram); ++ ++ printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]); ++ argi += 2; ++ fclose(dict_file); ++ free(dict_stream); ++ } ++ + memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */ + +- while (argc > 1) { +- printf("Processing %s\n", argv[argc - 1]); +- file = fopen(argv[argc - 1], "r"); ++ while (argi < argc) { ++ printf("Processing %s\n", argv[argi]); ++ file = fopen(argv[argi], "r"); + if (file == NULL) { + printf("Error opening file\n"); + return 1; +@@ -260,13 +397,16 @@ int main(int argc, char *argv[]) + file_length = ftell(file); + fseek(file, 0, SEEK_SET); + file_length -= ftell(file); +- stream = malloc(file_length); ++ stream = malloc(file_length + dict_file_length); + if (stream == NULL) { + printf("Failed to allocate memory to read in file\n"); + fclose(file); + return 1; + } +- if (fread(stream, 1, file_length, file) != file_length) { ++ if (dict_file_length > 0) ++ memcpy(stream, dict_stream, dict_file_length); ++ ++ if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) { + printf("Error occurred when reading file"); + fclose(file); + free(stream); +@@ -275,11 +415,15 @@ int main(int argc, char *argv[]) + + /* Create a histogram of frequency of symbols found in stream to + * generate the huffman tree.*/ +- isal_update_histogram(stream, file_length, &histogram); ++ if (0 == dict_file_length) ++ 
isal_update_histogram(stream, file_length, &histogram); ++ else ++ isal_update_histogram_dict(stream, dict_file_length, file_length, ++ &histogram); + + fclose(file); + free(stream); +- argc--; ++ argi++; + } + + isal_create_hufftables(&hufftables, &histogram); +-- +2.20.1.windows.1 + + +From 89f7c46cd53a71a31f99aaa3c9aa5776d9b7b8ea Mon Sep 17 00:00:00 2001 +From: Greg Tucker +Date: Tue, 20 Oct 2020 09:55:53 -0700 +Subject: [PATCH 30/42] Change igzip_file_perf to accept 0 time + +Change-Id: Ie2edf8e742d0bcdd9a008704f997006f8f5009ac +Signed-off-by: Greg Tucker +--- + igzip/igzip_file_perf.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/src/isa-l/igzip/igzip_file_perf.c b/src/isa-l/igzip/igzip_file_perf.c +index c04ed24..fafb174 100644 +--- a/src/isa-l/igzip/igzip_file_perf.c ++++ b/src/isa-l/igzip/igzip_file_perf.c +@@ -98,7 +98,7 @@ int usage(void) + " -h help\n" + " -X use compression level X with 0 <= X <= 1\n" + " -b input buffer size, 0 buffers all the input\n" +- " -i