From 74c8611e4ab4e5670dff5c850127b7bb33ae8bae Mon Sep 17 00:00:00 2001 From: wuyuechuan Date: Wed, 9 Feb 2022 10:04:31 +0800 Subject: [PATCH 1/2] probackup: support backup compress files --- build/script/aarch64_lite_list | 2 + build/script/aarch64_opengauss_list | 2 + build/script/opengauss_release_list_mini | 2 + .../opengauss_release_list_ubuntu_single | 2 + build/script/x86_64_lite_list | 2 + build/script/x86_64_opengauss_list | 3 +- contrib/pagehack/CMakeLists.txt | 10 +- contrib/pagehack/Makefile | 8 +- contrib/pagehack/compression_algorithm.ini | 1308 ----------------- contrib/pagehack/openGaussCompression.cpp | 177 --- contrib/pagehack/openGaussCompression.h | 40 - contrib/pagehack/pagehack.cpp | 125 +- src/Makefile.global.in | 5 +- src/bin/pg_ctl/CMakeLists.txt | 21 +- src/bin/pg_ctl/Makefile | 8 +- src/bin/pg_probackup/CMakeLists.txt | 10 +- src/bin/pg_probackup/Makefile | 6 +- src/bin/pg_probackup/catalog.cpp | 15 + src/bin/pg_probackup/data.cpp | 32 +- src/bin/pg_probackup/dir.cpp | 88 +- src/bin/pg_probackup/file.cpp | 156 +- src/bin/pg_probackup/file.h | 13 +- src/bin/pg_probackup/merge.cpp | 5 +- src/bin/pg_probackup/pg_probackupb.h | 3 + src/bin/pg_probackup/restore.cpp | 22 + src/bin/pg_rewind/CMakeLists.txt | 2 +- src/bin/pg_rewind/Makefile | 6 +- src/bin/pg_rewind/compressed_rewind.cpp | 129 -- src/bin/pg_rewind/compressed_rewind.h | 21 - src/bin/pg_rewind/fetch.cpp | 56 +- src/bin/pg_rewind/file_ops.cpp | 160 +- src/bin/pg_rewind/file_ops.h | 9 +- src/bin/pg_rewind/filemap.cpp | 4 +- .../backend/utils/adt/pg_lzcompress.cpp | 299 +--- .../storage/page/checksum_impl.cpp | 74 + src/gausskernel/storage/smgr/md.cpp | 2 +- .../storage/smgr/page_compression.cpp | 14 - src/include/knl/knl_session.h | 5 + src/include/storage/checksum_impl.h | 2 +- src/include/storage/page_compression.h | 8 +- src/include/storage/page_compression_impl.h | 52 +- src/include/utils/pg_lzcompress.h | 7 +- src/lib/CMakeLists.txt | 2 + src/lib/Makefile | 2 +- src/lib/page_compression/CMakeLists.txt | 15 + src/lib/page_compression/Makefile | 65 + src/lib/page_compression/PageCompression.cpp | 521 +++++++ src/lib/page_compression/PageCompression.h | 81 + .../page_compression}/compressed_common.h | 0 src/lib/page_compression/page_compression.sh | 35 + 50 files changed, 1280 insertions(+), 2356 deletions(-) delete mode 100644 contrib/pagehack/compression_algorithm.ini delete mode 100644 contrib/pagehack/openGaussCompression.cpp delete mode 100644 contrib/pagehack/openGaussCompression.h delete mode 100644 src/bin/pg_rewind/compressed_rewind.cpp delete mode 100644 src/bin/pg_rewind/compressed_rewind.h create mode 100644 src/lib/page_compression/CMakeLists.txt create mode 100644 src/lib/page_compression/Makefile create mode 100644 src/lib/page_compression/PageCompression.cpp create mode 100644 src/lib/page_compression/PageCompression.h rename src/{bin/pg_rewind => lib/page_compression}/compressed_common.h (100%) create mode 100644 src/lib/page_compression/page_compression.sh diff --git a/build/script/aarch64_lite_list b/build/script/aarch64_lite_list index 05a5a48e99c..46d59f9bfc2 100644 --- a/build/script/aarch64_lite_list +++ b/build/script/aarch64_lite_list @@ -710,6 +710,8 @@ ./lib/libcurl.so.4 ./lib/libcurl.so.4.7.0 ./lib/libxgboost.so +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./lib/postgresql/latin2_and_win1250.so ./lib/postgresql/euc2004_sjis2004.so ./lib/postgresql/euc_kr_and_mic.so diff --git a/build/script/aarch64_opengauss_list b/build/script/aarch64_opengauss_list index 
d17be26fa0d..89af4cb2fcd 100644 --- a/build/script/aarch64_opengauss_list +++ b/build/script/aarch64_opengauss_list @@ -867,6 +867,8 @@ ./lib/libzstd.so.1 ./lib/libzstd.so.1.5.0 ./lib/libxgboost.so +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./include/postgresql/server/postgres_ext.h ./include/postgresql/server/pg_config_os.h diff --git a/build/script/opengauss_release_list_mini b/build/script/opengauss_release_list_mini index 99f12d438c4..236367fb39e 100644 --- a/build/script/opengauss_release_list_mini +++ b/build/script/opengauss_release_list_mini @@ -865,6 +865,8 @@ ./lib/libnghttp2.so ./lib/libnghttp2.so.14 ./lib/libnghttp2.so.14.20.0 +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./lib/libpcre.so ./lib/libpcre.so.1 ./lib/libpcre.so.1.2.12 diff --git a/build/script/opengauss_release_list_ubuntu_single b/build/script/opengauss_release_list_ubuntu_single index 52fb5888818..52ba5da8892 100644 --- a/build/script/opengauss_release_list_ubuntu_single +++ b/build/script/opengauss_release_list_ubuntu_single @@ -864,6 +864,8 @@ ./lib/libnghttp2.so ./lib/libnghttp2.so.14 ./lib/libnghttp2.so.14.20.0 +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./lib/libpcre.so ./lib/libpcre.so.1 ./lib/libpcre.so.1.2.12 diff --git a/build/script/x86_64_lite_list b/build/script/x86_64_lite_list index b51ae6ba97a..8857cfd6857 100644 --- a/build/script/x86_64_lite_list +++ b/build/script/x86_64_lite_list @@ -706,6 +706,8 @@ ./lib/libcurl.so.4 ./lib/libcurl.so.4.7.0 ./lib/libxgboost.so +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./lib/postgresql/latin2_and_win1250.so ./lib/postgresql/euc2004_sjis2004.so ./lib/postgresql/euc_kr_and_mic.so diff --git a/build/script/x86_64_opengauss_list b/build/script/x86_64_opengauss_list index d64614e2943..4632f23429a 100644 --- a/build/script/x86_64_opengauss_list +++ b/build/script/x86_64_opengauss_list @@ -867,7 +867,8 @@ ./lib/libzstd.so.1 ./lib/libzstd.so.1.5.0 ./lib/libxgboost.so - +./lib/libpagecompression.so +./lib/libpagecompression.so.1 ./include/postgresql/server/postgres_ext.h ./include/postgresql/server/pg_config_os.h ./include/postgresql/server/pgtime.h diff --git a/contrib/pagehack/CMakeLists.txt b/contrib/pagehack/CMakeLists.txt index cc6a658e2d2..dfcfededf00 100644 --- a/contrib/pagehack/CMakeLists.txt +++ b/contrib/pagehack/CMakeLists.txt @@ -2,22 +2,22 @@ # pagehack AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_pagehack_SRC) set(TGT_pagehack_INC - ${TGT_pq_INC} ${ZSTD_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SRC_DIR}/lib/gstrace + ${TGT_pq_INC} ${ZSTD_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SRC_DIR}/lib/gstrace ${PROJECT_SRC_DIR}/lib/page_compression ) set(pagehack_DEF_OPTIONS ${MACRO_OPTIONS}) if(${ENABLE_DEBUG} STREQUAL "ON") - set(pagehack_DEF_OPTIONS ${pagehack_DEF_OPTIONS} -DDEBUG) + set(pagehack_DEF_OPTIONS ${pagehack_DEF_OPTIONS} -DDEBUG -DFRONTEND) endif() set(pagehack_COMPILE_OPTIONS ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${CHECK_OPTIONS} ${BIN_SECURE_OPTIONS} ${OPTIMIZE_OPTIONS}) set(pagehack_LINK_OPTIONS ${BIN_LINK_OPTIONS}) -set(pagehack_LINK_LIBS -lpgport -lcrypt -ldl -lm -ledit -lssl -lcrypto -lsecurec -lrt -lz -lminiunz -lzstd) +set(pagehack_LINK_LIBS -lpgport -lcrypt -ldl -lm -ledit -lssl -lcrypto -lsecurec -lrt -lz -lminiunz -lzstd -lpagecompression) add_bintarget(pagehack TGT_pagehack_SRC TGT_pagehack_INC "${pagehack_DEF_OPTIONS}" "${pagehack_COMPILE_OPTIONS}" "${pagehack_LINK_OPTIONS}" "${pagehack_LINK_LIBS}") -add_dependencies(pagehack 
pgport_static) +add_dependencies(pagehack pgport_static pagecompression) target_link_directories(pagehack PUBLIC ${LIBOPENSSL_LIB_PATH} ${PROTOBUF_LIB_PATH} ${LIBPARQUET_LIB_PATH} ${LIBCURL_LIB_PATH} ${SECURE_LIB_PATH} ${ZLIB_LIB_PATH} ${LIBOBS_LIB_PATH} ${LIBEDIT_LIB_PATH} ${LIBCGROUP_LIB_PATH} ${CMAKE_BINARY_DIR}/lib - ${ZSTD_LIB_PATH} + ${ZSTD_LIB_PATH} ${PROJECT_SRC_DIR}/lib/page_compression ) install(TARGETS pagehack RUNTIME DESTINATION bin) diff --git a/contrib/pagehack/Makefile b/contrib/pagehack/Makefile index fe8eca4077b..95c791076f3 100644 --- a/contrib/pagehack/Makefile +++ b/contrib/pagehack/Makefile @@ -1,9 +1,10 @@ # contrib/pagehack/Makefile MODULE_big = pagehack -OBJS = openGaussCompression.o pagehack.o +OBJS = pagehack.o # executable program, even there is no database server/client PROGRAM = pagehack +all: submake-pagecompression ifdef USE_PGXS PG_CONFIG = pg_config @@ -13,8 +14,9 @@ else subdir = contrib/pagehack top_builddir = ../.. include $(top_builddir)/src/Makefile.global -override CFLAGS += -lzstd - +override CPPFLAGS += -I${top_builddir}/src/lib/page_compression +override LDFLAGS += -L${top_builddir}/src/lib/page_compression +override CFLAGS += -lpagecompression -lzstd ifeq ($(enable_debug), yes) PG_CPPFLAGS += -DDEBUG endif diff --git a/contrib/pagehack/compression_algorithm.ini b/contrib/pagehack/compression_algorithm.ini deleted file mode 100644 index efe51da599e..00000000000 --- a/contrib/pagehack/compression_algorithm.ini +++ /dev/null @@ -1,1308 +0,0 @@ -size_t GetSizeOfHeadData(bool heapPageData) -{ - if (heapPageData) { - return SizeOfHeapPageHeaderData; - } else { - return SizeOfPageHeaderData; - } -} - -// maybe some itemid is not valid -uint16 HeapPageCalcRealRowCnt (char *buf) { - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 cnt = 0; - uint16 i; - uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); - - for (i = 0; i < row_cnt; i++) { - if (ItemIdIsNormal(GET_ITEMID_BY_IDX(buf, i))) { - cnt++; - } - } - return cnt; -} - -void DecompressDeconvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) { - errno_t ret; - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = real_row_cnt; - uint32 total_size = page->pd_special - page->pd_upper; - char *copy_begin = buf + page->pd_upper; - char *row; - uint16 i, j, k, cur, up, row_size; - - ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(ret, "", ""); - - k = 0; - for (i = 0; i < max_row_len; i++) { - for (j = 0; j < row_cnt; j++) { - up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off; - cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off; - row_size = up - cur; - row = aux_buf + cur; - if (i < row_size) { - row[i] = copy_begin[k++]; // this part is reshaped - } - } - } - - if (k != total_size) { - printf("ERROR!!! pg_deconvert_rows error...!!!\n"); - ASSERT(0); - return; - } - - // cp aux_buf to page_buf - ret = memcpy_sp(copy_begin, total_size, aux_buf + page->pd_upper, total_size); - securec_check(ret, "", ""); - return ; -} - -// 1: as tuple_offset order, that means asc order. -// 2: store all itemid's idx. -// 3:maybe some itemid is not in order. 
-void CompressConvertItemRealOrder(char *buf, int16 *real_order, uint16 real_row_cnt) { - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); - ItemIdData *begin = (ItemIdData *)(buf + GetPageHeaderSize(page)); - int16 *link_order = real_order + real_row_cnt; - - int16 i, head, curr, prev; - int16 end = -1; // invalid index - - head = end; - // very likely to seems that itemids stored by desc order, and ignore invalid itemid - for (i = 0; i < row_cnt; i++) { - if (!ItemIdIsNormal(begin + i)) { - continue; - } - - if (head == end) { // set the head idx, insert the first - link_order[i] = end; - head = i; - continue; - } - - if ((begin + i)->lp_off < (begin + head)->lp_off) { - link_order[i] = head; // update the head idx - head = i; - continue; - } - - prev = head; - curr = link_order[head]; - while ((curr != end) && ((begin + i)->lp_off > (begin + curr)->lp_off)) { - prev = curr; - curr = link_order[curr]; - } - - link_order[prev] = i; - link_order[i] = curr; - } - - // arrange the link to array - curr = head; - for (i = 0; i < real_row_cnt; i++) { - real_order[i] = curr; - curr = link_order[curr]; - } - - if (curr != end) { - printf("ERROR!!! pre_convert_real_order error...!!!\n"); - ASSERT(0); - return; - } - -} - -int DecompressPage(const char* src, char* dst, uint8 algorithm) -{ - if (PageIs8BXidHeapVersion(src)) { - return TemplateDecompressPage(src, dst, algorithm); - } else { - return TemplateDecompressPage(src, dst, algorithm); - } -} - -void cprs_diff_deconvert_rows(char *buf, uint32 offset, uint16 min_row_len, uint16 real_row_cnt) { - uint16 row_cnt = real_row_cnt; - uint32 common_size = min_row_len; - uint8 *copy_begin = (uint8 *)(buf + offset); - uint16 i, j; - - for (i = 0; i < common_size; i++) { - for (j = 1; j < row_cnt; j++) { - copy_begin[i * row_cnt + j] += copy_begin[i * row_cnt + (j - 1)]; - } - } - return ; -} - -// to find all row size are diffs in MIN_DIFF_SIZE byts. -bool CompressConvertCheck(char *buf, int16 **real_order, uint16 *max_row_len, uint16 *min_row_len, uint16 *real_row_cnt) { - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); - int16 i, row_size; - ItemIdData *ptr = NULL; - uint16 up = page->pd_special; - uint16 min_size = GS_INVALID_ID16; - uint16 max_size = 0; - errno_t ret; - if (page->pd_lower < GetPageHeaderSize(page) || (page->pd_lower > page->pd_upper)) { - return false; - } - - uint16 normal_row_cnt = HeapPageCalcRealRowCnt(buf); - if (normal_row_cnt < MIN_CONVERT_CNT) { // no need convert - return false; - } - - // to store the real tuple order. - /* - --------------------------|-------------------------- - xxxxxxxxxxxxxxxxxxxxxxxxxx|xxxxxxxxxxxxxxxxxxxxxxxxxx - --------------------------|-------------------------- - */ - // the first part is real array order, and the second part is link. - *real_order = (int16 *)malloc(sizeof(uint16) * row_cnt * 2); - if (*real_order == NULL) { - printf("zfunc compress file"); - return false; - } - ret = memset_sp(*real_order, sizeof(uint16) * row_cnt * 2, 0, sizeof(uint16) * row_cnt * 2); - securec_check(ret, "", ""); - - // order the ItemIds by tuple_offset order. - CompressConvertItemRealOrder(buf, *real_order, normal_row_cnt); - - // do the check, to check all size of tuples. 
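The diff-convert step that cprs_diff_deconvert_rows() above reverses works on the byte-converted (column-major) tuple image: for each of the first min_row_len byte positions, every row's byte is replaced by its difference from the same byte of the previous row, and decompression restores the originals with a running prefix sum. A minimal standalone sketch of that encode/decode pair, operating on a plain buffer rather than a real heap page (the helper names are illustrative, not from the patch):

#include <cstdint>

/* Encode: byte position i of row j becomes its delta against row j-1.
 * buf is laid out column-major (byte i of all rows is contiguous), exactly
 * like the reshaped tuple area handled by cprs_diff_convert_rows(). */
static void DiffConvert(uint8_t *buf, uint16_t rowCnt, uint16_t commonSize)
{
    for (uint16_t i = 0; i < commonSize; i++) {
        for (uint16_t j = rowCnt - 1; j > 0; j--) {
            buf[i * rowCnt + j] -= buf[i * rowCnt + (j - 1)];
        }
    }
}

/* Decode: a running prefix sum restores the original bytes, mirroring
 * cprs_diff_deconvert_rows(). */
static void DiffDeconvert(uint8_t *buf, uint16_t rowCnt, uint16_t commonSize)
{
    for (uint16_t i = 0; i < commonSize; i++) {
        for (uint16_t j = 1; j < rowCnt; j++) {
            buf[i * rowCnt + j] += buf[i * rowCnt + (j - 1)];
        }
    }
}

Because neighbouring tuples tend to be similar, the deltas are mostly small or zero, which pglz/zstd compress far better than the raw bytes; the transform is lossless as long as the same rowCnt and commonSize are used in both directions.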
- for (i = normal_row_cnt - 1; i >= 0; i--) { - ptr = GET_ITEMID_BY_IDX(buf, ((*real_order)[i])); - - row_size = up - ptr->lp_off; - if (row_size < MIN_CONVERT_CNT * 2) { - return false; - } - - min_size = (row_size < min_size) ? row_size : min_size; - max_size = (row_size > max_size) ? row_size : max_size; - - if ((max_size - min_size) > MIN_DIFF_SIZE) { // no need convert - return false; - } - up = ptr->lp_off; - } - - // get the min row common size. - *max_row_len = max_size; - *min_row_len = min_size; - *real_row_cnt = normal_row_cnt; - return true; -} - -void DecompressDeconvertItemIds(char *buf, char *aux_buf) { - errno_t ret; - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); - uint32 total_size = row_cnt * sizeof(ItemIdData); - char *copy_begin = buf + GetPageHeaderSize(page); - uint16 i, j, k; - - // clear aux_buf - ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(ret, "", ""); - - k = 0; - for (i = 0; i < sizeof(ItemIdData); i++) { - for (j = 0; j < row_cnt; j++) { - aux_buf[j * sizeof(ItemIdData) + i] = copy_begin[k++]; - } - } - - // cp aux_buf to page_buf - ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); - securec_check(ret, "", ""); - return ; -} - - -void DecompressDeconvertOnePage(char *buf, char *aux_buf, bool diff_convert) { - uint16 max_row_len = 0; - uint16 min_row_len = 0; - int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real - uint16 real_row_cnt = 0; - - if (diff_convert) { - cprs_diff_deconvert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData), - (((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData)); - } - - // =======firstly, arrange the itemids. - DecompressDeconvertItemIds(buf, aux_buf); - - if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) { - if (real_order != NULL) { - free(real_order); - } - ASSERT(0); - return ; - } - - // =======and last, the tuples - if (diff_convert) { - cprs_diff_deconvert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt); - } - DecompressDeconvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt); - - if (real_order != NULL) { - free(real_order); - } - return ; -} - - -void DecompressPageDeconvert(char *src, bool diff_convert) -{ - char *aux_buf = NULL; - errno_t rc; - - aux_buf = (char *)malloc(BLCKSZ); - if (aux_buf == NULL) { - // add log - return; - } - rc = memset_s(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(rc, "", ""); - - // do convert - DecompressDeconvertOnePage(src, aux_buf, diff_convert); - - if (aux_buf != NULL) { - free(aux_buf); - } -} - - -/** - * DecompressPage() -- Decompress one compressed page. - * return size of decompressed page which should be BLCKSZ or - * -1 for decompress error - * -2 for unrecognized compression algorithm - * - * note:The size of dst must be greater than or equal to BLCKSZ. 
- */ -template -int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm) -{ - int decompressed_size; - char* data; - uint32 size; - bool byte_convert, diff_convert; - size_t sizeOfPageHeaderData = GetSizeOfHeadData(heapPageData); - int rc = memcpy_s(dst, sizeOfPageHeaderData, src, sizeOfPageHeaderData); - securec_check(rc, "", ""); - - if (heapPageData) { - data = ((HeapPageCompressData*) src)->data; - size = ((HeapPageCompressData*) src)->size; - byte_convert = ((HeapPageCompressData*) src)->byte_convert; - diff_convert = ((HeapPageCompressData*) src)->diff_convert; - } else { - data = ((PageCompressData*) src)->data; - size = ((PageCompressData*) src)->size; - byte_convert = ((PageCompressData*) src)->byte_convert; - diff_convert = ((PageCompressData*) src)->diff_convert; - } - - switch (algorithm) { - case COMPRESS_ALGORITHM_PGLZ: - decompressed_size = lz_decompress( - data, size, dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, false); - break; - case COMPRESS_ALGORITHM_ZSTD: - decompressed_size = - ZSTD_decompress(dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, data, size); - - if (ZSTD_isError(decompressed_size)) { - return -1; - } - - break; - - default: - return COMPRESS_UNSUPPORTED_ERROR; - break; - } - - if (byte_convert) { - // deconvert dst - DecompressPageDeconvert(dst, diff_convert); - } - - return sizeOfPageHeaderData + decompressed_size; -} - -// pg_lz -/* ---------- - * pg_lzcompress.c - - * - * This is an implementation of LZ compression for PostgreSQL. - * It uses a simple history table and generates 2-3 byte tags - * capable of backward copy information for 3-273 bytes with - * a max offset of 4095. - * - * Entry routines: - * - * bool - * pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, - * const PGLZ_Strategy *strategy); - * - * source is the input data to be compressed. - * - * slen is the length of the input data. - * - * dest is the output area for the compressed result. - * It must be at least as big as PGLZ_MAX_OUTPUT(slen). - * - * strategy is a pointer to some information controlling - * the compression algorithm. If NULL, the compiled - * in default strategy is used. - * - * The return value is TRUE if compression succeeded, - * FALSE if not; in the latter case the contents of dest - * are undefined. - * - * void - * pglz_decompress(const PGLZ_Header *source, char *dest) - * - * source is the compressed input. - * - * dest is the area where the uncompressed data will be - * written to. It is the callers responsibility to - * provide enough space. The required amount can be - * obtained with the macro PGLZ_RAW_SIZE(source). - * - * The data is written to buff exactly as it was handed - * to pglz_compress(). No terminating zero byte is added. - * - * The decompression algorithm and internal data format: - * - * PGLZ_Header is defined as - * - * typedef struct PGLZ_Header { - * int32 vl_len_; - * int32 rawsize; - * } - * - * The header is followed by the compressed data itself. - * - * The data representation is easiest explained by describing - * the process of decompression. - * - * If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data - * is stored uncompressed as plain bytes. Thus, the decompressor - * simply copies rawsize bytes from the location after the - * header to the destination. - * - * Otherwise the first byte after the header tells what to do - * the next 8 times. We call this the control byte. 
- * - * An unset bit in the control byte means, that one uncompressed - * byte follows, which is copied from input to output. - * - * A set bit in the control byte means, that a tag of 2-3 bytes - * follows. A tag contains information to copy some bytes, that - * are already in the output buffer, to the current location in - * the output. Let's call the three tag bytes T1, T2 and T3. The - * position of the data to copy is coded as an offset from the - * actual output position. - * - * The offset is in the upper nibble of T1 and in T2. - * The length is in the lower nibble of T1. - * - * So the 16 bits of a 2 byte tag are coded as - * - * 7---T1--0 7---T2--0 - * OOOO LLLL OOOO OOOO - * - * This limits the offset to 1-4095 (12 bits) and the length - * to 3-18 (4 bits) because 3 is always added to it. To emit - * a tag of 2 bytes with a length of 2 only saves one control - * bit. But we lose one byte in the possible length of a tag. - * - * In the actual implementation, the 2 byte tag's length is - * limited to 3-17, because the value 0xF in the length nibble - * has special meaning. It means, that the next following - * byte (T3) has to be added to the length value of 18. That - * makes total limits of 1-4095 for offset and 3-273 for length. - * - * Now that we have successfully decoded a tag. We simply copy - * the output that occurred bytes back to the current - * output location in the specified . Thus, a - * sequence of 200 spaces (think about bpchar fields) could be - * coded in 4 bytes. One literal space and a three byte tag to - * copy 199 bytes with a -1 offset. Whow - that's a compression - * rate of 98%! Well, the implementation needs to save the - * original data size too, so we need another 4 bytes for it - * and end up with a total compression rate of 96%, what's still - * worth a Whow. - * - * The compression algorithm - * - * The following uses numbers used in the default strategy. - * - * The compressor works best for attributes of a size between - * 1K and 1M. For smaller items there's not that much chance of - * redundancy in the character sequence (except for large areas - * of identical bytes like trailing spaces) and for bigger ones - * our 4K maximum look-back distance is too small. - * - * The compressor creates a table for 8192 lists of positions. - * For each input position (except the last 3), a hash key is - * built from the 4 next input bytes and the position remembered - * in the appropriate list. Thus, the table points to linked - * lists of likely to be at least in the first 4 characters - * matching strings. This is done on the fly while the input - * is compressed into the output area. Table entries are only - * kept for the last 4096 input positions, since we cannot use - * back-pointers larger than that anyway. - * - * For each byte in the input, it's hash key (built from this - * byte and the next 3) is used to find the appropriate list - * in the table. The lists remember the positions of all bytes - * that had the same hash key in the past in increasing backward - * offset order. Now for all entries in the used lists, the - * match length is computed by comparing the characters from the - * entries position with the characters from the actual input - * position. - * - * The compressor starts with a so called "good_match" of 128. - * It is a "prefer speed against compression ratio" optimizer. - * So if the first entry looked at already has 128 or more - * matching characters, the lookup stops and that position is - * used for the next tag in the output. 
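To make the tag layout above concrete, the following hedged sketch decodes a single back-reference tag into its (offset, length) pair. It follows the nibble layout documented here, offset bits in the high nibble of T1 plus all of T2, length in the low nibble of T1 with 3 added, and the value 0xF selecting the extension byte T3 that is added to 18, but it is a standalone toy, not the decoder used by the backend:

#include <cstdint>
#include <cstddef>

/* Decode one pglz tag starting at tag[0]. Fills offset (1..4095) and
 * length (3..273) and returns how many tag bytes were consumed (2 or 3). */
static size_t DecodePglzTag(const unsigned char *tag, int32_t *offset, int32_t *length)
{
    int32_t len = (tag[0] & 0x0f) + 3;              /* low nibble of T1, plus the implicit 3 */
    int32_t off = ((tag[0] & 0xf0) << 4) | tag[1];  /* high nibble of T1 = offset bits 8..11 */

    *offset = off;
    if (len == 18) {                                /* length nibble was 0xF: add extension byte T3 */
        *length = len + tag[2];
        return 3;
    }
    *length = len;
    return 2;
}

The decompressor then copies *length already-written bytes from *offset positions back in the output, which is how a run of 200 identical bytes collapses to one literal plus a single tag.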
- * - * For each subsequent entry in the history list, the "good_match" - * is lowered by 10%. So the compressor will be more happy with - * short matches the farer it has to go back in the history. - * Another "speed against ratio" preference characteristic of - * the algorithm. - * - * Thus there are 3 stop conditions for the lookup of matches: - * - * - a match >= good_match is found - * - there are no more history entries to look at - * - the next history entry is already too far back - * to be coded into a tag. - * - * Finally the match algorithm checks that at least a match - * of 3 or more bytes has been found, because thats the smallest - * amount of copy information to code into a tag. If so, a tag - * is omitted and all the input bytes covered by that are just - * scanned for the history add's, otherwise a literal character - * is omitted and only his history entry added. - * - * Acknowledgements: - * - * Many thanks to Adisak Pochanayon, who's article about SLZ - * inspired me to write the PostgreSQL compression this way. - * - * Jan Wieck - * - * Copyright (c) 1999-2012, PostgreSQL Global Development Group - * - * src/backend/utils/adt/pg_lzcompress.c - * ---------- - */ -#include "postgres.h" -#include "knl/knl_variable.h" - -#include - -#include "utils/pg_lzcompress.h" - -/* ---------- - * The provided standard strategies - * ---------- - */ -static const PGLZ_Strategy strategy_default_data = { - 32, /* Data chunks less than 32 bytes are not - * compressed */ - INT_MAX, /* No upper limit on what we'll try to - * compress */ - 25, /* Require 25% compression rate, or not worth - * it */ - 1024, /* Give up if no compression in the first 1KB */ - 128, /* Stop history lookup if a match of 128 bytes - * is found */ - 10 /* Lower good match size by 10% at every loop - * iteration */ -}; -const PGLZ_Strategy* const PGLZ_strategy_default = &strategy_default_data; - -static const PGLZ_Strategy strategy_always_data = { - 0, /* Chunks of any size are compressed */ - INT_MAX, - 0, /* It's enough to save one single byte */ - INT_MAX, /* Never give up early */ - 128, /* Stop history lookup if a match of 128 bytes - * is found */ - 6 /* Look harder for a good match */ -}; -const PGLZ_Strategy* const PGLZ_strategy_always = &strategy_always_data; - -/* ---------- - * pglz_hist_idx - - * - * Computes the history table slot for the lookup by the next 4 - * characters in the input. - * - * NB: because we use the next 4 characters, we are not guaranteed to - * find 3-character matches; they very possibly will be in the wrong - * hash list. This seems an acceptable tradeoff for spreading out the - * hash keys more. - * ---------- - */ -#define pglz_hist_idx(_s, _e) \ - (((((_e) - (_s)) < 4) ? (int)(_s)[0] \ - : (((unsigned char)((_s)[0]) << 9) ^ ((unsigned char)((_s)[1]) << 6) ^ \ - ((unsigned char)((_s)[2]) << 3) ^ (unsigned char)((_s)[3]))) & \ - (PGLZ_HISTORY_MASK)) - -/* ---------- - * pglz_hist_add - - * - * Adds a new entry to the history table. - * - * If _recycle is true, then we are recycling a previously used entry, - * and must first delink it from its old hashcode's linked list. - * - * NOTE: beware of multiple evaluations of macro's arguments, and note that - * _hn and _recycle are modified in the macro. 
- * ---------- - */ -#define pglz_hist_add(_hs, _he, _hn, _recycle, _s, _e) \ - do { \ - int __hindex = pglz_hist_idx((_s), (_e)); \ - PGLZ_HistEntry** __myhsp = &(_hs)[__hindex]; \ - PGLZ_HistEntry* __myhe = &(_he)[_hn]; \ - if (_recycle) { \ - if (__myhe->prev == NULL) \ - (_hs)[__myhe->hindex] = __myhe->next; \ - else \ - __myhe->prev->next = __myhe->next; \ - if (__myhe->next != NULL) \ - __myhe->next->prev = __myhe->prev; \ - } \ - __myhe->next = *__myhsp; \ - __myhe->prev = NULL; \ - __myhe->hindex = __hindex; \ - __myhe->pos = (_s); \ - if (*__myhsp != NULL) \ - (*__myhsp)->prev = __myhe; \ - *__myhsp = __myhe; \ - if (++(_hn) >= PGLZ_HISTORY_SIZE) { \ - (_hn) = 0; \ - (_recycle) = true; \ - } \ - } while (0) - -/* ---------- - * pglz_out_ctrl - - * - * Outputs the last and allocates a new control byte if needed. - * ---------- - */ -#define pglz_out_ctrl(__ctrlp, __ctrlb, __ctrl, __buf) \ - do { \ - if ((((unsigned char)(__ctrl)) & 0xff) == 0) { \ - *(__ctrlp) = __ctrlb; \ - __ctrlp = (__buf)++; \ - __ctrlb = 0; \ - __ctrl = 1; \ - } \ - } while (0) - -/* ---------- - * pglz_out_literal - - * - * Outputs a literal byte to the destination buffer including the - * appropriate control bit. - * ---------- - */ -#define pglz_out_literal(_ctrlp, _ctrlb, _ctrl, _buf, _byte) \ - do { \ - pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf); \ - *(_buf)++ = (unsigned char)(_byte); \ - (_ctrl) <<= 1; \ - } while (0) - -/* ---------- - * pglz_out_tag - - * - * Outputs a backward reference tag of 2-4 bytes (depending on - * offset and length) to the destination buffer including the - * appropriate control bit. - * ---------- - */ -#define pglz_out_tag(_ctrlp, _ctrlb, _ctrl, _buf, _len, _off) \ - do { \ - pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf); \ - (_ctrlb) |= (_ctrl); \ - (_ctrl) <<= 1; \ - if ((_len) > 17) { \ - (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | 0x0f); \ - (_buf)[1] = (unsigned char)(((uint32)(_off)&0xff)); \ - (_buf)[2] = (unsigned char)((_len)-18); \ - (_buf) += 3; \ - } else { \ - (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | ((uint32)(_len)-3)); \ - (_buf)[1] = (unsigned char)((uint32)(_off)&0xff); \ - (_buf) += 2; \ - } \ - } while (0) - -#define HIST_START_LEN (sizeof(PGLZ_HistEntry*) * PGLZ_HISTORY_LISTS) -#define HIST_ENTRIES_LEN (sizeof(PGLZ_HistEntry) * PGLZ_HISTORY_SIZE) - -#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ -static PGLZ_HistEntry* hist_start[PGLZ_MAX_HISTORY_LISTS]; -static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; - -/* ---------- - * pglz_find_match - - * - * Lookup the history table if the actual input stream matches - * another sequence of characters, starting somewhere earlier - * in the input buffer. - * ---------- - */ -static inline int pglz_find_match( - PGLZ_HistEntry** hstart, const char* input, const char* end, int* lenp, int* offp, int good_match, int good_drop) -{ - PGLZ_HistEntry* hent = NULL; - int32 len = 0; - int32 off = 0; - - /* - * Traverse the linked history list until a good enough match is found. - */ - hent = hstart[pglz_hist_idx(input, end)]; - while (hent != NULL) { - const char* ip = input; - const char* hp = hent->pos; - int32 thisoff; - int32 thislen; - - /* - * Stop if the offset does not fit into our tag anymore. - */ - thisoff = ip - hp; - if (thisoff >= 0x0fff) - break; - - /* - * Determine length of match. A better match must be larger than the - * best so far. 
And if we already have a match of 16 or more bytes, - * it's worth the call overhead to use memcmp() to check if this match - * is equal for the same size. After that we must fallback to - * character by character comparison to know the exact position where - * the diff occurred. - */ - thislen = 0; - if (len >= 16) { - if (memcmp(ip, hp, len) == 0) { - thislen = len; - ip += len; - hp += len; - while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { - thislen++; - ip++; - hp++; - } - } - } else { - while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { - thislen++; - ip++; - hp++; - } - } - - /* - * Remember this match as the best (if it is) - */ - if (thislen > len) { - len = thislen; - off = thisoff; - } - - /* - * Advance to the next history entry - */ - hent = hent->next; - - /* - * Be happy with lesser good matches the more entries we visited. But - * no point in doing calculation if we're at end of list. - */ - if (hent != NULL) { - if (len >= good_match) - break; - good_match -= (good_match * good_drop) / 100; - } - } - - /* - * Return match information only if it results at least in one byte - * reduction. - */ - if (len > 2) { - *lenp = len; - *offp = off; - return 1; - } - - return 0; -} - -/* ---------- - * lz_compress - - * - * Compresses source into dest using strategy. Returns the number of - * bytes written in buffer dest, or -1 if compression fails. - * ---------- - */ -int32 lz_compress(const char* source, int32 slen, char* dest) -{ - unsigned char* bp = (unsigned char*) dest; - unsigned char* bstart = bp; - int hist_next = 0; - bool hist_recycle = false; - const char* dp = source; - const char* dend = source + slen; - unsigned char ctrl_dummy = 0; - unsigned char* ctrlp = &ctrl_dummy; - unsigned char ctrlb = 0; - unsigned char ctrl = 0; - bool found_match = false; - int32 match_len; - int32 match_off; - int32 good_match; - int32 good_drop; - int32 result_size; - int32 result_max; - int32 need_rate; - errno_t rc; - - const PGLZ_Strategy* strategy = PGLZ_strategy_always; - /* - * Our fallback strategy is the default. - */ - if (strategy == NULL) { - strategy = PGLZ_strategy_default; - } - - /* - * If the strategy forbids compression (at all or if source chunk size out - * of range), fail. - */ - if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) { - return -1; - } - - /* - * Limit the match parameters to the supported range. - */ - good_match = strategy->match_size_good; - if (good_match > PGLZ_MAX_MATCH) { - good_match = PGLZ_MAX_MATCH; - } else if (good_match < 17) { - good_match = 17; - } - - good_drop = strategy->match_size_drop; - if (good_drop < 0) { - good_drop = 0; - } else if (good_drop > 100) { - good_drop = 100; - } - - need_rate = strategy->min_comp_rate; - if (need_rate < 0) { - need_rate = 0; - } else if (need_rate > 99) { - need_rate = 99; - } - - /* - * Compute the maximum result size allowed by the strategy, namely the - * input size minus the minimum wanted compression rate. This had better - * be <= slen, else we might overrun the provided output buffer. - */ - if (slen > (INT_MAX / 100)) { - /* Approximate to avoid overflow */ - result_max = (slen / 100) * (100 - need_rate); - } else { - result_max = (slen * (100 - need_rate)) / 100; - } - - /* - * Initialize the history lists to empty. We do not need to zero the - * hist_entries[] array; its entries are initialized as they are used. 
- */ - rc = memset_s(hist_start, HIST_START_LEN, 0, HIST_START_LEN); - securec_check(rc, "\0", "\0"); - - /* - * Compress the source directly into the output buffer. - */ - while (dp < dend) { - /* - * If we already exceeded the maximum result size, fail. - * - * We check once per loop; since the loop body could emit as many as 4 - * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better - * allow 4 slop bytes. - */ - if (bp - bstart >= result_max) { - return -1; - } - - /* - * If we've emitted more than first_success_by bytes without finding - * anything compressible at all, fail. This lets us fall out - * reasonably quickly when looking at incompressible input (such as - * pre-compressed data). - */ - if (!found_match && bp - bstart >= strategy->first_success_by) { - return -1; - } - - /* - * Try to find a match in the history - */ - if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { - /* - * Create the tag and add history entries for all matched - * characters. - */ - pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); - while (match_len--) { - pglz_hist_add( - hist_start, hist_entries, hist_next, hist_recycle, dp, - dend); - dp++; /* Do not do this ++ in the line above! */ - /* The macro would do it four times - Jan. */ - } - found_match = true; - } else { - /* - * No match found. Copy one literal byte. - */ - pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); - pglz_hist_add( - hist_start, hist_entries, hist_next, hist_recycle, dp, dend); - dp++; /* Do not do this ++ in the line above! */ - /* The macro would do it four times - Jan. */ - } - } - - /* - * Write out the last control byte and check that we haven't overrun the - * output size allowed by the strategy. - */ - *ctrlp = ctrlb; - result_size = bp - bstart; - if (result_size >= result_max) { - return -1; - } - - /* success */ - return result_size; -} - -/* ---------- - * pglz_decompress - - * - * Decompresses source into dest. Returns the number of bytes - * decompressed in the destination buffer, and *optionally* - * checks that both the source and dest buffers have been - * fully read and written to, respectively. - * ---------- - */ -int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete) -{ - const unsigned char* sp; - const unsigned char* srcend; - unsigned char* dp; - unsigned char* destend; - errno_t rc = 0; - - sp = (const unsigned char*) source; - srcend = ((const unsigned char*) source) + slen; - dp = (unsigned char*) dest; - destend = dp + rawsize; - - while (sp < srcend && dp < destend) { - /* - * Read one control byte and process the next 8 items (or as many as - * remain in the compressed input). - */ - unsigned char ctrl = *sp++; - int ctrlc; - - for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) { - - if (ctrl & 1) { - /* - * Set control bit means we must read a match tag. The match - * is coded with two bytes. First byte uses lower nibble to - * code length - 3. Higher nibble contains upper 4 bits of the - * offset. The next following byte contains the lower 8 bits - * of the offset. If the length is coded as 18, another - * extension tag byte tells how much longer the match really - * was (0-255). - */ - int32 len; - int32 off; - - len = (sp[0] & 0x0f) + 3; - off = ((sp[0] & 0xf0) << 4) | sp[1]; - sp += 2; - if (len == 18) { - len += *sp++; - } - - /* - * Now we copy the bytes specified by the tag from OUTPUT to - * OUTPUT (copy len bytes from dp - off to dp). 
The copied - * areas could overlap, to preven possible uncertainty, we - * copy only non-overlapping regions. - */ - len = Min(len, destend - dp); - while (off < len) { - /*--------- - * When offset is smaller than length - source and - * destination regions overlap. memmove() is resolving - * this overlap in an incompatible way with pglz. Thus we - * resort to memcpy()-ing non-overlapping regions. - * - * Consider input: 112341234123412341234 - * At byte 5 here ^ we have match with length 16 and - * offset 4. 11234M(len=16, off=4) - * We are decoding first period of match and rewrite match - * 112341234M(len=12, off=8) - * - * The same match is now at position 9, it points to the - * same start byte of output, but from another position: - * the offset is doubled. - * - * We iterate through this offset growth until we can - * proceed to usual memcpy(). If we would try to decode - * the match at byte 5 (len=16, off=4) by memmove() we - * would issue memmove(5, 1, 16) which would produce - * 112341234XXXXXXXXXXXX, where series of X is 12 - * undefined bytes, that were at bytes [5:17]. - * --------- - */ - errno_t rc = memcpy_s(dp, off + 1, dp - off, off); - securec_check(rc, "", ""); - len -= off; - dp += off; - off += off; - } - rc = memcpy_s(dp, len + 1, dp - off, len); - securec_check(rc, "", ""); - dp += len; - } else { - /* - * An unset control bit means LITERAL BYTE. So we just copy - * one from INPUT to OUTPUT. - */ - *dp++ = *sp++; - } - - /* - * Advance the control bit - */ - ctrl >>= 1; - } - } - - /* - * Check we decompressed the right amount. If we are slicing, then we - * won't necessarily be at the end of the source or dest buffers when we - * hit a stop, so we don't test them. - */ - if (check_complete && (dp != destend || sp != srcend)) { - return -1; - } - - /* - * That's it. - */ - return (char*) dp - dest; -} - - -int CompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option) -{ - if (PageIs8BXidHeapVersion(src)) { - return TemplateCompressPage(src, dst, dst_size, option); - } else { - return TemplateCompressPage(src, dst, dst_size, option); - } -} - -void CompressConvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) { - errno_t ret; - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = real_row_cnt; - uint32 total_size = page->pd_special - page->pd_upper; - char *copy_begin = buf + page->pd_upper; - char *row; - uint16 i, j, k, cur, up, row_size; - - ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(ret, "", ""); - - k = 0; - for (i = 0; i < max_row_len; i++) { - for (j = 0; j < row_cnt; j++) { - up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off; - cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off; - row_size = up - cur; - row = buf + cur; - if (i < row_size) { - aux_buf[k++] = row[i]; // this part is reshaped - } - } - } - - if (k != total_size) { - printf("ERROR!!! 
convert_rows_2 error...!!!\n"); - ASSERT(0); - return; - } - - // cp aux_buf to page_buf - ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); - securec_check(ret, "", ""); - return ; -} - -void CompressConvertItemIds(char *buf, char *aux_buf) { - errno_t ret; - HeapPageHeaderData *page = (HeapPageHeaderData *)buf; - uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); - uint32 total_size = row_cnt * sizeof(ItemIdData); - char *copy_begin = buf + GetPageHeaderSize(page); - uint16 i, j, k; - - // clear aux_buf - ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(ret, "", ""); - - k = 0; - for (i = 0; i < row_cnt; i++) { - for (j = 0; j < sizeof(ItemIdData); j++) { - aux_buf[j * row_cnt + i] = copy_begin[k++]; - } - } - - // cp aux_buf to page_buf - ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); - securec_check(ret, "", ""); - return ; -} - -void cprs_diff_convert_rows(char *buf, uint32 offset,uint16 min_row_len, uint16 real_row_cnt) { - uint16 row_cnt = real_row_cnt; - uint32 common_size = min_row_len; - uint8 *copy_begin = (uint8 *)(buf + offset); - uint16 i, j; - - for (i = 0; i < common_size; i++) { - for (j = row_cnt - 1; j > 0; j--) { - copy_begin[i * row_cnt + j] -= copy_begin[i * row_cnt + (j - 1)]; - } - } - return ; -} - -bool CompressConvertOnePage(char *buf, char *aux_buf, bool diff_convert) { - uint16 max_row_len = 0; - uint16 min_row_len = 0; - int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real - uint16 real_row_cnt = 0; - if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) { - if (real_order != NULL) { - free(real_order); - } - return false; - } - - CompressConvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt); - CompressConvertItemIds(buf, aux_buf); - - if (diff_convert) { - cprs_diff_convert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt); - cprs_diff_convert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData), - (((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData)); - } - - if (real_order != NULL) { - free(real_order); - } - return true; -} - -void CompressPagePrepareConvert(char *src, bool diff_convert, bool *real_ByteConvert) -{ - char *aux_buf = NULL; - errno_t rc; - - aux_buf = (char *)malloc(BLCKSZ); - if (aux_buf == NULL) { - // add log - return; - } - rc = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); - securec_check(rc, "", ""); - - // do convert - *real_ByteConvert = false; - if (CompressConvertOnePage(src, aux_buf, diff_convert)) { - *real_ByteConvert = true; - } - - if (aux_buf != NULL) { - free(aux_buf); - } -} - - -/** - * CompressPage() -- Compress one page. - * - * Only the parts other than the page header will be compressed. The - * compressed data is rounded by chunck_size, The insufficient part is - * filled with zero. Compression needs to be able to save at least one - * chunk of space, otherwise it fail. - * This function returen the size of compressed data or - * -1 for compression fail - * COMPRESS_UNSUPPORTED_ERROR for unrecognized compression algorithm - */ -template -int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option) -{ - int compressed_size; - int8 level = option.compressLevelSymbol ? 
option.compressLevel : -option.compressLevel; - size_t sizeOfHeaderData = GetSizeOfHeadData(heapPageData); - char *src_copy = NULL; - bool real_ByteConvert = false; - errno_t rc; - char* data; - - if (option.byteConvert) { - // copy and maybe change it - src_copy = (char *)malloc(BLCKSZ); - if (src_copy == NULL) { - // add log - return -1; - } - rc = memcpy_s(src_copy, BLCKSZ, src, BLCKSZ); - securec_check(rc, "", ""); - CompressPagePrepareConvert(src_copy, option.diffConvert, &real_ByteConvert); /* preprocess convert src */ - } - - if (heapPageData) { - data = ((HeapPageCompressData*)dst)->data; - } else { - data = ((PageCompressData*)dst)->data; - } - - switch (option.compressAlgorithm) { - case COMPRESS_ALGORITHM_PGLZ: - if (real_ByteConvert) { - compressed_size = lz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); - } else { - compressed_size = lz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); - } - break; - case COMPRESS_ALGORITHM_ZSTD: { - if (level == 0 || level < MIN_ZSTD_COMPRESSION_LEVEL || level > MAX_ZSTD_COMPRESSION_LEVEL) { - level = DEFAULT_ZSTD_COMPRESSION_LEVEL; - } - - if (real_ByteConvert) { - compressed_size = ZSTD_compress(data, dst_size, src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level); - } else { - compressed_size = ZSTD_compress(data, dst_size, src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level); - } - - if (ZSTD_isError(compressed_size)) { - if (src_copy != NULL) { - free(src_copy); - } - return -1; - } - break; - } - default: - if (src_copy != NULL) { - free(src_copy); - } - return COMPRESS_UNSUPPORTED_ERROR; - } - - if (compressed_size < 0) { - if (src_copy != NULL) { - free(src_copy); - } - return -1; - } - - if (heapPageData) { - HeapPageCompressData* pcdptr = ((HeapPageCompressData*)dst); - rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData); - securec_check(rc, "", ""); - pcdptr->size = compressed_size; - pcdptr->byte_convert = real_ByteConvert; - pcdptr->diff_convert = option.diffConvert; - } else { - PageCompressData* pcdptr = ((PageCompressData*)dst); - rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData); - securec_check(rc, "", ""); - pcdptr->size = compressed_size; - pcdptr->byte_convert = real_ByteConvert; - pcdptr->diff_convert = option.diffConvert; - } - - if (src_copy != NULL) { - free(src_copy); - } - return SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapPageData) + compressed_size; -} - -/** - * CompressPageBufferBound() - * -- Get the destination buffer boundary to compress one page. - * Return needed destination buffer size for compress one page or - * -1 for unrecognized compression algorithm - */ -int CompressPageBufferBound(const char* page, uint8 algorithm) -{ - switch (algorithm) { - case COMPRESS_ALGORITHM_PGLZ: - return BLCKSZ + 4; - case COMPRESS_ALGORITHM_ZSTD: - return ZSTD_compressBound(BLCKSZ - GetPageHeaderSize(page)); - default: - return -1; - } -} - - diff --git a/contrib/pagehack/openGaussCompression.cpp b/contrib/pagehack/openGaussCompression.cpp deleted file mode 100644 index 04030c630f1..00000000000 --- a/contrib/pagehack/openGaussCompression.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. 
- */ - -#include "openGaussCompression.h" -#include "storage/checksum_impl.h" -#include "storage/page_compression_impl.h" - -void OpenGaussCompression::SetFilePath(const char *filePath, int segNo) -{ - int rc = snprintf_s(pcaFilePath, MAXPGPATH, MAXPGPATH - 1, PCA_SUFFIX, filePath); - securec_check_ss_c(rc, "\0", "\0"); - rc = snprintf_s(pcdFilePath, MAXPGPATH, MAXPGPATH - 1, PCD_SUFFIX, filePath); - securec_check_ss_c(rc, "\0", "\0"); - - this->segmentNo = segNo; -} - -OpenGaussCompression::~OpenGaussCompression() -{ - if (pcaFd != nullptr) { - fclose(pcaFd); - } - if (pcdFd != nullptr) { - fclose(pcdFd); - } - if (header != nullptr) { - pc_munmap(header); - } -} - -bool OpenGaussCompression::TryOpen() -{ - if ((pcaFd = fopen(this->pcaFilePath, "rb+")) == nullptr) { - return false; - } - if ((pcdFd = fopen(this->pcdFilePath, "rb+")) == nullptr) { - return false; - } - if (fseeko(pcaFd, (off_t)offsetof(PageCompressHeader, chunk_size), SEEK_SET) != 0) { - return false; - } - if (fread(&chunkSize, sizeof(chunkSize), 1, this->pcaFd) <= 0) { - return false; - } - header = pc_mmap(fileno(pcaFd), chunkSize, false); - return true; -} -bool OpenGaussCompression::ReadChunkOfBlock(char *dst, size_t *dstLen, BlockNumber blockNumber) -{ - auto currentAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber); - do { - auto chunkNum = currentAddr->nchunks; - for (uint8 i = 0; i < chunkNum; i++) { - off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]); - uint8 start = i; - while (i < chunkNum - 1 && currentAddr->chunknos[i + 1] == currentAddr->chunknos[i] + 1) { - i++; - } - if (fseeko(this->pcdFd, seekPos, SEEK_SET) != 0) { - return false; - } - size_t readAmount = chunkSize * (i - start + 1); - if (fread(dst + start * chunkSize, 1, readAmount, this->pcdFd) != readAmount && ferror(this->pcdFd)) { - return false; - } - *dstLen += readAmount; - } - if (chunkNum == 0 || DecompressPage(dst, decompressedBuffer, header->algorithm) == BLCKSZ) { - break; - } - } while (true); - if (PageIs8BXidHeapVersion(dst)) { - byteConvert = ((HeapPageCompressData *)dst)->byte_convert; - diffConvert = ((HeapPageCompressData *)dst)->diff_convert; - } else { - byteConvert = ((PageCompressData *)dst)->byte_convert; - diffConvert = ((PageCompressData *)dst)->diff_convert; - } - this->blockNumber = blockNumber; - return true; -} - -bool OpenGaussCompression::WriteBackCompressedData(char *source, size_t sourceLen, BlockNumber blockNumber) -{ - auto currentAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber); - for (size_t i = 0; i < currentAddr->nchunks; ++i) { - off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]); - if (fseeko(this->pcdFd, seekPos, SEEK_SET) != 0) { - return false; - } - Assert(sourceLen >= i * chunkSize); - auto writeCount = fwrite(source + i * chunkSize, 1, chunkSize, this->pcdFd); - bool success = chunkSize == writeCount; - if (!success) { - return false; - } - } - fflush(this->pcdFd); - return true; -} - -void OpenGaussCompression::MarkUncompressedDirty() -{ - constexpr int writeLen = BLCKSZ / 2; - unsigned char fill_byte[writeLen] = {0xFF}; - for (int i = 0; i < writeLen; i++) - fill_byte[i] = 0xFF; - auto rc = memcpy_s(decompressedBuffer + writeLen, BLCKSZ - writeLen, fill_byte, writeLen); - securec_check(rc, "", ""); -} - -BlockNumber OpenGaussCompression::GetMaxBlockNumber() -{ - return (BlockNumber)pg_atomic_read_u32(&header->nblocks); -} - -char *OpenGaussCompression::GetPcdFilePath() -{ - return this->pcdFilePath; 
-} - -char *OpenGaussCompression::GetDecompressedPage() -{ - return this->decompressedBuffer; -} - -bool OpenGaussCompression::WriteBackUncompressedData() -{ - auto algorithm = header->algorithm; - auto workBufferSize = CompressPageBufferBound(decompressedBuffer, algorithm); - if (workBufferSize < 0) { - return false; - } - char *work_buffer = (char *)malloc(workBufferSize); - RelFileCompressOption relFileCompressOption; - relFileCompressOption.compressPreallocChunks = 0; - relFileCompressOption.compressLevelSymbol = true; - relFileCompressOption.compressLevel = 1; - relFileCompressOption.compressAlgorithm = algorithm; - relFileCompressOption.byteConvert = byteConvert; - relFileCompressOption.diffConvert = diffConvert; - - auto compress_buffer_size = CompressPage(decompressedBuffer, work_buffer, workBufferSize, relFileCompressOption); - if (compress_buffer_size < 0) { - return false; - } - uint8 nchunks = (compress_buffer_size - 1) / chunkSize + 1; - auto bufferSize = chunkSize * nchunks; - if (bufferSize >= BLCKSZ) { - /* store original page if can not save space? */ - free(work_buffer); - work_buffer = (char *)decompressedBuffer; - nchunks = BLCKSZ / chunkSize; - } else { - /* fill zero in the last chunk */ - if (compress_buffer_size < bufferSize) { - auto leftSize = bufferSize - compress_buffer_size; - errno_t rc = memset_s(work_buffer + compress_buffer_size, leftSize, 0, leftSize); - securec_check(rc, "", ""); - } - } - uint8 need_chunks = nchunks; - PageCompressAddr *pcAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber); - if (pcAddr->allocated_chunks < need_chunks) { - auto chunkno = pg_atomic_fetch_add_u32(&header->allocated_chunks, need_chunks - pcAddr->allocated_chunks); - for (uint8 i = pcAddr->allocated_chunks; i < need_chunks; ++i) { - pcAddr->chunknos[i] = ++chunkno; - } - pcAddr->allocated_chunks = need_chunks; - pcAddr->nchunks = need_chunks; - } - return this->WriteBackCompressedData(work_buffer, compress_buffer_size, blockNumber); -} - - -#include "compression_algorithm.ini" \ No newline at end of file diff --git a/contrib/pagehack/openGaussCompression.h b/contrib/pagehack/openGaussCompression.h deleted file mode 100644 index 016c04fafa7..00000000000 --- a/contrib/pagehack/openGaussCompression.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H -#define OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H -#define FRONTEND 1 - - -#include -#include "c.h" -#include "storage/buf/block.h" -#include "storage/page_compression.h" - -class OpenGaussCompression { -private: - FILE* pcaFd = nullptr; - FILE* pcdFd = nullptr; - char pcaFilePath[MAXPGPATH]; - char pcdFilePath[MAXPGPATH]; - PageCompressHeader* header = nullptr; - -private: - int segmentNo; - BlockNumber blockNumber; - decltype(PageCompressHeader::chunk_size) chunkSize; - char decompressedBuffer[BLCKSZ]; - bool byteConvert; - bool diffConvert; - -public: - void SetFilePath(const char* filePath, int segNo); - virtual ~OpenGaussCompression(); - bool TryOpen(); - bool ReadChunkOfBlock(char* dst, size_t* dstLen, BlockNumber blockNumber); - bool WriteBackCompressedData(char* source, size_t sourceLen, BlockNumber blockNumber); - bool WriteBackUncompressedData(); - void MarkUncompressedDirty(); - BlockNumber GetMaxBlockNumber(); - char* GetPcdFilePath(); - char* GetDecompressedPage(); -}; - -#endif // OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H diff --git a/contrib/pagehack/pagehack.cpp b/contrib/pagehack/pagehack.cpp index 3ad787973ce..6378f962ce1 100644 --- a/contrib/pagehack/pagehack.cpp +++ 
b/contrib/pagehack/pagehack.cpp @@ -91,8 +91,7 @@ #include "tsdb/utils/constant_def.h" #endif -#include "openGaussCompression.h" - +#include "PageCompression.h" /* Max number of pg_class oid, currently about 4000 */ #define MAX_PG_CLASS_ID 10000 @@ -727,70 +726,6 @@ static const uint8 number_of_meta_bits[256] = {0, 2, 2}; -uint32 pg_checksum_block(char* data, uint32 size) -{ - uint32 sums[N_SUMS]; - uint32(*dataArr)[N_SUMS] = (uint32(*)[N_SUMS])data; - uint32 result = 0; - errno_t rc; - uint32 i, j; - - /* ensure that the size is compatible with the algorithm */ - Assert((size % (sizeof(uint32) * N_SUMS)) == 0); - - /* initialize partial checksums to their corresponding offsets */ - rc = memcpy_s(sums, sizeof(sums), g_checksumBaseOffsets, sizeof(g_checksumBaseOffsets)); - securec_check(rc, "", ""); - - /* main checksum calculation */ - for (i = 0; i < size / sizeof(uint32) / N_SUMS; i++) { - for (j = 0; j < N_SUMS; j++) { - CHECKSUM_COMP(sums[j], dataArr[i][j]); - } - } - - /* finally add in two rounds of zeroes for additional mixing */ - for (i = 0; i < CHECKSUM_CACL_ROUNDS; i++) { - for (j = 0; j < N_SUMS; j++) { - CHECKSUM_COMP(sums[j], 0); - } - } - - /* xor fold partial checksums together */ - for (i = 0; i < N_SUMS; i++) { - result ^= sums[i]; - } - - return result; -} - -uint16 pg_checksum_page(char* page, BlockNumber blkno) -{ - PageHeader phdr = (PageHeader)page; - uint16 save_checksum; - uint32 checksum; - - /* - * Save pd_checksum and temporarily set it to zero, so that the checksum - * calculation isn't affected by the old checksum stored on the page. - * Restore it after, because actually updating the checksum is NOT part of - * the API of this function. - */ - save_checksum = phdr->pd_checksum; - phdr->pd_checksum = 0; - checksum = pg_checksum_block(page, BLCKSZ); - phdr->pd_checksum = save_checksum; - - /* Mix in the block number to detect transposed pages */ - checksum ^= blkno; - - /* - * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of - * one. That avoids checksums of zero, which seems like a good idea. - */ - return (checksum % UINT16_MAX) + 1; -} - /* * SpaceGetBlockFreeLevel * Returns the block free level according to freespace. 
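The private copies of pg_checksum_block()/pg_checksum_page() are removed from pagehack here; per the diffstat, the shared implementation now lives in src/gausskernel/storage/page/checksum_impl.cpp. For context, this is a minimal sketch of how a caller verifies a block with that function: pg_checksum_page() zeroes and restores pd_checksum internally, so the stored value does not disturb the computation (the helper name is illustrative, and it assumes the usual PageHeader and pg_checksum_page declarations are in scope):

static bool PageChecksumLooksValid(char *page, BlockNumber blkno)
{
    PageHeader phdr = (PageHeader) page;

    /* Recompute the checksum for this block number and compare it with the
     * value stored in the page header. */
    uint16 expected = pg_checksum_page(page, blkno);
    return expected == phdr->pd_checksum;
}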
@@ -3167,55 +3102,69 @@ static BlockNumber CalculateMaxBlockNumber(BlockNumber blknum, BlockNumber start return number; } -static int parse_page_file(const char* filename, SegmentType type, const uint32 start_point, const uint32 number_read) +static void MarkBufferDirty(char *buffer, size_t len) +{ + int writeLen = len / 2; + unsigned char fill_byte[writeLen] = {0xFF}; + for (int i = 0; i < writeLen; i++) + fill_byte[i] = 0xFF; + auto rc = memcpy_s(buffer + writeLen, BLCKSZ - writeLen, fill_byte, writeLen); + securec_check(rc, "", ""); +} + +static int parse_page_file(const char *filename, SegmentType type, const uint32 start_point, const uint32 number_read) { if (type != SEG_HEAP && type != SEG_INDEX_BTREE) { return parse_uncompressed_page_file(filename, type, start_point, number_read); } - - auto openGaussCompression = new OpenGaussCompression(); - openGaussCompression->SetFilePath(filename, SegNo); - bool success = openGaussCompression->TryOpen(); - if (!success) { - delete openGaussCompression; + + auto pageCompression = new PageCompression(); + if (pageCompression->Init(filename, MAXPGPATH, SegNo) != SUCCESS) { + delete pageCompression; return parse_uncompressed_page_file(filename, type, start_point, number_read); } BlockNumber start = start_point; - BlockNumber blknum = openGaussCompression->GetMaxBlockNumber(); + BlockNumber blknum = pageCompression->GetMaxBlockNumber(); BlockNumber number = CalculateMaxBlockNumber(blknum, start, number_read); if (number == InvalidBlockNumber) { - delete openGaussCompression; + delete pageCompression; return false; } char compressed[BLCKSZ]; - size_t compressedLen; + char decompressed[BLCKSZ]; while (start < number) { - if (!openGaussCompression->ReadChunkOfBlock(compressed, &compressedLen, start)) { - fprintf(stderr, "read block %d failed, filename: %s: %s\n", start, openGaussCompression->GetPcdFilePath(), - strerror(errno)); - delete openGaussCompression; + auto compressedSize = pageCompression->ReadCompressedBuffer(start, compressed, BLCKSZ); + if (compressedSize == 0) { + fprintf(stderr, "read block %d failed, filename: %s_pcd: %s\n", start, filename, strerror(errno)); + delete pageCompression; return false; } - if (!parse_a_page(openGaussCompression->GetDecompressedPage(), start, blknum, type)) { + char *parseFile = NULL; + if (compressedSize < BLCKSZ) { + pageCompression->DecompressedPage(compressed, decompressed); + parseFile = decompressed; + } else { + parseFile = compressed; + } + if (!parse_a_page(parseFile, start, blknum, type)) { fprintf(stderr, "Error during parsing block %d/%d\n", start, blknum); - delete openGaussCompression; + delete pageCompression; return false; } if ((write_back && num_item) || dirty_page) { if (dirty_page) { - openGaussCompression->MarkUncompressedDirty(); + MarkBufferDirty(parseFile, BLCKSZ); } - if (!openGaussCompression->WriteBackUncompressedData()) { - fprintf(stderr, "write back failed, filename: %s: %s\n", openGaussCompression->GetPcdFilePath(), - strerror(errno)); - delete openGaussCompression; + if (!pageCompression->WriteBackUncompressedData(compressed, compressedSize, parseFile, BLCKSZ, start)) { + fprintf(stderr, "write back failed, filename: %s_pcd: %s\n", filename, strerror(errno)); + delete pageCompression; return false; } } start++; } - delete openGaussCompression; + delete pageCompression; return true; } diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 111c7f77005..876a06ad05d 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -1148,13 +1148,16 @@ 
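The rewritten parse_page_file() above is the pattern other tools in this patch follow when consuming the new libpagecompression API: initialise a PageCompression object for the <file>_pca/<file>_pcd pair, read a block's chunks with ReadCompressedBuffer(), and only run DecompressedPage() when the stored size is smaller than BLCKSZ (a full-size result means the block was stored uncompressed). A condensed sketch of that read path, with error handling trimmed; the class, SUCCESS, MAXPGPATH and the method calls are taken from the hunk above, while the stack-allocated object and helper name are illustrative:

#include "PageCompression.h"

/* Read block blkno of a (possibly) compressed relation segment into dst
 * (BLCKSZ bytes). Returns false if the file cannot be opened as a
 * compressed pair or the block cannot be read. */
static bool ReadCompressedBlock(const char *filename, int segNo, BlockNumber blkno, char *dst)
{
    PageCompression pageCompression;
    if (pageCompression.Init(filename, MAXPGPATH, segNo) != SUCCESS) {
        return false;                      /* no usable _pca/_pcd pair */
    }
    if (blkno >= pageCompression.GetMaxBlockNumber()) {
        return false;
    }

    char compressed[BLCKSZ];
    auto compressedSize = pageCompression.ReadCompressedBuffer(blkno, compressed, BLCKSZ);
    if (compressedSize == 0) {
        return false;                      /* chunk read failed */
    }
    if (compressedSize < BLCKSZ) {
        /* block is compressed: expand it into dst */
        pageCompression.DecompressedPage(compressed, dst);
        return true;
    }
    /* block is stored uncompressed when compression cannot save a whole chunk */
    errno_t rc = memcpy_s(dst, BLCKSZ, compressed, BLCKSZ);
    securec_check(rc, "", "");
    return true;
}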
submake-libpgport: submake-libplpgsql: $(MAKE) -C $(top_builddir)/src/common/pl/plpgsql/src all +submake-pagecompression: + $(MAKE) -C $(top_builddir)/src/lib/page_compression all + submake-libalarmclient: $(MAKE) -C $(top_builddir)/src/lib/alarm submake-libcarbondata: $(MAKE) -C $(top_builddir)/../contrib/carbondata all -.PHONY: submake-libpq submake-libpgport submake-libplpgsql submake-libalarmclient submake-libcarbondata +.PHONY: submake-libpq submake-libpgport submake-libplpgsql submake-libalarmclient submake-libcarbondata submake-pagecompression ########################################################################## diff --git a/src/bin/pg_ctl/CMakeLists.txt b/src/bin/pg_ctl/CMakeLists.txt index ea75b668fae..1264daf16bd 100755 --- a/src/bin/pg_ctl/CMakeLists.txt +++ b/src/bin/pg_ctl/CMakeLists.txt @@ -4,15 +4,7 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/access/redo/xlogreader_common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/xlogreader_common.cpp ) -set(TGT_gsctl_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/backup.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pg_build.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pg_ctl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/receivelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/streamutil.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/xlogreader_common.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/xlogreader.cpp -) +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_gsctl_SRC) if("${ENABLE_MOT}" STREQUAL "ON") list(APPEND TGT_gsctl_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fetchmot.cpp) @@ -28,21 +20,22 @@ set(TGT_gsctl_INC ${PROJECT_TRUNK_DIR}/distribute/include ${LIBHOTPATCH_INCLUDE_PATH} ${ZLIB_INCLUDE_PATH} + ${ZSTD_INCLUDE_PATH} + ${PROJECT_SRC_DIR}/lib/page_compression ) set(gsctl_DEF_OPTIONS ${MACRO_OPTIONS} -DHAVE_LIBZ -DFRONTEND) set(gsctl_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) set(gsctl_LINK_OPTIONS ${BIN_LINK_OPTIONS}) -set(gsctl_LINK_LIBS libelog.a libbuildquery.a pg_rewind.a libpgcommon.a libhotpatchclient.a libpgport.a -lpq -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -pthread -lrt -lz -lminiunz) +set(gsctl_LINK_LIBS libelog.a libbuildquery.a pg_rewind.a libpgcommon.a libhotpatchclient.a libpgport.a -lpq -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -pthread -lrt -lz -lminiunz -lpagecompression -lzstd) if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") list(APPEND gsctl_LINK_LIBS -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss) endif() add_bintarget(gs_ctl TGT_gsctl_SRC TGT_gsctl_INC "${gsctl_DEF_OPTIONS}" "${gsctl_COMPILE_OPTIONS}" "${gsctl_LINK_OPTIONS}" "${gsctl_LINK_LIBS}") -add_dependencies(gs_ctl elog_static buildquery_static pg_rewind_static pgcommon_static hotpatchclient_static pgport_static pq) +add_dependencies(gs_ctl elog_static buildquery_static pg_rewind_static pgcommon_static hotpatchclient_static pgport_static pq pagecompression) target_link_directories(gs_ctl PUBLIC ${LIBOPENSSL_LIB_PATH} ${PROTOBUF_LIB_PATH} ${LIBPARQUET_LIB_PATH} ${LIBCURL_LIB_PATH} ${ZLIB_LIB_PATH} ${LIBOBS_LIB_PATH} ${LIBEDIT_LIB_PATH} ${LIBCGROUP_LIB_PATH} ${SECURE_LIB_PATH} - ${LIBHOTPATCH_LIB_PATH} ${KERBEROS_LIB_PATH} ${CMAKE_BINARY_DIR}/lib + ${LIBHOTPATCH_LIB_PATH} ${KERBEROS_LIB_PATH} ${CMAKE_BINARY_DIR}/lib ${ZSTD_LIB_PATH} ) -install(TARGETS gs_ctl RUNTIME DESTINATION bin) - +install(TARGETS gs_ctl RUNTIME DESTINATION bin) \ No newline at end of file diff --git a/src/bin/pg_ctl/Makefile b/src/bin/pg_ctl/Makefile index 8e2c47cbef4..0c3dd40bc52 100644 --- 
a/src/bin/pg_ctl/Makefile +++ b/src/bin/pg_ctl/Makefile @@ -16,10 +16,10 @@ subdir = src/bin/pg_ctl top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind - +override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind -I${top_builddir}/src/lib/page_compression +override LDFLAGS += -L${top_builddir}/src/lib/page_compression ifeq ($(enable_lite_mode), no) - LIBS += -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss + LIBS += -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss -lpagecompression -lzstd endif ifneq "$(MAKECMDGOALS)" "clean" @@ -40,7 +40,7 @@ OBJS= pg_ctl.o pg_build.o backup.o receivelog.o streamutil.o xlogreader.o xlogr $(top_builddir)/src/lib/hotpatch/client/libhotpatchclient.a endif -all: gs_ctl +all: submake-pagecompression gs_ctl gs_ctl: $(OBJS) | submake-libpq submake-libpgport $(CC) -fPIC $(CXXFLAGS) $(OBJS) $(LIBS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) -o $@$(X) diff --git a/src/bin/pg_probackup/CMakeLists.txt b/src/bin/pg_probackup/CMakeLists.txt index d23567fe654..a44f022d6bc 100755 --- a/src/bin/pg_probackup/CMakeLists.txt +++ b/src/bin/pg_probackup/CMakeLists.txt @@ -13,19 +13,19 @@ execute_process( AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_probackup_SRC) -set(TGT_probackup_INC ${ZLIB_INCLUDE_PATH}) +set(TGT_probackup_INC ${ZLIB_INCLUDE_PATH} ${PROJECT_SRC_DIR}/lib/page_compression) set(probackup_DEF_OPTIONS ${MACRO_OPTIONS} -DFRONTEND -DHAVE_LIBZ) set(probackup_COMPILE_OPTIONS ${PROTECT_OPTIONS} ${BIN_SECURE_OPTIONS} ${OS_OPTIONS} ${WARNING_OPTIONS} ${OPTIMIZE_OPTIONS} ${CHECK_OPTIONS}) set(probackup_LINK_OPTIONS ${BIN_LINK_OPTIONS}) -set(probackup_LINK_LIBS libpgcommon.a libpgport.a -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -lrt -lz -lminiunz -llz4 -lpq) +set(probackup_LINK_LIBS libpgcommon.a libpgport.a -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -lrt -lz -lminiunz -llz4 -lpq -lpagecompression -lzstd) if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") list(APPEND probackup_LINK_LIBS -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss) endif() add_bintarget(gs_probackup TGT_probackup_SRC TGT_probackup_INC "${probackup_DEF_OPTIONS}" "${probackup_COMPILE_OPTIONS}" "${probackup_LINK_OPTIONS}" "${probackup_LINK_LIBS}") -add_dependencies(gs_probackup pq pgport_static) +add_dependencies(gs_probackup pq pgport_static pagecompression) target_link_directories(gs_probackup PUBLIC - ${CMAKE_BINARY_DIR}/lib ${LIBOPENSSL_LIB_PATH} ${LIBEDIT_LIB_PATH} - ${ZLIB_LIB_PATH} ${KERBEROS_LIB_PATH} ${LZ4_LIB_PATH} ${SECURE_LIB_PATH} + ${CMAKE_BINARY_DIR}/lib ${LIBOPENSSL_LIB_PATH} ${LIBEDIT_LIB_PATH} ${ZSTD_LIB_PATH} + ${ZLIB_LIB_PATH} ${KERBEROS_LIB_PATH} ${LZ4_LIB_PATH} ${SECURE_LIB_PATH} ) install(TARGETS gs_probackup RUNTIME DESTINATION bin) diff --git a/src/bin/pg_probackup/Makefile b/src/bin/pg_probackup/Makefile index f8723fb6317..9f3e8178a51 100644 --- a/src/bin/pg_probackup/Makefile +++ b/src/bin/pg_probackup/Makefile @@ -38,10 +38,14 @@ ifeq ($(enable_lite_mode), no) LIBS += -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss endif PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} -Isrc -I$(top_builddir)/$(subdir) -I$(LZ4_INCLUDE_PATH) 
-I$(ZLIB_INCLUDE_PATH) +# add page_compression so .h +LDFLAGS += -L../../lib/page_compression +PG_CPPFLAGS = -I../../lib/page_compression +LIBS += -lpagecompression -lzstd override CPPFLAGS := -DFRONTEND $(CPPFLAGS) $(PG_CPPFLAGS) -DHAVE_LIBZ PG_LIBS_INTERNAL = $(libpq_pgport) ${PTHREAD_CFLAGS} -all: $(PROGRAM) +all: submake-pagecompression $(PROGRAM) gs_probackup: $(OBJS) | submake-libpq submake-libpgport $(CC) $(CXXFLAGS) $(OBJS) $(LIBS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) -o $@$(X) diff --git a/src/bin/pg_probackup/catalog.cpp b/src/bin/pg_probackup/catalog.cpp index 857758765fe..9a89699a667 100644 --- a/src/bin/pg_probackup/catalog.cpp +++ b/src/bin/pg_probackup/catalog.cpp @@ -1979,6 +1979,19 @@ void flush_and_close_file(pgBackup *backup, bool sync, FILE *out, char *control_ control_path_temp, strerror(errno)); } +inline int WriteCompressOption(pgFile *file, char *line, int remainLen, int len) +{ + if (file->is_datafile && file->compressedFile) { + auto nRet = + snprintf_s(line + len, remainLen - len, remainLen - len - 1, + ",\"compressedFile\":\"%d\",\"compressedChunkSize\":\"%d\",\"compressedAlgorithm\":\"%d\"", 1, + file->compressedChunkSize, file->compressedAlgorithm); + securec_check_ss_c(nRet, "\0", "\0"); + return nRet; + } + return 0; +} + /* * Output the list of files to backup catalog DATABASE_FILE_LIST */ @@ -2072,6 +2085,8 @@ write_backup_filelist(pgBackup *backup, parray *files, const char *root, nRet = snprintf_s(line+len, remainLen - len,remainLen - len - 1,",\"segno\":\"%d\"", file->segno); securec_check_ss_c(nRet, "\0", "\0"); len += nRet; + /* persistence compress option */ + len += WriteCompressOption(file, line, remainLen, len); } if (file->linked) diff --git a/src/bin/pg_probackup/data.cpp b/src/bin/pg_probackup/data.cpp index d2851987c7a..c4ae4a9eb3a 100644 --- a/src/bin/pg_probackup/data.cpp +++ b/src/bin/pg_probackup/data.cpp @@ -13,6 +13,7 @@ #include "storage/checksum.h" #include "storage/checksum_impl.h" +#include "PageCompression.h" #include "pg_lzcompress.h" #include "file.h" @@ -380,7 +381,8 @@ prepare_page(ConnectionArgs *conn_arg, Page page, bool strict, uint32 checksum_version, const char *from_fullpath, - PageState *page_st) + PageState *page_st, PageCompression *pageCompression = NULL) + { int try_again = PAGE_READ_ATTEMPTS; bool page_is_valid = false; @@ -400,7 +402,7 @@ prepare_page(ConnectionArgs *conn_arg, while (!page_is_valid && try_again--) { /* read the block */ - int read_len = fio_pread(in, page, blknum * BLCKSZ); + int read_len = fio_pread(in, page, blknum * BLCKSZ, pageCompression); /* The block could have been truncated. It is fine. */ if (read_len == 0) @@ -1512,8 +1514,9 @@ validate_one_page(Page page, BlockNumber absolute_blkno, if (checksum_version) { /* Checksums are enabled, so check them. */ - if (page_st->checksum != ((PageHeader) page)->pd_checksum) + if (page_st->checksum != ((PageHeader) page)->pd_checksum && !PageCompression::InnerPageCompressChecksum(page)) { return PAGE_CHECKSUM_MISMATCH; + } } /* At this point page header is sane, if checksums are enabled - the`re ok. 
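WriteCompressOption only fires for data files recognised as compressed, so DATABASE_FILE_LIST entries for ordinary files are unchanged. For illustration (values invented, taken straight from the snprintf_s format strings above), a compressed relation segment's entry simply gains three extra keys right after "segno", and dir_read_file_list parses them back with get_control_value later in this patch:

    ...,"segno":"2","compressedFile":"1","compressedChunkSize":"4096","compressedAlgorithm":"1"
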
@@ -2043,13 +2046,25 @@ send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_f BlockNumber blknum = 0; datapagemap_iterator_t *iter = NULL; int compressed_size = 0; + PageCompression* pageCompression = NULL; + std::unique_ptr pageCompressionPtr = NULL; /* stdio buffers */ char *in_buf = NULL; char *out_buf = NULL; - /* open source file for read */ - in = fopen(from_fullpath, PG_BINARY_R); + if (file->compressedFile) { + /* init pageCompression and return pcdFd for error check */ + pageCompression = new PageCompression(); + pageCompressionPtr = std::unique_ptr(pageCompression); + pageCompression->Init(from_fullpath, MAXPGPATH, file->segno, file->compressedChunkSize); + in = pageCompression->GetPcdFile(); + /* force compress page if file is compressed file */ + calg = (calg == NOT_DEFINED_COMPRESS || calg == NONE_COMPRESS) ? PGLZ_COMPRESS : calg; + } else { + /* open source file for read */ + in = fopen(from_fullpath, PG_BINARY_R); + } if (in == NULL) { /* @@ -2087,7 +2102,7 @@ send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_f int rc = prepare_page(conn_arg, file, prev_backup_start_lsn, blknum, in, backup_mode, curr_page, true, checksum_version, - from_fullpath, &page_st); + from_fullpath, &page_st, pageCompression); if (rc == PageIsTruncated) break; @@ -2147,7 +2162,10 @@ send_pages(ConnectionArgs* conn_arg, const char *to_fullpath, const char *from_f if (in && fclose(in)) elog(ERROR, "Cannot close the source file \"%s\": %s", to_fullpath, strerror(errno)); - + + if (pageCompressionPtr) { + pageCompressionPtr->ResetPcdFd(); + } /* close local output file */ if (out && fclose(out)) elog(ERROR, "Cannot close the backup file \"%s\": %s", diff --git a/src/bin/pg_probackup/dir.cpp b/src/bin/pg_probackup/dir.cpp index bb1b0857f73..00079dd1dbf 100644 --- a/src/bin/pg_probackup/dir.cpp +++ b/src/bin/pg_probackup/dir.cpp @@ -24,6 +24,7 @@ #include "configuration.h" #include "common/fe_memutils.h" +#include "PageCompression.h" /* * The contents of these directories are removed or recreated during server @@ -228,6 +229,11 @@ pgFileInit(const char *rel_path) /* Number of blocks backed up during backup */ file->n_headers = 0; + /* set uncompressed file default */ + file->compressedFile = false; + file->compressedAlgorithm = 0; + file->compressedChunkSize = 0; + return file; } @@ -839,6 +845,42 @@ static char check_digit_file(pgFile *file) return -1; } +static inline void SetFileCompressOption(pgFile *file, char *child, size_t childLen) +{ + if (file->is_datafile && PageCompression::IsCompressedTableFile(child, childLen)) { + std::unique_ptr pageCompression = std::make_unique(); + COMPRESS_ERROR_STATE state = pageCompression->Init(child, childLen, InvalidBlockNumber); + if (state != SUCCESS) { + elog(ERROR, "can not read block of '%s_pca': ", child); + } + file->compressedFile = true; + file->size = pageCompression->GetMaxBlockNumber() * BLCKSZ; + file->compressedChunkSize = pageCompression->GetChunkSize(); + file->compressedAlgorithm = pageCompression->GetAlgorithm(); + } +} + +bool SkipSomeDirFile(pgFile *file, struct dirent *dent, bool skipHidden) +{ + /* Skip entries point current dir or parent dir */ + if (S_ISDIR(file->mode) && (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0)) { + return false; + } + /* skip hidden files and directories */ + if (skipHidden && file->name[0] == '.') { + elog(WARNING, "Skip hidden file: '%s'", file->name); + return false; + } + /* + * Add only files, directories and links. 
Skip sockets and other + * unexpected file formats. + */ + if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) { + elog(WARNING, "Skip '%s': unexpected file format", file->name); + return false; + } + return true; +} /* * List files in parent->path directory. If "exclude" is true do not add into * "files" files from pgdata_exclude_files and directories from @@ -879,34 +921,16 @@ dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, join_path_components(child, parent_dir, dent->d_name); join_path_components(rel_child, parent->rel_path, dent->d_name); - file = pgFileNew(child, rel_child, follow_symlink, external_dir_num, - location); - if (file == NULL) - continue; - - /* Skip entries point current dir or parent dir */ - if (S_ISDIR(file->mode) && - (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0)) - { - pgFileFree(file); + /* skip real compressed file cause we mark compress flag at oid file */ + if (PageCompression::SkipCompressedFile(child, MAXPGPATH)) { continue; } - /* skip hidden files and directories */ - if (skip_hidden && file->name[0] == '.') - { - elog(WARNING, "Skip hidden file: '%s'", child); - pgFileFree(file); + file = pgFileNew(child, rel_child, follow_symlink, external_dir_num, location); + if (file == NULL) continue; - } - /* - * Add only files, directories and links. Skip sockets and other - * unexpected file formats. - */ - if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) - { - elog(WARNING, "Skip '%s': unexpected file format", child); + if (!SkipSomeDirFile(file, dent, skip_hidden)) { pgFileFree(file); continue; } @@ -925,6 +949,9 @@ dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, /* We add the directory itself which content was excluded */ parray_append(files, file); continue; + } else if (check_res == CHECK_TRUE) { + /* persistence compress option */ + SetFileCompressOption(file, child, MAXPGPATH); } } @@ -1614,6 +1641,9 @@ dir_read_file_list(const char *root, const char *external_prefix, file->write_size = (int64) write_size; file->mode = (mode_t) mode; file->is_datafile = is_datafile ? true : false; + file->compressedFile = false; + file->compressedChunkSize = 0; + file->compressedAlgorithm = 0; file->is_cfs = is_cfs ? 
true : false; file->crc = (pg_crc32) crc; file->compress_alg = parse_compress_alg(compress_alg_string); @@ -1630,6 +1660,18 @@ dir_read_file_list(const char *root, const char *external_prefix, canonicalize_path(file->linked); } + /* read compress option from control file */ + int64 compressedFile = 0; + if (get_control_value(buf, "compressedFile", NULL, &compressedFile, false)) { + file->compressedFile = true; + int64 compressedAlgorithm = 0; + int64 compressedChunkSize = 0; + get_control_value(buf, "compressedAlgorithm", NULL, &compressedAlgorithm, true); + get_control_value(buf, "compressedChunkSize", NULL, &compressedChunkSize, true); + file->compressedAlgorithm = (uint8)compressedAlgorithm; + file->compressedChunkSize = (int16)compressedChunkSize; + } + if (get_control_value(buf, "segno", NULL, &segno, false)) file->segno = (int) segno; diff --git a/src/bin/pg_probackup/file.cpp b/src/bin/pg_probackup/file.cpp index 286550a7fdd..959619e36e9 100644 --- a/src/bin/pg_probackup/file.cpp +++ b/src/bin/pg_probackup/file.cpp @@ -59,6 +59,9 @@ typedef struct size_t size; time_t mtime; bool is_datafile; + bool compressedFile; + uint16 compressedChunkSize; + uint8 compressedAlgorithm; bool is_database; Oid tblspcOid; Oid dbOid; @@ -513,6 +516,7 @@ FILE* fio_fopen(char const* path, char const* mode, fio_location location) } int fio_fprintf(FILE* f, char const* format, ...) __attribute__ ((format (printf, 2, 3))); +static char *ProcessErrorIn(int out, fio_header &hdr, const char *fromFullpath); /* Format output to file stream */ int fio_fprintf(FILE* f, char const* format, ...) { @@ -621,7 +625,7 @@ int fio_truncate(int fd, off_t size) /* * Read file from specified location. */ -int fio_pread(FILE* f, void* buf, off_t offs) +int fio_pread(FILE* f, void* buf, off_t offs, PageCompression* pageCompression) { if (fio_is_remote_file(f)) { @@ -647,12 +651,15 @@ int fio_pread(FILE* f, void* buf, off_t offs) else { /* For local file, opened by fopen, we should use stdio functions */ - int rc = fseek(f, offs, SEEK_SET); - - if (rc < 0) - return rc; - - return fread(buf, 1, BLCKSZ, f); + if (pageCompression) { + return pageCompression->ReadCompressedBuffer(offs / BLCKSZ, (char*)buf, BLCKSZ, true); + } else { + int rc = fseek(f, offs, SEEK_SET); + if (rc < 0) { + return rc; + } + return fread(buf, 1, BLCKSZ, f); + } } } @@ -767,6 +774,15 @@ ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compres } } +void fio_construct_compressed(void const *buf, size_t size) +{ + fio_header hdr; + hdr.cop = FIO_COSTRUCT_COMPRESSED; + hdr.size = size; + IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr)); + IO_CHECK(fio_write_all(fio_stdout, buf, size), size); +} + static ssize_t fio_write_compressed_impl(int fd, void const* buf, size_t size, int compress_alg) { @@ -1308,7 +1324,7 @@ static void fio_send_pages_impl(int out, char* buf) fio_header hdr; fio_send_request *req = (fio_send_request*) buf; char *from_fullpath = (char*) buf + sizeof(fio_send_request); - bool with_pagemap = req->bitmapsize > 0 ? 
true : false; + bool with_pagemap = req->bitmapsize > 0; /* error reporting */ char *errormsg = NULL; /* parse buffer */ @@ -1318,39 +1334,22 @@ static void fio_send_pages_impl(int out, char* buf) int32 hdr_num = -1; int32 cur_pos_out = 0; BackupPageHeader2 *headers = NULL; + PageCompression* pageCompression = NULL; int nRet = 0; - /* open source file */ - in = fopen(from_fullpath, PG_BINARY_R); + if (PageCompression::IsCompressedTableFile(from_fullpath, MAXPGPATH)) { + /* init pageCompression and return pcdFd for error check */ + pageCompression = new PageCompression(); + pageCompression->Init(from_fullpath, MAXPGPATH, req->segmentno / RELSEG_SIZE); + in = pageCompression->GetPcdFile(); + } else { + /* open source file */ + in = fopen(from_fullpath, PG_BINARY_R); + } + if (!in) { - hdr.cop = FIO_ERROR; - - /* do not send exact wording of ENOENT error message - * because it is a very common error in our case, so - * error code is enough. - */ - if (errno == ENOENT) - { - hdr.arg = FILE_MISSING; - hdr.size = 0; - } - else - { - hdr.arg = OPEN_FAILED; - errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); - /* Construct the error message */ - nRet = snprintf_s(errormsg, ERRMSG_MAX_LEN,ERRMSG_MAX_LEN - 1, "Cannot open file \"%s\": %s", - from_fullpath, strerror(errno)); - securec_check_ss_c(nRet, "\0", "\0"); - hdr.size = strlen(errormsg) + 1; - } - - /* send header and message */ - IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); - if (errormsg) - IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); - + errormsg = ProcessErrorIn(out, hdr, from_fullpath); goto cleanup; } @@ -1385,21 +1384,25 @@ static void fio_send_pages_impl(int out, char* buf) /* read page, check header and validate checksumms */ for (;;) { - /* - * Optimize stdio buffer usage, fseek only when current position - * does not match the position of requested block. - */ - if (current_pos != (int)(blknum*BLCKSZ)) - { - current_pos = blknum*BLCKSZ; - if (fseek(in, current_pos, SEEK_SET) != 0) - elog(ERROR, "fseek to position %u is failed on remote file '%s': %s", - current_pos, from_fullpath, strerror(errno)); - } + if (pageCompression) { + read_len = pageCompression->ReadCompressedBuffer(blknum, read_buffer, BLCKSZ, true); + } else { + /* + * Optimize stdio buffer usage, fseek only when current position + * does not match the position of requested block. + */ + if (current_pos != (int)(blknum*BLCKSZ)) + { + current_pos = blknum*BLCKSZ; + if (fseek(in, current_pos, SEEK_SET) != 0) + elog(ERROR, "fseek to position %u is failed on remote file '%s': %s", + current_pos, from_fullpath, strerror(errno)); + } - read_len = fread(read_buffer, 1, BLCKSZ, in); + read_len = fread(read_buffer, 1, BLCKSZ, in); - current_pos += read_len; + current_pos += read_len; + } /* report error */ if (ferror(in)) @@ -1560,11 +1563,45 @@ eof: pg_free(iter); pg_free(errormsg); pg_free(headers); - if (in) - fclose(in); + if (pageCompression) { + /* in will be closed */ + delete pageCompression; + } else { + if (in) + fclose(in); + } return; } +static char *ProcessErrorIn(int out, fio_header &hdr, const char *fromFullpath) +{ + char *errormsg = NULL; + hdr.cop = FIO_ERROR; + + /* do not send exact wording of ENOENT error message + * because it is a very common error in our case, so + * error code is enough. 
+ */ + if (errno == ENOENT) { + hdr.arg = FILE_MISSING; + hdr.size = 0; + } else { + hdr.arg = OPEN_FAILED; + errormsg = (char *)pgut_malloc(ERRMSG_MAX_LEN); + /* Construct the error message */ + error_t nRet = snprintf_s(errormsg, ERRMSG_MAX_LEN, ERRMSG_MAX_LEN - 1, "Cannot open file \"%s\": %s", + fromFullpath, strerror(errno)); + securec_check_ss_c(nRet, "\0", "\0"); + hdr.size = strlen(errormsg) + 1; + } + + /* send header and message */ + IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr)); + if (errormsg) + IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size); + return errormsg; +} + /* Receive chunks of data and write them to destination file. * Return codes: * SEND_OK (0) @@ -1817,6 +1854,9 @@ void fio_list_dir(parray *files, const char *root, bool exclude, file->forkName = fio_file.forkName; file->segno = fio_file.segno; file->external_dir_num = fio_file.external_dir_num; + file->compressedFile = fio_file.compressedFile; + file->compressedChunkSize = fio_file.compressedChunkSize; + file->compressedAlgorithm = fio_file.compressedAlgorithm; if (fio_file.linked_len > 0) { @@ -1893,6 +1933,9 @@ static void fio_list_dir_impl(int out, char* buf) fio_file.forkName = file->forkName; fio_file.segno = file->segno; fio_file.external_dir_num = file->external_dir_num; + fio_file.compressedFile = file->compressedFile; + fio_file.compressedChunkSize = file->compressedChunkSize; + fio_file.compressedAlgorithm = file->compressedAlgorithm; if (file->linked) fio_file.linked_len = strlen(file->linked) + 1; @@ -2211,6 +2254,12 @@ void fio_communicate(int in, int out) case FIO_WRITE_COMPRESSED: /* Write to the current position in file */ IO_CHECK(fio_write_compressed_impl(fd[hdr.handle], buf, hdr.size, hdr.arg), BLCKSZ); break; + case FIO_COSTRUCT_COMPRESSED: { + CompressCommunicate *cm = (CompressCommunicate *)buf; + auto result = ConstructCompressedFile(cm->path, cm->segmentNo, cm->chunkSize, cm->algorithm); + IO_CHECK(result, SUCCESS); + break; + } case FIO_READ: /* Read from the current position in file */ if ((size_t)hdr.arg > buf_size) { size_t oldSize = buf_size; @@ -2338,5 +2387,4 @@ void fio_communicate(int in, int out) perror("read"); exit(EXIT_FAILURE); } -} - +} \ No newline at end of file diff --git a/src/bin/pg_probackup/file.h b/src/bin/pg_probackup/file.h index 254a433f90a..47e6eae1493 100644 --- a/src/bin/pg_probackup/file.h +++ b/src/bin/pg_probackup/file.h @@ -16,6 +16,8 @@ #include #include +#include "PageCompression.h" + #ifdef HAVE_LIBZ #include #endif @@ -47,6 +49,7 @@ typedef enum FIO_CLOSEDIR, FIO_PAGE, FIO_WRITE_COMPRESSED, + FIO_COSTRUCT_COMPRESSED, FIO_GET_CRC32, /* used for incremental restore */ FIO_GET_CHECKSUM_MAP, @@ -106,9 +109,10 @@ extern void fio_communicate(int in, int out); extern int fio_get_agent_version(void); extern FILE* fio_fopen(char const* name, char const* mode, fio_location location); extern size_t fio_fwrite(FILE* f, void const* buf, size_t size); +extern void fio_construct_compressed(void const* buf, size_t size); extern ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t size, int compress_alg); extern ssize_t fio_fread(FILE* f, void* buf, size_t size); -extern int fio_pread(FILE* f, void* buf, off_t offs); +extern int fio_pread(FILE* f, void* buf, off_t offs, PageCompression* pageCompression = NULL); extern int fio_fprintf(FILE* f, char const* arg, ...);// pg_attribute_printf(2, 3); extern int fio_fflush(FILE* f); extern int fio_fseek(FILE* f, off_t offs); @@ -142,6 +146,13 @@ extern int fio_closedir(DIR *dirp); extern FILE* 
fio_open_stream(char const* name, fio_location location); extern int fio_close_stream(FILE* f); +struct CompressCommunicate { + char path[MAXPGPATH]; + uintptr_t segmentNo; + int chunkSize; + int algorithm; +}; + #ifdef HAVE_LIBZ extern gzFile fio_gzopen(char const* path, char const* mode, int level, fio_location location); extern int fio_gzclose(gzFile file); diff --git a/src/bin/pg_probackup/merge.cpp b/src/bin/pg_probackup/merge.cpp index d06bfebe9b6..fee84d8d0b0 100644 --- a/src/bin/pg_probackup/merge.cpp +++ b/src/bin/pg_probackup/merge.cpp @@ -1137,7 +1137,10 @@ merge_files(void *arg) arguments->full_external_prefix); done: - parray_append(arguments->merge_filelist, tmp_file); + tmp_file->compressedFile = dest_file->compressedFile; + tmp_file->compressedAlgorithm = dest_file->compressedAlgorithm; + tmp_file->compressedChunkSize = dest_file->compressedChunkSize; + parray_append(arguments->merge_filelist, tmp_file); } /* Data files merging is successful */ diff --git a/src/bin/pg_probackup/pg_probackupb.h b/src/bin/pg_probackup/pg_probackupb.h index 215b8705320..a7d77030f91 100644 --- a/src/bin/pg_probackup/pg_probackupb.h +++ b/src/bin/pg_probackup/pg_probackupb.h @@ -32,6 +32,9 @@ typedef struct pgFile_t char *rel_path; /* relative path of the file */ char *linked; /* path of the linked file */ bool is_datafile; /* true if the file is PostgreSQL data file */ + bool compressedFile; /* true if the file is the openGauss compressed file */ + uint16 compressedChunkSize; /* chunk size of compressed file */ + uint8 compressedAlgorithm; /* algorithm of comrpessed file */ Oid tblspcOid; /* tblspcOid extracted from path, if applicable */ Oid dbOid; /* dbOid extracted from path, if applicable */ Oid relOid; /* relOid extracted from path, if applicable */ diff --git a/src/bin/pg_probackup/restore.cpp b/src/bin/pg_probackup/restore.cpp index 17edc27811e..9654adb5249 100644 --- a/src/bin/pg_probackup/restore.cpp +++ b/src/bin/pg_probackup/restore.cpp @@ -1104,6 +1104,27 @@ static void sync_restored_files(parray *dest_files, elog(INFO, "Restored backup files are synced, time elapsed: %s", pretty_time); } +inline void RestoreCompressFile(FILE *out, char *to_fullpath, size_t pathLen, pgFile *dest_file) +{ + if (dest_file->is_datafile && dest_file->compressedFile && !dest_file->is_cfs) { + if (!fio_is_remote_file(out)) { + auto result = ConstructCompressedFile(to_fullpath, dest_file->segno, dest_file->compressedChunkSize, + dest_file->compressedAlgorithm); + if (result != SUCCESS) { + elog(ERROR, "Cannot copy compressed file \"%s\": %s", to_fullpath, strerror(errno)); + } + } else { + CompressCommunicate communicate; + errno_t rc = memcpy_s(communicate.path, MAXPGPATH, to_fullpath, MAXPGPATH); + securec_check(rc, "", ""); + communicate.chunkSize = dest_file->compressedChunkSize; + communicate.segmentNo = dest_file->segno; + communicate.algorithm = dest_file->compressedAlgorithm; + fio_construct_compressed((void*)&communicate, sizeof(communicate)); + } + } +} + /* * Restore files into $PGDATA. 
*/ @@ -1260,6 +1281,7 @@ done: elog(ERROR, "Cannot close file \"%s\": %s", to_fullpath, strerror(errno)); + RestoreCompressFile(out, to_fullpath, MAXPGPATH, dest_file); /* free pagemap used for restore optimization */ pg_free(dest_file->pagemap.bitmap); diff --git a/src/bin/pg_rewind/CMakeLists.txt b/src/bin/pg_rewind/CMakeLists.txt index 01fc53978a8..2f9adfc24e1 100755 --- a/src/bin/pg_rewind/CMakeLists.txt +++ b/src/bin/pg_rewind/CMakeLists.txt @@ -5,7 +5,6 @@ set(TGT_rewind_SRC ${CMAKE_CURRENT_SOURCE_DIR}/datapagemap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/filemap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_ops.cpp ${CMAKE_CURRENT_SOURCE_DIR}/logging.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/compressed_rewind.cpp ${CMAKE_CURRENT_SOURCE_DIR}/parsexlog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pg_rewind.cpp ) @@ -18,6 +17,7 @@ set(TGT_rewind_INC ${PROJECT_SRC_DIR}/include/libpq ${LIBOPENSSL_INCLUDE_PATH} ${ZSTD_INCLUDE_PATH} + ${PROJECT_SRC_DIR}/lib/page_compression ) set(rewind_DEF_OPTIONS ${MACRO_OPTIONS}) diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile index 3d0bcdd99ab..b557a32a83a 100644 --- a/src/bin/pg_rewind/Makefile +++ b/src/bin/pg_rewind/Makefile @@ -18,7 +18,8 @@ include $(top_builddir)/src/Makefile.global PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) -override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -I$(top_builddir)/src/bin/pg_ctl +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -I$(top_builddir)/src/bin/pg_ctl -I${top_builddir}/src/lib/page_compression +override LDFLAGS := -L../../src/lib/page_compression ifneq "$(MAKECMDGOALS)" "clean" ifneq "$(MAKECMDGOALS)" "distclean" ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" @@ -26,8 +27,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS = file_ops.o datapagemap.o fetch.o filemap.o logging.o parsexlog.o pg_rewind.o compressed_rewind.o - +OBJS = file_ops.o datapagemap.o fetch.o filemap.o logging.o parsexlog.o pg_rewind.o #all:gs_rewind.a pg_rewind.a: $(OBJS) diff --git a/src/bin/pg_rewind/compressed_rewind.cpp b/src/bin/pg_rewind/compressed_rewind.cpp deleted file mode 100644 index 138afcfe070..00000000000 --- a/src/bin/pg_rewind/compressed_rewind.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. - * - * openGauss is licensed under Mulan PSL v2. - * You can use this software according to the terms and conditions of the Mulan PSL v2. - * You may obtain a copy of Mulan PSL v2 at: - * - * http://license.coscl.org.cn/MulanPSL2 - * - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PSL v2 for more details. - * ------------------------------------------------------------------------- - * - * compressed_rewind.cpp - * Functions for fetching compressed table. 
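compressed_rewind.cpp, removed here, was where pg_rewind kept its own helpers for the companion files of a compressed relation: <relfile>_pca (the header plus per-block chunk addresses) and <relfile>_pcd (the chunk data), see FormatPathToPca/FormatPathToPcd in the deleted body. Those duties move to the shared library, which exposes PageCompression::IsCompressedTableFile, SkipCompressedFile and RemoveCompressedFile instead. A sketch of the naming test those helpers presumably centralise, mirroring the inline check removed from recurse_dir further down:

    /* Sketch: does a directory entry name one of the compression side files? */
    #include <cstring>

    static bool HasCompressSuffix(const char *name)
    {
        const size_t suffixLen = 4;                      /* "_pca" / "_pcd" */
        size_t len = strlen(name);
        if (len < suffixLen)
            return false;
        const char *tail = name + len - suffixLen;
        return strncmp(tail, "_pca", suffixLen) == 0 ||
               strncmp(tail, "_pcd", suffixLen) == 0;
    }
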
- * - * - * IDENTIFICATION - * ./src/bin/pg_rewind/compressed_rewind.cpp - * - * ------------------------------------------------------------------------- - */ -#include "compressed_rewind.h" -#include "libpq/libpq-fe.h" -#include "lib/string.h" -#include "logging.h" -#include "filemap.h" -#include "utils/elog.h" -#include "file_ops.h" - -void FormatPathToPca(const char* path, char* dst, size_t len, bool withPrefix) -{ - errno_t rc; - if (withPrefix) { - rc = snprintf_s(dst, len, len - 1, "%s/" PCA_SUFFIX, pg_data, path); - } else { - rc = snprintf_s(dst, len, len - 1, PCA_SUFFIX, path); - } - securec_check_ss_c(rc, "\0", "\0"); -} - -void FormatPathToPcd(const char* path, char* dst, size_t len, bool withPrefix) -{ - errno_t rc; - if (withPrefix) { - rc = snprintf_s(dst, len, len - 1, "%s/" PCD_SUFFIX, pg_data, path); - } else { - rc = snprintf_s(dst, len, len - 1, PCD_SUFFIX, path); - } - securec_check_ss_c(rc, "\0", "\0"); -} - -template -bool ReadCompressedInfo(T& t, off_t offset, FILE* file, char* pcaFilePath, size_t len) -{ - if (fseeko(file, offset, SEEK_SET) != 0) { - pg_fatal("could not seek in file \"%s\": \"%lu\": %s\n", pcaFilePath, len, strerror(errno)); - return false; - } - if (fread(&t, sizeof(t), 1, file) <= 0) { - pg_fatal("could not open file \"%s\": \"%lu\": %s\n", pcaFilePath, len, strerror(errno)); - return false; - } - return true; -} - -/** - * write RewindCompressInfo - * @param file file fp - * @param pcaFilePath file path,for ereport - * @param rewindCompressInfo pointer of return - * @return sucesss or not - */ -static bool ReadRewindCompressedInfo(FILE* file, char* pcaFilePath, size_t len, RewindCompressInfo* rewindCompressInfo) -{ - off_t offset = (off_t)offsetof(PageCompressHeader, chunk_size); - if (!ReadCompressedInfo(rewindCompressInfo->chunkSize, offset, file, pcaFilePath, len)) { - return false; - } - offset = (off_t)offsetof(PageCompressHeader, algorithm); - if (!ReadCompressedInfo(rewindCompressInfo->algorithm, offset, file, pcaFilePath, len)) { - return false; - } - offset = (off_t)offsetof(PageCompressHeader, nblocks); - if (!ReadCompressedInfo(rewindCompressInfo->oldBlockNumber, offset, file, pcaFilePath, len)) { - return false; - } - rewindCompressInfo->compressed = true; - return true; -} - -bool FetchSourcePca(const char* strValue, RewindCompressInfo* rewindCompressInfo) -{ - size_t length = 0; - PageCompressHeader* ptr = (PageCompressHeader*)PQunescapeBytea((const unsigned char*)strValue, &length); - rewindCompressInfo->compressed = false; - if (length == sizeof(PageCompressHeader)) { - rewindCompressInfo->compressed = true; - rewindCompressInfo->algorithm = ptr->algorithm; - rewindCompressInfo->newBlockNumber = ptr->nblocks; - rewindCompressInfo->oldBlockNumber = 0; - rewindCompressInfo->chunkSize = ptr->chunk_size; - } - PQfreemem(ptr); - return rewindCompressInfo->compressed; -} - -bool ProcessLocalPca(const char* tablePath, RewindCompressInfo* rewindCompressInfo) -{ - rewindCompressInfo->compressed = false; - if (!isRelDataFile(tablePath)) { - return false; - } - char pcaFilePath[MAXPGPATH]; - FormatPathToPca(tablePath, pcaFilePath, MAXPGPATH, true); - FILE* file = fopen(pcaFilePath, "rb"); - if (file == NULL) { - if (errno == ENOENT) { - return false; - } - pg_fatal("could not open file \"%s\": %s\n", pcaFilePath, strerror(errno)); - return false; - } - bool success = ReadRewindCompressedInfo(file, pcaFilePath, MAXPGPATH, rewindCompressInfo); - fclose(file); - return success; -} \ No newline at end of file diff --git 
a/src/bin/pg_rewind/compressed_rewind.h b/src/bin/pg_rewind/compressed_rewind.h deleted file mode 100644 index 1c46d6bf4dd..00000000000 --- a/src/bin/pg_rewind/compressed_rewind.h +++ /dev/null @@ -1,21 +0,0 @@ -/* ------------------------------------------------------------------------- - * - * compressed_rewind.h - * - * Copyright (c) 2021 Huawei Technologies Co.,Ltd. - * - * ------------------------------------------------------------------------- - */ -#ifndef OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H -#define OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H - -#include "compressed_common.h" -#include "storage/page_compression.h" -#include "storage/smgr/relfilenode.h" - -extern bool FetchSourcePca(const char* strValue, RewindCompressInfo* rewindCompressInfo); -extern bool ProcessLocalPca(const char* tablePath, RewindCompressInfo* rewindCompressInfo); -extern void FormatPathToPca(const char* path, char* dst, size_t len, bool withPrefix = false); -extern void FormatPathToPcd(const char* path, char* dst, size_t len, bool withPrefix = false); - -#endif // OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H diff --git a/src/bin/pg_rewind/fetch.cpp b/src/bin/pg_rewind/fetch.cpp index 29696f953f6..52ae4405b62 100755 --- a/src/bin/pg_rewind/fetch.cpp +++ b/src/bin/pg_rewind/fetch.cpp @@ -23,8 +23,8 @@ #include "libpq/libpq-fe.h" #include "libpq/libpq-int.h" #include "common/fe_memutils.h" -#include "compressed_rewind.h" #include "catalog/catalog.h" +#include "PageCompression.h" #include "catalog/pg_type.h" PGconn* conn = NULL; @@ -323,9 +323,14 @@ BuildErrorCode fetchSourceFileList() } RewindCompressInfo rewindCompressInfo; RewindCompressInfo *pointer = NULL; - if (!PQgetisnull(res, i, 4) && FetchSourcePca(PQgetvalue(res, i, 4), &rewindCompressInfo)) { - filesize = rewindCompressInfo.newBlockNumber * BLCKSZ; - pointer = &rewindCompressInfo; + if (!PQgetisnull(res, i, 4)) { + size_t length = 0; + auto ptr = PQunescapeBytea((const unsigned char*)PQgetvalue(res, i, 4), &length); + if (FetchSourcePca(ptr, length, &rewindCompressInfo)) { + filesize = rewindCompressInfo.newBlockNumber * BLCKSZ; + pointer = &rewindCompressInfo; + } + PQfreemem(ptr); } process_source_file(path, type, filesize, link_target, pointer); PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res); @@ -467,19 +472,9 @@ static BuildErrorCode receiveFileChunks(const char* sql, FILE* file) securec_check_c(errorno, "\0", "\0"); chunkSize = ntohl(chunkSize); bool rebuild = *PQgetvalue(res, 0, 6) != 0; - char dst[MAXPGPATH]; - /* open pca */ - FormatPathToPca(filename, dst, MAXPGPATH, false); - OpenCompressedPcaFile(dst, chunkSize, algorithm, rebuild); - - /* open pcd */ - FormatPathToPcd(filename, dst, MAXPGPATH, false); - open_target_file(dst, false); - BlockNumber blockNumber = chunkoff; - size_t blockSize = chunkspace; - + CompressedFileInit(filename, chunkSize, algorithm, rebuild); /* fetch result */ - FetchCompressedFile(chunk, blockNumber, blockSize); + FetchCompressedFile(chunk, (BlockNumber)chunkoff, (size_t)chunkspace); } } return BUILD_SUCCESS; @@ -591,13 +586,17 @@ static void CompressedFileCopy(const file_entry_t* entry, bool rebuild) static void CompressedFileRemove(const file_entry_t* entry) { - remove_target((file_entry_t*) entry); - char* path = entry->path; - char dst[MAXPGPATH]; - FormatPathToPca(path, dst, MAXPGPATH); - remove_target_file(dst, false); - FormatPathToPcd(path, dst, MAXPGPATH); - remove_target_file(dst, false); + char path[MAXPGPATH]; + error_t rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", pg_data, 
entry->path); + securec_check_ss_c(rc, "\0", "\0"); + COMPRESS_ERROR_STATE result = PageCompression::RemoveCompressedFile(path); + if (result == NORMAL_MISSING_ERROR || result == NORMAL_UNLINK_ERROR) { + pg_fatal("could not remove compress file \"%s\": %s\n", path, strerror(errno)); + } else if (result == PCA_MISSING_ERROR || result == PCA_UNLINK_ERROR) { + pg_fatal("could not remove compress file \"%s_pca\": %s\n", path, strerror(errno)); + } else if (result == PCD_MISSING_ERROR || result == PCD_UNLINK_ERROR) { + pg_fatal("could not remove compress file \"%s_pcd\": %s\n", path, strerror(errno)); + } pg_log(PG_DEBUG, "CompressedFileRemove: %s\n", path); } @@ -966,20 +965,15 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p struct stat fst; char fullpath[MAXPGPATH]; char path[MAXPGPATH]; - const size_t MINPCANAMESIZE = 4; if (strcmp(xlde->d_name, ".") == 0 || strcmp(xlde->d_name, "..") == 0) continue; /* Skip compressed page files */ size_t dirNamePath = strlen(xlde->d_name); - if (dirNamePath >= MINPCANAMESIZE) { - const char* suffix = xlde->d_name + dirNamePath - MINPCANAMESIZE; - if (strncmp(suffix, "_pca", MINPCANAMESIZE) == 0 || strncmp(suffix, "_pcd", MINPCANAMESIZE) == 0) { - continue; - } + if (PageCompression::SkipCompressedFile(xlde->d_name, dirNamePath)) { + continue; } - ss_c = snprintf_s(fullpath, MAXPGPATH, MAXPGPATH - 1, "%s/%s", fullparentpath, xlde->d_name); securec_check_ss_c(ss_c, "\0", "\0"); @@ -1012,7 +1006,7 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p uint64 fileSize = (uint64)fst.st_size; RewindCompressInfo rewindCompressInfo; RewindCompressInfo *pointer = NULL; - if (ProcessLocalPca(path, &rewindCompressInfo)) { + if (ProcessLocalPca(path, &rewindCompressInfo, pg_data)) { fileSize = rewindCompressInfo.oldBlockNumber * BLCKSZ; pointer = &rewindCompressInfo; } diff --git a/src/bin/pg_rewind/file_ops.cpp b/src/bin/pg_rewind/file_ops.cpp index 2deea08d814..9c39a280261 100644 --- a/src/bin/pg_rewind/file_ops.cpp +++ b/src/bin/pg_rewind/file_ops.cpp @@ -25,10 +25,9 @@ #include "common/fe_memutils.h" #include "common/build_query/build_query.h" -#include "compressed_rewind.h" -#include "storage/page_compression_impl.h" #include "replication/replicainternal.h" +#include #define BLOCKSIZE (8 * 1024) /* @@ -38,7 +37,7 @@ static int dstfd = -1; static char dstpath[MAXPGPATH] = ""; static bool g_isRelDataFile = false; -static CompressedPcaInfo g_compressedPcaInfo; +static PageCompression* g_pageCompression = NULL; static void create_target_dir(const char* path); static void remove_target_dir(const char* path); @@ -102,6 +101,7 @@ void close_target_file(void) } dstfd = -1; + CompressFileClose(); } void write_target_range(char* buf, off_t begin, size_t size, int space, bool compressed) @@ -1235,136 +1235,74 @@ void CompressedFileTruncate(const char *path, const RewindCompressInfo *rewindCo if (dry_run) { return; } - - uint16 chunkSize = rewindCompressInfo->chunkSize; - + /* sanity check */ BlockNumber oldBlockNumber = rewindCompressInfo->oldBlockNumber; BlockNumber newBlockNumber = rewindCompressInfo->newBlockNumber; - Assert(oldBlockNumber > newBlockNumber); - char pcaPath[MAXPGPATH]; - FormatPathToPca(path, pcaPath, MAXPGPATH, true); - - int pcaFd = open(pcaPath, O_RDWR | PG_BINARY, 0600); - if (pcaFd < 0) { - pg_fatal("CompressedFileTruncate: could not open file \"%s\": %s\n", pcaPath, strerror(errno)); - return; - } - - PageCompressHeader* map = pc_mmap(pcaFd, chunkSize, false); - if (map == MAP_FAILED) 
{ - pg_fatal("CompressedFileTruncate: Failed to mmap file \"%s\": %s\n", pcaPath, strerror(errno)); - return; - } - /* write zero to truncated addr */ - for (BlockNumber blockNumber = newBlockNumber; blockNumber < oldBlockNumber; ++blockNumber) { - PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(map, chunkSize, blockNumber); - for (size_t i = 0; i < addr->allocated_chunks; ++i) { - addr->chunknos[i] = 0; - } - addr->nchunks = 0; - addr->allocated_chunks = 0; - addr->checksum = 0; - } - map->last_synced_nblocks = map->nblocks = newBlockNumber; - - /* find the max used chunk number */ - pc_chunk_number_t beforeUsedChunks = map->allocated_chunks; - pc_chunk_number_t max_used_chunkno = 0; - for (BlockNumber blockNumber = 0; blockNumber < newBlockNumber; ++blockNumber) { - PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(map, chunkSize, blockNumber); - for (uint8 i = 0; i < addr->allocated_chunks; i++) { - if (addr->chunknos[i] > max_used_chunkno) { - max_used_chunkno = addr->chunknos[i]; - } - } - } - map->allocated_chunks = map->last_synced_allocated_chunks = max_used_chunkno; - /* truncate pcd qfile */ - if (beforeUsedChunks > max_used_chunkno) { - char pcdPath[MAXPGPATH]; - FormatPathToPcd(path, pcdPath, MAXPGPATH, false); - truncate_target_file(pcdPath, max_used_chunkno * chunkSize); - } - pc_munmap(map); + /* construct full path */ + char fullPath[MAXPGPATH]; + errno_t rc = snprintf_s(fullPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s", pg_data, path); + securec_check_ss_c(rc, "\0", "\0"); + /* call truncate of pageCompression */ + std::unique_ptr pageCompression = std::make_unique(); + /* segno is no used here */ + auto result = pageCompression->Init(fullPath, MAXPGPATH, -1, rewindCompressInfo->chunkSize); + FileProcessErrorReport(fullPath, result); + result = pageCompression->TruncateFile(oldBlockNumber, newBlockNumber); + FileProcessErrorReport(fullPath, result); pg_log(PG_DEBUG, "CompressedFileTruncate: %s\n", path); } -void OpenCompressedPcaFile(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild) +void FetchCompressedFile(char* buf, BlockNumber blockNumber, int32 size) +{ + g_pageCompression->WriteBufferToCurrentBlock(buf, blockNumber, size); +} + +void CompressedFileInit(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild) { if (dry_run) { return; } - if (g_compressedPcaInfo.pcaFd != -1 && strcmp(fileName, &g_compressedPcaInfo.path[strlen(pg_data) + 1]) == 0) { + + if (g_pageCompression != NULL && strcmp(fileName, &g_pageCompression->GetInitPath()[strlen(pg_data) + 1]) == 0) { /* already open */ return; } - CloseCompressedPcaFile(); - int rc = snprintf_s(g_compressedPcaInfo.path, sizeof(g_compressedPcaInfo.path), - sizeof(g_compressedPcaInfo.path) - 1, - "%s/%s", pg_data, fileName); + CompressFileClose(); + /* format full poth */ + char dstPath[MAXPGPATH]; + error_t rc = snprintf_s(dstPath, sizeof(dstPath), sizeof(dstPath) - 1, "%s/%s", pg_data, fileName); securec_check_ss_c(rc, "\0", "\0"); - int mode = O_RDWR | PG_BINARY; - mode = rebuild ? 
(mode | O_TRUNC | O_CREAT) : mode; - - g_compressedPcaInfo.pcaFd = open(g_compressedPcaInfo.path, mode, S_IRUSR | S_IWUSR); - if (g_compressedPcaInfo.pcaFd < 0) { - pg_fatal("could not open compressed pca file \"%s\": %s\n", g_compressedPcaInfo.path, strerror(errno)); - return; - } - g_compressedPcaInfo.algorithm = algorithm; - g_compressedPcaInfo.chunkSize = chunkSize; - g_compressedPcaInfo.pcaMap = (char*) pc_mmap(g_compressedPcaInfo.pcaFd, chunkSize, false); - if ((void*)g_compressedPcaInfo.pcaMap == MAP_FAILED) { - pg_fatal("OpenCompressedPcaFile: Failed to mmap file \"%s\": %s\n", g_compressedPcaInfo.path, strerror(errno)); - return; + g_pageCompression = new PageCompression(); + /* segment number only used for checksum */ + auto state = g_pageCompression->Init(dstPath, strlen(dstPath), -1, chunkSize, rebuild); + FileProcessErrorReport(dstPath, state); + if (rebuild) { + PageCompressHeader* header = g_pageCompression->GetPageCompressHeader(); + header->algorithm = algorithm; + header->chunk_size= chunkSize; } } -void CloseCompressedPcaFile() +void CompressFileClose() { - if (g_compressedPcaInfo.pcaFd == -1) { - return; + if (g_pageCompression != NULL) { + delete g_pageCompression; + g_pageCompression = NULL; } - pc_munmap((PageCompressHeader*)g_compressedPcaInfo.pcaMap); - if (close(g_compressedPcaInfo.pcaFd) != 0) { - pg_fatal("could not close target file \"%s\": %s\n", g_compressedPcaInfo.path, gs_strerror(errno)); - } - g_compressedPcaInfo.pcaFd = -1; - g_compressedPcaInfo.pcaMap = NULL; - g_compressedPcaInfo.chunkSize = 0; - g_compressedPcaInfo.algorithm = 0; } -void FetchCompressedFile(char* buf, BlockNumber blockNumber, int32 size) +bool FileProcessErrorReport(const char *path, COMPRESS_ERROR_STATE errorState) { - int32 chunkSize = g_compressedPcaInfo.chunkSize; - int needChunks = size / chunkSize; - - PageCompressHeader* pcMap = (PageCompressHeader*) g_compressedPcaInfo.pcaMap; - PageCompressAddr* pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, chunkSize, blockNumber); - - // 2. 
allocate chunks - if (pcAddr->allocated_chunks < needChunks) { - auto chunkno = pg_atomic_fetch_add_u32(&pcMap->allocated_chunks, needChunks - pcAddr->allocated_chunks); - for (int i = pcAddr->allocated_chunks; i < needChunks; i++) { - pcAddr->chunknos[i] = ++chunkno; - } - pcAddr->allocated_chunks = needChunks; - } - for (int32 i = 0; i < needChunks; ++i) { - auto buffer_pos = buf + chunkSize * i; - off_t seekpos = (off_t) OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, pcAddr->chunknos[i]); - int32 start = i; - while (i < needChunks - 1 && pcAddr->chunknos[i + 1] == pcAddr->chunknos[i] + 1) { - i++; - } - int write_amount = chunkSize * (i - start + 1); - // open file dstfd - write_target_range(buffer_pos, seekpos, write_amount, 0, true); + auto errorStr = strerror(errno); + switch (errorState) { + case SUCCESS: + return true; + default: + pg_fatal("process compressed file \"%s\": %s\n", path, errorStr); + break; } - pcAddr->nchunks = pcAddr->allocated_chunks; - pcAddr->checksum = AddrChecksum32(blockNumber, pcAddr, chunkSize); -} + return false; +} \ No newline at end of file diff --git a/src/bin/pg_rewind/file_ops.h b/src/bin/pg_rewind/file_ops.h index 4b6b2b13227..9c386b50a55 100644 --- a/src/bin/pg_rewind/file_ops.h +++ b/src/bin/pg_rewind/file_ops.h @@ -12,6 +12,7 @@ #include "filemap.h" #include "compressed_common.h" +#include "PageCompression.h" extern char* pg_data; extern void open_target_file(const char* path, bool trunc); @@ -42,9 +43,11 @@ extern void delete_target_file(const char* file); extern bool isPathInFilemap(const char* path); extern bool tablespaceDataIsValid(const char* path); extern void copy_file(const char* fromfile, char* tofile); + extern void CompressedFileTruncate(const char* path, const RewindCompressInfo* rewindCompressInfo); -void FetchCompressedFile(char* buf, BlockNumber begin, int32 size); -void OpenCompressedPcaFile(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild); -void CloseCompressedPcaFile(); +extern void FetchCompressedFile(char* buf, BlockNumber begin, int32 size); +extern void CompressFileClose(); +extern void CompressedFileInit(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild); +extern bool FileProcessErrorReport(const char *path, COMPRESS_ERROR_STATE errorState); #endif /* FILE_OPS_H */ diff --git a/src/bin/pg_rewind/filemap.cpp b/src/bin/pg_rewind/filemap.cpp index cd96e41cd47..5b4b3647c2a 100755 --- a/src/bin/pg_rewind/filemap.cpp +++ b/src/bin/pg_rewind/filemap.cpp @@ -19,7 +19,7 @@ #include "catalog/catalog.h" #include "catalog/pg_tablespace.h" #include "common/fe_memutils.h" -#include "compressed_rewind.h" +#include "PageCompression.h" #include "storage/cu.h" #include "storage/smgr/fd.h" @@ -508,7 +508,7 @@ void process_source_file(const char* path, file_type_t type, size_t newsize, con /* mod blocksize 8k to avoid half page write */ RewindCompressInfo oldRewindCompressInfo; bool sourceCompressed = info != NULL; - bool targetCompressed = ProcessLocalPca(path, &oldRewindCompressInfo); + bool targetCompressed = isreldatafile && ProcessLocalPca(path, &oldRewindCompressInfo, pg_data); if (sourceCompressed && !targetCompressed) { info->compressed = false; action = FILE_ACTION_REMOVE; diff --git a/src/common/backend/utils/adt/pg_lzcompress.cpp b/src/common/backend/utils/adt/pg_lzcompress.cpp index 256c317048a..dfd7180e126 100644 --- a/src/common/backend/utils/adt/pg_lzcompress.cpp +++ b/src/common/backend/utils/adt/pg_lzcompress.cpp @@ -320,6 +320,13 @@ const PGLZ_Strategy* const PGLZ_strategy_always = 
&strategy_always_data; #define HIST_START_LEN (sizeof(PGLZ_HistEntry*) * PGLZ_HISTORY_LISTS) #define HIST_ENTRIES_LEN (sizeof(PGLZ_HistEntry) * PGLZ_HISTORY_SIZE) +#ifndef FRONTEND +#define hist_start (u_sess->utils_cxt.hist_start) +#define hist_entries (u_sess->utils_cxt.hist_entries) +#else +static PGLZ_HistEntry *hist_start[PGLZ_HISTORY_LISTS]; +static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE]; +#endif /* ---------- * pglz_find_match - * @@ -498,7 +505,7 @@ bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ * Initialize the history lists to empty. We do not need to zero the * u_sess->utils_cxt.hist_entries[] array; its entries are initialized as they are used. */ - errno_t rc = memset_s(u_sess->utils_cxt.hist_start, HIST_START_LEN, 0, HIST_START_LEN); + errno_t rc = memset_s(hist_start, HIST_START_LEN, 0, HIST_START_LEN); securec_check(rc, "\0", "\0"); /* @@ -527,7 +534,7 @@ bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ /* * Try to find a match in the history */ - if (pglz_find_match(u_sess->utils_cxt.hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { + if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { /* * Create the tag and add history entries for all matched * characters. @@ -535,7 +542,7 @@ bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); while (match_len--) { pglz_hist_add( - u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp, dend); + hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } @@ -546,7 +553,7 @@ bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ */ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); pglz_hist_add( - u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp, dend); + hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } @@ -575,7 +582,7 @@ bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ * Decompresses source into dest. * ---------- */ -void pglz_decompress(const PGLZ_Header* source, char* dest) +int32 pglz_decompress(const PGLZ_Header* source, char* dest) { const unsigned char* sp = NULL; const unsigned char* srcend = NULL; @@ -657,288 +664,16 @@ void pglz_decompress(const PGLZ_Header* source, char* dest) /* * Check we decompressed the right amount. */ - if (dp != destend || sp != srcend) + if (dp != destend || sp != srcend) { +#ifndef FRONTEND ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("compressed data is corrupt"))); - - /* - * That's it. - */ -} - -/* ---------- - * lz_compress - - * - * Compresses source into dest using strategy. Returns the number of - * bytes written in buffer dest, or -1 if compression fails. 
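Two things change in pg_lzcompress.cpp for frontend consumers: the history tables resolve to session-context storage only in backend builds and to file-local static arrays under -DFRONTEND, and pglz_decompress now returns the number of bytes written instead of void, reporting corrupt input with ereport in the backend but with -1 in frontend builds (the #else branch at the end of this hunk); the standalone lz_compress/lz_decompress variants are removed below. A hedged sketch of a frontend caller under the new signature, assuming the usual PGLZ_RAW_SIZE macro from utils/pg_lzcompress.h for sizing the output buffer:

    /* Sketch: frontend-side decompression with the new int32 return value. */
    #include "utils/pg_lzcompress.h"

    static bool InflatePglzChunk(const PGLZ_Header *src, char *dst, size_t dstLen)
    {
        /* dst must be able to hold the raw size recorded in the header */
        if (dstLen < PGLZ_RAW_SIZE(src))
            return false;
        int32 written = pglz_decompress(src, dst);
        return written >= 0;   /* -1 means corrupt compressed data (FRONTEND build) */
    }
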
- * ---------- - */ -int32 lz_compress(const char* source, int32 slen, char* dest) -{ - unsigned char* bp = (unsigned char*) dest; - unsigned char* bstart = bp; - int hist_next = 0; - bool hist_recycle = false; - const char* dp = source; - const char* dend = source + slen; - unsigned char ctrl_dummy = 0; - unsigned char* ctrlp = &ctrl_dummy; - unsigned char ctrlb = 0; - unsigned char ctrl = 0; - bool found_match = false; - int32 match_len; - int32 match_off; - int32 good_match; - int32 good_drop; - int32 result_size; - int32 result_max; - int32 need_rate; - errno_t rc; - - const PGLZ_Strategy* strategy = PGLZ_strategy_always; - /* - * Our fallback strategy is the default. - */ - if (strategy == NULL) { - strategy = PGLZ_strategy_default; - } - - /* - * If the strategy forbids compression (at all or if source chunk size out - * of range), fail. - */ - if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) { - return -1; - } - - /* - * Limit the match parameters to the supported range. - */ - good_match = strategy->match_size_good; - if (good_match > PGLZ_MAX_MATCH) { - good_match = PGLZ_MAX_MATCH; - } else if (good_match < 17) { - good_match = 17; - } - - good_drop = strategy->match_size_drop; - if (good_drop < 0) { - good_drop = 0; - } else if (good_drop > 100) { - good_drop = 100; - } - - need_rate = strategy->min_comp_rate; - if (need_rate < 0) { - need_rate = 0; - } else if (need_rate > 99) { - need_rate = 99; - } - - /* - * Compute the maximum result size allowed by the strategy, namely the - * input size minus the minimum wanted compression rate. This had better - * be <= slen, else we might overrun the provided output buffer. - */ - if (slen > (INT_MAX / 100)) { - /* Approximate to avoid overflow */ - result_max = (slen / 100) * (100 - need_rate); - } else { - result_max = (slen * (100 - need_rate)) / 100; - } - - /* - * Initialize the history lists to empty. We do not need to zero the - * hist_entries[] array; its entries are initialized as they are used. - */ - rc = memset_s(u_sess->utils_cxt.hist_start, HIST_START_LEN, 0, HIST_START_LEN); - securec_check(rc, "\0", "\0"); - - /* - * Compress the source directly into the output buffer. - */ - while (dp < dend) { - /* - * If we already exceeded the maximum result size, fail. - * - * We check once per loop; since the loop body could emit as many as 4 - * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better - * allow 4 slop bytes. - */ - if (bp - bstart >= result_max) { - return -1; - } - - /* - * If we've emitted more than first_success_by bytes without finding - * anything compressible at all, fail. This lets us fall out - * reasonably quickly when looking at incompressible input (such as - * pre-compressed data). - */ - if (!found_match && bp - bstart >= strategy->first_success_by) { - return -1; - } - - /* - * Try to find a match in the history - */ - if (pglz_find_match(u_sess->utils_cxt.hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { - /* - * Create the tag and add history entries for all matched - * characters. - */ - pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); - while (match_len--) { - pglz_hist_add( - u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp, - dend); - dp++; /* Do not do this ++ in the line above! */ - /* The macro would do it four times - Jan. */ - } - found_match = true; - } else { - /* - * No match found. Copy one literal byte. 
- */ - pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); - pglz_hist_add( - u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp, dend); - dp++; /* Do not do this ++ in the line above! */ - /* The macro would do it four times - Jan. */ - } - } - - /* - * Write out the last control byte and check that we haven't overrun the - * output size allowed by the strategy. - */ - *ctrlp = ctrlb; - result_size = bp - bstart; - if (result_size >= result_max) { - return -1; - } - - /* success */ - return result_size; -} - -/* ---------- - * pglz_decompress - - * - * Decompresses source into dest. Returns the number of bytes - * decompressed in the destination buffer, and *optionally* - * checks that both the source and dest buffers have been - * fully read and written to, respectively. - * ---------- - */ -int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete) -{ - const unsigned char* sp; - const unsigned char* srcend; - unsigned char* dp; - unsigned char* destend; - errno_t rc = 0; - - sp = (const unsigned char*) source; - srcend = ((const unsigned char*) source) + slen; - dp = (unsigned char*) dest; - destend = dp + rawsize; - - while (sp < srcend && dp < destend) { - /* - * Read one control byte and process the next 8 items (or as many as - * remain in the compressed input). - */ - unsigned char ctrl = *sp++; - int ctrlc; - - for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) { - if (ctrl & 1) { - /* - * Set control bit means we must read a match tag. The match - * is coded with two bytes. First byte uses lower nibble to - * code length - 3. Higher nibble contains upper 4 bits of the - * offset. The next following byte contains the lower 8 bits - * of the offset. If the length is coded as 18, another - * extension tag byte tells how much longer the match really - * was (0-255). - */ - int32 len; - int32 off; - - len = (sp[0] & 0x0f) + 3; - off = ((sp[0] & 0xf0) << 4) | sp[1]; - sp += 2; - if (len == 18) { - len += *sp++; - } - - /* - * Now we copy the bytes specified by the tag from OUTPUT to - * OUTPUT (copy len bytes from dp - off to dp). The copied - * areas could overlap, to preven possible uncertainty, we - * copy only non-overlapping regions. - */ - len = Min(len, destend - dp); - while (off < len) { - /*--------- - * When offset is smaller than length - source and - * destination regions overlap. memmove() is resolving - * this overlap in an incompatible way with pglz. Thus we - * resort to memcpy()-ing non-overlapping regions. - * - * Consider input: 112341234123412341234 - * At byte 5 here ^ we have match with length 16 and - * offset 4. 11234M(len=16, off=4) - * We are decoding first period of match and rewrite match - * 112341234M(len=12, off=8) - * - * The same match is now at position 9, it points to the - * same start byte of output, but from another position: - * the offset is doubled. - * - * We iterate through this offset growth until we can - * proceed to usual memcpy(). If we would try to decode - * the match at byte 5 (len=16, off=4) by memmove() we - * would issue memmove(5, 1, 16) which would produce - * 112341234XXXXXXXXXXXX, where series of X is 12 - * undefined bytes, that were at bytes [5:17]. 
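
The long comment above is the heart of the deleted lz_decompress(): a back-reference whose offset is smaller than its length must be expanded by repeatedly copying non-overlapping pieces while doubling the effective offset, because a single memmove() would not reproduce the repeated pattern. A standalone sketch of just that copy step, with illustrative names:

    #include <cstring>

    /* Expand a pglz-style back-reference at dp that points off bytes back and
     * is len bytes long, when source and destination regions may overlap. */
    static void CopyOverlappingMatch(unsigned char *dp, int off, int len)
    {
        while (off < len) {
            std::memcpy(dp, dp - off, off);   /* regions are disjoint here */
            dp  += off;
            len -= off;
            off += off;                       /* the repeating period doubles */
        }
        std::memcpy(dp, dp - off, len);       /* final non-overlapping tail */
    }
    /* For a buffer starting "1" and a match (off = 1, len = 4) this yields
     * "11111"; memmove() over the same region would instead copy bytes that
     * have not been written yet. */
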
- * --------- - */ - errno_t rc = memcpy_s(dp, off + 1, dp - off, off); - securec_check(rc, "", ""); - len -= off; - dp += off; - off += off; - } - rc = memcpy_s(dp, len + 1, dp - off, len); - securec_check(rc, "", ""); - dp += len; - } else { - /* - * An unset control bit means LITERAL BYTE. So we just copy - * one from INPUT to OUTPUT. - */ - *dp++ = *sp++; - } - - /* - * Advance the control bit - */ - ctrl >>= 1; - } - } - - /* - * Check we decompressed the right amount. If we are slicing, then we - * won't necessarily be at the end of the source or dest buffers when we - * hit a stop, so we don't test them. - */ - if (check_complete && (dp != destend || sp != srcend)) { +#else return -1; +#endif } /* * That's it. */ return (char*) dp - dest; -} +} \ No newline at end of file diff --git a/src/gausskernel/storage/page/checksum_impl.cpp b/src/gausskernel/storage/page/checksum_impl.cpp index 0d306ef2e18..bd7b4df4ff0 100644 --- a/src/gausskernel/storage/page/checksum_impl.cpp +++ b/src/gausskernel/storage/page/checksum_impl.cpp @@ -16,12 +16,86 @@ #include "knl/knl_variable.h" #include "storage/checksum_impl.h" +void ChecksumForZeroPadding(uint32 *sums, const uint32 *dataArr, uint32 currentLeft, uint32 alignSize); + static inline uint32 pg_checksum_init(uint32 seed, uint32 value) { CHECKSUM_COMP(seed, value); return seed; } +uint32 DataBlockChecksum(char* data, uint32 size, bool zeroing) +{ + uint32 sums[N_SUMS]; + uint32* dataArr = (uint32*)data; + uint32 result = 0; + uint32 i, j; + uint32 currentLeft = size; + + /* ensure that the size is compatible with the algorithm */ + uint32 alignSize = sizeof(uint32) * N_SUMS; + Assert(zeroing || (size % alignSize == 0)); + + /* initialize partial checksums to their corresponding offsets */ + auto realSize = size < alignSize ? 
size : alignSize; + + uint32 *initUint32 = NULL; + char usedForInit[sizeof(uint32) * N_SUMS] = {0}; + if (zeroing && size < alignSize) { + errno_t rc = memcpy_s(usedForInit, alignSize, (char *) dataArr, realSize); + securec_check(rc, "", ""); + currentLeft -= realSize; + initUint32 = (uint32*)usedForInit; + } else { + initUint32 = dataArr; + currentLeft -= alignSize; + } + + for (j = 0; j < N_SUMS; j += 2) { + sums[j] = pg_checksum_init(g_checksumBaseOffsets[j], initUint32[j]); + sums[j + 1] = pg_checksum_init(g_checksumBaseOffsets[j + 1], initUint32[j + 1]); + } + dataArr += N_SUMS; + + /* main checksum calculation */ + for (i = 1; i < size / alignSize; i++) { + for (j = 0; j < N_SUMS; j += 2) { + CHECKSUM_COMP(sums[j], dataArr[j]); + CHECKSUM_COMP(sums[j + 1], dataArr[j + 1]); + } + dataArr += N_SUMS; + } + + /* checksum for zero padding */ + currentLeft -= alignSize * (i - 1); + if (currentLeft > 0 && currentLeft < alignSize && zeroing) { + ChecksumForZeroPadding(sums, dataArr, currentLeft, alignSize); + } + + /* finally add in two rounds of zeroes for additional mixing */ + for (j = 0; j < N_SUMS; j++) { + CHECKSUM_COMP(sums[j], 0); + CHECKSUM_COMP(sums[j], 0); + + /* xor fold partial checksums together */ + result ^= sums[j]; + } + + return result; +} + +void ChecksumForZeroPadding(uint32 *sums, const uint32 *dataArr, uint32 currentLeft, uint32 alignSize) +{ + auto maxLen = sizeof(uint32) * N_SUMS; + char currentLeftChars[maxLen] = {0}; + errno_t rc = memcpy_s(currentLeftChars, maxLen, (char *)dataArr, currentLeft); + securec_check(rc, "", ""); + for (int j = 0; j < N_SUMS; j += 2) { + CHECKSUM_COMP(sums[j], ((uint32 *)currentLeftChars)[j]); + CHECKSUM_COMP(sums[j + 1], ((uint32 *)currentLeftChars)[j + 1]); + } +} + uint32 pg_checksum_block(char* data, uint32 size) { uint32 sums[N_SUMS]; diff --git a/src/gausskernel/storage/smgr/md.cpp b/src/gausskernel/storage/smgr/md.cpp index 1a4ca549849..e165b1001da 100644 --- a/src/gausskernel/storage/smgr/md.cpp +++ b/src/gausskernel/storage/smgr/md.cpp @@ -2382,7 +2382,7 @@ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) /* find the max used chunkno */ for (BlockNumber blk = (BlockNumber) 0; blk < (BlockNumber) last_seg_blocks; blk++) { pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, chunk_size, blk); - /* check allocated_chunks for one page */ + /* check allocated_chunks for one page */ if (pcAddr->allocated_chunks > BLCKSZ / chunk_size) { ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid chunks %u of block %u in file \"%s\"", diff --git a/src/gausskernel/storage/smgr/page_compression.cpp b/src/gausskernel/storage/smgr/page_compression.cpp index eed1be81639..fb1167fb12f 100644 --- a/src/gausskernel/storage/smgr/page_compression.cpp +++ b/src/gausskernel/storage/smgr/page_compression.cpp @@ -450,20 +450,6 @@ size_t ReadAllChunkOfBlock(char *dst, size_t destLen, BlockNumber blockNumber, R return allocatedChunks * chunkSize; } -CompressedFileType IsCompressedFile(char *fileName, size_t fileNameLen) -{ - size_t suffixLen = 4; - if (fileNameLen >= suffixLen) { - const char *suffix = fileName + fileNameLen - suffixLen; - if (strncmp(suffix, "_pca", suffixLen) == 0) { - return COMPRESSED_TABLE_PCA_FILE; - } else if (strncmp(suffix, "_pcd", suffixLen) == 0) { - return COMPRESSED_TABLE_PCD_FILE; - } - } - return COMPRESSED_TYPE_UNKNOWN; -} - void ReleaseMap(PageCompressHeader* map, const char* fileName) { if (map != NULL && pc_munmap(map) != 0) { diff --git a/src/include/knl/knl_session.h 
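
DataBlockChecksum() above generalizes pg_checksum_block() to buffers whose length is not a multiple of sizeof(uint32) * N_SUMS: with zeroing = true, a short tail (or a whole buffer shorter than one group) is processed as if padded with zero bytes. That is what lets the rest of this patch checksum variable-length compressed payloads. A hedged usage sketch; the wrapper name is illustrative:

    #include "storage/checksum_impl.h"

    /* Sketch: checksum an arbitrary-length compressed payload. With
     * zeroing = true the bytes past `size`, up to the next
     * sizeof(uint32) * N_SUMS boundary, are treated as zeroes, so the same
     * value can be recomputed later from exactly `size` stored bytes. */
    static uint32 ChecksumCompressedPayload(char *payload, uint32 size)
    {
        return DataBlockChecksum(payload, size, true);
    }
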
b/src/include/knl/knl_session.h index 2cf6c45138b..fbd7b338bd1 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -470,6 +470,11 @@ typedef struct knl_u_utils_context { struct PGLZ_HistEntry* hist_entries; + struct PGLZ_HistEntry** new_hist_start; + + struct PGLZ_HistEntry* new_hist_entries; + + char* analysis_options_configure; int* guc_new_value; diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h index 0176a0e888b..3b6fed990e4 100644 --- a/src/include/storage/checksum_impl.h +++ b/src/include/storage/checksum_impl.h @@ -160,5 +160,5 @@ static const uint32 g_checksumBaseOffsets[N_SUMS] = {0x5B1F36E9, * boundary. */ uint32 pg_checksum_block(char* data, uint32 size); - +uint32 DataBlockChecksum(char* data, uint32 size, bool zeroing); uint16 pg_checksum_page(char* page, BlockNumber blkno); diff --git a/src/include/storage/page_compression.h b/src/include/storage/page_compression.h index 5a181d1a44c..33b671ed38f 100644 --- a/src/include/storage/page_compression.h +++ b/src/include/storage/page_compression.h @@ -42,9 +42,9 @@ constexpr uint32 COMPRESS_ADDRESS_FLUSH_CHUNKS = 5000; #define SUPPORT_COMPRESSED(relKind, relam) \ ((relKind) == RELKIND_RELATION || ((relKind) == RELKIND_INDEX && (relam) == BTREE_AM_OID)) -#define REL_SUPPORT_COMPRESSED(relation) \ - ((relation->rd_rel->relkind) == RELKIND_RELATION || \ - ((relation->rd_rel->relkind) == RELKIND_INDEX && (relation->rd_rel->relam) == BTREE_AM_OID)) +#define REL_SUPPORT_COMPRESSED(relation) \ + (((relation)->rd_rel->relkind) == RELKIND_RELATION || \ + (((relation)->rd_rel->relkind) == RELKIND_INDEX && ((relation)->rd_rel->relam) == BTREE_AM_OID)) typedef uint32 pc_chunk_number_t; const uint32 PAGE_COMPRESSION_VERSION = 92603; @@ -97,6 +97,7 @@ struct ReadBlockChunksStruct { typedef struct PageCompressData { char page_header[SizeOfPageHeaderData]; /* page header */ + uint32 crc32; uint32 size : 16; /* size of compressed data */ uint32 byte_convert : 1; uint32 diff_convert : 1; @@ -107,6 +108,7 @@ typedef struct PageCompressData { typedef struct HeapPageCompressData { char page_header[SizeOfHeapPageHeaderData]; /* page header */ + uint32 crc32; uint32 size : 16; /* size of compressed data */ uint32 byte_convert : 1; uint32 diff_convert : 1; diff --git a/src/include/storage/page_compression_impl.h b/src/include/storage/page_compression_impl.h index 4193b715246..f072170b360 100644 --- a/src/include/storage/page_compression_impl.h +++ b/src/include/storage/page_compression_impl.h @@ -20,6 +20,7 @@ #include #include "storage/page_compression.h" +#include "storage/checksum_impl.h" #include "utils/pg_lzcompress.h" #include @@ -41,8 +42,6 @@ #endif -#ifndef FRONTEND - /** * return data of page * @param dst HeapPageCompressData or HeapPageCompressData @@ -420,13 +419,19 @@ int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompre char* data = GetPageCompressedData(dst, heapPageData); switch (option.compressAlgorithm) { - case COMPRESS_ALGORITHM_PGLZ: + case COMPRESS_ALGORITHM_PGLZ: { + bool success; if (real_ByteConvert) { - compressed_size = lz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); + success = pglz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, (PGLZ_Header *)data, + PGLZ_strategy_default); } else { - compressed_size = lz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); + success = pglz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, (PGLZ_Header *)data, + 
PGLZ_strategy_default); } + compressed_size = success ? VARSIZE(data) : BLCKSZ; + compressed_size = compressed_size < BLCKSZ ? compressed_size : BLCKSZ; break; + } case COMPRESS_ALGORITHM_ZSTD: { if (level == 0 || level < MIN_ZSTD_COMPRESSION_LEVEL || level > MAX_ZSTD_COMPRESSION_LEVEL) { level = DEFAULT_ZSTD_COMPRESSION_LEVEL; @@ -461,6 +466,7 @@ int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompre rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData); securec_check(rc, "", ""); pcdptr->size = compressed_size; + pcdptr->crc32 = DataBlockChecksum(data, compressed_size, true); pcdptr->byte_convert = real_ByteConvert; pcdptr->diff_convert = option.diffConvert; } else { @@ -468,6 +474,7 @@ int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompre rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData); securec_check(rc, "", ""); pcdptr->size = compressed_size; + pcdptr->crc32 = DataBlockChecksum(data, compressed_size, true); pcdptr->byte_convert = real_ByteConvert; pcdptr->diff_convert = option.diffConvert; } @@ -599,6 +606,7 @@ int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm) int decompressed_size; char* data; uint32 size; + uint32 crc32; bool byte_convert, diff_convert; size_t headerSize = GetSizeOfHeadData(heapPageData); int rc = memcpy_s(dst, headerSize, src, headerSize); @@ -607,18 +615,26 @@ int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm) if (heapPageData) { data = ((HeapPageCompressData*)src)->data; size = ((HeapPageCompressData*)src)->size; + crc32 = ((HeapPageCompressData*)src)->crc32; byte_convert = ((HeapPageCompressData*)src)->byte_convert; diff_convert = ((HeapPageCompressData*)src)->diff_convert; } else { data = ((PageCompressData*)src)->data; size = ((PageCompressData*)src)->size; + crc32 = ((PageCompressData*)src)->crc32; byte_convert = ((PageCompressData*)src)->byte_convert; diff_convert = ((PageCompressData*)src)->diff_convert; } + if (DataBlockChecksum(data, size, true) != crc32) { + return -2; + } switch (algorithm) { case COMPRESS_ALGORITHM_PGLZ: - decompressed_size = lz_decompress(data, size, dst + headerSize, BLCKSZ - headerSize, false); + decompressed_size = pglz_decompress((const PGLZ_Header* )data, dst + headerSize); + if (decompressed_size == -1) { + return -1; + } break; case COMPRESS_ALGORITHM_ZSTD: decompressed_size = ZSTD_decompress(dst + headerSize, BLCKSZ - headerSize, data, size); @@ -637,7 +653,6 @@ int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm) return headerSize + decompressed_size; } -#endif /** * pc_mmap() -- create memory map for page compress file's address area. 
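
With the crc32 check and the pglz return-code change above, decompressing a page now has two distinct failure modes. A sketch of how a caller might tell them apart; it assumes DecompressPage(), the wrapper used elsewhere in this patch, passes the value from TemplateDecompressPage() through unchanged, which is an assumption, while the meanings of -2 and -1 come from the hunk.

    /* Sketch only: interpret the result of decompressing one compressed page. */
    static const char *DescribeDecompressResult(int result)
    {
        if (result == -2) {
            return "crc32 mismatch on the compressed payload";   /* new check */
        }
        if (result == -1) {
            return "corrupt compressed stream";                  /* e.g. pglz failure */
        }
        return "ok";  /* result == header size + decompressed payload size */
    }
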
@@ -656,8 +671,8 @@ PageCompressHeader* pc_mmap(int fd, int chunk_size, bool readonly) extern PageCompressHeader* pc_mmap_real_size(int fd, int pc_memory_map_size, bool readonly) { PageCompressHeader* map = NULL; - int file_size = lseek(fd, 0, SEEK_END); - if (file_size != pc_memory_map_size) { + int fileSize = lseek(fd, 0, SEEK_END); + if (fileSize != pc_memory_map_size) { if (ftruncate(fd, pc_memory_map_size) != 0) { return (PageCompressHeader*) MAP_FAILED; } @@ -685,11 +700,6 @@ int pc_munmap(PageCompressHeader *map) */ int pc_msync(PageCompressHeader *map) { -#ifndef FRONTEND - if (!u_sess->attr.attr_storage.enableFsync) { - return 0; - } -#endif return msync(map, SIZE_OF_PAGE_COMPRESS_ADDR_FILE(map->chunk_size), MS_SYNC); } @@ -718,4 +728,18 @@ uint32 AddrChecksum32(BlockNumber blockNumber, const PageCompressAddr* pageCompr return checkSum; } +CompressedFileType IsCompressedFile(char *fileName, size_t fileNameLen) +{ + size_t suffixLen = 4; + if (fileNameLen >= suffixLen) { + const char *suffix = fileName + fileNameLen - suffixLen; + if (strncmp(suffix, "_pca", suffixLen) == 0) { + return COMPRESSED_TABLE_PCA_FILE; + } else if (strncmp(suffix, "_pcd", suffixLen) == 0) { + return COMPRESSED_TABLE_PCD_FILE; + } + } + return COMPRESSED_TYPE_UNKNOWN; +} + #endif diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h index 431b4ba1b7f..b21a0b12ea0 100644 --- a/src/include/utils/pg_lzcompress.h +++ b/src/include/utils/pg_lzcompress.h @@ -125,11 +125,6 @@ extern const PGLZ_Strategy* const PGLZ_strategy_always; * ---------- */ extern bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ_Strategy* strategy); - -extern void pglz_decompress(const PGLZ_Header* source, char* dest); - -extern int32 lz_compress(const char* source, int32 slen, char* dest); - -extern int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete); +extern int32 pglz_decompress(const PGLZ_Header* source, char* dest); #endif /* _PG_LZCOMPRESS_H_ */ diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index d30c9a68f5c..2e153d9568f 100755 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -13,6 +13,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/alarm ${CMAKE_CURRENT_SOURCE_DIR}/build_query ${CMAKE_CURRENT_SOURCE_DIR}/config + ${CMAKE_CURRENT_SOURCE_DIR}/page_compression ${CMAKE_CURRENT_SOURCE_DIR}/pgcommon ${CMAKE_CURRENT_SOURCE_DIR}/cm_common ${CMAKE_CURRENT_SOURCE_DIR}/cm_communication @@ -24,6 +25,7 @@ add_subdirectory(hotpatch) add_subdirectory(alarm) add_subdirectory(build_query) add_subdirectory(config) +add_subdirectory(page_compression) add_subdirectory(pgcommon) add_subdirectory(cm_common) add_subdirectory(cm_communication) diff --git a/src/lib/Makefile b/src/lib/Makefile index 8f0c8021b5b..ebf86ee9f66 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -26,7 +26,7 @@ subdir = src/lib top_builddir = ../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = build_query config pgcommon alarm gstrace hotpatch cm_common cm_communication +SUBDIRS = build_query config pgcommon alarm gstrace hotpatch cm_common cm_communication page_compression $(recurse) diff --git a/src/lib/page_compression/CMakeLists.txt b/src/lib/page_compression/CMakeLists.txt new file mode 100644 index 00000000000..3d5618f964f --- /dev/null +++ b/src/lib/page_compression/CMakeLists.txt @@ -0,0 +1,15 @@ +#This is the main CMAKE for build all components. 
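
IsCompressedFile() moves above into page_compression_impl.h so that frontend code can classify files by suffix without the backend-only definition it previously lived next to. A small usage sketch; the directory-scan framing and the helper name are illustrative, and the include is assumed to make the declaration visible:

    #include <cstring>
    #include "storage/page_compression.h"

    /* Sketch: decide during a directory scan whether a file belongs to a
     * compressed relation (_pca address map or _pcd chunk data) rather than
     * being an ordinary relation segment. */
    static bool IsCompressionMetaOrData(const char *name)
    {
        switch (IsCompressedFile((char *)name, strlen(name))) {
            case COMPRESSED_TABLE_PCA_FILE:   /* "<relfilenode>_pca" */
            case COMPRESSED_TABLE_PCD_FILE:   /* "<relfilenode>_pcd" */
                return true;
            default:                          /* COMPRESSED_TYPE_UNKNOWN */
                return false;
        }
    }
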
+execute_process(COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/page_compression.sh ${PROJECT_SRC_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +MESSAGE( STATUS "pagehack sh ${CMAKE_CURRENT_SOURCE_DIR}/page_compression.sh ${PROJECT_SRC_DIR} ${CMAKE_CURRENT_SOURCE_DIR}.") +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_pagecompression_SRC) +set(TGT_pagecompression_INC ${ZSTD_INCLUDE_PATH}) +set(pagecompression_DEF_OPTIONS ${MACRO_OPTIONS} -DFRONTEND) +set(pagecompreesion_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${LIB_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(pagecompression_LINK_OPTIONS ${LIB_LINK_OPTIONS}) +set(pagecompression_LINK_LIBS -lzstd) +list(REMOVE_ITEM pagecompression_LINK_OPTIONS -pthread) +add_shared_libtarget(pagecompression TGT_pagecompression_SRC TGT_pagecompression_INC "${pagecompression_DEF_OPTIONS}" "${pagecompreesion_COMPILE_OPTIONS}" "${pagecompression_LINK_OPTIONS}" "${pagecompression_LINK_LIBS}") +SET_TARGET_PROPERTIES(pagecompression PROPERTIES VERSION 1) + + +install(TARGETS pagecompression LIBRARY DESTINATION lib) diff --git a/src/lib/page_compression/Makefile b/src/lib/page_compression/Makefile new file mode 100644 index 00000000000..d122759b593 --- /dev/null +++ b/src/lib/page_compression/Makefile @@ -0,0 +1,65 @@ +# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# --------------------------------------------------------------------------------------- +# +# Makefile +# Makefile for src/lib/config/ +# +# IDENTIFICATION +# src/lib/config/Makefile +# +# --------------------------------------------------------------------------------------- + +subdir = src/lib/page_compression +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +VERSION = 1 + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif + +override CPPFLAGS := $(filter-out -fPIE, $(CPPFLAGS)) -fPIC -DFRONTEND -I. # backend/utils/errcodes.h +override CFLAGS := $(filter-out -fPIE, $(CFLAGS)) -fPIC -DFRONTEND -I. # backend/utils/errcodes.h + +OBJS = PageCompression.o + +TARGET = libpagecompression.a + +all: libpagecompression.so + +libpagecompression.so: + sh page_compression.sh $(top_builddir)/src . 
+ $(CC) -fstack-protector-strong -Wl,-z,relro,-z,now -fPIC -shared $(CFLAGS) $(CPPFLAGS) PageCompression.cpp checksum_impl.cpp pg_lzcompress.cpp -o libpagecompression.so.$(VERSION) + rm -f libpagecompression.so && \ + ln -s libpagecompression.so.$(VERSION) libpagecompression.so + +install: all installdirs + $(INSTALL_STLIB) libpagecompression.so.$(VERSION) '$(DESTDIR)$(libdir)/libpagecompression.so.$(VERSION)' + cd '$(DESTDIR)$(libdir)' && \ + rm -f libpagecompression.so && \ + ln -s libpagecompression.so.$(VERSION) libpagecompression.so + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(libdir)' + +clean distclean maintainer-clean: + rm -f $(OBJS) libpagecompression.a pg_lzcompress.o checksum_impl.o libpagecompression.so libpagecompression.so.$(VERSION) *.depend + rm -f checksum_impl.cpp pg_lzcompress.cpp + rm -rf utils storage diff --git a/src/lib/page_compression/PageCompression.cpp b/src/lib/page_compression/PageCompression.cpp new file mode 100644 index 00000000000..b4b6b7dbfde --- /dev/null +++ b/src/lib/page_compression/PageCompression.cpp @@ -0,0 +1,521 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. + */ +#include "PageCompression.h" +#include "utils/pg_lzcompress.h" +#include "storage/page_compression_impl.h" + +#include + + +void FormatPathToPca(const char *path, char *dst, size_t len, const char *prefix) +{ + errno_t rc; + if (prefix) { + rc = snprintf_s(dst, len, len - 1, "%s/" PCA_SUFFIX, prefix, path); + } else { + rc = snprintf_s(dst, len, len - 1, PCA_SUFFIX, path); + } + securec_check_ss_c(rc, "\0", "\0"); +} + +void FormatPathToPcd(const char *path, char *dst, size_t len, const char *prefix) +{ + errno_t rc; + if (prefix) { + rc = snprintf_s(dst, len, len - 1, "%s/" PCD_SUFFIX, prefix, path); + } else { + rc = snprintf_s(dst, len, len - 1, PCD_SUFFIX, path); + } + securec_check_ss_c(rc, "\0", "\0"); +} + +template +COMPRESS_ERROR_STATE ReadCompressedInfo(T &t, off_t offset, FILE *file) +{ + if (fseeko(file, offset, SEEK_SET) != 0) { + return PCA_SEEK_ERROR; + } + if (fread((void *)(&t), sizeof(t), 1, file) <= 0) { + return PCA_READ_ERROR; + } + return SUCCESS; +} + +/** + * write RewindCompressInfo + * @param file file fp + * @param pcaFilePath file path,for ereport + * @param rewindCompressInfo pointer of return + * @return sucesss or not + */ +static bool ReadRewindCompressedInfo(FILE *file, RewindCompressInfo *rewindCompressInfo) +{ + off_t offset = (off_t)offsetof(PageCompressHeader, chunk_size); + if (ReadCompressedInfo(rewindCompressInfo->chunkSize, offset, file) != SUCCESS) { + return false; + } + offset = (off_t)offsetof(PageCompressHeader, algorithm); + if (ReadCompressedInfo(rewindCompressInfo->algorithm, offset, file) != SUCCESS) { + return false; + } + offset = (off_t)offsetof(PageCompressHeader, nblocks); + if (ReadCompressedInfo(rewindCompressInfo->oldBlockNumber, offset, file) != SUCCESS) { + return false; + } + rewindCompressInfo->compressed = true; + return true; +} + +bool FetchSourcePca(unsigned char *pageCompressHeader, size_t len, RewindCompressInfo *rewindCompressInfo) +{ + PageCompressHeader *ptr = (PageCompressHeader *)pageCompressHeader; + rewindCompressInfo->compressed = false; + if (len == sizeof(PageCompressHeader)) { + rewindCompressInfo->compressed = true; + rewindCompressInfo->algorithm = ptr->algorithm; + rewindCompressInfo->newBlockNumber = ptr->nblocks; + rewindCompressInfo->oldBlockNumber = 0; + rewindCompressInfo->chunkSize = ptr->chunk_size; + } + return rewindCompressInfo->compressed; +} + +bool 
ProcessLocalPca(const char *tablePath, RewindCompressInfo *rewindCompressInfo, const char *prefix) +{ + rewindCompressInfo->compressed = false; + char pcaFilePath[MAXPGPATH]; + FormatPathToPca(tablePath, pcaFilePath, MAXPGPATH, prefix); + FILE *file = fopen(pcaFilePath, "rb"); + if (file == NULL) { + if (errno == ENOENT) { + return false; + } + return false; + } + bool success = ReadRewindCompressedInfo(file, rewindCompressInfo); + fclose(file); + return success; +} + +constexpr int MAX_RETRY_LIMIT = 60; +constexpr long RETRY_SLEEP_TIME = 1000000L; + +BlockNumber PageCompression::GetSegmentNo() const +{ + return this->segmentNo; +} + +size_t PageCompression::ReadCompressedBuffer(BlockNumber blockNum, char *buffer, size_t bufferLen, bool zeroAlign) +{ + auto chunkSize = this->header->chunk_size; + PageCompressAddr *currentAddr = GET_PAGE_COMPRESS_ADDR(this->header, chunkSize, blockNum); + size_t tryCount = 0; + size_t actualSize = 0; + do { + auto chunkNum = currentAddr->nchunks; + actualSize = chunkSize * chunkNum; + for (uint8 i = 0; i < chunkNum; i++) { + off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]); + uint8 start = i; + while (i < chunkNum - 1 && currentAddr->chunknos[i + 1] == currentAddr->chunknos[i] + 1) { + i++; + } + if (fseeko(this->pcdFile, seekPos, SEEK_SET) != 0) { + return 0; + } + size_t readAmount = chunkSize * (i - start + 1); + if (fread(buffer + start * chunkSize, 1, readAmount, this->pcdFile) != readAmount && + ferror(this->pcdFile)) { + return 0; + } + } + if (chunkNum == 0) { + return 0; + } + + /* compressed chunk */ + if (chunkNum * chunkSize < BLCKSZ) { + if (PageCompression::InnerPageCompressChecksum(buffer)) { + break; + } + } else if (PageIsNew(buffer) || pg_checksum_page(buffer, this->segmentNo * RELSEG_SIZE + blockNum) == + (PageHeader(buffer))->pd_checksum) { + break; + } + + if (tryCount < MAX_RETRY_LIMIT) { + ++tryCount; + pg_usleep(RETRY_SLEEP_TIME); + } else { + return 0; + } + } while (true); + + if (zeroAlign) { + error_t rc = memset_s(buffer + actualSize, bufferLen - actualSize, 0, bufferLen - actualSize); + securec_check(rc, "\0", "\0"); + actualSize = bufferLen; + } + return actualSize; +} + +PageCompression::~PageCompression() +{ + if (this->header) { + pc_munmap(this->header); + } + if (this->pcaFile) { + fclose(this->pcaFile); + } + if (this->pcdFile) { + fclose(this->pcdFile); + } +} + +bool PageCompression::InnerPageCompressChecksum(const char *buffer) +{ + char *data = NULL; + size_t dataLen; + uint32 crc32; + if (PageIs8BXidHeapVersion(buffer)) { + HeapPageCompressData *heapPageData = (HeapPageCompressData *)buffer; + data = heapPageData->data; + dataLen = heapPageData->size; + crc32 = heapPageData->crc32; + } else { + PageCompressData *heapPageData = (PageCompressData *)buffer; + data = heapPageData->data; + dataLen = heapPageData->size; + crc32 = heapPageData->crc32; + } + return DataBlockChecksum(data, dataLen, true) == crc32; +} + +const char *PageCompression::GetInitPath() const +{ + return this->initPath; +} + +COMPRESS_ERROR_STATE PageCompression::Init(const char *filePath, size_t len, BlockNumber inSegmentNo, uint16 chunkSize, + bool create) +{ + errno_t rc = memcpy_s(this->initPath, MAXPGPATH, filePath, len); + securec_check(rc, "", ""); + + this->segmentNo = inSegmentNo; + char compressedFilePath[MAXPGPATH]; + FormatPathToPca(filePath, compressedFilePath, MAXPGPATH); + if ((this->pcaFile = fopen(compressedFilePath, create ? 
"wb+" : "rb+")) == nullptr) { + return PCA_OPEN_ERROR; + } + + FormatPathToPcd(filePath, compressedFilePath, MAXPGPATH); + if ((this->pcdFile = fopen(compressedFilePath, create ? "wb+" : "rb+")) == nullptr) { + return PCD_OPEN_ERROR; + } + + if (chunkSize == 0) { + /* read chunk size from pca file if chunk size is invalid */ + auto state = ReadCompressedInfo(chunkSize, (off_t)offsetof(PageCompressHeader, chunk_size), this->pcaFile); + if (state != SUCCESS) { + return state; + } + } + this->chunkSize = chunkSize; + if ((this->header = pc_mmap(fileno(this->pcaFile), chunkSize, false)) == MAP_FAILED) { + return PCA_MMAP_ERROR; + } + return SUCCESS; +} + +bool PageCompression::SkipCompressedFile(const char *fileName, size_t len) +{ + auto realSize = strlen(fileName); + auto fileType = IsCompressedFile((char *)fileName, realSize < len ? realSize : len); + return fileType != COMPRESSED_TYPE_UNKNOWN; +} + +bool PageCompression::IsCompressedTableFile(const char *fileName, size_t len) +{ + char pcdFilePath[MAXPGPATH]; + errno_t rc = snprintf_s(pcdFilePath, MAXPGPATH, MAXPGPATH - 1, PCD_SUFFIX, fileName); + securec_check_ss_c(rc, "\0", "\0"); + struct stat buf; + int result = stat(pcdFilePath, &buf); + return result == 0; +} + +void PageCompression::ResetPcdFd() +{ + this->pcdFile = NULL; +} + +FILE *PageCompression::GetPcdFile() const +{ + return this->pcdFile; +} + +PageCompressHeader *PageCompression::GetPageCompressHeader() const +{ + return this->header; +} + +BlockNumber PageCompression::GetMaxBlockNumber() const +{ + return (BlockNumber)pg_atomic_read_u32(&header->nblocks); +} + +bool PageCompression::WriteBufferToCurrentBlock(const char *buf, BlockNumber blockNumber, int32 size) +{ + decltype(PageCompressHeader::chunk_size) curChunkSize = this->chunkSize; + int needChunks = size / curChunkSize; + + PageCompressHeader *pcMap = this->header; + PageCompressAddr *pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, curChunkSize, blockNumber); + + /* allocate chunks */ + if (pcAddr->allocated_chunks < needChunks) { + auto chunkno = pg_atomic_fetch_add_u32(&pcMap->allocated_chunks, needChunks - pcAddr->allocated_chunks); + for (int i = pcAddr->allocated_chunks; i < needChunks; i++) { + pcAddr->chunknos[i] = ++chunkno; + } + pcAddr->allocated_chunks = needChunks; + } + + for (int32 i = 0; i < needChunks; ++i) { + auto buffer_pos = buf + curChunkSize * i; + off_t seekpos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(curChunkSize, pcAddr->chunknos[i]); + int32 start = i; + /* merge continuous write */ + while (i < needChunks - 1 && pcAddr->chunknos[i + 1] == pcAddr->chunknos[i] + 1) { + i++; + } + size_t write_amount = curChunkSize * (i - start + 1); + if (fseek(this->pcdFile, seekpos, SEEK_SET) < 0) { + return false; + } + if (fwrite(buffer_pos, 1, write_amount, this->pcdFile) != write_amount) { + return false; + } + } + /* set other data of pcAddr */ + pcAddr->nchunks = needChunks; + pcAddr->checksum = AddrChecksum32(blockNumber, pcAddr, curChunkSize); + return true; +} + +/** + * return chunk-aligned size of buffer + * @param buffer compressed page buffer + * @param chunkSize chunk size + * @return return chunk-aligned size of buffer + */ +size_t CalRealWriteSize(char *buffer, BlockNumber segmentNo, BlockNumber blockNumber, + decltype(PageCompressHeader::chunk_size) chunkSize) +{ + size_t compressedBufferSize; + uint32 crc32; + char *data; + if (PageIs8BXidHeapVersion(buffer)) { + HeapPageCompressData *heapPageData = (HeapPageCompressData *)buffer; + compressedBufferSize = heapPageData->size + 
offsetof(HeapPageCompressData, data); + crc32 = heapPageData->crc32; + data = heapPageData->data; + } else { + PageCompressData *heapPageData = (PageCompressData *)buffer; + compressedBufferSize = heapPageData->size + offsetof(PageCompressData, data); + crc32 = heapPageData->crc32; + data = heapPageData->data; + } + if (compressedBufferSize > 0 && compressedBufferSize <= ((size_t)chunkSize * (BLCKSZ / chunkSize - 1)) && + DataBlockChecksum(data, compressedBufferSize, true) == crc32) { + return ((compressedBufferSize - 1) / chunkSize + 1) * chunkSize; + } + /* uncompressed page */ + return BLCKSZ; +} + +template +std::unique_ptr make_unique(Args &&...args) +{ + return std::unique_ptr(new T(std::forward(args)...)); +} + +COMPRESS_ERROR_STATE ConstructCompressedFile(const char *toFullPath, BlockNumber segmentNo, uint16 chunkSize, + uint8 algorithm) +{ + std::unique_ptr pageCompression = make_unique(); + auto result = pageCompression->Init(toFullPath, MAXPGPATH, segmentNo, chunkSize, true); + if (result != SUCCESS) { + return result; + } + + PageCompressHeader *header = pageCompression->GetPageCompressHeader(); + header->chunk_size = chunkSize; + header->algorithm = algorithm; + + /* read page by page */ + FILE *dataFile = fopen(toFullPath, "rb+"); + if (dataFile == NULL) { + return NORMAL_OPEN_ERROR; + } + if (fseek(dataFile, 0L, SEEK_END) < 0) { + return NORMAL_SEEK_ERROR; + } + /* read file size of toFullPath */ + off_t size = ftell(dataFile); + if (fseek(dataFile, 0L, 0) < 0) { + return NORMAL_SEEK_ERROR; + } + + BlockNumber maxBlockNumber = size / BLCKSZ; + BlockNumber blockNumber = 0; + char buffer[BLCKSZ]; + for (blockNumber = 0; blockNumber < maxBlockNumber; blockNumber++) { + if (fread(buffer, 1, BLCKSZ, dataFile) != BLCKSZ) { + return NORMAL_READ_ERROR; + } + size_t realSize = CalRealWriteSize(buffer, pageCompression->GetSegmentNo(), blockNumber, chunkSize); + pageCompression->WriteBufferToCurrentBlock(buffer, blockNumber, realSize); + } + header->nblocks = blockNumber; + /* truncate tmp oid file */ + if (ftruncate(fileno(dataFile), 0L)) { + return PCD_TRUNCATE_ERROR; + } + return SUCCESS; +} + +bool PageCompression::DecompressedPage(const char *src, char *dest) const +{ + if (DecompressPage(src, dest, header->algorithm) == BLCKSZ) { + return true; + } + return false; +} + +bool PageCompression::WriteBackUncompressedData(const char *compressed, size_t compressedLen, char *buffer, size_t size, + BlockNumber blockNumber) +{ + /* if compressed page is uncompressed, write back directly */ + if (compressedLen == BLCKSZ) { + return this->WriteBufferToCurrentBlock(buffer, blockNumber, size); + } + + bool byteConvert; + bool diffConvert; + if (PageIs8BXidHeapVersion(compressed)) { + byteConvert = ((HeapPageCompressData *)compressed)->byte_convert; + diffConvert = ((HeapPageCompressData *)compressed)->diff_convert; + } else { + byteConvert = ((PageCompressData *)compressed)->byte_convert; + diffConvert = ((PageCompressData *)compressed)->diff_convert; + } + + auto algorithm = header->algorithm; + auto workBufferSize = CompressPageBufferBound(buffer, algorithm); + if (workBufferSize < 0) { + return false; + } + char *workBuffer = (char *)malloc(workBufferSize); + RelFileCompressOption relFileCompressOption; + relFileCompressOption.compressPreallocChunks = 0; + relFileCompressOption.compressLevelSymbol = true; + relFileCompressOption.compressLevel = 1; + relFileCompressOption.compressAlgorithm = algorithm; + relFileCompressOption.byteConvert = byteConvert; + relFileCompressOption.diffConvert = 
diffConvert; + + auto compress_buffer_size = CompressPage(buffer, workBuffer, workBufferSize, relFileCompressOption); + if (compress_buffer_size < 0) { + return false; + } + uint8 nchunks = (compress_buffer_size - 1) / chunkSize + 1; + auto bufferSize = chunkSize * nchunks; + if (bufferSize >= BLCKSZ) { + /* store original page if can not save space? */ + free(workBuffer); + workBuffer = (char *)buffer; + nchunks = BLCKSZ / chunkSize; + } else { + /* fill zero in the last chunk */ + if (compress_buffer_size < bufferSize) { + auto leftSize = bufferSize - compress_buffer_size; + errno_t rc = memset_s(workBuffer + compress_buffer_size, leftSize, 0, leftSize); + securec_check(rc, "", ""); + } + } + return this->WriteBufferToCurrentBlock(workBuffer, blockNumber, bufferSize > BLCKSZ ? BLCKSZ : bufferSize); +} + +COMPRESS_ERROR_STATE PageCompression::TruncateFile(BlockNumber oldBlockNumber, BlockNumber newBlockNumber) +{ + auto map = this->header; + /* write zero to truncated addr */ + for (BlockNumber blockNumber = newBlockNumber; blockNumber < oldBlockNumber; ++blockNumber) { + PageCompressAddr *addr = GET_PAGE_COMPRESS_ADDR(map, this->chunkSize, blockNumber); + for (size_t i = 0; i < addr->allocated_chunks; ++i) { + addr->chunknos[i] = 0; + } + addr->nchunks = 0; + addr->allocated_chunks = 0; + addr->checksum = 0; + } + map->last_synced_nblocks = map->nblocks = newBlockNumber; + + /* find the max used chunk number */ + pc_chunk_number_t beforeUsedChunks = map->allocated_chunks; + pc_chunk_number_t max_used_chunkno = 0; + for (BlockNumber blockNumber = 0; blockNumber < newBlockNumber; ++blockNumber) { + PageCompressAddr *addr = GET_PAGE_COMPRESS_ADDR(map, this->chunkSize, blockNumber); + for (uint8 i = 0; i < addr->allocated_chunks; i++) { + if (addr->chunknos[i] > max_used_chunkno) { + max_used_chunkno = addr->chunknos[i]; + } + } + } + map->allocated_chunks = map->last_synced_allocated_chunks = max_used_chunkno; + + /* truncate pcd qfile */ + if (beforeUsedChunks > max_used_chunkno) { + if (ftruncate(fileno(this->pcdFile), max_used_chunkno * chunkSize) != 0) { + return PCD_TRUNCATE_ERROR; + } + } + return SUCCESS; +} + +COMPRESS_ERROR_STATE PageCompression::RemoveCompressedFile(const char *path) +{ + char dst[MAXPGPATH]; + if (unlink(path) != 0) { + if (errno == ENOENT) { + return NORMAL_MISSING_ERROR; + } + return NORMAL_UNLINK_ERROR; + } + FormatPathToPca(path, dst, MAXPGPATH); + if (unlink(dst) != 0) { + if (errno == ENOENT) { + return PCA_MISSING_ERROR; + } + return PCA_UNLINK_ERROR; + } + FormatPathToPcd(path, dst, MAXPGPATH); + if (unlink(dst) != 0) { + if (errno == ENOENT) { + return PCD_MISSING_ERROR; + } + return PCA_UNLINK_ERROR; + } + return SUCCESS; +} +decltype(PageCompressHeader::chunk_size) PageCompression::GetChunkSize() const +{ + return this->chunkSize; +} +decltype(PageCompressHeader::algorithm) PageCompression::GetAlgorithm() const +{ + return this->header->algorithm; +} diff --git a/src/lib/page_compression/PageCompression.h b/src/lib/page_compression/PageCompression.h new file mode 100644 index 00000000000..5e7e543f9df --- /dev/null +++ b/src/lib/page_compression/PageCompression.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. 
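
Taken together, the implementation above and the header that begins here give frontend tools one object that owns a relation's _pca map and _pcd data file. A minimal read-side sketch, assuming a FRONTEND build linked against libpagecompression; the path handling and error policy are illustrative, the method names come from the class:

    #include <cstring>
    #include "PageCompression.h"

    /* Sketch: iterate over all blocks of a compressed relation segment and
     * fetch their (still compressed) chunk content. */
    static bool ScanCompressedSegment(const char *relPath, BlockNumber segmentNo)
    {
        PageCompression pc;
        if (pc.Init(relPath, strlen(relPath) + 1, segmentNo) != SUCCESS) {
            return false;                     /* _pca/_pcd pair missing or unmappable */
        }
        char buffer[BLCKSZ];
        for (BlockNumber blk = 0; blk < pc.GetMaxBlockNumber(); blk++) {
            /* zeroAlign = true zero-fills the buffer past the chunks read */
            if (pc.ReadCompressedBuffer(blk, buffer, BLCKSZ, true) == 0) {
                return false;                 /* read failed or checksums never matched */
            }
            /* ... hand buffer to the caller; it may still need DecompressedPage() ... */
        }
        return true;                          /* destructor unmaps and closes both files */
    }
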
+ */ + +#ifndef OPENGAUSS_SERVER_PAGECOMPRESSION_H +#define OPENGAUSS_SERVER_PAGECOMPRESSION_H + +#include "c.h" +#include "compressed_common.h" +#include "storage/page_compression.h" + +#ifndef palloc +#define palloc(sz) malloc(sz) +#endif +#ifndef pfree +#define pfree(ptr) free(ptr) +#endif + +enum COMPRESS_ERROR_STATE { + SUCCESS, + NORMAL_OPEN_ERROR, + NORMAL_READ_ERROR, + NORMAL_SEEK_ERROR, + NORMAL_MISSING_ERROR, + NORMAL_UNLINK_ERROR, + PCA_OPEN_ERROR, + PCA_READ_ERROR, + PCA_SEEK_ERROR, + PCA_WRITE_ERROR, + PCA_MISSING_ERROR, + PCA_UNLINK_ERROR, + PCD_OPEN_ERROR, + PCD_READ_ERROR, + PCD_SEEK_ERROR, + PCD_WRITE_ERROR, + PCD_MISSING_ERROR, + PCD_UNLINK_ERROR, + PCD_TRUNCATE_ERROR, + PCA_MMAP_ERROR +}; +COMPRESS_ERROR_STATE ConstructCompressedFile(const char *toFullPath, BlockNumber segmentNo, uint16 chunkSize, + uint8 algorithm); +extern bool FetchSourcePca(unsigned char* pageCompressHeader, size_t len, RewindCompressInfo* rewindCompressInfo); +bool ProcessLocalPca(const char *tablePath, RewindCompressInfo *rewindCompressInfo, const char *prefix = NULL); +void FormatPathToPca(const char *path, char *dst, size_t len, const char *pg_data = NULL); +void FormatPathToPcd(const char *path, char *dst, size_t len, const char *pg_data = NULL); + +class PageCompression { +public: + ~PageCompression(); + COMPRESS_ERROR_STATE Init(const char *filePath, size_t len, BlockNumber inSegmentNo, + decltype(PageCompressHeader::chunk_size) chunkSize = 0, bool create = false); + FILE *GetPcdFile() const; + BlockNumber GetSegmentNo() const; + BlockNumber GetMaxBlockNumber() const; + decltype(PageCompressHeader::chunk_size) GetChunkSize() const; + decltype(PageCompressHeader::algorithm) GetAlgorithm() const; + PageCompressHeader *GetPageCompressHeader() const; + size_t ReadCompressedBuffer(BlockNumber blockNum, char *buffer, size_t bufferLen, bool zeroAlign = false); + bool WriteBufferToCurrentBlock(const char *buf, BlockNumber blockNumber, int32 size); + bool DecompressedPage(const char *src, char *dest) const; + bool WriteBackUncompressedData(const char *uncompressed, size_t uncompressedLen, char *buffer, size_t size, + BlockNumber blockNumber); + COMPRESS_ERROR_STATE TruncateFile(BlockNumber oldBlockNumber, BlockNumber newBlockNumber); + const char *GetInitPath() const; + void ResetPcdFd(); +public: + static bool SkipCompressedFile(const char *fileName, size_t len); + static bool IsCompressedTableFile(const char *fileName, size_t len); + static COMPRESS_ERROR_STATE RemoveCompressedFile(const char *path); + static bool InnerPageCompressChecksum(const char *buffer); +private: + PageCompressHeader *header; + char initPath[MAXPGPATH]; + decltype(PageCompressHeader::chunk_size) chunkSize = 0; + FILE *pcaFile = nullptr; + FILE *pcdFile = nullptr; + BlockNumber segmentNo; +}; + +#endif // OPENGAUSS_SERVER_PAGECOMPRESSION_H diff --git a/src/bin/pg_rewind/compressed_common.h b/src/lib/page_compression/compressed_common.h similarity index 100% rename from src/bin/pg_rewind/compressed_common.h rename to src/lib/page_compression/compressed_common.h diff --git a/src/lib/page_compression/page_compression.sh b/src/lib/page_compression/page_compression.sh new file mode 100644 index 00000000000..1e0a6238424 --- /dev/null +++ b/src/lib/page_compression/page_compression.sh @@ -0,0 +1,35 @@ +project_dir=$1 +source_dir=$2 +# clean +if [[ -e ${source_dir}/checksum_impl.cpp ]];then + rm ${source_dir}/checksum_impl.cpp +fi +if [[ -e ${source_dir}/pg_lzcompress.cpp ]];then + rm ${source_dir}/pg_lzcompress.cpp +fi +if [[ -e 
${source_dir}/pgsleep.cpp ]];then + rm ${source_dir}/pgsleep.cpp +fi +if [[ -e ${source_dir}/pg_lzcompress.h ]];then + rm ${source_dir}/pg_lzcompress.h +fi +rm -rf ${source_dir}/storage +rm -rf ${source_dir}/utils +if [[ -e ${source_dir}/PageCompression.cpp ]] && [[ -L ${source_dir}/PageCompression.cpp ]];then + rm ${source_dir}/PageCompression.cpp +fi + + +# setup file +mkdir -p ${source_dir}/storage +mkdir -p ${source_dir}/utils +ln -fs ${project_dir}/gausskernel/storage/page/checksum_impl.cpp ${source_dir}/checksum_impl.cpp +ln -fs ${project_dir}/common/backend/utils/adt/pg_lzcompress.cpp ${source_dir}/pg_lzcompress.cpp +ln -fs ${project_dir}/common/port/pgsleep.cpp ${source_dir}/pgsleep.cpp +ln -fs ${project_dir}/include/utils/pg_lzcompress.h ${source_dir}/pg_lzcompress.h +if [[ ! -e ${source_dir}/PageCompression.cpp ]]; then + ln -fs ${project_dir}/lib/page_compression/PageCompression.cpp ${source_dir}/PageCompression.cpp +fi +echo '' > ${source_dir}/utils/errcodes.h +#link: pg_lzcompress.cpp->knl_variable.h->knl_instance.h->double_write_basic.h->lwlock.h->lwlocknames.h +echo "#define NUM_INDIVIDUAL_LWLOCKS 0" > ${source_dir}/storage/lwlocknames.h \ No newline at end of file -- Gitee From 260d3f3ea0fbaaf29b379a53de79bc36008dc187 Mon Sep 17 00:00:00 2001 From: wuyuechuan Date: Thu, 17 Mar 2022 16:22:39 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BC=98=E5=8C=96gs=20ctl=E3=80=81baseback?= =?UTF-8?q?up=E8=BF=87=E7=A8=8B=E5=AE=8C=E6=95=B4=E6=A0=A1=E9=AA=8C?= =?UTF-8?q?=E6=80=A7=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/utils/adt/genfile.cpp | 3 --- .../storage/replication/basebackup.cpp | 3 +-- .../storage/smgr/page_compression.cpp | 23 +++++++++++++------ src/include/storage/page_compression.h | 2 -- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/common/backend/utils/adt/genfile.cpp b/src/common/backend/utils/adt/genfile.cpp index a9636514363..5f8304b9cd0 100644 --- a/src/common/backend/utils/adt/genfile.cpp +++ b/src/common/backend/utils/adt/genfile.cpp @@ -370,11 +370,8 @@ static void ReadBinaryFileBlocksFirstCall(PG_FUNCTION_ARGS, int32 startBlockNum, if (fp == NULL) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", pcdFilePath))); } - char* pageBuffer = (char*)palloc(BLCKSZ); itemState->pcaFile = pcaFile; itemState->rbStruct.header = map; - itemState->rbStruct.pageBuffer = pageBuffer; - itemState->rbStruct.pageBufferLen = BLCKSZ; itemState->rbStruct.fp = fp; itemState->rbStruct.segmentNo = segmentNo; itemState->rbStruct.fileName = pcdFilePath; diff --git a/src/gausskernel/storage/replication/basebackup.cpp b/src/gausskernel/storage/replication/basebackup.cpp index 781e1ccf375..a84becd7d54 100755 --- a/src/gausskernel/storage/replication/basebackup.cpp +++ b/src/gausskernel/storage/replication/basebackup.cpp @@ -1997,8 +1997,7 @@ static void SendCompressedFile(char* readFileName, int basePathLen, struct stat& bool* onlyExtend = (bool*)palloc0(totalBlockNum * sizeof(bool)); /* allocated in advance to prevent repeated allocated */ - char pageBuffer[BLCKSZ]; - ReadBlockChunksStruct rbStruct{map, pageBuffer, BLCKSZ, fp, segmentNo, readFileName}; + ReadBlockChunksStruct rbStruct{map, fp, segmentNo, readFileName}; for (blockNum = 0; blockNum < totalBlockNum; blockNum++) { PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(transfer, chunkSize, blockNum); /* skip some blocks which only extends. The size of blocks is 0. 
*/ diff --git a/src/gausskernel/storage/smgr/page_compression.cpp b/src/gausskernel/storage/smgr/page_compression.cpp index fb1167fb12f..f83fc46fae1 100644 --- a/src/gausskernel/storage/smgr/page_compression.cpp +++ b/src/gausskernel/storage/smgr/page_compression.cpp @@ -384,7 +384,6 @@ size_t ReadAllChunkOfBlock(char *dst, size_t destLen, BlockNumber blockNumber, R (ERRCODE_INVALID_PARAMETER_VALUE, errmsg("blocknum \"%u\" exceeds max block number", blockNumber))); } - char* pageBuffer = rbStruct.pageBuffer; const char* fileName = rbStruct.fileName; decltype(PageCompressHeader::chunk_size) chunkSize = header->chunk_size; decltype(ReadBlockChunksStruct::segmentNo) segmentNo = rbStruct.segmentNo; @@ -416,12 +415,22 @@ size_t ReadAllChunkOfBlock(char *dst, size_t destLen, BlockNumber blockNumber, R if (nchunks == 0) { break; } - if (DecompressPage(dst, pageBuffer, header->algorithm) == BLCKSZ) { - PageHeader phdr = PageHeader(pageBuffer); - BlockNumber blkNo = blockNumber + segmentNo * ((BlockNumber)RELSEG_SIZE); - if (PageIsNew(phdr) || pg_checksum_page(pageBuffer, blkNo) == phdr->pd_checksum) { - break; - } + char *data = NULL; + size_t dataLen; + uint32 crc32; + if (PageIs8BXidHeapVersion(dst)) { + HeapPageCompressData *heapPageData = (HeapPageCompressData *)dst; + data = heapPageData->data; + dataLen = heapPageData->size; + crc32 = heapPageData->crc32; + } else { + PageCompressData *heapPageData = (PageCompressData *)dst; + data = heapPageData->data; + dataLen = heapPageData->size; + crc32 = heapPageData->crc32; + } + if (DataBlockChecksum(data, dataLen, true) == crc32) { + break; } if (tryCount < MAX_RETRY_LIMIT) { diff --git a/src/include/storage/page_compression.h b/src/include/storage/page_compression.h index 33b671ed38f..ce90ed84fa2 100644 --- a/src/include/storage/page_compression.h +++ b/src/include/storage/page_compression.h @@ -88,8 +88,6 @@ typedef struct PageCompressAddr { struct ReadBlockChunksStruct { PageCompressHeader* header; // header: pca file - char* pageBuffer; // pageBuffer: decompressed page - size_t pageBufferLen; FILE* fp; // fp: table fp int segmentNo; char* fileName; // fileName: for error report -- Gitee
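
The second patch (subject: optimize full-verification performance of gs_ctl and basebackup) validates the crc32 stored in the compressed page header directly, instead of decompressing every block and re-running pg_checksum_page(), which is why the pageBuffer fields of ReadBlockChunksStruct can be dropped. A sketch of that check in isolation; the helper name is illustrative, the field access mirrors the ReadAllChunkOfBlock() hunk above and PageCompression::InnerPageCompressChecksum() from the first patch.

    #include "storage/checksum_impl.h"
    #include "storage/page_compression.h"

    /* Sketch: validate one compressed block without decompressing it.
     * `block` points to the chunk bytes of a single compressed page. */
    static bool CompressedBlockLooksValid(const char *block)
    {
        const char *data;
        size_t dataLen;
        uint32 crc32;
        if (PageIs8BXidHeapVersion(block)) {
            const HeapPageCompressData *hdr = (const HeapPageCompressData *)block;
            data = hdr->data;
            dataLen = hdr->size;
            crc32 = hdr->crc32;
        } else {
            const PageCompressData *hdr = (const PageCompressData *)block;
            data = hdr->data;
            dataLen = hdr->size;
            crc32 = hdr->crc32;
        }
        return DataBlockChecksum((char *)data, dataLen, true) == crc32;
    }
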