diff --git a/contrib/pagehack/pagehack.cpp b/contrib/pagehack/pagehack.cpp index 8adaed889b6ca9527b77c166522b3c10aabd1203..9c527326ff70fa6b73c864890d3fae00559a5f01 100644 --- a/contrib/pagehack/pagehack.cpp +++ b/contrib/pagehack/pagehack.cpp @@ -60,6 +60,8 @@ #include "access/ustore/knl_utuple.h" #include "access/ustore/knl_uundorecord.h" #include "access/double_write_basic.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" #include "catalog/pg_control.h" #include "catalog/pg_attribute.h" #include "catalog/pg_class.h" @@ -102,7 +104,11 @@ /* Number of pg_class types */ #define CLASS_TYPE_NUM 512 #define TEN 10 - +#define BLOCK_META_INFO_NUM_PER_PAGE 127 +#define BASE_PAGE_MAP_SIZE 16 +#define BASE_PAGE_MAP_BIT_SIZE (BASE_PAGE_MAP_SIZE * BITS_PER_BYTE) +#define DIVIDED_BY_TWO 2 +#define WAL_ID_OFFSET 32 typedef unsigned char* binary; static const char* indents[] = { // 10 tab is enough to used. @@ -830,7 +836,9 @@ typedef enum HackingType { HACKING_UNDO_RECORD, HACKING_UNDO_FIX, HACKING_SEGMENT, - NUM_HACKINGTYPE + NUM_HACKINGTYPE, + HACKING_LSN_INFO_META, + HACKING_BLOCK_INFO_META, } HackingType; static HackingType hackingtype = HACKING_HEAP; @@ -856,7 +864,9 @@ static const char* HACKINGTYPE[] = {"heap", "undo_slot", "undo_record", "undo_fix", - "segment" + "segment", + "lsn_info_meta", + "block_info_meta" }; const char* PageTypeNames[] = {"DATA", "FSM", "VM"}; @@ -912,6 +922,7 @@ typedef struct FSMAddress { const int FSM_BOTTOM_LEVEL = 0; using namespace undo; +using namespace extreme_rto_standby_read; static void formatBytes(unsigned char* start, int len) { @@ -5036,6 +5047,248 @@ static int ParseUndoSlot(const char *filename) return true; } +static void parse_map_position(uint8 map) +{ + uint8 pagemap[BITS_PER_BYTE] = { 0 }; + int pos = 0; + pos = 0; + while (map > 0) { + pagemap[pos] = map % DIVIDED_BY_TWO; + ++pos; + map /= DIVIDED_BY_TWO; + } + for (int loop = BITS_PER_BYTE - 1; loop 
>= 0; loop--) { + fprintf(stdout, "%u", pagemap[loop]); + } + fprintf(stdout, " "); +} + +static void parse_lsn_info_head(LsnInfoPageHeader *header) +{ + PageXLogRecPtr lsn = header->lsn; + fprintf(stdout, "%slsn: xlogid %u, xrecoff %u, lsn %lu\n", + indents[indentLevel], lsn.xlogid, lsn.xrecoff, ((uint64)lsn.xlogid << WAL_ID_OFFSET) | lsn.xrecoff); + fprintf(stdout, "%schecksum: %u, flags: %u, version: %u", + indents[indentLevel], header->checksum, header->flags, header->version); + fprintf(stdout, "%sbase page map: ", indents[indentLevel]); + for (uint32 loop = 0; loop < BASE_PAGE_MAP_SIZE; loop++) { + parse_map_position(header->base_page_map[loop]); + } + fprintf(stdout, "\n"); +} + +static void parse_lsn_info_node(LsnInfoNode *lsninfo) +{ + fprintf(stdout, "%slsn info list: prev %lu, next: %lu\n", + indents[indentLevel], lsninfo->lsn_list.prev, lsninfo->lsn_list.next); + fprintf(stdout, "%sflags: %u, type: %u, used: %u\n", + indents[indentLevel], lsninfo->flags, lsninfo->type, lsninfo->used); + fprintf(stdout, "%slsn:", indents[indentLevel]); + for (uint loop = 0; loop < LSN_NUM_PER_NODE; loop++) { + fprintf(stdout, " %lu", lsninfo->lsn[loop]); + } + fprintf(stdout, "\n"); +} + +static void parse_base_page_info_node(BasePageInfoNode *pageinfo) +{ + RelFileNode rnode = pageinfo->relfilenode; + fprintf(stdout, "%slsn info:\n", indents[indentLevel]); + + ++indentLevel; + parse_lsn_info_node(&(pageinfo->lsn_info_node)); + --indentLevel; + + fprintf(stdout, "%sbase page list: prev %lu, next: %lu\n", + indents[indentLevel], pageinfo->base_page_list.prev, pageinfo->base_page_list.next); + fprintf(stdout, "%scurrent page lsn: %lu\n", + indents[indentLevel], pageinfo->cur_page_lsn); + fprintf(stdout, "%srefile node:\n", indents[indentLevel]); + ++indentLevel; + fprintf(stdout, "%sspcnode: %u, dbnode: %u, relnode: %u, bucketnode: %d, opt: %u\n", + indents[indentLevel], rnode.spcNode, rnode.dbNode, rnode.relNode, rnode.bucketNode, rnode.opt); + --indentLevel; + 
fprintf(stdout, "%sfork num: %d, block num: %u\n", + indents[indentLevel], pageinfo->fork_num, pageinfo->block_num); + fprintf(stdout, "%snext base page lsn: %lu, base page position: %lu\n", + indents[indentLevel], pageinfo->next_base_page_lsn, pageinfo->base_page_position); +} + +static void parse_lsn_info_block(FILE* fd, uint8 isbasepage[], uint32 &handledblock, uint32 loop) +{ + char bufferlsn[sizeof(LsnInfoNode)]; + char bufferpage[sizeof(BasePageInfoNode)]; + LsnInfoNode *lsnInfo = NULL; + BasePageInfoNode *basepageinfo = NULL; + + if (isbasepage[handledblock]) { + fprintf(stdout, "it's a basepage.\n"); + (void)fread(bufferpage, 1, sizeof(BasePageInfoNode), fd); + basepageinfo = (BasePageInfoNode *)bufferpage; + if (basepageinfo->lsn_info_node.type != LSN_INFO_TYPE_BASE_PAGE) { + fprintf(stderr, "Data at page %u, block %u must be base page, but its type is: %u.\n", + loop, handledblock, basepageinfo->lsn_info_node.type); // report error but continue. + } + parse_base_page_info_node(basepageinfo); + handledblock += 2; // index need add by 2 for basepage takes 2 blocks. + } else { + (void)fread(bufferlsn, 1, sizeof(LsnInfoNode), fd); + lsnInfo = (LsnInfoNode *)bufferlsn; + if (!is_lsn_info_node_valid(lsnInfo->flags)) { + fprintf(stdout, "Data at page %u, block %u is not valid.\n", loop, handledblock); + } else { + fprintf(stdout, "it's a lsn page.\n"); + if (lsnInfo->type != LSN_INFO_TYPE_LSNS) { + fprintf(stderr, "Data at page %u, block %u must be lsn page, but its type is: %u.\n", + loop, handledblock, lsnInfo->type); // report error but continue. 
+ } + parse_lsn_info_node(lsnInfo); + } + handledblock++; + } +} + +static bool parse_lsn_info_meta(const char *filename) +{ + char bufferhead[sizeof(LsnInfoPageHeader)]; + LsnInfoPageHeader *pageheader = NULL; + FILE* fd = NULL; + uint32 loop, loopmap, loopbit, handledblock; + uint8 pagemappos; + uint8 isbasepage[BASE_PAGE_MAP_BIT_SIZE] = { 0 }; + if (NULL == (fd = fopen(filename, "rb"))) { + fprintf(stderr, "%s: %s\n", filename, strerror(errno)); + return false; + } + + fseek(fd, 0, SEEK_END); + long size = ftell(fd); + rewind(fd); + + if (size % BLCKSZ != 0) { + fprintf(stderr, "Reading lsn/page info meta file error: file size is not divisible by page size(8k).\n"); + fclose(fd); + return false; + } + + long pagenum = size / BLCKSZ; + fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum); + + for (loop = 1; loop <= pagenum; loop++) { + fprintf(stdout, "Page %u information:\n", loop); + ++indentLevel; + if (fread(bufferhead, 1, sizeof(LsnInfoPageHeader), fd) != sizeof(LsnInfoPageHeader)) { + fprintf(stderr, "%sReading header error", indents[indentLevel]); + fclose(fd); + return false; + } + + pageheader = (LsnInfoPageHeader *)bufferhead; + if (!is_lsn_info_page_valid(pageheader)) { + fseek(fd, (BASE_PAGE_MAP_BIT_SIZE - 1) * BLCKSZ, SEEK_SET); // push 127 * 64 bytes + fprintf(stdout, "%sPage %u is not valid.\n", indents[indentLevel], loop); + --indentLevel; + continue; + } + parse_lsn_info_head(pageheader); + + pagemappos = 0; + for (loopmap = 0; loopmap < BASE_PAGE_MAP_SIZE; loopmap++) { + for (loopbit = 0; loopbit < BITS_PER_BYTE; loopbit++) { + isbasepage[pagemappos] = (((pageheader->base_page_map[loopmap]) & (0x1 << loopbit)) >> loopbit); + pagemappos++; + } + } + + handledblock = 1; // 1st block is handled as header + while (handledblock < BASE_PAGE_MAP_BIT_SIZE) { + fprintf(stdout, "%sBlock %u information: ", indents[indentLevel], handledblock); + ++indentLevel; + parse_lsn_info_block(fd, isbasepage, handledblock, loop); + --indentLevel; + } 
+ memset_s(isbasepage, sizeof(isbasepage), 0, sizeof(isbasepage)); + --indentLevel; + } + fclose(fd); + return true; +} + +static void parse_block_info_head(BlockInfoPageHeader *header) +{ + PageXLogRecPtr lsn = header->lsn; + fprintf(stdout, "%slsn: xlogid %u, xrecoff %u, lsn %lu\n", + indents[indentLevel], lsn.xlogid, lsn.xrecoff, ((uint64)lsn.xlogid << WAL_ID_OFFSET) | lsn.xrecoff); + fprintf(stdout, "%schecksum: %u, flags: %u\n", + indents[indentLevel], header->checksum, header->flags); + fprintf(stdout, "%sversion: %u, total_block_num: %lu\n", + indents[indentLevel], header->version, header->total_block_num); +} + +static void parse_block_info_content(BlockMetaInfo *blockInfo) +{ + fprintf(stdout, "%stimeline: %u, record_num: %u\n", + indents[indentLevel], blockInfo->timeline, blockInfo->record_num); + fprintf(stdout, "%smin_lsn: %lu, max_lsn: %lu, flags: %u\n", + indents[indentLevel], blockInfo->min_lsn, blockInfo->max_lsn, blockInfo->flags); + fprintf(stdout, "%slsn_info_list: prev %lu, next: %lu\n", + indents[indentLevel], blockInfo->lsn_info_list.prev, blockInfo->lsn_info_list.next); +} + +static bool parse_block_info_meta(const char *filename) +{ + char bufferhead[sizeof(BlockInfoPageHeader)]; + char bufferblock[sizeof(BlockMetaInfo)]; + uint32 loop, loopinfo; + FILE* fd = NULL; + + if (NULL == (fd = fopen(filename, "rb"))) { + fprintf(stderr, "%s: %s\n", filename, strerror(errno)); + return false; + } + + fseek(fd, 0, SEEK_END); + long size = ftell(fd); + rewind(fd); + + if (size % BLCKSZ != 0) { + fprintf(stderr, "Reading block info meta file error: file size is not divisible by page size(8k).\n"); + fclose(fd); + return false; + } + long pagenum = size / BLCKSZ; + fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum); + + for (loop = 0; loop < pagenum; loop++) { + fprintf(stdout, "Page %u information:\n", loop); + ++indentLevel; + + if (fread(bufferhead, 1, sizeof(BlockInfoPageHeader), fd) != sizeof(BlockInfoPageHeader)) { + 
fprintf(stderr, "%sReading header error", indents[indentLevel]); + fclose(fd); + return false; + } + parse_block_info_head((BlockInfoPageHeader *)bufferhead); + + for (loopinfo = 0; loopinfo < BLOCK_META_INFO_NUM_PER_PAGE; loopinfo++) { + fprintf(stdout, "%sBlock %u information:\n", indents[indentLevel], loopinfo); + ++indentLevel; + if (fread(bufferblock, 1, sizeof(BlockMetaInfo), fd) != sizeof(BlockMetaInfo)) { + fprintf(stderr, "%sReading block meta file error at %u page, %u block.\n", + indents[indentLevel], loop, loopinfo); + fclose(fd); + return false; + } + parse_block_info_content((BlockMetaInfo *)bufferblock); + --indentLevel; + } + --indentLevel; + } + + fclose(fd); + return true; +} + typedef struct UndoHeader { UndoRecordHeader whdr_; UndoRecordBlock wblk_; @@ -5956,6 +6209,18 @@ int main(int argc, char** argv) break; case HACKING_UNDO_FIX: break; + case HACKING_LSN_INFO_META: + if (!parse_lsn_info_meta(filename)) { + fprintf(stderr, "Error during parsing lsn info meta file %s\n", filename); + exit(1); + } + break; + case HACKING_BLOCK_INFO_META: + if (!parse_block_info_meta(filename)) { + fprintf(stderr, "Error during parsing block info meta file %s\n", filename); + exit(1); + } + break; default: /* should be impossible to be here */ Assert(false); diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index 3213539055f9a62ae57a4e02b35b26fc726a5f2e..bd587bef8bc010025c016395b5464e829f738ae4 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -104,6 +104,7 @@ dirty_page_percent_max|real|0.1,1|NULL|NULL| group_concat_max_len|int64|0,9223372036854775807|NULL|NULL check_function_bodies|bool|0,0|NULL|NULL| checkpoint_completion_target|real|0,1|NULL|NULL| +standby_force_recyle_ratio|real|0,1|NULL|NULL| checkpoint_segments|int|1,2147483646|NULL|NULL| checkpoint_timeout|int|30,3600|s|NULL| checkpoint_warning|int|0,2147483647|s|NULL| @@ -268,6 +269,9 @@ geqo_effort|int|1,10|NULL|NULL| 
geqo_generations|int|0,2147483647|NULL|NULL| hadr_max_size_for_xlog_receiver|int|0,2147483647|kB|NULL| hadr_recovery_time_target|int|0,3600|NULL|NULL| +standby_recycle_interval|int|0,86400|s|NULL| +standby_max_query_time|int|0,86400|s|NULL| +base_page_saved_interval|int|4,2000|NULL|NULL| hadr_recovery_point_target|int|0,3600|NULL|NULL| hadr_super_user_record_path|string|0,0|NULL|NULL| hll_default_log2m|int|10,16|NULL|NULL| @@ -708,6 +712,8 @@ undo_zone_count|int|0,1048576|NULL|NULL| stream_cluster_run_mode|enum|cluster_primary,cluster_standby|NULL|NULL| xlog_file_size|int64|1048576,576460752303423487|B|The value must be an integer multiple of 16777216(16M)| xlog_file_path|string|0,0|NULL|NULL| +max_standby_base_page_size|int64|0,576460752303423487|B|NULL| +max_standby_lsn_info_size|int64|0,576460752303423487|B|NULL| plsql_show_all_error|bool|0,0|NULL|NULL| partition_page_estimation|bool|0,0|NULL|NULL| enable_auto_clean_unique_sql|bool|0,0|NULL|NULL| diff --git a/src/bin/initdb/initdb.cpp b/src/bin/initdb/initdb.cpp index 7419849f7345a8fdca16140d9cc328a1fc9e5d9b..75d5e6bcf6d13a57f330b305c0736322bf9f5174 100644 --- a/src/bin/initdb/initdb.cpp +++ b/src/bin/initdb/initdb.cpp @@ -3594,7 +3594,8 @@ static bool check_locale_encoding(const char* locale_encoding, int user_enc) #ifdef WIN32 user_enc == PG_UTF8 || #endif - user_enc == PG_SQL_ASCII)) { + user_enc == PG_SQL_ASCII || + (user_enc == PG_GB18030_2022 && locale_enc == PG_GB18030))) { write_stderr(_("%s: encoding mismatch\n"), progname); write_stderr(_("The encoding you selected (%s) and the encoding that the\n" "selected locale uses (%s) do not match. 
This would lead to\n" diff --git a/src/bin/pg_rewind/fetch.cpp b/src/bin/pg_rewind/fetch.cpp index 19d818eba37f3894b40213faf53816ba067bb224..e1279943acafcb00d2d21673965ff5632f9533ca 100755 --- a/src/bin/pg_rewind/fetch.cpp +++ b/src/bin/pg_rewind/fetch.cpp @@ -27,6 +27,7 @@ #include "PageCompression.h" #include "catalog/pg_type.h" #include "storage/file/fio_device.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" PGconn* conn = NULL; char source_slot_name[NAMEDATALEN] = {0}; @@ -303,6 +304,9 @@ BuildErrorCode fetchSourceFileList() continue; if (NULL != strstr(path, "disable_conn_file")) continue; + if (NULL != strstr(path, EXRTO_FILE_DIR)) { + continue; + } if (PQgetisnull(res, 0, 1)) { /* diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini index 0c59329f6d6a1487c631b8f9934bf93f2bc3a329..77c2dc72f25a3c19fbf721a361f816a98f2e021b 100755 --- a/src/common/backend/catalog/builtin_funcs.ini +++ b/src/common/backend/catalog/builtin_funcs.ini @@ -12904,3 +12904,7 @@ AddFuncGroup( "gs_repair_file", 1, AddBuiltinFunc(_0(4771), _1("gs_repair_file"), _2(3), _3(true), _4(true), _5(gs_repair_file), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(1000), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 26, 25, 23), _21(3, 26, 25, 23), _22(3, 'i', 'i', 'i'), _23(3, "tableoid", "path", "timeout"), _24(NULL), _25("gs_repair_file"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), + AddFuncGroup( + "gs_hot_standby_space_info", 1, + AddBuiltinFunc(_0(6218), _1("gs_hot_standby_space_info"), _2(0), _3(false), _4(true), _5(gs_hot_standby_space_info), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), 
_18('s'), _19(0), _20(0), _21(6, 28, 28, 28, 28, 28, 28), _22(6, 'o', 'o', 'o', 'o', 'o', 'o'), _23(6, "base_page_file_num", "base_page_total_size", "lsn_info_meta_file_num", "lsn_info_meta_total_size", "block_info_meta_file_num", "block_info_meta_total_size"), _24(NULL), _25("gs_hot_standby_space_info"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), diff --git a/src/common/backend/catalog/storage.cpp b/src/common/backend/catalog/storage.cpp index 67e231c5f16033c5c4a631268acbb6d690fb9703..6d6d559fb69af5d9b40dd779a3dc7c7d9b78e2a7 100644 --- a/src/common/backend/catalog/storage.cpp +++ b/src/common/backend/catalog/storage.cpp @@ -24,6 +24,7 @@ #include "access/cstore_am.h" #include "access/visibilitymap.h" +#include "access/multi_redo_api.h" #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" @@ -42,6 +43,7 @@ #include "pgxc/pgxc.h" #include "storage/freespace.h" #include "storage/lmgr.h" +#include "storage/procarray.h" #include "storage/smgr/smgr.h" #include "storage/smgr/segment.h" #include "threadpool/threadpool.h" @@ -608,7 +610,7 @@ void RelationPreserveStorage(RelFileNode rnode, bool atCommit) * This includes getting rid of any buffers for the blocks that are to be * dropped. 
*/ -void RelationTruncate(Relation rel, BlockNumber nblocks) +void RelationTruncate(Relation rel, BlockNumber nblocks, TransactionId latest_removed_xid) { /* Currently, segment-page tables should not be truncated */ Assert(!RelationIsSegmentTable(rel)); @@ -675,14 +677,13 @@ void RelationTruncate(Relation rel, BlockNumber nblocks) uint size; uint8 info = XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE; + size = sizeof(xl_smgr_truncate_compress); xlrec.xlrec.blkno = nblocks; + xlrec.pageCompressOpts = rel->rd_node.opt; + xlrec.latest_removed_xid = latest_removed_xid; if (rel->rd_node.opt != 0) { - xlrec.pageCompressOpts = rel->rd_node.opt; - size = sizeof(xl_smgr_truncate_compress); info |= XLR_REL_COMPRESS; - } else { - size = sizeof(xl_smgr_truncate); } RelFileNodeRelCopy(xlrec.xlrec.rnode, rel->rd_node); @@ -713,7 +714,7 @@ void RelationTruncate(Relation rel, BlockNumber nblocks) BatchClearBadBlock(rel->rd_node, MAIN_FORKNUM, nblocks); } -void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks) +void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks, TransactionId latest_removed_xid) { /* Currently, segment-page tables should not be truncated */ Assert(!RelationIsSegmentTable(parent)); @@ -764,14 +765,16 @@ void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks) uint8 info = XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE; int redoSize; + redoSize = sizeof(xl_smgr_truncate_compress); + + xlrec.xlrec.blkno = nblocks; + xlrec.pageCompressOpts = rel->rd_node.opt; + xlrec.latest_removed_xid = latest_removed_xid; + if (rel->rd_node.opt != 0) { - xlrec.pageCompressOpts = rel->rd_node.opt; info |= XLR_REL_COMPRESS; - redoSize = sizeof(xl_smgr_truncate_compress); - } else { - redoSize = sizeof(xl_smgr_truncate); } - xlrec.xlrec.blkno = nblocks; + RelFileNodeRelCopy(xlrec.xlrec.rnode, part->pd_node); XLogBeginInsert(); @@ -1242,8 +1245,26 @@ void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data) } } 
-void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn) +void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid) { + if (IS_EXRTO_READ) { + const int max_check_times = 1000; + int check_times = 0; + bool conflict = true; + bool reach_max_check_times = false; + while (conflict && check_times < max_check_times) { + RedoInterruptCallBack(); + check_times++; + reach_max_check_times = (check_times == max_check_times); + conflict = proc_array_cancel_conflicting_proc(latest_removed_xid, reach_max_check_times); + } + } +} + +void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn, + TransactionId latest_removed_xid) +{ + smgr_redo_truncate_cancel_conflicting_proc(latest_removed_xid); SMgrRelation reln = smgropen(rnode, InvalidBackendId); smgrcreate(reln, MAIN_FORKNUM, true); UpdateMinRecoveryPoint(lsn, false); @@ -1264,6 +1285,7 @@ void smgr_redo(XLogReaderState* record) XLogRecPtr lsn = record->EndRecPtr; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; bool compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS); + TransactionId latest_removed_xid = InvalidTransactionId; /* Backup blocks are not used in smgr records */ Assert(!XLogRecHasAnyBlockRefs(record)); @@ -1280,6 +1302,9 @@ void smgr_redo(XLogReaderState* record) RelFileNode rnode; RelFileNodeCopy(rnode, xlrec->rnode, (int2)XLogRecGetBucketId(record)); rnode.opt = compress ? ((xl_smgr_truncate_compress*)(void *)XLogRecGetData(record))->pageCompressOpts : 0; + if (XLogRecGetDataLen(record) == TRUNCATE_CONTAIN_XID_SIZE) { + latest_removed_xid = ((xl_smgr_truncate_compress*)(void *)XLogRecGetData(record))->latest_removed_xid; + } /* * Forcibly create relation if it doesn't exist (which suggests that * it was dropped somewhere later in the WAL sequence). 
As in @@ -1305,7 +1330,7 @@ void smgr_redo(XLogReaderState* record) */ /* Also tell xlogutils.c about it */ - xlog_block_smgr_redo_truncate(rnode, xlrec->blkno, lsn); + xlog_block_smgr_redo_truncate(rnode, xlrec->blkno, lsn, latest_removed_xid); } else ereport(PANIC, (errmsg("smgr_redo: unknown op code %u", info))); } diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp index 99f82fd1366309446e7baa788291ba0d82abc834..1dd115185be00d0bbeaebdd923e44a376436dcf3 100644 --- a/src/common/backend/utils/init/globals.cpp +++ b/src/common/backend/utils/init/globals.cpp @@ -75,12 +75,13 @@ bool will_shutdown = false; * NEXT | 92899 | ? | ? * ********************************************/ -const uint32 GRAND_VERSION_NUM = 92907; +const uint32 GRAND_VERSION_NUM = 92909; /******************************************** * 2.VERSION NUM FOR EACH FEATURE * Please write indescending order. ********************************************/ +const uint32 GB18030_2022_VERSION_NUM = 92908; const uint32 PARAM_MARK_VERSION_NUM = 92907; const uint32 TIMESCALE_DB_VERSION_NUM = 92904; const uint32 MULTI_CHARSET_VERSION_NUM = 92903; diff --git a/src/common/backend/utils/mb/Unicode/CMakeLists.txt b/src/common/backend/utils/mb/Unicode/CMakeLists.txt index 924c40d2ea5e076514aa9e25f5610da9a6cb0a45..8e7f4b48000376394d3723bd981af20f1042aa6a 100644 --- a/src/common/backend/utils/mb/Unicode/CMakeLists.txt +++ b/src/common/backend/utils/mb/Unicode/CMakeLists.txt @@ -154,3 +154,10 @@ add_custom_command( DEPENDS UCS_to_BIG5.pl BIG5.TXT CP950.TXT COMMENT "Now Generating *.map" ) + +add_custom_command( + OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/gb18030_to_utf8_2022.map ${CMAKE_CURRENT_SOURCE_DIR}/utf8_to_gb18030_2022.map + COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/UCS_to_GB18030_2022.pl + DEPENDS UCS_to_GB18030_2022.pl gb-18030-2022.xml + COMMENT "Now Generating *.map" +) \ No newline at end of file diff --git a/src/common/backend/utils/mb/Unicode/Makefile 
#! /usr/bin/perl
#
# Copyright (c) 2007-2012, 2023, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_GB18030_2022.pl
#
# Generate UTF-8 <--> GB18030-2022 code conversion tables from
# "gb-18030-2022.xml"
#
# The lines we care about in the source file look like
#    <a u="FE10" b="A6 D9"/>
# where the "u" field is the Unicode code point in hex,
# and the "b" field is the hex byte sequence for GB18030
#
# Two files are written to the current directory:
#    gb18030_to_utf8_2022.map  (LUmapGB18030_2022, sorted by GB18030 code)
#    utf8_to_gb18030_2022.map  (ULmapGB18030_2022, sorted by UTF-8 code)

require "ucs2utf.pl";


$change_file = "gb-18030-2022.xml";

open(CODE_TABLE, $change_file) || die("cannot open $change_file");

# Collect every mapping line; lines that do not carry a mapping are ignored.
while (<CODE_TABLE>)
{
    next if (! m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
    $u = $1;
    $c = $2;
    $c =~ s/ //g;    # "A6 D9" -> "A6D9"
    $ucs_code = hex($u);
    $code_gb = hex($c);
    if ($code_gb >= 0x80 && $ucs_code >= 0x0080)
    {
        $utf_code = &ucs2utf($ucs_code);
        if ($code_u{$utf_code} ne "")
        {
            printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs_code;
            next;
        }
        if ($code_c{$code_gb} ne "")
        {
            printf STDERR "Warning: duplicate GB18030: %08x\n", $code_gb;
            next;
        }
        $code_u{$utf_code} = $code_gb;
        $code_c{$code_gb} = $utf_code;
        $number++;
    }
}
close(CODE_TABLE);

# GB18030 -> UTF-8 table, ordered by GB18030 code.
$change_map_file = "gb18030_to_utf8_2022.map";
open(CHANGE_MAP, "> $change_map_file") || die("cannot open $change_map_file");
print CHANGE_MAP "static pg_local_to_utf LUmapGB18030_2022[ $number ] = {\n";

$count = $number;
for $pos (sort { $a <=> $b } keys(%code_c))
{
    $utf_code = $code_c{$pos};
    $count--;
    if ($count == 0)
    {
        # last entry: no trailing comma
        printf CHANGE_MAP " {0x%04x, 0x%04x}\n", $pos, $utf_code;
    }
    else
    {
        printf CHANGE_MAP " {0x%04x, 0x%04x},\n", $pos, $utf_code;
    }
}

print CHANGE_MAP "};\n";
close(CHANGE_MAP);

# UTF-8 -> GB18030 table, ordered by UTF-8 code.
$change_map_file = "utf8_to_gb18030_2022.map";
open(CHANGE_MAP, "> $change_map_file") || die("cannot open $change_map_file");
print CHANGE_MAP "static pg_utf_to_local ULmapGB18030_2022[ $number ] = {\n";

$count = $number;
for $pos (sort { $a <=> $b } keys(%code_u))
{
    $code_gb = $code_u{$pos};
    $count--;
    if ($count == 0)
    {
        # last entry: no trailing comma
        printf CHANGE_MAP " {0x%04x, 0x%04x}\n", $pos, $code_gb;
    }
    else
    {
        printf CHANGE_MAP " {0x%04x, 0x%04x},\n", $pos, $code_gb;
    }
}

print CHANGE_MAP "};\n";
close(CHANGE_MAP);
+ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map b/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map new file mode 100644 index 0000000000000000000000000000000000000000..275420c45cc06a8c903cdf4df264aa6839ce6f03 --- /dev/null +++ b/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map @@ -0,0 +1,40 @@ +static pg_local_to_utf LUmapGB18030_2022[ 38 ] = { + {0xa6d9, 0xefb890}, + {0xa6da, 0xefb892}, + {0xa6db, 0xefb891}, + {0xa6dc, 0xefb893}, + {0xa6dd, 0xefb894}, + {0xa6de, 0xefb895}, + {0xa6df, 0xefb896}, + {0xa6ec, 0xefb897}, + {0xa6ed, 0xefb898}, + {0xa6f3, 0xefb899}, + {0xa8bc, 0xe1b8bf}, + {0xfe59, 0xe9beb4}, + {0xfe61, 0xe9beb5}, + {0xfe66, 0xe9beb6}, + {0xfe67, 0xe9beb7}, + {0xfe6d, 0xe9beb8}, + {0xfe7e, 0xe9beb9}, + {0xfe90, 0xe9beba}, + {0xfea0, 0xe9bebb}, + {0x8135f437, 0xee9f87}, + {0x82359037, 0xeea09e}, + {0x82359038, 0xeea0a6}, + {0x82359039, 0xeea0ab}, + {0x82359130, 0xeea0ac}, + {0x82359131, 0xeea0b2}, + {0x82359132, 0xeea183}, + {0x82359133, 0xeea194}, + {0x82359134, 0xeea1a4}, + {0x84318236, 0xee9e8d}, + {0x84318237, 0xee9e8f}, + {0x84318238, 0xee9e8e}, + {0x84318239, 0xee9e90}, + {0x84318330, 0xee9e91}, + {0x84318331, 0xee9e92}, + {0x84318332, 0xee9e93}, + {0x84318333, 0xee9e94}, + {0x84318334, 0xee9e95}, + {0x84318335, 0xee9e96} +}; diff --git a/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map b/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map new file mode 100644 index 0000000000000000000000000000000000000000..9fafde791447c1410ea0c9d39ef5f4815378e53d --- /dev/null +++ b/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map @@ -0,0 +1,40 @@ +static pg_utf_to_local ULmapGB18030_2022[ 38 ] = { + {0xe1b8bf, 0xa8bc}, + {0xe9beb4, 0xfe59}, + {0xe9beb5, 0xfe61}, + {0xe9beb6, 0xfe66}, + {0xe9beb7, 0xfe67}, + {0xe9beb8, 0xfe6d}, + {0xe9beb9, 0xfe7e}, + {0xe9beba, 0xfe90}, + {0xe9bebb, 0xfea0}, + {0xee9e8d, 
0x84318236}, + {0xee9e8e, 0x84318238}, + {0xee9e8f, 0x84318237}, + {0xee9e90, 0x84318239}, + {0xee9e91, 0x84318330}, + {0xee9e92, 0x84318331}, + {0xee9e93, 0x84318332}, + {0xee9e94, 0x84318333}, + {0xee9e95, 0x84318334}, + {0xee9e96, 0x84318335}, + {0xee9f87, 0x8135f437}, + {0xeea09e, 0x82359037}, + {0xeea0a6, 0x82359038}, + {0xeea0ab, 0x82359039}, + {0xeea0ac, 0x82359130}, + {0xeea0b2, 0x82359131}, + {0xeea183, 0x82359132}, + {0xeea194, 0x82359133}, + {0xeea1a4, 0x82359134}, + {0xefb890, 0xa6d9}, + {0xefb891, 0xa6db}, + {0xefb892, 0xa6da}, + {0xefb893, 0xa6dc}, + {0xefb894, 0xa6dd}, + {0xefb895, 0xa6de}, + {0xefb896, 0xa6df}, + {0xefb897, 0xa6ec}, + {0xefb898, 0xa6ed}, + {0xefb899, 0xa6f3} +}; diff --git a/src/common/backend/utils/mb/conv.cpp b/src/common/backend/utils/mb/conv.cpp index c36ca872461680d7ec803ac79bce0ee4b1faf61e..816185d4183ac8330276032a50551ba6ef28081b 100644 --- a/src/common/backend/utils/mb/conv.cpp +++ b/src/common/backend/utils/mb/conv.cpp @@ -14,6 +14,8 @@ #include "postgres.h" #include "knl/knl_variable.h" #include "mb/pg_wchar.h" +#include "Unicode/gb18030_to_utf8_2022.map" +#include "Unicode/utf8_to_gb18030_2022.map" /* * LATINn ---> MIC when the charset's local codes map directly to MIC @@ -479,6 +481,16 @@ void UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_ l = l_save; } /* Now check ordinary map */ + // add gb18030-2022 conv judge. 
+ if (encoding == PG_GB18030_2022) { + p = (pg_utf_to_local*)bsearch(&iutf, ULmapGB18030_2022, + lengthof(ULmapGB18030_2022), sizeof(pg_utf_to_local), compare1); + if (p != NULL) { + iso = store_coded_char(iso, p->code); + continue; + } + } + p = (pg_utf_to_local *)bsearch(&iutf, map, mapsize, sizeof(pg_utf_to_local), compare1); if (p != NULL) { iso = store_coded_char(iso, p->code); @@ -602,6 +614,15 @@ void LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_ iiso |= *iso++; } + // add gb18030-2022 conv judge + if (encoding == PG_GB18030_2022) { + p = (pg_local_to_utf*)bsearch(&iiso, LUmapGB18030_2022, + lengthof(LUmapGB18030_2022), sizeof(pg_local_to_utf), compare2); + if (p != NULL) { + utf = store_coded_char(utf, p->utf); + continue; + } + } p = (pg_local_to_utf*)bsearch(&iiso, map, mapsize, sizeof(pg_local_to_utf), compare2); if (p != NULL) { utf = store_coded_char(utf, p->utf); diff --git a/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt b/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt index 47def6cc978e9ce32d337e94ca63f7a25f258315..25148ad9e3912bd2b58b4b406d3731cd9fc31dca 100755 --- a/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt +++ b/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt @@ -8,6 +8,7 @@ SET(unicode_cmd_src "${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_SJIS.pl|" "${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_GB18030.pl|" "${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_BIG5.pl|" + "${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_GB18030_2022.pl|" ) add_cmd_gen_when_configure(perl_target unicode_cmd_src) diff --git a/src/common/backend/utils/mb/conversion_procs/Makefile b/src/common/backend/utils/mb/conversion_procs/Makefile index c3b697d47884df574cfa7caa3b4bd4425d953c47..06937885ebc30800937aa265d872b78b4bdd4787 100644 --- a/src/common/backend/utils/mb/conversion_procs/Makefile +++ 
b/src/common/backend/utils/mb/conversion_procs/Makefile @@ -120,6 +120,8 @@ CONVERSIONS = \ utf8_to_euc_tw UTF8 EUC_TW utf8_to_euc_tw utf8_and_euc_tw \ gb18030_to_utf8 GB18030 UTF8 gb18030_to_utf8 utf8_and_gb18030 \ utf8_to_gb18030 UTF8 GB18030 utf8_to_gb18030 utf8_and_gb18030 \ + gb18030_2022_to_utf8 GB18030_2022 UTF8 gb18030_2022_to_utf8 utf8_and_gb18030 \ + utf8_to_gb18030_2022 UTF8 GB18030_2022 utf8_to_gb18030_2022 utf8_and_gb18030 \ gbk_to_utf8 GBK UTF8 gbk_to_utf8 utf8_and_gbk \ utf8_to_gbk UTF8 GBK utf8_to_gbk utf8_and_gbk \ utf8_to_iso_8859_2 UTF8 LATIN2 utf8_to_iso8859 utf8_and_iso8859 \ diff --git a/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in b/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in index 9fa3be54bbe174469b7e225737025a42b6962394..899b680807d5c5ac760b4a4cbd0251efaee51e3f 100644 --- a/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in +++ b/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in @@ -130,3 +130,5 @@ shift_jis_2004_to_utf8 SHIFT_JIS_2004 UTF8 shift_jis_2004_to_utf8 utf8_and_sjis2 utf8_to_shift_jis_2004 UTF8 SHIFT_JIS_2004 utf8_to_shift_jis_2004 utf8_and_sjis2004 euc_jis_2004_to_shift_jis_2004 EUC_JIS_2004 SHIFT_JIS_2004 euc_jis_2004_to_shift_jis_2004 euc2004_sjis2004 shift_jis_2004_to_euc_jis_2004 SHIFT_JIS_2004 EUC_JIS_2004 shift_jis_2004_to_euc_jis_2004 euc2004_sjis2004 +gb18030_2022_to_utf8 GB18030_2022 UTF8 gb18030_2022_to_utf8 utf8_and_gb18030 +utf8_to_gb18030_2022 UTF8 GB18030_2022 utf8_to_gb18030_2022 utf8_and_gb18030 \ No newline at end of file diff --git a/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp b/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp index 52277e6c99f1c3d412f7485f8f77e0d6baafe32f..ff3c43eef95372b00d38c67fe39e1701a9716a63 100644 --- a/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp +++ 
b/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp @@ -23,9 +23,15 @@ PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(gb18030_to_utf8); PG_FUNCTION_INFO_V1(utf8_to_gb18030); +PG_FUNCTION_INFO_V1(gb18030_2022_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_gb18030_2022); + extern "C" Datum gb18030_to_utf8(PG_FUNCTION_ARGS); extern "C" Datum utf8_to_gb18030(PG_FUNCTION_ARGS); +extern "C" Datum gb18030_2022_to_utf8(PG_FUNCTION_ARGS); +extern "C" Datum utf8_to_gb18030_2022(PG_FUNCTION_ARGS); + /* * Convert 4-byte GB18030 characters to and from a linear code space * @@ -195,3 +201,31 @@ Datum utf8_to_gb18030(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +// convert the GB18030-2022 code to the UTF8 code +Datum gb18030_2022_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char* src = (unsigned char*)PG_GETARG_CSTRING(2); + unsigned char* dest = (unsigned char*)PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + // check whether the conversion relationship between two character sets exists. + CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030_2022, PG_UTF8); + + LocalToUtf(src, len, dest, LUmapGB18030, lengthof(LUmapGB18030), NULL, 0, conv_18030_to_utf8, PG_GB18030_2022); + + PG_RETURN_VOID(); +} + +// convert the UTF8 code to the GB18030-2022 code. +Datum utf8_to_gb18030_2022(PG_FUNCTION_ARGS) +{ + unsigned char* src = (unsigned char*)PG_GETARG_CSTRING(2); + unsigned char* dest = (unsigned char*)PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + // check whether the conversion relationship between two character sets exists. 
+ CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030_2022); + + UtfToLocal(src, len, dest, ULmapGB18030, lengthof(ULmapGB18030), NULL, 0, conv_utf8_to_18030, PG_GB18030_2022); + + PG_RETURN_VOID(); +} \ No newline at end of file diff --git a/src/common/backend/utils/mb/encnames.cpp b/src/common/backend/utils/mb/encnames.cpp index 9481606e615d0f3f3af642126ca143cd195db3fc..4cda548b26d5beecdf7ed5631607a4e6993d6dd1 100644 --- a/src/common/backend/utils/mb/encnames.cpp +++ b/src/common/backend/utils/mb/encnames.cpp @@ -45,6 +45,7 @@ pg_encname pg_encname_tbl[] = { {"euckr", PG_EUC_KR}, /* EUC-KR; Extended Unix Code for Korean, KS X 1001 standard */ {"euctw", PG_EUC_TW}, /* EUC-TW; Extended Unix Code for traditional Chinese */ {"gb18030", PG_GB18030}, /* GB18030;GB18030 */ + {"gb180302022", PG_GB18030_2022}, /* GB18030-2022;version 2022 for GB18030 */ {"gbk", PG_GBK}, /* GBK; Chinese Windows CodePage 936 simplified Chinese */ {"iso88591", PG_LATIN1}, /* ISO-8859-1; RFC1345,KXS2 */ {"iso885910", PG_LATIN6}, /* ISO-8859-10; RFC1345,KXS2 */ @@ -188,6 +189,7 @@ pg_enc2name pg_enc2name_tbl[] = {DEF_ENC2NAME(SQL_ASCII, 0), DEF_ENC2NAME(WIN1257, 1257), DEF_ENC2NAME(KOI8U, 21866), DEF_ENC2NAME(GB18030, 54936), + DEF_ENC2NAME(GB18030_2022, 54936), DEF_ENC2NAME(SJIS, 932), DEF_ENC2NAME(BIG5, 950), DEF_ENC2NAME(UHC, 0), diff --git a/src/common/backend/utils/mb/mbutils.cpp b/src/common/backend/utils/mb/mbutils.cpp index e336cea1d710f4dce6412f972c6820730347fd5f..85663c196f1ee219c8d34bac1ccf333220109a9b 100644 --- a/src/common/backend/utils/mb/mbutils.cpp +++ b/src/common/backend/utils/mb/mbutils.cpp @@ -39,6 +39,24 @@ typedef struct ConvProcInfo { static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server); static int cliplen(const char* str, int len, int limit); +// Determine whether the current case needs to be converted +bool NoNeedToConvert(int srcEncoding, int destEncoding) +{ + if (srcEncoding == destEncoding) { + return true; + } + if 
(srcEncoding == PG_SQL_ASCII || destEncoding == PG_SQL_ASCII) { + return true; + } + if (srcEncoding == PG_GB18030_2022 && destEncoding == PG_GB18030) { + return true; + } + if (srcEncoding == PG_GB18030 && destEncoding == PG_GB18030_2022) { + return true; + } + return false; +} + /* * Prepare for a future call to SetClientEncoding. Success should mean * that SetClientEncoding is guaranteed to succeed for this encoding request. @@ -66,7 +84,7 @@ int PrepareClientEncoding(int encoding) * Check for cases that require no conversion function. */ current_server_encoding = GetDatabaseEncoding(); - if (current_server_encoding == encoding || current_server_encoding == PG_SQL_ASCII || encoding == PG_SQL_ASCII) { + if (NoNeedToConvert(current_server_encoding, encoding)) { return 0; } @@ -159,7 +177,7 @@ int SetClientEncoding(int encoding) * Check for cases that require no conversion function. */ current_server_encoding = GetDatabaseEncoding(); - if (current_server_encoding == encoding || current_server_encoding == PG_SQL_ASCII || encoding == PG_SQL_ASCII) { + if (NoNeedToConvert(current_server_encoding, encoding)) { u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding]; u_sess->mb_cxt.ToServerConvProc = NULL; u_sess->mb_cxt.ToClientConvProc = NULL; @@ -277,10 +295,7 @@ unsigned char* pg_do_encoding_conversion(unsigned char* src, int len, int src_en if (!IsTransactionState()) { return src; } - if (src_encoding == dest_encoding) { - return src; - } - if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII) { + if (NoNeedToConvert(src_encoding, dest_encoding)) { return src; } if (len <= 0) { @@ -673,7 +688,8 @@ char* pg_any_to_server(const char* s, int len, int encoding) bulkload_illegal_chars_conversion = true; } - if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII) { + if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII || + (encoding == PG_GB18030 && u_sess->mb_cxt.DatabaseEncoding->encoding 
== PG_GB18030_2022)) { /* * No conversion is needed, but we must still validate the data. */ diff --git a/src/common/backend/utils/mb/wchar.cpp b/src/common/backend/utils/mb/wchar.cpp index be064bf2c9c9493b7e4dcec5498d0793dd937078..3d95322a01bed897dae3d1fb9cdb07c842e25101 100644 --- a/src/common/backend/utils/mb/wchar.cpp +++ b/src/common/backend/utils/mb/wchar.cpp @@ -1990,6 +1990,12 @@ pg_wchar_tbl pg_wchar_table[] = { pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */ + {pg_gb180302wchar_with_len, + pg_wchar2gb18030_with_len, + pg_gb18030_mblen, + pg_gb18030_dsplen, + pg_gb18030_verifier, + 4}, /* PG_GB18030_2022 */ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */ diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 87f65f653b490e5ea6bc52e8be15f1c8ba1569d4..8c382b3761e85cc6abb42e62dc7cd1254f62e6c5 100755 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -174,6 +174,7 @@ #include "utils/guc_resource.h" #include "utils/mem_snapshot.h" #include "nodes/parsenodes_common.h" +#include "mb/pg_wchar.h" #ifndef PG_KRB_SRVTAB #define PG_KRB_SRVTAB "" @@ -8306,6 +8307,9 @@ static void set_config_sourcefile(const char* name, char* sourcefile, int source */ void SetConfigOption(const char* name, const char* value, GucContext context, GucSource source) { + if (strcmp(name, "client_encoding") == 0 && pg_char_to_encoding(value) == PG_GB18030_2022) { + value = "gb18030"; + } (void)set_config_option(name, value, context, source, GUC_ACTION_SET, true, 0); } diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index ca3561f28f8142260806ac34574c210cbb978579..f3e3be89cc54b7b7dd3c4fd7ee81012c96fed3ac 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp 
+++ b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -3263,6 +3263,48 @@ static void InitStorageConfigureNamesInt() NULL, NULL, NULL}, + {{"standby_recycle_interval", + PGC_SIGHUP, + NODE_ALL, + RESOURCES_RECOVERY, + gettext_noop("Sets the maximum wait time to recycle."), + NULL, + GUC_UNIT_S}, + &g_instance.attr.attr_storage.standby_recycle_interval, + 10, /* 10s */ + 0, + 3600 * 24, /* 24hour */ + NULL, + NULL, + NULL}, + {{"standby_max_query_time", + PGC_SIGHUP, + NODE_ALL, + RESOURCES_RECOVERY, + gettext_noop("Sets the maximum time allowed for query on standby."), + NULL, + GUC_UNIT_S}, + &g_instance.attr.attr_storage.standby_max_query_time, + 600, /* 10min */ + 0, + 3600 * 24, /* 24hour */ + NULL, + NULL, + NULL}, + {{"base_page_saved_interval", + PGC_POSTMASTER, + NODE_ALL, + RESOURCES_RECOVERY, + gettext_noop("Save a base page every time the page redo as many xlogs as the parameter value."), + NULL, + 0}, + &g_instance.attr.attr_storage.base_page_saved_interval, + 400, + 5, + 2000, + NULL, + NULL, + NULL}, {{"force_promote", PGC_POSTMASTER, NODE_ALL, @@ -3891,6 +3933,19 @@ static void InitStorageConfigureNamesReal() NULL, NULL, NULL}, + {{"standby_force_recyle_ratio", + PGC_SIGHUP, + NODE_ALL, + RESOURCES_RECOVERY, + gettext_noop("Sets the ratio that triggers forced recycling in extreme-rto standby read."), + NULL}, + &g_instance.attr.attr_storage.standby_force_recyle_ratio, + 0.8, + 0.0, + 1.0, + NULL, + NULL, + NULL}, {{"bypass_dram", PGC_SIGHUP, NODE_ALL, @@ -4041,6 +4096,32 @@ static void InitStorageConfigureNamesInt64() NULL, NULL, NULL}, + {{"max_standby_base_page_size", + PGC_POSTMASTER, + NODE_ALL, + RESOURCES_RECOVERY, + gettext_noop("Sets the max size of base page files on standby"), + NULL}, + &g_instance.attr.attr_storage.max_standby_base_page_size, + INT64CONST(0x4000000000), /* 256GB */ + INT64CONST(0), + INT64CONST(0x7FFFFFFFFFFFFFF), + NULL, + NULL, + NULL}, + {{"max_standby_lsn_info_size", + PGC_POSTMASTER, + NODE_ALL, + 
RESOURCES_RECOVERY, + gettext_noop("Sets the max size of lsn info files on standby"), + NULL}, + &g_instance.attr.attr_storage.max_standby_lsn_info_size, + INT64CONST(0x4000000000), /* 256GB */ + INT64CONST(0), + INT64CONST(0x7FFFFFFFFFFFFFF), + NULL, + NULL, + NULL}, /* End-of-list marker */ {{NULL, (GucContext)0, diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp index ca405605d2aff0a3ec8daaf4dfb190e63d87eac4..3a288e254c3d1e6fc2200e02151f78b10b3bbdd5 100644 --- a/src/common/backend/utils/time/snapmgr.cpp +++ b/src/common/backend/utils/time/snapmgr.cpp @@ -1062,6 +1062,9 @@ static void SnapshotResetXmin(void) t_thrd.proc->snapCSN = InvalidCommitSeqNo; t_thrd.pgxact->csn_min = InvalidCommitSeqNo; t_thrd.pgxact->csn_dr = InvalidCommitSeqNo; + + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; } } diff --git a/src/gausskernel/CMakeLists.txt b/src/gausskernel/CMakeLists.txt index 29fa43a5ce2e54dea57f02ff8633d99477b7a662..21b9e9e44fba85c31eff9fd6ac5c5f3601d245c5 100755 --- a/src/gausskernel/CMakeLists.txt +++ b/src/gausskernel/CMakeLists.txt @@ -179,6 +179,7 @@ list(APPEND gaussdb_objects $ $ $ + $ $ $ $ diff --git a/src/gausskernel/optimizer/commands/dbcommands.cpp b/src/gausskernel/optimizer/commands/dbcommands.cpp index df588ce4d52cdc314e3206179ffe4f64178ab50d..631a9683687c2550fc908c68d3fc9c3d835d83cc 100644 --- a/src/gausskernel/optimizer/commands/dbcommands.cpp +++ b/src/gausskernel/optimizer/commands/dbcommands.cpp @@ -33,6 +33,8 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "access/multixact.h" +#include "access/multi_redo_api.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" @@ -267,6 +269,10 @@ Oid createdb(const CreatedbStmt* stmt) if (encoding < 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("%s is not a valid encoding name", encoding_name))); 
+ if (t_thrd.proc->workingVersionNum < GB18030_2022_VERSION_NUM && encoding == PG_GB18030_2022) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support to create database encoding %s in upgrade!", encoding_name))); + } } else ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), @@ -783,7 +789,8 @@ void check_encoding_locale_matches(int encoding, const char* collate, const char #ifdef WIN32 encoding == PG_UTF8 || #endif - (encoding == PG_SQL_ASCII && superuser()))) + (encoding == PG_SQL_ASCII && superuser() || + (encoding == PG_GB18030_2022 && ctype_encoding == PG_GB18030)))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), ctype), @@ -794,7 +801,8 @@ void check_encoding_locale_matches(int encoding, const char* collate, const char #ifdef WIN32 encoding == PG_UTF8 || #endif - (encoding == PG_SQL_ASCII && superuser()))) + (encoding == PG_SQL_ASCII && superuser() || + (encoding == PG_GB18030_2022 && collate_encoding == PG_GB18030)))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), collate), @@ -2434,7 +2442,10 @@ void do_db_drop(Oid dbId, Oid tbSpcId) if (!rmtree(dst_path, true)) { ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dst_path))); } - + if (IS_EXRTO_READ) { + /* remove file start with {db_id}_ */ + extreme_rto_standby_read::remove_block_meta_info_files_of_db(dbId); + } if (InHotStandby) { /* * Release locks prior to commit. 
XXX There is a race condition diff --git a/src/gausskernel/optimizer/commands/vacuumlazy.cpp b/src/gausskernel/optimizer/commands/vacuumlazy.cpp index 6593883faf0fbd1fddc325f4d4e36ef23b43e98f..75792c5cd16ceb978bba61a4568dc29f66173c57 100644 --- a/src/gausskernel/optimizer/commands/vacuumlazy.cpp +++ b/src/gausskernel/optimizer/commands/vacuumlazy.cpp @@ -2057,9 +2057,9 @@ lazy_truncate_heap(Relation onerel, VacuumStmt *vacstmt, LVRelStats *vacrelstats */ if (RelationIsPartition(onerel)) { Assert(vacstmt->onepart && vacstmt->onepartrel); - PartitionTruncate(vacstmt->onepartrel, vacstmt->onepart, new_rel_pages); + PartitionTruncate(vacstmt->onepartrel, vacstmt->onepart, new_rel_pages, vacrelstats->latestRemovedXid); } else { - RelationTruncate(onerel, new_rel_pages); + RelationTruncate(onerel, new_rel_pages, vacrelstats->latestRemovedXid); } /* diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp index 5f5acd1d7c40f6fd585956d4aef6c17dfc53de19..bd1cb4a994c23f480e208406176d1f4bebc97fce 100644 --- a/src/gausskernel/optimizer/commands/variable.cpp +++ b/src/gausskernel/optimizer/commands/variable.cpp @@ -762,7 +762,7 @@ bool check_client_encoding(char** newval, void** extra, GucSource source) /* Look up the encoding by name */ encoding = pg_valid_client_encoding(*newval); - if (encoding < 0) { + if (encoding < 0 || encoding == PG_GB18030_2022) { return false; } diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index ea5d789a776becb114182cb563d622b0647ab184..c85ca2f44778acbfa40282eb1e354180b829d14b 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -80,6 +80,7 @@ #endif #include "access/cbmparsexlog.h" +#include "access/extreme_rto/standby_read.h" #include "access/obs/obs_am.h" #include "access/transam.h" #include "access/ustore/undo/knl_uundoapi.h" @@ -233,6 +234,7 @@ #include 
"access/multi_redo_api.h" #include "postmaster/postmaster.h" #include "access/parallel_recovery/dispatcher.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "utils/distribute_test.h" #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/compaction/compaction_entry.h" @@ -313,6 +315,8 @@ extern void gs_set_hs_shm_data(HaShmemData* ha_shm_data); extern void ReaperBackendMain(); extern void AdjustThreadAffinity(); +extern void exrto_standby_read_init(); + #define EXTERN_SLOTS_NUM 17 volatile PMState pmState = PM_INIT; bool dummyStandbyMode = false; @@ -3090,10 +3094,7 @@ int PostmasterMain(int argc, char* argv[]) /* init sharestorge(dorado) */ ShareStorageInit(); - - /* - * We're ready to rock and roll... - */ + exrto_standby_read_init(); if (ENABLE_DMS && ENABLE_REFORM) { if (!DMSWaitInitStartup()) { if (g_instance.pid_cxt.StartupPID == 0) { @@ -3347,10 +3348,10 @@ static void CheckExtremeRtoGUCConflicts(void) } #ifndef ENABLE_MULTIPLE_NODES - if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) { + if (IS_DISASTER_RECOVER_MODE &&(g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) { ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("extreme rto could not support hot standby."), + errmsg("For disaster standby cluster, extreme rto could not support hot standby."), errhint("Either turn off extreme rto, or turn off hot_standby."))); } #endif @@ -4313,7 +4314,7 @@ static int ServerLoop(void) if (g_instance.attr.attr_storage.enable_ustore && g_instance.pid_cxt.GlobalStatsPID == 0 && - pmState == PM_RUN) { + (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) { g_instance.pid_cxt.GlobalStatsPID = initialize_util_thread(GLOBALSTATS_THREAD); } @@ -5094,7 +5095,7 @@ int ProcessStartupPacket(Port* port, bool SSLdone) } else { #ifdef ENABLE_MULTIPLE_NODES if (STANDBY_MODE == hashmdata->current_mode && (!IS_MULTI_DISASTER_RECOVER_MODE || 
GTM_FREE_MODE || - g_instance.attr.attr_storage.recovery_parse_workers > 1)) { + (IS_PGXC_DATANODE && !g_instance.attr.attr_storage.EnableHotStandby))) { ereport(ERROR, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("can not accept connection in standby mode."))); } @@ -5868,6 +5869,10 @@ static void SIGHUP_handler(SIGNAL_ARGS) signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGHUP); } + if (g_instance.pid_cxt.exrto_recycler_pid != 0) { + signal_child(g_instance.pid_cxt.exrto_recycler_pid, SIGHUP); + } + if (g_instance.pid_cxt.GlobalStatsPID != 0) { signal_child(g_instance.pid_cxt.GlobalStatsPID, SIGHUP); } @@ -6959,7 +6964,11 @@ static void reaper(SIGNAL_ARGS) if (g_instance.pid_cxt.CBMWriterPID == 0 && !dummyStandbyMode && u_sess->attr.attr_storage.enable_cbm_tracking) - g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER); + + + if (IS_EXRTO_READ && g_instance.pid_cxt.exrto_recycler_pid == 0) { + g_instance.pid_cxt.exrto_recycler_pid = initialize_util_thread(EXRTO_RECYCLER); + } /* * Likewise, start other special children as needed. 
In a restart @@ -7746,6 +7755,15 @@ static void reaper(SIGNAL_ARGS) continue; } + if (pid == g_instance.pid_cxt.exrto_recycler_pid) { + g_instance.pid_cxt.exrto_recycler_pid = 0; + + if (!EXIT_STATUS_0(exitstatus)) { + HandleChildCrash(pid, exitstatus, _("Exrto recycle process")); + } + continue; + } + if (get_real_recovery_parallelism() > 1) { PageRedoExitStatus pageredoStatus = CheckExitPageWorkers(pid); if (pageredoStatus == PAGE_REDO_THREAD_EXIT_NORMAL) { @@ -8328,6 +8346,7 @@ static void AsssertAllChildThreadExit() Assert(g_instance.pid_cxt.CommPoolerCleanPID == 0); Assert(g_instance.pid_cxt.UndoLauncherPID == 0); Assert(g_instance.pid_cxt.UndoRecyclerPID == 0); + Assert(g_instance.pid_cxt.exrto_recycler_pid == 0); #ifndef ENABLE_MULTIPLE_NODES Assert(g_instance.pid_cxt.ApplyLauncerPID == 0); #endif @@ -8401,7 +8420,7 @@ static void PostmasterStateMachine(void) #endif /* ENABLE_MULTIPLE_NODES */ g_instance.pid_cxt.UndoLauncherPID == 0 && g_instance.pid_cxt.UndoRecyclerPID == 0 && - g_instance.pid_cxt.GlobalStatsPID == 0 && + g_instance.pid_cxt.exrto_recycler_pid == 0 && g_instance.pid_cxt.GlobalStatsPID == 0 && #ifndef ENABLE_MULTIPLE_NODES g_instance.pid_cxt.ApplyLauncerPID == 0 && #endif @@ -8619,6 +8638,7 @@ static void PostmasterStateMachine(void) hashmdata = t_thrd.postmaster_cxt.HaShmData; hashmdata->current_mode = cur_mode; NotifyGscHotStandby(); + exrto_standby_read_init(); g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); Assert(g_instance.pid_cxt.StartupPID != 0); pmState = PM_STARTUP; @@ -8662,6 +8682,7 @@ static void PostmasterStateMachine(void) PMUpdateDBState(STARTING_STATE, get_cur_mode(), get_cur_repl_num()); } + exrto_standby_read_init(); g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); Assert(g_instance.pid_cxt.StartupPID != 0); pmState = PM_STARTUP; @@ -13197,6 +13218,21 @@ bool PMstateIsRun(void) return PM_RUN == pmState; } +bool pm_state_is_startup() +{ + return (pmState == PM_STARTUP); +} + +bool 
pm_state_is_recovery() +{ + return (pmState == PM_RECOVERY); +} + +bool pm_state_is_hot_standby() +{ + return (pmState == PM_HOT_STANDBY); +} + /* malloc api of cJSON at backend side */ static void* cJSON_internal_malloc(size_t size) { @@ -13296,6 +13332,9 @@ static void SetAuxType() case SHARE_STORAGE_XLOG_COPYER: t_thrd.bootstrap_cxt.MyAuxProcType = XlogCopyBackendProcess; break; + case EXRTO_RECYCLER: + t_thrd.bootstrap_cxt.MyAuxProcType = ExrtoRecyclerProcess; + break; #ifdef ENABLE_MULTIPLE_NODES case BARRIER_PREPARSE: t_thrd.bootstrap_cxt.MyAuxProcType = BarrierPreParseBackendProcess; @@ -13589,6 +13628,10 @@ int GaussDbAuxiliaryThreadMain(knl_thread_arg* arg) SharedStorageXlogCopyBackendMain(); proc_exit(1); break; + case EXRTO_RECYCLER: + extreme_rto::exrto_recycle_main(); + proc_exit(1); + break; #ifdef ENABLE_MULTIPLE_NODES case BARRIER_PREPARSE: BarrierPreParseMain(); @@ -13846,6 +13889,7 @@ int GaussDbThreadMain(knl_thread_arg* arg) case PAGEREPAIR_THREAD: case HEARTBEAT: case SHARE_STORAGE_XLOG_COPYER: + case EXRTO_RECYCLER: #ifdef ENABLE_MULTIPLE_NODES case BARRIER_PREPARSE: case TS_COMPACTION: @@ -14399,6 +14443,7 @@ static ThreadMetaData GaussdbThreadGate[] = { { GaussDbThreadMain, APPLY_WORKER, "applyworker", "apply worker" }, { GaussDbThreadMain, STACK_PERF_WORKER, "stack_perf", "stack perf worker" }, { GaussDbThreadMain, DMS_AUXILIARY_THREAD, "dms_auxiliary", "maintenance xmin in dms" }, + { GaussDbThreadMain, EXRTO_RECYCLER, "exrtorecycler", "exrto recycler" }, /* Keep the block in the end if it may be absent !!! 
*/ #ifdef ENABLE_MULTIPLE_NODES diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 216f6f21d843baf7d149a5e1a8e49a0b4d4f4ef6..92ef81cb28b71e75e5328ed2bb005172163e5202 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -324,7 +324,7 @@ static void knl_g_parallel_redo_init(knl_g_parallel_redo_context* predo_cxt) rc = memset_s(&predo_cxt->redoCpuBindcontrl, sizeof(RedoCpuBindControl), 0, sizeof(RedoCpuBindControl)); securec_check(rc, "", ""); - + predo_cxt->global_recycle_lsn = InvalidXLogRecPtr; predo_cxt->redoItemHash = NULL; } @@ -501,6 +501,7 @@ static void KnlGUndoInit(knl_g_undo_context *undoCxt) undoCxt->undoChainTotalSize = 0; undoCxt->globalFrozenXid = InvalidTransactionId; undoCxt->globalRecycleXid = InvalidTransactionId; + undoCxt->is_exrto_residual_undo_file_recycled = false; } static void knl_g_flashback_init(knl_g_flashback_context *flashbackCxt) diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index c321ac6e10262978510439ae999c424de1f32888..3c26794b790f118bdbf55a6ff30c5ed3611e9dc7 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -960,6 +960,15 @@ static void knl_t_page_redo_init(knl_t_page_redo_context* page_redo_cxt) page_redo_cxt->got_SIGHUP = false; page_redo_cxt->sleep_long = false; page_redo_cxt->check_repair = false; + page_redo_cxt->redo_worker_ptr = NULL; +} + +static void knl_t_exrto_recycle_init(knl_t_exrto_recycle_context* exrto_recycle_cxt) +{ + exrto_recycle_cxt->shutdown_requested = false; + exrto_recycle_cxt->got_SIGHUP = false; + exrto_recycle_cxt->lsn_info.lsn_num = 0; + exrto_recycle_cxt->lsn_info.lsn_array = NULL; } static void knl_t_parallel_decode_init(knl_t_parallel_decode_worker_context* parallel_decode_cxt) @@ -1315,7 +1324,9 @@ static void 
knl_t_storage_init(knl_t_storage_context* storage_cxt) storage_cxt->BackendWritebackContext = (WritebackContext*)palloc0(sizeof(WritebackContext)); storage_cxt->SharedBufHash = NULL; storage_cxt->InProgressBuf = NULL; + storage_cxt->ParentInProgressBuf = NULL; storage_cxt->IsForInput = false; + storage_cxt->ParentIsForInput = false; storage_cxt->PinCountWaitBuf = NULL; storage_cxt->InProgressAioDispatch = NULL; storage_cxt->InProgressAioDispatchCount = 0; @@ -1887,6 +1898,7 @@ void knl_thread_init(knl_thread_role role) knl_t_pencentile_init(&t_thrd.percentile_cxt); knl_t_perf_snap_init(&t_thrd.perf_snap_cxt); knl_t_page_redo_init(&t_thrd.page_redo_cxt); + knl_t_exrto_recycle_init(&t_thrd.exrto_recycle_cxt); knl_t_parallel_decode_init(&t_thrd.parallel_decode_cxt); knl_t_parallel_decode_reader_init(&t_thrd.logicalreadworker_cxt); knl_t_heartbeat_init(&t_thrd.heartbeat_cxt); @@ -1952,6 +1964,7 @@ void RedoInterruptCallBack() Assert(!AmStartupProcess()); Assert(!AmPageRedoWorker()); + Assert(!AmErosRecyclerProcess()); } void RedoPageRepairCallBack(RepairBlockKey key, XLogPhyBlock pblk) diff --git a/src/gausskernel/storage/access/redo/CMakeLists.txt b/src/gausskernel/storage/access/redo/CMakeLists.txt index 16abb90f353806000e563b281f93d811b2918592..22cecf9f2485bc1fb6da4a82189c6e231195bd54 100755 --- a/src/gausskernel/storage/access/redo/CMakeLists.txt +++ b/src/gausskernel/storage/access/redo/CMakeLists.txt @@ -1,4 +1,5 @@ #This is the main CMAKE for build bin. 
+add_subdirectory(standby_read) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_redo_SRC) set(TGT_redo_INC diff --git a/src/gausskernel/storage/access/redo/Makefile b/src/gausskernel/storage/access/redo/Makefile index 9ec819f8bab42a23b2ce8a2b05972c1175e8ec2a..07f1f8ca6e50158f19a5f3dfef759bf7d945748a 100644 --- a/src/gausskernel/storage/access/redo/Makefile +++ b/src/gausskernel/storage/access/redo/Makefile @@ -22,6 +22,7 @@ #------------------------------------------------------------------------- subdir = src/gausskernel/storage/access/redo +SUBDIRS = standby_read top_builddir = ../../../../.. include $(top_builddir)/src/Makefile.global diff --git a/src/gausskernel/storage/access/redo/redo_dbcommands.cpp b/src/gausskernel/storage/access/redo/redo_dbcommands.cpp index bc8314f42741370da2bfa121cad872b237c68770..4ab0a3ac41abed1164fab1140b43c753d43c0786 100644 --- a/src/gausskernel/storage/access/redo/redo_dbcommands.cpp +++ b/src/gausskernel/storage/access/redo/redo_dbcommands.cpp @@ -86,6 +86,7 @@ XLogRecParseState *DbaseRedoParseToBlock(XLogReaderState *record, uint32 *blockn if ((info == XLOG_DBASE_CREATE) || (info == XLOG_DBASE_DROP)) { recordstatehead = DatabaseXlogCommonParseToBlock(record, blocknum); + recordstatehead->isFullSync = record->isFullSync; } else { ereport(PANIC, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmsg("DbaseRedoParseToBlock: unknown op code %u", info))); diff --git a/src/gausskernel/storage/access/redo/redo_storage.cpp b/src/gausskernel/storage/access/redo/redo_storage.cpp index 35aa13e0aabf9717e03568ba924cd831d3fc4dfa..73a25fbeceb99e5b04937c2479913db29d5f1de9 100644 --- a/src/gausskernel/storage/access/redo/redo_storage.cpp +++ b/src/gausskernel/storage/access/redo/redo_storage.cpp @@ -68,7 +68,7 @@ XLogRecParseState *smgr_xlog_relnode_parse_to_block(XLogReaderState *record, uin XLogRecSetBlockCommonState(record, BLOCK_DATA_DDL_TYPE, filenode, recordstatehead); 
XLogRecSetBlockDdlState(&(recordstatehead->blockparse.extra_rec.blockddlrec), ddltype, - (char *)XLogRecGetData(record), 1, compress); + (char *)XLogRecGetData(record), 1, compress, XLogRecGetDataLen(record)); return recordstatehead; } diff --git a/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp b/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp index 0f3c428833f221e4e3d0fb5a6f34cb0f0b5a4985..753b99d7ba4e98e9776de180cdc17346381c1d7d 100644 --- a/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp +++ b/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp @@ -49,6 +49,7 @@ bool visibilitymap_clear_page(Page mapPage, BlockNumber heapBlk) void visibilitymap_clear_buffer(RedoBufferInfo *bufferInfo, BlockNumber heapBlk) { if (visibilitymap_clear_page(bufferInfo->pageinfo.page, heapBlk)) { + PageSetLSN(bufferInfo->pageinfo.page, bufferInfo->lsn, false); MakeRedoBufferDirty(bufferInfo); } } diff --git a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp index 0fb813d5b503a3296c0ef27ceb938b245907640d..7bfcbaf22d5337b1a7ce539f999fcf4701356104 100644 --- a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp +++ b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp @@ -50,6 +50,9 @@ #include "access/ustore/knl_uextremeredo.h" #include "commands/dbcommands.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/batch_redo.h" +#include "access/extreme_rto/page_redo.h" #include "access/twophase.h" #include "access/redo_common.h" #include "ddes/dms/ss_dms_bufmgr.h" @@ -805,11 +808,12 @@ void XLogUpdateCopyedBlockState(XLogRecParseState *recordblockstate, XLogBlockPa } void XLogRecSetBlockDdlState(XLogBlockDdlParse *blockddlstate, uint32 blockddltype, char *mainData, - int rels, bool compress) + int rels, bool compress, uint32 mainDataLen) { Assert(blockddlstate != NULL); blockddlstate->blockddltype = blockddltype; blockddlstate->rels = rels; 
+ blockddlstate->mainDataLen = mainDataLen; blockddlstate->mainData = mainData; blockddlstate->compress = compress; } @@ -1495,9 +1499,14 @@ void XLogBlockDdlDoSmgrAction(XLogBlockHead *blockhead, void *blockrecbody, Redo case BLOCK_DDL_CREATE_RELNODE: smgr_redo_create(rnode, blockhead->forknum, blockddlrec->mainData); break; - case BLOCK_DDL_TRUNCATE_RELNODE: - xlog_block_smgr_redo_truncate(rnode, blockhead->blkno, blockhead->end_ptr); + case BLOCK_DDL_TRUNCATE_RELNODE: { + TransactionId latest_removed_xid = InvalidTransactionId; + if (blockddlrec->mainDataLen == TRUNCATE_CONTAIN_XID_SIZE) { + latest_removed_xid = ((xl_smgr_truncate_compress*)blockddlrec->mainData)->latest_removed_xid; + } + xlog_block_smgr_redo_truncate(rnode, blockhead->blkno, blockhead->end_ptr, latest_removed_xid); break; + } case BLOCK_DDL_DROP_RELNODE: { bool compress = blockddlrec->compress; ColFileNodeRel *xnodes = (ColFileNodeRel *)blockddlrec->mainData; @@ -1748,26 +1757,38 @@ bool XLogBlockRedoForExtremeRTO(XLogRecParseState *redoblocktate, RedoBufferInfo } bool checkvalid = XLogBlockRefreshRedoBufferInfo(blockhead, bufferinfo); - if (!checkvalid) { + if (unlikely(!checkvalid)) { ereport(PANIC, (errmsg("XLogBlockRedoForExtremeRTO: redobuffer checkfailed"))); } - if (block_valid <= BLOCK_DATA_FSM_TYPE) { - if (redoaction != BLK_DONE) { - GetRedoStartTime(redoCost); - Assert(block_valid == g_xlogExtRtoRedoTable[block_valid].block_valid); - g_xlogExtRtoRedoTable[block_valid].xlog_redoextrto(blockhead, blockrecbody, bufferinfo); - CountRedoTime(redoCost); - } -#ifdef USE_ASSERT_CHECKING - if (block_valid != BLOCK_DATA_UNDO_TYPE && !bufferinfo->pageinfo.ignorecheck) { - DoRecordCheck(redoblocktate, PageGetLSN(bufferinfo->pageinfo.page), true); - } -#endif - AddReadBlock(redoblocktate, (u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readcount)); - } else { + + if (unlikely(block_valid > BLOCK_DATA_FSM_TYPE)) { ereport(WARNING, (errmsg("XLogBlockRedoForExtremeRTO: unsuport type %u, lsn 
%X/%X", (uint32)block_valid, (uint32)(blockhead->end_ptr >> 32), (uint32)(blockhead->end_ptr)))); + return false; + } + + if ((block_valid != BLOCK_DATA_UNDO_TYPE) && g_instance.attr.attr_storage.EnableHotStandby && + XLByteLT(PageGetLSN(bufferinfo->pageinfo.page), blockhead->end_ptr)) { + BufferTag buf_tag; + INIT_BUFFERTAG(buf_tag, bufferinfo->blockinfo.rnode, + bufferinfo->blockinfo.forknum, bufferinfo->blockinfo.blkno); + extreme_rto_standby_read::insert_lsn_to_block_info(&extreme_rto::g_redoWorker->standby_read_meta_info, buf_tag, + bufferinfo->pageinfo.page, blockhead->start_ptr); + } + + if (redoaction != BLK_DONE) { + GetRedoStartTime(redoCost); + Assert(block_valid == g_xlogExtRtoRedoTable[block_valid].block_valid); + g_xlogExtRtoRedoTable[block_valid].xlog_redoextrto(blockhead, blockrecbody, bufferinfo); + CountRedoTime(redoCost); + } +#ifdef USE_ASSERT_CHECKING + if (block_valid != BLOCK_DATA_UNDO_TYPE && !bufferinfo->pageinfo.ignorecheck) { + DoRecordCheck(redoblocktate, PageGetLSN(bufferinfo->pageinfo.page), true); } +#endif + AddReadBlock(redoblocktate, (u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readcount)); + return false; } @@ -1865,6 +1886,119 @@ void XLogBlockDispatchForExtermeRTO(XLogRecParseState *recordblockstate) } while (nextstate != NULL); } +bool find_target_state(XLogRecParseState *state_iter, const RedoBufferTag &target_tag) +{ + RelFileNode n; + uint32 blk; + ForkNumber fork; + extreme_rto::PRXLogRecGetBlockTag(state_iter, &n, &blk, &fork); + if (RelFileNodeEquals(n, target_tag.rnode) && target_tag.blkno == blk && target_tag.forknum == fork) { + return true; + } else { + return false; + } +} + +void wal_block_redo_for_extreme_rto_read(XLogRecParseState *state, RedoBufferInfo *buf_info) +{ + uint16 block_valid; + void *block_rec_body; + XLogBlockHead *block_head; + const int shift_size = 32; + + /* decode blockdata body */ + block_head = &state->blockparse.blockhead; + block_rec_body = &state->blockparse.extra_rec; + 
block_valid = XLogBlockHeadGetValidInfo(block_head); + + bool check_valid = XLogBlockRefreshRedoBufferInfo(block_head, buf_info); + if (!check_valid) { + ereport(ERROR, (errmsg("wal_block_redo_for_extreme_rto: redobuffer checkfailed"))); + } + if (block_valid <= BLOCK_DATA_FSM_TYPE) { + Assert(block_valid == g_xlogExtRtoRedoTable[block_valid].block_valid); + g_xlogExtRtoRedoTable[block_valid].xlog_redoextrto(block_head, block_rec_body, buf_info); + } else { + ereport(ERROR, (errmsg("wal_block_redo_for_extreme_rto: unsuport type %u, lsn %X/%X", (uint32)block_valid, + (uint32)(block_head->end_ptr >> shift_size), (uint32)(block_head->end_ptr)))); + } +} + +void init_redo_buffer_info(RedoBufferInfo *rb_info, const BufferTag &buf_tag, Buffer buf) +{ + rb_info->lsn = InvalidXLogRecPtr; + rb_info->buf = buf; + rb_info->blockinfo.rnode = buf_tag.rnode; + rb_info->blockinfo.forknum = buf_tag.forkNum; + rb_info->blockinfo.blkno = buf_tag.blockNum; + rb_info->blockinfo.pblk.block = InvalidBlockNumber; + rb_info->blockinfo.pblk.lsn = InvalidXLogRecPtr; + rb_info->blockinfo.pblk.relNode = InvalidOid; + rb_info->pageinfo.page = BufferGetPage(buf); + rb_info->pageinfo.pagesize = BufferGetPageSize(buf); +#ifdef USE_ASSERT_CHECKING + rb_info->pageinfo.ignorecheck = false; /* initial value */ +#endif + rb_info->dirtyflag = false; /* initial value, actually, dirtyflag is useless in extreme RTO read */ +} + +void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_info, Buffer base_page_buf) +{ + char *error_msg = NULL; + RedoParseManager redo_pm; + + XLogReaderState *xlog_reader = XLogReaderAllocate(&read_local_xlog_page, NULL); + /* do we need register interrupt func here? 
like ProcessConfigFile */ + XLogParseBufferInitFunc(&redo_pm, MAX_BUFFER_NUM_PER_WAL_RECORD, NULL, NULL); + if (xlog_reader == NULL) { + ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + errdetail("Failed while allocating an XLog reading processor."))); + } + + RedoBufferInfo buf_info; + init_redo_buffer_info(&buf_info, buf_tag, base_page_buf); + for (uint32 i = 0; i < lsn_info->lsn_num; i++) { + XLogRecord *record = XLogReadRecord(xlog_reader, lsn_info->lsn_array[i], &error_msg); + if (record == NULL) { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not read two-phase state from xlog at %X/%X, errormsg: %s", + (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i]), + error_msg ? error_msg : " "))); + } + + uint32 num = 0; + XLogRecParseState *state = XLogParseToBlockCommonFunc(xlog_reader, &num); + + if (num == 0) { + ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain any block.", + (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i])))); + } + + if (state == NULL) { + ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + errdetail("Failed while wal parse to block."))); + } + XLogRecParseState *state_iter = state; + while (state_iter != NULL) { + if (find_target_state(state_iter, buf_info.blockinfo)) { + break; + } + state_iter = (XLogRecParseState *)(state_iter->nextrecord); + } + if (state_iter == NULL) { + ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain target block.", + (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i])))); + } + buf_info.lsn = state_iter->blockparse.blockhead.end_ptr; + buf_info.blockinfo.pblk = state_iter->blockparse.blockhead.pblk; + wal_block_redo_for_extreme_rto_read(state_iter, &buf_info); + XLogBlockParseStateRelease(state); + } + + XLogReaderFree(xlog_reader); + XLogParseBufferDestoryFunc(&redo_pm); +} + #ifdef EXTREME_RTO_DEBUG_AB void 
DoThreadExit() { diff --git a/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt b/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f8959afaf1610c56a36744a4550818089951a54 --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt @@ -0,0 +1,23 @@ +#This is the main CMAKE for build bin. + + +AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_standby_read_SRC) + +set(TGT_standby_read_INC + ${PROJECT_SRC_DIR}/tools/log_fdw + ${PROJECT_TRUNK_DIR}/distribute/bin/gds + ${PROJECT_SRC_DIR}/include/iprange + ${PROJECT_SRC_DIR}/include/libcomm + ${PROJECT_SRC_DIR}/include + ${PROJECT_SRC_DIR}/lib/gstrace + ${LIBCGROUP_INCLUDE_PATH} + ${ZLIB_INCLUDE_PATH} + ${LIBCURL_INCLUDE_PATH} + ${LZ4_INCLUDE_PATH} + ${EVENT_INCLUDE_PATH} +) + +set(standby_read_DEF_OPTIONS ${MACRO_OPTIONS}) +set(standby_read_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(standby_read_LINK_OPTIONS ${BIN_LINK_OPTIONS}) +add_static_objtarget(gausskernel_storage_access_redo_standby_read TGT_standby_read_SRC TGT_standby_read_INC "${standby_read_DEF_OPTIONS}" "${standby_read_COMPILE_OPTIONS}" "${standby_read_LINK_OPTIONS}") diff --git a/src/gausskernel/storage/access/redo/standby_read/Makefile b/src/gausskernel/storage/access/redo/standby_read/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2367845295217cde9fde41c5fdab5b8189d6202c --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/Makefile @@ -0,0 +1,37 @@ +# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# --------------------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/redo/standby_read +# +# IDENTIFICATION +# src/gausskernel/storage/access/redo/standby_read/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/gausskernel/storage/access/redo/standby_read +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif +OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o lsn_info_proc.o standby_read_interface.o + +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..51445710641f2cd59fa9cc5c2c42f935b532abf8 --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * base_page_proc.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/extreme_rto/batch_redo.h" +#include "access/extreme_rto/dispatcher.h" +#include "access/extreme_rto/page_redo.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" +#include "storage/buf/buf_internals.h" + +namespace extreme_rto_standby_read { + +inline RelFileNode make_base_page_relfilenode(uint32 batch_id, uint32 redo_worker_id, BasePagePosition position) +{ + RelFileNode rnode; + rnode.spcNode = EXRTO_BASE_PAGE_SPACE_OID; + rnode.dbNode = (batch_id << LOW_WORKERID_BITS) | redo_worker_id; + rnode.relNode = (uint32)((position / BLCKSZ) >> UINT64_HALF); + rnode.bucketNode = InvalidBktId; + rnode.opt = DefaultFileNodeOpt; + + return rnode; +} + +Buffer buffer_read_base_page(uint32 batch_id, uint32 redo_id, BasePagePosition position, ReadBufferMode mode) +{ + RelFileNode rnode = make_base_page_relfilenode(batch_id, redo_id, position); + BlockNumber blocknum = (BlockNumber)(position / BLCKSZ); + bool hit = false; + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); + Buffer buffer = + ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, MAIN_FORKNUM, blocknum, mode, NULL, &hit, NULL); + if (buffer == InvalidBuffer) { + ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("invalid buffer when read base page, batch_id: %u, redo_worker_id: %u, blocknum: %lu", + batch_id, redo_id, position / BLCKSZ)))); + } + + return buffer; +} + +void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page) +{ + BasePagePosition position = meta_info->base_page_next_position; + + Buffer dest_buf = buffer_read_base_page(meta_info->batch_id, meta_info->redo_id, position, 
RBM_ZERO_AND_LOCK); + + Page dest_page = BufferGetPage(dest_buf); + errno_t rc = memcpy_s(dest_page, BLCKSZ, src_page, BLCKSZ); + securec_check(rc, "\0", "\0"); + MarkBufferDirty(dest_buf); + UnlockReleaseBuffer(dest_buf); + + meta_info->base_page_next_position += BLCKSZ; +} + +void read_base_page(const BufferTag& buf_tag, BasePagePosition position, BufferDesc* dest_buf_desc) +{ + extreme_rto::RedoItemTag redo_item_tag; + const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); + + /* batch id and worker id start from 1 when reading a page */ + uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::GetBatchCount()) + 1; + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + uint32 redo_worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; + + Buffer buffer = buffer_read_base_page(batch_id, redo_worker_id, position, RBM_NORMAL); + + LockBuffer(buffer, BUFFER_LOCK_SHARE); + Page src_page = BufferGetPage(buffer); + Size page_size = BufferGetPageSize(buffer); + Page dest_page = (Page)BufHdrGetBlock(dest_buf_desc); + errno_t rc = memcpy_s(dest_page, page_size, src_page, page_size); + securec_check(rc, "\0", "\0"); + UnlockReleaseBuffer(buffer); +} + +void recycle_base_page_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos) +{ + RelFileNode rnode = make_base_page_relfilenode(batch_id, redo_id, recycle_pos); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); + + smgrdounlink(smgr, true, (BlockNumber)(recycle_pos / BLCKSZ)); +} + +} // namespace extreme_rto_standby_read + diff --git a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..356a6687b28c97a1b2af607c9af66d929f3a1855 --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp @@ -0,0 +1,299 @@ +/* + * 
Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * block_info_proc.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/recovery/parallel/blocklevel/standby_read/block_info_proc.cpp + * + * ------------------------------------------------------------------------- + */ + +#include +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "storage/smgr/relfilenode.h" + +namespace extreme_rto_standby_read { + +void block_info_page_init(Page page) +{ + static_assert(sizeof(BlockInfoPageHeader) == BLOCK_INFO_HEAD_SIZE, "BlockInfoPageHeader size is not 64 bytes"); + static_assert(sizeof(BlockMetaInfo) == BLOCK_INFO_SIZE, "BlockMetaInfo size is not 64 bytes"); + + BlockInfoPageHeader* page_header = (BlockInfoPageHeader*)page; + errno_t ret = memset_s(page_header, BLCKSZ, 0, BLCKSZ); + securec_check(ret, "", ""); + page_header->flags |= BLOCK_INFO_PAGE_VALID_FLAG; + page_header->version = BLOCK_INFO_PAGE_VERSION; +} + +inline BlockNumber data_block_number_to_meta_page_number(BlockNumber block_num) +{ + return block_num / BLOCK_INFO_NUM_PER_PAGE; +} + +inline uint32 block_info_meta_page_offset(BlockNumber block_num) +{ + return (block_num % BLOCK_INFO_NUM_PER_PAGE) * BLOCK_INFO_SIZE + BLOCK_INFO_HEAD_SIZE; +} + +// get page, just have pin, no lock +BlockMetaInfo* get_block_meta_info_by_relfilenode( + const BufferTag& buf_tag, 
BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer) +{ + RelFileNode standby_read_rnode = buf_tag.rnode; + standby_read_rnode.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; + SMgrRelation smgr = smgropen(standby_read_rnode, InvalidBackendId); + bool hit = false; + + BlockNumber meta_block_num = data_block_number_to_meta_page_number(buf_tag.blockNum); + *buffer = ReadBuffer_common(smgr, 0, buf_tag.forkNum, meta_block_num, mode, strategy, &hit, NULL); + + if (*buffer == InvalidBuffer) { + return NULL; + } + + Page page = BufferGetPage(*buffer); + if (!is_block_info_page_valid((BlockInfoPageHeader*)page)) { + if (mode == RBM_NORMAL) { + ReleaseBuffer(*buffer); + return NULL; + } + } + + uint32 offset = block_info_meta_page_offset(buf_tag.blockNum); + BlockMetaInfo *block_info = ((BlockMetaInfo*)(page + offset)); + if (!is_block_meta_info_valid(block_info) && mode == RBM_NORMAL) { + ReleaseBuffer(*buffer); + + return NULL; + } + + return block_info; +} + +void init_block_info(BlockMetaInfo* block_info, XLogRecPtr max_lsn) +{ + errno_t ret = memset_s(block_info, BLOCK_INFO_SIZE, 0, BLOCK_INFO_SIZE); + securec_check(ret, "", ""); + block_info->timeline = t_thrd.shemem_ptr_cxt.ControlFile->timeline; + block_info->flags |= BLOCK_INFO_NODE_VALID_FLAG; + lsn_info_list_init(&block_info->lsn_info_list); + lsn_info_list_init(&block_info->base_page_info_list); + block_info->max_lsn = max_lsn; // just for update first base page info' lsn + block_info->min_lsn = max_lsn; +} + +void insert_lsn_to_block_info( + StandbyReadMetaInfo* meta_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn) +{ + Buffer block_info_buf = InvalidBuffer; + BlockMetaInfo* block_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_ZERO_ON_ERROR, &block_info_buf); + if (unlikely(block_info == NULL || block_info_buf == InvalidBuffer)) { + ereport(PANIC, (errmsg("insert lsn failed,block invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, 
buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); + } + LockBuffer(block_info_buf, BUFFER_LOCK_EXCLUSIVE); + Page page = BufferGetPage(block_info_buf); + XLogRecPtr current_page_lsn = PageGetLSN(base_page); + if (!is_block_meta_info_valid(block_info)) { + if (!is_block_info_page_valid((BlockInfoPageHeader*)page)) { + block_info_page_init(page); + } + + init_block_info(block_info, current_page_lsn); + } + + if (block_info->record_num == 0 || + (block_info->record_num % (uint32)g_instance.attr.attr_storage.base_page_saved_interval) == 0) { + insert_base_page_to_lsn_info(meta_info, &block_info->lsn_info_list, &block_info->base_page_info_list, buf_tag, + base_page, current_page_lsn, next_lsn); + } else { + insert_lsn_to_lsn_info(meta_info, &block_info->lsn_info_list, next_lsn); + } + + Assert(block_info->max_lsn <= next_lsn); + block_info->max_lsn = next_lsn; + + ++(block_info->record_num); + + standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(block_info_buf); + UnlockReleaseBuffer(block_info_buf); +} + +StandbyReadRecyleState recyle_block_info( + const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn) +{ + Buffer buffer = InvalidBuffer; + BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_NORMAL, &buffer); + if ((block_meta_info == NULL) || (buffer == InvalidBuffer)) { + // no block info, should not at this branch + ereport(WARNING, (errmsg("block meta is invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, + buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); + return STANDBY_READ_RECLYE_ALL; + } + StandbyReadRecyleState stat = STANDBY_READ_RECLYE_NONE; + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + Assert(((block_meta_info->flags & BLOCK_INFO_NODE_VALID_FLAG) == BLOCK_INFO_NODE_VALID_FLAG)); + if (XLByteLT(block_meta_info->max_lsn, recyle_lsn)) { + block_meta_info->flags &= ~BLOCK_INFO_NODE_VALID_FLAG; + stat = 
STANDBY_READ_RECLYE_ALL; + MarkBufferDirty(buffer); + } else if (XLogRecPtrIsValid(next_base_page_lsn)) { + LsnInfoPosition min_page_info_pos = LSN_INFO_LIST_HEAD; + XLogRecPtr min_lsn = InvalidXLogRecPtr; + recycle_one_lsn_info_list(buf_tag, base_page_info_pos, recyle_lsn, &min_page_info_pos, &min_lsn); + + Assert(INFO_POSITION_IS_VALID(min_page_info_pos)); + if (block_meta_info->base_page_info_list.next != min_page_info_pos) { + block_meta_info->min_lsn = min_lsn; + block_meta_info->lsn_info_list.next = min_page_info_pos; + block_meta_info->base_page_info_list.next = min_page_info_pos; + stat = STANDBY_READ_RECLYE_UPDATE; + MarkBufferDirty(buffer); + } + } + UnlockReleaseBuffer(buffer); + return stat; +} + +static void reset_tmp_lsn_info_array(StandbyReadLsnInfoArray* lsn_info) +{ + lsn_info->lsn_num = 0; + lsn_info->base_page_lsn = InvalidXLogRecPtr; + if (lsn_info->lsn_array == NULL) { + uint32 max_save_nums = (uint32)g_instance.attr.attr_storage.base_page_saved_interval; + lsn_info->lsn_array = (XLogRecPtr*)MemoryContextAlloc( + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(XLogRecPtr) * max_save_nums); + } +} + +bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, XLogRecPtr read_lsn, + StandbyReadLsnInfoArray* lsn_info) +{ + Buffer buf; + BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, strategy, RBM_NORMAL, &buf); + if (block_meta_info == NULL) { + return false; + } + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + if (XLByteLT(read_lsn, block_meta_info->min_lsn)) { + UnlockReleaseBuffer(buf); + ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("block old version can not found %u/%u/%u %d %u read lsn %lu, min lsn %lu", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, read_lsn, block_meta_info->min_lsn)))); + return false; + } + + Assert(block_meta_info->base_page_info_list.prev != LSN_INFO_LIST_HEAD); + 
reset_tmp_lsn_info_array(lsn_info); + get_lsn_info_for_read(buf_tag, block_meta_info->base_page_info_list.prev, lsn_info, read_lsn); + UnlockReleaseBuffer(buf); + return true; +} + +/* + * recycle one block info file + * rnode: database oid. + */ +void remove_one_block_info_file(const RelFileNode rnode) +{ + DropRelFileNodeShareBuffers(rnode, MAIN_FORKNUM, 0); + DropRelFileNodeShareBuffers(rnode, FSM_FORKNUM, 0); + DropRelFileNodeShareBuffers(rnode, VISIBILITYMAP_FORKNUM, 0); + + SMgrRelation srel = smgropen(rnode, InvalidBackendId); + smgrdounlink(srel, true); + smgrclose(srel); +} +/* + * recycle all relation files when drop db occurs. + * db_id: database oid. + */ +void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid) +{ + char pathbuf[EXRTO_FILE_PATH_LEN]; + char **filenames; + char **filename; + struct stat statbuf; + /* get block info file directory */ + char exrto_block_info_dir[EXRTO_FILE_PATH_LEN] = {0}; + int rc = snprintf_s(exrto_block_info_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR, + EXRTO_FILE_SUB_DIR[BLOCK_INFO_META]); + securec_check_ss(rc, "", ""); + /* get all files' name from block meta file directory */ + filenames = pgfnames(exrto_block_info_dir); + if (filenames == NULL) { + return; + } + char target_prefix[EXRTO_FILE_PATH_LEN] = {0}; + if (rel_oid != InvalidOid) { + rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_%u_", db_oid, rel_oid); + } else { + rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_", db_oid); + } + securec_check_ss(rc, "", ""); + /* use the prefix name to match up files we want to delete */ + size_t prefix_len = strlen(target_prefix); + for (filename = filenames; *filename != NULL; filename++) { + char *fname = *filename; + size_t fname_len = strlen(fname); + /* + * the length of prefix is less than the length of file name and must be the same under the same prefix_len + */ + if (prefix_len >= fname_len || strncmp(target_prefix, fname, prefix_len) != 0) { + continue; + } 
+ rc = + snprintf_s(pathbuf, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", exrto_block_info_dir, *filename); + securec_check_ss(rc, "", ""); + /* may be can be some error */ + if (lstat(pathbuf, &statbuf) != 0) { + if (errno != ENOENT) { +#ifndef FRONTEND + ereport(WARNING, (errmsg("could not stat file or directory \"%s\" \n", pathbuf))); +#else + fprintf(stderr, _("could not stat file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno)); +#endif + } + continue; + } + /* if the file is a directory, don't touch it */ + if (S_ISDIR(statbuf.st_mode)) { + /* skip dir */ + continue; + } + /* delete this file we found */ + if (unlink(pathbuf) != 0) { + if (errno != ENOENT) { +#ifndef FRONTEND + ereport(WARNING, (errmsg("could not remove file or directory \"%s\" ", pathbuf))); +#else + fprintf(stderr, _("could not remove file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno)); +#endif + } + } + } + pgfnames_cleanup(filenames); + return; +} + +} // namespace extreme_rto_standby_read + diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bffb2be17311df7be7f1eae26be89d94e0f97b4e --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * lsn_info_double_list.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "access/extreme_rto/standby_read/lsn_info_double_list.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" + +namespace extreme_rto_standby_read { + +void lsn_info_list_init(LsnInfoDoubleList* node) +{ + node->next = LSN_INFO_LIST_HEAD; + node->prev = LSN_INFO_LIST_HEAD; +} + +/* + * modify the tail of list to link new node (block meta table's page lock is held) + */ +void info_list_modify_old_tail(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos, + LsnInfoPosition insert_pos, XLogRecPtr current_page_lsn, XLogRecPtr next_lsn, bool is_lsn_info) +{ + Page page = NULL; + LsnInfo lsn_info = NULL; + BasePageInfo base_page_info = NULL; + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + Buffer buffer = InvalidBuffer; + uint32 offset; + + page = get_lsn_info_page(batch_id, worker_id, old_tail_pos, RBM_ZERO_ON_ERROR, &buffer); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + offset = lsn_info_postion_to_offset(old_tail_pos); + Assert(offset >= LSN_INFO_HEAD_SIZE); + Assert(offset % LSN_INFO_NODE_SIZE == 0); + if (is_lsn_info) { + lsn_info = (LsnInfo)(page + offset); + Assert(lsn_info->lsn_list.next == LSN_INFO_LIST_HEAD); + lsn_info->lsn_list.next = insert_pos; + Assert(is_lsn_info_node_valid(lsn_info->flags)); + } else { + base_page_info = (BasePageInfo)(page + offset); + Assert(base_page_info->base_page_list.next == LSN_INFO_LIST_HEAD); + base_page_info->base_page_list.next = insert_pos; + base_page_info->next_base_page_lsn = current_page_lsn; + Assert(is_lsn_info_node_valid(base_page_info->lsn_info_node.flags)); + Assert(XLByteLT(base_page_info->cur_page_lsn, current_page_lsn)); + } + + 
standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); +} + +} // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..765538955435e903806d64fdd4053081419442ed --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp @@ -0,0 +1,650 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * lsn_info_proc.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/recovery/parallel/blocklevel/standby_read/lsn_info_proc.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/extreme_rto/batch_redo.h" +#include "access/extreme_rto/dispatcher.h" +#include "access/extreme_rto/page_redo.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/lsn_info_double_list.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" + +namespace extreme_rto_standby_read { + +void lsn_info_page_init(Page page) +{ + static_assert(sizeof(LsnInfoPageHeader) == LSN_INFO_HEAD_SIZE, "LsnInfoPageHeader size is not 64 bytes"); + static_assert(sizeof(LsnInfoNode) == LSN_INFO_NODE_SIZE, "LsnInfoNode size is not 64 bytes"); + static_assert(sizeof(BasePageInfoNode) == BASE_PAGE_INFO_NODE_SIZE, "BasePageInfoNode size is not 128 bytes"); + + LsnInfoPageHeader *page_header = (LsnInfoPageHeader *)page; + errno_t ret = memset_s(page_header, BLCKSZ, 0, BLCKSZ); + securec_check(ret, "", ""); + page_header->flags |= LSN_INFO_PAGE_VALID_FLAG; + page_header->version = LSN_INFO_PAGE_VERSION; +} + +void lsn_info_init(LsnInfo lsn_info) +{ + errno_t ret = memset_s(lsn_info, LSN_INFO_NODE_SIZE, 0, LSN_INFO_NODE_SIZE); + securec_check(ret, "", ""); + + lsn_info->flags |= LSN_INFO_NODE_VALID_FLAG; + lsn_info->type = LSN_INFO_TYPE_LSNS; + lsn_info_list_init(&lsn_info->lsn_list); +} +void base_page_info_init(BasePageInfo base_page_info) +{ + errno_t ret = memset_s(base_page_info, BASE_PAGE_INFO_NODE_SIZE, 0, BASE_PAGE_INFO_NODE_SIZE); + securec_check(ret, "", ""); + + base_page_info->lsn_info_node.flags |= LSN_INFO_NODE_VALID_FLAG; + base_page_info->lsn_info_node.type = LSN_INFO_TYPE_BASE_PAGE; + 
lsn_info_list_init(&base_page_info->lsn_info_node.lsn_list); + lsn_info_list_init(&base_page_info->base_page_list); +} + +RelFileNode make_lsn_info_relfilenode(uint32 batch_id, uint32 worker_id, LsnInfoPosition position) +{ + RelFileNode rnode = {0}; + rnode.spcNode = EXRTO_LSN_INFO_SPACE_OID; + rnode.dbNode = (batch_id << LOW_WORKERID_BITS) | worker_id; + rnode.relNode = (uint32)((position / BLCKSZ) >> UINT64_HALF); + rnode.bucketNode = InvalidBktId; + rnode.opt = DefaultFileNodeOpt; + + return rnode; +} + +Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition position, ReadBufferMode mode, + Buffer* buffer) +{ + RelFileNode rnode; + BlockNumber block_num; + bool hit = false; + Page page = NULL; + + rnode = make_lsn_info_relfilenode(batch_id, worker_id, position); + block_num = (uint32)(position / BLCKSZ); /* high 32 bits are stored in the relNode. */ + + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); + *buffer = ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, MAIN_FORKNUM, block_num, mode, NULL, &hit, NULL); + + if (*buffer == InvalidBuffer) { + ereport(WARNING, (errcode_for_file_access(), + errmsg("block is invalid %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", + rnode.spcNode, rnode.dbNode, rnode.relNode, MAIN_FORKNUM, block_num, + batch_id, worker_id))); + return NULL; + } + + page = BufferGetPage(*buffer); + if (!is_lsn_info_page_valid((LsnInfoPageHeader*)page)) { + if (mode == RBM_NORMAL) { + ReleaseBuffer(*buffer); + *buffer = InvalidBuffer; + return NULL; + } + /* make sure to make buffer dirty outside */ + lsn_info_page_init(page); + } + + return page; +} + +LsnInfoPosition create_lsn_info_node(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos, + XLogRecPtr next_lsn, bool create_in_old_page, Page old_page) +{ + Page page = NULL; + LsnInfo lsn_info = NULL; + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + LsnInfoPosition insert_pos = meta_info->lsn_table_next_position; + Buffer 
buffer = InvalidBuffer; + uint32 offset; + + offset = lsn_info_postion_to_offset(insert_pos); + if (offset == 0) { + insert_pos += LSN_INFO_HEAD_SIZE; /* actual insert position */ + offset += LSN_INFO_HEAD_SIZE; + } + Assert(offset % LSN_INFO_NODE_SIZE == 0); + + if (create_in_old_page) { + /* in old page, buffer is already locked */ + lsn_info = (LsnInfo)(old_page + offset); + } else { + page = get_lsn_info_page(batch_id, worker_id, insert_pos, RBM_ZERO_ON_ERROR, &buffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + lsn_info = (LsnInfo)(page + offset); + } + + lsn_info_init(lsn_info); + lsn_info->lsn[lsn_info->used] = next_lsn; + lsn_info->used++; + lsn_info->lsn_list.prev = old_tail_pos; + + if (!create_in_old_page) { + standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + } + /* update meta info */ + meta_info->lsn_table_next_position = insert_pos + LSN_INFO_NODE_SIZE; + + return insert_pos; +} + +void insert_lsn_to_lsn_info(StandbyReadMetaInfo *meta_info, LsnInfoDoubleList *lsn_head, XLogRecPtr next_lsn) +{ + Page page = NULL; + LsnInfo lsn_info = NULL; + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + LsnInfoPosition tail_pos = lsn_head->prev; /* lsn info node tail */ + LsnInfoPosition insert_pos = meta_info->lsn_table_next_position; + Buffer buffer = InvalidBuffer; + uint32 offset; + + Assert(!INFO_POSITION_IS_INVALID(tail_pos)); + page = get_lsn_info_page(batch_id, worker_id, tail_pos, RBM_ZERO_ON_ERROR, &buffer); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + offset = lsn_info_postion_to_offset(tail_pos); + lsn_info = (LsnInfo)(page + offset); + Assert(offset >= LSN_INFO_HEAD_SIZE); + Assert(offset % LSN_INFO_NODE_SIZE == 0); + Assert(is_lsn_info_node_valid(lsn_info->flags)); + Assert(lsn_info->lsn_list.next == LSN_INFO_LIST_HEAD); + if (lsn_info->used < LSN_NUM_PER_NODE) { + lsn_info->lsn[lsn_info->used] = next_lsn; + lsn_info->used++; + + 
standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + } else { + /* + * There is no free space in the old lsn info node, create a new one. + */ + bool create_in_old_page = (insert_pos / BLCKSZ) == (tail_pos / BLCKSZ); + /* insert position maybe changed */ + insert_pos = create_lsn_info_node(meta_info, tail_pos, next_lsn, create_in_old_page, page); + + /* modify lsn info list */ + lsn_info->lsn_list.next = insert_pos; + standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + /* update lsn info tail in block info meta */ + lsn_head->prev = insert_pos; + } +} + +LsnInfoPosition create_base_page_info_node(StandbyReadMetaInfo *meta_info, + LsnInfoPosition old_lsn_tail_pos, LsnInfoPosition old_base_page_tail_pos, const BufferTag* buf_tag, + XLogRecPtr current_page_lsn, XLogRecPtr next_lsn) +{ + Page page = NULL; + BasePageInfo base_page_info = NULL; + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + LsnInfoPosition insert_pos = meta_info->lsn_table_next_position; + BasePagePosition base_page_pos = meta_info->base_page_next_position; + Buffer buffer = InvalidBuffer; + uint32 offset; + uint32 remain_size; + + /* + * If there is not enough space in current page, we insert base page info node in next page. 
+ */ + remain_size = BLCKSZ - insert_pos % BLCKSZ; + if (remain_size < BASE_PAGE_INFO_NODE_SIZE) { + Assert(remain_size == LSN_INFO_NODE_SIZE); + insert_pos += LSN_INFO_NODE_SIZE; /* switch to next page */ + } + + offset = lsn_info_postion_to_offset(insert_pos); + Assert(offset % LSN_INFO_NODE_SIZE == 0); + if (offset == 0) { + insert_pos += LSN_INFO_HEAD_SIZE; /* actual insert position */ + offset += LSN_INFO_HEAD_SIZE; + } + + page = get_lsn_info_page(batch_id, worker_id, insert_pos, RBM_ZERO_ON_ERROR, &buffer); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + base_page_info = (BasePageInfo)(page + offset); + + base_page_info_init(base_page_info); + base_page_info->lsn_info_node.lsn_list.prev = old_lsn_tail_pos; + base_page_info->lsn_info_node.lsn[0] = next_lsn; + base_page_info->lsn_info_node.used++; + base_page_info->base_page_list.prev = old_base_page_tail_pos; + base_page_info->cur_page_lsn = current_page_lsn; + base_page_info->relfilenode = buf_tag->rnode; + base_page_info->fork_num = buf_tag->forkNum; + base_page_info->block_num = buf_tag->blockNum; + base_page_info->next_base_page_lsn = InvalidXLogRecPtr; + base_page_info->base_page_position = base_page_pos; + + set_base_page_map_bit(page, offset); + + standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + + /* update meta info */ + meta_info->lsn_table_next_position = insert_pos + BASE_PAGE_INFO_NODE_SIZE; + + return insert_pos; +} + +void insert_base_page_to_lsn_info(StandbyReadMetaInfo *meta_info, LsnInfoDoubleList *lsn_head, + LsnInfoDoubleList *base_page_head, const BufferTag& buf_tag, const Page base_page, XLogRecPtr current_page_lsn, + XLogRecPtr next_lsn) +{ + LsnInfoPosition old_lsn_tail_pos = lsn_head->prev; + LsnInfoPosition old_base_page_tail_pos = base_page_head->prev; + LsnInfoPosition insert_pos; + + /* possibly modified meta_info */ + insert_pos = create_base_page_info_node(meta_info, old_lsn_tail_pos, old_base_page_tail_pos, &buf_tag, + 
current_page_lsn, next_lsn); + + /* modify old tail information of lsn info node and base page info node */ + if (old_lsn_tail_pos != LSN_INFO_LIST_HEAD) { + info_list_modify_old_tail(meta_info, old_lsn_tail_pos, insert_pos, current_page_lsn, next_lsn, true); + } + if (old_base_page_tail_pos != LSN_INFO_LIST_HEAD) { + info_list_modify_old_tail(meta_info, old_base_page_tail_pos, insert_pos, current_page_lsn, next_lsn, false); + } + + /* modify block info meta */ + lsn_head->prev = insert_pos; + base_page_head->prev = insert_pos; + + if (INFO_POSITION_IS_INVALID(lsn_head->next)) { + lsn_head->next = insert_pos; + } + if (INFO_POSITION_IS_INVALID(base_page_head->next)) { + base_page_head->next = insert_pos; + } + + /* generate base page */ + generate_base_page(meta_info, base_page); +} + +void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_base_page_pos, + StandbyReadLsnInfoArray* lsn_info_list, XLogRecPtr read_lsn) +{ + BasePageInfo base_page_info = NULL; + LsnInfoPosition next_lsn_info_pos; + Buffer buffer; + + XLogRecPtr page_lsn; + XLogRecPtr xlog_lsn; + uint32 batch_id; + uint32 worker_id; + XLogRecPtr *lsn_arry = lsn_info_list->lsn_array; + + /* get batch id and page redo worker id */ + extreme_rto::RedoItemTag redo_item_tag; + const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + /* batch id and worker id start from 1 when reading a page */ + batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::get_batch_redo_num()) + 1; + worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; + + /* find fisrt base page whose lsn less than read lsn form tail to head */ + do { + /* reach the end of the list */ + if (INFO_POSITION_IS_INVALID(latest_lsn_base_page_pos)) { + ereport(ERROR, ( + errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", + 
buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, batch_id, worker_id))); + break; + } + buffer = InvalidBuffer; + Page page = get_lsn_info_page(batch_id, worker_id, latest_lsn_base_page_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(ERROR, + (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id, + worker_id, latest_lsn_base_page_pos))); + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + uint32 offset = lsn_info_postion_to_offset(latest_lsn_base_page_pos); + base_page_info = (BasePageInfo)(page + offset); + + page_lsn = base_page_info->cur_page_lsn; + lsn_info_list->base_page_pos = base_page_info->base_page_position; + lsn_info_list->base_page_lsn = base_page_info->cur_page_lsn; + Assert(is_base_page_type(base_page_info->lsn_info_node.type)); + + /* If we find the desired page, keep it locked */ + if (XLByteLT(page_lsn, read_lsn)) { + break; + } + UnlockReleaseBuffer(buffer); + latest_lsn_base_page_pos = base_page_info->base_page_list.prev; + } while (true); + + LsnInfo lsn_info = &base_page_info->lsn_info_node; + bool find_end = false; + uint32 lsn_num = 0; + do { + for (uint16 i = 0; i < lsn_info->used; ++i) { + xlog_lsn = lsn_info->lsn[i]; + if (XLByteLE(read_lsn, xlog_lsn)) { + find_end = true; + break; + } + + lsn_arry[lsn_num++] = xlog_lsn; + } + next_lsn_info_pos = lsn_info->lsn_list.next; + UnlockReleaseBuffer(buffer); + /* reach the end of the list */ + if (find_end || next_lsn_info_pos == LSN_INFO_LIST_HEAD) { + break; + } + + Page page = get_lsn_info_page(batch_id, worker_id, next_lsn_info_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(ERROR, + (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id, + worker_id, next_lsn_info_pos))); + } + Assert(buffer != InvalidBuffer); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + uint32 
offset = lsn_info_postion_to_offset(next_lsn_info_pos); + lsn_info = (LsnInfo)(page + offset); + } while (true); + + lsn_info_list->lsn_num = lsn_num; +} + +static bool check_base_page_loc_valid(uint32 base_page_loc) +{ + if (base_page_loc < LSN_INFO_HEAD_SIZE || base_page_loc > BLCKSZ - BASE_PAGE_INFO_NODE_SIZE || + base_page_loc % LSN_INFO_HEAD_SIZE != 0) { + ereport(ERROR, (errmsg("invalid BasePageInfo location:%u, page size:%d", base_page_loc, BLCKSZ))); + return false; + } + return true; +} + +/* + * set LsnInfoPageHeader::base_page_map specific bit from 0 to 1. + * the bit is correspond to some 64bytes range space in this page. + * params explanation. + * page: some page block in RAM(one block occupies 8192bytes in memory). + * base_page_loc: the offset of some BasePageInfoNode object from the beginning of this page. + * LsnInfoPageHeader::base_page_map has 128 bit which is mapped to 8192bytes page. + * every bit represent 64 bytes (64 = 8192/128). + * we can assume bit 0 map to [0, 64) of the page; + * bit 1 map to [64, 128) of the page; + * ...... + * bit 127 map to [8128, 8192) of the page; + * LsnInfoPageHeader is the page header which occupies 64bytes, so bit 0 is always 0. + * LSN_INFO_HEAD_SIZE,LSN_INFO_NODE_SIZE,BASE_PAGE_INFO_NODE_SIZE must be integer mutiple of 64, + * so we can use base_page_map to map page memory. 
+ */
+void set_base_page_map_bit(Page page, uint32 base_page_loc)
+{
+    /*
+     * Validate base_page_loc first: it has to lie between the page header and the last
+     * position where a BasePageInfo node still fits, and be a multiple of LSN_INFO_HEAD_SIZE.
+     * The bool result is ignored; an invalid location is reported through ereport(ERROR)
+     * inside the check.
+     */
+    check_base_page_loc_valid(base_page_loc);
+
+    LsnInfoPageHeader *page_header = (LsnInfoPageHeader *)page;
+    uint8 *base_page_map = page_header->base_page_map;
+    /*
+     * index of the node-sized slot that starts at base_page_loc
+     * NOTE(review): alignment is checked against LSN_INFO_HEAD_SIZE but the index is computed with
+     * LSN_INFO_NODE_SIZE; this is consistent only while both are the same size - confirm.
+     */
+    uint32 which_bit = base_page_loc / LSN_INFO_NODE_SIZE;
+    uint32 which_bytes = which_bit / BYTE_BITS;  // uint8 has 8 bits or 8*sizeof(uint8) bits
+    uint32 bit_offset = which_bit % BYTE_BITS;   // position of the bit inside that byte
+    base_page_map[which_bytes] |= ((uint8)((uint8)1 << bit_offset));
+}
+
+/*
+ * Report ERROR when which_bit does not address a bit inside base_page_map,
+ * i.e. when it is not in [0, BASE_PAGE_MAP_SIZE * BYTE_BITS).
+ */
+static void check_base_page_map_bit_loc_valid(uint32 which_bit)
+{
+    if (which_bit >= BASE_PAGE_MAP_SIZE * BYTE_BITS) {
+        ereport(ERROR, (errmsg("invalid base_page_map bit location:%u, "
+            "the valid range is [%u, %u).", which_bit, 0U, BASE_PAGE_MAP_SIZE * BYTE_BITS)));
+    }
+}
+
+/*
+ * check if LsnInfoPageHeader::base_page_map specific bit equal to 1.
+ * page: the page in which LsnInfoPageHeader object you want to check.
+ * which_bit: the bit you want to check.
+ * if the target bit is equal to 1, return true.
+ */ +bool is_base_page_map_bit_set(Page page, uint32 which_bit) +{ + check_base_page_map_bit_loc_valid(which_bit); + + LsnInfoPageHeader *page_header = (LsnInfoPageHeader *)page; + uint8 *base_page_map = page_header->base_page_map; + uint32 which_bytes = which_bit / BYTE_BITS; // uint8 has 8 bits or 8*sizeof(uint8) bits + uint32 bit_offset = which_bit % BYTE_BITS; + return (base_page_map[which_bytes] & (((uint8)1) << bit_offset)) != 0; +} + +void recycle_lsn_info_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos) +{ + RelFileNode rnode = make_lsn_info_relfilenode(batch_id, redo_id, recycle_pos); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); + + smgrdounlink(smgr, true, (BlockNumber)(recycle_pos / BLCKSZ)); +} + +void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_info_pos, + XLogRecPtr recycle_lsn, LsnInfoPosition *min_page_info_pos, XLogRecPtr *min_lsn) +{ + /* get batch id and page redo worker id */ + extreme_rto::RedoItemTag redo_item_tag; + const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + /* batch id and worker id start from 1 when reading a page */ + uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::get_batch_redo_num()) + 1; + uint32 worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; + + while (INFO_POSITION_IS_VALID(page_info_pos)) { + Buffer buffer = InvalidBuffer; + Page page = get_lsn_info_page(batch_id, worker_id, page_info_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, page_info_pos))); + } + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + uint32 offset = lsn_info_postion_to_offset(page_info_pos); + BasePageInfo base_page_info = (BasePageInfo)(page + offset); + 
Assert(is_base_page_type(base_page_info->lsn_info_node.type)); + + *min_page_info_pos = page_info_pos; + *min_lsn = base_page_info->cur_page_lsn; + + /* retain a page version with page lsn less than recycle lsn */ + XLogRecPtr next_base_page_lsn = base_page_info->next_base_page_lsn; + if (XLogRecPtrIsInvalid(next_base_page_lsn) || XLByteLT(recycle_lsn, next_base_page_lsn)) { + UnlockReleaseBuffer(buffer); + break; + } + + base_page_info->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG; + page_info_pos = base_page_info->base_page_list.next; + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + } +} + +void invalid_base_page_list(StandbyReadMetaInfo *meta_info, Buffer buffer, uint32 offset) +{ + LsnInfoPosition page_info_pos; + Page page = BufferGetPage(buffer); + BasePageInfo base_page_info = (BasePageInfo)(page + offset); + /* set invalid flags */ + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + base_page_info->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG; + page_info_pos = base_page_info->base_page_list.next; + MarkBufferDirty(buffer); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* keep buffer pinned */ + + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + while (INFO_POSITION_IS_VALID(page_info_pos)) { + page = get_lsn_info_page(batch_id, worker_id, page_info_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, page_info_pos))); + } + offset = lsn_info_postion_to_offset(page_info_pos); + base_page_info = (BasePageInfo)(page + offset); + + /* unset valid flags */ + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + base_page_info->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG; + page_info_pos = base_page_info->base_page_list.next; + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + } +} + +inline void update_recycle_lsn_per_worker(StandbyReadMetaInfo *meta_info, 
XLogRecPtr lsn) +{ + Assert(XLogRecPtrIsValid(lsn)); + if (XLogRecPtrIsInvalid(meta_info->recycle_lsn_per_worker) || + XLByteLT(meta_info->recycle_lsn_per_worker, lsn)) { + meta_info->recycle_lsn_per_worker = lsn; + } + ereport(LOG, (errmsg(EXRTOFORMAT( + "[exrto_recycle] update recycle lsn per worker , batch_id: %u, redo_id: %u, recycle lsn: %08X/%08X"), + meta_info->batch_id, meta_info->redo_id, (uint32)(lsn >> UINT64_HALF), (uint32)lsn))); +} + +bool recycle_one_lsn_info_page(StandbyReadMetaInfo *meta_info, XLogRecPtr recycle_lsn, + BasePagePosition *base_page_position) +{ + uint32 batch_id = meta_info->batch_id; + uint32 worker_id = meta_info->redo_id; + Buffer buffer = InvalidBuffer; + LsnInfoPosition recycle_pos = meta_info->lsn_table_recyle_position; + Page page = get_lsn_info_page(batch_id, worker_id, recycle_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, recycle_pos))); + } + + bool buffer_is_locked = false; + /* skip page header */ + for (uint32 bit = 1; bit < BASE_PAGE_MAP_SIZE * BYTE_BITS; bit++) { + if (!buffer_is_locked) { + LockBuffer(buffer, BUFFER_LOCK_SHARE); + buffer_is_locked = true; + } + + if (!is_base_page_map_bit_set(page, bit)) { + continue; + } + uint32 offset = bit_to_offset(bit); + BasePageInfo base_page_info = (BasePageInfo)(page + offset); + LsnInfoPosition cur_base_page_info_pos = recycle_pos + offset; + Assert(is_base_page_type(base_page_info->lsn_info_node.type)); + + /* block meta file may be dropped */ + if (!is_lsn_info_node_valid(base_page_info->lsn_info_node.flags)) { + continue; + } + + /* retain a page version with page lsn less than recycle lsn */ + XLogRecPtr base_page_lsn = base_page_info->cur_page_lsn; + if (XLogRecPtrIsInvalid(base_page_lsn)) { + base_page_lsn = base_page_info->lsn_info_node.lsn[0]; + } + XLogRecPtr next_base_page_lsn = 
base_page_info->next_base_page_lsn; + *base_page_position = base_page_info->base_page_position; + if (XLogRecPtrIsValid(next_base_page_lsn) && XLByteLT(recycle_lsn, next_base_page_lsn)) { + update_recycle_lsn_per_worker(meta_info, base_page_lsn); + UnlockReleaseBuffer(buffer); + return false; + } + + BufferTag buf_tag; + INIT_BUFFERTAG(buf_tag, base_page_info->relfilenode, base_page_info->fork_num, base_page_info->block_num); + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + buffer_is_locked = false; + + StandbyReadRecyleState stat = + recyle_block_info(buf_tag, cur_base_page_info_pos, next_base_page_lsn, recycle_lsn); + if (stat == STANDBY_READ_RECLYE_ALL) { + invalid_base_page_list(meta_info, buffer, offset); + } else if (stat == STANDBY_READ_RECLYE_NONE) { + Assert(XLogRecPtrIsInvalid(next_base_page_lsn)); + update_recycle_lsn_per_worker(meta_info, base_page_lsn); + ReleaseBuffer(buffer); + return false; + } + } + + if (buffer_is_locked) { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + ReleaseBuffer(buffer); + return true; +} + +void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr recycle_lsn) +{ + Assert(meta_info->batch_id > 0); + Assert(meta_info->redo_id > 0); + bool recycle_next_page = true; + BasePagePosition base_page_position = meta_info->base_page_recyle_position; + + while (meta_info->lsn_table_recyle_position + BLCKSZ < meta_info->lsn_table_next_position) { + recycle_next_page = recycle_one_lsn_info_page(meta_info, recycle_lsn, &base_page_position); + if (!recycle_next_page) { + break; + } + /* update recycle position */ + meta_info->lsn_table_recyle_position += BLCKSZ; + Assert(meta_info->lsn_table_recyle_position % BLCKSZ == 0); + RedoInterruptCallBack(); + } + + meta_info->base_page_recyle_position = base_page_position; + Assert(meta_info->base_page_recyle_position % BLCKSZ == 0); + Assert(meta_info->base_page_recyle_position <= meta_info->base_page_next_position); + + recycle_lsn_info_file(meta_info->batch_id, 
meta_info->redo_id, meta_info->lsn_table_recyle_position); + recycle_base_page_file(meta_info->batch_id, meta_info->redo_id, meta_info->base_page_recyle_position); +} + +} // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f64492a46b77fb5884a88f7fb85eb935f4b89b2a --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * standby_read_interface.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp + * + * ------------------------------------------------------------------------- + */ + +#include +#include "access/extreme_rto/page_redo.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/multi_redo_api.h" +#include "pgstat.h" +#include "storage/smgr/relfilenode.h" +#include "storage/buf/buf_internals.h" +#include "storage/buf/bufmgr.h" +#include "storage/smgr/segment.h" +#include "utils/rel.h" +#include "utils/palloc.h" +#include "access/extreme_rto/dispatcher.h" +#include "funcapi.h" + +const char* EXRTO_BASE_PAGE_SUB_DIR = "base_page"; +const char* EXRTO_LSN_INFO_SUB_DIR = "lsn_info_meta"; +const char* EXRTO_BLOCK_INFO_SUB_DIR = "block_info_meta"; +const char* EXRTO_FILE_SUB_DIR[] = { + EXRTO_BASE_PAGE_SUB_DIR, EXRTO_LSN_INFO_SUB_DIR, EXRTO_BLOCK_INFO_SUB_DIR}; +const uint32 EXRTO_FILE_PATH_LEN = 1024; + +void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode& read_node) +{ + read_node.spcNode = (Oid)(read_lsn >> 32); + read_node.dbNode = (Oid)(read_lsn); + read_node.relNode = InvalidOid; // make sure it can be InvalidOid or not + read_node.opt = 0; + read_node.bucketNode = InvalidBktId; +} + +BufferDesc* alloc_standby_read_buf( + const BufferTag& buf_tag, BufferAccessStrategy strategy, bool& found, XLogRecPtr read_lsn) +{ + RelFileNode read_node; + make_standby_read_node(read_lsn, read_node); + BufferDesc* buf_desc = BufferAlloc(read_node, 0, buf_tag.forkNum, buf_tag.blockNum, strategy, &found, NULL); + + return buf_desc; +} + +Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, + BufferAccessStrategy strategy, XLogRecPtr read_lsn) +{ + bool hit = false; + + Buffer newest_buf = 
ReadBuffer_common( + reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &hit, NULL); + if (BufferIsInvalid(newest_buf)) { + return InvalidBuffer; + } + + LockBuffer(newest_buf, BUFFER_LOCK_SHARE); + Page newest_page = BufferGetPage(newest_buf); + XLogRecPtr page_lsn = PageGetLSN(newest_page); + if (XLByteLT(read_lsn, page_lsn)) { + UnlockReleaseBuffer(newest_buf); + return InvalidBuffer; + } + + BufferTag buf_tag = { + .rnode = reln->rd_smgr->smgr_rnode.node, + .forkNum = fork_num, + .blockNum = block_num, + }; + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, page_lsn); + + if (hit) { + UnlockReleaseBuffer(newest_buf); + return BufferDescriptorGetBuffer(buf_desc); + } + Page read_page = (Page)BufHdrGetBlock(buf_desc); + + errno_t rc = memcpy_s(read_page, BLCKSZ, newest_page, BLCKSZ); + securec_check(rc, "\0", "\0"); + UnlockReleaseBuffer(newest_buf); + buf_desc->extra->lsn_on_disk = PageGetLSN(read_page); +#ifdef USE_ASSERT_CHECKING + buf_desc->lsn_dirty = InvalidXLogRecPtr; +#endif + + TerminateBufferIO(buf_desc, false, (BM_VALID | BM_IS_TMP_BUF)); + return BufferDescriptorGetBuffer(buf_desc); +} + +Buffer standby_read_buf( + Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy) +{ + /* Open it at the smgr level */ + RelationOpenSmgr(reln); // need or not ????? 
+ pgstat_count_buffer_read(reln); + pgstatCountBlocksFetched4SessionLevel(); + + if (RelationisEncryptEnable(reln)) { + reln->rd_smgr->encrypt = true; + } + + bool hit = false; + BufferTag buf_tag = { + .rnode = reln->rd_smgr->smgr_rnode.node, + .forkNum = fork_num, + .blockNum = block_num, + }; + XLogRecPtr read_lsn = t_thrd.proc->exrto_read_lsn; + if (read_lsn == InvalidXLogRecPtr) { + Assert(IsSystemRelation(reln)); + read_lsn = MAX_XLOG_REC_PTR; + } + + Buffer read_buf = get_newest_page_for_read(reln, fork_num, block_num, mode, strategy, read_lsn); + + if (read_buf != InvalidBuffer) { + // newest page's lsn smaller than read lsn + return read_buf; + } + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + // read lsn info + StandbyReadLsnInfoArray *lsn_info = &t_thrd.exrto_recycle_cxt.lsn_info; + bool result = extreme_rto_standby_read::get_page_lsn_info(buf_tag, strategy, read_lsn, lsn_info); + if (!result) { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("standby_read_buf couldnot found buf %u/%u/%u %d %u read lsn %lu", buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum, read_lsn)))); + return InvalidBuffer; + } + + // read lsn info + XLogRecPtr expected_lsn = InvalidXLogRecPtr; + if (lsn_info->lsn_num == 0) { + expected_lsn = lsn_info->base_page_lsn; + } else { + Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] > 0); + Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] < read_lsn); + Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] >= lsn_info->base_page_lsn); + expected_lsn = lsn_info->lsn_array[lsn_info->lsn_num - 1]; + } + + BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, expected_lsn); + + if (hit) { + return BufferDescriptorGetBuffer(buf_desc); + } + buffer_in_progress_pop(); + // read_base_page + extreme_rto_standby_read::read_base_page(buf_tag, lsn_info->base_page_pos, buf_desc); + if (lsn_info->lsn_num > 0) { + redo_target_page(buf_tag, 
lsn_info, BufferDescriptorGetBuffer(buf_desc)); + } + Page page = BufferGetPage(BufferDescriptorGetBuffer(buf_desc)); + buf_desc->extra->lsn_on_disk = PageGetLSN(page); +#ifdef USE_ASSERT_CHECKING + buf_desc->lsn_dirty = InvalidXLogRecPtr; +#endif + buffer_in_progress_push(); + TerminateBufferIO(buf_desc, false, (BM_VALID | BM_IS_TMP_BUF)); + + return BufferDescriptorGetBuffer(buf_desc); +} + +void make_exrto_file_directory() +{ + if (!IS_EXRTO_READ) { + return; + } + if (mkdir(EXRTO_FILE_DIR, S_IRWXU) < 0 && errno != EEXIST) { + ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", EXRTO_FILE_DIR))); + } + + char sub_dir[EXRTO_FILE_PATH_LEN]; + errno_t rc = EOK; + for (ExRTOFileType type = BASE_PAGE; type <= BLOCK_INFO_META; type = static_cast(type + 1)) { + rc = snprintf_s(sub_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR, + EXRTO_FILE_SUB_DIR[type]); + securec_check_ss(rc, "\0", "\0"); + if (mkdir(sub_dir, S_IRWXU) < 0 && errno != EEXIST) { + ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", sub_dir))); + } + } +} + +void exrto_clean_dir(void) +{ + int ret = 0; + ereport(LOG, (errmsg("exrto_clean_dir: start to clean dir."))); + if (!isDirExist(EXRTO_FILE_DIR)) { + return; + } + + if (!isDirExist(EXRTO_OLD_FILE_DIR)) { + ereport(LOG, (errmsg("exrto_clean_dir: rename standby_read to standby_read_old."))); + ret = rename(EXRTO_FILE_DIR, EXRTO_OLD_FILE_DIR); + if (ret != 0) { + ereport(ERROR, (errcode_for_file_access(), + errmsg("failed to rename exrto standby_read dir: %s\n", EXRTO_FILE_DIR))); + return; + } + } else { + ereport(LOG, (errmsg("exrto_clean_dir: remove standby_read."))); + if (!rmtree(EXRTO_FILE_DIR, true)) { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not remove exrto standby_read dir: %s\n", EXRTO_FILE_DIR))); + } + } +} + +/* This function will be attached to the recycle thread */ +void exrto_recycle_old_dir(void) +{ + if 
(!rmtree(EXRTO_OLD_FILE_DIR, true)) { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not remove exrto standby_read_old dir: %s\n", EXRTO_OLD_FILE_DIR))); + } +} + +void exrto_standby_read_init() +{ + exrto_clean_dir(); + if (IS_EXRTO_READ) { + make_exrto_file_directory(); + } +} + +Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS) +{ +#define EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM 6 + Datum values[EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM]; + errno_t rc; + bool nulls[EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM]; + HeapTuple tuple = NULL; + TupleDesc tupdesc = NULL; + uint64 lsn_file_size = 0; + uint64 lsn_file_num = 0; + uint64 basepage_file_size = 0; + uint64 basepage_file_num = 0; + uint64 block_meta_file_size = 0; + uint64 block_meta_file_num = 0; + uint32 worker_nums; + + rc = memset_s(values, sizeof(values), 0, sizeof(values)); + securec_check(rc, "\0", "\0"); + + rc = memset_s(nulls, sizeof(nulls), 0, sizeof(nulls)); + securec_check(rc, "\0", "\0"); + + tupdesc = CreateTemplateTupleDesc(EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM, false); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_1, "base_page_file_num", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_2, "base_page_total_size", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_3, "lsn_info_meta_file_num", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_4, "lsn_info_meta_total_size", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_5, "block_info_meta_file_num", XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber)ARG_6, "block_info_meta_total_size", XIDOID, -1, 0); + + tupdesc = BlessTupleDesc(tupdesc); + + SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); + if (extreme_rto::g_dispatcher == NULL) { + worker_nums = 0; + } else { + worker_nums = extreme_rto::g_dispatcher->allWorkersCnt; + } + + for (uint32 i = 0; i < worker_nums; ++i) { + extreme_rto::PageRedoWorker* page_redo_worker = extreme_rto::g_dispatcher->allWorkers[i]; + if 
(page_redo_worker->role != extreme_rto::REDO_PAGE_WORKER) { + continue; + } + StandbyReadMetaInfo meta_info = page_redo_worker->standby_read_meta_info; + + uint64 lsn_file_size_per_thread = 0; + if (meta_info.lsn_table_next_position > meta_info.lsn_table_recyle_position) { + lsn_file_size_per_thread = meta_info.lsn_table_next_position - meta_info.lsn_table_recyle_position; + /* in 0~lsn_table_recyle_position No data is stored, + means the size of one lsn info file does not reach maxsize + eg:0~100KB(lsn_table_recyle_position), 100KB~(16M+100KB)(lsn_table_next_position), filenum:2, size:16M */ + lsn_file_num += meta_info.lsn_table_next_position / EXRTO_LSN_INFO_FILE_MAXSIZE + + ((meta_info.lsn_table_next_position % EXRTO_LSN_INFO_FILE_MAXSIZE) > 0 ? 1 : 0) - + (meta_info.lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE); + } + lsn_file_size += lsn_file_size_per_thread; + + uint64 basepage_file_size_per_thread = 0; + if (meta_info.base_page_next_position > meta_info.base_page_recyle_position) { + basepage_file_size_per_thread = meta_info.base_page_next_position - meta_info.base_page_recyle_position; + basepage_file_num += meta_info.base_page_next_position / EXRTO_BASE_PAGE_FILE_MAXSIZE + + ((meta_info.base_page_next_position % EXRTO_BASE_PAGE_FILE_MAXSIZE) > 0 ? 
1 : 0) - + (meta_info.base_page_recyle_position / EXRTO_BASE_PAGE_FILE_MAXSIZE); + } + basepage_file_size += basepage_file_size_per_thread; + } + SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); + + char block_meta_file_dir[EXRTO_FILE_PATH_LEN]; + char block_meta_file_name[EXRTO_FILE_PATH_LEN]; + struct dirent *de = NULL; + struct stat st; + + rc = snprintf_s(block_meta_file_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "./%s/%s", + EXRTO_FILE_DIR, EXRTO_BLOCK_INFO_SUB_DIR); + securec_check_ss(rc, "\0", "\0"); + + DIR *dir = opendir(block_meta_file_dir); + while ((dir != NULL) && (de = gs_readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) { + continue; + } + rc = snprintf_s(block_meta_file_name, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", + block_meta_file_dir, de->d_name); + securec_check_ss(rc, "\0", "\0"); + if (lstat(block_meta_file_name, &st) != 0) { + continue; + } + block_meta_file_num++; + block_meta_file_size = block_meta_file_size + (uint64)st.st_size; + } + + values[ARG_0] = TransactionIdGetDatum(basepage_file_num); + values[ARG_1] = TransactionIdGetDatum(basepage_file_size); + values[ARG_2] = TransactionIdGetDatum(lsn_file_num); + values[ARG_3] = TransactionIdGetDatum(lsn_file_size); + values[ARG_4] = TransactionIdGetDatum(block_meta_file_num); + values[ARG_5] = TransactionIdGetDatum(block_meta_file_size); + + tuple = heap_form_tuple(tupdesc, values, nulls); + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +} + diff --git a/src/gausskernel/storage/access/transam/extreme_rto/Makefile b/src/gausskernel/storage/access/transam/extreme_rto/Makefile index 6b4b4968ec027b9c3970557afeab2372680004bf..06e25e75f8bdede59c7d1e269138e99f9f790763 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto/Makefile +++ b/src/gausskernel/storage/access/transam/extreme_rto/Makefile @@ -26,6 +26,6 @@ top_builddir = ../../../../../.. 
include $(top_builddir)/src/Makefile.global OBJS = dispatcher.o page_redo.o posix_semaphore.o redo_item.o \ - spsc_blocking_queue.o txn_redo.o batch_redo.o xlog_read.o + spsc_blocking_queue.o txn_redo.o batch_redo.o xlog_read.o exrto_recycle.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp index e45dff0b6fc2bcc4fbc4c1e6c68d1aa18477beb1..c81eb9f5e3b0744dbcdd17999a7a660412511574 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp @@ -44,25 +44,6 @@ #include "access/xlogproc.h" namespace extreme_rto { -static inline void PRXLogRecGetBlockTag(XLogRecParseState *recordBlockState, RelFileNode *rnode, BlockNumber *blknum, - ForkNumber *forknum) -{ - XLogBlockParse *blockparse = &(recordBlockState->blockparse); - - if (rnode != NULL) { - rnode->dbNode = blockparse->blockhead.dbNode; - rnode->relNode = blockparse->blockhead.relNode; - rnode->spcNode = blockparse->blockhead.spcNode; - rnode->bucketNode = blockparse->blockhead.bucketNode; - rnode->opt = blockparse->blockhead.opt; - } - if (blknum != NULL) { - *blknum = blockparse->blockhead.blkno; - } - if (forknum != NULL) { - *forknum = blockparse->blockhead.forknum; - } -} void PRInitRedoItemEntry(RedoItemHashEntry *redoItemHashEntry) { diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp index 9a1daeb2b7b9af5dae7b7e66e99992d0f7a39afe..61e853c84999f9cf6e5abcf296af26f3a992fabf 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp @@ -391,7 +391,7 @@ void SSAllocRecordReadBuffer(XLogReaderState *xlogreader, uint32 privateLen) #endif } -void HandleStartupInterruptsForExtremeRto() +void 
StartupInterruptsForExtremeRto() { Assert(AmStartupProcess()); @@ -400,7 +400,7 @@ void HandleStartupInterruptsForExtremeRto() uint32 triggeredstate = pg_atomic_read_u32(&(g_startupTriggerState)); if (triggeredstate != newtriggered) { ereport(LOG, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), - errmsg("HandleStartupInterruptsForExtremeRto:g_startupTriggerState set from %u to %u", + errmsg("StartupInterruptsForExtremeRto:g_startupTriggerState set from %u to %u", triggeredstate, newtriggered))); pg_atomic_write_u32(&(g_startupTriggerState), newtriggered); } @@ -452,9 +452,15 @@ void StartRecoveryWorkers(XLogReaderState *xlogreader, uint32 privateLen) SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.rwlock)); g_instance.comm_cxt.predo_cxt.state = REDO_IN_PROGRESS; SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.rwlock)); + + Assert(g_instance.pid_cxt.exrto_recycler_pid == 0); + if (g_instance.attr.attr_storage.EnableHotStandby) { + g_instance.pid_cxt.exrto_recycler_pid = initialize_util_thread(EXRTO_RECYCLER); + } + on_shmem_exit(StopRecoveryWorkers, 0); - g_dispatcher->oldStartupIntrruptFunc = RegisterRedoInterruptCallBack(HandleStartupInterruptsForExtremeRto); + g_dispatcher->oldStartupIntrruptFunc = RegisterRedoInterruptCallBack(StartupInterruptsForExtremeRto); close_readFile_if_open(); } @@ -563,6 +569,9 @@ static void StartPageRedoWorkers(uint32 totalThrdNum) for (uint32 j = 0; j < batchWorkerPerMng; j++) { RedoRoleInit(&(g_dispatcher->pageLines[i].redoThd[j]), tmpWorkers[workerCnt++], REDO_PAGE_WORKER, j, isUndoSpaceWorker); + // start from 1 not 0 + g_dispatcher->pageLines[i].redoThd[j]->standby_read_meta_info.batch_id = i + 1; + g_dispatcher->pageLines[i].redoThd[j]->standby_read_meta_info.redo_id = j + 1; } g_dispatcher->pageLines[i].redoThdNum = batchWorkerPerMng; } @@ -607,6 +616,10 @@ bool DispathCouldExit() } } + if (g_instance.pid_cxt.exrto_recycler_pid != 0) { + return false; + } + return true; } @@ -636,6 +649,17 @@ void SendSingalToPageWorker(int signal) 
} } +void send_signal_to_eros_recycle_worker(int signal) +{ + if (g_instance.pid_cxt.exrto_recycler_pid != 0) { + int err = gs_signal_send(g_instance.pid_cxt.exrto_recycler_pid, signal); + if (err != 0) { + ereport(WARNING, (errmsg("Dispatch kill(pid %lu, signal %d) failed: \"%s\",", + g_instance.pid_cxt.exrto_recycler_pid, signal, gs_strerror(err)))); + } + } +} + /* Run from the dispatcher thread. */ static void StopRecoveryWorkers(int code, Datum arg) { @@ -643,6 +667,7 @@ static void StopRecoveryWorkers(int code, Datum arg) errmsg("parallel redo workers are going to stop, code:%d, arg:%lu", code, DatumGetUInt64(arg)))); SendSingalToPageWorker(SIGTERM); + send_signal_to_eros_recycle_worker(SIGTERM); uint64 count = 0; while (!DispathCouldExit()) { @@ -1125,6 +1150,7 @@ static bool DispatchDataBaseRecord(XLogReaderState *record, List *expectedTLIs, if (IsDataBaseDrop(record)) { isNeedFullSync = true; + record->isFullSync = true; RedoItem *item = GetRedoItemPtr(record); ReferenceRedoItem(item); @@ -1132,10 +1158,18 @@ static bool DispatchDataBaseRecord(XLogReaderState *record, List *expectedTLIs, ReferenceRedoItem(item); AddPageRedoItem(g_dispatcher->pageLines[i].batchThd, item); } - DereferenceRedoItem(item); + AddTxnRedoItem(g_dispatcher->trxnLine.managerThd, item); } else { /* database dir may impact many rel so need to sync to all pageworks */ - DispatchRecordWithoutPage(record, expectedTLIs); + record->isFullSync = true; + RedoItem *item = GetRedoItemPtr(record); + + ReferenceRedoItem(item); + for (uint32 i = 0; i < g_dispatcher->pageLineNum; i++) { + ReferenceRedoItem(item); + AddPageRedoItem(g_dispatcher->pageLines[i].batchThd, item); + } + AddTxnRedoItem(g_dispatcher->trxnLine.managerThd, item); g_dispatcher->needFullSyncCheckpoint = true; } @@ -1911,6 +1945,7 @@ void SendRecoveryEndMarkToWorkersAndWaitForFinish(int code) errmsg("[REDO_LOG_TRACE]SendRecoveryEndMarkToWorkersAndWaitForFinish, ready to stop redo workers, code: %d", code))); if 
((get_real_recovery_parallelism() > 1) && (GetBatchCount() > 0)) { + send_signal_to_eros_recycle_worker(SIGTERM); WaitPageRedoWorkerReachLastMark(g_dispatcher->readLine.readPageThd); PageRedoPipeline *pl = g_dispatcher->pageLines; /* send end mark */ diff --git a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp new file mode 100644 index 0000000000000000000000000000000000000000..30f94c2a3daf91106e8ba70411ecfcad6b11bbf8 --- /dev/null +++ b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * exrto_recycle.cpp + * + * clean thread for standby read on block level page redo + * + * IDENTIFICATION + * src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "access/extreme_rto/page_redo.h" +#include "access/extreme_rto/dispatcher.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/multi_redo_api.h" +#include "storage/ipc.h" +#include "storage/smgr/smgr.h" +#include "utils/memutils.h" + +namespace extreme_rto { +static void exrto_recycle_sighup_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + t_thrd.exrto_recycle_cxt.got_SIGHUP = true; + if (t_thrd.proc) + SetLatch(&t_thrd.proc->procLatch); + errno = save_errno; +} + +static void exrto_recycle_shutdown_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + t_thrd.exrto_recycle_cxt.shutdown_requested = true; + if (t_thrd.proc) { + SetLatch(&t_thrd.proc->procLatch); + } + errno = save_errno; +} + +static void exrto_recycle_quick_die(SIGNAL_ARGS) +{ + int status = 2; + gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL); + on_exit_reset(); + proc_exit(status); +} + +static void exrto_recycle_setup_signal_handlers() +{ + (void)gspqsignal(SIGHUP, exrto_recycle_sighup_handler); + (void)gspqsignal(SIGINT, SIG_IGN); + (void)gspqsignal(SIGTERM, exrto_recycle_shutdown_handler); + (void)gspqsignal(SIGQUIT, exrto_recycle_quick_die); + (void)gspqsignal(SIGALRM, SIG_IGN); + (void)gspqsignal(SIGPIPE, SIG_IGN); + (void)gspqsignal(SIGUSR1, SIG_IGN); + (void)gspqsignal(SIGUSR2, SIG_IGN); + (void)gspqsignal(SIGCHLD, SIG_IGN); + (void)gspqsignal(SIGTTIN, SIG_IGN); + (void)gspqsignal(SIGTTOU, SIG_IGN); + (void)gspqsignal(SIGCONT, SIG_IGN); + (void)gspqsignal(SIGWINCH, SIG_IGN); + + gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL); + (void)gs_signal_unblock_sigusr2(); +} + +static void 
handle_exrto_recycle_shutdown() +{ + ereport(LOG, (errmsg("exrto recycle exit for request"))); + ResourceOwnerRelease(t_thrd.utils_cxt.CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); + proc_exit(0); +} + +static void exrto_recycle_wait() +{ + int rc = 0; + rc = WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 1000L); /* 1s */ + /* Clear any already-pending wakeups */ + ResetLatch(&t_thrd.proc->procLatch); + if (((unsigned int)rc) & WL_POSTMASTER_DEATH) { + gs_thread_exit(1); + } +} + +bool check_if_need_force_recycle() +{ + uint32 worker_nums = g_dispatcher->allWorkersCnt; + PageRedoWorker** workers = g_dispatcher->allWorkers; + int64 total_base_page_size = 0; + int64 total_lsn_info_size = 0; + double ratio = g_instance.attr.attr_storage.standby_force_recyle_ratio; + + for (uint32 i = 0; i < worker_nums; ++i) { + PageRedoWorker* page_redo_worker = workers[i]; + StandbyReadMetaInfo meta_info = page_redo_worker->standby_read_meta_info; + if (page_redo_worker->role != REDO_PAGE_WORKER) { + continue; + } + total_base_page_size += (meta_info.base_page_next_position - meta_info.base_page_recyle_position); + total_lsn_info_size += (meta_info.lsn_table_next_position - meta_info.lsn_table_recyle_position); + } + + if (total_base_page_size > g_instance.attr.attr_storage.max_standby_base_page_size * ratio || + total_lsn_info_size > g_instance.attr.attr_storage.max_standby_lsn_info_size * ratio) { + return true; + } + + return false; +} + +void do_standby_read_recyle(XLogRecPtr recycle_lsn) +{ + uint32 worker_nums = g_dispatcher->allWorkersCnt; + PageRedoWorker** workers = g_dispatcher->allWorkers; + XLogRecPtr min_recycle_lsn = InvalidXLogRecPtr; + for (uint32 i = 0; i < worker_nums; ++i) { + PageRedoWorker* page_redo_worker = workers[i]; + if (page_redo_worker->role != REDO_PAGE_WORKER) { + continue; + } + extreme_rto_standby_read::standby_read_recyle_per_workers(&page_redo_worker->standby_read_meta_info, recycle_lsn); + 
if (XLogRecPtrIsInvalid(min_recycle_lsn) || + XLByteLT(page_redo_worker->standby_read_meta_info.recycle_lsn_per_worker, min_recycle_lsn)) { + min_recycle_lsn = page_redo_worker->standby_read_meta_info.recycle_lsn_per_worker; + } + } + if (XLByteLT(g_instance.comm_cxt.predo_cxt.global_recycle_lsn, min_recycle_lsn)) { + pg_atomic_write_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn, min_recycle_lsn); + ereport(LOG, + (errmsg(EXRTOFORMAT("[exrto_recycle] update global recycle lsn: %08X/%08X"), + (uint32)(min_recycle_lsn >> UINT64_HALF), (uint32)min_recycle_lsn))); + } +} + +void exrto_recycle_interrupt() +{ + if (t_thrd.exrto_recycle_cxt.got_SIGHUP) { + t_thrd.exrto_recycle_cxt.got_SIGHUP = false; + ProcessConfigFile(PGC_SIGHUP); + } + + if (t_thrd.exrto_recycle_cxt.shutdown_requested) { + handle_exrto_recycle_shutdown(); + } +} + +void exrto_recycle_main() +{ + t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "exrto recycler", + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE)); + MemoryContext exrto_recycle_context = AllocSetContextCreate(t_thrd.top_mem_cxt, + "Exrto Recycler", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + (void)MemoryContextSwitchTo(exrto_recycle_context); + + ereport(LOG, (errmsg("exrto recycle started"))); + exrto_recycle_setup_signal_handlers(); + + /* + * Unblock signals (they were blocked when the postmaster forked us) + */ + gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL); + (void)gs_signal_unblock_sigusr2(); + + pgstat_report_appname("exrto recycler"); + pgstat_report_activity(STATE_IDLE, NULL); + + bool need_force_recyle = false; + int sleep_count = 0; + RegisterRedoInterruptCallBack(exrto_recycle_interrupt); + + if (pmState == PM_RUN && isDirExist(EXRTO_FILE_DIR)) { + buffer_drop_exrto_standby_read_buffers(); + exrto_clean_dir(); + } + if (isDirExist(EXRTO_OLD_FILE_DIR)) { + exrto_recycle_old_dir(); + ereport(LOG, (errmsg("exrto recycle: clear standby_read_old dir 
success"))); + } else { + ereport(LOG, (errmsg("exrto recycle: standby_read_old dir not exist"))); + } + + if (!IS_EXRTO_READ || !RecoveryInProgress()) { + ereport(LOG, + (errmsg("exrto recycle is available only when exrto standby read is supported"))); + handle_exrto_recycle_shutdown(); + } + while (true) { + RedoInterruptCallBack(); + exrto_recycle_wait(); + ++sleep_count; + + /* + * standby_recycle_interval = 0 means do not recyle + */ + if (g_instance.attr.attr_storage.standby_recycle_interval == 0) { + continue; + } + + need_force_recyle = check_if_need_force_recycle(); + if (!need_force_recyle && sleep_count < g_instance.attr.attr_storage.standby_recycle_interval) { + continue; + } + + sleep_count = 0; + + XLogRecPtr recycle_lsn = exrto_calculate_recycle_position(need_force_recyle); + if (XLogRecPtrIsInvalid(recycle_lsn)) { + continue; + } + + do_standby_read_recyle(recycle_lsn); + smgrcloseall(); + MemoryContextResetAndDeleteChildren(exrto_recycle_context); + } + handle_exrto_recycle_shutdown(); +} +} /* namespace extreme_rto */ \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp index e2b656aaa1bbbc2b11d4d7507f3476ad6a95a1aa..24f8a54f046ed0e95e27e9420075e7bacf551b4d 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp @@ -51,6 +51,7 @@ #include "storage/smgr/relfilenode_hash.h" #include "storage/standby.h" #include "storage/pmsignal.h" +#include "storage/procarray.h" #include "utils/guc.h" #include "utils/palloc.h" #include "portability/instr_time.h" @@ -63,6 +64,7 @@ #include "commands/tablespace.h" #include "access/extreme_rto/page_redo.h" #include "access/extreme_rto/dispatcher.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" #include "access/extreme_rto/txn_redo.h" #include "access/extreme_rto/xlog_read.h" #include "pgstat.h" 
@@ -183,6 +185,9 @@ void RedoWorkerQueueCallBack() bool RedoWorkerIsUndoSpaceWorker() { + if (g_redoWorker == NULL) { + return false; + } return g_redoWorker->isUndoSpaceWorker; } @@ -562,8 +567,6 @@ bool BatchRedoDistributeItems(void **eleArry, uint32 eleNum) BatchRedoProcLsnForwarder((RedoItem *)eleArry[i]); } else if (eleArry[i] == (void *)&g_cleanupMark) { BatchRedoProcCleanupMark((RedoItem *)eleArry[i]); - } else if (eleArry[i] == (void *)&g_closefdMark) { - smgrcloseall(); } else if (eleArry[i] == (void *)&g_cleanInvalidPageMark) { forget_range_invalid_pages((void *)eleArry[i]); } else { @@ -639,26 +642,21 @@ void RedoPageManagerDistributeToAllOneBlock(XLogRecParseState *ddlParseState) } } -void RedoPageManagerDistributeBlockRecord(HTAB *redoItemHash, XLogRecParseState *parsestate) +void RedoPageManagerDistributeBlockRecord(XLogRecParseState *record_block_state) { PageRedoPipeline *myRedoLine = &g_dispatcher->pageLines[g_redoWorker->slotId]; const uint32 WorkerNumPerMng = myRedoLine->redoThdNum; - HASH_SEQ_STATUS status; - RedoItemHashEntry *redoItemEntry = NULL; - HTAB *curMap = redoItemHash; - hash_seq_init(&status, curMap); - - while ((redoItemEntry = (RedoItemHashEntry *)hash_seq_search(&status)) != NULL) { - uint32 workId = GetWorkerId(&redoItemEntry->redoItemTag, WorkerNumPerMng); - AddPageRedoItem(myRedoLine->redoThd[workId], redoItemEntry->head); + uint32 work_id; + RelFileNode rel_node; + ForkNumber fork_num; + BlockNumber blk_no; + RedoItemTag redo_item_tag; - if (hash_search(curMap, (void *)&redoItemEntry->redoItemTag, HASH_REMOVE, NULL) == NULL) - ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("hash table corrupted"))); - } - - if (parsestate != NULL) { - RedoPageManagerDistributeToAllOneBlock(parsestate); - } + PRXLogRecGetBlockTag(record_block_state, &rel_node, &blk_no, &fork_num); + INIT_REDO_ITEM_TAG(redo_item_tag, rel_node, fork_num, blk_no); + work_id = GetWorkerId(&redo_item_tag, WorkerNumPerMng); + record_block_state->nextrecord = 
NULL; + AddPageRedoItem(myRedoLine->redoThd[work_id], record_block_state); } void WaitCurrentPipeLineRedoWorkersQueueEmpty() @@ -762,13 +760,24 @@ void RedoPageManagerSyncDdlAction(XLogRecParseState *parsestate) XLogBlockParseStateRelease(parsestate); } -void RedoPageManagerDoDropAction(XLogRecParseState *parsestate, HTAB *hashMap) +void RedoPageManagerDoDatabaseAction(XLogRecParseState *parsestate) { - XLogRecParseState *newState = XLogParseBufferCopy(parsestate); - PRTrackClearBlock(newState, hashMap); - RedoPageManagerDistributeBlockRecord(hashMap, parsestate); + RedoPageManagerDistributeToAllOneBlock(parsestate); WaitCurrentPipeLineRedoWorkersQueueEmpty(); - RedoPageManagerSyncDdlAction(parsestate); + RedoPageManagerSmgrClose(parsestate); + + bool need_wait = parsestate->isFullSync; + if (need_wait) { + pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1); + } + parsestate->nextrecord = NULL; + XLogBlockParseStateRelease(parsestate); + + uint32 val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag); + while (val != 0) { + RedoInterruptCallBack(); + val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag); + } } void RedoPageManagerDoSmgrAction(XLogRecParseState *recordblockstate) @@ -790,16 +799,14 @@ void RedoPageManagerDoSmgrAction(XLogRecParseState *recordblockstate) XLogBlockParseStateRelease(recordblockstate); } -void RedoPageManagerDoDataTypeAction(XLogRecParseState *parsestate, HTAB *hashMap) +void RedoPageManagerDoDataTypeAction(XLogRecParseState *parsestate) { XLogBlockDdlParse *ddlrecparse = NULL; XLogBlockParseGetDdlParse(parsestate, ddlrecparse); if (ddlrecparse->blockddltype == BLOCK_DDL_DROP_RELNODE || ddlrecparse->blockddltype == BLOCK_DDL_TRUNCATE_RELNODE) { - XLogRecParseState *newState = XLogParseBufferCopy(parsestate); - PRTrackClearBlock(newState, hashMap); - RedoPageManagerDistributeBlockRecord(hashMap, parsestate); + RedoPageManagerDistributeToAllOneBlock(parsestate); WaitCurrentPipeLineRedoWorkersQueueEmpty(); } @@ -839,10 +846,10 @@ void 
PageManagerProcCleanupMark(RedoItem *cleanupMark) ereport(LOG, (errcode(ERRCODE_LOG), errmsg("[ForceFinish]PageManagerProcCleanupMark has cleaned InvalidPages"))); } -void PageManagerProcCheckPoint(HTAB *hashMap, XLogRecParseState *parseState) +void PageManagerProcCheckPoint(XLogRecParseState *parseState) { Assert(IsCheckPoint(parseState)); - RedoPageManagerDistributeBlockRecord(hashMap, parseState); + RedoPageManagerDistributeToAllOneBlock(parseState); bool needWait = parseState->isFullSync; if (needWait) { pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1); @@ -865,9 +872,8 @@ void PageManagerProcCheckPoint(HTAB *hashMap, XLogRecParseState *parseState) } } -void PageManagerProcCreateTableSpace(HTAB *hashMap, XLogRecParseState *parseState) +void PageManagerProcCreateTableSpace(XLogRecParseState *parseState) { - RedoPageManagerDistributeBlockRecord(hashMap, NULL); bool needWait = parseState->isFullSync; if (needWait) { pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1); @@ -881,16 +887,14 @@ void PageManagerProcCreateTableSpace(HTAB *hashMap, XLogRecParseState *parseStat } } -void PageManagerProcSegFullSyncState(HTAB *hashMap, XLogRecParseState *parseState) +void PageManagerProcSegFullSyncState(XLogRecParseState *parseState) { - RedoPageManagerDistributeBlockRecord(hashMap, NULL); WaitCurrentPipeLineRedoWorkersQueueEmpty(); RedoPageManagerSyncDdlAction(parseState); } -void PageManagerProcSegPipeLineSyncState(HTAB *hashMap, XLogRecParseState *parseState) +void PageManagerProcSegPipeLineSyncState(XLogRecParseState *parseState) { - RedoPageManagerDistributeBlockRecord(hashMap, NULL); WaitCurrentPipeLineRedoWorkersQueueEmpty(); MemoryContext oldCtx = MemoryContextSwitchTo(g_redoWorker->oldCtx); @@ -917,40 +921,38 @@ static void WaitNextBarrier(XLogRecParseState *parseState) void PageManagerRedoParseState(XLogRecParseState *preState) { - HTAB *hashMap = g_dispatcher->pageLines[g_redoWorker->slotId].managerThd->redoItemHash; - switch 
(preState->blockparse.blockhead.block_valid) { case BLOCK_DATA_MAIN_DATA_TYPE: case BLOCK_DATA_UNDO_TYPE: case BLOCK_DATA_VM_TYPE: case BLOCK_DATA_FSM_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]); - PRTrackAddBlock(preState, hashMap); + RedoPageManagerDistributeBlockRecord(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]); break; case BLOCK_DATA_DDL_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); - RedoPageManagerDoDataTypeAction(preState, hashMap); + RedoPageManagerDoDataTypeAction(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); break; case BLOCK_DATA_SEG_EXTEND: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); - PageManagerProcSegPipeLineSyncState(hashMap, preState); + PageManagerProcSegPipeLineSyncState(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); break; + case BLOCK_DATA_CREATE_DATABASE_TYPE: case BLOCK_DATA_DROP_DATABASE_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_5]); - RedoPageManagerDoDropAction(preState, hashMap); + RedoPageManagerDoDatabaseAction(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_5]); break; case BLOCK_DATA_DROP_TBLSPC_TYPE: /* just make sure any other ddl before drop tblspc is done */ XLogBlockParseStateRelease(preState); break; - case BLOCK_DATA_CREATE_DATABASE_TYPE: case BLOCK_DATA_SEG_FILE_EXTEND_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); - RedoPageManagerDistributeBlockRecord(hashMap, NULL); + RedoPageManagerDistributeBlockRecord(NULL); /* wait until queue empty */ WaitCurrentPipeLineRedoWorkersQueueEmpty(); /* do atcual action */ @@ -959,31 +961,30 @@ void PageManagerRedoParseState(XLogRecParseState *preState) break; case BLOCK_DATA_SEG_FULL_SYNC_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]); - PageManagerProcSegFullSyncState(hashMap, preState); + PageManagerProcSegFullSyncState(preState); 
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]); break; case BLOCK_DATA_CREATE_TBLSPC_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]); - PageManagerProcCreateTableSpace(hashMap, preState); + PageManagerProcCreateTableSpace(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]); break; case BLOCK_DATA_XLOG_COMMON_TYPE: - PageManagerProcCheckPoint(hashMap, preState); + PageManagerProcCheckPoint(preState); break; case BLOCK_DATA_NEWCU_TYPE: - RedoPageManagerDistributeBlockRecord(hashMap, NULL); PageManagerDistributeBcmBlock(preState); break; case BLOCK_DATA_SEG_SPACE_DROP: case BLOCK_DATA_SEG_SPACE_SHRINK: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]); - RedoPageManagerDistributeBlockRecord(hashMap, preState); + RedoPageManagerDistributeToAllOneBlock(preState); WaitCurrentPipeLineRedoWorkersQueueEmpty(); RedoPageManagerSyncDdlAction(preState); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]); break; case BLOCK_DATA_BARRIER_TYPE: - RedoPageManagerDistributeBlockRecord(hashMap, preState); + RedoPageManagerDistributeToAllOneBlock(preState); WaitNextBarrier(preState); break; default: @@ -992,71 +993,60 @@ void PageManagerRedoParseState(XLogRecParseState *preState) } } -bool PageManagerRedoDistributeItems(void **eleArry, uint32 eleNum) +bool PageManagerRedoDistributeItems(XLogRecParseState *record_block_state) { - HTAB *hashMap = g_dispatcher->pageLines[g_redoWorker->slotId].managerThd->redoItemHash; + if (record_block_state == (void *)&g_redoEndMark) { + return true; + } else if (record_block_state == (void *)&g_GlobalLsnForwarder) { + PageManagerProcLsnForwarder((RedoItem *) record_block_state); + return false; + } else if (record_block_state == (void *)&g_cleanupMark) { + PageManagerProcCleanupMark((RedoItem *) record_block_state); + return false; + } else if (record_block_state == (void *)&g_cleanInvalidPageMark) { + forget_range_invalid_pages((void *)record_block_state); + return false; + } - 
for (uint32 i = 0; i < eleNum; i++) { - if (eleArry[i] == (void *)&g_redoEndMark) { - RedoPageManagerDistributeBlockRecord(hashMap, NULL); - return true; - } else if (eleArry[i] == (void *)&g_GlobalLsnForwarder) { - RedoPageManagerDistributeBlockRecord(hashMap, NULL); - PageManagerProcLsnForwarder((RedoItem *)eleArry[i]); - continue; - } else if (eleArry[i] == (void *)&g_cleanupMark) { - PageManagerProcCleanupMark((RedoItem *)eleArry[i]); - continue; - } else if (eleArry[i] == (void *)&g_closefdMark) { - smgrcloseall(); - continue; - } else if (eleArry[i] == (void *)&g_cleanInvalidPageMark) { - forget_range_invalid_pages((void *)eleArry[i]); - continue; - } - XLogRecParseState *recordblockstate = (XLogRecParseState *)eleArry[i]; - XLogRecParseState *nextState = recordblockstate; - do { - XLogRecParseState *preState = nextState; - nextState = (XLogRecParseState *)nextState->nextrecord; - preState->nextrecord = NULL; + XLogRecParseState *next_state = record_block_state; + do { + XLogRecParseState *pre_state = next_state; + next_state = (XLogRecParseState *)next_state->nextrecord; + pre_state->nextrecord = NULL; #ifdef ENABLE_UT - TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_BEFORE_DISTRIBUTE_ITEMS, - __FUNCTION__, preState); + TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_BEFORE_DISTRIBUTE_ITEMS, + __FUNCTION__, pre_state); #endif - PageManagerRedoParseState(preState); + PageManagerRedoParseState(pre_state); #ifdef ENABLE_UT - TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_AFTER_DISTRIBUTE_ITEMS, - __FUNCTION__, preState); + TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_AFTER_DISTRIBUTE_ITEMS, + __FUNCTION__, pre_state); #endif - } while (nextState != NULL); - } - GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_9]); - RedoPageManagerDistributeBlockRecord(hashMap, NULL); - CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_9]); + } while (next_state != NULL); + return false; } void 
RedoPageManagerMain() { - void **eleArry; - uint32 eleNum; + XLogRecParseState *record_block_state; + bool is_end; (void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); - g_redoWorker->redoItemHash = PRRedoItemHashInitialize(g_redoWorker->oldCtx); XLogParseBufferInitFunc(&(g_redoWorker->parseManager), MAX_PARSE_BUFF_NUM, &recordRefOperate, RedoInterruptCallBack); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); - while (SPSCBlockingQueueGetAll(g_redoWorker->queue, &eleArry, &eleNum)) { - CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]); - bool isEnd = PageManagerRedoDistributeItems(eleArry, eleNum); - SPSCBlockingQueuePopN(g_redoWorker->queue, eleNum); - CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); - if (isEnd) - break; - + while (true) { + if (!SPSCBlockingQueueIsEmpty(g_redoWorker->queue)) { + record_block_state = (XLogRecParseState *)SPSCBlockingQueueTake(g_redoWorker->queue); + CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]); + is_end = PageManagerRedoDistributeItems(record_block_state); + CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); + if (is_end) + break; + } RedoInterruptCallBack(); ADD_ABNORMAL_POSITION(5); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); @@ -1105,7 +1095,8 @@ bool TrxnManagerDistributeItemsBeforeEnd(RedoItem *item) } else { GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); if (IsCheckPoint(&item->record) || IsTableSpaceDrop(&item->record) || IsTableSpaceCreate(&item->record) || - (IsXactXlog(&item->record) && XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record)) { + (IsXactXlog(&item->record) && XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) || + IsDataBaseDrop(&item->record) || IsDataBaseCreate(&item->record)) { uint32 relCount; do { RedoInterruptCallBack(); @@ -1200,6 +1191,11 @@ 
void TrxnManagerMain() void TrxnWorkerProcLsnForwarder(RedoItem *lsnForwarder) { SetCompletedReadEndPtr(g_redoWorker, lsnForwarder->record.ReadRecPtr, lsnForwarder->record.EndRecPtr); + uint32 refcout = pg_atomic_read_u32(&lsnForwarder->record.refcount); + while (refcout > 1) { + refcout = pg_atomic_read_u32(&lsnForwarder->record.refcount); + RedoInterruptCallBack(); + } (void)pg_atomic_sub_fetch_u32(&lsnForwarder->record.refcount, 1); } @@ -1258,13 +1254,11 @@ void TrxnWorkMain() if ((void *)item == (void *)&g_GlobalLsnForwarder) { TrxnWorkerProcLsnForwarder((RedoItem *)item); SPSCBlockingQueuePop(g_redoWorker->queue); - } else if ((void *)item == (void *)&g_cleanupMark) { + exrto_generate_snapshot(g_redoWorker->lastReplayedReadRecPtr); + } else if (unlikely((void *)item == (void *)&g_cleanupMark)) { TrxnWorkrProcCleanupMark((RedoItem *)item); SPSCBlockingQueuePop(g_redoWorker->queue); - } else if ((void *)item == (void *)&g_closefdMark) { - smgrcloseall(); - SPSCBlockingQueuePop(g_redoWorker->queue); - } else if ((void *)item == (void *)&g_cleanInvalidPageMark) { + } else if (unlikely((void *)item == (void *)&g_cleanInvalidPageMark)) { forget_range_invalid_pages((void *)item); SPSCBlockingQueuePop(g_redoWorker->queue); } else { @@ -1283,6 +1277,12 @@ void TrxnWorkMain() TrxnWorkNotifyRedoWorker(); } + if (IsCheckPoint(&item->record) || (IsXactXlog(&item->record) && + XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) || + IsDataBaseDrop(&item->record)) { + exrto_generate_snapshot(g_redoWorker->lastReplayedEndRecPtr); + } + if (XactHasSegpageRelFiles(&item->record)) { uint32 expected = 1; pg_atomic_compare_exchange_u32((volatile uint32 *)&(g_dispatcher->segpageXactDoneFlag), &expected, 0); @@ -1415,12 +1415,6 @@ void RedoPageWorkerMain() continue; } - if ((void *)redoblockstateHead == (void *)&g_closefdMark) { - smgrcloseall(); - SPSCBlockingQueuePop(g_redoWorker->queue); - continue; - } - if ((void *)redoblockstateHead == (void 
*)&g_cleanInvalidPageMark) { forget_range_invalid_pages((void *)redoblockstateHead); SPSCBlockingQueuePop(g_redoWorker->queue); @@ -1761,26 +1755,26 @@ void DispatchCleanupMarkToAllRedoWorker() } } -void DispatchClosefdMarkToAllRedoWorker() +void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key) { for (uint32 i = 0; i < g_dispatcher->allWorkersCnt; i++) { PageRedoWorker *worker = g_dispatcher->allWorkers[i]; - if (worker->role == REDO_PAGE_WORKER || worker->role == REDO_PAGE_MNG || - worker->role == REDO_TRXN_MNG || worker->role == REDO_TRXN_WORKER) { - SPSCBlockingQueuePut(worker->queue, &g_closefdMark); + if (worker->role == REDO_PAGE_WORKER) { + errno_t rc = memcpy_s((char*)&g_cleanInvalidPageMark, + sizeof(RepairFileKey), (char*)&key, sizeof(RepairFileKey)); + securec_check(rc, "", ""); + SPSCBlockingQueuePut(worker->queue, &g_cleanInvalidPageMark); } } } -void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key) +void DispatchClosefdMarkToAllRedoWorker() { for (uint32 i = 0; i < g_dispatcher->allWorkersCnt; i++) { PageRedoWorker *worker = g_dispatcher->allWorkers[i]; - if (worker->role == REDO_PAGE_WORKER) { - errno_t rc = memcpy_s((char*)&g_cleanInvalidPageMark, - sizeof(RepairFileKey), (char*)&key, sizeof(RepairFileKey)); - securec_check(rc, "", ""); - SPSCBlockingQueuePut(worker->queue, &g_cleanInvalidPageMark); + if (worker->role == REDO_PAGE_WORKER || worker->role == REDO_PAGE_MNG || + worker->role == REDO_TRXN_MNG || worker->role == REDO_TRXN_WORKER) { + SPSCBlockingQueuePut(worker->queue, &g_closefdMark); } } } @@ -2492,6 +2486,7 @@ void ParallelRedoThreadMain() ParallelRedoThreadRegister(); ereport(LOG, (errmsg("Page-redo-worker thread %u started, role:%u, slotId:%u.", g_redoWorker->id, g_redoWorker->role, g_redoWorker->slotId))); + t_thrd.page_redo_cxt.redo_worker_ptr = g_redoWorker; // regitster default interrupt call back (void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); SetupSignalHandlers(); @@ -3011,4 +3006,328 
@@ void SeqCheckRemoteReadAndRepairPage()
     }
 }
+/*
+ * Publish a new standby-read snapshot for the replay position trxn_lsn.
+ *
+ * Nothing is published when hot standby is disabled, when trxn_lsn is not
+ * newer than the read_lsn already stored in g_dispatcher->exrto_snapshot, or
+ * while minRecoveryPoint is still invalid. Otherwise a snapshot is taken with
+ * GetSnapshotData() and its csn/xmin/xmax, together with trxn_lsn and the
+ * current timestamp, are stored under ExrtoSnapshotLock (exclusive).
+ *
+ * NOTE(review): read_lsn is compared before the lock is taken; this presumably
+ * relies on a single thread generating snapshots -- confirm against callers.
+ */
+void exrto_generate_snapshot(XLogRecPtr trxn_lsn)
+{
+    if (!g_instance.attr.attr_storage.EnableHotStandby) {
+        return;
+    }
+
+    ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+    /*
+     * Do not generate the same (or an older) snapshot repeatedly. This test
+     * already covers trxn_lsn < read_lsn, so no second comparison is needed
+     * after the minRecoveryPoint check below.
+     */
+    if (XLByteLE(trxn_lsn, exrto_snapshot->read_lsn)) {
+        return;
+    }
+
+    if (XLogRecPtrIsInvalid(t_thrd.xlog_cxt.minRecoveryPoint)) {
+        return;
+    }
+
+    SnapshotData snapshot;
+
+    (void)GetSnapshotData(&snapshot, false);
+
+    Assert(snapshot.takenDuringRecovery);
+    (void)LWLockAcquire(ExrtoSnapshotLock, LW_EXCLUSIVE);
+    exrto_snapshot->snapshot_csn = snapshot.snapshotcsn;
+    exrto_snapshot->xmin = snapshot.xmin;
+    exrto_snapshot->xmax = snapshot.xmax;
+    exrto_snapshot->read_lsn = trxn_lsn;
+    exrto_snapshot->gen_snap_time = GetCurrentTimestamp();
+    LWLockRelease(ExrtoSnapshotLock);
+}
+
+/*
+ * Copy the published standby-read snapshot into the caller's snapshot.
+ *
+ * Only threads whose role is WORKER or THREADPOOL_WORKER are served; any other
+ * role returns without touching snapshot. Raises ERROR when g_dispatcher is
+ * NULL or when no snapshot has been published yet (read_lsn == 0); in the
+ * latter case ExrtoSnapshotLock is released before the error is raised.
+ *
+ * Besides filling snapshot->snapshotcsn/xmin/xmax, the call records the
+ * snapshot's xmin, read_lsn and generation time in t_thrd.pgxact, t_thrd.proc
+ * and u_sess->utils_cxt while ExrtoSnapshotLock is held in shared mode.
+ */
+void exrto_read_snapshot(Snapshot snapshot)
+{
+    if (t_thrd.role != WORKER && t_thrd.role != THREADPOOL_WORKER) {
+        return;
+    }
+
+    if (g_dispatcher == NULL) {
+        ereport(ERROR,
+                (errmsg("g_dispatcher is not init")));
+    }
+
+    ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+    (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+    if (XLByteEQ(exrto_snapshot->read_lsn, 0)) {
+        LWLockRelease(ExrtoSnapshotLock);
+        ereport(ERROR,
+                (errmsg("could not get a valid snapshot with extreme rto")));
+    }
+    snapshot->snapshotcsn = exrto_snapshot->snapshot_csn;
+    snapshot->xmin = exrto_snapshot->xmin;
+    snapshot->xmax = exrto_snapshot->xmax;
+
+    t_thrd.pgxact->xmin = exrto_snapshot->xmin;
+    t_thrd.proc->exrto_read_lsn = exrto_snapshot->read_lsn;
+    t_thrd.proc->exrto_gen_snap_time = exrto_snapshot->gen_snap_time;
+    u_sess->utils_cxt.TransactionXmin = exrto_snapshot->xmin;
+    u_sess->utils_cxt.exrto_read_lsn = exrto_snapshot->read_lsn;
+
+    LWLockRelease(ExrtoSnapshotLock);
+    Assert(XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn));
+}
+
+/*
+ * Return the position up to which a forced recycle should go: recycle_pos
+ * advanced by force_recyle_ratio (30%) of the distance to insert_pos.
+ * Asserts recycle_pos <= insert_pos.
+ */
+static inline uint64 get_force_recycle_pos(uint64 recycle_pos, uint64 insert_pos)
+{
+    const double force_recyle_ratio = 0.3; /* to be adjusted */
+    Assert(recycle_pos <= insert_pos);
+    return recycle_pos + (uint64)((insert_pos - recycle_pos) * force_recyle_ratio);
+}
+
+/*
+ * Compute the forced-recycle LSN for one redo worker's standby-read metadata.
+ *
+ * For the base-page area and for the lsn-info area, when the recycle position
+ * is behind the next (insert) position, the page at the forced-recycle
+ * position is read and an LSN is taken from it: the page LSN for a base page,
+ * lsn[0] of the LsnInfo located right after LSN_INFO_HEAD_SIZE for an lsn-info
+ * page. An area with nothing to recycle contributes InvalidXLogRecPtr.
+ * Returns the larger of the two LSNs.
+ *
+ * PANICs when get_lsn_info_page() yields no page or an invalid buffer.
+ * NOTE(review): the buffer returned by buffer_read_base_page() is used without
+ * a comparable validity check -- confirm that it cannot fail.
+ */
+XLogRecPtr calculate_force_recycle_lsn_per_worker(StandbyReadMetaInfo* meta_info)
+{
+    uint64 base_page_recycle_pos;
+    uint64 lsn_info_recycle_pos;
+    XLogRecPtr base_page_recycle_lsn = InvalidXLogRecPtr;
+    XLogRecPtr lsn_info_recycle_lsn = InvalidXLogRecPtr;
+    Buffer buffer;
+    Page page;
+
+    /* for base page */
+    if (meta_info->base_page_recyle_position < meta_info->base_page_next_position) {
+        base_page_recycle_pos = get_force_recycle_pos(meta_info->base_page_recyle_position,
+                                                      meta_info->base_page_next_position);
+        buffer = extreme_rto_standby_read::buffer_read_base_page(meta_info->batch_id, meta_info->redo_id,
+                                                                 base_page_recycle_pos, RBM_NORMAL);
+        /* read the page LSN under a share lock, then drop lock and pin together */
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+        base_page_recycle_lsn = PageGetLSN(BufferGetPage(buffer));
+        UnlockReleaseBuffer(buffer);
+    }
+
+    /* for lsn info */
+    if (meta_info->lsn_table_recyle_position < meta_info->lsn_table_next_position) {
+        lsn_info_recycle_pos = get_force_recycle_pos(meta_info->lsn_table_recyle_position,
+                                                     meta_info->lsn_table_next_position);
+        page = extreme_rto_standby_read::get_lsn_info_page(meta_info->batch_id, meta_info->redo_id,
+                                                           lsn_info_recycle_pos, RBM_NORMAL, &buffer);
+        if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+            ereport(PANIC,
+                    (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+                            meta_info->batch_id, meta_info->redo_id, lsn_info_recycle_pos)));
+        }
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+        /* the LsnInfo record is addressed at a fixed offset past the page head */
+        extreme_rto_standby_read::LsnInfo lsn_info = (extreme_rto_standby_read::LsnInfo)(page +
+            extreme_rto_standby_read::LSN_INFO_HEAD_SIZE);
+        lsn_info_recycle_lsn = lsn_info->lsn[0];
+        UnlockReleaseBuffer(buffer);
+    }
+
+    return rtl::max(base_page_recycle_lsn, lsn_info_recycle_lsn);
+}
+
+/*
+ * Raise recycle_lsn (in/out) to the largest forced-recycle LSN reported by any
+ * REDO_PAGE_WORKER in g_dispatcher->allWorkers, then log the result.
+ * recycle_lsn is never lowered.
+ */
+void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn)
+{
+    XLogRecPtr recycle_lsn_per_worker;
+    uint32 worker_nums = g_dispatcher->allWorkersCnt;
+    PageRedoWorker** workers = g_dispatcher->allWorkers;
+
+    for (uint32 i = 0; i < worker_nums; ++i) {
+        PageRedoWorker* page_redo_worker = workers[i];
+        if (page_redo_worker->role != REDO_PAGE_WORKER) {
+            continue;
+        }
+        recycle_lsn_per_worker = calculate_force_recycle_lsn_per_worker(&page_redo_worker->standby_read_meta_info);
+        if (XLByteLT(recycle_lsn, recycle_lsn_per_worker)) {
+            recycle_lsn = recycle_lsn_per_worker;
+        }
+    }
+    ereport(LOG,
+            (errmsg(EXRTOFORMAT("[exrto_recycle] try force recycle, recycle lsn: %08X/%08X"),
+                    (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn)));
+}
+
+/*
+ * Whether the time elapsed since start_time exceeds standby_max_query_time
+ * scaled by MSECS_PER_SEC (presumably the setting is in seconds -- confirm).
+ */
+static inline bool exceed_standby_max_query_time(TimestampTz start_time)
+{
+    return TimestampDifferenceExceeds(start_time, GetCurrentTimestamp(),
+                                      g_instance.attr.attr_storage.standby_max_query_time * MSECS_PER_SEC);
+}
+
+/* 1. resolve recycle conflict with backends
+ * 2. get oldest xmin and oldest readlsn of backends.
+ *
+ * Walks the proc array under ProcArrayLock (shared). Entries with pid 0, an
+ * invalid xmin or an invalid exrto_read_lsn, and logical-decoding backends,
+ * are skipped. A backend whose read_lsn is older than recycle_lsn, or whose
+ * snapshot is older than the standby max query time, is flagged with
+ * recoveryConflictPending, signalled with PROCSIG_RECOVERY_CONFLICT_SNAPSHOT
+ * and reported through conflict = true; it does not contribute to the minima.
+ *
+ * oldest_lsn / oldest_xmin are in/out: they are only lowered (or set when
+ * still invalid), never reset here, so values carry over between calls.
+ *
+ * NOTE(review): pg_usleep() runs while ProcArrayLock is held, once per
+ * conflicting backend -- confirm this cannot stall exclusive lockers.
+ * NOTE(review): the inner "pid != 0" test repeats the check made at the top
+ * of the loop body.
+ */
+void proc_array_get_oldeset_readlsn(XLogRecPtr recycle_lsn, XLogRecPtr &oldest_lsn, TransactionId &oldest_xmin,
+                                    bool &conflict)
+{
+    ProcArrayStruct* proc_array = g_instance.proc_array_idx;
+    conflict = false;
+
+    LWLockAcquire(ProcArrayLock, LW_SHARED);
+    for (int index = 0; index < proc_array->numProcs; index++) {
+        int pg_proc_no = proc_array->pgprocnos[index];
+        PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no];
+        PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no];
+        XLogRecPtr read_lsn = pg_proc->exrto_read_lsn;
+        TransactionId pxmin = pg_xact->xmin;
+
+        if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) {
+            continue;
+        }
+
+        Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM));
+        /*
+         * Backend is doing logical decoding which manages xmin
+         * separately, check below.
+         */
+        if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
+            continue;
+        }
+
+        /* cancel query when its read_lsn < recycle_lsn or its runtime > standby_max_query_time */
+        if (XLByteLT(read_lsn, recycle_lsn) || exceed_standby_max_query_time(pg_proc->exrto_gen_snap_time)) {
+            pg_proc->recoveryConflictPending = true;
+            conflict = true;
+            if (pg_proc->pid != 0) {
+                /*
+                 * Kill the pid if it's still here. If not, that's what we
+                 * wanted so ignore any errors.
+                 */
+                (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId);
+                /*
+                 * Wait a little bit for it to die so that we avoid flooding
+                 * an unresponsive backend when system is heavily loaded.
+                 */
+                pg_usleep(5000L);
+            }
+            continue;
+        }
+
+        /* keep the smallest read_lsn seen so far */
+        if (XLogRecPtrIsInvalid(oldest_lsn) ||
+            (XLogRecPtrIsValid(read_lsn) && XLByteLT(read_lsn, oldest_lsn))) {
+            oldest_lsn = read_lsn;
+        }
+
+        /* keep the xmin that the current oldest_xmin follows, i.e. the older one */
+        if (!TransactionIdIsValid(oldest_xmin) ||
+            (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) {
+            oldest_xmin = pxmin;
+        }
+    }
+    LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Lower oldest_xmin (in/out) to the oldest valid xmin among proc-array
+ * entries with a non-zero pid that are not doing logical decoding. Runs under
+ * ProcArrayLock (shared). Unlike proc_array_get_oldeset_readlsn() it neither
+ * looks at exrto_read_lsn nor signals any backend.
+ */
+void proc_array_get_oldeset_xmin_for_undo(TransactionId &oldest_xmin)
+{
+    ProcArrayStruct* proc_array = g_instance.proc_array_idx;
+
+    LWLockAcquire(ProcArrayLock, LW_SHARED);
+    for (int index = 0; index < proc_array->numProcs; index++) {
+        int pg_proc_no = proc_array->pgprocnos[index];
+        PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no];
+        PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no];
+        TransactionId pxmin = pg_xact->xmin;
+
+        if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin)) {
+            continue;
+        }
+
+        Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM));
+        /*
+         * Backend is doing logical decoding which manages xmin
+         * separately, check below.
+         */
+        if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
+            continue;
+        }
+        if (!TransactionIdIsValid(oldest_xmin) ||
+            (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) {
+            oldest_xmin = pxmin;
+        }
+    }
+    LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Compute the LSN up to which standby-read data may be recycled.
+ *
+ * Starts from the global recycle LSN, optionally raised by
+ * calculate_force_recycle_lsn() when force_recyle is set, then repeatedly
+ * scans the backends (at most max_check_times rounds, servicing redo
+ * interrupts each round) until no conflicting backend is reported.
+ * Must not be called from a PAGEREDO thread (asserted).
+ *
+ * NOTE(review): global_recycle_lsn is read with a plain load here -- confirm
+ * whether an atomic read is required.
+ */
+XLogRecPtr exrto_calculate_recycle_position(bool force_recyle)
+{
+    Assert(t_thrd.role != PAGEREDO);
+    Assert(IS_EXRTO_READ);
+
+    XLogRecPtr recycle_lsn = g_instance.comm_cxt.predo_cxt.global_recycle_lsn;
+    XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
+    TransactionId oldest_xmin = InvalidTransactionId;
+    bool conflict = false;
+    const int max_check_times = 1000;
+    int check_times = 0;
+
+    if (force_recyle) {
+        calculate_force_recycle_lsn(recycle_lsn);
+    }
+
+    /* Loop checks to avoid conflicting queries that were not successfully canceled. */
+    do {
+        RedoInterruptCallBack();
+        proc_array_get_oldeset_readlsn(recycle_lsn, oldest_lsn, oldest_xmin, conflict);
+        check_times++;
+    } while (conflict && check_times < max_check_times);
+
+    /*
+     * If there is no backend read threads, set read oldest lsn to snapshot lsn.
+     */
+    if (XLogRecPtrIsInvalid(oldest_lsn)) {
+        ExrtoSnapshot exrto_snapshot = NULL;
+        exrto_snapshot = &g_dispatcher->exrto_snapshot;
+        (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+        if (XLByteEQ(exrto_snapshot->read_lsn, 0)) {
+            ereport(WARNING,
+                    (errmsg("could not get a valid snapshot with extreme rto")));
+        } else {
+            oldest_lsn = exrto_snapshot->read_lsn;
+            oldest_xmin = exrto_snapshot->xmin;
+        }
+
+        LWLockRelease(ExrtoSnapshotLock);
+    }
+    /* recycle up to the later of the requested position and the oldest reader position */
+    recycle_lsn = rtl::max(recycle_lsn, oldest_lsn);
+
+    ereport(
+        LOG,
+        (errmsg(
+            EXRTOFORMAT(
+                "[exrto_recycle] calculate recycle position, oldestlsn: %08X/%08X, snapshot read_lsn: %08X/%08X, try "
+                "recycle lsn: %08X/%08X"),
+            (uint32)(oldest_lsn >> UINT64_HALF), (uint32)oldest_lsn,
+            (uint32)(g_dispatcher->exrto_snapshot.read_lsn >> UINT64_HALF),
+            (uint32)g_dispatcher->exrto_snapshot.read_lsn, (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn)));
+
+    return recycle_lsn;
+}
+
+/*
+ * Compute the xmin below which undo may be recycled on an extreme-RTO standby.
+ *
+ * Returns the oldest xmin held by any backend (see
+ * proc_array_get_oldeset_xmin_for_undo). When no backend holds one, falls
+ * back to the xmin of the published exrto snapshot; if that is invalid too, a
+ * WARNING is logged and InvalidTransactionId is returned.
+ * Must not be called from a PAGEREDO thread (asserted).
+ */
+TransactionId exrto_calculate_recycle_xmin_for_undo()
+{
+    Assert(t_thrd.role != PAGEREDO);
+    Assert(IS_EXRTO_READ);
+    TransactionId oldest_xmin = InvalidTransactionId;
+    TransactionId snapshot_xmin = InvalidTransactionId;
+    proc_array_get_oldeset_xmin_for_undo(oldest_xmin);
+
+    /*
+     * If there are no backend read threads, fall back to the xmin of the exrto snapshot.
+     */
+    if (oldest_xmin == InvalidTransactionId) {
+        ExrtoSnapshot exrto_snapshot = NULL;
+        exrto_snapshot = &g_dispatcher->exrto_snapshot;
+        (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+        /* xmin is a TransactionId: test it with the xid macro, not the LSN comparison macro */
+        if (!TransactionIdIsValid(exrto_snapshot->xmin)) {
+            ereport(
+                WARNING,
+                (errmsg("exrto_calculate_recycle_xmin_for_undo: could not get a valid snapshot in exrto_snapshot")));
+        } else {
+            snapshot_xmin = exrto_snapshot->xmin;
+        }
+
+        LWLockRelease(ExrtoSnapshotLock);
+    }
+    ereport(DEBUG1, (errmodule(MOD_UNDO),
+                     errmsg(UNDOFORMAT("exrto_calculate_recycle_xmin_for_undo: oldest_xmin: %lu, snapshot_xmin: %lu."),
+                            oldest_xmin, snapshot_xmin)));
+
+    if (oldest_xmin == InvalidTransactionId) {
+        return snapshot_xmin;
+    }
+    return oldest_xmin;
+}
+
 } // namespace extreme_rto
\ No newline at end of file
diff --git a/src/gausskernel/storage/access/transam/multi_redo_api.cpp b/src/gausskernel/storage/access/transam/multi_redo_api.cpp
index 2d70a75a657454e7f3d5c183581f889438bdeace..7b2b564443b4ec49b1812f0695499a90c599cd2f 100644
--- a/src/gausskernel/storage/access/transam/multi_redo_api.cpp
+++ b/src/gausskernel/storage/access/transam/multi_redo_api.cpp
@@ -66,9 +66,9 @@ void DispatchRedoRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz
         g_instance.comm_cxt.localinfo_cxt.term_from_xlog = term;
     }
-    long readbufcountbefore = u_sess->instr_cxt.pg_buffer_usage->local_blks_read;
+    long readbufcountbefore = u_sess->instr_cxt.pg_buffer_usage->shared_blks_read;
     ApplyRedoRecord(record);
-    record->readblocks = u_sess->instr_cxt.pg_buffer_usage->local_blks_read - readbufcountbefore;
+    record->readblocks = u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readbufcountbefore;
     CountXLogNumbers(record);
     if (XLogRecGetRmid(record) == RM_XACT_ID)
         SetLatestXTime(recordXTime);
@@ -134,6 +134,10 @@ bool IsAllPageWorkerExit()
         }
         g_instance.comm_cxt.predo_cxt.totalNum = 0;
     }
+
+    if (g_instance.pid_cxt.exrto_recycler_pid != 0) {
+        return false;
+    }
     ereport(LOG,
(errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("page workers all exit or not open parallel redo"))); diff --git a/src/gausskernel/storage/access/transam/xact.cpp b/src/gausskernel/storage/access/transam/xact.cpp index aae577d6eb055083ecff5363c9cdbeee689c7c49..68e632abbe99ade5a7462589a01a3d2c98f95847 100755 --- a/src/gausskernel/storage/access/transam/xact.cpp +++ b/src/gausskernel/storage/access/transam/xact.cpp @@ -46,6 +46,7 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "access/multi_redo_api.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" @@ -7206,6 +7207,15 @@ static void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) smgrdounlink(srel, true); smgrclose(srel); + /* + * recycle exrto files when dropping table occurs. + */ + if (IS_EXRTO_READ) { + RelFileNode block_meta_file = relFileNode; + block_meta_file.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; + extreme_rto_standby_read::remove_one_block_info_file(block_meta_file); + } + UnlockRelFileNode(relFileNode, AccessExclusiveLock); /* diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 3a85240b58eb8502e359921f9d7f93b95124b018..28fe664273eeb4d0b4867f2a0594ce97997cb657 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -38,6 +38,7 @@ #include "access/double_write.h" #include "access/heapam.h" #include "access/multixact.h" +#include "access/multi_redo_api.h" #include "access/rewriteheap.h" #include "access/subtrans.h" #include "access/transam.h" @@ -10424,6 +10425,12 @@ void StartupXLOG(void) } else { Insert->fullPageWrites = t_thrd.xlog_cxt.lastFullPageWrites; } + + if (IS_EXRTO_READ) { + /* we are going to be master, we need to recycle residual_undo_file again */ + g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = false; + } + 
LocalSetXLogInsertAllowed(); UpdateFullPageWrites(); t_thrd.xlog_cxt.LocalXLogInsertAllowed = -1; @@ -13182,6 +13189,16 @@ static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr curIns segno = mainStandbySegNo; } } + + if (IS_EXRTO_READ) { + XLogRecPtr recycle_recptr = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn); + XLogSegNo recyle_segno; + XLByteToSeg(recycle_recptr, recyle_segno); + if (recyle_segno < segno && recyle_segno > 0) { + segno = recyle_segno; + } + } + /* don't delete WAL segments newer than the calculated segment */ if (segno < *logSegNo && segno > 0) { *logSegNo = segno; @@ -16019,8 +16036,8 @@ void SetXLogReplayRecPtr(XLogRecPtr readRecPtr, XLogRecPtr endRecPtr) SpinLockRelease(&xlogctl->info_lck); if (isUpdated) { RedoSpeedDiag(readRecPtr, endRecPtr); + update_dirty_page_queue_rec_lsn(readRecPtr); } - update_dirty_page_queue_rec_lsn(readRecPtr); #ifndef ENABLE_MULTIPLE_NODES if (g_instance.attr.attr_storage.dcf_attr.enable_dcf) { int ret = dcf_set_election_priority(1, endRecPtr); diff --git a/src/gausskernel/storage/access/transam/xlogfuncs.cpp b/src/gausskernel/storage/access/transam/xlogfuncs.cpp index 2636fcfae660efdeaea2d3ff46ddc759a4c1b197..9533951c6ef95377a30e389607fa4712938d7694 100755 --- a/src/gausskernel/storage/access/transam/xlogfuncs.cpp +++ b/src/gausskernel/storage/access/transam/xlogfuncs.cpp @@ -2046,6 +2046,23 @@ Datum gs_streaming_dr_in_switchover(PG_FUNCTION_ARGS) Datum gs_streaming_dr_service_truncation_check(PG_FUNCTION_ARGS) { #ifndef ENABLE_LITE_MODE + int dr_sender_num = 0; + + for (int i = 1; i < MAX_REPLNODE_NUM; i++) { + ReplConnInfo *replConnInfo = NULL; + replConnInfo = t_thrd.postmaster_cxt.ReplConnArray[i]; + + /* Number of DR replconninfo */ + if (replConnInfo != NULL && replConnInfo->isCrossRegion) { + dr_sender_num++; + } + } + if (IS_PGXC_COORDINATOR) { + g_instance.streaming_dr_cxt.hadrWalSndNum = dr_sender_num; + } else { + g_instance.streaming_dr_cxt.hadrWalSndNum = 
dr_sender_num > 0 ? 1 : 0; + } + for (int i = 0; i < g_instance.attr.attr_storage.max_wal_senders; i++) { /* use volatile pointer to prevent code rearrangement */ volatile WalSnd *walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i]; @@ -2057,7 +2074,6 @@ Datum gs_streaming_dr_service_truncation_check(PG_FUNCTION_ARGS) SpinLockAcquire(&walsnd->mutex); if (walsnd->interactiveState == SDRS_DEFAULT) { walsnd->interactiveState = SDRS_INTERACTION_BEGIN; - g_instance.streaming_dr_cxt.hadrWalSndNum++; } SpinLockRelease(&walsnd->mutex); } diff --git a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp index 6e70f321f1a729b17d8bf7fea1dade541ed2e000..9149870fe0c48809584715497f535acd5f32aace 100644 --- a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp @@ -1148,9 +1148,6 @@ void UHeapXlogFreezeTDOperatorPage(RedoBufferInfo *buffer, void *recorddata) UHeapPageTDData *tdPtr = (UHeapPageTDData *)PageGetTDPointer(page); TD *transinfo = tdPtr->td_info; - if (InHotStandby && TransactionIdIsValid(xlrec->latestFrozenXid)) - ResolveRecoveryConflictWithSnapshot(xlrec->latestFrozenXid, buffer->blockinfo.rnode, buffer->lsn); - UHeapFreezeOrInvalidateTuples(buffer->buf, nFrozen, frozenSlots, true); for (int i = 0; i < nFrozen; i++) { @@ -1520,12 +1517,6 @@ void UHeapRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatar } } -#ifdef ENABLE_MULTIPLE_NODES -const static bool SUPPORT_HOT_STANDBY = false; /* don't support consistency view */ -#else -const static bool SUPPORT_HOT_STANDBY = true; -#endif - void UHeap2XlogFreezeOperatorPage(RedoBufferInfo *buffer, void *recorddata, void *blkdata, Size datalen) { XlUHeapFreeze *xlrec = (XlUHeapFreeze *)recorddata; @@ -1536,14 +1527,6 @@ void UHeap2XlogFreezeOperatorPage(RedoBufferInfo *buffer, void *recorddata, void OffsetNumber *offsetsEnd = NULL; UHeapTupleData utuple; - /* - * In Hot 
Standby mode, ensure that there's no queries running which still
-     * consider the frozen xids as running.
-     */
-    if (InHotStandby && SUPPORT_HOT_STANDBY) {
-        ResolveRecoveryConflictWithSnapshot(cutoffXid, buffer->blockinfo.rnode, buffer->lsn);
-    }
-
     if (datalen > 0) {
         offsetsEnd = (OffsetNumber *)((char *)offsets + datalen);
@@ -2019,12 +2002,18 @@ static void RedoUndoDiscardBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *b
     XLogRecPtr lsn = blockdatarec->undoDiscardParse.lsn;
     UndoZone *zone = UndoZoneGroup::GetUndoZone(zoneId);
+    /*
+     * zone may be NULL here (it is only checked just below), so it must not be
+     * dereferenced for logging; report InvalidXLogRecPtr as zone_lsn in that case.
+     */
+    ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+        "redo_undo_discard_block zid=%d, isZoneNull:%d, zone_lsn:%lu, lsn:%lu, end_slot:%lu, end_undo_ptr:%lu, "
+        "recycled_xid:%lu."), zoneId, (int)(zone == NULL),
+        (zone == NULL) ? InvalidXLogRecPtr : zone->GetLSN(), lsn, endSlot, endUndoPtr, recycledXid)));
     if (zone == NULL) {
         return;
     }
     if (zone->GetLSN() < lsn) {
         zone->LockUndoZone();
         Assert(blockdatarec->undoDiscardParse.startSlot == zone->GetRecycleTSlotPtr());
+        if (IS_EXRTO_READ && (!g_instance.undo_cxt.is_exrto_residual_undo_file_recycled)) {
+            zone->set_recycle_tslot_ptr_exrto(endSlot);
+        }
         zone->SetRecycleTSlotPtr(endSlot);
         zone->SetDiscardURecPtr(endUndoPtr);
         zone->SetForceDiscardURecPtr(endUndoPtr);
@@ -2048,12 +2037,19 @@ static void RedoUndoUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl
     XLogRecPtr unlinkLsn = blockdatarec->undoUnlinkParse.unlinkLsn;
     UndoLogOffset newHead = blockdatarec->undoUnlinkParse.headOffset;
     UndoLogOffset head = usp->Head();
+    ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+        "redo_undo_unlink_block, zid=%d, usp_lsn:%lu, unlink_lsn:%lu, head:%lu, new_head:%lu."),
+        zoneId, usp->LSN(), unlinkLsn, head, newHead)));
     if (usp->LSN() < unlinkLsn) {
         zone->ForgetUndoBuffer(head, newHead, UNDO_DB_OID);
         usp->LockSpace();
         usp->MarkDirty();
-        usp->UnlinkUndoLog(zoneId, newHead, UNDO_DB_OID);
+        if (IS_EXRTO_STANDBY_READ) {
+            usp->SetHead(newHead);
+        } else {
+            usp->UnlinkUndoLog(zoneId, newHead, UNDO_DB_OID);
+        }
usp->SetLSN(unlinkLsn); usp->UnlockSpace(); } @@ -2071,12 +2067,19 @@ static void RedoSlotUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl XLogRecPtr unlinkLsn = blockdatarec->undoUnlinkParse.unlinkLsn; UndoLogOffset newHead = blockdatarec->undoUnlinkParse.headOffset; UndoLogOffset head = usp->Head(); + ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT( + "redo_slot_unlink_block, zid=%d, usp_lsn:%lu, unlink_lsn:%lu, head:%lu, new_head:%lu."), + zoneId, usp->LSN(), unlinkLsn, head, newHead))); if (usp->LSN() < unlinkLsn) { zone->ForgetUndoBuffer(head, newHead, UNDO_SLOT_DB_OID); usp->LockSpace(); usp->MarkDirty(); - usp->UnlinkUndoLog(zoneId, newHead, UNDO_SLOT_DB_OID); + if (IS_EXRTO_STANDBY_READ) { + usp->SetHead(newHead); + } else { + usp->UnlinkUndoLog(zoneId, newHead, UNDO_SLOT_DB_OID); + } usp->SetLSN(unlinkLsn); usp->UnlockSpace(); } diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp index bb72f6e2791d78127a9bc44da9fd2a913353d302..87acf77e598f4fa704e2bb1fe0df169ff16d8ace 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp @@ -22,6 +22,7 @@ #include "access/ustore/undo/knl_uundoxlog.h" #include "access/ustore/knl_whitebox_test.h" #include "access/transam.h" +#include "access/multi_redo_api.h" #include "catalog/pg_class.h" #include "knl/knl_session.h" #include "knl/knl_thread.h" @@ -281,9 +282,14 @@ UndoRecordState CheckUndoRecordValid(UndoRecPtr urp, bool checkForceRecycle, Tra UndoZone *uzone = UndoZoneGroup::GetUndoZone(zid, false); if (uzone == NULL) { return UNDO_RECORD_INVALID; - } else { - return uzone->CheckUndoRecordValid(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid); } + + if (IS_EXRTO_STANDBY_READ) { + return uzone->check_record_valid_exrto(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid); + } + + + return 
uzone->CheckUndoRecordValid(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid); } /* @@ -640,6 +646,7 @@ void RecoveryUndoSystemMeta(void) /* Close fd. */ close(fd); + ereport(LOG, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("recovery_meta: undo recovery finish."))); #endif } } diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp index 75ac16a3205048e1fb5b0524462fb817845b489b..44debc23e594d6c458400b2b666d5a3dec673342 100755 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp @@ -31,6 +31,8 @@ #include "access/ustore/undo/knl_uundoxlog.h" #include "access/ustore/knl_undorequest.h" #include "access/ustore/knl_whitebox_test.h" +#include "access/multi_redo_api.h" +#include "access/extreme_rto/page_redo.h" #include "gssignal/gs_signal.h" #include "knl/knl_thread.h" #include "storage/ipc.h" @@ -528,6 +530,162 @@ static void RecycleWaitIfNotUsed() } } +void exrto_standby_release_space(UndoZone *zone, TransactionId recycle_xid, UndoRecPtr start_undo_ptr, + UndoRecPtr end_undo_ptr, UndoSlotPtr recycle_exrto) +{ + UndoRecPtr oldest_end_undo_ptr = end_undo_ptr; + Assert(TransactionIdIsValid(recycle_xid) && (zone->get_recycle_xid_exrto() < recycle_xid)); + zone->LockUndoZone(); + if (!zone->CheckRecycle(start_undo_ptr, end_undo_ptr)) { + ereport(PANIC, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("zone %d recycle start %lu >= recycle end %lu."), + zone->GetZoneId(), start_undo_ptr, end_undo_ptr))); + } + if (IS_VALID_UNDO_REC_PTR(oldest_end_undo_ptr)) { + int start_zid = UNDO_PTR_GET_ZONE_ID(start_undo_ptr); + int end_zid = UNDO_PTR_GET_ZONE_ID(oldest_end_undo_ptr); + if (unlikely(start_zid != end_zid)) { + oldest_end_undo_ptr = MAKE_UNDO_PTR(start_zid, UNDO_LOG_MAX_SIZE); + } + zone->set_discard_urec_ptr_exrto(oldest_end_undo_ptr); + } + + zone->set_recycle_xid_exrto(recycle_xid); + 
zone->set_force_discard_urec_ptr_exrto(end_undo_ptr); + zone->set_recycle_tslot_ptr_exrto(recycle_exrto); + zone->UnlockUndoZone(); + zone->ReleaseSpace(start_undo_ptr, end_undo_ptr, &g_forceRecycleSize); + zone->ReleaseSlotSpace(0, recycle_exrto, &g_forceRecycleSize); +} + +bool exrto_standby_recycle_space(UndoZone *zone, TransactionId recycle_xmin) +{ + UndoSlotPtr recycle_exrto = zone->get_recycle_tslot_ptr_exrto(); + UndoSlotPtr recycle_primary = zone->GetRecycleTSlotPtr(); + undo::TransactionSlot *slot = NULL; + UndoRecPtr end_undo_ptr = INVALID_UNDO_REC_PTR; + TransactionId recycle_xid = InvalidTransactionId; + bool undo_recycled = false; + bool result = false; + UndoSlotPtr start = INVALID_UNDO_SLOT_PTR; + ereport(DEBUG1, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_standby_recycle_space zone_id:%d, recycle_xmin:%lu, recycle_exrto:%lu, " + "recycle_primary:%lu."), + zone->GetZoneId(), recycle_xmin, recycle_exrto, recycle_primary))); + + while (recycle_exrto < recycle_primary) { + UndoSlotBuffer& slot_buf = g_slotBufferCache->FetchTransactionBuffer(recycle_exrto); + UndoRecPtr start_undo_ptr = INVALID_UNDO_REC_PTR; + start = recycle_exrto; + slot_buf.PrepareTransactionSlot(recycle_exrto); + undo_recycled = false; + Assert(slot_buf.BufBlock() == UNDO_PTR_GET_BLOCK_NUM(recycle_exrto)); + while (slot_buf.BufBlock() == UNDO_PTR_GET_BLOCK_NUM(recycle_exrto) && (recycle_exrto < recycle_primary)) { + slot = slot_buf.FetchTransactionSlot(recycle_exrto); + if (!TransactionIdIsValid(slot->XactId())) { + break; + } + if (slot->StartUndoPtr() == INVALID_UNDO_REC_PTR) { + break; + } + + if (TransactionIdFollowsOrEquals(slot->XactId(), recycle_xmin)) { + break; + } + ereport(DEBUG1, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("recycle zone %d, exrto transaction slot %lu xid %lu start ptr %lu end ptr %lu."), + zone->GetZoneId(), recycle_exrto, slot->XactId(), + slot->StartUndoPtr(), slot->EndUndoPtr()))); + if (!start_undo_ptr) { + start_undo_ptr = slot->StartUndoPtr(); + 
} + end_undo_ptr = slot->EndUndoPtr(); + recycle_xid = slot->XactId(); + undo_recycled = true; + recycle_exrto = GetNextSlotPtr(recycle_exrto); + /* if next recycle_exrto is in different slot_buf, release current slot_buf. */ + if (slot_buf.BufBlock() != UNDO_PTR_GET_BLOCK_NUM(recycle_exrto)) { + g_slotBufferCache->RemoveSlotBuffer(start); + slot_buf.Release(); + } + } + if (undo_recycled) { + exrto_standby_release_space(zone, recycle_xid, start_undo_ptr, end_undo_ptr, recycle_exrto); + result = true; + } else { + /* zone has nothing to recycle. */ + break; + } + } + return result; +} + +bool exrto_standby_recycle_undo_zone() +{ + uint32 idx = 0; + bool recycled = false; + if (g_instance.undo_cxt.uZoneCount == 0 || g_instance.undo_cxt.uZones == NULL) { + return recycled; + } + TransactionId recycle_xmin = extreme_rto::exrto_calculate_recycle_xmin_for_undo(); + for (idx = 0; idx < PERSIST_ZONE_COUNT && !t_thrd.undorecycler_cxt.shutdown_requested; idx++) { + UndoZone *zone = (UndoZone *)g_instance.undo_cxt.uZones[idx]; + if (zone == NULL) { + continue; + } + if (zone->Used_exrto()) { + if (exrto_standby_recycle_space(zone, recycle_xmin)) { + recycled = true; + } + } + } + smgrcloseall(); + return recycled; +} + +/* recycle residual_undo_file which may be leftover by exrto read in standby */ +void exrto_recycle_residual_undo_file() +{ + uint32 idx = 0; + uint64 record_file_cnt = 0; + uint64 slot_file_cnt = 0; + if (g_instance.undo_cxt.is_exrto_residual_undo_file_recycled) { + return; + } + ereport(LOG, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file begin uZoneCount is %u."), + g_instance.undo_cxt.uZoneCount))); + if (g_instance.undo_cxt.uZoneCount == 0 || g_instance.undo_cxt.uZones == NULL) { + g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true; + ereport(LOG, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file uZoneCount is zero or uZones is null.")))); + return; + } + for (idx = 0; idx < 
PERSIST_ZONE_COUNT && !t_thrd.undorecycler_cxt.shutdown_requested; idx++) { + UndoZone *zone = (UndoZone *)g_instance.undo_cxt.uZones[idx]; + if (zone == NULL) { + continue; + } + record_file_cnt += zone->release_residual_record_space(); + slot_file_cnt += zone->release_residual_slot_space(); + } + smgrcloseall(); + ereport(LOG, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file release record_file_cnt:%lu, " + "slot_file_cnt:%lu."), record_file_cnt, slot_file_cnt))); + g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true; +} + +void recycle_wait(bool recycled, uint64 *non_recycled) +{ + if (!recycled) { + *non_recycled += UNDO_RECYCLE_TIMEOUT_DELTA; + WaitRecycleThread(*non_recycled); + } else { + *non_recycled = 0; + } +} + void UndoRecycleMain() { sigjmp_buf localSigjmpBuf; @@ -646,6 +804,10 @@ void UndoRecycleMain() t_thrd.undorecycler_cxt.got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } + if (t_thrd.undorecycler_cxt.shutdown_requested) { + ShutDownRecycle(recycleMaxXIDs); + } + exrto_recycle_residual_undo_file(); if (!RecoveryInProgress()) { TransactionId recycleXmin = InvalidTransactionId; TransactionId oldestXmin = GetOldestXminForUndo(&recycleXmin); @@ -751,15 +913,10 @@ void UndoRecycleMain() pg_atomic_write_u64(&g_instance.undo_cxt.globalRecycleXid, oldestXidHavingUndo); } } - if (!recycled) { - nonRecycled += UNDO_RECYCLE_TIMEOUT_DELTA; - WaitRecycleThread(nonRecycled); - } else { - nonRecycled = 0; - } - } else { - WaitRecycleThread(nonRecycled); + } else if (IS_EXRTO_STANDBY_READ) { + recycled = exrto_standby_recycle_undo_zone(); } + recycle_wait(recycled, &nonRecycled); } ShutDownRecycle(recycleMaxXIDs); } diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp index 24a29e01e10769a408ef007f95ff77aa63613c29..3d60a8a4d75a315459e3563215173df61eb59e74 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp +++ 
b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp @@ -20,6 +20,7 @@ #include "access/ustore/knl_whitebox_test.h" #include "storage/lock/lwlock.h" #include "storage/smgr/smgr.h" +#include "access/multi_redo_api.h" namespace undo { static uint64 USEG_SIZE(uint32 dbId) @@ -49,6 +50,26 @@ uint32 UndoSpace::Used(void) return (uint32)((tail_ - head_) / BLCKSZ); } +UndoLogOffset UndoSpace::find_oldest_offset(int zid, uint32 db_id) const +{ + UndoLogOffset offset = head_; + BlockNumber blockno; + RelFileNode rnode; + UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, offset), db_id); + SMgrRelation reln = smgropen(rnode, InvalidBackendId); + uint64 seg_size = USEG_SIZE(db_id); + while (offset >=seg_size) { + offset -= seg_size; + blockno = (BlockNumber)(offset / BLCKSZ); + if (!smgrexists(reln, MAIN_FORKNUM, blockno)) { + offset += seg_size; + break; + } + } + smgrclose(reln); + return offset; +} + /* Create segments needed to increase end_ to newEnd. */ void UndoSpace::ExtendUndoLog(int zid, UndoLogOffset offset, uint32 dbId) { @@ -91,7 +112,17 @@ void UndoSpace::ExtendUndoLog(int zid, UndoLogOffset offset, uint32 dbId) void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId) { RelFileNode rnode; - UndoLogOffset head = head_; + UndoLogOffset head; + UndoLogOffset old_head; + if (IS_EXRTO_STANDBY_READ) { + head = head_exrto; + old_head = head_exrto; + set_head_exrto(offset); + } else { + head = head_; + old_head = head_; + SetHead(offset); + } Assert(head < offset && head_ <= tail_); UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, offset), dbId); SMgrRelation reln = smgropen(rnode, InvalidBackendId); @@ -104,6 +135,9 @@ void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId) while (head < offset) { /* Create a new undo segment. 
*/ smgrdounlink(reln, t_thrd.xlog_cxt.InRecovery, (head / BLCKSZ)); + ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT( + "unlink undo log, zid=%d, dbid=%u, new_head=%lu, segId:%lu."), + zid, dbId, offset, head/segSize))); if (g_instance.undo_cxt.undoTotalSize < segBlocks) { ereport(PANIC, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT( "unlink undo log, total blocks=%u < segment size."), @@ -114,8 +148,32 @@ void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId) } smgrclose(reln); ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT( - "unlink undo log, total blocks=%u, zid=%d, dbid=%u, head=%lu."), - g_instance.undo_cxt.undoTotalSize, zid, dbId, offset))); + "unlink undo log, total blocks=%u, zid=%d, dbid=%u, head=%lu, old_head:%lu."), + g_instance.undo_cxt.undoTotalSize, zid, dbId, offset, old_head))); + return; +} + +/* + * Unlink undo segment files which are residual in extreme RTO standby read, + * unlink from start to end(not include). + */ +void UndoSpace::unlink_residual_log(int zid, UndoLogOffset start, UndoLogOffset end, uint32 db_id) const +{ + RelFileNode rnode; + UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, start), db_id); + SMgrRelation reln = smgropen(rnode, InvalidBackendId); + uint64 seg_size = USEG_SIZE(db_id); + + while (start/seg_size < end/seg_size) { + /* delete a new undo segment. 
*/ + BlockNumber block = (BlockNumber)(start / BLCKSZ); + smgrdounlink(reln, t_thrd.xlog_cxt.InRecovery, block); + ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT( + "unlink_residual_log, zid=%d, dbid=%u, start=%lu, end=%lu, segId:%lu, endSegId:%lu."), + zid, db_id, start, end, start/seg_size, end/seg_size))); + start += seg_size; + } + smgrclose(reln); return; } @@ -383,6 +441,7 @@ void UndoSpace::RecoveryUndoSpace(int fd, UndoSpaceType type) usp->MarkClean(); usp->SetLSN(uspMetaInfo->lsn); usp->SetHead(uspMetaInfo->head); + usp->set_head_exrto(uspMetaInfo->head); usp->SetTail(uspMetaInfo->tail); if (type == UNDO_LOG_SPACE) { usp->CreateNonExistsUndoFile(zoneId, UNDO_DB_OID); @@ -390,6 +449,9 @@ void UndoSpace::RecoveryUndoSpace(int fd, UndoSpaceType type) usp->CreateNonExistsUndoFile(zoneId, UNDO_SLOT_DB_OID); } pg_atomic_fetch_add_u32(&g_instance.undo_cxt.undoTotalSize, usp->Used()); + ereport(DEBUG1, (errmsg(UNDOFORMAT("recovery_space_meta, zone_id:%u, type:%u, " + "lsn:%lu, head:%lu, tail:%lu."), + zoneId, type, uspMetaInfo->lsn, uspMetaInfo->head, uspMetaInfo->tail))); } pfree(persistBlock); } diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp index 288f12ad5510aa3fce127a8890864d7da0643dc8..7ac14bfd4c3dcf0c702decbb259ef0a0acc9dde8 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp @@ -21,6 +21,7 @@ #include "access/ustore/undo/knl_uundotxn.h" #include "access/ustore/undo/knl_uundospace.h" #include "access/ustore/knl_whitebox_test.h" +#include "access/multi_redo_api.h" #include "knl/knl_thread.h" #include "miscadmin.h" #include "storage/smgr/fd.h" @@ -43,11 +44,15 @@ UndoZone::UndoZone() SetLSN(0); SetInsertURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); SetDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + set_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); 
SetForceDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + set_force_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); SetAllocateTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE); SetRecycleTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + set_recycle_tslot_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); SetFrozenSlotPtr(INVALID_UNDO_SLOT_PTR); SetRecycleXid(InvalidTransactionId); + set_recycle_xid_exrto(InvalidTransactionId); SetFrozenXid(InvalidTransactionId); InitSlotBuffer(); SetAttachPid(0); @@ -56,12 +61,14 @@ UndoZone::UndoZone() undoSpace_.LockInit(); undoSpace_.SetLSN(0); undoSpace_.SetHead(0); + undoSpace_.set_head_exrto(0); undoSpace_.SetTail(0); slotSpace_.MarkClean(); slotSpace_.LockInit(); slotSpace_.SetLSN(0); slotSpace_.SetHead(0); + slotSpace_.set_head_exrto(0); slotSpace_.SetTail(0); } @@ -80,10 +87,16 @@ bool UndoZone::CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp) int endZid = UNDO_PTR_GET_ZONE_ID(endurp); UndoLogOffset start = UNDO_PTR_GET_OFFSET(starturp); UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp); - Assert(start == forceDiscardURecPtr_); + UndoLogOffset force_discard_urec_ptr; + if (IS_EXRTO_STANDBY_READ) { + force_discard_urec_ptr = force_discard_urec_ptr_exrto; + } else { + force_discard_urec_ptr = forceDiscardURecPtr_; + } + Assert(start == force_discard_urec_ptr); WHITEBOX_TEST_STUB(UNDO_CHECK_RECYCLE_FAILED, WhiteboxDefaultErrorEmit); - if ((startZid == endZid) && (forceDiscardURecPtr_ <= insertURecPtr_) && (end <= insertURecPtr_) + if ((startZid == endZid) && (force_discard_urec_ptr <= insertURecPtr_) && (end <= insertURecPtr_) && (start < end)) { return true; } @@ -130,6 +143,48 @@ UndoRecordState UndoZone::CheckUndoRecordValid(UndoLogOffset offset, bool checkF return UNDO_RECORD_DISCARD; } +/* + * Classify the undo record at offset against the exrto discard pointers and return the matching + * UndoRecordState (not a bool). Caller must hold the space discardLock_. 
+ */ +UndoRecordState UndoZone::check_record_valid_exrto(UndoLogOffset offset, bool check_force_recycle, + TransactionId *last_xid) const +{ + Assert((offset < UNDO_LOG_MAX_SIZE) && (offset >= UNDO_LOG_BLOCK_HEADER_SIZE)); + Assert(force_discard_urec_ptr_exrto <= insertURecPtr_); + + if (offset >= this->insertURecPtr_) { + ereport(DEBUG1, (errmsg(UNDOFORMAT("The undo record not insert yet: zid=%d, " + "insert=%lu, offset=%lu."), + this->zid_, this->insertURecPtr_, offset))); + return UNDO_RECORD_NOT_INSERT; + } + if (offset >= this->force_discard_urec_ptr_exrto) { + return UNDO_RECORD_NORMAL; + } + if (last_xid != NULL) { + *last_xid = recycle_xid_exrto; + } + if (offset >= this->discard_urec_ptr_exrto && check_force_recycle) { + TransactionId recycle_xmin; + TransactionId oldest_xmin = GetOldestXminForUndo(&recycle_xmin); + if (TransactionIdPrecedes(recycle_xid_exrto, recycle_xmin)) { + ereport(DEBUG1, (errmsg( + UNDOFORMAT("oldestxmin %lu, recycle_xmin %lu > recyclexid_exrto %lu: zid=%d," + "force_discard_urec_ptr_exrto=%lu, discard_urec_ptr_exrto=%lu, offset=%lu."), + oldest_xmin, recycle_xmin, recycle_xid_exrto, this->zid_, this->force_discard_urec_ptr_exrto, + this->discard_urec_ptr_exrto, offset))); + return UNDO_RECORD_DISCARD; + } + ereport(DEBUG1, (errmsg(UNDOFORMAT("The record has been force recycled: zid=%d, " + "force_discard_urec_ptr_exrto=%lu, " + "discard_urec_ptr_exrto=%lu, offset=%lu."), + this->zid_, this->force_discard_urec_ptr_exrto, this->discard_urec_ptr_exrto, offset))); + return UNDO_RECORD_FORCE_DISCARD; + } + return UNDO_RECORD_DISCARD; +} + /* * Drop all buffers for the given undo log, from the start to end. 
*/ @@ -220,7 +275,14 @@ UndoSlotPtr UndoZone::AllocateSlotSpace(void) void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRecycleSize) { UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp); - int startSegno = (int)(undoSpace_.Head() / UNDO_LOG_SEGMENT_SIZE); + int startSegno; + UndoLogOffset head; + if (IS_EXRTO_STANDBY_READ) { + head = undoSpace_.Head_exrto(); + } else { + head = undoSpace_.Head(); + } + startSegno = (int)(head / UNDO_LOG_SEGMENT_SIZE); int endSegno = (int)(end / UNDO_LOG_SEGMENT_SIZE); if (unlikely(startSegno < endSegno)) { @@ -229,10 +291,10 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe } ForgetUndoBuffer(startSegno * UNDO_LOG_SEGMENT_SIZE, endSegno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID); undoSpace_.LockSpace(); - UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, undoSpace_.Head()); + UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head); undoSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID); Assert(undoSpace_.Head() <= insertURecPtr_); - if (pLevel_ == UNDO_PERMANENT) { + if (pLevel_ == UNDO_PERMANENT && (!IS_EXRTO_STANDBY_READ)) { START_CRIT_SECTION(); undoSpace_.MarkDirty(); XlogUndoUnlink undoUnlink; @@ -247,11 +309,35 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe return; } +/* Unlink residual undo record segments from the oldest existing one up to the head; returns the segment count. 
*/ +uint64 UndoZone::release_residual_record_space() +{ + undoSpace_.LockSpace(); + UndoLogOffset unlink_start = undoSpace_.find_oldest_offset(zid_, UNDO_DB_OID); + UndoLogOffset unlink_end = undoSpace_.Head(); + undoSpace_.unlink_residual_log(zid_, unlink_start, unlink_end, UNDO_DB_OID); + undoSpace_.UnlockSpace(); + if (unlink_start > unlink_end) { + ereport(WARNING, (errmsg(UNDOFORMAT("release_residual_record_space start:%lu " + "is bigger than end:%lu."), + unlink_start, unlink_end))); + return 0; + } else { + return (unlink_end / UNDO_LOG_SEGMENT_SIZE) - (unlink_start / UNDO_LOG_SEGMENT_SIZE); + } +} + /* Release slot space from starturp to endurp and advance discard. */ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, int *forceRecycleSize) { UndoLogOffset end = UNDO_PTR_GET_OFFSET(endSlotPtr); - int startSegno = (int)(slotSpace_.Head() / UNDO_META_SEGMENT_SIZE); + UndoLogOffset head; + if (IS_EXRTO_STANDBY_READ) { + head = slotSpace_.Head_exrto(); + } else { + head = slotSpace_.Head(); + } + int startSegno = (int)(head / UNDO_META_SEGMENT_SIZE); int endSegno = (int)(end / UNDO_META_SEGMENT_SIZE); if (unlikely(startSegno < endSegno)) { @@ -260,10 +346,10 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, } ForgetUndoBuffer(startSegno * UNDO_META_SEGMENT_SIZE, endSegno * UNDO_META_SEGMENT_SIZE, UNDO_SLOT_DB_OID); slotSpace_.LockSpace(); - UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, slotSpace_.Head()); + UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head); slotSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_META_SEGMENT_SIZE, UNDO_SLOT_DB_OID); Assert(slotSpace_.Head() <= allocateTSlotPtr_); - if (pLevel_ == UNDO_PERMANENT) { + if (pLevel_ == UNDO_PERMANENT && !(IS_EXRTO_STANDBY_READ)) { START_CRIT_SECTION(); slotSpace_.MarkDirty(); XlogUndoUnlink undoUnlink; @@ -278,6 +364,24 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, return; } +/* Unlink residual undo slot segments from the oldest existing one
up to the head; returns the segment count. */ +uint64 UndoZone::release_residual_slot_space() +{ + slotSpace_.LockSpace(); + UndoLogOffset unlink_start = slotSpace_.find_oldest_offset(zid_, UNDO_SLOT_DB_OID); + UndoLogOffset unlink_end = slotSpace_.Head(); + slotSpace_.unlink_residual_log(zid_, unlink_start, unlink_end, UNDO_SLOT_DB_OID); + slotSpace_.UnlockSpace(); + if (unlink_start > unlink_end) { + ereport(WARNING, (errmsg(UNDOFORMAT("release_residual_slot_space start:%lu is bigger " + "than end:%lu."), + unlink_start, unlink_end))); + return 0; + } else { + return (unlink_end / UNDO_META_SEGMENT_SIZE) - (unlink_start / UNDO_META_SEGMENT_SIZE); + } +} + void UndoZone::PrepareSwitch(void) { WHITEBOX_TEST_STUB(UNDO_PREPARE_SWITCH_FAILED, WhiteboxDefaultErrorEmit); @@ -513,10 +617,19 @@ static void RecoveryZone(UndoZone *uzone, uzone->SetLSN(uspMetaInfo->lsn); uzone->SetInsertURecPtr(uspMetaInfo->insertURecPtr); uzone->SetDiscardURecPtr(uspMetaInfo->discardURecPtr); + uzone->set_discard_urec_ptr_exrto(uspMetaInfo->discardURecPtr); uzone->SetForceDiscardURecPtr(uspMetaInfo->forceDiscardURecPtr); + uzone->set_force_discard_urec_ptr_exrto(uspMetaInfo->forceDiscardURecPtr); uzone->SetAllocateTSlotPtr(uspMetaInfo->allocateTSlotPtr); uzone->SetRecycleTSlotPtr(uspMetaInfo->recycleTSlotPtr); + uzone->set_recycle_tslot_ptr_exrto(uspMetaInfo->recycleTSlotPtr); uzone->SetRecycleXid(uspMetaInfo->recycleXid); + uzone->set_recycle_xid_exrto(uspMetaInfo->recycleXid); + ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("recovery_zone id:%d, lsn:%lu, " + "insert_urec_ptr:%lu, discard_urec_ptr:%lu, force_discard_urec_ptr:%lu, allocate_tslot_ptr:%lu, " + "recycle_tslot_ptr:%lu, recycle_xid:%lu."), zoneId, uspMetaInfo->lsn, uspMetaInfo->insertURecPtr, + uspMetaInfo->discardURecPtr, uspMetaInfo->forceDiscardURecPtr, uspMetaInfo->allocateTSlotPtr, + uspMetaInfo->recycleTSlotPtr, uspMetaInfo->recycleXid))); } /* Initialize parameters in the undo zone. 
*/ @@ -528,11 +641,15 @@ void InitZone(UndoZone *uzone, const int zoneId, UndoPersistence upersistence) uzone->SetLSN(0); uzone->SetInsertURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); uzone->SetDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + uzone->set_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); uzone->SetForceDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + uzone->set_force_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); uzone->SetAllocateTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE); uzone->SetRecycleTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE); + uzone->set_recycle_tslot_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE); uzone->SetFrozenSlotPtr(INVALID_UNDO_SLOT_PTR); uzone->SetRecycleXid(InvalidTransactionId); + uzone->set_recycle_xid_exrto(InvalidTransactionId); uzone->SetFrozenXid(InvalidTransactionId); uzone->SetAttachPid(0); } @@ -544,6 +661,7 @@ void InitUndoSpace(UndoZone *uzone, UndoSpaceType type) usp->MarkClean(); usp->SetLSN(0); usp->SetHead(0); + usp->set_head_exrto(0); usp->SetTail(0); } diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index 311a80eb8980c9c5c70f56e36959ed33c854943d..63cf83f6f80b7cea7442d6ca98abd20958f590f4 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -129,8 +129,7 @@ static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg); static bool ReadBuffer_common_ReadBlock(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, bool isExtend, Block bufBlock, const XLogPhyBlock *pblk, bool *need_repair); -static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk); + static void TerminateBufferIO_common(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); /* @@ -351,8 +350,6 @@ void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) static void 
BufferSync(int flags); static void TerminateBufferIO_common(BufferDesc* buf, bool clear_dirty, uint32 set_flag_bits); void shared_buffer_write_error_callback(void* arg); -static BufferDesc* BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, - BufferAccessStrategy strategy, bool* foundPtr, const XLogPhyBlock *pblk); static int rnode_comparator(const void* p1, const void* p2); @@ -1676,6 +1673,10 @@ Buffer ReadBuffer(Relation reln, BlockNumber block_num) Buffer ReadBufferExtended(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy) { + if (IsExtremeRtoRunning() && !AmPageRedoWorker()) { + return standby_read_buf(reln, fork_num, block_num, mode, strategy); + } + bool hit = false; Buffer buf; @@ -2227,7 +2228,7 @@ static inline void BufferDescSetPBLK(BufferDesc *buf, const XLogPhyBlock *pblk) * * *hit is set to true if the request was satisfied from shared buffer cache. */ -static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, +Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk) { BufferDesc *bufHdr = NULL; @@ -2288,7 +2289,7 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. */ - bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, strategy, &found, pblk); + bufHdr = BufferAlloc(smgr->smgr_rnode.node, relpersistence, forkNum, blockNum, strategy, &found, pblk); if (g_instance.attr.attr_security.enable_tde && IS_PGXC_DATANODE) { bufHdr->extra->encrypt = smgr->encrypt ? 
true : false; /* set tde flag */ } @@ -2670,14 +2671,15 @@ void PageCheckWhenChosedElimination(const BufferDesc *buf, uint32 oldFlags) * * No locks are held either at entry or exit. */ -static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num, BlockNumber block_num, - BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk) +BufferDesc *BufferAlloc(const RelFileNode &rel_file_node, char relpersistence, ForkNumber fork_num, + BlockNumber block_num, BufferAccessStrategy strategy, bool *found, + const XLogPhyBlock *pblk) { if (g_instance.attr.attr_storage.nvm_attr.enable_nvm) { - return NvmBufferAlloc(smgr, relpersistence, fork_num, block_num, strategy, found, pblk); + return NvmBufferAlloc(rel_file_node, relpersistence, fork_num, block_num, strategy, found, pblk); } - Assert(!IsSegmentPhysicalRelNode(smgr->smgr_rnode.node)); + Assert(!IsSegmentPhysicalRelNode(rel_file_node)); BufferTag new_tag; /* identity of requested block */ uint32 new_hash; /* hash value for newTag */ @@ -2692,7 +2694,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe uint32 buf_state; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(new_tag, smgr->smgr_rnode.node, fork_num, block_num); + INIT_BUFFERTAG(new_tag, rel_file_node, fork_num, block_num); /* determine its hash code and partition lock ID */ new_hash = BufTableHashCode(&new_tag); @@ -2844,8 +2846,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe } /* OK, do the I/O */ - TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(fork_num, block_num, smgr->smgr_rnode.node.spcNode, - smgr->smgr_rnode.node.dbNode, smgr->smgr_rnode.node.relNode); + TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(fork_num, block_num, rel_file_node.spcNode, + rel_file_node.dbNode, rel_file_node.relNode); /* during initdb, not need flush dw file */ if (dw_enabled() && pg_atomic_read_u32(&g_instance.ckpt_cxt_ctl->current_page_writer_count) > 0) { @@ 
-2868,8 +2870,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe ScheduleBufferTagForWriteback(t_thrd.storage_cxt.BackendWritebackContext, &buf->tag); - TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(fork_num, block_num, smgr->smgr_rnode.node.spcNode, - smgr->smgr_rnode.node.dbNode, smgr->smgr_rnode.node.relNode); + TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(fork_num, block_num, rel_file_node.spcNode, + rel_file_node.dbNode, rel_file_node.relNode); } else { /* * Someone else has locked the buffer, so give it up and loop @@ -5422,6 +5424,31 @@ void DropDatabaseBuffers(Oid dbid) gstrace_exit(GS_TRC_ID_DropDatabaseBuffers); } +void buffer_drop_exrto_standby_read_buffers() +{ + int i = 0; + ereport(LOG, (errmsg("buffer_drop_exrto_standby_read_buffers: start to drop buffers."))); + while (i < TOTAL_BUFFER_NUM) { + BufferDesc *buf_desc = GetBufferDescriptor(i); + uint32 buf_state; + /* + * Some safe unlocked checks can be done to reduce the number of cycle. + */ + if (!IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) { + i++; + continue; + } + + buf_state = LockBufHdr(buf_desc); + if (IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) { + InvalidateBuffer(buf_desc); /* with buffer head lock released */ + } else { + UnlockBufHdr(buf_desc, buf_state); + } + i++; + } +} + /* ----------------------------------------------------------------- * PrintBufferDescs * @@ -5690,6 +5717,12 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) buf_desc = GetBufferDescriptor(buffer - 1); Assert(GetPrivateRefCount(buffer) > 0); + + // temp buf just for old page version, could not write to disk + if (pg_atomic_read_u32(&buf_desc->state) & BM_IS_TMP_BUF) { + return; + } + /* here, either share or exclusive lock is OK */ if (!LWLockHeldByMe(buf_desc->content_lock)) ereport(PANIC, (errcode(ERRCODE_INVALID_BUFFER), @@ -5723,8 +5756,9 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) * The incremental checkpoint is protected by the doublewriter, the * half-write 
problem does not occur. */ - if (!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() && - (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT)) { + bool need_write_wal = + (!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() && (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT)); + if (need_write_wal) { /* * If we're in recovery we cannot dirty a page because of a hint. * We can set the hint, just not dirty the page as a result so the @@ -7352,3 +7386,18 @@ bool IsPageHitBufferPool(RelFileNode& node, ForkNumber forkNum, BlockNumber bloc } return false; } + +void buffer_in_progress_pop() +{ + Assert(t_thrd.storage_cxt.ParentInProgressBuf == NULL); + t_thrd.storage_cxt.ParentInProgressBuf = t_thrd.storage_cxt.InProgressBuf; + t_thrd.storage_cxt.ParentIsForInput = t_thrd.storage_cxt.IsForInput; + t_thrd.storage_cxt.InProgressBuf = NULL; +} + +void buffer_in_progress_push() +{ + t_thrd.storage_cxt.InProgressBuf = t_thrd.storage_cxt.ParentInProgressBuf; + t_thrd.storage_cxt.IsForInput = t_thrd.storage_cxt.ParentIsForInput; + t_thrd.storage_cxt.ParentInProgressBuf = NULL; +} diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp index a2bec4d0e7a7da5aea991dc0bd7bcbdbad410274..b23a6b590a5ea7f9440c5a2f67e91771c604e6b5 100755 --- a/src/gausskernel/storage/ipc/procarray.cpp +++ b/src/gausskernel/storage/ipc/procarray.cpp @@ -76,6 +76,7 @@ #include "access/clog.h" #include "access/csnlog.h" +#include "access/extreme_rto/page_redo.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/twophase.h" @@ -541,6 +542,8 @@ void ProcArrayEndTransaction(PGPROC* proc, TransactionId latestXid, bool isCommi pgxact->xmin = InvalidTransactionId; proc->snapXmax = InvalidTransactionId; proc->snapCSN = InvalidCommitSeqNo; + proc->exrto_read_lsn = 0; + proc->exrto_gen_snap_time = 0; pgxact->csn_min = InvalidCommitSeqNo; pgxact->csn_dr = InvalidCommitSeqNo; /* must be cleared with xid/xmin: */ @@ -585,6 +588,8 @@ static inline void 
ProcArrayEndTransactionInternal(PGPROC* proc, PGXACT* pgxact, pgxact->xmin = InvalidTransactionId; proc->snapXmax = InvalidTransactionId; proc->snapCSN = InvalidCommitSeqNo; + proc->exrto_read_lsn = 0; + proc->exrto_gen_snap_time = 0; pgxact->csn_min = InvalidCommitSeqNo; pgxact->csn_dr = InvalidCommitSeqNo; /* must be cleared with xid/xmin: */ @@ -827,6 +832,8 @@ void ProcArrayClearTransaction(PGPROC* proc) /* Clear the subtransaction-XID cache too */ pgxact->nxids = 0; + proc->exrto_read_lsn = 0; + proc->exrto_gen_snap_time = 0; /* Free xid cache memory if needed */ ResetProcXidCache(proc, true); } @@ -2107,7 +2114,7 @@ RETRY: /* reset xmin before acquiring lwlock, in case blocking redo */ t_thrd.pgxact->xmin = InvalidTransactionId; RETRY_GET: - if (snapshot->takenDuringRecovery && !StreamThreadAmI() && + if (snapshot->takenDuringRecovery && !StreamThreadAmI() && !IS_EXRTO_READ && !u_sess->proc_cxt.clientIsCMAgent) { if (InterruptPending) { (void)pgstat_report_waitstatus(oldStatus); @@ -2429,6 +2436,10 @@ GROUP_GET_SNAPSHOT: (void)pgstat_report_waitstatus(oldStatus); } + if (IsExtremeRtoRunning() && pmState == PM_HOT_STANDBY) { + extreme_rto::exrto_read_snapshot(snapshot); + } + return snapshot; } @@ -3200,6 +3211,66 @@ ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalRe return pid; } +/* + * Signal PROCSIG_RECOVERY_CONFLICT_SNAPSHOT to every backend that has a valid xmin and exrto_read_lsn + * and whose xmin precedes or equals latest_removed_xid. If reach_max_check_times is set, also log a + * WARNING for each such backend. Returns true if at least one conflicting backend was found. + * Takes ProcArrayLock in shared mode for the duration of the scan. + */ +bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool reach_max_check_times) +{ + ProcArrayStruct* proc_array = g_instance.proc_array_idx; + bool conflict = false; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + for (int index = 0; index < proc_array->numProcs; index++) { + int pg_proc_no = proc_array->pgprocnos[index]; + PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; + PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; + XLogRecPtr read_lsn = pg_proc->exrto_read_lsn; + TransactionId pxmin = pg_xact->xmin; + + if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) { +
continue; + } + + Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); + /* + * Backend is doing logical decoding which manages xmin + * separately, check below. + */ + if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) { + continue; + } + + /* cancel query when its xmin <= latest_removed_xid */ + if (TransactionIdPrecedesOrEquals(pxmin, latest_removed_xid)) { + conflict = true; + pg_proc->recoveryConflictPending = true; + if (pg_proc->pid != 0) { + /* + * Kill the pid if it's still here. If not, that's what we + * wanted so ignore any errors. + */ + (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId); + /* + * Wait a little bit for it to die so that we avoid flooding + * an unresponsive backend when system is heavily loaded. + */ + pg_usleep(5000L); + } + /* only warn about backends that actually conflict, not every scanned backend */ + if (reach_max_check_times) { + ereport(WARNING, ( + errmsg("can not cancel thread while redo truncate, thread id = %lu", pg_proc->pid))); + } + } + } + LWLockRelease(ProcArrayLock); + + return conflict; +} + /* * MinimumActiveBackends --- count backends (other than myself) that are * in active transactions. 
Return true if the count exceeds the diff --git a/src/gausskernel/storage/lmgr/lwlocknames.txt b/src/gausskernel/storage/lmgr/lwlocknames.txt index dffbc1a6ef32e12e708cfd1d3b73bd0692c081e1..14a5191734a57a024f8e864f34af4234a5a68bdf 100755 --- a/src/gausskernel/storage/lmgr/lwlocknames.txt +++ b/src/gausskernel/storage/lmgr/lwlocknames.txt @@ -140,3 +140,4 @@ DropArchiveSlotLock 130 AboCacheLock 131 OndemandXLogMemAllocLock 132 OndemandXLogFileHandleLock 133 +ExrtoSnapshotLock 134 diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp index 8830f5863701387ac2359c261da00d96caaac709..29ee4bf0b2a7fa2db586ece38c45e6556ef53935 100755 --- a/src/gausskernel/storage/lmgr/proc.cpp +++ b/src/gausskernel/storage/lmgr/proc.cpp @@ -950,6 +950,8 @@ void InitProcess(void) t_thrd.proc->snap_refcnt_bitmap = 0; #endif + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; /* Check that group locking fields are in a proper initial state. */ Assert(t_thrd.proc->lockGroupLeader == NULL); Assert(dlist_is_empty(&t_thrd.proc->lockGroupMembers)); @@ -1109,6 +1111,8 @@ void InitAuxiliaryProcess(void) t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.proc->snapXmax = InvalidTransactionId; t_thrd.proc->snapCSN = InvalidCommitSeqNo; + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; t_thrd.pgxact->csn_min = InvalidCommitSeqNo; t_thrd.pgxact->csn_dr = InvalidCommitSeqNo; t_thrd.proc->backendId = InvalidBackendId; diff --git a/src/gausskernel/storage/nvm/nvmbuffer.cpp b/src/gausskernel/storage/nvm/nvmbuffer.cpp index 5ade48cab920ea88cb199cbb2cac30de96ec157f..0b3d5918cd0408554ab951bc4f0bcd4f109fb9e4 100644 --- a/src/gausskernel/storage/nvm/nvmbuffer.cpp +++ b/src/gausskernel/storage/nvm/nvmbuffer.cpp @@ -255,10 +255,10 @@ restart: return; } -BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num, +BufferDesc *NvmBufferAlloc(const RelFileNode& rel_file_node, char relpersistence, 
ForkNumber fork_num, BlockNumber block_num, BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk) { - Assert(!IsSegmentPhysicalRelNode(smgr->smgr_rnode.node)); + Assert(!IsSegmentPhysicalRelNode(rel_file_node)); BufferTag new_tag; /* identity of requested block */ uint32 new_hash; /* hash value for newTag */ @@ -276,7 +276,7 @@ BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fo errno_t rc; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(new_tag, smgr->smgr_rnode.node, fork_num, block_num); + INIT_BUFFERTAG(new_tag, rel_file_node, fork_num, block_num); /* determine its hash code and partition lock ID */ new_hash = BufTableHashCode(&new_tag); diff --git a/src/gausskernel/storage/page/bufpage.cpp b/src/gausskernel/storage/page/bufpage.cpp index 153ab6beecb69e83f066c2448e8a15f902d2a5ab..4b1a456c23d9e961ad3de1e4d9a1aba99c4515de 100644 --- a/src/gausskernel/storage/page/bufpage.cpp +++ b/src/gausskernel/storage/page/bufpage.cpp @@ -60,6 +60,7 @@ bool PageIsVerified(Page page, BlockNumber blkno) bool header_sane = false; bool all_zeroes = false; uint16 checksum = 0; + bool is_exrto_page = bool(p->pd_flags & PD_EXRTO_PAGE); /* * Don't verify page data unless the page passes basic non-zero test @@ -76,8 +77,8 @@ bool PageIsVerified(Page page, BlockNumber blkno) * the block can still reveal problems, which is why we offer the * checksum option. 
*/ - if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && p->pd_lower <= p->pd_upper && p->pd_upper <= p->pd_special && - p->pd_special <= BLCKSZ && p->pd_special == MAXALIGN(p->pd_special)) { + if (is_exrto_page || ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && p->pd_lower <= p->pd_upper && + p->pd_upper <= p->pd_special && p->pd_special <= BLCKSZ && p->pd_special == MAXALIGN(p->pd_special))) { header_sane = true; } diff --git a/src/gausskernel/storage/replication/basebackup.cpp b/src/gausskernel/storage/replication/basebackup.cpp index 9542a425e565923ed1a3dcb2d91a6ed14c351efb..e8523cfc6f5a12d2ea66a713ce1a4034f244b0c7 100755 --- a/src/gausskernel/storage/replication/basebackup.cpp +++ b/src/gausskernel/storage/replication/basebackup.cpp @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "access/cbmparsexlog.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "catalog/catalog.h" #include "catalog/pg_type.h" #include "gs_thread.h" @@ -1216,6 +1217,9 @@ bool IsSkipDir(const char * dirName) /* Skip temporary files */ if (strncmp(dirName, PG_TEMP_FILE_PREFIX, strlen(PG_TEMP_FILE_PREFIX)) == 0) return true; + if (strncmp(dirName, EXRTO_FILE_DIR, strlen(EXRTO_FILE_DIR)) == 0) { + return true; + } /* * If there's a backup_label file, it belongs to a backup started by diff --git a/src/gausskernel/storage/replication/slot.cpp b/src/gausskernel/storage/replication/slot.cpp index c26f39ceb6152af3a62d0d21ab6bacfe3c56e886..587929995039a488d3f3544bed8243945794086f 100644 --- a/src/gausskernel/storage/replication/slot.cpp +++ b/src/gausskernel/storage/replication/slot.cpp @@ -691,6 +691,8 @@ void ReplicationSlotRelease(void) LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.pgxact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING; + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; LWLockRelease(ProcArrayLock); } diff --git 
a/src/gausskernel/storage/replication/walreceiver.cpp b/src/gausskernel/storage/replication/walreceiver.cpp index 9a21024213b5de33ddfc1f0c259972a165fa4349..ba49b9ae1fc32bef0d201dc13f1f17ba05042af4 100755 --- a/src/gausskernel/storage/replication/walreceiver.cpp +++ b/src/gausskernel/storage/replication/walreceiver.cpp @@ -1699,6 +1699,8 @@ static void XLogWalRcvSendHSFeedback(void) else xmin = InvalidTransactionId; t_thrd.pgxact->xmin = InvalidTransactionId; + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; /* * Always send feedback message. */ diff --git a/src/gausskernel/storage/replication/walsender.cpp b/src/gausskernel/storage/replication/walsender.cpp index 6d4a2ed3ea6b158a0a1a791f86b6d9e116d455d9..f15b692f663b78be65914519abc4c1f6c0bf8fe8 100755 --- a/src/gausskernel/storage/replication/walsender.cpp +++ b/src/gausskernel/storage/replication/walsender.cpp @@ -2945,6 +2945,8 @@ static void PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin) SpinLockAcquire(&slot->mutex); t_thrd.pgxact->xmin = InvalidTransactionId; + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_gen_snap_time = 0; /* * For physical replication we don't need the the interlock provided * by xmin and effective_xmin since the consequences of a missed increase diff --git a/src/gausskernel/storage/smgr/Makefile b/src/gausskernel/storage/smgr/Makefile index 0e7ef320b1275bfa93db8f4a9ce5b72adb5557dd..b6bca4a44c19f6e12aec56e466dcfd0e6d5b3fd8 100644 --- a/src/gausskernel/storage/smgr/Makefile +++ b/src/gausskernel/storage/smgr/Makefile @@ -9,7 +9,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o page_compression.o +OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o page_compression.o storage_exrto_file.o SUBDIRS = segment cfs diff --git a/src/gausskernel/storage/smgr/smgr.cpp b/src/gausskernel/storage/smgr/smgr.cpp index 
f4b4c60b3c14e519ae25c35b4ff940b147b23a2d..b7ff7a4ffd48b6b4209ebab100acf36efa305f53 100755 --- a/src/gausskernel/storage/smgr/smgr.cpp +++ b/src/gausskernel/storage/smgr/smgr.cpp @@ -122,14 +122,37 @@ static const f_smgr smgrsw[] = { seg_async_write, seg_move_buckets }, + + /* extreme-rto standby read */ + { + exrto_init, + NULL, + exrto_close, + NULL, + exrto_exists, + exrto_unlink, + exrto_extend, + NULL, + exrto_read, + exrto_write, + exrto_writeback, + exrto_nblocks, + exrto_truncate, + NULL, + NULL, + NULL, + NULL + } }; static const int NSmgr = lengthof(smgrsw); static void push_unlink_rel_one_fork_to_hashtbl(RelFileNode node, ForkNumber forkNum); -static inline int ChooseSmgrManager(RelFileNode rnode) +static inline int ChooseSmgrManager(const RelFileNode& rnode) { - if (rnode.dbNode == UNDO_DB_OID || rnode.dbNode == UNDO_SLOT_DB_OID) { + if (IS_EXRTO_RELFILENODE(rnode)) { + return EXRTO_MANAGER; + } else if (rnode.dbNode == UNDO_DB_OID || rnode.dbNode == UNDO_SLOT_DB_OID) { return UNDO_MANAGER; } else if (IsSegmentFileNode(rnode)) { return SEGMENT_MANAGER; @@ -313,7 +336,7 @@ SMgrRelation smgropen(const RelFileNode& rnode, BackendId backend, int col /* = reln->smgr_bcm_nblocks[colnum] = InvalidBlockNumber; } - if (reln->smgr_which == UNDO_MANAGER) { + if (reln->smgr_which == UNDO_MANAGER || reln->smgr_which == EXRTO_MANAGER) { fdNeeded = 1; } @@ -411,8 +434,15 @@ void smgrclose(SMgrRelation reln, BlockNumber blockNum) ereport(DEBUG5, (errmsg("smgr close %p", reln))); SMgrRelation* owner = NULL; int forknum; + int max_forknum; + + if (reln->smgr_which == EXRTO_MANAGER && reln->smgr_rnode.node.spcNode == EXRTO_BLOCK_INFO_SPACE_OID) { + max_forknum = EXRTO_FORK_NUM; + } else { + max_forknum = reln->md_fdarray_size; + } - for (forknum = 0; forknum < (int)(reln->md_fdarray_size); forknum++) { + for (forknum = 0; forknum < max_forknum; forknum++) { (*(smgrsw[reln->smgr_which].smgr_close))(reln, (ForkNumber)forknum, blockNum); } owner = reln->smgr_owner; @@ -567,12 
+597,19 @@ void smgrdounlink(SMgrRelation reln, bool isRedo, BlockNumber blockNum) RelFileNodeBackend rnode = reln->smgr_rnode; int which = reln->smgr_which; int forknum; + int max_forknum; HTAB *unlink_rel_hashtbl = g_instance.bgwriter_cxt.unlink_rel_hashtbl; DelFileTag *entry = NULL; bool found = false; + if (which == EXRTO_MANAGER && reln->smgr_rnode.node.spcNode == EXRTO_BLOCK_INFO_SPACE_OID) { + max_forknum = EXRTO_FORK_NUM; + } else { + max_forknum = reln->md_fdarray_size; + } + /* Close the forks at smgr level */ - for (forknum = 0; forknum < (int)(reln->md_fdarray_size); forknum++) { + for (forknum = 0; forknum < max_forknum; forknum++) { (*(smgrsw[which].smgr_close))(reln, (ForkNumber)forknum, blockNum); } if (which == UNDO_MANAGER) { diff --git a/src/gausskernel/storage/smgr/storage_exrto_file.cpp b/src/gausskernel/storage/smgr/storage_exrto_file.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a3234cee4fd2310deffa59b3fcdcc208964561e6 --- /dev/null +++ b/src/gausskernel/storage/smgr/storage_exrto_file.cpp @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * storage_exrto_file.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/smgr/storage_exrto_file.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" +#include "storage/smgr/fd.h" +#include "storage/vfd.h" +#include "storage/smgr/smgr.h" +#include "utils/memutils.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" + +const uint32 EXRTO_BASE_PAGE_FILE_BLOCKS = EXRTO_BASE_PAGE_FILE_MAXSIZE / BLCKSZ; +const uint32 EXRTO_LSN_INFO_FILE_BLOCKS = EXRTO_LSN_INFO_FILE_MAXSIZE / BLCKSZ; +const uint32 EXRTO_BLOCK_INFO_FILE_BLOCKS = RELSEG_SIZE; + +const int EXTEND_BLOCKS_NUM = 16; +const uint64 EXRTO_INVALID_BLOCK_NUMBER = 0xFFFFFFFFFFFFFFFFL; + +const uint32 EXRTO_FILE_SIZE[] = { + EXRTO_BASE_PAGE_FILE_MAXSIZE, EXRTO_LSN_INFO_FILE_MAXSIZE, EXRTO_BLOCK_INFO_FILE_MAXSIZE}; +const uint32 EXRTO_FILE_BLOCKS[] = { + EXRTO_BASE_PAGE_FILE_BLOCKS, EXRTO_LSN_INFO_FILE_BLOCKS, EXRTO_BLOCK_INFO_FILE_BLOCKS}; + +typedef struct _ExRTOFileState { + uint64 segno[EXRTO_FORK_NUM]; + File file[EXRTO_FORK_NUM]; +} ExRTOFileState; + +static inline ExRTOFileType exrto_file_type(uint32 space_oid) +{ + if (space_oid == EXRTO_BASE_PAGE_SPACE_OID) { + return BASE_PAGE; + } else if (space_oid == EXRTO_LSN_INFO_SPACE_OID) { + return LSN_INFO_META; + } else { + return BLOCK_INFO_META; + } +} + +static inline void set_file_state(ExRTOFileState *state, ForkNumber forknum, uint64 segno, File file) +{ + state->segno[forknum] = segno; + state->file[forknum] = file; +} + +static inline uint64 get_total_block_num(ExRTOFileType type, uint32 high, uint32 low) +{ + if (type == BASE_PAGE || type == LSN_INFO_META) { + return ((uint64)high << UINT64_HALF) | low; + } else { + return (uint64)low; + } +} + +static ExRTOFileState *alloc_file_state(void) +{ + MemoryContext 
current; + ExRTOFileState *state; + if (EnableLocalSysCache()) { + current = t_thrd.lsc_cxt.lsc->lsc_mydb_memcxt; + } else { + current = u_sess->storage_cxt.exrto_standby_read_file_cxt; + } + state = (ExRTOFileState *)MemoryContextAllocZero(current, sizeof(ExRTOFileState)); + for (int i = 0; i < EXRTO_FORK_NUM; i++) { + state->file[i] = -1; + } + + return state; +} + +/* + * Build the path of one standby-read file into path (formatted with a limit of EXRTO_FILE_PATH_LEN bytes). + * Base page and lsn info files are named by batch id, worker id and the segment number in hex; + * block info files are named by dbNode, relNode, fork name and segment number. + */ +static void exrto_get_file_path(const RelFileNode node, ForkNumber forknum, uint64 segno, char *path) +{ + ExRTOFileType type; + char filename[EXRTO_FILE_PATH_LEN]; + errno_t rc = EOK; + + type = exrto_file_type(node.spcNode); + if (type == BASE_PAGE || type == LSN_INFO_META) { + uint32 batch_id = node.dbNode >> LOW_WORKERID_BITS; + uint32 worker_id = node.dbNode & LOW_WORKERID_MASK; + /* segno is a uint64: it needs the 'l' length modifier, as the %lu messages in this file use for uint64 */ + rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%02X%02X%016lX", + batch_id, worker_id, segno); + } else { + rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%u_%u_%s.%u", + node.dbNode, node.relNode, forkNames[forknum], (uint32)segno); + } + securec_check_ss(rc, "\0", "\0"); + + rc = snprintf_s(path, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s/%s", + EXRTO_FILE_DIR, EXRTO_FILE_SUB_DIR[type], filename); + securec_check_ss(rc, "\0", "\0"); + + return; +} + +/* Return the number of the segment file that holds blocknum of the given relation. */ +static uint64 get_seg_num(const RelFileNodeBackend& smgr_rnode, BlockNumber blocknum) +{ + ExRTOFileType type; + uint32 blocks_per_file; + uint64 total_blocknum; + uint64 segno; + + type = exrto_file_type(smgr_rnode.node.spcNode); + blocks_per_file = EXRTO_FILE_BLOCKS[type]; + total_blocknum = get_total_block_num(type, smgr_rnode.node.relNode, blocknum); + segno = (total_blocknum / blocks_per_file); + + return segno; +} + +static RelFileNodeForkNum exrto_file_relfilenode_forknum_fill(const RelFileNodeBackend &rnode, + ForkNumber forknum, uint64 segno) +{ + RelFileNodeForkNum node; + ExRTOFileType type; + + errno_t rc = memset_s(&node, sizeof(RelFileNodeForkNum), 0, sizeof(RelFileNodeForkNum)); + 
securec_check(rc, "", ""); + node.rnode = rnode; + type = exrto_file_type(rnode.node.spcNode); + if (type == BASE_PAGE || type == LSN_INFO_META) { + node.rnode.node.relNode = segno >> UINT64_HALF; + } + node.forknumber = forknum; + node.segno = (uint32)segno; + node.storage = ROW_STORE; + + return node; +} + +static ExRTOFileState *exrto_open_file(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + ExtensionBehavior behavior) +{ + ExRTOFileState* state = (ExRTOFileState *)reln->fileState; + uint64 segno; + uint32 flags = O_RDWR | PG_BINARY; + char file_path[EXRTO_FILE_PATH_LEN]; + RelFileNodeForkNum filenode; + File fd; + + segno = get_seg_num(reln->smgr_rnode, blocknum); + /* No work if already open */ + if (state != NULL) { + if (state->file[forknum] > 0) { + if (state->segno[forknum] == segno) { + return state; + } + /* This is not the file we're looking for. */ + FileClose(state->file[forknum]); + } + } else { + state = alloc_file_state(); + reln->fileState = state; + } + set_file_state(state, forknum, 0, -1); + + if (behavior == EXTENSION_CREATE) { + flags |= O_CREAT; + } + ADIO_RUN() { + flags |= O_DIRECT; + } + ADIO_END(); + + exrto_get_file_path(reln->smgr_rnode.node, forknum, segno, file_path); + filenode = exrto_file_relfilenode_forknum_fill(reln->smgr_rnode, forknum, segno); + fd = DataFileIdOpenFile(file_path, filenode, (int)flags, S_IRUSR | S_IWUSR); + if (fd < 0) { + if ((behavior == EXTENSION_RETURN_NULL) && FILE_POSSIBLY_DELETED(errno)) { + return NULL; + } + exrto_close(reln, forknum, InvalidBlockNumber); + ereport(ERROR, + (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", file_path))); + } + + set_file_state(state, forknum, segno, fd); + + return state; +} + +BlockNumber get_single_file_nblocks(SMgrRelation reln, ForkNumber forknum, const ExRTOFileState*state) +{ + Assert(state != NULL); + + char *filename = FilePathName(state->file[forknum]); + off_t len = FileSeek(state->file[forknum], 0L, SEEK_END); + if (len < 0) 
{ + exrto_close(reln, forknum, InvalidBlockNumber); + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek to end of file \"%s\": %m", filename))); + } + + /* note that this calculation will ignore any partial block at EOF */ + return (BlockNumber)(len / BLCKSZ); +} + +void exrto_init(void) +{ + if (EnableLocalSysCache()) { + return; + } + Assert(u_sess->storage_cxt.exrto_standby_read_file_cxt == NULL); + u_sess->storage_cxt.exrto_standby_read_file_cxt = + AllocSetContextCreate(u_sess->top_mem_cxt, "ExrtoFileSmgr", ALLOCSET_DEFAULT_SIZES); +} + +void exrto_close(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) +{ + ExRTOFileState* state = (ExRTOFileState*)reln->fileState; + + /* No work if already closed */ + if (state == NULL) { + return; + } + reln->fileState = NULL; /* prevent dangling pointer after error */ + + /* if not closed already */ + if (state->file[forknum] >= 0) { + FileClose(state->file[forknum]); + } + pfree(state); +} + +bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) +{ + /* + * Close it first, to ensure that we notice if the fork has been unlinked + * since we opened it. 
+ */ + exrto_close(reln, forknum, blocknum); + + bool isExist = false; + if (exrto_open_file(reln, forknum, blocknum, EXTENSION_RETURN_NULL) != NULL) { + isExist = true; + } + exrto_close(reln, forknum, blocknum); + return isExist; +} + +bool exrto_unlink_single_file(const RelFileNodeBackend &rnode, ForkNumber forknum, uint64 segno) +{ + struct stat stat_buf; + char segpath[EXRTO_FILE_PATH_LEN]; + + exrto_get_file_path(rnode.node, forknum, segno, segpath); + if (stat(segpath, &stat_buf) < 0) { + if (errno != ENOENT) { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not stat file \"%s\" before removing: %m", segpath))); + } + return false; + } + if (unlink(segpath) < 0) { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", segpath))); + } + return true; +} + +void exrto_unlink_file(const RelFileNodeBackend &rnode, ForkNumber forknum, BlockNumber blocknum) +{ + uint64 segno; + ExRTOFileType type = exrto_file_type(rnode.node.spcNode); + if (type == BLOCK_INFO_META) { + /* unlink all files */ + extreme_rto_standby_read::remove_block_meta_info_files_of_db(rnode.node.dbNode, rnode.node.relNode); + } else if (type == BASE_PAGE || type == LSN_INFO_META) { + /* just unlink the files before the file where blocknum is */ + segno = get_seg_num(rnode, blocknum); + while (segno != 0) { + segno -= 1; + if (!exrto_unlink_single_file(rnode, forknum, segno)) { + return; + } + } + } +} + +void exrto_unlink(const RelFileNodeBackend &rnode, ForkNumber forknum, bool is_redo, BlockNumber blocknum) +{ + ExRTOFileType type = exrto_file_type(rnode.node.spcNode); + if (type == BASE_PAGE || type == LSN_INFO_META) { + forknum = MAIN_FORKNUM; + } + if (forknum == InvalidForkNumber) { + for (int fork_num = 0; fork_num < EXRTO_FORK_NUM; fork_num++) { + exrto_unlink_file(rnode, (ForkNumber)fork_num, blocknum); + } + } else { + exrto_unlink_file(rnode, forknum, blocknum); + } +} + +/* extend EXTEND_BLOCKS_NUM pages */ +void 
exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skip_fsync) +{ + ExRTOFileState *state = NULL; + ExRTOFileType type; + uint64 total_block_num; + off_t seekpos; + int nbytes; + struct stat file_stat; + char* filename; + + type = exrto_file_type(reln->smgr_rnode.node.spcNode); + total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); + if (total_block_num == EXRTO_INVALID_BLOCK_NUMBER) { + ereport(ERROR, + (errmsg("cannot extend file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER))); + } + seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]); + + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + filename = FilePathName(state->file[forknum]); + if (stat(filename, &file_stat) < 0) { + exrto_close(reln, forknum, InvalidBlockNumber); + ereport(ERROR, (errmsg("could not stat file \"%s\": %m.", filename))); + } + Assert(file_stat.st_size % BLCKSZ == 0); + Assert(file_stat.st_size <= EXRTO_FILE_SIZE[type]); + + if (seekpos < file_stat.st_size) { + /* no need to extend */ + return; + } + + int extend_size = rtl::min(rtl::max(EXTEND_BLOCKS_NUM * BLCKSZ, (int)((seekpos - file_stat.st_size) + BLCKSZ)), + (int)(EXRTO_FILE_SIZE[type] - file_stat.st_size)); + nbytes = FilePWrite(state->file[forknum], NULL, extend_size, file_stat.st_size); + if (nbytes != extend_size) { + exrto_close(reln, forknum, InvalidBlockNumber); + if (nbytes < 0) { + ereport(ERROR, (errmsg("could not extend file \"%s\": %m.", filename))); + } + ereport(ERROR, + (errmsg("could not extend file \"%s\": wrote only %d of %d bytes.", filename, nbytes, extend_size))); + } + + Assert(get_single_file_nblocks(reln, forknum, state) <= ((BlockNumber)EXRTO_FILE_BLOCKS[type])); +} + +SMGR_READ_STATUS exrto_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer) +{ + ExRTOFileState *state = NULL; + ExRTOFileType type; + ExtensionBehavior behavior; + uint64 total_block_num; + off_t 
seekpos; + int nbytes; + errno_t rc; + + type = exrto_file_type(reln->smgr_rnode.node.spcNode); + if (type == LSN_INFO_META || type == BLOCK_INFO_META) { + behavior = EXTENSION_RETURN_NULL; + } else { + behavior = EXTENSION_FAIL; + } + + total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); + if (total_block_num == EXRTO_INVALID_BLOCK_NUMBER) { + ereport(ERROR, + (errmsg("cannot read file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER))); + } + seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]); + + state = exrto_open_file(reln, forknum, blocknum, behavior); + if (state == NULL) { + /* For lsn info and block info page, just set buffer to all zeros when not found on disk. */ + rc = memset_s(buffer, BLCKSZ, 0, BLCKSZ); + securec_check(rc, "\0", "\0"); + return SMGR_RD_OK; + } + + nbytes = FilePRead(state->file[forknum], buffer, BLCKSZ, seekpos); + if (nbytes == 0 && (type == LSN_INFO_META || type == BLOCK_INFO_META)) { + rc = memset_s(buffer, BLCKSZ, 0, BLCKSZ); + securec_check(rc, "\0", "\0"); + return SMGR_RD_OK; + } + if (nbytes != BLCKSZ) { + char *filename = FilePathName(state->file[forknum]); + exrto_close(reln, forknum, InvalidBlockNumber); + if (nbytes < 0) { + ereport(ERROR, + (errmsg("could not read block %u in file \"%s\": %m.", blocknum, filename))); + } + ereport(ERROR, + (errmsg("could not read block %u in file \"%s\": read only %d of %d bytes.", blocknum, filename, + nbytes, BLCKSZ))); + } + + if (PageIsVerified((Page)buffer, blocknum)) { + return SMGR_RD_OK; + } else { + return SMGR_RD_CRC_ERROR; + } +} + +void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool skip_fsync) +{ + ExRTOFileState *state = NULL; + ExRTOFileType type; + uint64 total_block_num; + off_t seekpos; + int nbytes; + + type = exrto_file_type(reln->smgr_rnode.node.spcNode); + total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); + if (total_block_num == 
EXRTO_INVALID_BLOCK_NUMBER) { + ereport(ERROR, + (errmsg("cannot write file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER))); + } + seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]); + + Assert(seekpos < (off_t)EXRTO_FILE_SIZE[type]); + + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + nbytes = FilePWrite(state->file[forknum], buffer, BLCKSZ, seekpos); + if (nbytes != BLCKSZ) { + char *filename = FilePathName(state->file[forknum]); + exrto_close(reln, forknum, InvalidBlockNumber); + if (nbytes < 0) { + ereport(ERROR, + (errmsg("could not write block %u in file \"%s\": %m.", blocknum, filename))); + } + ereport(ERROR, + (errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes.", + blocknum, filename, nbytes, BLCKSZ))); + } +} + +BlockNumber exrto_nblocks(SMgrRelation, ForkNumber) +{ + return MaxBlockNumber; +} + +void exrto_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) +{ + ExRTOFileType type = exrto_file_type(reln->smgr_rnode.node.spcNode); + Assert(type == BLOCK_INFO_META); + + BlockNumber curnblk = exrto_nblocks(reln, forknum); + if (curnblk == 0) { + return; + } + + if (nblocks > curnblk) { + ereport(ERROR, + (errcode_for_file_access(), errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now", + relpath(reln->smgr_rnode, forknum), nblocks, curnblk))); + } + if (nblocks == curnblk) { + return; + } + + uint32 blocks_per_file = EXRTO_FILE_BLOCKS[type]; + for (BlockNumber prior_blocks = 0;; prior_blocks += blocks_per_file) { + struct stat stat_buf; + char segpath[EXRTO_FILE_PATH_LEN]; + uint64 segno = get_seg_num(reln->smgr_rnode, prior_blocks); + exrto_get_file_path(reln->smgr_rnode.node, forknum, segno, segpath); + if (stat(segpath, &stat_buf) < 0) { + if (errno != ENOENT) { + ereport( + WARNING, + (errcode_for_file_access(), errmsg("could not stat file \"%s\" before truncate: %m", segpath))); + } + break; + } + + ExRTOFileState *state = exrto_open_file(reln, 
forknum, prior_blocks, EXTENSION_FAIL); + if (prior_blocks > nblocks) { + if (FileTruncate(state->file[forknum], 0) < 0) { + ereport(DEBUG1, + (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", segpath))); + } + } else if (prior_blocks + ((BlockNumber)blocks_per_file) > nblocks) { + BlockNumber last_seg_block = nblocks - prior_blocks; + off_t truncate_offset = (off_t)last_seg_block * BLCKSZ; + + if (FileTruncate(state->file[forknum], truncate_offset) < 0) { + ereport(DEBUG1, + (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", segpath))); + } + } + exrto_close(reln, forknum, InvalidBlockNumber); + } +} + +void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) +{ + ExRTOFileType type; + uint64 total_block_num; + type = exrto_file_type(reln->smgr_rnode.node.spcNode); + total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); + + while (nblocks > 0) { + BlockNumber nflush = nblocks; + off_t seekpos; + ExRTOFileState *state = NULL; + uint64 segnum_start, segnum_end; + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + segnum_start = total_block_num / EXRTO_FILE_BLOCKS[type]; + segnum_end = (total_block_num + nblocks - 1) / EXRTO_FILE_BLOCKS[type]; + + if (segnum_start != segnum_end) { + nflush = EXRTO_FILE_BLOCKS[type] - (uint32)(total_block_num % EXRTO_FILE_BLOCKS[type]); + } + + Assert(nflush >= 1); + Assert(nflush <= nblocks); + + seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]); + FileWriteback(state->file[forknum], seekpos, (off_t)BLCKSZ * nflush); + + nblocks -= nflush; + /* ensure that the relnode is not changed */ + Assert(((total_block_num + nflush) >> UINT64_HALF) == (total_block_num >> UINT64_HALF)); + total_block_num += nflush; + blocknum = (BlockNumber)total_block_num; + } +} diff --git a/src/include/access/extreme_rto/batch_redo.h b/src/include/access/extreme_rto/batch_redo.h index 
54d2a5be07a336eb2cba3f0137a84eb72f06c174..55b0c1f5dd2a2e041d871a277490b3c7cd207088 100644 --- a/src/include/access/extreme_rto/batch_redo.h +++ b/src/include/access/extreme_rto/batch_redo.h @@ -63,6 +63,26 @@ typedef struct redoitemhashentry { int redoItemNum; } RedoItemHashEntry; +inline void PRXLogRecGetBlockTag(XLogRecParseState *recordBlockState, RelFileNode *rnode, BlockNumber *blknum, + ForkNumber *forknum) +{ + XLogBlockParse *blockparse = &(recordBlockState->blockparse); + + if (rnode != NULL) { + rnode->dbNode = blockparse->blockhead.dbNode; + rnode->relNode = blockparse->blockhead.relNode; + rnode->spcNode = blockparse->blockhead.spcNode; + rnode->bucketNode = blockparse->blockhead.bucketNode; + rnode->opt = blockparse->blockhead.opt; + } + if (blknum != NULL) { + *blknum = blockparse->blockhead.blkno; + } + if (forknum != NULL) { + *forknum = blockparse->blockhead.forknum; + } +} + extern void PRPrintRedoItemHashTab(HTAB *redoItemHash); extern HTAB *PRRedoItemHashInitialize(MemoryContext context); extern void PRTrackClearBlock(XLogRecParseState *recordBlockState, HTAB *redoItemHash); diff --git a/src/include/access/extreme_rto/dispatcher.h b/src/include/access/extreme_rto/dispatcher.h index 70b3a5b48904097990f72f1bfdea16d53606cba6..ed5e61058155f78a1cf5ea438244f8093f80862a 100644 --- a/src/include/access/extreme_rto/dispatcher.h +++ b/src/include/access/extreme_rto/dispatcher.h @@ -165,6 +165,7 @@ typedef struct { volatile bool recoveryStop; volatile XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM]; RedoTimeCost *startupTimeCost; + ExrtoSnapshotData exrto_snapshot; } LogDispatcher; typedef struct { diff --git a/src/include/access/extreme_rto/page_redo.h b/src/include/access/extreme_rto/page_redo.h index 3ffa739e6ffd63918379525952f6b014a45e694b..7d789f85871bac495c5314407fa87a345994c225 100644 --- a/src/include/access/extreme_rto/page_redo.h +++ b/src/include/access/extreme_rto/page_redo.h @@ -33,8 +33,10 @@ #include "nodes/pg_list.h" #include 
"storage/proc.h" +#include "access/extreme_rto/batch_redo.h" #include "access/extreme_rto/posix_semaphore.h" #include "access/extreme_rto/spsc_blocking_queue.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "access/xlogproc.h" #include "postmaster/pagerepair.h" @@ -185,6 +187,7 @@ struct PageRedoWorker { HTAB *badPageHashTbl; char page[BLCKSZ]; XLogBlockDataParse *curRedoBlockState; + StandbyReadMetaInfo standby_read_meta_info; }; @@ -240,6 +243,7 @@ void DispatchClosefdMarkToAllRedoWorker(); void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key); const char *RedoWokerRole2Str(RedoRole role); +uint32 GetWorkerId(const RedoItemTag *redo_item_tag, uint32 worker_count); /* block or file repair function */ @@ -253,6 +257,9 @@ void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode); void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, XLogRecPtr old_lsn, XLogPhyBlock pblk); void SeqCheckRemoteReadAndRepairPage(); - +void exrto_generate_snapshot(XLogRecPtr trxn_lsn); +void exrto_read_snapshot(Snapshot snapshot); +XLogRecPtr exrto_calculate_recycle_position(bool force_recyle); +TransactionId exrto_calculate_recycle_xmin_for_undo(); } // namespace extreme_rto #endif diff --git a/src/include/access/extreme_rto/standby_read.h b/src/include/access/extreme_rto/standby_read.h new file mode 100644 index 0000000000000000000000000000000000000000..d54e3cc42609bd30e643769f0edadbb6e0f7d25e --- /dev/null +++ b/src/include/access/extreme_rto/standby_read.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * standby_read.h + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef EXTREME_RTO_STANDBY_READ_H +#define EXTREME_RTO_STANDBY_READ_H + +namespace extreme_rto { +void exrto_recycle_main(); +} /* namespace extreme_rto */ +#endif diff --git a/src/include/access/extreme_rto/standby_read/block_info_meta.h b/src/include/access/extreme_rto/standby_read/block_info_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..b1d9eb18a34ab637c62632fb3289d75004a37697 --- /dev/null +++ b/src/include/access/extreme_rto/standby_read/block_info_meta.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * block_info_meta.h + * + * + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read/block_info_meta.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef BLOCK_INFO_META_H +#define BLOCK_INFO_META_H + +#include "gs_thread.h" +#include "postgres.h" +#include "access/xlogdefs.h" +#include "access/extreme_rto/standby_read/lsn_info_double_list.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" +#include "storage/buf/bufpage.h" +#include "storage/buf/buf_internals.h" + +namespace extreme_rto_standby_read { + +const static uint32 BLOCK_INFO_PAGE_HEAD_PAD_SIZE = 40; +const static uint32 BLOCK_INFO_PAGE_VERSION = 1; // currently the first version of extreme rto standby read + +typedef struct _BlockInfoPageHeader { + PageXLogRecPtr lsn; /* LSN: next byte after last byte of wal record for last change to this page */ + uint16 checksum; /* checksum */ + uint16 flags; + uint32 version; + uint64 total_block_num; // all blocks of this table, only update on the first page + uint8 pad[BLOCK_INFO_PAGE_HEAD_PAD_SIZE]; +} BlockInfoPageHeader; + +#define BLOCK_INFO_PAGE_VALID_FLAG 0x0400 + +typedef struct _BlockMetaInfo { + uint32 timeline; + uint32 record_num; + XLogRecPtr min_lsn; + XLogRecPtr max_lsn; + uint32 flags; + uint32 pad; + LsnInfoDoubleList lsn_info_list; + LsnInfoDoubleList base_page_info_list; +} BlockMetaInfo; + +#define BLOCK_INFO_NODE_VALID_FLAG (1 << 24) +#define BLOCK_INFO_NODE_UPDATE_FLAG (1 << 25) +#define BLOCK_INFO_NODE_REFCOUNT_MASK 0xFFFFF +#define IS_BLOCK_INFO_UPDATING(_flags) ((_flags & BLOCK_INFO_NODE_UPDATE_FLAG) == BLOCK_INFO_NODE_UPDATE_FLAG) + +const static uint32 BLOCK_INFO_HEAD_SIZE = 64; // do not modify +const static uint32 BLOCK_INFO_SIZE = 64; // do not modify + +static const uint32 BLOCK_INFO_NUM_PER_PAGE = (BLCKSZ - BLOCK_INFO_HEAD_SIZE) / BLOCK_INFO_SIZE; + 
+typedef enum { + STANDBY_READ_RECLYE_NONE, + STANDBY_READ_RECLYE_UPDATE, + STANDBY_READ_RECLYE_ALL, +} StandbyReadRecyleState; + +BlockMetaInfo* get_block_meta_info_by_relfilenode( + const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer); +void insert_lsn_to_block_info( + StandbyReadMetaInfo* mete_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn); +StandbyReadRecyleState recyle_block_info( + const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn); +bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, XLogRecPtr read_lsn, + StandbyReadLsnInfoArray* lsn_info); +static inline bool is_block_info_page_valid(BlockInfoPageHeader* header) +{ + return ((header->flags & BLOCK_INFO_PAGE_VALID_FLAG) == BLOCK_INFO_PAGE_VALID_FLAG); +} + +static inline bool is_block_meta_info_valid(BlockMetaInfo* meta_info) +{ + return (((meta_info->flags & BLOCK_INFO_NODE_VALID_FLAG) == BLOCK_INFO_NODE_VALID_FLAG) && + meta_info->timeline == t_thrd.shemem_ptr_cxt.ControlFile->timeline); +} + +void remove_one_block_info_file(const RelFileNode rnode); + +void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid = InvalidOid); + +} // namespace extreme_rto_standby_read + +#endif \ No newline at end of file diff --git a/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h b/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h new file mode 100644 index 0000000000000000000000000000000000000000..c3df271e468ab0d50cfd926f3e3b9b8a688267cf --- /dev/null +++ b/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * lsn_info_double_list.h + * + * + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read/lsn_info_double_list.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef LSN_INFO_DOUBLE_LIST_H +#define LSN_INFO_DOUBLE_LIST_H + +#include "gs_thread.h" +#include "postgres.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" + +namespace extreme_rto_standby_read { +typedef uint64 LsnInfoPosition; + +static const LsnInfoPosition LSN_INFO_LIST_HEAD = 0xFFFFFFFFFFFFFFFFL; + +#define INFO_POSITION_IS_VALID(p) ((p) != 0xFFFFFFFFFFFFFFFFL) +#define INFO_POSITION_IS_INVALID(p) ((p) == 0xFFFFFFFFFFFFFFFFL) +typedef struct _LsnInfoDoubleList { + LsnInfoPosition prev; // not pointer, is position in lsn info meta table + LsnInfoPosition next; // not pointer, is position in lsn info meta table +} LsnInfoDoubleList; + +void lsn_info_list_init(LsnInfoDoubleList* node); +void info_list_modify_old_tail(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos, + LsnInfoPosition insert_pos, XLogRecPtr current_page_lsn, XLogRecPtr next_lsn, bool is_lsn_info); +} // namespace extreme_rto_standby_read +#endif \ No newline at end of file diff --git a/src/include/access/extreme_rto/standby_read/lsn_info_meta.h b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..7694bb984afa4ff2a9854e03475553dac3f544fe --- /dev/null +++ b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h @@ 
-0,0 +1,151 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * lsn_info_meta.h + * + * + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read/lsn_info_meta.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef LSN_INFO_META_H +#define LSN_INFO_META_H + +#include "gs_thread.h" +#include "postgres.h" +#include "storage/buf/bufpage.h" +#include "storage/buf/buf_internals.h" +#include "access/extreme_rto/standby_read/lsn_info_double_list.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" + +namespace extreme_rto_standby_read { +const static uint32 BASE_PAGE_MAP_SIZE = 16; +const static uint32 LSN_INFO_PAGE_HEAD_PAD_SIZE = 32; +const static uint32 LSN_INFO_PAGE_VERSION = 1; /* currently the first version of extreme rto standby read */ +const static uint32 LSN_NUM_PER_NODE = 5; +const static uint32 BYTE_BITS = 8; + +typedef struct _LsnInfoPageHeader { + PageXLogRecPtr lsn; /* LSN: next byte after last byte of wal record for last change to this page */ + uint16 checksum; /* checksum */ + uint16 flags; + uint32 version; + uint8 base_page_map[BASE_PAGE_MAP_SIZE]; + uint8 pad[LSN_INFO_PAGE_HEAD_PAD_SIZE]; +} LsnInfoPageHeader; + +typedef struct _LsnInfoNode { + LsnInfoDoubleList lsn_list; + uint32 flags; + uint16 type; + uint16 used; + XLogRecPtr lsn[LSN_NUM_PER_NODE]; +} LsnInfoNode; + +typedef 
struct _BasePageInfoNode { + LsnInfoNode lsn_info_node; + LsnInfoDoubleList base_page_list; + XLogRecPtr cur_page_lsn; + RelFileNode relfilenode; + ForkNumber fork_num; + BlockNumber block_num; + XLogRecPtr next_base_page_lsn; + BasePagePosition base_page_position; +} BasePageInfoNode; + +typedef LsnInfoNode* LsnInfo; +typedef BasePageInfoNode* BasePageInfo; + +const static uint32 LSN_INFO_HEAD_SIZE = 64; // do not modify +const static uint32 LSN_INFO_NODE_SIZE = 64; // do not modify +const static uint32 BASE_PAGE_INFO_NODE_SIZE = 128; // do not modify + +#define LSN_INFO_NODE_VALID_FLAG (1 << 24) +#define LSN_INFO_NODE_UPDATE_FLAG (1 << 25) +#define LSN_INFO_PAGE_VALID_FLAG 0x0400 + +typedef enum { + LSN_INFO_TYPE_BASE_PAGE = 1, + LSN_INFO_TYPE_LSNS, +} LsnInfoType; + +static inline bool is_lsn_info_node_valid(uint32 flags) +{ + return ((flags & LSN_INFO_NODE_VALID_FLAG) == LSN_INFO_NODE_VALID_FLAG); +} + +static inline bool is_lsn_info_node_updating(uint32 flags) +{ + return ((flags & LSN_INFO_NODE_UPDATE_FLAG) == LSN_INFO_NODE_UPDATE_FLAG); +} + +static inline bool is_lsn_info_page_valid(LsnInfoPageHeader *header) +{ + return ((header->flags & LSN_INFO_PAGE_VALID_FLAG) == LSN_INFO_PAGE_VALID_FLAG); +} + +static inline bool is_base_page_type(uint16 type) +{ + return (type == LSN_INFO_TYPE_BASE_PAGE); +} + +static inline bool is_lsn_type(uint16 type) +{ + return (type == LSN_INFO_TYPE_LSNS); +} + +inline uint32 lsn_info_postion_to_offset(LsnInfoPosition position) +{ + return position % BLCKSZ; +} + +static inline uint32 bit_to_offset(uint32 which_bit) +{ + return which_bit * LSN_INFO_NODE_SIZE; +} + +Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition position, ReadBufferMode mode, + Buffer* buffer); +void read_lsn_info_before(uint64 start_position, XLogRecPtr *readed_array, XLogRecPtr end_lsn); +LsnInfoDoubleList* lsn_info_position_to_node_ptr(LsnInfoPosition pos); + +// block meta table's page lock is held +void 
insert_lsn_to_lsn_info(StandbyReadMetaInfo* mete_info, LsnInfoDoubleList* head, + XLogRecPtr next_lsn); + +// block meta table's page lock is held +void insert_base_page_to_lsn_info(StandbyReadMetaInfo* meta_info, LsnInfoDoubleList* lsn_head, + LsnInfoDoubleList* base_page_head, const BufferTag& buf_tag, const Page base_page, XLogRecPtr curent_page_lsn, + XLogRecPtr next_lsn); + +void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_base_page_pos, + StandbyReadLsnInfoArray* lsn_info_list, XLogRecPtr read_lsn); + +Buffer buffer_read_base_page(uint32 batch_id, uint32 redo_id, BasePagePosition position, ReadBufferMode mode); +void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page); +void read_base_page(const BufferTag& buf_tag, BasePagePosition position, BufferDesc* dest_buf_desc); +void recycle_base_page_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos); + +void set_base_page_map_bit(Page page, uint32 base_page_loc); +bool is_base_page_map_bit_set(Page page, uint32 which_bit); +void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_info_pos, + XLogRecPtr recycle_lsn, LsnInfoPosition *min_page_info_pos, XLogRecPtr *min_lsn); +void standby_read_recyle_per_workers(StandbyReadMetaInfo *standby_read_meta_info, XLogRecPtr recycle_lsn); + +} // namespace extreme_rto_standby_read +#endif \ No newline at end of file diff --git a/src/include/access/extreme_rto/standby_read/standby_read_base.h b/src/include/access/extreme_rto/standby_read/standby_read_base.h new file mode 100644 index 0000000000000000000000000000000000000000..714b475068d77b4cfb8a1a9d6d4dd87fc46f2011 --- /dev/null +++ b/src/include/access/extreme_rto/standby_read/standby_read_base.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * standby_read_base.h + * + * + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read/standby_read_base.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef STANDBY_READ_BASE_H +#define STANDBY_READ_BASE_H + +#include "gs_thread.h" +#include "postgres.h" +#include "storage/buf/bufpage.h" +#include "postmaster/alarmchecker.h" + +#define EXRTO_FILE_DIR "standby_read" +#define EXRTO_OLD_FILE_DIR "standby_read_old" + +static const uint32 EXRTO_BASE_PAGE_FILE_MAXSIZE = 64 * 1024 * 1024; /* 64MB */ +static const uint32 EXRTO_LSN_INFO_FILE_MAXSIZE = 16 * 1024 * 1024; /* 16MB */ +static const uint32 EXRTO_BLOCK_INFO_FILE_MAXSIZE = RELSEG_SIZE * BLCKSZ; + +extern const char* EXRTO_FILE_SUB_DIR[]; +extern const uint32 EXRTO_FILE_PATH_LEN; + +#define UINT64_HALF 32 +#define LOW_WORKERID_BITS 16 +#define LOW_WORKERID_MASK ((1U << LOW_WORKERID_BITS) - 1) + +#define EXRTODEBUGINFO , __FUNCTION__, __LINE__ +#define EXRTODEBUGSTR "[%s:%d]" +#define EXRTOFORMAT(f) EXRTODEBUGSTR f EXRTODEBUGINFO + +enum ExRTOFileType { + BASE_PAGE = 0, + LSN_INFO_META, + BLOCK_INFO_META, +}; + +typedef uint64 BasePagePosition; + +typedef struct _StandbyReadMetaInfo { + uint32 batch_id; + uint32 redo_id; + uint64 lsn_table_recyle_position; + uint64 lsn_table_next_position; // next position where a node can be inserted; should skip the page header before use + BasePagePosition base_page_recyle_position; + BasePagePosition base_page_next_position; // next position can insert page + XLogRecPtr
recycle_lsn_per_worker; +} StandbyReadMetaInfo; + +inline void standby_read_meta_page_set_lsn(Page page, XLogRecPtr LSN) +{ + if (XLByteLT(LSN, PageGetLSN(page))) { + return; + } + PageSetLSNInternal(page, LSN); +} + +void exrto_clean_dir(void); +void exrto_recycle_old_dir(void); +void exrto_standby_read_init(); +#endif \ No newline at end of file diff --git a/src/include/access/multi_redo_api.h b/src/include/access/multi_redo_api.h index 750e5b4518f49c7593eb5d45709d1ca13aee704b..331bcd897f10898809ea5aa4ea62b1399b693f9c 100644 --- a/src/include/access/multi_redo_api.h +++ b/src/include/access/multi_redo_api.h @@ -34,6 +34,7 @@ #include "storage/proc.h" #include "access/redo_statistic.h" #include "access/extreme_rto_redo_api.h" +#include "postmaster/postmaster.h" #ifdef ENABLE_LITE_MODE #define ENABLE_ONDEMAND_RECOVERY false @@ -65,6 +66,9 @@ static const uint32 PAGE_REDO_WORKER_READY = 2; static const uint32 PAGE_REDO_WORKER_EXIT = 3; static const uint32 BIG_RECORD_LENGTH = XLOG_BLCKSZ * 16; +#define IS_EXRTO_READ (g_instance.attr.attr_storage.EnableHotStandby && IsExtremeRedo()) +#define IS_EXRTO_STANDBY_READ (IS_EXRTO_READ && pm_state_is_hot_standby()) + static inline int get_real_recovery_parallelism() { return g_instance.attr.attr_storage.real_recovery_parallelism; diff --git a/src/include/access/ustore/undo/knl_uundospace.h b/src/include/access/ustore/undo/knl_uundospace.h index e5cc112d7cde6226c47c1bb6dc7f640f11b35cbb..cd9832e79059370594cc8e9c897740536564327b 100644 --- a/src/include/access/ustore/undo/knl_uundospace.h +++ b/src/include/access/ustore/undo/knl_uundospace.h @@ -51,6 +51,10 @@ public: { return this->head_; } + inline UndoLogOffset Head_exrto(void) + { + return this->head_exrto; + } inline UndoLogOffset Tail(void) { return this->tail_; @@ -66,6 +70,10 @@ public: { this->head_ = head; } + inline void set_head_exrto(UndoRecPtr head) + { + this->head_exrto = head; + } inline void SetTail(UndoRecPtr tail) { this->tail_ = tail; @@ -109,10 +117,14 @@ 
public: void CreateNonExistsUndoFile(int zid, uint32 dbId); static void CheckPointUndoSpace(int fd, UndoSpaceType type); static void RecoveryUndoSpace(int fd, UndoSpaceType type); + UndoLogOffset find_oldest_offset(int zid, uint32 db_id) const; + void unlink_residual_log(int zid, UndoLogOffset start, UndoLogOffset end, uint32 db_id) const; private: /* next insertion point (head), this backend is the only one that can modify insert. */ UndoLogOffset head_; + /* real next insertion point (head), this backend is the only one that can modify insert. */ + UndoLogOffset head_exrto; /* one past end of highest segment, need lock befor modify end. */ UndoLogOffset tail_; diff --git a/src/include/access/ustore/undo/knl_uundozone.h b/src/include/access/ustore/undo/knl_uundozone.h index a2402299f3a00228e79cb157abff33f37fdfc39f..5d87696ae10ada237b63c196fd355722cc3f599b 100644 --- a/src/include/access/ustore/undo/knl_uundozone.h +++ b/src/include/access/ustore/undo/knl_uundozone.h @@ -119,6 +119,10 @@ public: { return MAKE_UNDO_PTR(zid_, recycleTSlotPtr_); } + inline UndoSlotPtr get_recycle_tslot_ptr_exrto(void) + { + return MAKE_UNDO_PTR(zid_, recycle_tslot_ptr_exrto); + } inline UndoSlotPtr GetFrozenSlotPtr(void) { return frozenSlotPtr_; @@ -127,6 +131,10 @@ public: { return recycleXid_; } + inline TransactionId get_recycle_xid_exrto(void) + { + return recycle_xid_exrto; + } inline TransactionId GetFrozenXid(void) { return frozenXid_; @@ -156,10 +164,18 @@ public: { discardURecPtr_ = UNDO_PTR_GET_OFFSET(discard); } + inline void set_discard_urec_ptr_exrto(UndoRecPtr discard) + { + discard_urec_ptr_exrto = UNDO_PTR_GET_OFFSET(discard); + } inline void SetForceDiscardURecPtr(UndoRecPtr discard) { forceDiscardURecPtr_ = UNDO_PTR_GET_OFFSET(discard); - } + } + inline void set_force_discard_urec_ptr_exrto(UndoRecPtr discard) + { + force_discard_urec_ptr_exrto = UNDO_PTR_GET_OFFSET(discard); + } inline void SetAttachPid(ThreadId attachPid) { attachPid_ = attachPid; @@ -176,6 +192,10 
@@ public: { recycleTSlotPtr_ = UNDO_PTR_GET_OFFSET(recycle); } + inline void set_recycle_tslot_ptr_exrto(UndoSlotPtr recycle) + { + recycle_tslot_ptr_exrto = UNDO_PTR_GET_OFFSET(recycle); + } inline void SetLSN(XLogRecPtr lsn) { lsn_ = lsn; @@ -188,6 +208,10 @@ public: { recycleXid_ = recycleXid; } + inline void set_recycle_xid_exrto(TransactionId recycle_xid) + { + recycle_xid_exrto = recycle_xid; + } inline void SetFrozenXid(TransactionId frozenXid) { frozenXid_ = frozenXid; @@ -200,6 +224,10 @@ public: { return insertURecPtr_ != forceDiscardURecPtr_; } + inline bool Used_exrto(void) + { + return insertURecPtr_ != force_discard_urec_ptr_exrto; + } /* Lock and unlock undozone. */ void InitLock(void) { @@ -300,6 +328,10 @@ public: /* Recovery undospace info from persistent file. */ static void RecoveryUndoZone(int fd); + UndoRecordState check_record_valid_exrto(UndoLogOffset offset, bool check_force_recycle, + TransactionId *last_xid) const; + uint64 release_residual_record_space(); + uint64 release_residual_slot_space(); private: static const uint32 UNDO_ZONE_ATTACHED = 1; @@ -316,6 +348,13 @@ private: TransactionId recycleXid_; TransactionId frozenXid_; ThreadId attachPid_; + + /* for extreme RTO read. */ + UndoSlotOffset recycle_tslot_ptr_exrto; + UndoLogOffset discard_urec_ptr_exrto; + UndoLogOffset force_discard_urec_ptr_exrto; + TransactionId recycle_xid_exrto; + /* Need Lock undo zone before alloc, preventing from checkpoint. */ LWLock *lock_; /* Lsn for undo zone meta. 
*/ diff --git a/src/include/access/xlogproc.h b/src/include/access/xlogproc.h index fba9b6e3fd385d08ea0b2b3f4f63b0c5c8d38604..ed9d926e7fce5ff67df49e0d21953eb0f5183e12 100755 --- a/src/include/access/xlogproc.h +++ b/src/include/access/xlogproc.h @@ -216,6 +216,7 @@ typedef enum { typedef struct { uint32 blockddltype; int rels; + uint32 mainDataLen; char *mainData; bool compress; } XLogBlockDdlParse; @@ -947,6 +948,10 @@ static inline Buffer AtomicExchangeBuffer(volatile Buffer *ptr, Buffer newval) return old; } +/* this is an estimated value */ +static const uint32 MAX_BUFFER_NUM_PER_WAL_RECORD = XLR_MAX_BLOCK_ID + 1; +static const uint32 LSN_MOVE32 = 10; + void HeapXlogCleanOperatorPage( RedoBufferInfo* buffer, void* recorddata, void* blkdata, Size datalen, Size* freespace, bool repairFragmentation); void HeapXlogFreezeOperatorPage(RedoBufferInfo* buffer, void* recorddata, void* blkdata, Size datalen, @@ -1117,7 +1122,7 @@ void SegPageRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdat extern void xlog_redo_data_block( XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); extern void XLogRecSetBlockDdlState(XLogBlockDdlParse* blockddlstate, uint32 blockddltype, char *mainData, - int rels = 1, bool compress = false); + int rels = 1, bool compress = false, uint32 main_data_len = 0); XLogRedoAction XLogCheckBlockDataRedoAction(XLogBlockDataParse* datadecode, RedoBufferInfo* bufferinfo); void BtreeRedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); @@ -1275,5 +1280,6 @@ extern bool IsCheckPoint(const XLogRecParseState *parseState); void redo_atomic_xlog_dispatch(uint8 opCode, RedoBufferInfo *redo_buf, const char *data); void seg_redo_new_page_copy_and_flush(BufferTag *tag, char *data, XLogRecPtr lsn); +void redo_target_page(const BufferTag& buf_tag, StandbyReadLsnInfoArray* lsn_info, Buffer base_page_buf); #endif diff --git a/src/include/catalog/storage.h 
b/src/include/catalog/storage.h index 66dcbb71c8d56b9650348330c2cf89b1dbcaad6d..200f0c189aac34ffe0c8c349d110df0a1fcc8e2c 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -27,8 +27,8 @@ extern void RelationCreateStorage(RelFileNode rnode, char relpersistence, Oid ow Relation rel = NULL); extern void RelationDropStorage(Relation rel, bool isDfsTruncate = false); extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit); -extern void RelationTruncate(Relation rel, BlockNumber nblocks); -extern void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks); +extern void RelationTruncate(Relation rel, BlockNumber nblocks, TransactionId latest_removed_xid = InvalidTransactionId); +extern void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks, TransactionId latest_removed_xid = InvalidTransactionId); extern void PartitionDropStorage(Relation rel, Partition part); extern void BucketCreateStorage(RelFileNode rnode, Oid bucketOid, Oid ownerid); extern void InsertStorageIntoPendingList(_in_ const RelFileNode* rnode, _in_ AttrNumber attrnum, _in_ BackendId backend, diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h index 841c42fd5ded25a053fd88919945c4535661e160..1f3ce16b6cb04b3c2f01e3e9ae773b970354cdab 100644 --- a/src/include/catalog/storage_xlog.h +++ b/src/include/catalog/storage_xlog.h @@ -51,8 +51,11 @@ typedef struct xl_smgr_truncate { typedef struct xl_smgr_truncate_compress { xl_smgr_truncate xlrec; uint2 pageCompressOpts; + TransactionId latest_removed_xid; } xl_smgr_truncate_compress; +#define TRUNCATE_CONTAIN_XID_SIZE (offsetof(xl_smgr_truncate_compress, latest_removed_xid) + sizeof(TransactionId)) + extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); extern void smgr_redo(XLogReaderState *record); @@ -60,7 +63,8 @@ extern void smgr_desc(StringInfo buf, XLogReaderState *record); extern const char* smgr_type_name(uint8 subtype); extern 
void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data); -extern void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn); +extern void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn, + TransactionId latest_removed_xid); /* An xlog combined by multiply sub-xlog, it will be decoded again */ #define XLOG_SEG_ATOMIC_OPERATION 0x00 diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql new file mode 100644 index 0000000000000000000000000000000000000000..f1bdb839d2d0f383a558f4a9580335fd38c69ea0 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql @@ -0,0 +1,19 @@ +-- ---------------------------------------------------------------- +-- rollback pg_catalog.pg_conversion +-- ---------------------------------------------------------------- + +delete from pg_catalog.pg_conversion where conname = 'gb18030_2022_to_utf8'; +delete from pg_catalog.pg_conversion where conname = 'utf8_to_gb18030_2022'; +DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE; + +UPDATE pg_catalog.pg_conversion SET conforencoding=37 WHERE conname like 'sjis_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=37 WHERE conname like '%_to_sjis'; +UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'big5_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_big5'; +UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'uhc_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_uhc'; +UPDATE pg_catalog.pg_conversion SET conforencoding=40
WHERE conname like 'johab_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%to_johab'; +UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'shift_jis_2004_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_shift_jis_2004'; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql new file mode 100644 index 0000000000000000000000000000000000000000..1ceaa4bdff9f9a08f2a54ea97db462bb66026333 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql @@ -0,0 +1 @@ +DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql new file mode 100644 index 0000000000000000000000000000000000000000..f1bdb839d2d0f383a558f4a9580335fd38c69ea0 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql @@ -0,0 +1,19 @@ +-- ---------------------------------------------------------------- +-- rollback pg_catalog.pg_conversion +-- ---------------------------------------------------------------- + +delete from pg_catalog.pg_conversion where conname = 'gb18030_2022_to_utf8'; +delete from pg_catalog.pg_conversion where conname = 'utf8_to_gb18030_2022'; +DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE; + +UPDATE pg_catalog.pg_conversion SET conforencoding=37 WHERE conname like 'sjis_to_%'; +UPDATE
pg_catalog.pg_conversion SET contoencoding=37 WHERE conname like '%_to_sjis'; +UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'big5_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_big5'; +UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'uhc_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_uhc'; +UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'johab_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%to_johab'; +UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'shift_jis_2004_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_shift_jis_2004'; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql new file mode 100644 index 0000000000000000000000000000000000000000..1ceaa4bdff9f9a08f2a54ea97db462bb66026333 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql @@ -0,0 +1 @@ +DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql new file mode 100644 index 0000000000000000000000000000000000000000..109cf32fcefdb1ddefbf24ad264a46977981e9f9 --- /dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql @@ -0,0 +1,36 @@ +-- ---------------------------------------------------------------- +-- upgrade pg_catalog.pg_conversion +-- ---------------------------------------------------------------- + +DROP 
FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) +RETURNS void +LANGUAGE c +STRICT NOT FENCED NOT SHIPPABLE +AS '$libdir/utf8_and_gb18030', $function$gb18030_2022_to_utf8$function$; +COMMENT ON FUNCTION pg_catalog.gb18030_2022_to_utf8(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER) +IS 'internal conversion function for GB18030_2022 to UTF8'; + +insert into pg_catalog.pg_conversion values ('gb18030_2022_to_utf8', 11, 10, 37, 7, 'gb18030_2022_to_utf8', true); + +DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) +RETURNS void +LANGUAGE c +STRICT NOT FENCED NOT SHIPPABLE +AS '$libdir/utf8_and_gb18030', $function$utf8_to_gb18030_2022$function$; +COMMENT ON FUNCTION pg_catalog.utf8_to_gb18030_2022(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER) +IS 'internal conversion function for UTF8 to GB18030_2022'; + +insert into pg_catalog.pg_conversion values ('utf8_to_gb18030_2022', 11, 10, 7, 37, 'utf8_to_gb18030_2022', true); + +UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'sjis_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_sjis'; +UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'big5_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_big5'; +UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'uhc_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%_to_uhc'; +UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'johab_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_johab'; +UPDATE pg_catalog.pg_conversion SET 
conforencoding=42 WHERE conname like 'shift_jis_2004_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=42 WHERE conname like '%to_shift_jis_2004'; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql new file mode 100644 index 0000000000000000000000000000000000000000..9317bbf1fc2cb03f52715cf92c5d4062b7e939a4 --- /dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql @@ -0,0 +1,11 @@ +DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade; +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 6218; +CREATE OR REPLACE FUNCTION pg_catalog.gs_hot_standby_space_info +( OUT base_page_file_num xid, + OUT base_page_total_size xid, + OUT lsn_info_meta_file_num xid, + OUT lsn_info_meta_total_size xid, + OUT block_info_meta_file_num xid, + OUT block_info_meta_total_size xid + ) +RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_hot_standby_space_info'; \ No newline at end of file diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql new file mode 100644 index 0000000000000000000000000000000000000000..109cf32fcefdb1ddefbf24ad264a46977981e9f9 --- /dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql @@ -0,0 +1,36 @@ +-- ---------------------------------------------------------------- +-- upgrade pg_catalog.pg_conversion +-- ---------------------------------------------------------------- + +DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, 
integer) +RETURNS void +LANGUAGE c +STRICT NOT FENCED NOT SHIPPABLE +AS '$libdir/utf8_and_gb18030', $function$gb18030_2022_to_utf8$function$; +COMMENT ON FUNCTION pg_catalog.gb18030_2022_to_utf8(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER) +IS 'internal conversion function for GB18030_2022 to UTF8'; + +insert into pg_catalog.pg_conversion values ('gb18030_2022_to_utf8', 11, 10, 37, 7, 'gb18030_2022_to_utf8', true); + +DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE; +CREATE OR REPLACE FUNCTION pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) +RETURNS void +LANGUAGE c +STRICT NOT FENCED NOT SHIPPABLE +AS '$libdir/utf8_and_gb18030', $function$utf8_to_gb18030_2022$function$; +COMMENT ON FUNCTION pg_catalog.utf8_to_gb18030_2022(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER) +IS 'internal conversion function for UTF8 to GB18030_2022'; + +insert into pg_catalog.pg_conversion values ('utf8_to_gb18030_2022', 11, 10, 7, 37, 'utf8_to_gb18030_2022', true); + +UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'sjis_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_sjis'; +UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'big5_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_big5'; +UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'uhc_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%_to_uhc'; +UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'johab_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_johab'; +UPDATE pg_catalog.pg_conversion SET conforencoding=42 WHERE conname like 'shift_jis_2004_to_%'; +UPDATE pg_catalog.pg_conversion SET contoencoding=42 WHERE conname like '%to_shift_jis_2004'; \ No newline at end of file diff --git 
a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql new file mode 100644 index 0000000000000000000000000000000000000000..9317bbf1fc2cb03f52715cf92c5d4062b7e939a4 --- /dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql @@ -0,0 +1,11 @@ +DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade; +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 6218; +CREATE OR REPLACE FUNCTION pg_catalog.gs_hot_standby_space_info +( OUT base_page_file_num xid, + OUT base_page_total_size xid, + OUT lsn_info_meta_file_num xid, + OUT lsn_info_meta_total_size xid, + OUT block_info_meta_file_num xid, + OUT block_info_meta_total_size xid + ) +RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_hot_standby_space_info'; \ No newline at end of file diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h index b8caca6ae2476db7da7d16a2b435ac61f7ea6883..8427048beadd545feb0da18e292055bbf19db7b5 100755 --- a/src/include/gs_thread.h +++ b/src/include/gs_thread.h @@ -124,6 +124,7 @@ typedef enum knl_thread_role { APPLY_WORKER, STACK_PERF_WORKER, DMS_AUXILIARY_THREAD, + EXRTO_RECYCLER, BARRIER_PREPARSE, TS_COMPACTION, TS_COMPACTION_CONSUMER, diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index 7161e8d1152ba728fe5a21ec00b5b3fa0eb21e1b..b9c572b816da4d9d6d412f2b8d0a6072fadd34a4 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -208,6 +208,14 @@ typedef struct knl_instance_attr_storage { int max_logical_replication_workers; char *redo_bind_cpu_attr; int max_active_gtt; + + /* extreme-rto standby read */ + int64 max_standby_base_page_size; + int64 max_standby_lsn_info_size; + int base_page_saved_interval; + double 
standby_force_recyle_ratio; + int standby_recycle_interval; + int standby_max_query_time; #ifndef ENABLE_MULTIPLE_NODES bool enable_save_confirmed_lsn; #endif diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index b5ae35361618172054306bfd912aaf7a4cf8602f..592d4e0339c38ec31e508f7270036f444c64e8bd 100755 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -113,6 +113,8 @@ enum knl_parallel_redo_state { REDO_DONE, }; +typedef struct ExrtoSnapshotData* ExrtoSnapshot; + /* all process level attribute which expose to user */ typedef struct knl_instance_attr { @@ -216,6 +218,7 @@ typedef struct knl_g_pid_context { ThreadId LogicalReadWorkerPID; ThreadId LogicalDecoderWorkerPID; ThreadId BarrierPreParsePID; + ThreadId exrto_recycler_pid; ThreadId ApplyLauncerPID; ThreadId StackPerfPID; ThreadId CfsShrinkerPID; @@ -746,7 +749,7 @@ typedef struct knl_g_parallel_redo_context { char* ali_buf; XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM]; RedoCpuBindControl redoCpuBindcontrl; - + XLogRecPtr global_recycle_lsn; /* extreme-rto standby read */ HTAB **redoItemHash; /* used in ondemand extreme RTO */ } knl_g_parallel_redo_context; @@ -918,6 +921,7 @@ typedef struct knl_g_undo_context { pg_atomic_uint64 globalFrozenXid; /* Oldest transaction id which is having undo. 
*/ pg_atomic_uint64 globalRecycleXid; + bool is_exrto_residual_undo_file_recycled; } knl_g_undo_context; typedef struct knl_g_flashback_context { diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h index 0112d3a7b169dd2b66617b2ab8a3f4b81083ca02..aaadd977a292fc95e5deb8635801d561e100bfa2 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -685,6 +685,9 @@ typedef struct knl_u_utils_context { HTAB* set_user_params_htab; DestReceiver* spi_printtupDR; + + /* backend read lsn for read on standby in extreme rto */ + XLogRecPtr exrto_read_lsn; } knl_u_utils_context; typedef struct knl_u_security_context { @@ -1851,6 +1854,9 @@ typedef struct knl_u_storage_context { /* md.cpp */ MemoryContext MdCxt; /* context for all md.c allocations */ + /* exrto_file.cpp */ + MemoryContext exrto_standby_read_file_cxt; + /* sync.cpp */ MemoryContext pendingOpsCxt; struct HTAB *pendingOps; diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 12d8a18f9ed9ab069c5a965f557dad4bc6cc7429..088c655bbf0a3430ffcdb8ea42d2990798a10723 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -77,6 +77,7 @@ #include "port/pg_crc32c.h" #include "ddes/dms/ss_common_attr.h" #include "ddes/dms/ss_txnstatus.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #define MAX_PATH_LEN 1024 extern const int g_reserve_param_num; @@ -1944,8 +1945,22 @@ typedef struct { volatile sig_atomic_t got_SIGHUP; volatile sig_atomic_t sleep_long; volatile sig_atomic_t check_repair; + void *redo_worker_ptr; } knl_t_page_redo_context; +typedef struct _StandbyReadLsnInfoArray { + XLogRecPtr *lsn_array; + uint32 lsn_num; + XLogRecPtr base_page_lsn; + BasePagePosition base_page_pos; +} StandbyReadLsnInfoArray; + +typedef struct { + volatile sig_atomic_t shutdown_requested; + volatile sig_atomic_t got_SIGHUP; + StandbyReadLsnInfoArray lsn_info; +} knl_t_exrto_recycle_context; + typedef struct knl_t_startup_context { /* 
* Flags set by interrupt handlers for later service in the redo loop. @@ -2564,8 +2579,10 @@ typedef struct knl_t_storage_context { struct HTAB* SharedBufHash; struct HTAB* BufFreeListHash; struct BufferDesc* InProgressBuf; + struct BufferDesc* ParentInProgressBuf; /* local state for StartBufferIO and related functions */ volatile bool IsForInput; + volatile bool ParentIsForInput; /* local state for LockBufferForCleanup */ struct BufferDesc* PinCountWaitBuf; /* local state for aio clean up resource */ @@ -3483,6 +3500,7 @@ typedef struct knl_thrd_context { knl_t_percentile_context percentile_cxt; knl_t_perf_snap_context perf_snap_cxt; knl_t_page_redo_context page_redo_cxt; + knl_t_exrto_recycle_context exrto_recycle_cxt; knl_t_parallel_decode_worker_context parallel_decode_cxt; knl_t_logical_read_worker_context logicalreadworker_cxt; knl_t_heartbeat_context heartbeat_cxt; diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index dadc6b8f9fb51d2ffd678e783b24eccfa51ac1c8..22c553a2ca3ce36bdc6c2be9cf62b7214d979095 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -223,6 +223,7 @@ typedef enum pg_enc { PG_WIN1257, /* windows-1257 */ PG_KOI8U, /* KOI8-U */ PG_GB18030, /* GB18030 */ + PG_GB18030_2022, /* GB18030-2022 */ /* PG_ENCODING_BE_LAST points to the above entry */ /* followings are for client encoding only */ @@ -235,7 +236,7 @@ typedef enum pg_enc { } pg_enc; -#define PG_ENCODING_BE_LAST PG_GB18030 +#define PG_ENCODING_BE_LAST PG_GB18030_2022 /* * Please use these tests before access to pg_encconv_tbl[] diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 056ebc32a25ef6a111bcdf0528e676807f6caf09..abe48258f369469f9176744fd0fc3e5c77eec225 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -135,6 +135,7 @@ extern const uint32 CREATE_INDEX_IF_NOT_EXISTS_VERSION_NUM; extern const uint32 SLOW_SQL_VERSION_NUM; extern const uint32 INDEX_HINT_VERSION_NUM; extern const uint32 
CREATE_TABLE_AS_VERSION_NUM; +extern const uint32 GB18030_2022_VERSION_NUM; extern void register_backend_version(uint32 backend_version); extern bool contain_backend_version(uint32 version_number); @@ -565,6 +566,7 @@ typedef enum { XlogCopyBackendProcess, BarrierPreParseBackendProcess, DmsAuxiliaryProcess, + ExrtoRecyclerProcess, NUM_SINGLE_AUX_PROC, /* Sentry for auxiliary type with single thread. */ /* @@ -609,6 +611,7 @@ typedef enum { #define AmTsCompactionAuxiliaryProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == TsCompactionAuxiliaryProcess) #define AmPageRedoWorker() (t_thrd.bootstrap_cxt.MyAuxProcType == PageRedoProcess) #define AmDmsReformProcProcess() (t_thrd.role == DMS_WORKER && t_thrd.dms_cxt.is_reform_proc) +#define AmErosRecyclerProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == ExrtoRecyclerProcess) diff --git a/src/include/postgres.h b/src/include/postgres.h index 724c7895d923712f62cb577c5d78b33781ad1aa0..429da1baabc9b3155ceefde345e6b4604bed2c5b 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -1026,4 +1026,6 @@ extern void exec_describe_statement_message(const char* stmt_name); extern void exec_get_ddl_params(StringInfo input_message); #endif +#define STRUCT_CONTAINER(type, membername, ptr) ((type *)((char *)(ptr)-offsetof(type, membername))) + #endif /* POSTGRES_H */ diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 82e7ad1d25b79a45d0a0705ec564bd955389e486..0902eae8d4dfcb856e047843d8ed768f512c5778 100755 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -245,6 +245,9 @@ extern bool SetDBStateFileState(DbState state, bool optional); extern void GPCResetAll(); extern void initRandomState(TimestampTz start_time, TimestampTz stop_time); extern bool PMstateIsRun(void); +extern bool pm_state_is_startup(); +extern bool pm_state_is_recovery(); +extern bool pm_state_is_hot_standby(); extern ServerMode GetHaShmemMode(void); extern void 
InitProcessAndShareMemory(); extern void InitShmemForDcfCallBack(); diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 26d889f5d82a2018eef516dcddaff926109266ac..ea1fb952f672161c7989fa6ad346f4107644867d 100755 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -41,6 +41,9 @@ #define IS_PAUSE_BY_TARGET_BARRIER 0x00000001 #define IS_CANCEL_LOG_CTRL 0x00000010 +#define IS_DISASTER_RECOVER_MODE \ + (static_cast<ClusterRunMode>(g_instance.attr.attr_common.stream_cluster_run_mode) == RUN_MODE_STANDBY) /* true when the configured stream_cluster_run_mode is RUN_MODE_STANDBY */ + #ifdef ENABLE_MULTIPLE_NODES #define AM_HADR_CN_WAL_RECEIVER (t_thrd.postmaster_cxt.HaShmData->is_cross_region && \ t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE && IS_PGXC_COORDINATOR) diff --git a/src/include/storage/buf/buf_internals.h b/src/include/storage/buf/buf_internals.h index b5a2d24b6314b42b9ac82f8a340fe25704480f81..c233d89a18f9cbba0dd339c72bdbab84b5a0a2c2 100644 --- a/src/include/storage/buf/buf_internals.h +++ b/src/include/storage/buf/buf_internals.h @@ -57,6 +57,7 @@ */ #define BM_IN_MIGRATE (1U << 16) /* buffer is migrating */ #define BM_IS_META (1U << 17) +#define BM_IS_TMP_BUF (1U << 21) /* temp buf, can not write to disk */ #define BM_LOCKED (1U << 22) /* buffer header is locked */ #define BM_DIRTY (1U << 23) /* data needs writing */ #define BM_VALID (1U << 24) /* data is valid */ @@ -285,6 +286,23 @@ extern "C" { pg_atomic_write_u32(&(desc)->state, (s) & (~BM_LOCKED)); \ } while (0) +#define FIX_SEG_BUFFER_TAG(node, tag, rel_node, block_num) \ + do { \ + if (IsSegmentFileNode(node)) { \ + tag.rnode.relnode = rel_node; \ + tag.blocknum = block_num; \ + tag.rnode.bucketnode = SegmentBktId; \ + } \ + } while (0) + +#define FIX_BUFFER_DESC(buf, pblk) \ + do { \ + Assert(PhyBlockIsValid(*pblk)); \ + buf->seg_fileno = pblk->rel_node; \ + buf->seg_blockno = pblk->block; \ + buf->seg_lsn = pblk->lsn; \ + } while (0) + extern bool retryLockBufHdr(BufferDesc* desc, uint32* 
buf_state); /* * The PendingWriteback & WritebackContext structure are used to keep diff --git a/src/include/storage/buf/bufmgr.h b/src/include/storage/buf/bufmgr.h index 5581d23f6e9133308cc0047dc054c00db8266d11..61aa35e549168cf5771b11650577f8231c455c88 100644 --- a/src/include/storage/buf/bufmgr.h +++ b/src/include/storage/buf/bufmgr.h @@ -320,6 +320,7 @@ extern void DropRelFileNodeAllBuffersUsingScan(RelFileNode* rnode, int rnode_len extern void DropRelFileNodeOneForkAllBuffersUsingHash(HTAB *relfilenode_hashtbl); extern void DropDatabaseBuffers(Oid dbid); +extern void buffer_drop_exrto_standby_read_buffers(); extern BlockNumber PartitionGetNumberOfBlocksInFork(Relation relation, Partition partition, ForkNumber forkNum, bool estimate = false); @@ -423,4 +424,13 @@ extern void ReadBuffer_common_for_check(ReadBufferMode readmode, BufferDesc* buf const XLogPhyBlock *pblk, Block bufBlock); extern BufferDesc *RedoForOndemandExtremeRTOQuery(BufferDesc *bufHdr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode); +extern Buffer standby_read_buf(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, + BufferAccessStrategy strategy); +typedef struct SMgrRelationData *SMgrRelation; +BufferDesc *BufferAlloc(const RelFileNode &rel_file_node, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, + BufferAccessStrategy strategy, bool *foundPtr, const XLogPhyBlock *pblk); +Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk); +void buffer_in_progress_pop(); +void buffer_in_progress_push(); #endif diff --git a/src/include/storage/buf/bufpage.h b/src/include/storage/buf/bufpage.h index cb0fbcc98b7aed0df32cc554e6394c1d129b6052..5384af391fbdf74451227c6696740ad5df3de709 100644 --- a/src/include/storage/buf/bufpage.h +++ b/src/include/storage/buf/bufpage.h @@ -206,6 +206,7 @@ 
typedef HeapPageHeaderData* HeapPageHeader; #define PD_ENCRYPT_PAGE 0x0020 /* is a encryt cluster */ #define PD_CHECKSUM_FNV1A 0x0040 /* page checksum using FNV-1a hash */ #define PD_JUST_AFTER_FPW 0x0080 /* page just after redo full page write */ +#define PD_EXRTO_PAGE 0x0400 /* is a rto file page */ #define PD_TDE_PAGE 0x0100 /* there is TdePageInfo at the end of a page */ #define PD_VALID_FLAG_BITS 0x01FF /* OR of all valid pd_flags bits */ diff --git a/src/include/storage/nvm/nvm.h b/src/include/storage/nvm/nvm.h index 5501081f052852e9fd4cb15198551a3412d608bb..bc5db0fe2f385c4d7bc866e2872b9b6ad9c6fa03 100644 --- a/src/include/storage/nvm/nvm.h +++ b/src/include/storage/nvm/nvm.h @@ -28,7 +28,7 @@ void nvm_init(void); -BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num, +BufferDesc *NvmBufferAlloc(const RelFileNode& rel_file_node, char relpersistence, ForkNumber fork_num, BlockNumber block_num, BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk); #endif diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index bc122ff3f5efe361adc69eec163ff96cec438858..b1abe2d8bcd1236d1bc62a769d3b9053d42ef8ff 100755 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -272,6 +272,8 @@ struct PGPROC { uint64 snap_refcnt_bitmap; #endif + XLogRecPtr exrto_read_lsn; /* calculate recycle lsn for read on standby in extreme rto */ + TimestampTz exrto_gen_snap_time; LWLock* subxidsLock; struct XidCache subxids; /* cache for subtransaction XIDs */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 91f114628412fea74197376af90332b4e4605baa..290d88c504e94e0c5feb0755d0f5fd87553e2cb4 100755 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -115,6 +115,8 @@ extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, CommitSeqNo limitXminCSN = InvalidCommitSeqNo, TransactionId* xminArray = NULL); extern ThreadId 
CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalReason sigmode); +extern bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, + bool reach_max_check_times); extern bool MinimumActiveBackends(int min); extern int CountDBBackends(Oid database_oid); diff --git a/src/include/storage/smgr/relfilenode.h b/src/include/storage/smgr/relfilenode.h index 89a3725c788b2cdcbc79722c763a249b71d8856c..c06955bd04ca3a5fe91e73de4675802f4a206452 100644 --- a/src/include/storage/smgr/relfilenode.h +++ b/src/include/storage/smgr/relfilenode.h @@ -116,7 +116,7 @@ typedef struct RelFileNodeV2 { } RelFileNodeV2; -#define IsSegmentFileNode(rnode) ((rnode).bucketNode > InvalidBktId) +#define IsSegmentFileNode(rnode) ((rnode).bucketNode > InvalidBktId && (rnode).spcNode != EXRTO_BLOCK_INFO_SPACE_OID) #define IsHeapFileNode(rnode) (!IsSegmentFileNode(rnode)) #define IsSegmentPhysicalRelNode(rNode) (IsSegmentFileNode(rNode) && (rNode).relNode <= 5) diff --git a/src/include/storage/smgr/smgr.h b/src/include/storage/smgr/smgr.h index 9a68cb4be44c4099005d7d5819e1d6bec44b38db..9acbb1643392fae636d70d249ba5897c09d7827d 100644 --- a/src/include/storage/smgr/smgr.h +++ b/src/include/storage/smgr/smgr.h @@ -125,12 +125,20 @@ enum SMGR_READ_STATUS { #define UNDO_DB_OID (9) #define UNDO_SLOT_DB_OID (10) +#define EXRTO_BASE_PAGE_SPACE_OID (6) +#define EXRTO_LSN_INFO_SPACE_OID (7) +#define EXRTO_BLOCK_INFO_SPACE_OID (8) +#define EXRTO_FORK_NUM 3 + #define MD_MANAGER (0) #define UNDO_MANAGER (1) #define SEGMENT_MANAGER (2) +#define EXRTO_MANAGER (3) #define IS_UNDO_RELFILENODE(rnode) ((rnode).dbNode == UNDO_DB_OID || (rnode).dbNode == UNDO_SLOT_DB_OID) - +#define IS_EXRTO_RELFILENODE(rnode) ((rnode).spcNode == EXRTO_BASE_PAGE_SPACE_OID || \ + (rnode).spcNode == EXRTO_LSN_INFO_SPACE_OID || \ + (rnode).spcNode == EXRTO_BLOCK_INFO_SPACE_OID) /* * On Windows, we have to interpret EACCES as possibly meaning the same as * ENOENT, because if a file is 
unlinked-but-not-yet-gone on that platform, @@ -250,4 +258,16 @@ extern void partition_create_new_storage(Relation rel, Partition part, const Rel extern ScalarToDatum GetTransferFuncByTypeOid(Oid attTypeOid); extern bool check_unlink_rel_hashtbl(RelFileNode rnode, ForkNumber forknum); +/* storage_exrto_file.cpp */ +void exrto_init(void); +void exrto_close(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); +bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); +void exrto_unlink(const RelFileNodeBackend& rnode, ForkNumber forknum, bool is_redo, BlockNumber blocknum); +void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skip_fsync); +SMGR_READ_STATUS exrto_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); +void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool skip_fsync); +BlockNumber exrto_nblocks(SMgrRelation reln, ForkNumber forknum); +void exrto_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks); +void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks); + #endif /* SMGR_H */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index de5b0d85a0be78daf745e2938c6b30729189b67e..1c38951e6bed28b87833f51b697eed5bc80b1ab5 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -265,6 +265,32 @@ typedef struct SnapshotData { GTM_SnapshotType gtm_snapshot_type; } SnapshotData; +typedef struct ExrtoSnapshotData* ExrtoSnapshot; + +typedef struct ExrtoSnapshotData { + /* + * The remaining fields are used only for MVCC snapshots, and are normally + * just zeroes in special snapshots. (But xmin and xmax are used + * specially by HeapTupleSatisfiesDirty.) + * + * An MVCC snapshot can never see the effects of XIDs >= xmax. It can see + * the effects of all older XIDs except those listed in the snapshot. 
xmin + * is stored as an optimization to avoid needing to search the XID arrays + * for most tuples. + */ + TransactionId xmin; /* all XID < xmin are visible to me */ + TransactionId xmax; /* all XID >= xmax are invisible to me */ + + /* + * This snapshot can see the effects of all transactions with CSN <= + * snapshotcsn. + */ + CommitSeqNo snapshot_csn; + + XLogRecPtr read_lsn; /* xact lsn when generate snapshot */ + TimestampTz gen_snap_time; +} ExrtoSnapshotData; + /* * Result codes for AM API tuple_{update,delete,lock}, and for visibility. */ diff --git a/src/test/regress/input/ts_gb18030_utf8.source b/src/test/regress/input/ts_gb18030_utf8.source new file mode 100644 index 0000000000000000000000000000000000000000..d24e74de0f7c82d0a0944ee594ed0be92a0de15b --- /dev/null +++ b/src/test/regress/input/ts_gb18030_utf8.source @@ -0,0 +1,414 @@ +create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0; +\c gb18030_2022 + +show server_encoding; +set client_encoding = 'UTF8'; +show client_encoding; +--1. 
编码映射关系发生改变的字符: +-- 查看未发生变化的字符串: +select convert_to('中国', 'GB18030-2022'); + +--插入了涉及GB18030-2000升级GB18030-2022后GB18030与UTF-8转换关系变更的19个字符。 +create table tb_test(id int, content text); + +insert into tb_test (id , content) +select 1, convert_from('\xA8BC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 2, convert_from('\xA6D9', 'GB18030-2022'); + +insert into tb_test (id , content) +select 3, convert_from('\xA6DA', 'GB18030-2022'); + +insert into tb_test (id , content) +select 4, convert_from('\xA6DB', 'GB18030-2022'); + +insert into tb_test (id , content) +select 5, convert_from('\xA6DC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 6, convert_from('\xA6DD', 'GB18030-2022'); + +insert into tb_test (id , content) +select 7, convert_from('\xA6DE', 'GB18030-2022'); + +insert into tb_test (id , content) +select 8, convert_from('\xA6DF', 'GB18030-2022'); + +insert into tb_test (id , content) +select 9, convert_from('\xA6EC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 10, convert_from('\xA6ED', 'GB18030-2022'); + +insert into tb_test (id , content) +select 11, convert_from('\xA6F3', 'GB18030-2022'); + +insert into tb_test (id , content) +select 12, convert_from('\xFE59', 'GB18030-2022'); + +insert into tb_test (id , content) +select 13, convert_from('\xFE61', 'GB18030-2022'); + +insert into tb_test (id , content) +select 14, convert_from('\xFE66', 'GB18030-2022'); + +insert into tb_test (id , content) +select 15, convert_from('\xFE67', 'GB18030-2022'); + +insert into tb_test (id , content) +select 16, convert_from('\xFE6D', 'GB18030-2022'); + +insert into tb_test (id , content) +select 17, convert_from('\xFE7E', 'GB18030-2022'); + +insert into tb_test (id , content) +select 18, convert_from('\xFE90', 'GB18030-2022'); + +insert into tb_test (id , content) +select 19, convert_from('\xFEA0', 'GB18030-2022'); + +insert into tb_test (id , content) select 20, convert_from('\x8135F437', 'GB18030-2022'); +insert into tb_test 
(id , content) select 21, convert_from('\x84318236', 'GB18030-2022'); +insert into tb_test (id , content) select 22, convert_from('\x84318238', 'GB18030-2022'); +insert into tb_test (id , content) select 23, convert_from('\x84318237', 'GB18030-2022'); +insert into tb_test (id , content) select 24, convert_from('\x84318239', 'GB18030-2022'); +insert into tb_test (id , content) select 25, convert_from('\x84318330', 'GB18030-2022'); +insert into tb_test (id , content) select 26, convert_from('\x84318331', 'GB18030-2022'); +insert into tb_test (id , content) select 27, convert_from('\x84318332', 'GB18030-2022'); +insert into tb_test (id , content) select 28, convert_from('\x84318333', 'GB18030-2022'); +insert into tb_test (id , content) select 29, convert_from('\x84318334', 'GB18030-2022'); +insert into tb_test (id , content) select 30, convert_from('\x84318335', 'GB18030-2022'); +insert into tb_test (id , content) select 31, convert_from('\x82359037', 'GB18030-2022'); +insert into tb_test (id , content) select 32, convert_from('\x82359038', 'GB18030-2022'); +insert into tb_test (id , content) select 33, convert_from('\x82359039', 'GB18030-2022'); +insert into tb_test (id , content) select 34, convert_from('\x82359130', 'GB18030-2022'); +insert into tb_test (id , content) select 35, convert_from('\x82359131', 'GB18030-2022'); +insert into tb_test (id , content) select 36, convert_from('\x82359132', 'GB18030-2022'); +insert into tb_test (id , content) select 37, convert_from('\x82359133', 'GB18030-2022'); +insert into tb_test (id , content) select 38, convert_from('\x82359134', 'GB18030-2022'); + +--显示这19个字符 +select * from tb_test order by id; +--查看GB18030-2022编码 +select convert_to(content, 'GB18030-2022') from tb_test order by id; +--查看GB18030-2000编码 +select convert_to(content, 'GB18030') from tb_test order by id; +--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。 +select convert_to(content, 'utf8') from tb_test order by id; + + +--2. 
新增字符举例 +--CBJ统一汉字扩充B +select convert_from('\x95328236', 'GB18030-2022'); +select convert_from('\x9835F336', 'GB18030-2022'); +--CJK统一汉字 +select convert_from('\x82358F33', 'GB18030-2022'); +select convert_from('\x82359636', 'GB18030-2022'); +--CJK统一汉子扩充C +select convert_from('\x9835F738', 'GB18030-2022'); +select convert_from('\x98399E36', 'GB18030-2022'); +--CJK统一汉子扩充D +select convert_from('\x98399F38', 'GB18030-2022'); +select convert_from('\x9839B539', 'GB18030-2022'); +--CJK统一汉子扩充E +select convert_from('\x9839B632', 'GB18030-2022'); +select convert_from('\x9933FE33', 'GB18030-2022'); +--CJK统一汉子扩充F +select convert_from('\x99348138', 'GB18030-2022'); +select convert_from('\x9939F730', 'GB18030-2022'); +--康熙部首 +select convert_from('\x81398B32', 'GB18030-2022'); +select convert_from('\x8139A035', 'GB18030-2022'); +--西双版纳新傣文字符 +select convert_from('\x8134F932', 'GB18030-2022'); +select convert_from('\x81358437', 'GB18030-2022'); +--西双版纳老傣文字符 +select convert_from('\x81358B32', 'GB18030-2022'); +select convert_from('\x81359933', 'GB18030-2022'); +--傈僳文字符 +select convert_from('\x82369535', 'GB18030-2022'); +select convert_from('\x82369A32', 'GB18030-2022'); +--蒙古文BIRGA符号 +select convert_from('\x9034C538', 'GB18030-2022'); +select convert_from('\x9034C730', 'GB18030-2022'); +--滇东北苗文字符 +select convert_from('\x9232C636', 'GB18030-2022'); +select convert_from('\x9232D625', 'GB18030-2022'); + +--插入了涉及2000升级到2022新增的字符举例 +create table in_test(id int, content text); + +--CBJ统一汉字扩充B +insert into in_test (id , content) +select 1,convert_from('\x95328236', 'GB18030-2022'); + +insert into in_test (id , content) +select 2,convert_from('\x9835F336', 'GB18030-2022'); + +--CJK统一汉字 +insert into in_test (id , content) +select 3,convert_from('\x82358F33', 'GB18030-2022'); + +insert into in_test (id , content) +select 4,convert_from('\x82359636', 'GB18030-2022'); + +--CJK统一汉子扩充C +insert into in_test (id , content) +select 5,convert_from('\x9835F738', 'GB18030-2022'); + +insert into in_test 
(id , content) +select 6,convert_from('\x98399E36', 'GB18030-2022'); + +--CJK统一汉子扩充D +insert into in_test (id , content) +select 7,convert_from('\x98399F38', 'GB18030-2022'); + +insert into in_test (id , content) +select 8,convert_from('\x9839B539', 'GB18030-2022'); + +--CJK统一汉子扩充E +insert into in_test (id , content) +select 9,convert_from('\x9839B632', 'GB18030-2022'); + +insert into in_test (id , content) +select 10,convert_from('\x9933FE33', 'GB18030-2022'); + +--CJK统一汉子扩充F +insert into in_test (id , content) +select 11,convert_from('\x99348138', 'GB18030-2022'); + +insert into in_test (id , content) +select 12,convert_from('\x9939F730', 'GB18030-2022'); + +--康熙部首 +insert into in_test (id , content) +select 13,convert_from('\x81398B32', 'GB18030-2022'); + +insert into in_test (id , content) +select 14,convert_from('\x8139A035', 'GB18030-2022'); + +--西双版纳新傣文字符 +insert into in_test (id , content) +select 15,convert_from('\x8134F932', 'GB18030-2022'); + +insert into in_test (id , content) +select 16,convert_from('\x81358437', 'GB18030-2022'); + +--西双版纳老傣文字符 +insert into in_test (id , content) +select 17,convert_from('\x81358B32', 'GB18030-2022'); + +insert into in_test (id , content) +select 18,convert_from('\x81359933', 'GB18030-2022'); + +--傈僳文字符 +insert into in_test (id , content) +select 19,convert_from('\x82369535', 'GB18030-2022'); + +insert into in_test (id , content) +select 20,convert_from('\x82369A32', 'GB18030-2022'); + +--蒙古文BIRGA符号 +insert into in_test (id , content) +select 21,convert_from('\x9034C538', 'GB18030-2022'); + +insert into in_test (id , content) +select 22,convert_from('\x9034C730', 'GB18030-2022'); + +--滇东北苗文字符 +insert into in_test (id , content) +select 23,convert_from('\x9232C636', 'GB18030-2022'); + +insert into in_test (id , content) +select 24,convert_from('\x9232D625', 'GB18030-2022'); + +--显示这24个字符 +select * from in_test order by id; +--查看GB18030-2022编码 +select convert_to(content, 'GB18030-2022') from in_test order by id; 
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。 +select convert_to(content, 'utf8') from in_test order by id; + +drop table in_test; +drop table tb_test; + +--3. 正常的增删改查语句测试 +create table 表1(id int, 人名 name); + +insert into 表1(id, 人名) values(1, '小明!'); +select * from 表1; + +alter table 表1 drop 人名; +select * from 表1; + +alter table 表1 add 学校 text; +insert into 表1(id , 学校) select 2, convert_to('@华为大学¥', 'GB18030-2022'); +select * from 表1; + +drop table 表1; + +--4. 不存在映射关系时 +select convert('\xFD308130', 'GB18030-2022', 'UTF8'); +select convert('\xFE39FE39', 'GB18030-2022', 'UTF8'); + +--5. 测试gb18030_2022数据库中的字符串相关 +-- E021-03 character string literals +SELECT 'first line' +' - next line' + ' - third line' + AS "Three lines to one"; + +-- illegal string continuation syntax +SELECT 'first line' +' - next line' /* this comment is not allowed here */ +' - third line' + AS "Illegal comment within continuation"; + +-- Unicode escapes +SET standard_conforming_strings TO on; + +SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061"; +SELECT U&'d!0061t\+000061' UESCAPE '!' 
AS U&"d*0061t\+000061" UESCAPE '*'; + +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + +SET bytea_output TO hex; + +SELECT CAST(name 'namefield' AS text) AS "text(name)"; + +-- E021-09 trim function +SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; + +-- E021-06 substring expression +SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; + +-- PostgreSQL extension to allow using back reference in replace string; +SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); + +-- set so we can tell NULL from empty string +\pset null '\\N' + +-- return all matches from regexp +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); + +-- split string on regexp +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); + +-- change NULL-display back +\pset null '' + +-- E021-11 position expression +SELECT POSITION('4' IN '1234567890') = '4' AS "4"; + +SELECT POSITION('5' IN '1234567890') = '5' AS "5"; + +-- T312 character overlay function +SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f"; + +-- E061-04 like predicate +SELECT 'hawkeye' LIKE 'h%' AS "true"; +SELECT 'hawkeye' NOT LIKE 'h%' AS "false"; + +-- unused escape character +SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true"; +SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS 
"false"; + +-- +-- test ILIKE (case-insensitive LIKE) +-- Be sure to form every test as an ILIKE/NOT ILIKE pair. +-- + +SELECT 'hawkeye' ILIKE 'h%' AS "true"; +SELECT 'hawkeye' NOT ILIKE 'h%' AS "false"; + +--6. 使用字符串相关函数 +--重复字符串 +select repeat('中国', 3); + +--返回字符串的前n个字符 +select left('中国!number1', 7); + +--返回长度 +select length('中国!number1'); + +--反转字符串 +select reverse('中国!number1'); + +--md5算法加密 +select md5('中国!number1'); + +-- test strpos +SELECT strpos('abcdef', 'cd') AS "pos_3"; +SELECT strpos('abcdef', 'xy') AS "pos_0"; + +SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; + +select split_part('joeuser@mydatabase','@',3) AS "empty string"; + +select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; + +--返回字符串中第一个字符的十进制表示形式 +select ascii('xyz'); +select ascii('中xyz'); +select ascii('ḿxyz'); + +-- 7. 检查GB18030-2022与GB18030的关系 +select convert('中国', 'GB18030', 'GB18030-2022'); + +select convert('中国', 'GB18030-2022', 'GB18030'); + +select convert('\xA8BC', 'GB18030-2022', 'UTF8'); + +select convert('\xA8BC', 'GB18030', 'UTF8'); + +\c regression +clean connection to all force for database gb18030_2022; +drop database gb18030_2022; + +-- 8. 在UTF8环境下检测GB18030-2022与UTF8的转换 +select convert('中国&华为*GaussDB', 'UTF8', 'GB18030-2022'); + +select convert('ḿ', 'UTF8', 'GB18030-2022'); + +-- 9. 测试create database时encoding与本地设置不匹配 +create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='en_US.utf-8' LC_CTYPE ='en_US.utf-8' TEMPLATE=template0; + +--10. 测试initdb +\! rm -f @abs_bindir@/test_initdb.log +\! mkdir -p @testtablespace@/test2 +\! mkdir -p @testtablespace@/test2/pg_location +\! @abs_bindir@/gs_initdb -S -D @testtablespace@/test2 --nodename coorn2 -U test_initdb -w test@123 --locale=zh_CN.gb18030 -E GB18030_2022 >> @abs_bindir@/test_initdb2.log 2>&1 +\! cat @abs_bindir@/test_initdb2.log | grep ok +\! rm -f @abs_bindir@/test_initdb2.log +\! rm -rf @testtablespace@/test2 + +--11. 
测试升级回滚 +select oid, * from pg_conversion where conname like '%gb18030%' order by conname; + +select oid, * from pg_proc where proname like '%gb18030%' order by proname; + +--12. 看护client_encoding不能设置为GB18030_2022 +set client_encoding = GB18030_2022; + +ALTER SESSION SET NAMES 'GB18030_2022'; \ No newline at end of file diff --git a/src/test/regress/output/recovery_2pc_tools.source b/src/test/regress/output/recovery_2pc_tools.source index 6b0ac7591b0481f91dd21e1633c5863fbdb0a702..2fd58569c7f50efda606ffaa9ee9b5351da45919 100644 --- a/src/test/regress/output/recovery_2pc_tools.source +++ b/src/test/regress/output/recovery_2pc_tools.source @@ -78,6 +78,7 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c backtrace_min_messages | enum | | | backwrite_quantity | integer | 8kB | 128 | 131072 basebackup_timeout | integer | s | 0 | 2147483647 + base_page_saved_interval | integer | | 5 | 2000 bbox_blanklist_items | string | | | bbox_dump_count | integer | | 1 | 20 bbox_dump_path | string | | | @@ -487,6 +488,8 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c max_size_for_xlog_prune | integer | kB | 0 | 2147483647 max_stack_depth | integer | kB | 100 | 2147483647 max_standby_archive_delay | integer | ms | -1 | 2147483647 + max_standby_base_page_size | int64 | | 0 | 576460752303423487 + max_standby_lsn_info_size | int64 | | 0 | 576460752303423487 max_standby_streaming_delay | integer | ms | -1 | 2147483647 max_sync_workers_per_subscription | integer | | 0 | 262143 max_undo_workers | integer | | 1 | 100 @@ -660,6 +663,9 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c ss_txnstatus_cache_size | integer | | 0 | 524288 ss_work_thread_count | integer | | 16 | 128 standard_conforming_strings | bool | | | + standby_force_recyle_ratio | real | | 0 | 1 + standby_max_query_time | integer | s | 0 | 86400 + standby_recycle_interval | integer | s | 0 | 86400 
standby_shared_buffers_fraction | real | | 0.1 | 1 statement_timeout | integer | ms | 0 | 2147483647 stats_temp_directory | string | | | diff --git a/src/test/regress/output/ts_gb18030_utf8.source b/src/test/regress/output/ts_gb18030_utf8.source new file mode 100644 index 0000000000000000000000000000000000000000..d712a77da683068dd0837768576fd9e96f2b9063 --- /dev/null +++ b/src/test/regress/output/ts_gb18030_utf8.source @@ -0,0 +1,1142 @@ +create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0; +\c gb18030_2022 + +show server_encoding; + server_encoding +----------------- + GB18030_2022 +(1 row) + +set client_encoding = 'UTF8'; +show client_encoding; + client_encoding +----------------- + UTF8 +(1 row) + +--1. 编码映射关系发生改变的字符: +-- 查看未发生变化的字符串: +select convert_to('中国', 'GB18030-2022'); + convert_to +------------ + \xd6d0b9fa +(1 row) + + +--插入了涉及GB18030-2000升级GB18030-2022后GB18030与UTF-8转换关系变更的19个字符。 +create table tb_test(id int, content text); + +insert into tb_test (id , content) +select 1, convert_from('\xA8BC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 2, convert_from('\xA6D9', 'GB18030-2022'); + +insert into tb_test (id , content) +select 3, convert_from('\xA6DA', 'GB18030-2022'); + +insert into tb_test (id , content) +select 4, convert_from('\xA6DB', 'GB18030-2022'); + +insert into tb_test (id , content) +select 5, convert_from('\xA6DC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 6, convert_from('\xA6DD', 'GB18030-2022'); + +insert into tb_test (id , content) +select 7, convert_from('\xA6DE', 'GB18030-2022'); + +insert into tb_test (id , content) +select 8, convert_from('\xA6DF', 'GB18030-2022'); + +insert into tb_test (id , content) +select 9, convert_from('\xA6EC', 'GB18030-2022'); + +insert into tb_test (id , content) +select 10, convert_from('\xA6ED', 'GB18030-2022'); + +insert into tb_test (id , content) +select 11, convert_from('\xA6F3', 
'GB18030-2022'); + +insert into tb_test (id , content) +select 12, convert_from('\xFE59', 'GB18030-2022'); + +insert into tb_test (id , content) +select 13, convert_from('\xFE61', 'GB18030-2022'); + +insert into tb_test (id , content) +select 14, convert_from('\xFE66', 'GB18030-2022'); + +insert into tb_test (id , content) +select 15, convert_from('\xFE67', 'GB18030-2022'); + +insert into tb_test (id , content) +select 16, convert_from('\xFE6D', 'GB18030-2022'); + +insert into tb_test (id , content) +select 17, convert_from('\xFE7E', 'GB18030-2022'); + +insert into tb_test (id , content) +select 18, convert_from('\xFE90', 'GB18030-2022'); + +insert into tb_test (id , content) +select 19, convert_from('\xFEA0', 'GB18030-2022'); + +insert into tb_test (id , content) select 20, convert_from('\x8135F437', 'GB18030-2022'); +insert into tb_test (id , content) select 21, convert_from('\x84318236', 'GB18030-2022'); +insert into tb_test (id , content) select 22, convert_from('\x84318238', 'GB18030-2022'); +insert into tb_test (id , content) select 23, convert_from('\x84318237', 'GB18030-2022'); +insert into tb_test (id , content) select 24, convert_from('\x84318239', 'GB18030-2022'); +insert into tb_test (id , content) select 25, convert_from('\x84318330', 'GB18030-2022'); +insert into tb_test (id , content) select 26, convert_from('\x84318331', 'GB18030-2022'); +insert into tb_test (id , content) select 27, convert_from('\x84318332', 'GB18030-2022'); +insert into tb_test (id , content) select 28, convert_from('\x84318333', 'GB18030-2022'); +insert into tb_test (id , content) select 29, convert_from('\x84318334', 'GB18030-2022'); +insert into tb_test (id , content) select 30, convert_from('\x84318335', 'GB18030-2022'); +insert into tb_test (id , content) select 31, convert_from('\x82359037', 'GB18030-2022'); +insert into tb_test (id , content) select 32, convert_from('\x82359038', 'GB18030-2022'); +insert into tb_test (id , content) select 33, convert_from('\x82359039', 
'GB18030-2022'); +insert into tb_test (id , content) select 34, convert_from('\x82359130', 'GB18030-2022'); +insert into tb_test (id , content) select 35, convert_from('\x82359131', 'GB18030-2022'); +insert into tb_test (id , content) select 36, convert_from('\x82359132', 'GB18030-2022'); +insert into tb_test (id , content) select 37, convert_from('\x82359133', 'GB18030-2022'); +insert into tb_test (id , content) select 38, convert_from('\x82359134', 'GB18030-2022'); + +--显示这19个字符 +select * from tb_test order by id; + id | content +----+--------- + 1 | ḿ + 2 | ︐ + 3 | ︒ + 4 | ︑ + 5 | ︓ + 6 | ︔ + 7 | ︕ + 8 | ︖ + 9 | ︗ + 10 | ︘ + 11 | ︙ + 12 | 龴 + 13 | 龵 + 14 | 龶 + 15 | 龷 + 16 | 龸 + 17 | 龹 + 18 | 龺 + 19 | 龻 + 20 |  + 21 |  + 22 |  + 23 |  + 24 |  + 25 |  + 26 |  + 27 |  + 28 |  + 29 |  + 30 |  + 31 |  + 32 |  + 33 |  + 34 |  + 35 |  + 36 |  + 37 |  + 38 |  +(38 rows) + +--查看GB18030-2022编码 +select convert_to(content, 'GB18030-2022') from tb_test order by id; + convert_to +------------ + \xa8bc + \xa6d9 + \xa6da + \xa6db + \xa6dc + \xa6dd + \xa6de + \xa6df + \xa6ec + \xa6ed + \xa6f3 + \xfe59 + \xfe61 + \xfe66 + \xfe67 + \xfe6d + \xfe7e + \xfe90 + \xfea0 + \x8135f437 + \x84318236 + \x84318238 + \x84318237 + \x84318239 + \x84318330 + \x84318331 + \x84318332 + \x84318333 + \x84318334 + \x84318335 + \x82359037 + \x82359038 + \x82359039 + \x82359130 + \x82359131 + \x82359132 + \x82359133 + \x82359134 +(38 rows) + +--查看GB18030-2000编码 +select convert_to(content, 'GB18030') from tb_test order by id; + convert_to +------------ + \xa8bc + \xa6d9 + \xa6da + \xa6db + \xa6dc + \xa6dd + \xa6de + \xa6df + \xa6ec + \xa6ed + \xa6f3 + \xfe59 + \xfe61 + \xfe66 + \xfe67 + \xfe6d + \xfe7e + \xfe90 + \xfea0 + \x8135f437 + \x84318236 + \x84318238 + \x84318237 + \x84318239 + \x84318330 + \x84318331 + \x84318332 + \x84318333 + \x84318334 + \x84318335 + \x82359037 + \x82359038 + \x82359039 + \x82359130 + \x82359131 + \x82359132 + \x82359133 + \x82359134 +(38 rows) + 
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。 +select convert_to(content, 'utf8') from tb_test order by id; + convert_to +------------ + \xe1b8bf + \xefb890 + \xefb892 + \xefb891 + \xefb893 + \xefb894 + \xefb895 + \xefb896 + \xefb897 + \xefb898 + \xefb899 + \xe9beb4 + \xe9beb5 + \xe9beb6 + \xe9beb7 + \xe9beb8 + \xe9beb9 + \xe9beba + \xe9bebb + \xee9f87 + \xee9e8d + \xee9e8e + \xee9e8f + \xee9e90 + \xee9e91 + \xee9e92 + \xee9e93 + \xee9e94 + \xee9e95 + \xee9e96 + \xeea09e + \xeea0a6 + \xeea0ab + \xeea0ac + \xeea0b2 + \xeea183 + \xeea194 + \xeea1a4 +(38 rows) + + + +--2. 新增字符举例 +--CBJ统一汉字扩充B +select convert_from('\x95328236', 'GB18030-2022'); + convert_from +-------------- + 𠀀 +(1 row) + +select convert_from('\x9835F336', 'GB18030-2022'); + convert_from +-------------- + 𪛖 +(1 row) + +--CJK统一汉字 +select convert_from('\x82358F33', 'GB18030-2022'); + convert_from +-------------- + 龦 +(1 row) + +select convert_from('\x82359636', 'GB18030-2022'); + convert_from +-------------- + 鿯 +(1 row) + +--CJK统一汉子扩充C +select convert_from('\x9835F738', 'GB18030-2022'); + convert_from +-------------- + 𪜀 +(1 row) + +select convert_from('\x98399E36', 'GB18030-2022'); + convert_from +-------------- + 𫜴 +(1 row) + +--CJK统一汉子扩充D +select convert_from('\x98399F38', 'GB18030-2022'); + convert_from +-------------- + 𫝀 +(1 row) + +select convert_from('\x9839B539', 'GB18030-2022'); + convert_from +-------------- + 𫠝 +(1 row) + +--CJK统一汉子扩充E +select convert_from('\x9839B632', 'GB18030-2022'); + convert_from +-------------- + 𫠠 +(1 row) + +select convert_from('\x9933FE33', 'GB18030-2022'); + convert_from +-------------- + 𬺡 +(1 row) + +--CJK统一汉子扩充F +select convert_from('\x99348138', 'GB18030-2022'); + convert_from +-------------- + 𬺰 +(1 row) + +select convert_from('\x9939F730', 'GB18030-2022'); + convert_from +-------------- + 𮯠 +(1 row) + +--康熙部首 +select convert_from('\x81398B32', 'GB18030-2022'); + convert_from +-------------- + ⼀ +(1 row) + +select 
convert_from('\x8139A035', 'GB18030-2022'); + convert_from +-------------- + ⿕ +(1 row) + +--西双版纳新傣文字符 +select convert_from('\x8134F932', 'GB18030-2022'); + convert_from +-------------- + ᦀ +(1 row) + +select convert_from('\x81358437', 'GB18030-2022'); + convert_from +-------------- + ᧟ +(1 row) + +--西双版纳老傣文字符 +select convert_from('\x81358B32', 'GB18030-2022'); + convert_from +-------------- + ᨠ +(1 row) + +select convert_from('\x81359933', 'GB18030-2022'); + convert_from +-------------- + ᪭ +(1 row) + +--傈僳文字符 +select convert_from('\x82369535', 'GB18030-2022'); + convert_from +-------------- + ꓐ +(1 row) + +select convert_from('\x82369A32', 'GB18030-2022'); + convert_from +-------------- + ꓿ +(1 row) + +--蒙古文BIRGA符号 +select convert_from('\x9034C538', 'GB18030-2022'); + convert_from +-------------- + 𑙠 +(1 row) + +select convert_from('\x9034C730', 'GB18030-2022'); + convert_from +-------------- + 𑙬 +(1 row) + +--滇东北苗文字符 +select convert_from('\x9232C636', 'GB18030-2022'); + convert_from +-------------- + 𖼀 +(1 row) + +select convert_from('\x9232D625', 'GB18030-2022'); + convert_from +-------------- + 𖾏 +(1 row) + + +--插入了涉及2000升级到2022新增的字符举例 +create table in_test(id int, content text); + +--CBJ统一汉字扩充B +insert into in_test (id , content) +select 1,convert_from('\x95328236', 'GB18030-2022'); + +insert into in_test (id , content) +select 2,convert_from('\x9835F336', 'GB18030-2022'); + +--CJK统一汉字 +insert into in_test (id , content) +select 3,convert_from('\x82358F33', 'GB18030-2022'); + +insert into in_test (id , content) +select 4,convert_from('\x82359636', 'GB18030-2022'); + +--CJK统一汉子扩充C +insert into in_test (id , content) +select 5,convert_from('\x9835F738', 'GB18030-2022'); + +insert into in_test (id , content) +select 6,convert_from('\x98399E36', 'GB18030-2022'); + +--CJK统一汉子扩充D +insert into in_test (id , content) +select 7,convert_from('\x98399F38', 'GB18030-2022'); + +insert into in_test (id , content) +select 8,convert_from('\x9839B539', 'GB18030-2022'); + 
+--CJK统一汉子扩充E +insert into in_test (id , content) +select 9,convert_from('\x9839B632', 'GB18030-2022'); + +insert into in_test (id , content) +select 10,convert_from('\x9933FE33', 'GB18030-2022'); + +--CJK统一汉子扩充F +insert into in_test (id , content) +select 11,convert_from('\x99348138', 'GB18030-2022'); + +insert into in_test (id , content) +select 12,convert_from('\x9939F730', 'GB18030-2022'); + +--康熙部首 +insert into in_test (id , content) +select 13,convert_from('\x81398B32', 'GB18030-2022'); + +insert into in_test (id , content) +select 14,convert_from('\x8139A035', 'GB18030-2022'); + +--西双版纳新傣文字符 +insert into in_test (id , content) +select 15,convert_from('\x8134F932', 'GB18030-2022'); + +insert into in_test (id , content) +select 16,convert_from('\x81358437', 'GB18030-2022'); + +--西双版纳老傣文字符 +insert into in_test (id , content) +select 17,convert_from('\x81358B32', 'GB18030-2022'); + +insert into in_test (id , content) +select 18,convert_from('\x81359933', 'GB18030-2022'); + +--傈僳文字符 +insert into in_test (id , content) +select 19,convert_from('\x82369535', 'GB18030-2022'); + +insert into in_test (id , content) +select 20,convert_from('\x82369A32', 'GB18030-2022'); + +--蒙古文BIRGA符号 +insert into in_test (id , content) +select 21,convert_from('\x9034C538', 'GB18030-2022'); + +insert into in_test (id , content) +select 22,convert_from('\x9034C730', 'GB18030-2022'); + +--滇东北苗文字符 +insert into in_test (id , content) +select 23,convert_from('\x9232C636', 'GB18030-2022'); + +insert into in_test (id , content) +select 24,convert_from('\x9232D625', 'GB18030-2022'); + +--显示这24个字符 +select * from in_test order by id; + id | content +----+--------- + 1 | 𠀀 + 2 | 𪛖 + 3 | 龦 + 4 | 鿯 + 5 | 𪜀 + 6 | 𫜴 + 7 | 𫝀 + 8 | 𫠝 + 9 | 𫠠 + 10 | 𬺡 + 11 | 𬺰 + 12 | 𮯠 + 13 | ⼀ + 14 | ⿕ + 15 | ᦀ + 16 | ᧟ + 17 | ᨠ + 18 | ᪭ + 19 | ꓐ + 20 | ꓿ + 21 | 𑙠 + 22 | 𑙬 + 23 | 𖼀 + 24 | 𖾏 +(24 rows) + +--查看GB18030-2022编码 +select convert_to(content, 'GB18030-2022') from in_test order by id; + convert_to +------------ 
+ \x95328236 + \x9835f336 + \x82358f33 + \x82359636 + \x9835f738 + \x98399e36 + \x98399f38 + \x9839b539 + \x9839b632 + \x9933fe33 + \x99348138 + \x9939f730 + \x81398b32 + \x8139a035 + \x8134f932 + \x81358437 + \x81358b32 + \x81359933 + \x82369535 + \x82369a32 + \x9034c538 + \x9034c730 + \x9232c636 + \x9232d625 +(24 rows) + +--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。 +select convert_to(content, 'utf8') from in_test order by id; + convert_to +------------ + \xf0a08080 + \xf0aa9b96 + \xe9bea6 + \xe9bfaf + \xf0aa9c80 + \xf0ab9cb4 + \xf0ab9d80 + \xf0aba09d + \xf0aba0a0 + \xf0acbaa1 + \xf0acbab0 + \xf0aeafa0 + \xe2bc80 + \xe2bf95 + \xe1a680 + \xe1a79f + \xe1a8a0 + \xe1aaad + \xea9390 + \xea93bf + \xf09199a0 + \xf09199ac + \xf096bc80 + \xf096be8f +(24 rows) + + +drop table in_test; +drop table tb_test; + +--3. 正常的增删改查语句测试 +create table 表1(id int, 人名 name); + +insert into 表1(id, 人名) values(1, '小明!'); +select * from 表1; + id | 人名 +----+-------- + 1 | 小明! +(1 row) + + +alter table 表1 drop 人名; +select * from 表1; + id +---- + 1 +(1 row) + + +alter table 表1 add 学校 text; +insert into 表1(id , 学校) select 2, convert_to('@华为大学¥', 'GB18030-2022'); +select * from 表1; + id | 学校 +----+-------------------------- + 1 | + 2 | \x40bbaaceaab4f3d1a7a3a4 +(2 rows) + + +drop table 表1; + +--4. 不存在映射关系时 +select convert('\xFD308130', 'GB18030-2022', 'UTF8'); +ERROR: character with byte sequence 0xfd 0x30 0x81 0x30 in encoding "GB18030_2022" has no equivalent in encoding "UTF8" +CONTEXT: referenced column: convert +select convert('\xFE39FE39', 'GB18030-2022', 'UTF8'); +ERROR: character with byte sequence 0xfe 0x39 0xfe 0x39 in encoding "GB18030_2022" has no equivalent in encoding "UTF8" +CONTEXT: referenced column: convert + +--5. 
测试gb18030_2022数据库中的字符串相关 +-- E021-03 character string literals +SELECT 'first line' +' - next line' + ' - third line' + AS "Three lines to one"; + Three lines to one +------------------------------------- + first line - next line - third line +(1 row) + + +-- illegal string continuation syntax +SELECT 'first line' +' - next line' /* this comment is not allowed here */ +' - third line' + AS "Illegal comment within continuation"; +ERROR: syntax error at or near "' - third line'" +LINE 3: ' - third line' + ^ + +-- Unicode escapes +SET standard_conforming_strings TO on; + +SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061"; + data +------ + data +(1 row) + +SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*'; + dat\+000061 +------------- + dat\+000061 +(1 row) + + +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +CONTEXT: referenced column: bytea +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +CONTEXT: referenced column: bytea +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +CONTEXT: referenced column: bytea + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + 
\336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + + +SET bytea_output TO hex; + +SELECT CAST(name 'namefield' AS text) AS "text(name)"; + text(name) +------------ + namefield +(1 row) + + +-- E021-09 trim function +SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; + bunch o blanks +---------------- + t +(1 row) + + +-- E021-06 substring expression +SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; + 34567890 +---------- + t +(1 row) + + +-- PostgreSQL extension to allow using back reference in replace string; +SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); + regexp_replace +---------------- + (111) 222-3333 +(1 row) + + +-- set so we can tell NULL from empty string +\pset null '\\N' + +-- return all matches from regexp +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); + regexp_matches +---------------- + {bar,beque} +(1 row) + + +-- split string on regexp +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; + foo | length +--------+-------- + the | 3 + quick | 5 + brown | 5 + fox | 3 + jumped | 6 + over | 4 + the | 3 + lazy | 4 + dog | 3 +(9 rows) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); + regexp_split_to_array +------------------------------------------------ + {the,quick,brown,fox,jumped,over,the,lazy,dog} +(1 row) + + +-- change NULL-display back +\pset null '' + +-- E021-11 position expression +SELECT POSITION('4' IN '1234567890') = '4' AS "4"; + 4 +--- + t +(1 row) + 
+ +SELECT POSITION('5' IN '1234567890') = '5' AS "5"; + 5 +--- + t +(1 row) + + +-- T312 character overlay function +SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f"; + abc45f +-------- + abc45f +(1 row) + + +-- E061-04 like predicate +SELECT 'hawkeye' LIKE 'h%' AS "true"; + true +------ + t +(1 row) + +SELECT 'hawkeye' NOT LIKE 'h%' AS "false"; + false +------- + f +(1 row) + + +-- unused escape character +SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true"; + true +------ + t +(1 row) + +SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS "false"; + false +------- + f +(1 row) + + +-- +-- test ILIKE (case-insensitive LIKE) +-- Be sure to form every test as an ILIKE/NOT ILIKE pair. +-- + +SELECT 'hawkeye' ILIKE 'h%' AS "true"; + true +------ + t +(1 row) + +SELECT 'hawkeye' NOT ILIKE 'h%' AS "false"; + false +------- + f +(1 row) + + +--6. 使用字符串相关函数 +--重复字符串 +select repeat('中国', 3); + repeat +-------------- + 中国中国中国 +(1 row) + + +--返回字符串的前n个字符 +select left('中国!number1', 7); + left +------------ + 中国!numb +(1 row) + + +--返回长度 +select length('中国!number1'); + length +-------- + 10 +(1 row) + + +--反转字符串 +select reverse('中国!number1'); + reverse +--------------- + 1rebmun!国中 +(1 row) + + +--md5算法加密 +select md5('中国!number1'); + md5 +---------------------------------- + 764c69059680eb8f52946f9f4936737a +(1 row) + + +-- test strpos +SELECT strpos('abcdef', 'cd') AS "pos_3"; + pos_3 +------- + 3 +(1 row) + +SELECT strpos('abcdef', 'xy') AS "pos_0"; + pos_0 +------- + 0 +(1 row) + + +SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo"; + ya123da123doo +--------------- + ya123da123doo +(1 row) + + +select split_part('joeuser@mydatabase','@',3) AS "empty string"; + empty string +-------------- + +(1 row) + + +select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; + ffffffff +---------- + ffffffff +(1 row) + + +--返回字符串中第一个字符的十进制表示形式 +select ascii('xyz'); + ascii +------- + 120 +(1 row) + +select ascii('中xyz'); +ERROR: requested character too 
large +CONTEXT: referenced column: ascii +select ascii('ḿxyz'); +ERROR: requested character too large +CONTEXT: referenced column: ascii + +-- 7. 检查GB18030-2022与GB18030的关系 +select convert('中国', 'GB18030', 'GB18030-2022'); + convert +------------ + \xd6d0b9fa +(1 row) + + +select convert('中国', 'GB18030-2022', 'GB18030'); + convert +------------ + \xd6d0b9fa +(1 row) + + +select convert('\xA8BC', 'GB18030-2022', 'UTF8'); + convert +---------- + \xe1b8bf +(1 row) + + +select convert('\xA8BC', 'GB18030', 'UTF8'); + convert +---------- + \xee9f87 +(1 row) + + +\c regression +clean connection to all force for database gb18030_2022; +drop database gb18030_2022; + +-- 8. 在UTF8环境下检测GB18030-2022与UTF8的转换 +select convert('中国&华为*GaussDB', 'UTF8', 'GB18030-2022'); + convert +-------------------------------------- + \xd6d0b9fa26bbaaceaa2a47617573734442 +(1 row) + + +select convert('ḿ', 'UTF8', 'GB18030-2022'); + convert +--------- + \xa8bc +(1 row) + + +-- 9. 测试create database时encoding与本地设置不匹配 +create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='en_US.utf-8' LC_CTYPE ='en_US.utf-8' TEMPLATE=template0; +ERROR: encoding "GB18030_2022" does not match locale "en_US.utf-8" +DETAIL: The chosen LC_CTYPE setting requires encoding "UTF8". + +--10. 测试initdb +\! rm -f @abs_bindir@/test_initdb.log +\! mkdir -p @testtablespace@/test2 +\! mkdir -p @testtablespace@/test2/pg_location +\! @abs_bindir@/gs_initdb -S -D @testtablespace@/test2 --nodename coorn2 -U test_initdb -w test@123 --locale=zh_CN.gb18030 -E GB18030_2022 >> @abs_bindir@/test_initdb2.log 2>&1 +\! cat @abs_bindir@/test_initdb2.log | grep ok +ok +creating subdirectories ... in ordinary occasionok +creating configuration files ... ok +ok +initializing pg_authid ... ok +setting password ... ok +initializing dependencies ... ok +loading PL/pgSQL server-side language ... ok +creating system views ... ok +creating performance views ... ok +loading system objects' descriptions ... ok +creating collations ... 
ok +creating conversions ... ok +creating dictionaries ... ok +setting privileges on built-in objects ... ok +initialize global configure for bucketmap length ... ok +creating information schema ... ok +loading foreign-data wrapper for distfs access ... ok +loading foreign-data wrapper for log access ... ok +loading hstore extension ... ok +loading security plugin ... ok +update system tables ... ok +creating snapshots catalog ... ok +vacuuming database template1 ... ok +copying template1 to template0 ... ok +copying template1 to postgres ... ok +freezing database template0 ... ok +freezing database template1 ... ok +freezing database postgres ... ok +\! rm -f @abs_bindir@/test_initdb2.log +\! rm -rf @testtablespace@/test2 + +--11. 测试升级回滚 +select oid, * from pg_conversion where conname like '%gb18030%' order by conname; + oid | conname | connamespace | conowner | conforencoding | contoencoding | conproc | condefault +-------+----------------------+--------------+----------+----------------+---------------+----------------------+------------ +--? .* | gb18030_2022_to_utf8 | 11 | 10 | 37 | 7 | gb18030_2022_to_utf8 | t +--? .* | gb18030_to_utf8 | 11 | 10 | 36 | 7 | gb18030_to_utf8 | t +--? .* | utf8_to_gb18030 | 11 | 10 | 7 | 36 | utf8_to_gb18030 | t +--? 
.* | utf8_to_gb18030_2022 | 11 | 10 | 7 | 37 | utf8_to_gb18030_2022 | t +(4 rows) + + +select oid, * from pg_proc where proname like '%gb18030%' order by proname; + oid | proname | pronamespace | proowner | prolang | procost | prorows | provariadic | protransform | proisagg | proiswindow | prosecdef | proleakproof | proisstrict | proretset | provolatile | pronargs | pronargdefaults | prorettype | proargtypes | proallargtypes | proargmodes | proargnames | proargdefaults | prosrc | probin | proconfig | proacl | prodefaultargpos | fencedmode | proshippable | propackage | prokind | proargsrc | propackageid | proisprivate | proargtypesext | prodefaultargposext | allargtypes | allargtypesext| gb18030_2022_to_utf8 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | gb18030_2022_to_utf8 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 | +--? .* | gb18030_to_utf8 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | gb18030_to_utf8 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 | +--? .* | utf8_to_gb18030 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | utf8_to_gb18030 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 | +--? .* | utf8_to_gb18030_2022 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | utf8_to_gb18030_2022 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 | +(4 rows) + + +--12. 
看护client_encoding不能设置为GB18030_2022 +set client_encoding = GB18030_2022; +ERROR: invalid value for parameter "client_encoding": "gb18030_2022" + +ALTER SESSION SET NAMES 'GB18030_2022'; +ERROR: invalid value for parameter "client_encoding": "GB18030_2022" diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index d3135fa2d2e1e366d5085dc09888c42d1f07528a..90cdb60ca015961b513ffb8da8804aa5e46e9552 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -1099,3 +1099,4 @@ test: enable_expr_fusion_flatten # test for on update timestamp and generated column test: on_update_session1 on_update_session2 +test: ts_gb18030_utf8 \ No newline at end of file diff --git a/src/test/regress/sql/ts_gb18030_utf8.sql b/src/test/regress/sql/ts_gb18030_utf8.sql deleted file mode 100644 index 1b14f27692ca76ade6d55327de968f7a871abfe4..0000000000000000000000000000000000000000 --- a/src/test/regress/sql/ts_gb18030_utf8.sql +++ /dev/null @@ -1,20 +0,0 @@ -create database gb18030 encoding='gb18030' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0; -\c gb18030 - -show server_encoding; -create table tb_test(id int, content text); - -insert into tb_test values(1, 'abcdefghigkABCDEFGHIJK'); -insert into tb_test values(2, '12'); -insert into tb_test values(3, 'ĺ'); -insert into tb_test values(4, '019808'); -insert into tb_test values(5, '94 95 92 94 97 98 99 90'); -insert into tb_test values(5, '25'); - -select * from tb_test order by id; -select convert_to(content, 'utf8') from tb_test order by id; - -drop table tb_test; -\c regression -clean connection to all force for database gb18030; -drop database gb18030;