diff --git a/contrib/pagehack/pagehack.cpp b/contrib/pagehack/pagehack.cpp
index 8adaed889b6ca9527b77c166522b3c10aabd1203..9c527326ff70fa6b73c864890d3fae00559a5f01 100644
--- a/contrib/pagehack/pagehack.cpp
+++ b/contrib/pagehack/pagehack.cpp
@@ -60,6 +60,8 @@
#include "access/ustore/knl_utuple.h"
#include "access/ustore/knl_uundorecord.h"
#include "access/double_write_basic.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
#include "catalog/pg_control.h"
#include "catalog/pg_attribute.h"
#include "catalog/pg_class.h"
@@ -102,7 +104,11 @@
/* Number of pg_class types */
#define CLASS_TYPE_NUM 512
#define TEN 10
-
+#define BLOCK_META_INFO_NUM_PER_PAGE 127
+#define BASE_PAGE_MAP_SIZE 16
+#define BASE_PAGE_MAP_BIT_SIZE (BASE_PAGE_MAP_SIZE * BITS_PER_BYTE)
+#define DIVIDED_BY_TWO 2
+#define WAL_ID_OFFSET 32
typedef unsigned char* binary;
static const char* indents[] = { // 10 tab is enough to used.
@@ -830,7 +836,9 @@ typedef enum HackingType {
HACKING_UNDO_RECORD,
HACKING_UNDO_FIX,
HACKING_SEGMENT,
-    NUM_HACKINGTYPE
+    HACKING_LSN_INFO_META,   /* listed right after HACKING_SEGMENT, like "lsn_info_meta" follows "segment" in HACKINGTYPE[] */
+    HACKING_BLOCK_INFO_META, /* NOTE(review): assumes HACKINGTYPE[] is indexed by this enum - confirm */
+    NUM_HACKINGTYPE          /* count sentinel: keep it last so every real type stays below it */
} HackingType;
static HackingType hackingtype = HACKING_HEAP;
@@ -856,7 +864,9 @@ static const char* HACKINGTYPE[] = {"heap",
"undo_slot",
"undo_record",
"undo_fix",
- "segment"
+ "segment",
+ "lsn_info_meta",
+ "block_info_meta"
};
const char* PageTypeNames[] = {"DATA", "FSM", "VM"};
@@ -912,6 +922,7 @@ typedef struct FSMAddress {
const int FSM_BOTTOM_LEVEL = 0;
using namespace undo;
+using namespace extreme_rto_standby_read;
static void formatBytes(unsigned char* start, int len)
{
@@ -5036,6 +5047,248 @@ static int ParseUndoSlot(const char *filename)
return true;
}
+/* Print the bits of map as '0'/'1' digits, highest bit first, followed by a single space. */
+static void parse_map_position(uint8 map)
+{
+    /*
+     * Visit the bit positions from high to low and emit one digit for each.
+     * Shifting the wanted bit down to position 0 and masking it gives the
+     * same fixed-width, zero-padded output as collecting the remainders of
+     * repeated division by two and printing them in reverse order.
+     */
+    for (int bitpos = BITS_PER_BYTE - 1; bitpos >= 0; bitpos--) {
+        unsigned int digit = ((unsigned int)map >> bitpos) & 0x1;
+        fprintf(stdout, "%u", digit);
+    }
+    fprintf(stdout, " ");
+}
+
+/* Print an lsn info page header: page LSN, checksum/flags/version and the base page bitmap. */
+static void parse_lsn_info_head(LsnInfoPageHeader *header)
+{
+    fprintf(stdout, "%slsn: xlogid %u, xrecoff %u, lsn %lu\n", indents[indentLevel], header->lsn.xlogid,
+        header->lsn.xrecoff, ((uint64)header->lsn.xlogid << WAL_ID_OFFSET) | header->lsn.xrecoff);
+    fprintf(stdout, "%schecksum: %u, flags: %u, version: %u\n", /* '\n' was missing: the next line got glued on */
+        indents[indentLevel], header->checksum, header->flags, header->version);
+    fprintf(stdout, "%sbase page map: ", indents[indentLevel]); /* one digit group per bitmap byte follows */
+    for (uint32 loop = 0; loop < BASE_PAGE_MAP_SIZE; loop++) {
+        parse_map_position(header->base_page_map[loop]);
+    }
+    fprintf(stdout, "\n");
+}
+
+static void parse_lsn_info_node(LsnInfoNode *lsninfo) /* print one LsnInfoNode to stdout at the current indent */
+{
+ fprintf(stdout, "%slsn info list: prev %lu, next: %lu\n",
+ indents[indentLevel], lsninfo->lsn_list.prev, lsninfo->lsn_list.next);
+ fprintf(stdout, "%sflags: %u, type: %u, used: %u\n",
+ indents[indentLevel], lsninfo->flags, lsninfo->type, lsninfo->used);
+ fprintf(stdout, "%slsn:", indents[indentLevel]); /* the values follow on the same output line */
+ for (uint loop = 0; loop < LSN_NUM_PER_NODE; loop++) { /* all LSN_NUM_PER_NODE slots are printed; `used` is not consulted */
+ fprintf(stdout, " %lu", lsninfo->lsn[loop]);
+ }
+ fprintf(stdout, "\n");
+}
+
+static void parse_base_page_info_node(BasePageInfoNode *pageinfo) /* print one BasePageInfoNode to stdout */
+{
+ RelFileNode rnode = pageinfo->relfilenode; /* local copy, only used for the "refile node" line below */
+ fprintf(stdout, "%slsn info:\n", indents[indentLevel]);
+ /* the embedded LsnInfoNode is printed one indent level deeper */
+ ++indentLevel;
+ parse_lsn_info_node(&(pageinfo->lsn_info_node));
+ --indentLevel;
+
+ fprintf(stdout, "%sbase page list: prev %lu, next: %lu\n",
+ indents[indentLevel], pageinfo->base_page_list.prev, pageinfo->base_page_list.next);
+ fprintf(stdout, "%scurrent page lsn: %lu\n",
+ indents[indentLevel], pageinfo->cur_page_lsn);
+ fprintf(stdout, "%srefile node:\n", indents[indentLevel]);
+ ++indentLevel; /* the relfilenode fields are printed one level deeper */
+ fprintf(stdout, "%sspcnode: %u, dbnode: %u, relnode: %u, bucketnode: %d, opt: %u\n",
+ indents[indentLevel], rnode.spcNode, rnode.dbNode, rnode.relNode, rnode.bucketNode, rnode.opt);
+ --indentLevel;
+ fprintf(stdout, "%sfork num: %d, block num: %u\n",
+ indents[indentLevel], pageinfo->fork_num, pageinfo->block_num);
+ fprintf(stdout, "%snext base page lsn: %lu, base page position: %lu\n",
+ indents[indentLevel], pageinfo->next_base_page_lsn, pageinfo->base_page_position);
+}
+
+/* Read and print the block at slot handledblock of lsn info page `loop`, then advance handledblock past it. */
+static void parse_lsn_info_block(FILE* fd, uint8 isbasepage[], uint32 &handledblock, uint32 loop)
+{
+    union { char lsn[sizeof(LsnInfoNode)]; char page[sizeof(BasePageInfoNode)]; } buffer;
+    bool isbase = (isbasepage[handledblock] != 0);
+    size_t want = isbase ? sizeof(BasePageInfoNode) : sizeof(LsnInfoNode);
+    /* A short read used to be ignored, so stale stack bytes were decoded and printed as if read from the file. */
+    if (fread(&buffer, 1, want, fd) != want) {
+        fprintf(stderr, "Reading data at page %u, block %u error.\n", loop, handledblock);
+        handledblock = BASE_PAGE_MAP_BIT_SIZE; /* nothing more can be read: ends the caller's per-page block loop */
+        return;
+    }
+    if (isbase) {
+        BasePageInfoNode *basepageinfo = (BasePageInfoNode *)buffer.page;
+        fprintf(stdout, "it's a basepage.\n");
+        if (basepageinfo->lsn_info_node.type != LSN_INFO_TYPE_BASE_PAGE) {
+            fprintf(stderr, "Data at page %u, block %u must be base page, but its type is: %u.\n",
+                loop, handledblock, basepageinfo->lsn_info_node.type); // report error but continue.
+        }
+        parse_base_page_info_node(basepageinfo);
+    } else if (!is_lsn_info_node_valid(((LsnInfoNode *)buffer.lsn)->flags)) {
+        fprintf(stdout, "Data at page %u, block %u is not valid.\n", loop, handledblock);
+    } else {
+        LsnInfoNode *lsnInfo = (LsnInfoNode *)buffer.lsn;
+        fprintf(stdout, "it's a lsn page.\n");
+        if (lsnInfo->type != LSN_INFO_TYPE_LSNS) {
+            fprintf(stderr, "Data at page %u, block %u must be lsn page, but its type is: %u.\n",
+                loop, handledblock, lsnInfo->type); // report error but continue.
+        }
+        parse_lsn_info_node(lsnInfo);
+    }
+    handledblock += isbase ? 2 : 1; // index need add by 2 for basepage takes 2 blocks.
+}
+
+/* Dump every page of an lsn info meta file (header, then its blocks); returns false on open, size or read errors. */
+static bool parse_lsn_info_meta(const char *filename)
+{
+    char bufferhead[sizeof(LsnInfoPageHeader)];
+    FILE* fd = NULL;
+    uint32 loop, loopmap, loopbit, handledblock;
+    uint8 pagemappos;
+    uint8 isbasepage[BASE_PAGE_MAP_BIT_SIZE] = { 0 };
+    if (NULL == (fd = fopen(filename, "rb"))) {
+        fprintf(stderr, "%s: %s\n", filename, strerror(errno));
+        return false;
+    }
+    /* Measure the file so the number of pages is known before anything is parsed. */
+    fseek(fd, 0, SEEK_END);
+    long size = ftell(fd);
+    rewind(fd);
+
+    if (size % BLCKSZ != 0) {
+        fprintf(stderr, "Reading lsn/page info meta file error: file size is not divisible by page size(8k).\n");
+        fclose(fd);
+        return false;
+    }
+
+    long pagenum = size / BLCKSZ;
+    fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum);
+    /* Pages are numbered from 1 here, so page `loop` ends at file offset loop * BLCKSZ. */
+    for (loop = 1; loop <= pagenum; loop++) {
+        fprintf(stdout, "Page %u information:\n", loop);
+        ++indentLevel;
+        if (fread(bufferhead, 1, sizeof(LsnInfoPageHeader), fd) != sizeof(LsnInfoPageHeader)) {
+            fprintf(stderr, "%sReading header error\n", indents[indentLevel]);
+            fclose(fd);
+            return false;
+        }
+
+        LsnInfoPageHeader *pageheader = (LsnInfoPageHeader *)bufferhead;
+        if (!is_lsn_info_page_valid(pageheader)) {
+            fseek(fd, (long)loop * BLCKSZ, SEEK_SET); /* skip the rest of THIS page; was an absolute jump to page 127 */
+            fprintf(stdout, "%sPage %u is not valid.\n", indents[indentLevel], loop);
+            --indentLevel;
+            continue;
+        }
+        parse_lsn_info_head(pageheader);
+        /* Expand the bitmap, lowest bit of each byte first; a non-zero isbasepage[i] makes block i a base page. */
+        pagemappos = 0;
+        for (loopmap = 0; loopmap < BASE_PAGE_MAP_SIZE; loopmap++) {
+            for (loopbit = 0; loopbit < BITS_PER_BYTE; loopbit++) {
+                isbasepage[pagemappos] = (((pageheader->base_page_map[loopmap]) & (0x1 << loopbit)) >> loopbit);
+                pagemappos++;
+            }
+        }
+
+        handledblock = 1; // 1st block is handled as header
+        while (handledblock < BASE_PAGE_MAP_BIT_SIZE) {
+            fprintf(stdout, "%sBlock %u information: ", indents[indentLevel], handledblock);
+            ++indentLevel;
+            parse_lsn_info_block(fd, isbasepage, handledblock, loop); /* advances handledblock itself */
+            --indentLevel;
+        }
+        memset_s(isbasepage, sizeof(isbasepage), 0, sizeof(isbasepage));
+        --indentLevel;
+    }
+    fclose(fd);
+    return true;
+}
+
+static void parse_block_info_head(BlockInfoPageHeader *header) /* print one block info page header to stdout */
+{
+ PageXLogRecPtr lsn = header->lsn;
+ fprintf(stdout, "%slsn: xlogid %u, xrecoff %u, lsn %lu\n", /* last value: xlogid shifted left by WAL_ID_OFFSET, OR-ed with xrecoff */
+ indents[indentLevel], lsn.xlogid, lsn.xrecoff, ((uint64)lsn.xlogid << WAL_ID_OFFSET) | lsn.xrecoff);
+ fprintf(stdout, "%schecksum: %u, flags: %u\n",
+ indents[indentLevel], header->checksum, header->flags);
+ fprintf(stdout, "%sversion: %u, total_block_num: %lu\n",
+ indents[indentLevel], header->version, header->total_block_num);
+}
+
+static void parse_block_info_content(BlockMetaInfo *blockInfo) /* print one BlockMetaInfo entry to stdout */
+{
+ fprintf(stdout, "%stimeline: %u, record_num: %u\n", /* every line is prefixed with the current indent string */
+ indents[indentLevel], blockInfo->timeline, blockInfo->record_num);
+ fprintf(stdout, "%smin_lsn: %lu, max_lsn: %lu, flags: %u\n",
+ indents[indentLevel], blockInfo->min_lsn, blockInfo->max_lsn, blockInfo->flags);
+ fprintf(stdout, "%slsn_info_list: prev %lu, next: %lu\n",
+ indents[indentLevel], blockInfo->lsn_info_list.prev, blockInfo->lsn_info_list.next);
+}
+
+/* Dump every page of a block info meta file (header, then its entries); returns false on open, size or read errors. */
+static bool parse_block_info_meta(const char *filename)
+{
+    char bufferhead[sizeof(BlockInfoPageHeader)];
+    char bufferblock[sizeof(BlockMetaInfo)];
+    uint32 loop, loopinfo;
+    FILE* fd = NULL;
+    if (NULL == (fd = fopen(filename, "rb"))) {
+        fprintf(stderr, "%s: %s\n", filename, strerror(errno));
+        return false;
+    }
+    /* Measure the file so the number of pages is known before anything is parsed. */
+    fseek(fd, 0, SEEK_END);
+    long size = ftell(fd);
+    rewind(fd);
+
+    if (size % BLCKSZ != 0) {
+        fprintf(stderr, "Reading block info meta file error: file size is not divisible by page size(8k).\n");
+        fclose(fd);
+        return false;
+    }
+    long pagenum = size / BLCKSZ;
+    fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum);
+    /* Pages are numbered from 0 in the output. */
+    for (loop = 0; loop < pagenum; loop++) {
+        fprintf(stdout, "Page %u information:\n", loop);
+        ++indentLevel;
+
+        if (fread(bufferhead, 1, sizeof(BlockInfoPageHeader), fd) != sizeof(BlockInfoPageHeader)) {
+            fprintf(stderr, "%sReading header error\n", indents[indentLevel]); /* '\n' was missing */
+            fclose(fd);
+            return false;
+        }
+        parse_block_info_head((BlockInfoPageHeader *)bufferhead);
+        /* The header is followed by BLOCK_META_INFO_NUM_PER_PAGE entries, read and printed one at a time. */
+        for (loopinfo = 0; loopinfo < BLOCK_META_INFO_NUM_PER_PAGE; loopinfo++) {
+            fprintf(stdout, "%sBlock %u information:\n", indents[indentLevel], loopinfo);
+            ++indentLevel;
+            if (fread(bufferblock, 1, sizeof(BlockMetaInfo), fd) != sizeof(BlockMetaInfo)) {
+                fprintf(stderr, "%sReading block meta file error at %u page, %u block.\n",
+                    indents[indentLevel], loop, loopinfo);
+                fclose(fd);
+                return false;
+            }
+            parse_block_info_content((BlockMetaInfo *)bufferblock);
+            --indentLevel;
+        }
+        --indentLevel;
+    }
+
+    fclose(fd);
+    return true;
+}
+
typedef struct UndoHeader {
UndoRecordHeader whdr_;
UndoRecordBlock wblk_;
@@ -5956,6 +6209,18 @@ int main(int argc, char** argv)
break;
case HACKING_UNDO_FIX:
break;
+ case HACKING_LSN_INFO_META:
+ if (!parse_lsn_info_meta(filename)) {
+ fprintf(stderr, "Error during parsing lsn info meta file %s\n", filename);
+ exit(1);
+ }
+ break;
+ case HACKING_BLOCK_INFO_META:
+ if (!parse_block_info_meta(filename)) {
+ fprintf(stderr, "Error during parsing block info meta file %s\n", filename);
+ exit(1);
+ }
+ break;
default:
/* should be impossible to be here */
Assert(false);
diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf
index 3213539055f9a62ae57a4e02b35b26fc726a5f2e..bd587bef8bc010025c016395b5464e829f738ae4 100755
--- a/src/bin/gs_guc/cluster_guc.conf
+++ b/src/bin/gs_guc/cluster_guc.conf
@@ -104,6 +104,7 @@ dirty_page_percent_max|real|0.1,1|NULL|NULL|
group_concat_max_len|int64|0,9223372036854775807|NULL|NULL
check_function_bodies|bool|0,0|NULL|NULL|
checkpoint_completion_target|real|0,1|NULL|NULL|
+standby_force_recyle_ratio|real|0,1|NULL|NULL|
checkpoint_segments|int|1,2147483646|NULL|NULL|
checkpoint_timeout|int|30,3600|s|NULL|
checkpoint_warning|int|0,2147483647|s|NULL|
@@ -268,6 +269,9 @@ geqo_effort|int|1,10|NULL|NULL|
geqo_generations|int|0,2147483647|NULL|NULL|
hadr_max_size_for_xlog_receiver|int|0,2147483647|kB|NULL|
hadr_recovery_time_target|int|0,3600|NULL|NULL|
+standby_recycle_interval|int|0,86400|s|NULL|
+standby_max_query_time|int|0,86400|s|NULL|
+base_page_saved_interval|int|4,2000|NULL|NULL|
hadr_recovery_point_target|int|0,3600|NULL|NULL|
hadr_super_user_record_path|string|0,0|NULL|NULL|
hll_default_log2m|int|10,16|NULL|NULL|
@@ -708,6 +712,8 @@ undo_zone_count|int|0,1048576|NULL|NULL|
stream_cluster_run_mode|enum|cluster_primary,cluster_standby|NULL|NULL|
xlog_file_size|int64|1048576,576460752303423487|B|The value must be an integer multiple of 16777216(16M)|
xlog_file_path|string|0,0|NULL|NULL|
+max_standby_base_page_size|int64|0,576460752303423487|B|NULL|
+max_standby_lsn_info_size|int64|0,576460752303423487|B|NULL|
plsql_show_all_error|bool|0,0|NULL|NULL|
partition_page_estimation|bool|0,0|NULL|NULL|
enable_auto_clean_unique_sql|bool|0,0|NULL|NULL|
diff --git a/src/bin/initdb/initdb.cpp b/src/bin/initdb/initdb.cpp
index 7419849f7345a8fdca16140d9cc328a1fc9e5d9b..75d5e6bcf6d13a57f330b305c0736322bf9f5174 100644
--- a/src/bin/initdb/initdb.cpp
+++ b/src/bin/initdb/initdb.cpp
@@ -3594,7 +3594,8 @@ static bool check_locale_encoding(const char* locale_encoding, int user_enc)
#ifdef WIN32
user_enc == PG_UTF8 ||
#endif
- user_enc == PG_SQL_ASCII)) {
+ user_enc == PG_SQL_ASCII ||
+ (user_enc == PG_GB18030_2022 && locale_enc == PG_GB18030))) {
write_stderr(_("%s: encoding mismatch\n"), progname);
write_stderr(_("The encoding you selected (%s) and the encoding that the\n"
"selected locale uses (%s) do not match. This would lead to\n"
diff --git a/src/bin/pg_rewind/fetch.cpp b/src/bin/pg_rewind/fetch.cpp
index 19d818eba37f3894b40213faf53816ba067bb224..e1279943acafcb00d2d21673965ff5632f9533ca 100755
--- a/src/bin/pg_rewind/fetch.cpp
+++ b/src/bin/pg_rewind/fetch.cpp
@@ -27,6 +27,7 @@
#include "PageCompression.h"
#include "catalog/pg_type.h"
#include "storage/file/fio_device.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
PGconn* conn = NULL;
char source_slot_name[NAMEDATALEN] = {0};
@@ -303,6 +304,9 @@ BuildErrorCode fetchSourceFileList()
continue;
if (NULL != strstr(path, "disable_conn_file"))
continue;
+ if (NULL != strstr(path, EXRTO_FILE_DIR)) {
+ continue;
+ }
if (PQgetisnull(res, 0, 1)) {
/*
diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini
index 0c59329f6d6a1487c631b8f9934bf93f2bc3a329..77c2dc72f25a3c19fbf721a361f816a98f2e021b 100755
--- a/src/common/backend/catalog/builtin_funcs.ini
+++ b/src/common/backend/catalog/builtin_funcs.ini
@@ -12904,3 +12904,7 @@ AddFuncGroup(
"gs_repair_file", 1,
AddBuiltinFunc(_0(4771), _1("gs_repair_file"), _2(3), _3(true), _4(true), _5(gs_repair_file), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(1000), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 26, 25, 23), _21(3, 26, 25, 23), _22(3, 'i', 'i', 'i'), _23(3, "tableoid", "path", "timeout"), _24(NULL), _25("gs_repair_file"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0))
),
+ AddFuncGroup(
+ "gs_hot_standby_space_info", 1,
+ AddBuiltinFunc(_0(6218), _1("gs_hot_standby_space_info"), _2(0), _3(false), _4(true), _5(gs_hot_standby_space_info), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(0), _21(6, 28, 28, 28, 28, 28, 28), _22(6, 'o', 'o', 'o', 'o', 'o', 'o'), _23(6, "base_page_file_num", "base_page_total_size", "lsn_info_meta_file_num", "lsn_info_meta_total_size", "block_info_meta_file_num", "block_info_meta_total_size"), _24(NULL), _25("gs_hot_standby_space_info"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0))
+ ),
diff --git a/src/common/backend/catalog/storage.cpp b/src/common/backend/catalog/storage.cpp
index 67e231c5f16033c5c4a631268acbb6d690fb9703..6d6d559fb69af5d9b40dd779a3dc7c7d9b78e2a7 100644
--- a/src/common/backend/catalog/storage.cpp
+++ b/src/common/backend/catalog/storage.cpp
@@ -24,6 +24,7 @@
#include "access/cstore_am.h"
#include "access/visibilitymap.h"
+#include "access/multi_redo_api.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
@@ -42,6 +43,7 @@
#include "pgxc/pgxc.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
+#include "storage/procarray.h"
#include "storage/smgr/smgr.h"
#include "storage/smgr/segment.h"
#include "threadpool/threadpool.h"
@@ -608,7 +610,7 @@ void RelationPreserveStorage(RelFileNode rnode, bool atCommit)
* This includes getting rid of any buffers for the blocks that are to be
* dropped.
*/
-void RelationTruncate(Relation rel, BlockNumber nblocks)
+void RelationTruncate(Relation rel, BlockNumber nblocks, TransactionId latest_removed_xid)
{
/* Currently, segment-page tables should not be truncated */
Assert(!RelationIsSegmentTable(rel));
@@ -675,14 +677,13 @@ void RelationTruncate(Relation rel, BlockNumber nblocks)
uint size;
uint8 info = XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE;
+ size = sizeof(xl_smgr_truncate_compress);
xlrec.xlrec.blkno = nblocks;
+ xlrec.pageCompressOpts = rel->rd_node.opt;
+ xlrec.latest_removed_xid = latest_removed_xid;
if (rel->rd_node.opt != 0) {
- xlrec.pageCompressOpts = rel->rd_node.opt;
- size = sizeof(xl_smgr_truncate_compress);
info |= XLR_REL_COMPRESS;
- } else {
- size = sizeof(xl_smgr_truncate);
}
RelFileNodeRelCopy(xlrec.xlrec.rnode, rel->rd_node);
@@ -713,7 +714,7 @@ void RelationTruncate(Relation rel, BlockNumber nblocks)
BatchClearBadBlock(rel->rd_node, MAIN_FORKNUM, nblocks);
}
-void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks)
+void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks, TransactionId latest_removed_xid)
{
/* Currently, segment-page tables should not be truncated */
Assert(!RelationIsSegmentTable(parent));
@@ -764,14 +765,16 @@ void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks)
uint8 info = XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE;
int redoSize;
+ redoSize = sizeof(xl_smgr_truncate_compress);
+
+ xlrec.xlrec.blkno = nblocks;
+ xlrec.pageCompressOpts = rel->rd_node.opt;
+ xlrec.latest_removed_xid = latest_removed_xid;
+
if (rel->rd_node.opt != 0) {
- xlrec.pageCompressOpts = rel->rd_node.opt;
info |= XLR_REL_COMPRESS;
- redoSize = sizeof(xl_smgr_truncate_compress);
- } else {
- redoSize = sizeof(xl_smgr_truncate);
}
- xlrec.xlrec.blkno = nblocks;
+
RelFileNodeRelCopy(xlrec.xlrec.rnode, part->pd_node);
XLogBeginInsert();
@@ -1242,8 +1245,26 @@ void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data)
}
}
-void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn)
+/* Keep calling proc_array_cancel_conflicting_proc() for latest_removed_xid until it reports no conflict. */
+void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid)
+{
+    const int max_check_times = 1000; /* upper bound on the number of rounds */
+    if (!IS_EXRTO_READ) {
+        return;
+    }
+    for (int attempt = 1; attempt <= max_check_times; attempt++) {
+        RedoInterruptCallBack();
+        /* Only the last permitted round passes true as the second argument. */
+        if (!proc_array_cancel_conflicting_proc(latest_removed_xid, attempt == max_check_times)) {
+            break;
+        }
+    }
+}
+
+void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn,
+ TransactionId latest_removed_xid)
+{
+ smgr_redo_truncate_cancel_conflicting_proc(latest_removed_xid);
SMgrRelation reln = smgropen(rnode, InvalidBackendId);
smgrcreate(reln, MAIN_FORKNUM, true);
UpdateMinRecoveryPoint(lsn, false);
@@ -1264,6 +1285,7 @@ void smgr_redo(XLogReaderState* record)
XLogRecPtr lsn = record->EndRecPtr;
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
bool compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS);
+ TransactionId latest_removed_xid = InvalidTransactionId;
/* Backup blocks are not used in smgr records */
Assert(!XLogRecHasAnyBlockRefs(record));
@@ -1280,6 +1302,9 @@ void smgr_redo(XLogReaderState* record)
RelFileNode rnode;
RelFileNodeCopy(rnode, xlrec->rnode, (int2)XLogRecGetBucketId(record));
rnode.opt = compress ? ((xl_smgr_truncate_compress*)(void *)XLogRecGetData(record))->pageCompressOpts : 0;
+ if (XLogRecGetDataLen(record) == TRUNCATE_CONTAIN_XID_SIZE) {
+ latest_removed_xid = ((xl_smgr_truncate_compress*)(void *)XLogRecGetData(record))->latest_removed_xid;
+ }
/*
* Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in
@@ -1305,7 +1330,7 @@ void smgr_redo(XLogReaderState* record)
*/
/* Also tell xlogutils.c about it */
- xlog_block_smgr_redo_truncate(rnode, xlrec->blkno, lsn);
+ xlog_block_smgr_redo_truncate(rnode, xlrec->blkno, lsn, latest_removed_xid);
} else
ereport(PANIC, (errmsg("smgr_redo: unknown op code %u", info)));
}
diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp
index 99f82fd1366309446e7baa788291ba0d82abc834..1dd115185be00d0bbeaebdd923e44a376436dcf3 100644
--- a/src/common/backend/utils/init/globals.cpp
+++ b/src/common/backend/utils/init/globals.cpp
@@ -75,12 +75,13 @@ bool will_shutdown = false;
* NEXT | 92899 | ? | ?
*
********************************************/
-const uint32 GRAND_VERSION_NUM = 92907;
+const uint32 GRAND_VERSION_NUM = 92909;
/********************************************
* 2.VERSION NUM FOR EACH FEATURE
* Please write indescending order.
********************************************/
+const uint32 GB18030_2022_VERSION_NUM = 92908;
const uint32 PARAM_MARK_VERSION_NUM = 92907;
const uint32 TIMESCALE_DB_VERSION_NUM = 92904;
const uint32 MULTI_CHARSET_VERSION_NUM = 92903;
diff --git a/src/common/backend/utils/mb/Unicode/CMakeLists.txt b/src/common/backend/utils/mb/Unicode/CMakeLists.txt
index 924c40d2ea5e076514aa9e25f5610da9a6cb0a45..8e7f4b48000376394d3723bd981af20f1042aa6a 100644
--- a/src/common/backend/utils/mb/Unicode/CMakeLists.txt
+++ b/src/common/backend/utils/mb/Unicode/CMakeLists.txt
@@ -154,3 +154,10 @@ add_custom_command(
DEPENDS UCS_to_BIG5.pl BIG5.TXT CP950.TXT
COMMENT "Now Generating *.map"
)
+
+add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/gb18030_to_utf8_2022.map ${CMAKE_CURRENT_SOURCE_DIR}/utf8_to_gb18030_2022.map
+ COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/UCS_to_GB18030_2022.pl
+ DEPENDS UCS_to_GB18030_2022.pl gb-18030-2022.xml
+ COMMENT "Now Generating *.map"
+)
\ No newline at end of file
diff --git a/src/common/backend/utils/mb/Unicode/Makefile b/src/common/backend/utils/mb/Unicode/Makefile
index a777c8e6e4ed3bb2bebcf2482ea654553f5a5069..8580b491ebaa3d17b8584f4c0441b7774ded416f 100644
--- a/src/common/backend/utils/mb/Unicode/Makefile
+++ b/src/common/backend/utils/mb/Unicode/Makefile
@@ -50,6 +50,7 @@ SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \
euc_tw_to_utf8.map utf8_to_euc_tw.map \
sjis_to_utf8.map utf8_to_sjis.map \
gb18030_to_utf8.map utf8_to_gb18030.map \
+ gb18030_to_utf8_2022.map utf8_to_gb18030_2022.map \
big5_to_utf8.map utf8_to_big5.map
MAPS = $(GENERICMAPS) $(SPECIALMAPS)
@@ -89,6 +90,9 @@ sjis_to_utf8.map utf8_to_sjis.map : CP932.TXT
gb18030_to_utf8.map utf8_to_gb18030.map : gb-18030-2000.xml
$(PERL) $(srcdir)/UCS_to_GB18030.pl
+gb18030_to_utf8_2022.map utf8_to_gb18030_2022.map : gb-18030-2022.xml
+ $(PERL) $(srcdir)/UCS_to_GB18030_2022.pl
+
big5_to_utf8.map utf8_to_big5.map : BIG5.TXT CP950.TXT
$(PERL) $(srcdir)/UCS_to_BIG5.pl
diff --git a/src/common/backend/utils/mb/Unicode/UCS_to_GB18030_2022.pl b/src/common/backend/utils/mb/Unicode/UCS_to_GB18030_2022.pl
new file mode 100644
index 0000000000000000000000000000000000000000..ec61f20fbb0d507506353b6ac3fc46fd6fbc06ce
--- /dev/null
+++ b/src/common/backend/utils/mb/Unicode/UCS_to_GB18030_2022.pl
@@ -0,0 +1,92 @@
+#! /usr/bin/perl
+#
+# Copyright (c) 2007-2012, 2023, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/UCS_to_GB18030_2022.pl
+#
+# Generate UTF-8 <--> GB18030-2022 code conversion tables from
+# "gb-18030-2022.xml"
+#
+# The lines we care about in the source file look like
+#    <a u="009A" b="81 30 83 36"/>
+# where the "u" field is the Unicode code point in hex,
+# and the "b" field is the hex byte sequence for GB18030
+
+require "ucs2utf.pl"; # presumably defines ucs2utf(), which is called further down - confirm
+
+
+$change_file = "gb-18030-2022.xml"; # no directory part: resolved against the current working directory
+
+open(CODE_TABLE, $change_file) || die("cannot open $change_file"); # abort right away if the table is missing
+
+while (<CODE_TABLE>) # one line of gb-18030-2022.xml per iteration
+{
+    next if (! m/<a u="([0-9A-Fa-f]+)" b="([0-9A-Fa-f ]+)"/); # only <a u=".." b=".."/> entries carry a mapping
+    $u = $1;
+    $c = $2;
+    $c =~ s/ //g; # "A6 D9" -> "A6D9" so the byte sequence can be read as one hex number
+    $ucs_code = hex($u);
+    $code_gb = hex($c);
+    if ($code_gb >= 0x80 && $ucs_code >= 0x0080) # entries below 0x80 on either side are left out
+    {
+        $utf_code = &ucs2utf($ucs_code);
+        if ($code_u{$utf_code} ne "") # first mapping wins; later duplicates are reported and dropped
+        {
+            printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs_code;
+            next;
+        }
+        if ($code_c{$code_gb} ne "")
+        {
+            printf STDERR "Warning: duplicate GB18030: %08x\n", $code_gb;
+            next;
+        }
+        $code_u{$utf_code} = $code_gb; # UTF-8 -> GB18030
+        $code_c{$code_gb} = $utf_code; # GB18030 -> UTF-8
+        $number++; # number of accepted pairs, used as the array size in both map files
+    }
+}
+close(CODE_TABLE);
+
+$change_map_file = "gb18030_to_utf8_2022.map"; # output: GB18030 code -> UTF-8 code table
+open(CHANGE_MAP, "> $change_map_file") || die("cannot open $change_map_file");
+print CHANGE_MAP "static pg_local_to_utf LUmapGB18030_2022[ $number ] = {\n";
+
+$count = $number; # counts down so the last entry can be recognised
+for $pos (sort { $a <=> $b } keys(%code_c)) # numeric order of the GB18030 codes
+{
+ $utf_code = $code_c{$pos};
+ $count--;
+ if ($count == 0) # last entry: written without a trailing comma
+ {
+ printf CHANGE_MAP " {0x%04x, 0x%04x}\n", $pos, $utf_code;
+ }
+ else
+ {
+ printf CHANGE_MAP " {0x%04x, 0x%04x},\n", $pos, $utf_code;
+ }
+}
+
+print CHANGE_MAP "};\n";
+close(CHANGE_MAP);
+
+$change_map_file = "utf8_to_gb18030_2022.map"; # output: UTF-8 code -> GB18030 code table
+open(CHANGE_MAP, "> $change_map_file") || die("cannot open $change_map_file");
+print CHANGE_MAP "static pg_utf_to_local ULmapGB18030_2022[ $number ] = {\n";
+
+$count = $number; # counts down so the last entry can be recognised
+for $pos (sort { $a <=> $b } keys(%code_u)) # numeric order of the UTF-8 codes
+{
+ $code_gb = $code_u{$pos};
+ $count--;
+ if ($count == 0) # last entry: written without a trailing comma
+ {
+ printf CHANGE_MAP " {0x%04x, 0x%04x}\n", $pos, $code_gb;
+ }
+ else
+ {
+ printf CHANGE_MAP " {0x%04x, 0x%04x},\n", $pos, $code_gb;
+ }
+}
+
+print CHANGE_MAP "};\n";
+close(CHANGE_MAP);
diff --git a/src/common/backend/utils/mb/Unicode/gb-18030-2022.xml b/src/common/backend/utils/mb/Unicode/gb-18030-2022.xml
new file mode 100644
index 0000000000000000000000000000000000000000..79ce3ffc4fda910b802eefbfa413ff35fff078ad
--- /dev/null
+++ b/src/common/backend/utils/mb/Unicode/gb-18030-2022.xml
@@ -0,0 +1,78 @@
+
+
+
+
+
+ A list of character encodings in which the location of the GB/T 13000 code is changed compared to version 2000
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map b/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map
new file mode 100644
index 0000000000000000000000000000000000000000..275420c45cc06a8c903cdf4df264aa6839ce6f03
--- /dev/null
+++ b/src/common/backend/utils/mb/Unicode/gb18030_to_utf8_2022.map
@@ -0,0 +1,40 @@
+static pg_local_to_utf LUmapGB18030_2022[ 38 ] = {
+ {0xa6d9, 0xefb890},
+ {0xa6da, 0xefb892},
+ {0xa6db, 0xefb891},
+ {0xa6dc, 0xefb893},
+ {0xa6dd, 0xefb894},
+ {0xa6de, 0xefb895},
+ {0xa6df, 0xefb896},
+ {0xa6ec, 0xefb897},
+ {0xa6ed, 0xefb898},
+ {0xa6f3, 0xefb899},
+ {0xa8bc, 0xe1b8bf},
+ {0xfe59, 0xe9beb4},
+ {0xfe61, 0xe9beb5},
+ {0xfe66, 0xe9beb6},
+ {0xfe67, 0xe9beb7},
+ {0xfe6d, 0xe9beb8},
+ {0xfe7e, 0xe9beb9},
+ {0xfe90, 0xe9beba},
+ {0xfea0, 0xe9bebb},
+ {0x8135f437, 0xee9f87},
+ {0x82359037, 0xeea09e},
+ {0x82359038, 0xeea0a6},
+ {0x82359039, 0xeea0ab},
+ {0x82359130, 0xeea0ac},
+ {0x82359131, 0xeea0b2},
+ {0x82359132, 0xeea183},
+ {0x82359133, 0xeea194},
+ {0x82359134, 0xeea1a4},
+ {0x84318236, 0xee9e8d},
+ {0x84318237, 0xee9e8f},
+ {0x84318238, 0xee9e8e},
+ {0x84318239, 0xee9e90},
+ {0x84318330, 0xee9e91},
+ {0x84318331, 0xee9e92},
+ {0x84318332, 0xee9e93},
+ {0x84318333, 0xee9e94},
+ {0x84318334, 0xee9e95},
+ {0x84318335, 0xee9e96}
+};
diff --git a/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map b/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map
new file mode 100644
index 0000000000000000000000000000000000000000..9fafde791447c1410ea0c9d39ef5f4815378e53d
--- /dev/null
+++ b/src/common/backend/utils/mb/Unicode/utf8_to_gb18030_2022.map
@@ -0,0 +1,40 @@
+static pg_utf_to_local ULmapGB18030_2022[ 38 ] = {
+ {0xe1b8bf, 0xa8bc},
+ {0xe9beb4, 0xfe59},
+ {0xe9beb5, 0xfe61},
+ {0xe9beb6, 0xfe66},
+ {0xe9beb7, 0xfe67},
+ {0xe9beb8, 0xfe6d},
+ {0xe9beb9, 0xfe7e},
+ {0xe9beba, 0xfe90},
+ {0xe9bebb, 0xfea0},
+ {0xee9e8d, 0x84318236},
+ {0xee9e8e, 0x84318238},
+ {0xee9e8f, 0x84318237},
+ {0xee9e90, 0x84318239},
+ {0xee9e91, 0x84318330},
+ {0xee9e92, 0x84318331},
+ {0xee9e93, 0x84318332},
+ {0xee9e94, 0x84318333},
+ {0xee9e95, 0x84318334},
+ {0xee9e96, 0x84318335},
+ {0xee9f87, 0x8135f437},
+ {0xeea09e, 0x82359037},
+ {0xeea0a6, 0x82359038},
+ {0xeea0ab, 0x82359039},
+ {0xeea0ac, 0x82359130},
+ {0xeea0b2, 0x82359131},
+ {0xeea183, 0x82359132},
+ {0xeea194, 0x82359133},
+ {0xeea1a4, 0x82359134},
+ {0xefb890, 0xa6d9},
+ {0xefb891, 0xa6db},
+ {0xefb892, 0xa6da},
+ {0xefb893, 0xa6dc},
+ {0xefb894, 0xa6dd},
+ {0xefb895, 0xa6de},
+ {0xefb896, 0xa6df},
+ {0xefb897, 0xa6ec},
+ {0xefb898, 0xa6ed},
+ {0xefb899, 0xa6f3}
+};
diff --git a/src/common/backend/utils/mb/conv.cpp b/src/common/backend/utils/mb/conv.cpp
index c36ca872461680d7ec803ac79bce0ee4b1faf61e..816185d4183ac8330276032a50551ba6ef28081b 100644
--- a/src/common/backend/utils/mb/conv.cpp
+++ b/src/common/backend/utils/mb/conv.cpp
@@ -14,6 +14,8 @@
#include "postgres.h"
#include "knl/knl_variable.h"
#include "mb/pg_wchar.h"
+#include "Unicode/gb18030_to_utf8_2022.map"
+#include "Unicode/utf8_to_gb18030_2022.map"
/*
* LATINn ---> MIC when the charset's local codes map directly to MIC
@@ -479,6 +481,16 @@ void UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_
l = l_save;
}
/* Now check ordinary map */
+ // add gb18030-2022 conv judge.
+ if (encoding == PG_GB18030_2022) {
+ p = (pg_utf_to_local*)bsearch(&iutf, ULmapGB18030_2022,
+ lengthof(ULmapGB18030_2022), sizeof(pg_utf_to_local), compare1);
+ if (p != NULL) {
+ iso = store_coded_char(iso, p->code);
+ continue;
+ }
+ }
+
p = (pg_utf_to_local *)bsearch(&iutf, map, mapsize, sizeof(pg_utf_to_local), compare1);
if (p != NULL) {
iso = store_coded_char(iso, p->code);
@@ -602,6 +614,15 @@ void LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_
iiso |= *iso++;
}
+ // add gb18030-2022 conv judge
+ if (encoding == PG_GB18030_2022) {
+ p = (pg_local_to_utf*)bsearch(&iiso, LUmapGB18030_2022,
+ lengthof(LUmapGB18030_2022), sizeof(pg_local_to_utf), compare2);
+ if (p != NULL) {
+ utf = store_coded_char(utf, p->utf);
+ continue;
+ }
+ }
p = (pg_local_to_utf*)bsearch(&iiso, map, mapsize, sizeof(pg_local_to_utf), compare2);
if (p != NULL) {
utf = store_coded_char(utf, p->utf);
diff --git a/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt b/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt
index 47def6cc978e9ce32d337e94ca63f7a25f258315..25148ad9e3912bd2b58b4b406d3731cd9fc31dca 100755
--- a/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt
+++ b/src/common/backend/utils/mb/conversion_procs/CMakeLists.txt
@@ -8,6 +8,7 @@ SET(unicode_cmd_src
"${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_SJIS.pl|"
"${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_GB18030.pl|"
"${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_BIG5.pl|"
+ "${PROJECT_SRC_DIR}/common/backend/utils/mb/Unicode|||perl UCS_to_GB18030_2022.pl|"
)
add_cmd_gen_when_configure(perl_target unicode_cmd_src)
diff --git a/src/common/backend/utils/mb/conversion_procs/Makefile b/src/common/backend/utils/mb/conversion_procs/Makefile
index c3b697d47884df574cfa7caa3b4bd4425d953c47..06937885ebc30800937aa265d872b78b4bdd4787 100644
--- a/src/common/backend/utils/mb/conversion_procs/Makefile
+++ b/src/common/backend/utils/mb/conversion_procs/Makefile
@@ -120,6 +120,8 @@ CONVERSIONS = \
utf8_to_euc_tw UTF8 EUC_TW utf8_to_euc_tw utf8_and_euc_tw \
gb18030_to_utf8 GB18030 UTF8 gb18030_to_utf8 utf8_and_gb18030 \
utf8_to_gb18030 UTF8 GB18030 utf8_to_gb18030 utf8_and_gb18030 \
+ gb18030_2022_to_utf8 GB18030_2022 UTF8 gb18030_2022_to_utf8 utf8_and_gb18030 \
+ utf8_to_gb18030_2022 UTF8 GB18030_2022 utf8_to_gb18030_2022 utf8_and_gb18030 \
gbk_to_utf8 GBK UTF8 gbk_to_utf8 utf8_and_gbk \
utf8_to_gbk UTF8 GBK utf8_to_gbk utf8_and_gbk \
utf8_to_iso_8859_2 UTF8 LATIN2 utf8_to_iso8859 utf8_and_iso8859 \
diff --git a/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in b/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in
index 9fa3be54bbe174469b7e225737025a42b6962394..899b680807d5c5ac760b4a4cbd0251efaee51e3f 100644
--- a/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in
+++ b/src/common/backend/utils/mb/conversion_procs/conversion_create.sql.in
@@ -130,3 +130,5 @@ shift_jis_2004_to_utf8 SHIFT_JIS_2004 UTF8 shift_jis_2004_to_utf8 utf8_and_sjis2
utf8_to_shift_jis_2004 UTF8 SHIFT_JIS_2004 utf8_to_shift_jis_2004 utf8_and_sjis2004
euc_jis_2004_to_shift_jis_2004 EUC_JIS_2004 SHIFT_JIS_2004 euc_jis_2004_to_shift_jis_2004 euc2004_sjis2004
shift_jis_2004_to_euc_jis_2004 SHIFT_JIS_2004 EUC_JIS_2004 shift_jis_2004_to_euc_jis_2004 euc2004_sjis2004
+gb18030_2022_to_utf8 GB18030_2022 UTF8 gb18030_2022_to_utf8 utf8_and_gb18030
+utf8_to_gb18030_2022 UTF8 GB18030_2022 utf8_to_gb18030_2022 utf8_and_gb18030
diff --git a/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp b/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp
index 52277e6c99f1c3d412f7485f8f77e0d6baafe32f..ff3c43eef95372b00d38c67fe39e1701a9716a63 100644
--- a/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp
+++ b/src/common/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.cpp
@@ -23,9 +23,15 @@ PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(gb18030_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_gb18030);
+PG_FUNCTION_INFO_V1(gb18030_2022_to_utf8);
+PG_FUNCTION_INFO_V1(utf8_to_gb18030_2022);
+
extern "C" Datum gb18030_to_utf8(PG_FUNCTION_ARGS);
extern "C" Datum utf8_to_gb18030(PG_FUNCTION_ARGS);
+extern "C" Datum gb18030_2022_to_utf8(PG_FUNCTION_ARGS);
+extern "C" Datum utf8_to_gb18030_2022(PG_FUNCTION_ARGS);
+
/*
* Convert 4-byte GB18030 characters to and from a linear code space
*
@@ -195,3 +201,31 @@ Datum utf8_to_gb18030(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
+
+// GB18030-2022 ==> UTF8: argument 2 is the source buffer, 3 the destination buffer, 4 the source length.
+Datum gb18030_2022_to_utf8(PG_FUNCTION_ARGS)
+{
+    int srcLen = PG_GETARG_INT32(4);
+    unsigned char* outBuf = (unsigned char*)PG_GETARG_CSTRING(3);
+    unsigned char* inBuf = (unsigned char*)PG_GETARG_CSTRING(2);
+
+    // check that this procedure was invoked for the GB18030_2022 -> UTF8 pair before converting.
+    CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030_2022, PG_UTF8);
+    LocalToUtf(inBuf, srcLen, outBuf, LUmapGB18030, lengthof(LUmapGB18030), NULL, 0,
+        conv_18030_to_utf8, PG_GB18030_2022);
+    PG_RETURN_VOID();
+}
+
+// UTF8 ==> GB18030-2022: argument 2 is the source buffer, 3 the destination buffer, 4 the source length.
+Datum utf8_to_gb18030_2022(PG_FUNCTION_ARGS)
+{
+    unsigned char* src = (unsigned char*)PG_GETARG_CSTRING(2);
+    unsigned char* dest = (unsigned char*)PG_GETARG_CSTRING(3);
+    int len = PG_GETARG_INT32(4);
+    // check whether the conversion relationship between two character sets exists.
+    CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030_2022);
+
+    UtfToLocal(src, len, dest, ULmapGB18030, lengthof(ULmapGB18030), NULL, 0, conv_utf8_to_18030, PG_GB18030_2022);
+
+    PG_RETURN_VOID();
+}
diff --git a/src/common/backend/utils/mb/encnames.cpp b/src/common/backend/utils/mb/encnames.cpp
index 9481606e615d0f3f3af642126ca143cd195db3fc..4cda548b26d5beecdf7ed5631607a4e6993d6dd1 100644
--- a/src/common/backend/utils/mb/encnames.cpp
+++ b/src/common/backend/utils/mb/encnames.cpp
@@ -45,6 +45,7 @@ pg_encname pg_encname_tbl[] = {
{"euckr", PG_EUC_KR}, /* EUC-KR; Extended Unix Code for Korean, KS X 1001 standard */
{"euctw", PG_EUC_TW}, /* EUC-TW; Extended Unix Code for traditional Chinese */
{"gb18030", PG_GB18030}, /* GB18030;GB18030 */
+ {"gb180302022", PG_GB18030_2022}, /* GB18030-2022;version 2022 for GB18030 */
{"gbk", PG_GBK}, /* GBK; Chinese Windows CodePage 936 simplified Chinese */
{"iso88591", PG_LATIN1}, /* ISO-8859-1; RFC1345,KXS2 */
{"iso885910", PG_LATIN6}, /* ISO-8859-10; RFC1345,KXS2 */
@@ -188,6 +189,7 @@ pg_enc2name pg_enc2name_tbl[] = {DEF_ENC2NAME(SQL_ASCII, 0),
DEF_ENC2NAME(WIN1257, 1257),
DEF_ENC2NAME(KOI8U, 21866),
DEF_ENC2NAME(GB18030, 54936),
+ DEF_ENC2NAME(GB18030_2022, 54936),
DEF_ENC2NAME(SJIS, 932),
DEF_ENC2NAME(BIG5, 950),
DEF_ENC2NAME(UHC, 0),
diff --git a/src/common/backend/utils/mb/mbutils.cpp b/src/common/backend/utils/mb/mbutils.cpp
index e336cea1d710f4dce6412f972c6820730347fd5f..85663c196f1ee219c8d34bac1ccf333220109a9b 100644
--- a/src/common/backend/utils/mb/mbutils.cpp
+++ b/src/common/backend/utils/mb/mbutils.cpp
@@ -39,6 +39,24 @@ typedef struct ConvProcInfo {
static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server);
static int cliplen(const char* str, int len, int limit);
+/*
+ * NoNeedToConvert: tell whether text can move between srcEncoding and destEncoding
+ * without going through a conversion procedure. Returns true when the encodings are
+ * identical, when either one is SQL_ASCII, or when they are GB18030 and GB18030_2022.
+ */
+bool NoNeedToConvert(int srcEncoding, int destEncoding)
+{
+    // identical encodings, or SQL_ASCII on either side
+    bool trivialPair = (srcEncoding == destEncoding) ||
+                       (srcEncoding == PG_SQL_ASCII) || (destEncoding == PG_SQL_ASCII);
+
+    // GB18030 and GB18030_2022 are accepted as a no-conversion pair in both directions
+    bool gb18030Family = (srcEncoding == PG_GB18030_2022 && destEncoding == PG_GB18030) ||
+                         (srcEncoding == PG_GB18030 && destEncoding == PG_GB18030_2022);
+
+    return trivialPair || gb18030Family;
+}
+
/*
* Prepare for a future call to SetClientEncoding. Success should mean
* that SetClientEncoding is guaranteed to succeed for this encoding request.
@@ -66,7 +84,7 @@ int PrepareClientEncoding(int encoding)
* Check for cases that require no conversion function.
*/
current_server_encoding = GetDatabaseEncoding();
- if (current_server_encoding == encoding || current_server_encoding == PG_SQL_ASCII || encoding == PG_SQL_ASCII) {
+ if (NoNeedToConvert(current_server_encoding, encoding)) {
return 0;
}
@@ -159,7 +177,7 @@ int SetClientEncoding(int encoding)
* Check for cases that require no conversion function.
*/
current_server_encoding = GetDatabaseEncoding();
- if (current_server_encoding == encoding || current_server_encoding == PG_SQL_ASCII || encoding == PG_SQL_ASCII) {
+ if (NoNeedToConvert(current_server_encoding, encoding)) {
u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding];
u_sess->mb_cxt.ToServerConvProc = NULL;
u_sess->mb_cxt.ToClientConvProc = NULL;
@@ -277,10 +295,7 @@ unsigned char* pg_do_encoding_conversion(unsigned char* src, int len, int src_en
if (!IsTransactionState()) {
return src;
}
- if (src_encoding == dest_encoding) {
- return src;
- }
- if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII) {
+ if (NoNeedToConvert(src_encoding, dest_encoding)) {
return src;
}
if (len <= 0) {
@@ -673,7 +688,8 @@ char* pg_any_to_server(const char* s, int len, int encoding)
bulkload_illegal_chars_conversion = true;
}
- if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII) {
+ if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII ||
+ (encoding == PG_GB18030 && u_sess->mb_cxt.DatabaseEncoding->encoding == PG_GB18030_2022)) {
/*
* No conversion is needed, but we must still validate the data.
*/
diff --git a/src/common/backend/utils/mb/wchar.cpp b/src/common/backend/utils/mb/wchar.cpp
index be064bf2c9c9493b7e4dcec5498d0793dd937078..3d95322a01bed897dae3d1fb9cdb07c842e25101 100644
--- a/src/common/backend/utils/mb/wchar.cpp
+++ b/src/common/backend/utils/mb/wchar.cpp
@@ -1990,6 +1990,12 @@ pg_wchar_tbl pg_wchar_table[] = {
pg_gb18030_dsplen,
pg_gb18030_verifier,
4}, /* PG_GB18030 */
+ {pg_gb180302wchar_with_len,
+ pg_wchar2gb18030_with_len,
+ pg_gb18030_mblen,
+ pg_gb18030_dsplen,
+ pg_gb18030_verifier,
+ 4}, /* PG_GB18030_2022 */
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp
index 87f65f653b490e5ea6bc52e8be15f1c8ba1569d4..8c382b3761e85cc6abb42e62dc7cd1254f62e6c5 100755
--- a/src/common/backend/utils/misc/guc.cpp
+++ b/src/common/backend/utils/misc/guc.cpp
@@ -174,6 +174,7 @@
#include "utils/guc_resource.h"
#include "utils/mem_snapshot.h"
#include "nodes/parsenodes_common.h"
+#include "mb/pg_wchar.h"
#ifndef PG_KRB_SRVTAB
#define PG_KRB_SRVTAB ""
@@ -8306,6 +8307,9 @@ static void set_config_sourcefile(const char* name, char* sourcefile, int source
*/
void SetConfigOption(const char* name, const char* value, GucContext context, GucSource source)
{
+ if (strcmp(name, "client_encoding") == 0 && pg_char_to_encoding(value) == PG_GB18030_2022) {
+ value = "gb18030";
+ }
(void)set_config_option(name, value, context, source, GUC_ACTION_SET, true, 0);
}
diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp
index ca3561f28f8142260806ac34574c210cbb978579..f3e3be89cc54b7b7dd3c4fd7ee81012c96fed3ac 100755
--- a/src/common/backend/utils/misc/guc/guc_storage.cpp
+++ b/src/common/backend/utils/misc/guc/guc_storage.cpp
@@ -3263,6 +3263,48 @@ static void InitStorageConfigureNamesInt()
NULL,
NULL,
NULL},
+ {{"standby_recycle_interval",
+ PGC_SIGHUP,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Sets the maximum wait time to recycle."),
+ NULL,
+ GUC_UNIT_S},
+ &g_instance.attr.attr_storage.standby_recycle_interval,
+ 10, /* 10s */
+ 0,
+ 3600 * 24, /* 24hour */
+ NULL,
+ NULL,
+ NULL},
+ {{"standby_max_query_time",
+ PGC_SIGHUP,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Sets the maximum time allowed for query on standby."),
+ NULL,
+ GUC_UNIT_S},
+ &g_instance.attr.attr_storage.standby_max_query_time,
+ 600, /* 10min */
+ 0,
+ 3600 * 24, /* 24hour */
+ NULL,
+ NULL,
+ NULL},
+ {{"base_page_saved_interval",
+ PGC_POSTMASTER,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Save a base page every time the page redo as many xlogs as the parameter value."),
+ NULL,
+ 0},
+ &g_instance.attr.attr_storage.base_page_saved_interval,
+ 400,
+ 5,
+ 2000,
+ NULL,
+ NULL,
+ NULL},
{{"force_promote",
PGC_POSTMASTER,
NODE_ALL,
@@ -3891,6 +3933,19 @@ static void InitStorageConfigureNamesReal()
NULL,
NULL,
NULL},
+ {{"standby_force_recyle_ratio",
+ PGC_SIGHUP,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Sets the ratio that triggers forced recycling in extreme-rto standby read."),
+ NULL},
+ &g_instance.attr.attr_storage.standby_force_recyle_ratio,
+ 0.8,
+ 0.0,
+ 1.0,
+ NULL,
+ NULL,
+ NULL},
{{"bypass_dram",
PGC_SIGHUP,
NODE_ALL,
@@ -4041,6 +4096,32 @@ static void InitStorageConfigureNamesInt64()
NULL,
NULL,
NULL},
+ {{"max_standby_base_page_size",
+ PGC_POSTMASTER,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Sets the max size of base page files on standby"),
+ NULL},
+ &g_instance.attr.attr_storage.max_standby_base_page_size,
+ INT64CONST(0x4000000000), /* 256GB */
+ INT64CONST(0),
+ INT64CONST(0x7FFFFFFFFFFFFFF),
+ NULL,
+ NULL,
+ NULL},
+ {{"max_standby_lsn_info_size",
+ PGC_POSTMASTER,
+ NODE_ALL,
+ RESOURCES_RECOVERY,
+ gettext_noop("Sets the max size of lsn info files on standby"),
+ NULL},
+ &g_instance.attr.attr_storage.max_standby_lsn_info_size,
+ INT64CONST(0x4000000000), /* 256GB */
+ INT64CONST(0),
+ INT64CONST(0x7FFFFFFFFFFFFFF),
+ NULL,
+ NULL,
+ NULL},
/* End-of-list marker */
{{NULL,
(GucContext)0,
diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp
index ca405605d2aff0a3ec8daaf4dfb190e63d87eac4..3a288e254c3d1e6fc2200e02151f78b10b3bbdd5 100644
--- a/src/common/backend/utils/time/snapmgr.cpp
+++ b/src/common/backend/utils/time/snapmgr.cpp
@@ -1062,6 +1062,9 @@ static void SnapshotResetXmin(void)
t_thrd.proc->snapCSN = InvalidCommitSeqNo;
t_thrd.pgxact->csn_min = InvalidCommitSeqNo;
t_thrd.pgxact->csn_dr = InvalidCommitSeqNo;
+
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
}
}
diff --git a/src/gausskernel/CMakeLists.txt b/src/gausskernel/CMakeLists.txt
index 29fa43a5ce2e54dea57f02ff8633d99477b7a662..21b9e9e44fba85c31eff9fd6ac5c5f3601d245c5 100755
--- a/src/gausskernel/CMakeLists.txt
+++ b/src/gausskernel/CMakeLists.txt
@@ -179,6 +179,7 @@ list(APPEND gaussdb_objects
$
$
$
+ $
$
$
$
diff --git a/src/gausskernel/optimizer/commands/dbcommands.cpp b/src/gausskernel/optimizer/commands/dbcommands.cpp
index df588ce4d52cdc314e3206179ffe4f64178ab50d..631a9683687c2550fc908c68d3fc9c3d835d83cc 100644
--- a/src/gausskernel/optimizer/commands/dbcommands.cpp
+++ b/src/gausskernel/optimizer/commands/dbcommands.cpp
@@ -33,6 +33,8 @@
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "access/multixact.h"
+#include "access/multi_redo_api.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
@@ -267,6 +269,10 @@ Oid createdb(const CreatedbStmt* stmt)
if (encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("%s is not a valid encoding name", encoding_name)));
+ if (t_thrd.proc->workingVersionNum < GB18030_2022_VERSION_NUM && encoding == PG_GB18030_2022) {
+ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("Not support to create database encoding %s in upgrade!", encoding_name)));
+ }
} else
ereport(ERROR,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
@@ -783,7 +789,8 @@ void check_encoding_locale_matches(int encoding, const char* collate, const char
#ifdef WIN32
encoding == PG_UTF8 ||
#endif
- (encoding == PG_SQL_ASCII && superuser())))
+ (encoding == PG_SQL_ASCII && superuser()) ||
+ (encoding == PG_GB18030_2022 && ctype_encoding == PG_GB18030)))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), ctype),
@@ -794,7 +801,8 @@ void check_encoding_locale_matches(int encoding, const char* collate, const char
#ifdef WIN32
encoding == PG_UTF8 ||
#endif
- (encoding == PG_SQL_ASCII && superuser())))
+ (encoding == PG_SQL_ASCII && superuser()) ||
+ (encoding == PG_GB18030_2022 && collate_encoding == PG_GB18030)))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("encoding \"%s\" does not match locale \"%s\"", pg_encoding_to_char(encoding), collate),
@@ -2434,7 +2442,10 @@ void do_db_drop(Oid dbId, Oid tbSpcId)
if (!rmtree(dst_path, true)) {
ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dst_path)));
}
-
+ if (IS_EXRTO_READ) {
+ /* remove file start with {db_id}_ */
+ extreme_rto_standby_read::remove_block_meta_info_files_of_db(dbId);
+ }
if (InHotStandby) {
/*
* Release locks prior to commit. XXX There is a race condition
diff --git a/src/gausskernel/optimizer/commands/vacuumlazy.cpp b/src/gausskernel/optimizer/commands/vacuumlazy.cpp
index 6593883faf0fbd1fddc325f4d4e36ef23b43e98f..75792c5cd16ceb978bba61a4568dc29f66173c57 100644
--- a/src/gausskernel/optimizer/commands/vacuumlazy.cpp
+++ b/src/gausskernel/optimizer/commands/vacuumlazy.cpp
@@ -2057,9 +2057,9 @@ lazy_truncate_heap(Relation onerel, VacuumStmt *vacstmt, LVRelStats *vacrelstats
*/
if (RelationIsPartition(onerel)) {
Assert(vacstmt->onepart && vacstmt->onepartrel);
- PartitionTruncate(vacstmt->onepartrel, vacstmt->onepart, new_rel_pages);
+ PartitionTruncate(vacstmt->onepartrel, vacstmt->onepart, new_rel_pages, vacrelstats->latestRemovedXid);
} else {
- RelationTruncate(onerel, new_rel_pages);
+ RelationTruncate(onerel, new_rel_pages, vacrelstats->latestRemovedXid);
}
/*
diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp
index 5f5acd1d7c40f6fd585956d4aef6c17dfc53de19..bd1cb4a994c23f480e208406176d1f4bebc97fce 100644
--- a/src/gausskernel/optimizer/commands/variable.cpp
+++ b/src/gausskernel/optimizer/commands/variable.cpp
@@ -762,7 +762,7 @@ bool check_client_encoding(char** newval, void** extra, GucSource source)
/* Look up the encoding by name */
encoding = pg_valid_client_encoding(*newval);
- if (encoding < 0) {
+ if (encoding < 0 || encoding == PG_GB18030_2022) {
return false;
}
diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp
index ea5d789a776becb114182cb563d622b0647ab184..c85ca2f44778acbfa40282eb1e354180b829d14b 100644
--- a/src/gausskernel/process/postmaster/postmaster.cpp
+++ b/src/gausskernel/process/postmaster/postmaster.cpp
@@ -80,6 +80,7 @@
#endif
#include "access/cbmparsexlog.h"
+#include "access/extreme_rto/standby_read.h"
#include "access/obs/obs_am.h"
#include "access/transam.h"
#include "access/ustore/undo/knl_uundoapi.h"
@@ -233,6 +234,7 @@
#include "access/multi_redo_api.h"
#include "postmaster/postmaster.h"
#include "access/parallel_recovery/dispatcher.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
#include "utils/distribute_test.h"
#ifdef ENABLE_MULTIPLE_NODES
#include "tsdb/compaction/compaction_entry.h"
@@ -313,6 +315,8 @@ extern void gs_set_hs_shm_data(HaShmemData* ha_shm_data);
extern void ReaperBackendMain();
extern void AdjustThreadAffinity();
+extern void exrto_standby_read_init();
+
#define EXTERN_SLOTS_NUM 17
volatile PMState pmState = PM_INIT;
bool dummyStandbyMode = false;
@@ -3090,10 +3094,7 @@ int PostmasterMain(int argc, char* argv[])
/* init sharestorge(dorado) */
ShareStorageInit();
-
- /*
- * We're ready to rock and roll...
- */
+ exrto_standby_read_init();
if (ENABLE_DMS && ENABLE_REFORM) {
if (!DMSWaitInitStartup()) {
if (g_instance.pid_cxt.StartupPID == 0) {
@@ -3347,10 +3348,10 @@ static void CheckExtremeRtoGUCConflicts(void)
}
#ifndef ENABLE_MULTIPLE_NODES
- if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) {
+ if (IS_DISASTER_RECOVER_MODE &&(g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) {
ereport(ERROR,
(errcode(ERRCODE_SYSTEM_ERROR),
- errmsg("extreme rto could not support hot standby."),
+ errmsg("For disaster standby cluster, extreme rto could not support hot standby."),
errhint("Either turn off extreme rto, or turn off hot_standby.")));
}
#endif
@@ -4313,7 +4314,7 @@ static int ServerLoop(void)
if (g_instance.attr.attr_storage.enable_ustore &&
g_instance.pid_cxt.GlobalStatsPID == 0 &&
- pmState == PM_RUN) {
+ (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) {
g_instance.pid_cxt.GlobalStatsPID = initialize_util_thread(GLOBALSTATS_THREAD);
}
@@ -5094,7 +5095,7 @@ int ProcessStartupPacket(Port* port, bool SSLdone)
} else {
#ifdef ENABLE_MULTIPLE_NODES
if (STANDBY_MODE == hashmdata->current_mode && (!IS_MULTI_DISASTER_RECOVER_MODE || GTM_FREE_MODE ||
- g_instance.attr.attr_storage.recovery_parse_workers > 1)) {
+ (IS_PGXC_DATANODE && !g_instance.attr.attr_storage.EnableHotStandby))) {
ereport(ERROR, (errcode(ERRCODE_CANNOT_CONNECT_NOW),
errmsg("can not accept connection in standby mode.")));
}
@@ -5868,6 +5869,10 @@ static void SIGHUP_handler(SIGNAL_ARGS)
signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGHUP);
}
+ if (g_instance.pid_cxt.exrto_recycler_pid != 0) {
+ signal_child(g_instance.pid_cxt.exrto_recycler_pid, SIGHUP);
+ }
+
if (g_instance.pid_cxt.GlobalStatsPID != 0) {
signal_child(g_instance.pid_cxt.GlobalStatsPID, SIGHUP);
}
@@ -6959,7 +6964,11 @@ static void reaper(SIGNAL_ARGS)
if (g_instance.pid_cxt.CBMWriterPID == 0 && !dummyStandbyMode &&
u_sess->attr.attr_storage.enable_cbm_tracking)
g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER);
+
+ if (IS_EXRTO_READ && g_instance.pid_cxt.exrto_recycler_pid == 0) {
+ g_instance.pid_cxt.exrto_recycler_pid = initialize_util_thread(EXRTO_RECYCLER);
+ }
/*
* Likewise, start other special children as needed. In a restart
@@ -7746,6 +7755,15 @@ static void reaper(SIGNAL_ARGS)
continue;
}
+ if (pid == g_instance.pid_cxt.exrto_recycler_pid) {
+ g_instance.pid_cxt.exrto_recycler_pid = 0;
+
+ if (!EXIT_STATUS_0(exitstatus)) {
+ HandleChildCrash(pid, exitstatus, _("Exrto recycle process"));
+ }
+ continue;
+ }
+
if (get_real_recovery_parallelism() > 1) {
PageRedoExitStatus pageredoStatus = CheckExitPageWorkers(pid);
if (pageredoStatus == PAGE_REDO_THREAD_EXIT_NORMAL) {
@@ -8328,6 +8346,7 @@ static void AsssertAllChildThreadExit()
Assert(g_instance.pid_cxt.CommPoolerCleanPID == 0);
Assert(g_instance.pid_cxt.UndoLauncherPID == 0);
Assert(g_instance.pid_cxt.UndoRecyclerPID == 0);
+ Assert(g_instance.pid_cxt.exrto_recycler_pid == 0);
#ifndef ENABLE_MULTIPLE_NODES
Assert(g_instance.pid_cxt.ApplyLauncerPID == 0);
#endif
@@ -8401,7 +8420,7 @@ static void PostmasterStateMachine(void)
#endif /* ENABLE_MULTIPLE_NODES */
g_instance.pid_cxt.UndoLauncherPID == 0 && g_instance.pid_cxt.UndoRecyclerPID == 0 &&
- g_instance.pid_cxt.GlobalStatsPID == 0 &&
+ g_instance.pid_cxt.exrto_recycler_pid == 0 && g_instance.pid_cxt.GlobalStatsPID == 0 &&
#ifndef ENABLE_MULTIPLE_NODES
g_instance.pid_cxt.ApplyLauncerPID == 0 &&
#endif
@@ -8619,6 +8638,7 @@ static void PostmasterStateMachine(void)
hashmdata = t_thrd.postmaster_cxt.HaShmData;
hashmdata->current_mode = cur_mode;
NotifyGscHotStandby();
+ exrto_standby_read_init();
g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP);
Assert(g_instance.pid_cxt.StartupPID != 0);
pmState = PM_STARTUP;
@@ -8662,6 +8682,7 @@ static void PostmasterStateMachine(void)
PMUpdateDBState(STARTING_STATE, get_cur_mode(), get_cur_repl_num());
}
+ exrto_standby_read_init();
g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP);
Assert(g_instance.pid_cxt.StartupPID != 0);
pmState = PM_STARTUP;
@@ -13197,6 +13218,21 @@ bool PMstateIsRun(void)
return PM_RUN == pmState;
}
+bool pm_state_is_startup()
+{
+    return PM_STARTUP == pmState;
+}
+
+bool pm_state_is_recovery()
+{
+    return PM_RECOVERY == pmState;
+}
+
+bool pm_state_is_hot_standby()
+{
+    return PM_HOT_STANDBY == pmState;
+}
+
/* malloc api of cJSON at backend side */
static void* cJSON_internal_malloc(size_t size)
{
@@ -13296,6 +13332,9 @@ static void SetAuxType()
case SHARE_STORAGE_XLOG_COPYER:
t_thrd.bootstrap_cxt.MyAuxProcType = XlogCopyBackendProcess;
break;
+ case EXRTO_RECYCLER:
+ t_thrd.bootstrap_cxt.MyAuxProcType = ExrtoRecyclerProcess;
+ break;
#ifdef ENABLE_MULTIPLE_NODES
case BARRIER_PREPARSE:
t_thrd.bootstrap_cxt.MyAuxProcType = BarrierPreParseBackendProcess;
@@ -13589,6 +13628,10 @@ int GaussDbAuxiliaryThreadMain(knl_thread_arg* arg)
SharedStorageXlogCopyBackendMain();
proc_exit(1);
break;
+ case EXRTO_RECYCLER:
+ extreme_rto::exrto_recycle_main();
+ proc_exit(1);
+ break;
#ifdef ENABLE_MULTIPLE_NODES
case BARRIER_PREPARSE:
BarrierPreParseMain();
@@ -13846,6 +13889,7 @@ int GaussDbThreadMain(knl_thread_arg* arg)
case PAGEREPAIR_THREAD:
case HEARTBEAT:
case SHARE_STORAGE_XLOG_COPYER:
+ case EXRTO_RECYCLER:
#ifdef ENABLE_MULTIPLE_NODES
case BARRIER_PREPARSE:
case TS_COMPACTION:
@@ -14399,6 +14443,7 @@ static ThreadMetaData GaussdbThreadGate[] = {
{ GaussDbThreadMain, APPLY_WORKER, "applyworker", "apply worker" },
{ GaussDbThreadMain, STACK_PERF_WORKER, "stack_perf", "stack perf worker" },
{ GaussDbThreadMain, DMS_AUXILIARY_THREAD, "dms_auxiliary", "maintenance xmin in dms" },
+ { GaussDbThreadMain, EXRTO_RECYCLER, "exrtorecycler", "exrto recycler" },
/* Keep the block in the end if it may be absent !!! */
#ifdef ENABLE_MULTIPLE_NODES
diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp
index 216f6f21d843baf7d149a5e1a8e49a0b4d4f4ef6..92ef81cb28b71e75e5328ed2bb005172163e5202 100755
--- a/src/gausskernel/process/threadpool/knl_instance.cpp
+++ b/src/gausskernel/process/threadpool/knl_instance.cpp
@@ -324,7 +324,7 @@ static void knl_g_parallel_redo_init(knl_g_parallel_redo_context* predo_cxt)
rc = memset_s(&predo_cxt->redoCpuBindcontrl, sizeof(RedoCpuBindControl), 0, sizeof(RedoCpuBindControl));
securec_check(rc, "", "");
-
+ predo_cxt->global_recycle_lsn = InvalidXLogRecPtr;
predo_cxt->redoItemHash = NULL;
}
@@ -501,6 +501,7 @@ static void KnlGUndoInit(knl_g_undo_context *undoCxt)
undoCxt->undoChainTotalSize = 0;
undoCxt->globalFrozenXid = InvalidTransactionId;
undoCxt->globalRecycleXid = InvalidTransactionId;
+ undoCxt->is_exrto_residual_undo_file_recycled = false;
}
static void knl_g_flashback_init(knl_g_flashback_context *flashbackCxt)
diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp
index c321ac6e10262978510439ae999c424de1f32888..3c26794b790f118bdbf55a6ff30c5ed3611e9dc7 100755
--- a/src/gausskernel/process/threadpool/knl_thread.cpp
+++ b/src/gausskernel/process/threadpool/knl_thread.cpp
@@ -960,6 +960,15 @@ static void knl_t_page_redo_init(knl_t_page_redo_context* page_redo_cxt)
page_redo_cxt->got_SIGHUP = false;
page_redo_cxt->sleep_long = false;
page_redo_cxt->check_repair = false;
+ page_redo_cxt->redo_worker_ptr = NULL;
+}
+
+static void knl_t_exrto_recycle_init(knl_t_exrto_recycle_context* exrto_recycle_cxt)
+{
+    exrto_recycle_cxt->lsn_info.lsn_array = NULL; /* no LSN array attached yet */
+    exrto_recycle_cxt->lsn_info.lsn_num = 0;      /* and therefore zero entries */
+    exrto_recycle_cxt->got_SIGHUP = false;         /* signal flags start cleared */
+    exrto_recycle_cxt->shutdown_requested = false;
+}
static void knl_t_parallel_decode_init(knl_t_parallel_decode_worker_context* parallel_decode_cxt)
@@ -1315,7 +1324,9 @@ static void knl_t_storage_init(knl_t_storage_context* storage_cxt)
storage_cxt->BackendWritebackContext = (WritebackContext*)palloc0(sizeof(WritebackContext));
storage_cxt->SharedBufHash = NULL;
storage_cxt->InProgressBuf = NULL;
+ storage_cxt->ParentInProgressBuf = NULL;
storage_cxt->IsForInput = false;
+ storage_cxt->ParentIsForInput = false;
storage_cxt->PinCountWaitBuf = NULL;
storage_cxt->InProgressAioDispatch = NULL;
storage_cxt->InProgressAioDispatchCount = 0;
@@ -1887,6 +1898,7 @@ void knl_thread_init(knl_thread_role role)
knl_t_pencentile_init(&t_thrd.percentile_cxt);
knl_t_perf_snap_init(&t_thrd.perf_snap_cxt);
knl_t_page_redo_init(&t_thrd.page_redo_cxt);
+ knl_t_exrto_recycle_init(&t_thrd.exrto_recycle_cxt);
knl_t_parallel_decode_init(&t_thrd.parallel_decode_cxt);
knl_t_parallel_decode_reader_init(&t_thrd.logicalreadworker_cxt);
knl_t_heartbeat_init(&t_thrd.heartbeat_cxt);
@@ -1952,6 +1964,7 @@ void RedoInterruptCallBack()
Assert(!AmStartupProcess());
Assert(!AmPageRedoWorker());
+ Assert(!AmErosRecyclerProcess());
}
void RedoPageRepairCallBack(RepairBlockKey key, XLogPhyBlock pblk)
diff --git a/src/gausskernel/storage/access/redo/CMakeLists.txt b/src/gausskernel/storage/access/redo/CMakeLists.txt
index 16abb90f353806000e563b281f93d811b2918592..22cecf9f2485bc1fb6da4a82189c6e231195bd54 100755
--- a/src/gausskernel/storage/access/redo/CMakeLists.txt
+++ b/src/gausskernel/storage/access/redo/CMakeLists.txt
@@ -1,4 +1,5 @@
#This is the main CMAKE for build bin.
+add_subdirectory(standby_read)
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_redo_SRC)
set(TGT_redo_INC
diff --git a/src/gausskernel/storage/access/redo/Makefile b/src/gausskernel/storage/access/redo/Makefile
index 9ec819f8bab42a23b2ce8a2b05972c1175e8ec2a..07f1f8ca6e50158f19a5f3dfef759bf7d945748a 100644
--- a/src/gausskernel/storage/access/redo/Makefile
+++ b/src/gausskernel/storage/access/redo/Makefile
@@ -22,6 +22,7 @@
#-------------------------------------------------------------------------
subdir = src/gausskernel/storage/access/redo
+SUBDIRS = standby_read
top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global
diff --git a/src/gausskernel/storage/access/redo/redo_dbcommands.cpp b/src/gausskernel/storage/access/redo/redo_dbcommands.cpp
index bc8314f42741370da2bfa121cad872b237c68770..4ab0a3ac41abed1164fab1140b43c753d43c0786 100644
--- a/src/gausskernel/storage/access/redo/redo_dbcommands.cpp
+++ b/src/gausskernel/storage/access/redo/redo_dbcommands.cpp
@@ -86,6 +86,7 @@ XLogRecParseState *DbaseRedoParseToBlock(XLogReaderState *record, uint32 *blockn
if ((info == XLOG_DBASE_CREATE) || (info == XLOG_DBASE_DROP)) {
recordstatehead = DatabaseXlogCommonParseToBlock(record, blocknum);
+ recordstatehead->isFullSync = record->isFullSync;
} else {
ereport(PANIC,
(errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmsg("DbaseRedoParseToBlock: unknown op code %u", info)));
diff --git a/src/gausskernel/storage/access/redo/redo_storage.cpp b/src/gausskernel/storage/access/redo/redo_storage.cpp
index 35aa13e0aabf9717e03568ba924cd831d3fc4dfa..73a25fbeceb99e5b04937c2479913db29d5f1de9 100644
--- a/src/gausskernel/storage/access/redo/redo_storage.cpp
+++ b/src/gausskernel/storage/access/redo/redo_storage.cpp
@@ -68,7 +68,7 @@ XLogRecParseState *smgr_xlog_relnode_parse_to_block(XLogReaderState *record, uin
XLogRecSetBlockCommonState(record, BLOCK_DATA_DDL_TYPE, filenode, recordstatehead);
XLogRecSetBlockDdlState(&(recordstatehead->blockparse.extra_rec.blockddlrec), ddltype,
- (char *)XLogRecGetData(record), 1, compress);
+ (char *)XLogRecGetData(record), 1, compress, XLogRecGetDataLen(record));
return recordstatehead;
}
diff --git a/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp b/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp
index 0f3c428833f221e4e3d0fb5a6f34cb0f0b5a4985..753b99d7ba4e98e9776de180cdc17346381c1d7d 100644
--- a/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp
+++ b/src/gausskernel/storage/access/redo/redo_visibilitymap.cpp
@@ -49,6 +49,7 @@ bool visibilitymap_clear_page(Page mapPage, BlockNumber heapBlk)
void visibilitymap_clear_buffer(RedoBufferInfo *bufferInfo, BlockNumber heapBlk)
{
if (visibilitymap_clear_page(bufferInfo->pageinfo.page, heapBlk)) {
+ PageSetLSN(bufferInfo->pageinfo.page, bufferInfo->lsn, false);
MakeRedoBufferDirty(bufferInfo);
}
}
diff --git a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp
index 0fb813d5b503a3296c0ef27ceb938b245907640d..7bfcbaf22d5337b1a7ce539f999fcf4701356104 100644
--- a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp
+++ b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp
@@ -50,6 +50,9 @@
#include "access/ustore/knl_uextremeredo.h"
#include "commands/dbcommands.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/extreme_rto/batch_redo.h"
+#include "access/extreme_rto/page_redo.h"
#include "access/twophase.h"
#include "access/redo_common.h"
#include "ddes/dms/ss_dms_bufmgr.h"
@@ -805,11 +808,12 @@ void XLogUpdateCopyedBlockState(XLogRecParseState *recordblockstate, XLogBlockPa
}
void XLogRecSetBlockDdlState(XLogBlockDdlParse *blockddlstate, uint32 blockddltype, char *mainData,
- int rels, bool compress)
+ int rels, bool compress, uint32 mainDataLen)
{
Assert(blockddlstate != NULL);
blockddlstate->blockddltype = blockddltype;
blockddlstate->rels = rels;
+ blockddlstate->mainDataLen = mainDataLen;
blockddlstate->mainData = mainData;
blockddlstate->compress = compress;
}
@@ -1495,9 +1499,14 @@ void XLogBlockDdlDoSmgrAction(XLogBlockHead *blockhead, void *blockrecbody, Redo
case BLOCK_DDL_CREATE_RELNODE:
smgr_redo_create(rnode, blockhead->forknum, blockddlrec->mainData);
break;
- case BLOCK_DDL_TRUNCATE_RELNODE:
- xlog_block_smgr_redo_truncate(rnode, blockhead->blkno, blockhead->end_ptr);
+ case BLOCK_DDL_TRUNCATE_RELNODE: {
+ TransactionId latest_removed_xid = InvalidTransactionId;
+ if (blockddlrec->mainDataLen == TRUNCATE_CONTAIN_XID_SIZE) {
+ latest_removed_xid = ((xl_smgr_truncate_compress*)blockddlrec->mainData)->latest_removed_xid;
+ }
+ xlog_block_smgr_redo_truncate(rnode, blockhead->blkno, blockhead->end_ptr, latest_removed_xid);
break;
+ }
case BLOCK_DDL_DROP_RELNODE: {
bool compress = blockddlrec->compress;
ColFileNodeRel *xnodes = (ColFileNodeRel *)blockddlrec->mainData;
@@ -1748,26 +1757,38 @@ bool XLogBlockRedoForExtremeRTO(XLogRecParseState *redoblocktate, RedoBufferInfo
}
bool checkvalid = XLogBlockRefreshRedoBufferInfo(blockhead, bufferinfo);
- if (!checkvalid) {
+ if (unlikely(!checkvalid)) {
ereport(PANIC, (errmsg("XLogBlockRedoForExtremeRTO: redobuffer checkfailed")));
}
- if (block_valid <= BLOCK_DATA_FSM_TYPE) {
- if (redoaction != BLK_DONE) {
- GetRedoStartTime(redoCost);
- Assert(block_valid == g_xlogExtRtoRedoTable[block_valid].block_valid);
- g_xlogExtRtoRedoTable[block_valid].xlog_redoextrto(blockhead, blockrecbody, bufferinfo);
- CountRedoTime(redoCost);
- }
-#ifdef USE_ASSERT_CHECKING
- if (block_valid != BLOCK_DATA_UNDO_TYPE && !bufferinfo->pageinfo.ignorecheck) {
- DoRecordCheck(redoblocktate, PageGetLSN(bufferinfo->pageinfo.page), true);
- }
-#endif
- AddReadBlock(redoblocktate, (u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readcount));
- } else {
+
+ if (unlikely(block_valid > BLOCK_DATA_FSM_TYPE)) {
ereport(WARNING, (errmsg("XLogBlockRedoForExtremeRTO: unsuport type %u, lsn %X/%X", (uint32)block_valid,
(uint32)(blockhead->end_ptr >> 32), (uint32)(blockhead->end_ptr))));
+ return false;
+ }
+
+ if ((block_valid != BLOCK_DATA_UNDO_TYPE) && g_instance.attr.attr_storage.EnableHotStandby &&
+ XLByteLT(PageGetLSN(bufferinfo->pageinfo.page), blockhead->end_ptr)) {
+ BufferTag buf_tag;
+ INIT_BUFFERTAG(buf_tag, bufferinfo->blockinfo.rnode,
+ bufferinfo->blockinfo.forknum, bufferinfo->blockinfo.blkno);
+ extreme_rto_standby_read::insert_lsn_to_block_info(&extreme_rto::g_redoWorker->standby_read_meta_info, buf_tag,
+ bufferinfo->pageinfo.page, blockhead->start_ptr);
+ }
+
+ if (redoaction != BLK_DONE) {
+ GetRedoStartTime(redoCost);
+ Assert(block_valid == g_xlogExtRtoRedoTable[block_valid].block_valid);
+ g_xlogExtRtoRedoTable[block_valid].xlog_redoextrto(blockhead, blockrecbody, bufferinfo);
+ CountRedoTime(redoCost);
+ }
+#ifdef USE_ASSERT_CHECKING
+ if (block_valid != BLOCK_DATA_UNDO_TYPE && !bufferinfo->pageinfo.ignorecheck) {
+ DoRecordCheck(redoblocktate, PageGetLSN(bufferinfo->pageinfo.page), true);
}
+#endif
+ AddReadBlock(redoblocktate, (u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readcount));
+
return false;
}
@@ -1865,6 +1886,119 @@ void XLogBlockDispatchForExtermeRTO(XLogRecParseState *recordblockstate)
} while (nextstate != NULL);
}
+/*
+ * Tell whether the block tag extracted from state_iter is the block named by target_tag.
+ * Relfilenode, block number and fork number must all match.
+ */
+bool find_target_state(XLogRecParseState *state_iter, const RedoBufferTag &target_tag)
+{
+    RelFileNode rec_rnode;
+    uint32 rec_blkno;
+    ForkNumber rec_forknum;
+
+    extreme_rto::PRXLogRecGetBlockTag(state_iter, &rec_rnode, &rec_blkno, &rec_forknum);
+
+    bool same_rel = RelFileNodeEquals(rec_rnode, target_tag.rnode);
+    return same_rel && (target_tag.blkno == rec_blkno) && (target_tag.forknum == rec_forknum);
+}
+
+/*
+ * Apply one parsed block state to the page described by buf_info.
+ * Raises ERROR when the redo buffer info cannot be refreshed from the block head, or when the
+ * block type is greater than BLOCK_DATA_FSM_TYPE (no entry in g_xlogExtRtoRedoTable is used for it).
+ */
+void wal_block_redo_for_extreme_rto_read(XLogRecParseState *state, RedoBufferInfo *buf_info)
+{
+    const int shift_size = 32;
+
+    /* decode blockdata body */
+    XLogBlockHead *head = &state->blockparse.blockhead;
+    void *rec_body = &state->blockparse.extra_rec;
+    uint16 valid_type = XLogBlockHeadGetValidInfo(head);
+
+    if (!XLogBlockRefreshRedoBufferInfo(head, buf_info)) {
+        ereport(ERROR, (errmsg("wal_block_redo_for_extreme_rto: redobuffer checkfailed")));
+    }
+
+    if (valid_type > BLOCK_DATA_FSM_TYPE) {
+        ereport(ERROR, (errmsg("wal_block_redo_for_extreme_rto: unsuport type %u, lsn %X/%X", (uint32)valid_type,
+                               (uint32)(head->end_ptr >> shift_size), (uint32)(head->end_ptr))));
+    } else {
+        /* dispatch to the redo routine registered for this block type */
+        Assert(valid_type == g_xlogExtRtoRedoTable[valid_type].block_valid);
+        g_xlogExtRtoRedoTable[valid_type].xlog_redoextrto(head, rec_body, buf_info);
+    }
+}
+
+/*
+ * Fill rb_info so that it describes buffer buf holding the block identified by buf_tag.
+ * lsn and all pblk fields start out invalid; dirtyflag starts out false.
+ */
+void init_redo_buffer_info(RedoBufferInfo *rb_info, const BufferTag &buf_tag, Buffer buf)
+{
+    /* block identity, copied from the buffer tag */
+    rb_info->blockinfo.rnode = buf_tag.rnode;
+    rb_info->blockinfo.forknum = buf_tag.forkNum;
+    rb_info->blockinfo.blkno = buf_tag.blockNum;
+
+    /* pblk is not known yet */
+    rb_info->blockinfo.pblk.relNode = InvalidOid;
+    rb_info->blockinfo.pblk.block = InvalidBlockNumber;
+    rb_info->blockinfo.pblk.lsn = InvalidXLogRecPtr;
+
+    /* buffer and page view */
+    rb_info->buf = buf;
+    rb_info->lsn = InvalidXLogRecPtr;
+    rb_info->pageinfo.page = BufferGetPage(buf);
+    rb_info->pageinfo.pagesize = BufferGetPageSize(buf);
+#ifdef USE_ASSERT_CHECKING
+    rb_info->pageinfo.ignorecheck = false; /* initial value */
+#endif
+    rb_info->dirtyflag = false; /* initial value, actually, dirtyflag is useless in extreme RTO read */
+}
+
+/*
+ * Replay the wal records listed in lsn_info onto the page held in base_page_buf.
+ *
+ * buf_tag:       identity of the block being rebuilt.
+ * lsn_info:      lsn_array[0 .. lsn_num) are the record positions handed to XLogReadRecord, in array order.
+ * base_page_buf: buffer whose page the records are applied to.
+ *
+ * Raises ERROR when the reader cannot be allocated, a record cannot be read, a record cannot be
+ * parsed into block states, or a record does not contain the target block.
+ *
+ * NOTE(review): the ERROR paths inside the loop leave without calling XLogReaderFree /
+ * XLogParseBufferDestoryFunc; confirm that the caller's error cleanup covers both.
+ */
+void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_info, Buffer base_page_buf)
+{
+    char *error_msg = NULL;
+    RedoParseManager redo_pm;
+
+    XLogReaderState *xlog_reader = XLogReaderAllocate(&read_local_xlog_page, NULL);
+    if (xlog_reader == NULL) {
+        ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"),
+                        errdetail("Failed while allocating an XLog reading processor.")));
+    }
+
+    /*
+     * Initialise the parse buffers only after the reader is known to exist, so that a failed
+     * reader allocation does not leave an initialised parse manager behind.
+     */
+    /* do we need register interrupt func here? like ProcessConfigFile */
+    XLogParseBufferInitFunc(&redo_pm, MAX_BUFFER_NUM_PER_WAL_RECORD, NULL, NULL);
+
+    RedoBufferInfo buf_info;
+    init_redo_buffer_info(&buf_info, buf_tag, base_page_buf);
+    for (uint32 i = 0; i < lsn_info->lsn_num; i++) {
+        const XLogRecPtr record_lsn = lsn_info->lsn_array[i];
+
+        XLogRecord *record = XLogReadRecord(xlog_reader, record_lsn, &error_msg);
+        if (record == NULL) {
+            ereport(ERROR, (errcode_for_file_access(),
+                            errmsg("could not read wal record from xlog at %X/%X, errormsg: %s",
+                                   (uint32)(record_lsn >> LSN_MOVE32), (uint32)(record_lsn),
+                                   error_msg ? error_msg : " ")));
+        }
+
+        /* split the record into per-block parse states */
+        uint32 num = 0;
+        XLogRecParseState *state = XLogParseToBlockCommonFunc(xlog_reader, &num);
+
+        if (num == 0) {
+            ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain any block.",
+                                   (uint32)(record_lsn >> LSN_MOVE32), (uint32)(record_lsn))));
+        }
+
+        if (state == NULL) {
+            ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"),
+                            errdetail("Failed while wal parse to block.")));
+        }
+
+        /* find the state that belongs to the block we are rebuilding */
+        XLogRecParseState *state_iter = state;
+        while (state_iter != NULL) {
+            if (find_target_state(state_iter, buf_info.blockinfo)) {
+                break;
+            }
+            state_iter = (XLogRecParseState *)(state_iter->nextrecord);
+        }
+        if (state_iter == NULL) {
+            ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain target block.",
+                                   (uint32)(record_lsn >> LSN_MOVE32), (uint32)(record_lsn))));
+        }
+
+        buf_info.lsn = state_iter->blockparse.blockhead.end_ptr;
+        buf_info.blockinfo.pblk = state_iter->blockparse.blockhead.pblk;
+        wal_block_redo_for_extreme_rto_read(state_iter, &buf_info);
+        XLogBlockParseStateRelease(state);
+    }
+
+    XLogReaderFree(xlog_reader);
+    XLogParseBufferDestoryFunc(&redo_pm);
+}
+
#ifdef EXTREME_RTO_DEBUG_AB
void DoThreadExit()
{
diff --git a/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt b/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7f8959afaf1610c56a36744a4550818089951a54
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/CMakeLists.txt
@@ -0,0 +1,23 @@
+#This is the main CMAKE for build bin.
+
+
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_standby_read_SRC)
+
+set(TGT_standby_read_INC
+ ${PROJECT_SRC_DIR}/tools/log_fdw
+ ${PROJECT_TRUNK_DIR}/distribute/bin/gds
+ ${PROJECT_SRC_DIR}/include/iprange
+ ${PROJECT_SRC_DIR}/include/libcomm
+ ${PROJECT_SRC_DIR}/include
+ ${PROJECT_SRC_DIR}/lib/gstrace
+ ${LIBCGROUP_INCLUDE_PATH}
+ ${ZLIB_INCLUDE_PATH}
+ ${LIBCURL_INCLUDE_PATH}
+ ${LZ4_INCLUDE_PATH}
+ ${EVENT_INCLUDE_PATH}
+)
+
+set(standby_read_DEF_OPTIONS ${MACRO_OPTIONS})
+set(standby_read_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS})
+set(standby_read_LINK_OPTIONS ${BIN_LINK_OPTIONS})
+add_static_objtarget(gausskernel_storage_access_redo_standby_read TGT_standby_read_SRC TGT_standby_read_INC "${standby_read_DEF_OPTIONS}" "${standby_read_COMPILE_OPTIONS}" "${standby_read_LINK_OPTIONS}")
diff --git a/src/gausskernel/storage/access/redo/standby_read/Makefile b/src/gausskernel/storage/access/redo/standby_read/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..2367845295217cde9fde41c5fdab5b8189d6202c
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/Makefile
@@ -0,0 +1,37 @@
+#
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+# http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ---------------------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for access/redo/standby_read
+#
+# IDENTIFICATION
+# src/gausskernel/storage/access/redo/standby_read/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/gausskernel/storage/access/redo/standby_read
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global
+
+ifneq "$(MAKECMDGOALS)" "clean"
+ ifneq "$(MAKECMDGOALS)" "distclean"
+ ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1"
+ -include $(DEPEND)
+ endif
+ endif
+endif
+OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o lsn_info_proc.o standby_read_interface.o
+
+include $(top_srcdir)/src/gausskernel/common.mk
diff --git a/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..51445710641f2cd59fa9cc5c2c42f935b532abf8
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * base_page_proc.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "access/extreme_rto/batch_redo.h"
+#include "access/extreme_rto/dispatcher.h"
+#include "access/extreme_rto/page_redo.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+#include "storage/buf/buf_internals.h"
+
+namespace extreme_rto_standby_read {
+
+/*
+ * Build the RelFileNode used to address the base page file of one batch / redo worker pair.
+ * dbNode packs batch_id (shifted left by LOW_WORKERID_BITS) together with redo_worker_id;
+ * relNode is the block number (position / BLCKSZ) shifted right by UINT64_HALF.
+ */
+inline RelFileNode make_base_page_relfilenode(uint32 batch_id, uint32 redo_worker_id, BasePagePosition position)
+{
+    RelFileNode base_page_rnode;
+
+    base_page_rnode.opt = DefaultFileNodeOpt;
+    base_page_rnode.bucketNode = InvalidBktId;
+    base_page_rnode.relNode = (uint32)((position / BLCKSZ) >> UINT64_HALF);
+    base_page_rnode.dbNode = (batch_id << LOW_WORKERID_BITS) | redo_worker_id;
+    base_page_rnode.spcNode = EXRTO_BASE_PAGE_SPACE_OID;
+
+    return base_page_rnode;
+}
+
+/*
+ * Read the base page stored at `position` in the base page file of the given batch and redo worker.
+ * `mode` is passed to ReadBuffer_common unchanged. Raises ERROR when no valid buffer is returned.
+ */
+Buffer buffer_read_base_page(uint32 batch_id, uint32 redo_id, BasePagePosition position, ReadBufferMode mode)
+{
+    RelFileNode rnode = make_base_page_relfilenode(batch_id, redo_id, position);
+    BlockNumber blocknum = (BlockNumber)(position / BLCKSZ);
+    bool hit = false;
+    SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+    Buffer buffer =
+        ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, MAIN_FORKNUM, blocknum, mode, NULL, &hit, NULL);
+    if (buffer != InvalidBuffer) {
+        return buffer;
+    }
+
+    ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+                    (errmsg("invalid buffer when read base page, batch_id: %u, redo_worker_id: %u, blocknum: %lu",
+                            batch_id, redo_id, position / BLCKSZ))));
+    return buffer;
+}
+
+/*
+ * Store a copy of src_page as a new base page at meta_info->base_page_next_position,
+ * then advance that position by one block (BLCKSZ).
+ */
+void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page)
+{
+    /* the buffer comes back zeroed and locked (RBM_ZERO_AND_LOCK) */
+    Buffer base_buf = buffer_read_base_page(meta_info->batch_id, meta_info->redo_id,
+                                            meta_info->base_page_next_position, RBM_ZERO_AND_LOCK);
+
+    errno_t copy_rc = memcpy_s(BufferGetPage(base_buf), BLCKSZ, src_page, BLCKSZ);
+    securec_check(copy_rc, "\0", "\0");
+
+    MarkBufferDirty(base_buf);
+    UnlockReleaseBuffer(base_buf);
+
+    meta_info->base_page_next_position += BLCKSZ;
+}
+
+/*
+ * Copy the base page stored at `position` into the block of dest_buf_desc.
+ * The batch id and redo worker id of the base page file are derived from buf_tag.
+ */
+void read_base_page(const BufferTag& buf_tag, BasePagePosition position, BufferDesc* dest_buf_desc)
+{
+    const uint32 workers_per_mng = extreme_rto::get_page_redo_worker_num_per_manager();
+
+    /* batch id and worker id start from 1 when reading a page */
+    uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::GetBatchCount()) + 1;
+
+    extreme_rto::RedoItemTag item_tag;
+    INIT_REDO_ITEM_TAG(item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum);
+    uint32 redo_worker_id = extreme_rto::GetWorkerId(&item_tag, workers_per_mng) + 1;
+
+    Buffer base_buf = buffer_read_base_page(batch_id, redo_worker_id, position, RBM_NORMAL);
+
+    /* hold a share lock on the base page for the duration of the copy */
+    LockBuffer(base_buf, BUFFER_LOCK_SHARE);
+    Size copy_size = BufferGetPageSize(base_buf);
+    errno_t copy_rc = memcpy_s((Page)BufHdrGetBlock(dest_buf_desc), copy_size, BufferGetPage(base_buf), copy_size);
+    securec_check(copy_rc, "\0", "\0");
+    UnlockReleaseBuffer(base_buf);
+}
+
+/*
+ * Ask the storage manager to unlink base page storage of the given batch / redo worker,
+ * passing the block number of recycle_pos to smgrdounlink.
+ */
+void recycle_base_page_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos)
+{
+    BlockNumber recycle_blocknum = (BlockNumber)(recycle_pos / BLCKSZ);
+    RelFileNode rnode = make_base_page_relfilenode(batch_id, redo_id, recycle_pos);
+    SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+    smgrdounlink(smgr, true, recycle_blocknum);
+}
+
+} // namespace extreme_rto_standby_read
+
diff --git a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..356a6687b28c97a1b2af607c9af66d929f3a1855
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * block_info_proc.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
+#include "storage/smgr/relfilenode.h"
+
+namespace extreme_rto_standby_read {
+
+/*
+ * Initialise a block info meta page: zero the whole page (BLCKSZ bytes), then mark the
+ * page header valid and stamp it with the current page version.
+ */
+void block_info_page_init(Page page)
+{
+    static_assert(sizeof(BlockInfoPageHeader) == BLOCK_INFO_HEAD_SIZE, "BlockInfoPageHeader size is not 64 bytes");
+    static_assert(sizeof(BlockMetaInfo) == BLOCK_INFO_SIZE, "BlockMetaInfo size is not 64 bytes");
+
+    BlockInfoPageHeader* header = (BlockInfoPageHeader*)page;
+
+    errno_t rc = memset_s(header, BLCKSZ, 0, BLCKSZ);
+    securec_check(rc, "", "");
+
+    header->version = BLOCK_INFO_PAGE_VERSION;
+    header->flags |= BLOCK_INFO_PAGE_VALID_FLAG;
+}
+
+/* Map a data block number to the number of the meta page that holds its block info entry. */
+inline BlockNumber data_block_number_to_meta_page_number(BlockNumber block_num)
+{
+    BlockNumber meta_page_num = block_num / BLOCK_INFO_NUM_PER_PAGE;
+    return meta_page_num;
+}
+
+/* Byte offset, inside its meta page, of the block info entry of block_num (entries follow the page header). */
+inline uint32 block_info_meta_page_offset(BlockNumber block_num)
+{
+    return BLOCK_INFO_HEAD_SIZE + (block_num % BLOCK_INFO_NUM_PER_PAGE) * BLOCK_INFO_SIZE;
+}
+
+/*
+ * Locate the BlockMetaInfo entry of the data block identified by buf_tag.
+ *
+ * The entry is looked up in a relation that uses buf_tag's relfilenode with spcNode replaced by
+ * EXRTO_BLOCK_INFO_SPACE_OID. On success *buffer is the buffer holding the meta page; it is only
+ * pinned, not locked, so the caller takes the buffer lock it needs.
+ *
+ * Returns NULL when no buffer could be read, or, for mode == RBM_NORMAL, when the meta page or the
+ * entry is not marked valid. Whenever NULL is returned *buffer is InvalidBuffer, so a caller can
+ * never unlock or release a buffer this function has already released.
+ */
+BlockMetaInfo* get_block_meta_info_by_relfilenode(
+    const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer)
+{
+    RelFileNode standby_read_rnode = buf_tag.rnode;
+    standby_read_rnode.spcNode = EXRTO_BLOCK_INFO_SPACE_OID;
+    SMgrRelation smgr = smgropen(standby_read_rnode, InvalidBackendId);
+    bool hit = false;
+
+    BlockNumber meta_block_num = data_block_number_to_meta_page_number(buf_tag.blockNum);
+    /* NOTE(review): relpersistence is passed as 0 here - confirm this is intended for meta pages. */
+    *buffer = ReadBuffer_common(smgr, 0, buf_tag.forkNum, meta_block_num, mode, strategy, &hit, NULL);
+
+    if (*buffer == InvalidBuffer) {
+        return NULL;
+    }
+
+    Page page = BufferGetPage(*buffer);
+    if (!is_block_info_page_valid((BlockInfoPageHeader*)page) && mode == RBM_NORMAL) {
+        ReleaseBuffer(*buffer);
+        *buffer = InvalidBuffer; /* do not hand a released buffer back to the caller */
+        return NULL;
+    }
+
+    uint32 offset = block_info_meta_page_offset(buf_tag.blockNum);
+    BlockMetaInfo *block_info = ((BlockMetaInfo*)(page + offset));
+    if (!is_block_meta_info_valid(block_info) && mode == RBM_NORMAL) {
+        ReleaseBuffer(*buffer);
+        *buffer = InvalidBuffer; /* do not hand a released buffer back to the caller */
+        return NULL;
+    }
+
+    return block_info;
+}
+
+/*
+ * Reset one block info entry: zero it, initialise both of its lists, set min_lsn and max_lsn
+ * to max_lsn, mark the entry valid and record the control file's timeline.
+ */
+void init_block_info(BlockMetaInfo* block_info, XLogRecPtr max_lsn)
+{
+    errno_t rc = memset_s(block_info, BLOCK_INFO_SIZE, 0, BLOCK_INFO_SIZE);
+    securec_check(rc, "", "");
+
+    lsn_info_list_init(&block_info->lsn_info_list);
+    lsn_info_list_init(&block_info->base_page_info_list);
+
+    block_info->min_lsn = max_lsn;
+    block_info->max_lsn = max_lsn; // just for update first base page info' lsn
+
+    block_info->flags |= BLOCK_INFO_NODE_VALID_FLAG;
+    block_info->timeline = t_thrd.shemem_ptr_cxt.ControlFile->timeline;
+}
+
+/*
+ * Record next_lsn in the block info entry of the block identified by buf_tag.
+ *
+ * meta_info: passed through to the lsn info insert routines.
+ * buf_tag:   identity of the data block.
+ * base_page: page whose LSN is read as current_page_lsn and which is handed to
+ *            insert_base_page_to_lsn_info when a base page is saved.
+ * next_lsn:  lsn to record; it also becomes the entry's max_lsn and the meta page lsn.
+ *
+ * PANICs when the block info entry cannot be obtained.
+ */
+void insert_lsn_to_block_info(
+ StandbyReadMetaInfo* meta_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn)
+{
+ Buffer block_info_buf = InvalidBuffer;
+ BlockMetaInfo* block_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_ZERO_ON_ERROR, &block_info_buf);
+ if (unlikely(block_info == NULL || block_info_buf == InvalidBuffer)) {
+ ereport(PANIC, (errmsg("insert lsn failed,block invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode,
+ buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum)));
+ }
+ /* all reads and updates of the entry below happen under the exclusive page lock */
+ LockBuffer(block_info_buf, BUFFER_LOCK_EXCLUSIVE);
+ Page page = BufferGetPage(block_info_buf);
+ XLogRecPtr current_page_lsn = PageGetLSN(base_page);
+ /* first use of this entry: initialise the page if needed, then the entry itself */
+ if (!is_block_meta_info_valid(block_info)) {
+ if (!is_block_info_page_valid((BlockInfoPageHeader*)page)) {
+ block_info_page_init(page);
+ }
+
+ init_block_info(block_info, current_page_lsn);
+ }
+
+ /* save a base page on the first record and then every base_page_saved_interval records */
+ if (block_info->record_num == 0 ||
+ (block_info->record_num % (uint32)g_instance.attr.attr_storage.base_page_saved_interval) == 0) {
+ insert_base_page_to_lsn_info(meta_info, &block_info->lsn_info_list, &block_info->base_page_info_list, buf_tag,
+ base_page, current_page_lsn, next_lsn);
+ } else {
+ insert_lsn_to_lsn_info(meta_info, &block_info->lsn_info_list, next_lsn);
+ }
+
+ Assert(block_info->max_lsn <= next_lsn);
+ block_info->max_lsn = next_lsn;
+
+ ++(block_info->record_num);
+
+ /* stamp the meta page with next_lsn, mark it dirty, then drop lock and pin */
+ standby_read_meta_page_set_lsn(page, next_lsn);
+ MarkBufferDirty(block_info_buf);
+ UnlockReleaseBuffer(block_info_buf);
+}
+
+/*
+ * Recycle the block info entry of the block identified by buf_tag up to recyle_lsn.
+ *
+ * Returns STANDBY_READ_RECLYE_ALL when the entry is missing or its max_lsn is below recyle_lsn
+ * (the entry is then marked invalid), STANDBY_READ_RECLYE_UPDATE when the list heads and min_lsn
+ * were moved forward, and STANDBY_READ_RECLYE_NONE when nothing was changed.
+ */
+StandbyReadRecyleState recyle_block_info(
+ const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn)
+{
+ Buffer buffer = InvalidBuffer;
+ BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_NORMAL, &buffer);
+ if ((block_meta_info == NULL) || (buffer == InvalidBuffer)) {
+ // no block info, should not at this branch
+ ereport(WARNING, (errmsg("block meta is invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode, buf_tag.rnode.dbNode,
+ buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum)));
+ return STANDBY_READ_RECLYE_ALL;
+ }
+ StandbyReadRecyleState stat = STANDBY_READ_RECLYE_NONE;
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ Assert(((block_meta_info->flags & BLOCK_INFO_NODE_VALID_FLAG) == BLOCK_INFO_NODE_VALID_FLAG));
+ if (XLByteLT(block_meta_info->max_lsn, recyle_lsn)) {
+ /* every recorded lsn is older than recyle_lsn: invalidate the whole entry */
+ block_meta_info->flags &= ~BLOCK_INFO_NODE_VALID_FLAG;
+ stat = STANDBY_READ_RECLYE_ALL;
+ MarkBufferDirty(buffer);
+ } else if (XLogRecPtrIsValid(next_base_page_lsn)) {
+ LsnInfoPosition min_page_info_pos = LSN_INFO_LIST_HEAD;
+ XLogRecPtr min_lsn = InvalidXLogRecPtr;
+ recycle_one_lsn_info_list(buf_tag, base_page_info_pos, recyle_lsn, &min_page_info_pos, &min_lsn);
+
+ Assert(INFO_POSITION_IS_VALID(min_page_info_pos));
+ /* only touch (and dirty) the entry when the list head actually moved */
+ if (block_meta_info->base_page_info_list.next != min_page_info_pos) {
+ block_meta_info->min_lsn = min_lsn;
+ block_meta_info->lsn_info_list.next = min_page_info_pos;
+ block_meta_info->base_page_info_list.next = min_page_info_pos;
+ stat = STANDBY_READ_RECLYE_UPDATE;
+ MarkBufferDirty(buffer);
+ }
+ }
+ UnlockReleaseBuffer(buffer);
+ return stat;
+}
+
+/*
+ * Reset lsn_info for a new lookup: clear lsn_num and base_page_lsn, and allocate lsn_array on
+ * first use with room for base_page_saved_interval entries.
+ * NOTE(review): the array is sized once; confirm base_page_saved_interval cannot grow afterwards.
+ */
+static void reset_tmp_lsn_info_array(StandbyReadLsnInfoArray* lsn_info)
+{
+    lsn_info->base_page_lsn = InvalidXLogRecPtr;
+    lsn_info->lsn_num = 0;
+
+    if (lsn_info->lsn_array != NULL) {
+        return;
+    }
+
+    uint32 max_save_nums = (uint32)g_instance.attr.attr_storage.base_page_saved_interval;
+    lsn_info->lsn_array = (XLogRecPtr*)MemoryContextAlloc(
+        THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(XLogRecPtr) * max_save_nums);
+}
+
+/*
+ * Collect into lsn_info the lsn information needed to read the block identified by buf_tag
+ * as of read_lsn.
+ *
+ * Returns false when the block has no valid block info entry, true once lsn_info was filled by
+ * get_lsn_info_for_read. Raises ERROR when read_lsn is older than the entry's min_lsn.
+ */
+bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, XLogRecPtr read_lsn,
+    StandbyReadLsnInfoArray* lsn_info)
+{
+    Buffer buf = InvalidBuffer;
+    BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, strategy, RBM_NORMAL, &buf);
+    if (block_meta_info == NULL) {
+        return false;
+    }
+
+    LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+    /*
+     * Copy min_lsn while the page is still locked and pinned: the error report below runs after
+     * the buffer has been released, when block_meta_info must no longer be dereferenced.
+     */
+    XLogRecPtr min_lsn = block_meta_info->min_lsn;
+    if (XLByteLT(read_lsn, min_lsn)) {
+        UnlockReleaseBuffer(buf);
+        ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+                        (errmsg("block old version can not found %u/%u/%u %d %u read lsn %lu, min lsn %lu",
+                                buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum,
+                                buf_tag.blockNum, read_lsn, min_lsn))));
+    }
+
+    Assert(block_meta_info->base_page_info_list.prev != LSN_INFO_LIST_HEAD);
+    reset_tmp_lsn_info_array(lsn_info);
+    get_lsn_info_for_read(buf_tag, block_meta_info->base_page_info_list.prev, lsn_info, read_lsn);
+    UnlockReleaseBuffer(buf);
+    return true;
+}
+
+/*
+ * Remove one block info file.
+ * rnode: relfilenode whose shared buffers (main, fsm and visibility map forks) are dropped
+ *        and whose storage is then unlinked.
+ */
+void remove_one_block_info_file(const RelFileNode rnode)
+{
+    const ForkNumber forks[] = {MAIN_FORKNUM, FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
+    for (size_t i = 0; i < sizeof(forks) / sizeof(forks[0]); i++) {
+        DropRelFileNodeShareBuffers(rnode, forks[i], 0);
+    }
+
+    SMgrRelation block_info_rel = smgropen(rnode, InvalidBackendId);
+    smgrdounlink(block_info_rel, true);
+    smgrclose(block_info_rel);
+}
+/*
+ * Delete block meta info files that belong to a database, or to one relation of a database.
+ * db_oid:  database oid; files named "<db_oid>_..." are candidates.
+ * rel_oid: when not InvalidOid, only files named "<db_oid>_<rel_oid>_..." are candidates.
+ *
+ * Directories are skipped. A failing lstat or unlink only produces a WARNING (nothing at all
+ * for ENOENT) and the scan continues with the next file.
+ */
+void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid)
+{
+ char pathbuf[EXRTO_FILE_PATH_LEN];
+ char **filenames;
+ char **filename;
+ struct stat statbuf;
+ /* get block info file directory */
+ char exrto_block_info_dir[EXRTO_FILE_PATH_LEN] = {0};
+ int rc = snprintf_s(exrto_block_info_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR,
+ EXRTO_FILE_SUB_DIR[BLOCK_INFO_META]);
+ securec_check_ss(rc, "", "");
+ /* get all files' name from block meta file directory */
+ filenames = pgfnames(exrto_block_info_dir);
+ if (filenames == NULL) {
+ return;
+ }
+ /* build the file name prefix that selects the files to delete */
+ char target_prefix[EXRTO_FILE_PATH_LEN] = {0};
+ if (rel_oid != InvalidOid) {
+ rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_%u_", db_oid, rel_oid);
+ } else {
+ rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_", db_oid);
+ }
+ securec_check_ss(rc, "", "");
+ /* use the prefix name to match up files we want to delete */
+ size_t prefix_len = strlen(target_prefix);
+ for (filename = filenames; *filename != NULL; filename++) {
+ char *fname = *filename;
+ size_t fname_len = strlen(fname);
+ /*
+ * A file matches only when its name is longer than the prefix and its first prefix_len
+ * characters are identical to the prefix.
+ */
+ if (prefix_len >= fname_len || strncmp(target_prefix, fname, prefix_len) != 0) {
+ continue;
+ }
+ rc =
+ snprintf_s(pathbuf, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", exrto_block_info_dir, *filename);
+ securec_check_ss(rc, "", "");
+ /* lstat can fail; report anything other than a missing file and move on */
+ if (lstat(pathbuf, &statbuf) != 0) {
+ if (errno != ENOENT) {
+#ifndef FRONTEND
+ ereport(WARNING, (errmsg("could not stat file or directory \"%s\" \n", pathbuf)));
+#else
+ fprintf(stderr, _("could not stat file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno));
+#endif
+ }
+ continue;
+ }
+ /* if the file is a directory, don't touch it */
+ if (S_ISDIR(statbuf.st_mode)) {
+ /* skip dir */
+ continue;
+ }
+ /* delete this file we found */
+ if (unlink(pathbuf) != 0) {
+ if (errno != ENOENT) {
+#ifndef FRONTEND
+ ereport(WARNING, (errmsg("could not remove file or directory \"%s\" ", pathbuf)));
+#else
+ fprintf(stderr, _("could not remove file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno));
+#endif
+ }
+ }
+ }
+ pgfnames_cleanup(filenames);
+ return;
+}
+
+} // namespace extreme_rto_standby_read
+
diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bffb2be17311df7be7f1eae26be89d94e0f97b4e
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * lsn_info_double_list.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "access/extreme_rto/standby_read/lsn_info_double_list.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
+
+namespace extreme_rto_standby_read {
+
+/* Put a list node into its initial state: both links point at LSN_INFO_LIST_HEAD. */
+void lsn_info_list_init(LsnInfoDoubleList* node)
+{
+    node->next = LSN_INFO_LIST_HEAD;
+    node->prev = node->next;
+}
+
+/*
+ * modify the tail of list to link new node (block meta table's page lock is held)
+ *
+ * meta_info:        supplies the batch id and redo worker id of the lsn info file.
+ * old_tail_pos:     position of the current tail node, whose next link is updated.
+ * insert_pos:       position of the node being appended.
+ * current_page_lsn: stored as next_base_page_lsn of the old tail (base page list only).
+ * next_lsn:         lsn stamped on the modified lsn info page.
+ * is_lsn_info:      true to update the lsn list link, false to update the base page list link.
+ */
+void info_list_modify_old_tail(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos,
+ LsnInfoPosition insert_pos, XLogRecPtr current_page_lsn, XLogRecPtr next_lsn, bool is_lsn_info)
+{
+ Page page = NULL;
+ LsnInfo lsn_info = NULL;
+ BasePageInfo base_page_info = NULL;
+ uint32 batch_id = meta_info->batch_id;
+ uint32 worker_id = meta_info->redo_id;
+ Buffer buffer = InvalidBuffer;
+ uint32 offset;
+
+ /* NOTE(review): the returned page / buffer is used without a validity check - confirm get_lsn_info_page cannot fail here */
+ page = get_lsn_info_page(batch_id, worker_id, old_tail_pos, RBM_ZERO_ON_ERROR, &buffer);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* locate the old tail node inside the page */
+ offset = lsn_info_postion_to_offset(old_tail_pos);
+ Assert(offset >= LSN_INFO_HEAD_SIZE);
+ Assert(offset % LSN_INFO_NODE_SIZE == 0);
+ if (is_lsn_info) {
+ lsn_info = (LsnInfo)(page + offset);
+ Assert(lsn_info->lsn_list.next == LSN_INFO_LIST_HEAD);
+ lsn_info->lsn_list.next = insert_pos;
+ Assert(is_lsn_info_node_valid(lsn_info->flags));
+ } else {
+ base_page_info = (BasePageInfo)(page + offset);
+ Assert(base_page_info->base_page_list.next == LSN_INFO_LIST_HEAD);
+ base_page_info->base_page_list.next = insert_pos;
+ base_page_info->next_base_page_lsn = current_page_lsn;
+ Assert(is_lsn_info_node_valid(base_page_info->lsn_info_node.flags));
+ Assert(XLByteLT(base_page_info->cur_page_lsn, current_page_lsn));
+ }
+
+ /* stamp the page with next_lsn, mark it dirty, then drop lock and pin */
+ standby_read_meta_page_set_lsn(page, next_lsn);
+ MarkBufferDirty(buffer);
+ UnlockReleaseBuffer(buffer);
+}
+
+} // namespace extreme_rto_standby_read
diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..765538955435e903806d64fdd4053081419442ed
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp
@@ -0,0 +1,650 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * lsn_info_proc.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "access/extreme_rto/batch_redo.h"
+#include "access/extreme_rto/dispatcher.h"
+#include "access/extreme_rto/page_redo.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/extreme_rto/standby_read/lsn_info_double_list.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+
+namespace extreme_rto_standby_read {
+
+/*
+ * Format a raw block as an empty lsn info page: zero the whole block, then set the
+ * valid flag and the page version in its header.
+ */
+void lsn_info_page_init(Page page)
+{
+    /* the page layout relies on these fixed structure sizes */
+    static_assert(sizeof(LsnInfoPageHeader) == LSN_INFO_HEAD_SIZE, "LsnInfoPageHeader size is not 64 bytes");
+    static_assert(sizeof(LsnInfoNode) == LSN_INFO_NODE_SIZE, "LsnInfoNode size is not 64 bytes");
+    static_assert(sizeof(BasePageInfoNode) == BASE_PAGE_INFO_NODE_SIZE, "BasePageInfoNode size is not 128 bytes");
+
+    LsnInfoPageHeader *header = (LsnInfoPageHeader *)page;
+    errno_t rc = memset_s(header, BLCKSZ, 0, BLCKSZ);
+    securec_check(rc, "", "");
+
+    header->version = LSN_INFO_PAGE_VERSION;
+    header->flags |= LSN_INFO_PAGE_VALID_FLAG;
+}
+
+/*
+ * Initialize an lsn info node: zero it, mark it valid, tag it as an lsn node and
+ * reset its lsn list links.
+ */
+void lsn_info_init(LsnInfo lsn_info)
+{
+    errno_t rc = memset_s(lsn_info, LSN_INFO_NODE_SIZE, 0, LSN_INFO_NODE_SIZE);
+    securec_check(rc, "", "");
+
+    lsn_info_list_init(&lsn_info->lsn_list);
+    lsn_info->type = LSN_INFO_TYPE_LSNS;
+    lsn_info->flags |= LSN_INFO_NODE_VALID_FLAG;
+}
+/*
+ * Initialize a base page info node: zero it, mark the embedded lsn info node valid and
+ * typed as a base page, and reset both the lsn list links and the base page list links.
+ */
+void base_page_info_init(BasePageInfo base_page_info)
+{
+    errno_t rc = memset_s(base_page_info, BASE_PAGE_INFO_NODE_SIZE, 0, BASE_PAGE_INFO_NODE_SIZE);
+    securec_check(rc, "", "");
+
+    lsn_info_list_init(&base_page_info->base_page_list);
+    lsn_info_list_init(&base_page_info->lsn_info_node.lsn_list);
+    base_page_info->lsn_info_node.type = LSN_INFO_TYPE_BASE_PAGE;
+    base_page_info->lsn_info_node.flags |= LSN_INFO_NODE_VALID_FLAG;
+}
+
+/*
+ * Build the RelFileNode that addresses the lsn info file containing the given position.
+ * dbNode packs batch_id and worker_id; relNode carries the bits of the block index
+ * (position / BLCKSZ) above UINT64_HALF.
+ */
+RelFileNode make_lsn_info_relfilenode(uint32 batch_id, uint32 worker_id, LsnInfoPosition position)
+{
+    LsnInfoPosition block_index = position / BLCKSZ;
+    RelFileNode rnode = {0};
+
+    rnode.spcNode = EXRTO_LSN_INFO_SPACE_OID;
+    rnode.dbNode = (batch_id << LOW_WORKERID_BITS) | worker_id;
+    rnode.relNode = (uint32)(block_index >> UINT64_HALF);
+    rnode.bucketNode = InvalidBktId;
+    rnode.opt = DefaultFileNodeOpt;
+    return rnode;
+}
+
+/*
+ * Read the lsn info page that contains the given position.
+ *
+ * On success the page is returned and *buffer holds its buffer; the buffer is not locked here.
+ * Returns NULL with *buffer == InvalidBuffer when the buffer cannot be read (a WARNING is
+ * reported), or when the page is not a valid lsn info page and mode is RBM_NORMAL.
+ * For any other mode an invalid page is initialized in place and returned.
+ */
+Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition position, ReadBufferMode mode,
+    Buffer* buffer)
+{
+    bool hit = false;
+    RelFileNode rnode = make_lsn_info_relfilenode(batch_id, worker_id, position);
+    /* high 32 bits are stored in the relNode. */
+    BlockNumber block_num = (uint32)(position / BLCKSZ);
+    SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+    *buffer = ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, MAIN_FORKNUM, block_num, mode, NULL, &hit, NULL);
+    if (*buffer == InvalidBuffer) {
+        ereport(WARNING, (errcode_for_file_access(),
+            errmsg("block is invalid %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u",
+                rnode.spcNode, rnode.dbNode, rnode.relNode, MAIN_FORKNUM, block_num,
+                batch_id, worker_id)));
+        return NULL;
+    }
+
+    Page page = BufferGetPage(*buffer);
+    if (is_lsn_info_page_valid((LsnInfoPageHeader*)page)) {
+        return page;
+    }
+
+    if (mode == RBM_NORMAL) {
+        /* plain readers must not see an uninitialized page: drop the pin and report failure */
+        ReleaseBuffer(*buffer);
+        *buffer = InvalidBuffer;
+        return NULL;
+    }
+
+    /* make sure to make buffer dirty outside */
+    lsn_info_page_init(page);
+    return page;
+}
+
+/*
+ * Create a new lsn info node at meta_info->lsn_table_next_position and store next_lsn in it.
+ *
+ * meta_info:          supplies batch_id/redo_id; lsn_table_next_position is advanced past the new node.
+ * old_tail_pos:       position of the previous tail, stored as the new node's prev link.
+ * next_lsn:           first lsn recorded in the new node; also stamped on the page when it is
+ *                     fetched here.
+ * create_in_old_page: true when the new node lives in old_page, which the caller already locked.
+ * old_page:           the caller's page, only used when create_in_old_page is true.
+ *
+ * Returns the actual position of the new node (skipping the page header if the next position
+ * was at the start of a page).
+ */
+LsnInfoPosition create_lsn_info_node(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos,
+    XLogRecPtr next_lsn, bool create_in_old_page, Page old_page)
+{
+    Page page = NULL;
+    LsnInfo lsn_info = NULL;
+    uint32 batch_id = meta_info->batch_id;
+    uint32 worker_id = meta_info->redo_id;
+    LsnInfoPosition insert_pos = meta_info->lsn_table_next_position;
+    Buffer buffer = InvalidBuffer;
+    uint32 offset = lsn_info_postion_to_offset(insert_pos);
+
+    if (offset == 0) {
+        insert_pos += LSN_INFO_HEAD_SIZE; /* actual insert position */
+        offset += LSN_INFO_HEAD_SIZE;
+    }
+    Assert(offset % LSN_INFO_NODE_SIZE == 0);
+
+    if (create_in_old_page) {
+        /* in old page, buffer is already locked */
+        lsn_info = (LsnInfo)(old_page + offset);
+    } else {
+        page = get_lsn_info_page(batch_id, worker_id, insert_pos, RBM_ZERO_ON_ERROR, &buffer);
+        /* get_lsn_info_page returns NULL after a WARNING when no buffer could be read */
+        if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+            ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+                batch_id, worker_id, insert_pos)));
+        }
+
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+        lsn_info = (LsnInfo)(page + offset);
+    }
+
+    lsn_info_init(lsn_info);
+    lsn_info->lsn[lsn_info->used] = next_lsn;
+    lsn_info->used++;
+    lsn_info->lsn_list.prev = old_tail_pos;
+
+    if (!create_in_old_page) {
+        /* the page was fetched here, so it is stamped, dirtied and released here as well */
+        standby_read_meta_page_set_lsn(page, next_lsn);
+        MarkBufferDirty(buffer);
+        UnlockReleaseBuffer(buffer);
+    }
+    /* update meta info */
+    meta_info->lsn_table_next_position = insert_pos + LSN_INFO_NODE_SIZE;
+
+    return insert_pos;
+}
+
+/*
+ * Append next_lsn to the lsn list described by lsn_head.
+ *
+ * The lsn is stored in the tail node when it still has a free slot. Otherwise a new lsn info
+ * node is created, linked behind the old tail, and lsn_head->prev is moved to the new node.
+ * lsn_head->prev must be a valid position on entry.
+ */
+void insert_lsn_to_lsn_info(StandbyReadMetaInfo *meta_info, LsnInfoDoubleList *lsn_head, XLogRecPtr next_lsn)
+{
+    uint32 batch_id = meta_info->batch_id;
+    uint32 worker_id = meta_info->redo_id;
+    LsnInfoPosition tail_pos = lsn_head->prev; /* lsn info node tail */
+    LsnInfoPosition insert_pos = meta_info->lsn_table_next_position;
+    Buffer buffer = InvalidBuffer;
+
+    Assert(!INFO_POSITION_IS_INVALID(tail_pos));
+    Page page = get_lsn_info_page(batch_id, worker_id, tail_pos, RBM_ZERO_ON_ERROR, &buffer);
+    /* get_lsn_info_page returns NULL after a WARNING when no buffer could be read */
+    if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+        ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+            batch_id, worker_id, tail_pos)));
+    }
+
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+    uint32 offset = lsn_info_postion_to_offset(tail_pos);
+    LsnInfo lsn_info = (LsnInfo)(page + offset);
+    Assert(offset >= LSN_INFO_HEAD_SIZE);
+    Assert(offset % LSN_INFO_NODE_SIZE == 0);
+    Assert(is_lsn_info_node_valid(lsn_info->flags));
+    Assert(lsn_info->lsn_list.next == LSN_INFO_LIST_HEAD);
+    if (lsn_info->used < LSN_NUM_PER_NODE) {
+        lsn_info->lsn[lsn_info->used] = next_lsn;
+        lsn_info->used++;
+
+        standby_read_meta_page_set_lsn(page, next_lsn);
+        MarkBufferDirty(buffer);
+        UnlockReleaseBuffer(buffer);
+    } else {
+        /*
+         * There is no free space in the old lsn info node, create a new one.
+         * The new node may share the (already locked) tail page.
+         */
+        bool create_in_old_page = (insert_pos / BLCKSZ) == (tail_pos / BLCKSZ);
+        /* insert position maybe changed */
+        insert_pos = create_lsn_info_node(meta_info, tail_pos, next_lsn, create_in_old_page, page);
+
+        /* modify lsn info list */
+        lsn_info->lsn_list.next = insert_pos;
+        standby_read_meta_page_set_lsn(page, next_lsn);
+        MarkBufferDirty(buffer);
+        UnlockReleaseBuffer(buffer);
+        /* update lsn info tail in block info meta */
+        lsn_head->prev = insert_pos;
+    }
+}
+
+/*
+ * Create a base page info node at meta_info->lsn_table_next_position.
+ *
+ * meta_info:              supplies batch_id/redo_id and base_page_next_position;
+ *                         lsn_table_next_position is advanced past the new node.
+ * old_lsn_tail_pos:       previous tail of the lsn list, stored as the new node's lsn list prev.
+ * old_base_page_tail_pos: previous tail of the base page list, stored as base page list prev.
+ * buf_tag:                identifies the block this base page belongs to.
+ * current_page_lsn:       lsn of the base page version being recorded.
+ * next_lsn:               first lsn stored in the node; also stamped on the lsn info page.
+ *
+ * Returns the actual position of the new node.
+ */
+LsnInfoPosition create_base_page_info_node(StandbyReadMetaInfo *meta_info,
+    LsnInfoPosition old_lsn_tail_pos, LsnInfoPosition old_base_page_tail_pos, const BufferTag* buf_tag,
+    XLogRecPtr current_page_lsn, XLogRecPtr next_lsn)
+{
+    uint32 batch_id = meta_info->batch_id;
+    uint32 worker_id = meta_info->redo_id;
+    LsnInfoPosition insert_pos = meta_info->lsn_table_next_position;
+    BasePagePosition base_page_pos = meta_info->base_page_next_position;
+    Buffer buffer = InvalidBuffer;
+
+    /*
+     * If there is not enough space in current page, we insert base page info node in next page.
+     */
+    uint32 remain_size = BLCKSZ - insert_pos % BLCKSZ;
+    if (remain_size < BASE_PAGE_INFO_NODE_SIZE) {
+        Assert(remain_size == LSN_INFO_NODE_SIZE);
+        insert_pos += LSN_INFO_NODE_SIZE; /* switch to next page */
+    }
+
+    uint32 offset = lsn_info_postion_to_offset(insert_pos);
+    Assert(offset % LSN_INFO_NODE_SIZE == 0);
+    if (offset == 0) {
+        insert_pos += LSN_INFO_HEAD_SIZE; /* actual insert position */
+        offset += LSN_INFO_HEAD_SIZE;
+    }
+
+    Page page = get_lsn_info_page(batch_id, worker_id, insert_pos, RBM_ZERO_ON_ERROR, &buffer);
+    /* get_lsn_info_page returns NULL after a WARNING when no buffer could be read */
+    if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+        ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+            batch_id, worker_id, insert_pos)));
+    }
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    BasePageInfo base_page_info = (BasePageInfo)(page + offset);
+
+    base_page_info_init(base_page_info);
+    base_page_info->lsn_info_node.lsn_list.prev = old_lsn_tail_pos;
+    base_page_info->lsn_info_node.lsn[0] = next_lsn;
+    base_page_info->lsn_info_node.used++;
+    base_page_info->base_page_list.prev = old_base_page_tail_pos;
+    base_page_info->cur_page_lsn = current_page_lsn;
+    base_page_info->relfilenode = buf_tag->rnode;
+    base_page_info->fork_num = buf_tag->forkNum;
+    base_page_info->block_num = buf_tag->blockNum;
+    base_page_info->next_base_page_lsn = InvalidXLogRecPtr;
+    base_page_info->base_page_position = base_page_pos;
+
+    /* record in the page header map that a base page info node starts at this offset */
+    set_base_page_map_bit(page, offset);
+
+    standby_read_meta_page_set_lsn(page, next_lsn);
+    MarkBufferDirty(buffer);
+    UnlockReleaseBuffer(buffer);
+
+    /* update meta info */
+    meta_info->lsn_table_next_position = insert_pos + BASE_PAGE_INFO_NODE_SIZE;
+
+    return insert_pos;
+}
+
+/*
+ * Record a new base page version for a block.
+ *
+ * Creates a base page info node, links it behind the old tails of both the lsn list and the
+ * base page list, updates the two list heads kept in the block info meta, and finally
+ * generates the base page itself from base_page.
+ */
+void insert_base_page_to_lsn_info(StandbyReadMetaInfo *meta_info, LsnInfoDoubleList *lsn_head,
+    LsnInfoDoubleList *base_page_head, const BufferTag& buf_tag, const Page base_page, XLogRecPtr current_page_lsn,
+    XLogRecPtr next_lsn)
+{
+    const LsnInfoPosition prev_lsn_tail = lsn_head->prev;
+    const LsnInfoPosition prev_base_page_tail = base_page_head->prev;
+
+    /* possibly modified meta_info */
+    LsnInfoPosition new_pos = create_base_page_info_node(meta_info, prev_lsn_tail, prev_base_page_tail, &buf_tag,
+        current_page_lsn, next_lsn);
+
+    /* modify old tail information of lsn info node and base page info node */
+    if (prev_lsn_tail != LSN_INFO_LIST_HEAD) {
+        info_list_modify_old_tail(meta_info, prev_lsn_tail, new_pos, current_page_lsn, next_lsn, true);
+    }
+    if (prev_base_page_tail != LSN_INFO_LIST_HEAD) {
+        info_list_modify_old_tail(meta_info, prev_base_page_tail, new_pos, current_page_lsn, next_lsn, false);
+    }
+
+    /* modify block info meta: the new node becomes the tail of both lists */
+    lsn_head->prev = new_pos;
+    base_page_head->prev = new_pos;
+
+    /* an empty list also gets the new node as its first element */
+    if (INFO_POSITION_IS_INVALID(lsn_head->next)) {
+        lsn_head->next = new_pos;
+    }
+    if (INFO_POSITION_IS_INVALID(base_page_head->next)) {
+        base_page_head->next = new_pos;
+    }
+
+    /* generate base page */
+    generate_base_page(meta_info, base_page);
+}
+
+/*
+ * Collect what is needed to rebuild the version of a block that is visible at read_lsn.
+ *
+ * Walks the base page list backwards from latest_lsn_base_page_pos until it reaches a base
+ * page whose lsn is less than read_lsn and records that base page's position and lsn in
+ * lsn_info_list. Then follows the lsn list forward from that node and copies every lsn that is
+ * less than read_lsn into lsn_info_list->lsn_array, setting lsn_info_list->lsn_num.
+ *
+ * Raises ERROR when no suitable base page exists or when an lsn info page cannot be read.
+ */
+void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_base_page_pos,
+    StandbyReadLsnInfoArray* lsn_info_list, XLogRecPtr read_lsn)
+{
+    BasePageInfo base_page_info = NULL;
+    LsnInfoPosition next_lsn_info_pos;
+    Buffer buffer;
+
+    XLogRecPtr page_lsn;
+    XLogRecPtr xlog_lsn;
+    uint32 batch_id;
+    uint32 worker_id;
+    XLogRecPtr *lsn_arry = lsn_info_list->lsn_array;
+
+    /* get batch id and page redo worker id */
+    extreme_rto::RedoItemTag redo_item_tag;
+    const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager();
+    INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum);
+    /* batch id and worker id start from 1 when reading a page */
+    batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::get_batch_redo_num()) + 1;
+    worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1;
+
+    /* find the first base page whose lsn is less than read lsn, from tail to head */
+    do {
+        /* reach the end of the list */
+        if (INFO_POSITION_IS_INVALID(latest_lsn_base_page_pos)) {
+            ereport(ERROR, (
+                errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u",
+                    buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum,
+                    buf_tag.blockNum, batch_id, worker_id)));
+            break;
+        }
+        buffer = InvalidBuffer;
+        Page page = get_lsn_info_page(batch_id, worker_id, latest_lsn_base_page_pos, RBM_NORMAL, &buffer);
+        if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+            ereport(ERROR,
+                (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id,
+                    worker_id, latest_lsn_base_page_pos)));
+        }
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+        uint32 offset = lsn_info_postion_to_offset(latest_lsn_base_page_pos);
+        base_page_info = (BasePageInfo)(page + offset);
+
+        page_lsn = base_page_info->cur_page_lsn;
+        lsn_info_list->base_page_pos = base_page_info->base_page_position;
+        lsn_info_list->base_page_lsn = base_page_info->cur_page_lsn;
+        Assert(is_base_page_type(base_page_info->lsn_info_node.type));
+
+        /* If we find the desired page, keep it locked */
+        if (XLByteLT(page_lsn, read_lsn)) {
+            break;
+        }
+        /*
+         * Fetch the link to the previous base page while the buffer is still pinned and locked;
+         * the page content must not be touched once the buffer has been released.
+         */
+        LsnInfoPosition prev_base_page_pos = base_page_info->base_page_list.prev;
+        UnlockReleaseBuffer(buffer);
+        latest_lsn_base_page_pos = prev_base_page_pos;
+    } while (true);
+
+    /* the base page info node embeds the first lsn info node of its lsn list */
+    LsnInfo lsn_info = &base_page_info->lsn_info_node;
+    bool find_end = false;
+    uint32 lsn_num = 0;
+    do {
+        for (uint16 i = 0; i < lsn_info->used; ++i) {
+            xlog_lsn = lsn_info->lsn[i];
+            if (XLByteLE(read_lsn, xlog_lsn)) {
+                find_end = true;
+                break;
+            }
+
+            /* NOTE(review): the capacity of lsn_array is not visible here -- confirm it cannot overflow */
+            lsn_arry[lsn_num++] = xlog_lsn;
+        }
+        /* read the next link before the buffer is released */
+        next_lsn_info_pos = lsn_info->lsn_list.next;
+        UnlockReleaseBuffer(buffer);
+        /* reach the end of the list */
+        if (find_end || next_lsn_info_pos == LSN_INFO_LIST_HEAD) {
+            break;
+        }
+
+        Page page = get_lsn_info_page(batch_id, worker_id, next_lsn_info_pos, RBM_NORMAL, &buffer);
+        if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+            ereport(ERROR,
+                (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id,
+                    worker_id, next_lsn_info_pos)));
+        }
+        Assert(buffer != InvalidBuffer);
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+        uint32 offset = lsn_info_postion_to_offset(next_lsn_info_pos);
+        lsn_info = (LsnInfo)(page + offset);
+    } while (true);
+
+    lsn_info_list->lsn_num = lsn_num;
+}
+
+/*
+ * Validate the in-page offset of a BasePageInfo node: it must lie behind the page header,
+ * leave room for a whole node before the end of the page, and be a multiple of
+ * LSN_INFO_HEAD_SIZE. Reports ERROR for an invalid offset.
+ */
+static bool check_base_page_loc_valid(uint32 base_page_loc)
+{
+    const bool in_range = base_page_loc >= LSN_INFO_HEAD_SIZE && base_page_loc <= BLCKSZ - BASE_PAGE_INFO_NODE_SIZE;
+    const bool aligned = (base_page_loc % LSN_INFO_HEAD_SIZE == 0);
+
+    if (in_range && aligned) {
+        return true;
+    }
+
+    ereport(ERROR, (errmsg("invalid BasePageInfo location:%u, page size:%d", base_page_loc, BLCKSZ)));
+    return false;
+}
+
+/*
+ * Set one bit of LsnInfoPageHeader::base_page_map from 0 to 1.
+ *
+ * Parameters:
+ *   page:          a page block in memory (one block occupies 8192 bytes).
+ *   base_page_loc: offset of a BasePageInfoNode object from the beginning of this page.
+ *
+ * base_page_map has 128 bits that are mapped onto the 8192-byte page, so every bit
+ * represents a 64-byte range (64 = 8192 / 128):
+ *   bit 0   maps to [0, 64) of the page;
+ *   bit 1   maps to [64, 128) of the page;
+ *   ......
+ *   bit 127 maps to [8128, 8192) of the page.
+ * LsnInfoPageHeader is the page header and occupies the first 64 bytes, so bit 0 is always 0.
+ * LSN_INFO_HEAD_SIZE, LSN_INFO_NODE_SIZE and BASE_PAGE_INFO_NODE_SIZE must be integer multiples
+ * of 64, which is what allows base_page_map to describe the page layout.
+ */
+void set_base_page_map_bit(Page page, uint32 base_page_loc)
+{
+    /*
+     * base_page_loc must be inside the valid range and be an integer multiple of
+     * LSN_INFO_HEAD_SIZE
+     */
+    check_base_page_loc_valid(base_page_loc);
+
+    LsnInfoPageHeader *header = (LsnInfoPageHeader *)page;
+    uint8 *map = header->base_page_map;
+    const uint32 bit_index = base_page_loc / LSN_INFO_NODE_SIZE;
+    /* each map byte holds BYTE_BITS bits: select the byte, then the bit inside it */
+    const uint32 byte_index = bit_index / BYTE_BITS;
+    const uint8 mask = (uint8)((uint8)1 << (bit_index % BYTE_BITS));
+
+    map[byte_index] |= mask;
+}
+
+/*
+ * Report ERROR when which_bit is not a valid bit index of base_page_map,
+ * i.e. not in [0, BASE_PAGE_MAP_SIZE * BYTE_BITS).
+ */
+static void check_base_page_map_bit_loc_valid(uint32 which_bit)
+{
+    if (which_bit < BASE_PAGE_MAP_SIZE * BYTE_BITS) {
+        return;
+    }
+
+    ereport(ERROR, (errmsg("invalid base_page_map bit location:%u, "
+        "the valid range is [%u, %u).", which_bit, 0U, BASE_PAGE_MAP_SIZE * BYTE_BITS)));
+}
+
+/*
+ * Check whether a specific bit of LsnInfoPageHeader::base_page_map is 1.
+ *   page:      the page whose LsnInfoPageHeader is inspected.
+ *   which_bit: index of the bit to test.
+ * Returns true if the target bit is set.
+ */
+bool is_base_page_map_bit_set(Page page, uint32 which_bit)
+{
+    check_base_page_map_bit_loc_valid(which_bit);
+
+    LsnInfoPageHeader *header = (LsnInfoPageHeader *)page;
+    uint8 *map = header->base_page_map;
+    /* each map byte holds BYTE_BITS bits: select the byte, then the bit inside it */
+    const uint32 byte_index = which_bit / BYTE_BITS;
+    const uint8 mask = (uint8)(((uint8)1) << (which_bit % BYTE_BITS));
+
+    return (map[byte_index] & mask) != 0;
+}
+
+/*
+ * Ask the storage manager to unlink lsn info storage for the given batch/redo worker,
+ * passing the block number derived from recycle_pos to smgrdounlink.
+ */
+void recycle_lsn_info_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos)
+{
+    BlockNumber recycle_block = (BlockNumber)(recycle_pos / BLCKSZ);
+    SMgrRelation smgr = smgropen(make_lsn_info_relfilenode(batch_id, redo_id, recycle_pos), InvalidBackendId);
+
+    smgrdounlink(smgr, true, recycle_block);
+}
+
+/*
+ * Walk one block's base page list starting at page_info_pos and invalidate every base page
+ * info node whose following base page already has an lsn that is not greater than recycle_lsn.
+ *
+ * The walk stops at the first node that must be retained (its next_base_page_lsn is invalid
+ * or greater than recycle_lsn) or when the list ends. *min_page_info_pos and *min_lsn are set
+ * to the position and cur_page_lsn of the last node visited.
+ */
+void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_info_pos,
+    XLogRecPtr recycle_lsn, LsnInfoPosition *min_page_info_pos, XLogRecPtr *min_lsn)
+{
+    /* get batch id and page redo worker id */
+    extreme_rto::RedoItemTag redo_item_tag;
+    const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager();
+    INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum);
+    /* batch id and worker id start from 1 when reading a page */
+    uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::get_batch_redo_num()) + 1;
+    uint32 worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1;
+
+    while (INFO_POSITION_IS_VALID(page_info_pos)) {
+        Buffer buffer = InvalidBuffer;
+        Page page = get_lsn_info_page(batch_id, worker_id, page_info_pos, RBM_NORMAL, &buffer);
+        if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+            ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+                batch_id, worker_id, page_info_pos)));
+        }
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+        BasePageInfo node = (BasePageInfo)(page + lsn_info_postion_to_offset(page_info_pos));
+        Assert(is_base_page_type(node->lsn_info_node.type));
+
+        *min_page_info_pos = page_info_pos;
+        *min_lsn = node->cur_page_lsn;
+
+        /* retain a page version with page lsn less than recycle lsn */
+        XLogRecPtr next_base_page_lsn = node->next_base_page_lsn;
+        bool recyclable = !XLogRecPtrIsInvalid(next_base_page_lsn) && !XLByteLT(recycle_lsn, next_base_page_lsn);
+        if (recyclable) {
+            node->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG;
+            page_info_pos = node->base_page_list.next;
+            MarkBufferDirty(buffer);
+        }
+        UnlockReleaseBuffer(buffer);
+
+        if (!recyclable) {
+            break;
+        }
+    }
+}
+
+/*
+ * Clear the valid flag of a base page info node and of every node that follows it in the
+ * base page list.
+ *
+ * buffer/offset locate the first node. The caller's buffer is only locked and unlocked here,
+ * so it stays pinned; the buffers of the following nodes are read, updated and released.
+ */
+void invalid_base_page_list(StandbyReadMetaInfo *meta_info, Buffer buffer, uint32 offset)
+{
+    const uint32 batch_id = meta_info->batch_id;
+    const uint32 worker_id = meta_info->redo_id;
+
+    /* first node: set invalid flags in the caller's buffer */
+    Page page = BufferGetPage(buffer);
+    BasePageInfo node = (BasePageInfo)(page + offset);
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+    node->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG;
+    LsnInfoPosition next_pos = node->base_page_list.next;
+    MarkBufferDirty(buffer);
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* keep buffer pinned */
+
+    /* remaining nodes: follow the next links until the list ends */
+    while (INFO_POSITION_IS_VALID(next_pos)) {
+        Buffer cur_buffer = InvalidBuffer;
+        page = get_lsn_info_page(batch_id, worker_id, next_pos, RBM_NORMAL, &cur_buffer);
+        if (unlikely(page == NULL || cur_buffer == InvalidBuffer)) {
+            ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+                batch_id, worker_id, next_pos)));
+        }
+        node = (BasePageInfo)(page + lsn_info_postion_to_offset(next_pos));
+
+        /* unset valid flags */
+        LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
+        node->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG;
+        next_pos = node->base_page_list.next;
+        MarkBufferDirty(cur_buffer);
+        UnlockReleaseBuffer(cur_buffer);
+    }
+}
+
+/*
+ * Raise meta_info->recycle_lsn_per_worker to lsn when it is still invalid or smaller than lsn.
+ * A LOG line with the passed lsn is written on every call, whether or not the value changed.
+ */
+inline void update_recycle_lsn_per_worker(StandbyReadMetaInfo *meta_info, XLogRecPtr lsn)
+{
+    Assert(XLogRecPtrIsValid(lsn));
+
+    const bool not_set_yet = XLogRecPtrIsInvalid(meta_info->recycle_lsn_per_worker);
+    if (not_set_yet || XLByteLT(meta_info->recycle_lsn_per_worker, lsn)) {
+        meta_info->recycle_lsn_per_worker = lsn;
+    }
+
+    ereport(LOG, (errmsg(EXRTOFORMAT(
+        "[exrto_recycle] update recycle lsn per worker , batch_id: %u, redo_id: %u, recycle lsn: %08X/%08X"),
+        meta_info->batch_id, meta_info->redo_id, (uint32)(lsn >> UINT64_HALF), (uint32)lsn)));
+}
+
+/*
+ * Try to recycle every base page info node stored in the lsn info page at
+ * meta_info->lsn_table_recyle_position.
+ *
+ * Returns true when the whole page was processed, false as soon as a node has to be retained.
+ * *base_page_position is updated with the base page position of each valid node examined.
+ * The page buffer is share-locked while nodes are inspected and unlocked (but kept pinned)
+ * around the calls that recycle block info.
+ */
+bool recycle_one_lsn_info_page(StandbyReadMetaInfo *meta_info, XLogRecPtr recycle_lsn,
+    BasePagePosition *base_page_position)
+{
+    const uint32 batch_id = meta_info->batch_id;
+    const uint32 worker_id = meta_info->redo_id;
+    const LsnInfoPosition recycle_pos = meta_info->lsn_table_recyle_position;
+    Buffer buffer = InvalidBuffer;
+
+    Page page = get_lsn_info_page(batch_id, worker_id, recycle_pos, RBM_NORMAL, &buffer);
+    if (unlikely(page == NULL || buffer == InvalidBuffer)) {
+        ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+            batch_id, worker_id, recycle_pos)));
+    }
+
+    bool share_locked = false;
+    /* skip page header */
+    for (uint32 bit = 1; bit < BASE_PAGE_MAP_SIZE * BYTE_BITS; bit++) {
+        /* the lock is dropped while block info is recycled, so re-acquire it when needed */
+        if (!share_locked) {
+            LockBuffer(buffer, BUFFER_LOCK_SHARE);
+            share_locked = true;
+        }
+
+        if (!is_base_page_map_bit_set(page, bit)) {
+            continue;
+        }
+        uint32 offset = bit_to_offset(bit);
+        BasePageInfo node = (BasePageInfo)(page + offset);
+        Assert(is_base_page_type(node->lsn_info_node.type));
+
+        /* block meta file may be dropped */
+        if (!is_lsn_info_node_valid(node->lsn_info_node.flags)) {
+            continue;
+        }
+
+        /* retain a page version with page lsn less than recycle lsn */
+        XLogRecPtr base_page_lsn = node->cur_page_lsn;
+        if (XLogRecPtrIsInvalid(base_page_lsn)) {
+            base_page_lsn = node->lsn_info_node.lsn[0];
+        }
+        XLogRecPtr next_base_page_lsn = node->next_base_page_lsn;
+        *base_page_position = node->base_page_position;
+        if (XLogRecPtrIsValid(next_base_page_lsn) && XLByteLT(recycle_lsn, next_base_page_lsn)) {
+            update_recycle_lsn_per_worker(meta_info, base_page_lsn);
+            UnlockReleaseBuffer(buffer);
+            return false;
+        }
+
+        /* copy what is needed out of the page before the lock is dropped */
+        BufferTag buf_tag;
+        INIT_BUFFERTAG(buf_tag, node->relfilenode, node->fork_num, node->block_num);
+        LsnInfoPosition node_pos = recycle_pos + offset;
+
+        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        share_locked = false;
+
+        StandbyReadRecyleState stat = recyle_block_info(buf_tag, node_pos, next_base_page_lsn, recycle_lsn);
+        if (stat == STANDBY_READ_RECLYE_ALL) {
+            invalid_base_page_list(meta_info, buffer, offset);
+        } else if (stat == STANDBY_READ_RECLYE_NONE) {
+            Assert(XLogRecPtrIsInvalid(next_base_page_lsn));
+            update_recycle_lsn_per_worker(meta_info, base_page_lsn);
+            /* the buffer is already unlocked here, only the pin is dropped */
+            ReleaseBuffer(buffer);
+            return false;
+        }
+    }
+
+    if (share_locked) {
+        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+    }
+    ReleaseBuffer(buffer);
+    return true;
+}
+
+/*
+ * Recycle lsn info pages of one redo worker up to recycle_lsn.
+ *
+ * Pages are processed one by one from lsn_table_recyle_position while at least one full page
+ * lies between the recycle position and lsn_table_next_position, stopping at the first page
+ * that cannot be recycled completely. Afterwards the recycle positions in meta_info are
+ * updated and the lsn info / base page files are recycled up to them.
+ */
+void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr recycle_lsn)
+{
+    Assert(meta_info->batch_id > 0);
+    Assert(meta_info->redo_id > 0);
+    BasePagePosition base_page_position = meta_info->base_page_recyle_position;
+
+    while (meta_info->lsn_table_recyle_position + BLCKSZ < meta_info->lsn_table_next_position) {
+        if (!recycle_one_lsn_info_page(meta_info, recycle_lsn, &base_page_position)) {
+            break;
+        }
+        /* update recycle position */
+        meta_info->lsn_table_recyle_position += BLCKSZ;
+        Assert(meta_info->lsn_table_recyle_position % BLCKSZ == 0);
+        RedoInterruptCallBack();
+    }
+
+    meta_info->base_page_recyle_position = base_page_position;
+    Assert(meta_info->base_page_recyle_position % BLCKSZ == 0);
+    Assert(meta_info->base_page_recyle_position <= meta_info->base_page_next_position);
+
+    recycle_lsn_info_file(meta_info->batch_id, meta_info->redo_id, meta_info->lsn_table_recyle_position);
+    recycle_base_page_file(meta_info->batch_id, meta_info->redo_id, meta_info->base_page_recyle_position);
+}
+
+} // namespace extreme_rto_standby_read
diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f64492a46b77fb5884a88f7fb85eb935f4b89b2a
--- /dev/null
+++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * standby_read_interface.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include
+#include "access/extreme_rto/page_redo.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
+#include "access/multi_redo_api.h"
+#include "pgstat.h"
+#include "storage/smgr/relfilenode.h"
+#include "storage/buf/buf_internals.h"
+#include "storage/buf/bufmgr.h"
+#include "storage/smgr/segment.h"
+#include "utils/rel.h"
+#include "utils/palloc.h"
+#include "access/extreme_rto/dispatcher.h"
+#include "funcapi.h"
+
+const char* EXRTO_BASE_PAGE_SUB_DIR = "base_page"; /* names of the sub directories created under EXRTO_FILE_DIR */
+const char* EXRTO_LSN_INFO_SUB_DIR = "lsn_info_meta";
+const char* EXRTO_BLOCK_INFO_SUB_DIR = "block_info_meta";
+const char* EXRTO_FILE_SUB_DIR[] = { /* indexed by ExRTOFileType; presumably listed in enum order -- TODO confirm */
+ EXRTO_BASE_PAGE_SUB_DIR, EXRTO_LSN_INFO_SUB_DIR, EXRTO_BLOCK_INFO_SUB_DIR};
+const uint32 EXRTO_FILE_PATH_LEN = 1024; /* size of the buffer used to build sub directory paths */
+
+/*
+ * Fill read_node with a RelFileNode derived from read_lsn: the high 32 bits of the lsn go
+ * into spcNode and the low 32 bits into dbNode.
+ */
+void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode& read_node)
+{
+    const Oid lsn_high = (Oid)(read_lsn >> 32);
+    const Oid lsn_low = (Oid)(read_lsn);
+
+    read_node.spcNode = lsn_high;
+    read_node.dbNode = lsn_low;
+    read_node.relNode = InvalidOid; // make sure it can be InvalidOid or not
+    read_node.opt = 0;
+    read_node.bucketNode = InvalidBktId;
+}
+
+/*
+ * Allocate (or find) a buffer for a standby read page version. The buffer is looked up under
+ * the RelFileNode built from read_lsn together with the fork and block number of buf_tag;
+ * found reports whether BufferAlloc found an existing buffer.
+ */
+BufferDesc* alloc_standby_read_buf(
+    const BufferTag& buf_tag, BufferAccessStrategy strategy, bool& found, XLogRecPtr read_lsn)
+{
+    RelFileNode lsn_node;
+    make_standby_read_node(read_lsn, lsn_node);
+
+    return BufferAlloc(lsn_node, 0, buf_tag.forkNum, buf_tag.blockNum, strategy, &found, NULL);
+}
+
+/*
+ * Try to serve a standby read from the newest version of the page.
+ *
+ * Returns InvalidBuffer when the page cannot be read or when its lsn is greater than read_lsn.
+ * Otherwise returns a standby read buffer keyed by the page lsn: an existing one if found, or a
+ * newly allocated one filled with a copy of the newest page.
+ */
+Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode,
+    BufferAccessStrategy strategy, XLogRecPtr read_lsn)
+{
+    bool found = false;
+
+    Buffer latest_buf = ReadBuffer_common(
+        reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &found, NULL);
+    if (BufferIsInvalid(latest_buf)) {
+        return InvalidBuffer;
+    }
+
+    LockBuffer(latest_buf, BUFFER_LOCK_SHARE);
+    Page latest_page = BufferGetPage(latest_buf);
+    XLogRecPtr latest_lsn = PageGetLSN(latest_page);
+    /* the newest version is too new for this reader */
+    if (XLByteLT(read_lsn, latest_lsn)) {
+        UnlockReleaseBuffer(latest_buf);
+        return InvalidBuffer;
+    }
+
+    BufferTag buf_tag = {
+        .rnode = reln->rd_smgr->smgr_rnode.node,
+        .forkNum = fork_num,
+        .blockNum = block_num,
+    };
+    ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner);
+    BufferDesc* read_desc = alloc_standby_read_buf(buf_tag, strategy, found, latest_lsn);
+    if (found) {
+        /* a copy for this page lsn already exists */
+        UnlockReleaseBuffer(latest_buf);
+        return BufferDescriptorGetBuffer(read_desc);
+    }
+
+    /* copy the newest page into the standby read buffer while the source is still share-locked */
+    Page copy_page = (Page)BufHdrGetBlock(read_desc);
+    errno_t rc = memcpy_s(copy_page, BLCKSZ, latest_page, BLCKSZ);
+    securec_check(rc, "\0", "\0");
+    UnlockReleaseBuffer(latest_buf);
+
+    read_desc->extra->lsn_on_disk = PageGetLSN(copy_page);
+#ifdef USE_ASSERT_CHECKING
+    read_desc->lsn_dirty = InvalidXLogRecPtr;
+#endif
+
+    TerminateBufferIO(read_desc, false, (BM_VALID | BM_IS_TMP_BUF));
+    return BufferDescriptorGetBuffer(read_desc);
+}
+
+/*
+ * Read a block for a standby query at the thread's exrto_read_lsn.
+ *
+ * The newest page is used when its lsn is not greater than the read lsn. Otherwise the lsn info
+ * of the block is looked up, a standby read buffer keyed by the expected lsn is allocated, and
+ * (unless such a buffer already exists) the page is rebuilt from its base page plus the
+ * recorded lsns. Raises ERROR when no lsn info can be found for the block.
+ */
+Buffer standby_read_buf(
+    Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy)
+{
+    /* Open it at the smgr level */
+    RelationOpenSmgr(reln); // need or not ?????
+    pgstat_count_buffer_read(reln);
+    pgstatCountBlocksFetched4SessionLevel();
+
+    if (RelationisEncryptEnable(reln)) {
+        reln->rd_smgr->encrypt = true;
+    }
+
+    bool found = false;
+    BufferTag buf_tag = {
+        .rnode = reln->rd_smgr->smgr_rnode.node,
+        .forkNum = fork_num,
+        .blockNum = block_num,
+    };
+    XLogRecPtr read_lsn = t_thrd.proc->exrto_read_lsn;
+    if (read_lsn == InvalidXLogRecPtr) {
+        /* no read lsn set: fall back to the maximum lsn */
+        Assert(IsSystemRelation(reln));
+        read_lsn = MAX_XLOG_REC_PTR;
+    }
+
+    Buffer newest_buf = get_newest_page_for_read(reln, fork_num, block_num, mode, strategy, read_lsn);
+    if (newest_buf != InvalidBuffer) {
+        // newest page's lsn smaller than read lsn
+        return newest_buf;
+    }
+
+    ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner);
+    // read lsn info
+    StandbyReadLsnInfoArray *lsn_info = &t_thrd.exrto_recycle_cxt.lsn_info;
+    bool got_lsn_info = extreme_rto_standby_read::get_page_lsn_info(buf_tag, strategy, read_lsn, lsn_info);
+    if (!got_lsn_info) {
+        ereport(ERROR,
+            (errcode(ERRCODE_INTERNAL_ERROR),
+                (errmsg("standby_read_buf couldnot found buf %u/%u/%u %d %u read lsn %lu", buf_tag.rnode.spcNode,
+                    buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum, read_lsn))));
+        return InvalidBuffer;
+    }
+
+    /* the expected lsn is the last recorded lsn, or the base page lsn when none was recorded */
+    XLogRecPtr expected_lsn = lsn_info->base_page_lsn;
+    if (lsn_info->lsn_num != 0) {
+        Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] > 0);
+        Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] < read_lsn);
+        Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] >= lsn_info->base_page_lsn);
+        expected_lsn = lsn_info->lsn_array[lsn_info->lsn_num - 1];
+    }
+
+    BufferDesc* read_desc = alloc_standby_read_buf(buf_tag, strategy, found, expected_lsn);
+    if (found) {
+        return BufferDescriptorGetBuffer(read_desc);
+    }
+
+    buffer_in_progress_pop();
+    // read_base_page
+    extreme_rto_standby_read::read_base_page(buf_tag, lsn_info->base_page_pos, read_desc);
+    if (lsn_info->lsn_num > 0) {
+        redo_target_page(buf_tag, lsn_info, BufferDescriptorGetBuffer(read_desc));
+    }
+    Page rebuilt_page = BufferGetPage(BufferDescriptorGetBuffer(read_desc));
+    read_desc->extra->lsn_on_disk = PageGetLSN(rebuilt_page);
+#ifdef USE_ASSERT_CHECKING
+    read_desc->lsn_dirty = InvalidXLogRecPtr;
+#endif
+    buffer_in_progress_push();
+    TerminateBufferIO(read_desc, false, (BM_VALID | BM_IS_TMP_BUF));
+
+    return BufferDescriptorGetBuffer(read_desc);
+}
+
+/*
+ * Create EXRTO_FILE_DIR and one sub directory per ExRTOFileType (BASE_PAGE .. BLOCK_INFO_META).
+ * Does nothing unless IS_EXRTO_READ is true. Directories that already exist are accepted;
+ * any other mkdir failure is reported as ERROR.
+ */
+void make_exrto_file_directory()
+{
+    if (!IS_EXRTO_READ) {
+        return;
+    }
+    if (mkdir(EXRTO_FILE_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+        ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", EXRTO_FILE_DIR)));
+    }
+
+    char sub_dir[EXRTO_FILE_PATH_LEN];
+    errno_t rc = EOK;
+    /* an enum has no operator++, so the next value is obtained through an explicit cast */
+    for (ExRTOFileType type = BASE_PAGE; type <= BLOCK_INFO_META; type = static_cast<ExRTOFileType>(type + 1)) {
+        rc = snprintf_s(sub_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR,
+            EXRTO_FILE_SUB_DIR[type]);
+        securec_check_ss(rc, "\0", "\0");
+        if (mkdir(sub_dir, S_IRWXU) < 0 && errno != EEXIST) {
+            ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", sub_dir)));
+        }
+    }
+}
+
+/*
+ * Move the current standby-read directory out of the way, or delete it.
+ *
+ *  - EXRTO_FILE_DIR does not exist: nothing to do.
+ *  - EXRTO_OLD_FILE_DIR already exists: remove EXRTO_FILE_DIR with rmtree();
+ *    a failure there only produces a WARNING.
+ *  - otherwise: rename EXRTO_FILE_DIR to EXRTO_OLD_FILE_DIR; a failed rename
+ *    is reported at ERROR level.
+ */
+void exrto_clean_dir(void)
+{
+    ereport(LOG, (errmsg("exrto_clean_dir: start to clean dir.")));
+    if (!isDirExist(EXRTO_FILE_DIR)) {
+        return;
+    }
+
+    if (isDirExist(EXRTO_OLD_FILE_DIR)) {
+        /* The "old" slot is still occupied, so the current directory is deleted in place. */
+        ereport(LOG, (errmsg("exrto_clean_dir: remove standby_read.")));
+        if (!rmtree(EXRTO_FILE_DIR, true)) {
+            ereport(WARNING, (errcode_for_file_access(),
+                              errmsg("could not remove exrto standby_read dir: %s\n", EXRTO_FILE_DIR)));
+        }
+        return;
+    }
+
+    /* The "old" slot is free: park the current directory there. */
+    ereport(LOG, (errmsg("exrto_clean_dir: rename standby_read to standby_read_old.")));
+    if (rename(EXRTO_FILE_DIR, EXRTO_OLD_FILE_DIR) != 0) {
+        ereport(ERROR, (errcode_for_file_access(),
+                        errmsg("failed to rename exrto standby_read dir: %s\n", EXRTO_FILE_DIR)));
+    }
+}
+
+/*
+ * Delete the EXRTO_OLD_FILE_DIR tree. Intended to be run from the recycle thread.
+ * A failed removal is only reported as a WARNING.
+ */
+void exrto_recycle_old_dir(void)
+{
+    if (rmtree(EXRTO_OLD_FILE_DIR, true)) {
+        return;
+    }
+
+    ereport(WARNING, (errcode_for_file_access(),
+                      errmsg("could not remove exrto standby_read_old dir: %s\n", EXRTO_OLD_FILE_DIR)));
+}
+
+/*
+ * Prepare the standby-read file area: exrto_clean_dir() always runs first, then
+ * the directory tree is created again only when IS_EXRTO_READ is set.
+ */
+void exrto_standby_read_init()
+{
+    exrto_clean_dir();
+
+    if (!IS_EXRTO_READ) {
+        return;
+    }
+    make_exrto_file_directory();
+}
+
+/*
+ * Report how much space the extreme-RTO standby-read files occupy.
+ *
+ * Returns one composite row with six columns (all declared as XIDOID and filled
+ * through TransactionIdGetDatum):
+ *   base_page_file_num, base_page_total_size,
+ *   lsn_info_meta_file_num, lsn_info_meta_total_size,
+ *   block_info_meta_file_num, block_info_meta_total_size.
+ *
+ * The base-page and lsn-info figures are derived from the StandbyReadMetaInfo
+ * positions of every page redo worker while predo_cxt.destroy_lock is held; they
+ * stay zero when extreme_rto::g_dispatcher is NULL. The block-info figures come
+ * from scanning the block-info sub-directory on disk and stay zero when that
+ * directory cannot be opened.
+ */
+Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS)
+{
+#define EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM 6
+    Datum values[EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM];
+    errno_t rc;
+    bool nulls[EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM];
+    HeapTuple tuple = NULL;
+    TupleDesc tupdesc = NULL;
+    uint64 lsn_file_size = 0;
+    uint64 lsn_file_num = 0;
+    uint64 basepage_file_size = 0;
+    uint64 basepage_file_num = 0;
+    uint64 block_meta_file_size = 0;
+    uint64 block_meta_file_num = 0;
+    uint32 worker_nums;
+
+    rc = memset_s(values, sizeof(values), 0, sizeof(values));
+    securec_check(rc, "\0", "\0");
+
+    rc = memset_s(nulls, sizeof(nulls), 0, sizeof(nulls));
+    securec_check(rc, "\0", "\0");
+
+    tupdesc = CreateTemplateTupleDesc(EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM, false);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_1, "base_page_file_num", XIDOID, -1, 0);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_2, "base_page_total_size", XIDOID, -1, 0);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_3, "lsn_info_meta_file_num", XIDOID, -1, 0);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_4, "lsn_info_meta_total_size", XIDOID, -1, 0);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_5, "block_info_meta_file_num", XIDOID, -1, 0);
+    TupleDescInitEntry(tupdesc, (AttrNumber)ARG_6, "block_info_meta_total_size", XIDOID, -1, 0);
+
+    tupdesc = BlessTupleDesc(tupdesc);
+
+    /* The worker array is only walked while destroy_lock is held. */
+    SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock));
+    if (extreme_rto::g_dispatcher == NULL) {
+        worker_nums = 0;
+    } else {
+        worker_nums = extreme_rto::g_dispatcher->allWorkersCnt;
+    }
+
+    for (uint32 i = 0; i < worker_nums; ++i) {
+        extreme_rto::PageRedoWorker* page_redo_worker = extreme_rto::g_dispatcher->allWorkers[i];
+        if (page_redo_worker->role != extreme_rto::REDO_PAGE_WORKER) {
+            continue;
+        }
+        StandbyReadMetaInfo meta_info = page_redo_worker->standby_read_meta_info;
+
+        uint64 lsn_file_size_per_thread = 0;
+        if (meta_info.lsn_table_next_position > meta_info.lsn_table_recyle_position) {
+            lsn_file_size_per_thread = meta_info.lsn_table_next_position - meta_info.lsn_table_recyle_position;
+            /*
+             * Nothing is stored in 0 ~ lsn_table_recyle_position, so the first live file may be
+             * only partly used. File count = ceil(next / MAXSIZE) - floor(recycle / MAXSIZE).
+             * e.g. recycle position 100KB, next position 16M+100KB: filenum 2, size 16M.
+             */
+            lsn_file_num += meta_info.lsn_table_next_position / EXRTO_LSN_INFO_FILE_MAXSIZE +
+                            ((meta_info.lsn_table_next_position % EXRTO_LSN_INFO_FILE_MAXSIZE) > 0 ? 1 : 0) -
+                            (meta_info.lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE);
+        }
+        lsn_file_size += lsn_file_size_per_thread;
+
+        /* Same accounting for the base page files. */
+        uint64 basepage_file_size_per_thread = 0;
+        if (meta_info.base_page_next_position > meta_info.base_page_recyle_position) {
+            basepage_file_size_per_thread = meta_info.base_page_next_position - meta_info.base_page_recyle_position;
+            basepage_file_num += meta_info.base_page_next_position / EXRTO_BASE_PAGE_FILE_MAXSIZE +
+                                 ((meta_info.base_page_next_position % EXRTO_BASE_PAGE_FILE_MAXSIZE) > 0 ? 1 : 0) -
+                                 (meta_info.base_page_recyle_position / EXRTO_BASE_PAGE_FILE_MAXSIZE);
+        }
+        basepage_file_size += basepage_file_size_per_thread;
+    }
+    SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock));
+
+    char block_meta_file_dir[EXRTO_FILE_PATH_LEN];
+    char block_meta_file_name[EXRTO_FILE_PATH_LEN];
+    struct dirent *de = NULL;
+    struct stat st;
+
+    rc = snprintf_s(block_meta_file_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "./%s/%s",
+                    EXRTO_FILE_DIR, EXRTO_BLOCK_INFO_SUB_DIR);
+    securec_check_ss(rc, "\0", "\0");
+
+    /* Block-info files are measured on disk: count every entry and add up its st_size. */
+    DIR *dir = opendir(block_meta_file_dir);
+    if (dir != NULL) {
+        while ((de = gs_readdir(dir)) != NULL) {
+            if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) {
+                continue;
+            }
+            rc = snprintf_s(block_meta_file_name, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s",
+                            block_meta_file_dir, de->d_name);
+            securec_check_ss(rc, "\0", "\0");
+            /* Entries that cannot be stat'ed are skipped. */
+            if (lstat(block_meta_file_name, &st) != 0) {
+                continue;
+            }
+            block_meta_file_num++;
+            block_meta_file_size = block_meta_file_size + (uint64)st.st_size;
+        }
+        /* Release the directory stream so repeated calls do not leak descriptors. */
+        (void)closedir(dir);
+    }
+
+    values[ARG_0] = TransactionIdGetDatum(basepage_file_num);
+    values[ARG_1] = TransactionIdGetDatum(basepage_file_size);
+    values[ARG_2] = TransactionIdGetDatum(lsn_file_num);
+    values[ARG_3] = TransactionIdGetDatum(lsn_file_size);
+    values[ARG_4] = TransactionIdGetDatum(block_meta_file_num);
+    values[ARG_5] = TransactionIdGetDatum(block_meta_file_size);
+
+    tuple = heap_form_tuple(tupdesc, values, nulls);
+    PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
+
diff --git a/src/gausskernel/storage/access/transam/extreme_rto/Makefile b/src/gausskernel/storage/access/transam/extreme_rto/Makefile
index 6b4b4968ec027b9c3970557afeab2372680004bf..06e25e75f8bdede59c7d1e269138e99f9f790763 100644
--- a/src/gausskernel/storage/access/transam/extreme_rto/Makefile
+++ b/src/gausskernel/storage/access/transam/extreme_rto/Makefile
@@ -26,6 +26,6 @@ top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
OBJS = dispatcher.o page_redo.o posix_semaphore.o redo_item.o \
- spsc_blocking_queue.o txn_redo.o batch_redo.o xlog_read.o
+ spsc_blocking_queue.o txn_redo.o batch_redo.o xlog_read.o exrto_recycle.o
include $(top_srcdir)/src/gausskernel/common.mk
diff --git a/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp
index e45dff0b6fc2bcc4fbc4c1e6c68d1aa18477beb1..c81eb9f5e3b0744dbcdd17999a7a660412511574 100644
--- a/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp
+++ b/src/gausskernel/storage/access/transam/extreme_rto/batch_redo.cpp
@@ -44,25 +44,6 @@
#include "access/xlogproc.h"
namespace extreme_rto {
-static inline void PRXLogRecGetBlockTag(XLogRecParseState *recordBlockState, RelFileNode *rnode, BlockNumber *blknum,
- ForkNumber *forknum)
-{
- XLogBlockParse *blockparse = &(recordBlockState->blockparse);
-
- if (rnode != NULL) {
- rnode->dbNode = blockparse->blockhead.dbNode;
- rnode->relNode = blockparse->blockhead.relNode;
- rnode->spcNode = blockparse->blockhead.spcNode;
- rnode->bucketNode = blockparse->blockhead.bucketNode;
- rnode->opt = blockparse->blockhead.opt;
- }
- if (blknum != NULL) {
- *blknum = blockparse->blockhead.blkno;
- }
- if (forknum != NULL) {
- *forknum = blockparse->blockhead.forknum;
- }
-}
void PRInitRedoItemEntry(RedoItemHashEntry *redoItemHashEntry)
{
diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp
index 9a1daeb2b7b9af5dae7b7e66e99992d0f7a39afe..61e853c84999f9cf6e5abcf296af26f3a992fabf 100755
--- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp
+++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp
@@ -391,7 +391,7 @@ void SSAllocRecordReadBuffer(XLogReaderState *xlogreader, uint32 privateLen)
#endif
}
-void HandleStartupInterruptsForExtremeRto()
+void StartupInterruptsForExtremeRto()
{
Assert(AmStartupProcess());
@@ -400,7 +400,7 @@ void HandleStartupInterruptsForExtremeRto()
uint32 triggeredstate = pg_atomic_read_u32(&(g_startupTriggerState));
if (triggeredstate != newtriggered) {
ereport(LOG, (errmodule(MOD_REDO), errcode(ERRCODE_LOG),
- errmsg("HandleStartupInterruptsForExtremeRto:g_startupTriggerState set from %u to %u",
+ errmsg("StartupInterruptsForExtremeRto:g_startupTriggerState set from %u to %u",
triggeredstate, newtriggered)));
pg_atomic_write_u32(&(g_startupTriggerState), newtriggered);
}
@@ -452,9 +452,15 @@ void StartRecoveryWorkers(XLogReaderState *xlogreader, uint32 privateLen)
SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.rwlock));
g_instance.comm_cxt.predo_cxt.state = REDO_IN_PROGRESS;
SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.rwlock));
+
+ Assert(g_instance.pid_cxt.exrto_recycler_pid == 0);
+ if (g_instance.attr.attr_storage.EnableHotStandby) {
+ g_instance.pid_cxt.exrto_recycler_pid = initialize_util_thread(EXRTO_RECYCLER);
+ }
+
on_shmem_exit(StopRecoveryWorkers, 0);
- g_dispatcher->oldStartupIntrruptFunc = RegisterRedoInterruptCallBack(HandleStartupInterruptsForExtremeRto);
+ g_dispatcher->oldStartupIntrruptFunc = RegisterRedoInterruptCallBack(StartupInterruptsForExtremeRto);
close_readFile_if_open();
}
@@ -563,6 +569,9 @@ static void StartPageRedoWorkers(uint32 totalThrdNum)
for (uint32 j = 0; j < batchWorkerPerMng; j++) {
RedoRoleInit(&(g_dispatcher->pageLines[i].redoThd[j]), tmpWorkers[workerCnt++], REDO_PAGE_WORKER, j,
isUndoSpaceWorker);
+ // start from 1 not 0
+ g_dispatcher->pageLines[i].redoThd[j]->standby_read_meta_info.batch_id = i + 1;
+ g_dispatcher->pageLines[i].redoThd[j]->standby_read_meta_info.redo_id = j + 1;
}
g_dispatcher->pageLines[i].redoThdNum = batchWorkerPerMng;
}
@@ -607,6 +616,10 @@ bool DispathCouldExit()
}
}
+ if (g_instance.pid_cxt.exrto_recycler_pid != 0) {
+ return false;
+ }
+
return true;
}
@@ -636,6 +649,17 @@ void SendSingalToPageWorker(int signal)
}
}
+/*
+ * Send "signal" to the exrto recycler thread recorded in
+ * g_instance.pid_cxt.exrto_recycler_pid. Does nothing when no recycler is
+ * registered (pid 0); a delivery failure is only logged as a WARNING.
+ */
+void send_signal_to_eros_recycle_worker(int signal)
+{
+    if (g_instance.pid_cxt.exrto_recycler_pid == 0) {
+        return;
+    }
+
+    int rc = gs_signal_send(g_instance.pid_cxt.exrto_recycler_pid, signal);
+    if (rc == 0) {
+        return;
+    }
+    ereport(WARNING, (errmsg("Dispatch kill(pid %lu, signal %d) failed: \"%s\",",
+                             g_instance.pid_cxt.exrto_recycler_pid, signal, gs_strerror(rc))));
+}
+
/* Run from the dispatcher thread. */
static void StopRecoveryWorkers(int code, Datum arg)
{
@@ -643,6 +667,7 @@ static void StopRecoveryWorkers(int code, Datum arg)
errmsg("parallel redo workers are going to stop, code:%d, arg:%lu",
code, DatumGetUInt64(arg))));
SendSingalToPageWorker(SIGTERM);
+ send_signal_to_eros_recycle_worker(SIGTERM);
uint64 count = 0;
while (!DispathCouldExit()) {
@@ -1125,6 +1150,7 @@ static bool DispatchDataBaseRecord(XLogReaderState *record, List *expectedTLIs,
if (IsDataBaseDrop(record)) {
isNeedFullSync = true;
+ record->isFullSync = true;
RedoItem *item = GetRedoItemPtr(record);
ReferenceRedoItem(item);
@@ -1132,10 +1158,18 @@ static bool DispatchDataBaseRecord(XLogReaderState *record, List *expectedTLIs,
ReferenceRedoItem(item);
AddPageRedoItem(g_dispatcher->pageLines[i].batchThd, item);
}
- DereferenceRedoItem(item);
+ AddTxnRedoItem(g_dispatcher->trxnLine.managerThd, item);
} else {
/* database dir may impact many rel so need to sync to all pageworks */
- DispatchRecordWithoutPage(record, expectedTLIs);
+ record->isFullSync = true;
+ RedoItem *item = GetRedoItemPtr(record);
+
+ ReferenceRedoItem(item);
+ for (uint32 i = 0; i < g_dispatcher->pageLineNum; i++) {
+ ReferenceRedoItem(item);
+ AddPageRedoItem(g_dispatcher->pageLines[i].batchThd, item);
+ }
+ AddTxnRedoItem(g_dispatcher->trxnLine.managerThd, item);
g_dispatcher->needFullSyncCheckpoint = true;
}
@@ -1911,6 +1945,7 @@ void SendRecoveryEndMarkToWorkersAndWaitForFinish(int code)
errmsg("[REDO_LOG_TRACE]SendRecoveryEndMarkToWorkersAndWaitForFinish, ready to stop redo workers, code: %d",
code)));
if ((get_real_recovery_parallelism() > 1) && (GetBatchCount() > 0)) {
+ send_signal_to_eros_recycle_worker(SIGTERM);
WaitPageRedoWorkerReachLastMark(g_dispatcher->readLine.readPageThd);
PageRedoPipeline *pl = g_dispatcher->pageLines;
/* send end mark */
diff --git a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..30f94c2a3daf91106e8ba70411ecfcad6b11bbf8
--- /dev/null
+++ b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * exrto_recycle.cpp
+ *
+ * clean thread for standby read on block level page redo
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "access/extreme_rto/page_redo.h"
+#include "access/extreme_rto/dispatcher.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
+#include "access/multi_redo_api.h"
+#include "storage/ipc.h"
+#include "storage/smgr/smgr.h"
+#include "utils/memutils.h"
+
+namespace extreme_rto {
+/*
+ * SIGHUP handler: set got_SIGHUP and wake the thread through its latch.
+ * errno is saved on entry and restored on exit.
+ */
+static void exrto_recycle_sighup_handler(SIGNAL_ARGS)
+{
+    const int saved_errno = errno;
+
+    t_thrd.exrto_recycle_cxt.got_SIGHUP = true;
+    if (t_thrd.proc) {
+        SetLatch(&t_thrd.proc->procLatch);
+    }
+
+    errno = saved_errno;
+}
+
+/*
+ * Shutdown-request handler (installed for SIGTERM): set shutdown_requested and
+ * wake the thread through its latch. errno is saved on entry and restored on exit.
+ */
+static void exrto_recycle_shutdown_handler(SIGNAL_ARGS)
+{
+    const int saved_errno = errno;
+
+    t_thrd.exrto_recycle_cxt.shutdown_requested = true;
+    if (t_thrd.proc) {
+        SetLatch(&t_thrd.proc->procLatch);
+    }
+
+    errno = saved_errno;
+}
+
+/*
+ * Immediate-exit handler (installed for SIGQUIT): switch to the BlockSig mask,
+ * call on_exit_reset() and leave through proc_exit() with status 2.
+ */
+static void exrto_recycle_quick_die(SIGNAL_ARGS)
+{
+    const int exit_status = 2;
+
+    gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
+    on_exit_reset();
+    proc_exit(exit_status);
+}
+
+/*
+ * Install the recycler thread's signal dispositions and unblock signals.
+ *
+ * SIGHUP, SIGTERM and SIGQUIT get dedicated handlers; every other signal listed
+ * here is ignored. Installation order is the same as the handler list reads.
+ */
+static void exrto_recycle_setup_signal_handlers()
+{
+    /* Signals ignored after the SIGQUIT handler has been installed, in installation order. */
+    const int ignored_signals[] = {SIGALRM, SIGPIPE, SIGUSR1, SIGUSR2, SIGCHLD,
+                                   SIGTTIN, SIGTTOU, SIGCONT, SIGWINCH};
+    const size_t ignored_cnt = sizeof(ignored_signals) / sizeof(ignored_signals[0]);
+
+    (void)gspqsignal(SIGHUP, exrto_recycle_sighup_handler);
+    (void)gspqsignal(SIGINT, SIG_IGN);
+    (void)gspqsignal(SIGTERM, exrto_recycle_shutdown_handler);
+    (void)gspqsignal(SIGQUIT, exrto_recycle_quick_die);
+    for (size_t idx = 0; idx < ignored_cnt; ++idx) {
+        (void)gspqsignal(ignored_signals[idx], SIG_IGN);
+    }
+
+    gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
+    (void)gs_signal_unblock_sigusr2();
+}
+
+/*
+ * Orderly exit of the recycler thread: log the request, release what the current
+ * resource owner tracks (RESOURCE_RELEASE_BEFORE_LOCKS phase) and leave through
+ * proc_exit(0).
+ */
+static void handle_exrto_recycle_shutdown()
+{
+    ereport(LOG, (errmsg("exrto recycle exit for request")));
+
+    ResourceOwnerRelease(t_thrd.utils_cxt.CurrentResourceOwner,
+                         RESOURCE_RELEASE_BEFORE_LOCKS,
+                         false,
+                         true);
+
+    proc_exit(0);
+}
+
+/*
+ * Sleep on the thread latch until it is set, the timeout expires, or postmaster
+ * death is reported. The latch is reset afterwards; on postmaster death the
+ * thread exits with status 1.
+ */
+static void exrto_recycle_wait()
+{
+    const long wait_timeout = 1000L; /* 1s */
+    const int wake_events = WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH;
+
+    int fired = WaitLatch(&t_thrd.proc->procLatch, wake_events, wait_timeout);
+
+    /* Clear any already-pending wakeups */
+    ResetLatch(&t_thrd.proc->procLatch);
+
+    if ((((unsigned int)fired) & WL_POSTMASTER_DEATH) != 0) {
+        gs_thread_exit(1);
+    }
+}
+
+/*
+ * Decide whether recycling has to be forced.
+ *
+ * Sums (next position - recycle position) over all REDO_PAGE_WORKER workers,
+ * separately for base pages and for lsn info, and returns true as soon as either
+ * sum exceeds its configured maximum multiplied by standby_force_recyle_ratio.
+ */
+bool check_if_need_force_recycle()
+{
+    const uint32 worker_cnt = g_dispatcher->allWorkersCnt;
+    PageRedoWorker** worker_list = g_dispatcher->allWorkers;
+    double ratio = g_instance.attr.attr_storage.standby_force_recyle_ratio;
+    int64 base_page_used = 0;
+    int64 lsn_info_used = 0;
+
+    for (uint32 idx = 0; idx < worker_cnt; ++idx) {
+        PageRedoWorker* worker = worker_list[idx];
+        if (worker->role != REDO_PAGE_WORKER) {
+            continue;
+        }
+        /* Work on a local copy of this worker's positions. */
+        StandbyReadMetaInfo meta_info = worker->standby_read_meta_info;
+        base_page_used += (meta_info.base_page_next_position - meta_info.base_page_recyle_position);
+        lsn_info_used += (meta_info.lsn_table_next_position - meta_info.lsn_table_recyle_position);
+    }
+
+    return base_page_used > g_instance.attr.attr_storage.max_standby_base_page_size * ratio ||
+           lsn_info_used > g_instance.attr.attr_storage.max_standby_lsn_info_size * ratio;
+}
+
+/*
+ * Run one recycle pass up to recycle_lsn.
+ *
+ * Calls standby_read_recyle_per_workers() for every REDO_PAGE_WORKER worker and
+ * tracks the smallest recycle_lsn_per_worker among them. If that minimum is
+ * greater than predo_cxt.global_recycle_lsn, the global value is advanced to it
+ * and the new value is logged.
+ */
+void do_standby_read_recyle(XLogRecPtr recycle_lsn)
+{
+    const uint32 worker_cnt = g_dispatcher->allWorkersCnt;
+    PageRedoWorker** worker_list = g_dispatcher->allWorkers;
+    XLogRecPtr lowest_lsn = InvalidXLogRecPtr;
+
+    for (uint32 idx = 0; idx < worker_cnt; ++idx) {
+        PageRedoWorker* worker = worker_list[idx];
+        if (worker->role != REDO_PAGE_WORKER) {
+            continue;
+        }
+
+        extreme_rto_standby_read::standby_read_recyle_per_workers(&worker->standby_read_meta_info, recycle_lsn);
+
+        /* The first page worker seeds the minimum; later ones can only lower it. */
+        bool is_lower = XLogRecPtrIsInvalid(lowest_lsn) ||
+                        XLByteLT(worker->standby_read_meta_info.recycle_lsn_per_worker, lowest_lsn);
+        if (is_lower) {
+            lowest_lsn = worker->standby_read_meta_info.recycle_lsn_per_worker;
+        }
+    }
+
+    if (!XLByteLT(g_instance.comm_cxt.predo_cxt.global_recycle_lsn, lowest_lsn)) {
+        return;
+    }
+
+    pg_atomic_write_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn, lowest_lsn);
+    ereport(LOG,
+            (errmsg(EXRTOFORMAT("[exrto_recycle] update global recycle lsn: %08X/%08X"),
+                    (uint32)(lowest_lsn >> UINT64_HALF), (uint32)lowest_lsn)));
+}
+
+/*
+ * Interrupt callback of the recycler thread: reload the configuration when a
+ * SIGHUP was flagged, then shut down when a shutdown was requested.
+ */
+void exrto_recycle_interrupt()
+{
+    /* A pending SIGHUP is consumed before the configuration file is re-read. */
+    if (t_thrd.exrto_recycle_cxt.got_SIGHUP) {
+        t_thrd.exrto_recycle_cxt.got_SIGHUP = false;
+        ProcessConfigFile(PGC_SIGHUP);
+    }
+
+    if (!t_thrd.exrto_recycle_cxt.shutdown_requested) {
+        return;
+    }
+    handle_exrto_recycle_shutdown();
+}
+
+/*
+ * Main routine of the exrto recycler thread.
+ *
+ * Start-up: create a resource owner and a private memory context, install the
+ * signal handlers, register exrto_recycle_interrupt as the redo interrupt
+ * callback, then clean up leftover standby-read directories. The thread exits
+ * right away unless IS_EXRTO_READ is set and recovery is in progress.
+ *
+ * Loop: wake up about once per second; when standby_recycle_interval is non-zero
+ * and either a forced recycle is needed or the interval has elapsed, compute the
+ * recycle position and run one recycle pass.
+ */
+void exrto_recycle_main()
+{
+    t_thrd.utils_cxt.CurrentResourceOwner =
+        ResourceOwnerCreate(NULL, "exrto recycler", THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
+    MemoryContext recycler_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt, "Exrto Recycler",
+                                                       ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE,
+                                                       ALLOCSET_DEFAULT_MAXSIZE);
+    (void)MemoryContextSwitchTo(recycler_cxt);
+
+    ereport(LOG, (errmsg("exrto recycle started")));
+    exrto_recycle_setup_signal_handlers();
+
+    /*
+     * Unblock signals (they were blocked when the postmaster forked us).
+     * exrto_recycle_setup_signal_handlers() ends with the same two calls.
+     */
+    gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
+    (void)gs_signal_unblock_sigusr2();
+
+    pgstat_report_appname("exrto recycler");
+    pgstat_report_activity(STATE_IDLE, NULL);
+
+    bool force_recycle = false;
+    int ticks_since_recycle = 0; /* one tick per exrto_recycle_wait() call */
+    RegisterRedoInterruptCallBack(exrto_recycle_interrupt);
+
+    /* With the postmaster in PM_RUN, drop the standby-read buffers and clean the directory. */
+    if (pmState == PM_RUN && isDirExist(EXRTO_FILE_DIR)) {
+        buffer_drop_exrto_standby_read_buffers();
+        exrto_clean_dir();
+    }
+    if (!isDirExist(EXRTO_OLD_FILE_DIR)) {
+        ereport(LOG, (errmsg("exrto recycle: standby_read_old dir not exist")));
+    } else {
+        exrto_recycle_old_dir();
+        ereport(LOG, (errmsg("exrto recycle: clear standby_read_old dir success")));
+    }
+
+    if (!(IS_EXRTO_READ && RecoveryInProgress())) {
+        ereport(LOG,
+                (errmsg("exrto recycle is available only when exrto standby read is supported")));
+        handle_exrto_recycle_shutdown();
+    }
+
+    for (;;) {
+        RedoInterruptCallBack();
+        exrto_recycle_wait();
+        ++ticks_since_recycle;
+
+        /*
+         * standby_recycle_interval = 0 means do not recycle
+         */
+        if (g_instance.attr.attr_storage.standby_recycle_interval == 0) {
+            continue;
+        }
+
+        /* A forced recycle does not wait for the interval to elapse. */
+        force_recycle = check_if_need_force_recycle();
+        if (!force_recycle && ticks_since_recycle < g_instance.attr.attr_storage.standby_recycle_interval) {
+            continue;
+        }
+
+        ticks_since_recycle = 0;
+
+        XLogRecPtr recycle_lsn = exrto_calculate_recycle_position(force_recycle);
+        if (XLogRecPtrIsInvalid(recycle_lsn)) {
+            continue;
+        }
+
+        do_standby_read_recyle(recycle_lsn);
+        smgrcloseall();
+        MemoryContextResetAndDeleteChildren(recycler_cxt);
+    }
+    handle_exrto_recycle_shutdown();
+}
+} /* namespace extreme_rto */
\ No newline at end of file
diff --git a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp
index e2b656aaa1bbbc2b11d4d7507f3476ad6a95a1aa..24f8a54f046ed0e95e27e9420075e7bacf551b4d 100755
--- a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp
+++ b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp
@@ -51,6 +51,7 @@
#include "storage/smgr/relfilenode_hash.h"
#include "storage/standby.h"
#include "storage/pmsignal.h"
+#include "storage/procarray.h"
#include "utils/guc.h"
#include "utils/palloc.h"
#include "portability/instr_time.h"
@@ -63,6 +64,7 @@
#include "commands/tablespace.h"
#include "access/extreme_rto/page_redo.h"
#include "access/extreme_rto/dispatcher.h"
+#include "access/extreme_rto/standby_read/lsn_info_meta.h"
#include "access/extreme_rto/txn_redo.h"
#include "access/extreme_rto/xlog_read.h"
#include "pgstat.h"
@@ -183,6 +185,9 @@ void RedoWorkerQueueCallBack()
bool RedoWorkerIsUndoSpaceWorker()
{
+ if (g_redoWorker == NULL) {
+ return false;
+ }
return g_redoWorker->isUndoSpaceWorker;
}
@@ -562,8 +567,6 @@ bool BatchRedoDistributeItems(void **eleArry, uint32 eleNum)
BatchRedoProcLsnForwarder((RedoItem *)eleArry[i]);
} else if (eleArry[i] == (void *)&g_cleanupMark) {
BatchRedoProcCleanupMark((RedoItem *)eleArry[i]);
- } else if (eleArry[i] == (void *)&g_closefdMark) {
- smgrcloseall();
} else if (eleArry[i] == (void *)&g_cleanInvalidPageMark) {
forget_range_invalid_pages((void *)eleArry[i]);
} else {
@@ -639,26 +642,21 @@ void RedoPageManagerDistributeToAllOneBlock(XLogRecParseState *ddlParseState)
}
}
-void RedoPageManagerDistributeBlockRecord(HTAB *redoItemHash, XLogRecParseState *parsestate)
+/*
+ * Hand one block-level parse state to the page redo worker of this pipeline that
+ * owns its (relfilenode, fork, block) tag, as selected by GetWorkerId().
+ * The record is detached from any list (nextrecord = NULL) before it is queued.
+ * A NULL record_block_state is accepted and is a no-op.
+ */
+void RedoPageManagerDistributeBlockRecord(XLogRecParseState *record_block_state)
{
PageRedoPipeline *myRedoLine = &g_dispatcher->pageLines[g_redoWorker->slotId];
const uint32 WorkerNumPerMng = myRedoLine->redoThdNum;
- HASH_SEQ_STATUS status;
- RedoItemHashEntry *redoItemEntry = NULL;
- HTAB *curMap = redoItemHash;
- hash_seq_init(&status, curMap);
-
- while ((redoItemEntry = (RedoItemHashEntry *)hash_seq_search(&status)) != NULL) {
- uint32 workId = GetWorkerId(&redoItemEntry->redoItemTag, WorkerNumPerMng);
- AddPageRedoItem(myRedoLine->redoThd[workId], redoItemEntry->head);
+ uint32 work_id;
+ RelFileNode rel_node;
+ ForkNumber fork_num;
+ BlockNumber blk_no;
+ RedoItemTag redo_item_tag;
- if (hash_search(curMap, (void *)&redoItemEntry->redoItemTag, HASH_REMOVE, NULL) == NULL)
- ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("hash table corrupted")));
- }
-
- if (parsestate != NULL) {
- RedoPageManagerDistributeToAllOneBlock(parsestate);
- }
+    /*
+     * PageManagerRedoParseState() still calls this function with NULL for
+     * BLOCK_DATA_SEG_FILE_EXTEND_TYPE. With no record there is no block tag to
+     * read and nothing to queue, so return before dereferencing the pointer.
+     */
+    if (record_block_state == NULL) {
+        return;
+    }
+ PRXLogRecGetBlockTag(record_block_state, &rel_node, &blk_no, &fork_num);
+ INIT_REDO_ITEM_TAG(redo_item_tag, rel_node, fork_num, blk_no);
+ work_id = GetWorkerId(&redo_item_tag, WorkerNumPerMng);
+ record_block_state->nextrecord = NULL;
+ AddPageRedoItem(myRedoLine->redoThd[work_id], record_block_state);
}
void WaitCurrentPipeLineRedoWorkersQueueEmpty()
@@ -762,13 +760,24 @@ void RedoPageManagerSyncDdlAction(XLogRecParseState *parsestate)
XLogBlockParseStateRelease(parsestate);
}
-void RedoPageManagerDoDropAction(XLogRecParseState *parsestate, HTAB *hashMap)
+void RedoPageManagerDoDatabaseAction(XLogRecParseState *parsestate)
{
- XLogRecParseState *newState = XLogParseBufferCopy(parsestate);
- PRTrackClearBlock(newState, hashMap);
- RedoPageManagerDistributeBlockRecord(hashMap, parsestate);
+ RedoPageManagerDistributeToAllOneBlock(parsestate);
WaitCurrentPipeLineRedoWorkersQueueEmpty();
- RedoPageManagerSyncDdlAction(parsestate);
+ RedoPageManagerSmgrClose(parsestate);
+
+ bool need_wait = parsestate->isFullSync;
+ if (need_wait) {
+ pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1);
+ }
+ parsestate->nextrecord = NULL;
+ XLogBlockParseStateRelease(parsestate);
+
+ uint32 val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag);
+ while (val != 0) {
+ RedoInterruptCallBack();
+ val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag);
+ }
}
void RedoPageManagerDoSmgrAction(XLogRecParseState *recordblockstate)
@@ -790,16 +799,14 @@ void RedoPageManagerDoSmgrAction(XLogRecParseState *recordblockstate)
XLogBlockParseStateRelease(recordblockstate);
}
-void RedoPageManagerDoDataTypeAction(XLogRecParseState *parsestate, HTAB *hashMap)
+void RedoPageManagerDoDataTypeAction(XLogRecParseState *parsestate)
{
XLogBlockDdlParse *ddlrecparse = NULL;
XLogBlockParseGetDdlParse(parsestate, ddlrecparse);
if (ddlrecparse->blockddltype == BLOCK_DDL_DROP_RELNODE ||
ddlrecparse->blockddltype == BLOCK_DDL_TRUNCATE_RELNODE) {
- XLogRecParseState *newState = XLogParseBufferCopy(parsestate);
- PRTrackClearBlock(newState, hashMap);
- RedoPageManagerDistributeBlockRecord(hashMap, parsestate);
+ RedoPageManagerDistributeToAllOneBlock(parsestate);
WaitCurrentPipeLineRedoWorkersQueueEmpty();
}
@@ -839,10 +846,10 @@ void PageManagerProcCleanupMark(RedoItem *cleanupMark)
ereport(LOG, (errcode(ERRCODE_LOG), errmsg("[ForceFinish]PageManagerProcCleanupMark has cleaned InvalidPages")));
}
-void PageManagerProcCheckPoint(HTAB *hashMap, XLogRecParseState *parseState)
+void PageManagerProcCheckPoint(XLogRecParseState *parseState)
{
Assert(IsCheckPoint(parseState));
- RedoPageManagerDistributeBlockRecord(hashMap, parseState);
+ RedoPageManagerDistributeToAllOneBlock(parseState);
bool needWait = parseState->isFullSync;
if (needWait) {
pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1);
@@ -865,9 +872,8 @@ void PageManagerProcCheckPoint(HTAB *hashMap, XLogRecParseState *parseState)
}
}
-void PageManagerProcCreateTableSpace(HTAB *hashMap, XLogRecParseState *parseState)
+void PageManagerProcCreateTableSpace(XLogRecParseState *parseState)
{
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
bool needWait = parseState->isFullSync;
if (needWait) {
pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1);
@@ -881,16 +887,14 @@ void PageManagerProcCreateTableSpace(HTAB *hashMap, XLogRecParseState *parseStat
}
}
-void PageManagerProcSegFullSyncState(HTAB *hashMap, XLogRecParseState *parseState)
+void PageManagerProcSegFullSyncState(XLogRecParseState *parseState)
{
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
WaitCurrentPipeLineRedoWorkersQueueEmpty();
RedoPageManagerSyncDdlAction(parseState);
}
-void PageManagerProcSegPipeLineSyncState(HTAB *hashMap, XLogRecParseState *parseState)
+void PageManagerProcSegPipeLineSyncState(XLogRecParseState *parseState)
{
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
WaitCurrentPipeLineRedoWorkersQueueEmpty();
MemoryContext oldCtx = MemoryContextSwitchTo(g_redoWorker->oldCtx);
@@ -917,40 +921,38 @@ static void WaitNextBarrier(XLogRecParseState *parseState)
void PageManagerRedoParseState(XLogRecParseState *preState)
{
- HTAB *hashMap = g_dispatcher->pageLines[g_redoWorker->slotId].managerThd->redoItemHash;
-
switch (preState->blockparse.blockhead.block_valid) {
case BLOCK_DATA_MAIN_DATA_TYPE:
case BLOCK_DATA_UNDO_TYPE:
case BLOCK_DATA_VM_TYPE:
case BLOCK_DATA_FSM_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]);
- PRTrackAddBlock(preState, hashMap);
+ RedoPageManagerDistributeBlockRecord(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]);
break;
case BLOCK_DATA_DDL_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]);
- RedoPageManagerDoDataTypeAction(preState, hashMap);
+ RedoPageManagerDoDataTypeAction(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]);
break;
case BLOCK_DATA_SEG_EXTEND:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]);
- PageManagerProcSegPipeLineSyncState(hashMap, preState);
+ PageManagerProcSegPipeLineSyncState(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]);
break;
+ case BLOCK_DATA_CREATE_DATABASE_TYPE:
case BLOCK_DATA_DROP_DATABASE_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_5]);
- RedoPageManagerDoDropAction(preState, hashMap);
+ RedoPageManagerDoDatabaseAction(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_5]);
break;
case BLOCK_DATA_DROP_TBLSPC_TYPE:
/* just make sure any other ddl before drop tblspc is done */
XLogBlockParseStateRelease(preState);
break;
- case BLOCK_DATA_CREATE_DATABASE_TYPE:
case BLOCK_DATA_SEG_FILE_EXTEND_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]);
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
+ RedoPageManagerDistributeBlockRecord(NULL);
/* wait until queue empty */
WaitCurrentPipeLineRedoWorkersQueueEmpty();
/* do atcual action */
@@ -959,31 +961,30 @@ void PageManagerRedoParseState(XLogRecParseState *preState)
break;
case BLOCK_DATA_SEG_FULL_SYNC_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]);
- PageManagerProcSegFullSyncState(hashMap, preState);
+ PageManagerProcSegFullSyncState(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]);
break;
case BLOCK_DATA_CREATE_TBLSPC_TYPE:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]);
- PageManagerProcCreateTableSpace(hashMap, preState);
+ PageManagerProcCreateTableSpace(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]);
break;
case BLOCK_DATA_XLOG_COMMON_TYPE:
- PageManagerProcCheckPoint(hashMap, preState);
+ PageManagerProcCheckPoint(preState);
break;
case BLOCK_DATA_NEWCU_TYPE:
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
PageManagerDistributeBcmBlock(preState);
break;
case BLOCK_DATA_SEG_SPACE_DROP:
case BLOCK_DATA_SEG_SPACE_SHRINK:
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]);
- RedoPageManagerDistributeBlockRecord(hashMap, preState);
+ RedoPageManagerDistributeToAllOneBlock(preState);
WaitCurrentPipeLineRedoWorkersQueueEmpty();
RedoPageManagerSyncDdlAction(preState);
CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]);
break;
case BLOCK_DATA_BARRIER_TYPE:
- RedoPageManagerDistributeBlockRecord(hashMap, preState);
+ RedoPageManagerDistributeToAllOneBlock(preState);
WaitNextBarrier(preState);
break;
default:
@@ -992,71 +993,60 @@ void PageManagerRedoParseState(XLogRecParseState *preState)
}
}
-bool PageManagerRedoDistributeItems(void **eleArry, uint32 eleNum)
+/*
+ * Process one element taken from the page manager's queue.
+ *
+ * The element is either one of the global marker objects (recognised by
+ * address) or the head of an XLogRecParseState chain linked through
+ * nextrecord. Returns true only for g_redoEndMark, which tells the caller to
+ * stop; every other element returns false once it has been handled.
+ *
+ * NOTE(review): g_closefdMark is no longer recognised here, yet
+ * DispatchClosefdMarkToAllRedoWorker() still enqueues it for REDO_PAGE_MNG
+ * workers. If that dispatcher is still called, the marker would be walked as
+ * a parse-state chain below -- confirm it has no remaining callers.
+ */
+bool PageManagerRedoDistributeItems(XLogRecParseState *record_block_state)
{
- HTAB *hashMap = g_dispatcher->pageLines[g_redoWorker->slotId].managerThd->redoItemHash;
+ if (record_block_state == (void *)&g_redoEndMark) {
+ return true;
+ } else if (record_block_state == (void *)&g_GlobalLsnForwarder) {
+ PageManagerProcLsnForwarder((RedoItem *) record_block_state);
+ return false;
+ } else if (record_block_state == (void *)&g_cleanupMark) {
+ PageManagerProcCleanupMark((RedoItem *) record_block_state);
+ return false;
+ } else if (record_block_state == (void *)&g_cleanInvalidPageMark) {
+ forget_range_invalid_pages((void *)record_block_state);
+ return false;
+ }
- for (uint32 i = 0; i < eleNum; i++) {
- if (eleArry[i] == (void *)&g_redoEndMark) {
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
- return true;
- } else if (eleArry[i] == (void *)&g_GlobalLsnForwarder) {
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
- PageManagerProcLsnForwarder((RedoItem *)eleArry[i]);
- continue;
- } else if (eleArry[i] == (void *)&g_cleanupMark) {
- PageManagerProcCleanupMark((RedoItem *)eleArry[i]);
- continue;
- } else if (eleArry[i] == (void *)&g_closefdMark) {
- smgrcloseall();
- continue;
- } else if (eleArry[i] == (void *)&g_cleanInvalidPageMark) {
- forget_range_invalid_pages((void *)eleArry[i]);
- continue;
- }
- XLogRecParseState *recordblockstate = (XLogRecParseState *)eleArry[i];
- XLogRecParseState *nextState = recordblockstate;
- do {
- XLogRecParseState *preState = nextState;
- nextState = (XLogRecParseState *)nextState->nextrecord;
- preState->nextrecord = NULL;
+ /* Detach each state from the chain before handing it to PageManagerRedoParseState(). */
+ XLogRecParseState *next_state = record_block_state;
+ do {
+ XLogRecParseState *pre_state = next_state;
+ next_state = (XLogRecParseState *)next_state->nextrecord;
+ pre_state->nextrecord = NULL;
#ifdef ENABLE_UT
- TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_BEFORE_DISTRIBUTE_ITEMS,
- __FUNCTION__, preState);
+ TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_BEFORE_DISTRIBUTE_ITEMS,
+ __FUNCTION__, pre_state);
#endif
- PageManagerRedoParseState(preState);
+ PageManagerRedoParseState(pre_state);
#ifdef ENABLE_UT
- TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_AFTER_DISTRIBUTE_ITEMS,
- __FUNCTION__, preState);
+ TestXLogRecParseStateEventProbe(UTEST_EVENT_RTO_PAGEMGR_REDO_AFTER_DISTRIBUTE_ITEMS,
+ __FUNCTION__, pre_state);
#endif
- } while (nextState != NULL);
- }
- GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_9]);
- RedoPageManagerDistributeBlockRecord(hashMap, NULL);
- CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_9]);
+ } while (next_state != NULL);
+
return false;
}
void RedoPageManagerMain()
{
- void **eleArry;
- uint32 eleNum;
+ XLogRecParseState *record_block_state;
+ bool is_end;
(void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts);
- g_redoWorker->redoItemHash = PRRedoItemHashInitialize(g_redoWorker->oldCtx);
XLogParseBufferInitFunc(&(g_redoWorker->parseManager), MAX_PARSE_BUFF_NUM, &recordRefOperate,
RedoInterruptCallBack);
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]);
- while (SPSCBlockingQueueGetAll(g_redoWorker->queue, &eleArry, &eleNum)) {
- CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]);
- bool isEnd = PageManagerRedoDistributeItems(eleArry, eleNum);
- SPSCBlockingQueuePopN(g_redoWorker->queue, eleNum);
- CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]);
- if (isEnd)
- break;
-
+ while (true) {
+ if (!SPSCBlockingQueueIsEmpty(g_redoWorker->queue)) {
+ record_block_state = (XLogRecParseState *)SPSCBlockingQueueTake(g_redoWorker->queue);
+ CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]);
+ is_end = PageManagerRedoDistributeItems(record_block_state);
+ CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]);
+ if (is_end)
+ break;
+ }
RedoInterruptCallBack();
ADD_ABNORMAL_POSITION(5);
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]);
@@ -1105,7 +1095,8 @@ bool TrxnManagerDistributeItemsBeforeEnd(RedoItem *item)
} else {
GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]);
if (IsCheckPoint(&item->record) || IsTableSpaceDrop(&item->record) || IsTableSpaceCreate(&item->record) ||
- (IsXactXlog(&item->record) && XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record)) {
+ (IsXactXlog(&item->record) && XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) ||
+ IsDataBaseDrop(&item->record) || IsDataBaseCreate(&item->record)) {
uint32 relCount;
do {
RedoInterruptCallBack();
@@ -1200,6 +1191,11 @@ void TrxnManagerMain()
void TrxnWorkerProcLsnForwarder(RedoItem *lsnForwarder)
{
SetCompletedReadEndPtr(g_redoWorker, lsnForwarder->record.ReadRecPtr, lsnForwarder->record.EndRecPtr);
+ /* Keep servicing redo interrupts until the forwarder's refcount is no longer above 1. */
+ for (uint32 pending_refs = pg_atomic_read_u32(&lsnForwarder->record.refcount); pending_refs > 1;) {
+ pending_refs = pg_atomic_read_u32(&lsnForwarder->record.refcount);
+ RedoInterruptCallBack();
+ }
(void)pg_atomic_sub_fetch_u32(&lsnForwarder->record.refcount, 1);
}
@@ -1258,13 +1254,11 @@ void TrxnWorkMain()
if ((void *)item == (void *)&g_GlobalLsnForwarder) {
TrxnWorkerProcLsnForwarder((RedoItem *)item);
SPSCBlockingQueuePop(g_redoWorker->queue);
- } else if ((void *)item == (void *)&g_cleanupMark) {
+ exrto_generate_snapshot(g_redoWorker->lastReplayedReadRecPtr);
+ } else if (unlikely((void *)item == (void *)&g_cleanupMark)) {
TrxnWorkrProcCleanupMark((RedoItem *)item);
SPSCBlockingQueuePop(g_redoWorker->queue);
- } else if ((void *)item == (void *)&g_closefdMark) {
- smgrcloseall();
- SPSCBlockingQueuePop(g_redoWorker->queue);
- } else if ((void *)item == (void *)&g_cleanInvalidPageMark) {
+ } else if (unlikely((void *)item == (void *)&g_cleanInvalidPageMark)) {
forget_range_invalid_pages((void *)item);
SPSCBlockingQueuePop(g_redoWorker->queue);
} else {
@@ -1283,6 +1277,12 @@ void TrxnWorkMain()
TrxnWorkNotifyRedoWorker();
}
+ if (IsCheckPoint(&item->record) || (IsXactXlog(&item->record) &&
+ XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) ||
+ IsDataBaseDrop(&item->record)) {
+ exrto_generate_snapshot(g_redoWorker->lastReplayedEndRecPtr);
+ }
+
if (XactHasSegpageRelFiles(&item->record)) {
uint32 expected = 1;
pg_atomic_compare_exchange_u32((volatile uint32 *)&(g_dispatcher->segpageXactDoneFlag), &expected, 0);
@@ -1415,12 +1415,6 @@ void RedoPageWorkerMain()
continue;
}
- if ((void *)redoblockstateHead == (void *)&g_closefdMark) {
- smgrcloseall();
- SPSCBlockingQueuePop(g_redoWorker->queue);
- continue;
- }
-
if ((void *)redoblockstateHead == (void *)&g_cleanInvalidPageMark) {
forget_range_invalid_pages((void *)redoblockstateHead);
SPSCBlockingQueuePop(g_redoWorker->queue);
@@ -1761,26 +1755,26 @@ void DispatchCleanupMarkToAllRedoWorker()
}
}
-void DispatchClosefdMarkToAllRedoWorker()
+void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key)
{
for (uint32 i = 0; i < g_dispatcher->allWorkersCnt; i++) {
PageRedoWorker *worker = g_dispatcher->allWorkers[i];
- if (worker->role == REDO_PAGE_WORKER || worker->role == REDO_PAGE_MNG ||
- worker->role == REDO_TRXN_MNG || worker->role == REDO_TRXN_WORKER) {
- SPSCBlockingQueuePut(worker->queue, &g_closefdMark);
+ if (worker->role == REDO_PAGE_WORKER) {
+ errno_t rc = memcpy_s((char*)&g_cleanInvalidPageMark,
+ sizeof(RepairFileKey), (char*)&key, sizeof(RepairFileKey));
+ securec_check(rc, "", "");
+ SPSCBlockingQueuePut(worker->queue, &g_cleanInvalidPageMark);
}
}
}
-void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key)
+void DispatchClosefdMarkToAllRedoWorker()
{
for (uint32 i = 0; i < g_dispatcher->allWorkersCnt; i++) {
PageRedoWorker *worker = g_dispatcher->allWorkers[i];
- if (worker->role == REDO_PAGE_WORKER) {
- errno_t rc = memcpy_s((char*)&g_cleanInvalidPageMark,
- sizeof(RepairFileKey), (char*)&key, sizeof(RepairFileKey));
- securec_check(rc, "", "");
- SPSCBlockingQueuePut(worker->queue, &g_cleanInvalidPageMark);
+ if (worker->role == REDO_PAGE_WORKER || worker->role == REDO_PAGE_MNG ||
+ worker->role == REDO_TRXN_MNG || worker->role == REDO_TRXN_WORKER) {
+ SPSCBlockingQueuePut(worker->queue, &g_closefdMark);
}
}
}
@@ -2492,6 +2486,7 @@ void ParallelRedoThreadMain()
ParallelRedoThreadRegister();
ereport(LOG, (errmsg("Page-redo-worker thread %u started, role:%u, slotId:%u.", g_redoWorker->id,
g_redoWorker->role, g_redoWorker->slotId)));
+ t_thrd.page_redo_cxt.redo_worker_ptr = g_redoWorker;
// regitster default interrupt call back
(void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts);
SetupSignalHandlers();
@@ -3011,4 +3006,328 @@ void SeqCheckRemoteReadAndRepairPage()
}
}
+/*
+ * Publish a new extreme-RTO standby-read snapshot tagged with trxn_lsn.
+ *
+ * Does nothing when hot standby is disabled, when trxn_lsn is not newer than
+ * the read_lsn of the snapshot already published, or when minRecoveryPoint is
+ * still invalid. Otherwise a snapshot is taken with GetSnapshotData() and its
+ * csn/xmin/xmax are stored, together with trxn_lsn and the current timestamp,
+ * in g_dispatcher->exrto_snapshot under ExrtoSnapshotLock (exclusive).
+ */
+void exrto_generate_snapshot(XLogRecPtr trxn_lsn)
+{
+    if (!g_instance.attr.attr_storage.EnableHotStandby) {
+        return;
+    }
+
+    ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+    /*
+     * Do not generate the same snapshot repeatedly. This "<=" test also covers
+     * the "older than the published snapshot" case, so no separate "<" check
+     * is needed afterwards.
+     */
+    if (XLByteLE(trxn_lsn, exrto_snapshot->read_lsn)) {
+        return;
+    }
+
+    if (XLogRecPtrIsInvalid(t_thrd.xlog_cxt.minRecoveryPoint)) {
+        return;
+    }
+
+    SnapshotData snapshot;
+
+    (void)GetSnapshotData(&snapshot, false);
+
+    Assert(snapshot.takenDuringRecovery);
+    (void)LWLockAcquire(ExrtoSnapshotLock, LW_EXCLUSIVE);
+    exrto_snapshot->snapshot_csn = snapshot.snapshotcsn;
+    exrto_snapshot->xmin = snapshot.xmin;
+    exrto_snapshot->xmax = snapshot.xmax;
+    exrto_snapshot->read_lsn = trxn_lsn;
+    exrto_snapshot->gen_snap_time = GetCurrentTimestamp();
+    LWLockRelease(ExrtoSnapshotLock);
+}
+
+/*
+ * Fill `snapshot` from the published extreme-RTO standby-read snapshot.
+ *
+ * Only acts for WORKER and THREADPOOL_WORKER threads; other roles return
+ * immediately. Raises ERROR when g_dispatcher is NULL or when no snapshot has
+ * been published yet (read_lsn is still invalid).
+ *
+ * Besides copying csn/xmin/xmax into `snapshot`, this records the snapshot's
+ * xmin in t_thrd.pgxact->xmin and u_sess->utils_cxt.TransactionXmin, its
+ * read_lsn in t_thrd.proc and u_sess->utils_cxt, and its generation time in
+ * t_thrd.proc, all while holding ExrtoSnapshotLock in shared mode.
+ */
+void exrto_read_snapshot(Snapshot snapshot)
+{
+    if (t_thrd.role != WORKER && t_thrd.role != THREADPOOL_WORKER) {
+        return;
+    }
+
+    if (g_dispatcher == NULL) {
+        ereport(ERROR,
+            (errmsg("g_dispatcher is not init")));
+    }
+
+    ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+    (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+    if (XLogRecPtrIsInvalid(exrto_snapshot->read_lsn)) {
+        /* Release the lock explicitly before reporting the error. */
+        LWLockRelease(ExrtoSnapshotLock);
+        ereport(ERROR,
+            (errmsg("could not get a valid snapshot with extreme rto")));
+    }
+    snapshot->snapshotcsn = exrto_snapshot->snapshot_csn;
+    snapshot->xmin = exrto_snapshot->xmin;
+    snapshot->xmax = exrto_snapshot->xmax;
+
+    t_thrd.pgxact->xmin = exrto_snapshot->xmin;
+    t_thrd.proc->exrto_read_lsn = exrto_snapshot->read_lsn;
+    t_thrd.proc->exrto_gen_snap_time = exrto_snapshot->gen_snap_time;
+    u_sess->utils_cxt.TransactionXmin = exrto_snapshot->xmin;
+    u_sess->utils_cxt.exrto_read_lsn = exrto_snapshot->read_lsn;
+
+    LWLockRelease(ExrtoSnapshotLock);
+    Assert(XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn));
+}
+
+/*
+ * Pick the forced-recycle position: recycle_pos advanced by 30% of the
+ * distance to insert_pos (fraction truncated towards zero).
+ */
+static inline uint64 get_force_recycle_pos(uint64 recycle_pos, uint64 insert_pos)
+{
+    Assert(recycle_pos <= insert_pos);
+    const double ratio = 0.3; /* to be adjusted */
+    uint64 span = insert_pos - recycle_pos;
+    return recycle_pos + (uint64)(span * ratio);
+}
+
+/*
+ * Compute the forced-recycle LSN for one worker's standby-read metadata.
+ *
+ * For the base-page area and the lsn-info area alike: when the recycle
+ * position is behind the next position, the page at the position chosen by
+ * get_force_recycle_pos() is read under a shared buffer lock and an LSN is
+ * taken from it -- the page LSN for base pages, lsn[0] of the LsnInfo found
+ * LSN_INFO_HEAD_SIZE bytes into the page for lsn-info pages.
+ *
+ * Returns the larger of the two LSNs; an area with nothing to recycle
+ * contributes InvalidXLogRecPtr. PANICs if the lsn-info page cannot be read.
+ */
+XLogRecPtr calculate_force_recycle_lsn_per_worker(StandbyReadMetaInfo* meta_info)
+{
+    XLogRecPtr base_lsn = InvalidXLogRecPtr;
+    XLogRecPtr info_lsn = InvalidXLogRecPtr;
+    Buffer buf;
+
+    /* base page area */
+    if (meta_info->base_page_recyle_position < meta_info->base_page_next_position) {
+        uint64 base_pos = get_force_recycle_pos(meta_info->base_page_recyle_position,
+                                                meta_info->base_page_next_position);
+        buf = extreme_rto_standby_read::buffer_read_base_page(meta_info->batch_id, meta_info->redo_id,
+                                                              base_pos, RBM_NORMAL);
+        LockBuffer(buf, BUFFER_LOCK_SHARE);
+        base_lsn = PageGetLSN(BufferGetPage(buf));
+        UnlockReleaseBuffer(buf);
+    }
+
+    /* lsn info area */
+    if (meta_info->lsn_table_recyle_position < meta_info->lsn_table_next_position) {
+        uint64 info_pos = get_force_recycle_pos(meta_info->lsn_table_recyle_position,
+                                                meta_info->lsn_table_next_position);
+        Page info_page = extreme_rto_standby_read::get_lsn_info_page(meta_info->batch_id, meta_info->redo_id,
+                                                                     info_pos, RBM_NORMAL, &buf);
+        if (unlikely(info_page == NULL || buf == InvalidBuffer)) {
+            ereport(PANIC,
+                (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"),
+                    meta_info->batch_id, meta_info->redo_id, info_pos)));
+        }
+        LockBuffer(buf, BUFFER_LOCK_SHARE);
+        extreme_rto_standby_read::LsnInfo head_info =
+            (extreme_rto_standby_read::LsnInfo)(info_page + extreme_rto_standby_read::LSN_INFO_HEAD_SIZE);
+        info_lsn = head_info->lsn[0];
+        UnlockReleaseBuffer(buf);
+    }
+
+    return rtl::max(base_lsn, info_lsn);
+}
+
+/*
+ * Raise recycle_lsn (in/out) to the largest forced-recycle LSN computed for
+ * any REDO_PAGE_WORKER, then log the resulting value. Workers with other
+ * roles are ignored; recycle_lsn is never lowered.
+ */
+void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn)
+{
+    uint32 worker_cnt = g_dispatcher->allWorkersCnt;
+    PageRedoWorker** all_workers = g_dispatcher->allWorkers;
+
+    for (uint32 idx = 0; idx < worker_cnt; ++idx) {
+        PageRedoWorker* worker = all_workers[idx];
+        if (worker->role == REDO_PAGE_WORKER) {
+            XLogRecPtr worker_lsn = calculate_force_recycle_lsn_per_worker(&worker->standby_read_meta_info);
+            if (XLByteLT(recycle_lsn, worker_lsn)) {
+                recycle_lsn = worker_lsn;
+            }
+        }
+    }
+    ereport(LOG,
+        (errmsg(EXRTOFORMAT("[exrto_recycle] try force recycle, recycle lsn: %08X/%08X"),
+            (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn)));
+}
+
+/*
+ * Report, via TimestampDifferenceExceeds(), whether the time elapsed since
+ * start_time has reached standby_max_query_time scaled by MSECS_PER_SEC.
+ */
+static inline bool exceed_standby_max_query_time(TimestampTz start_time)
+{
+    TimestampTz now = GetCurrentTimestamp();
+    return TimestampDifferenceExceeds(start_time, now,
+        g_instance.attr.attr_storage.standby_max_query_time * MSECS_PER_SEC);
+}
+
+/* 1. resolve recycle conflict with backends
+ * 2. get oldest xmin and oldest readlsn of backends.
+ *
+ * Scans the proc array while holding ProcArrayLock in shared mode.
+ *
+ * recycle_lsn: backends whose exrto_read_lsn is older than this are cancelled.
+ * oldest_lsn, oldest_xmin: in/out accumulators. They are only initialised or
+ * lowered here and never reset, so the caller supplies the starting values.
+ * conflict: out. Reset to false on entry and set to true when at least one
+ * backend was flagged for cancellation during this scan.
+ *
+ * Entries with pid 0, an invalid xmin or an invalid exrto_read_lsn are
+ * skipped, as are backends doing logical decoding. A flagged backend gets
+ * recoveryConflictPending set and is sent PROCSIG_RECOVERY_CONFLICT_SNAPSHOT;
+ * it does not contribute to oldest_lsn / oldest_xmin.
+ *
+ * NOTE(review): the pg_usleep() after signalling runs while ProcArrayLock is
+ * still held -- confirm this is acceptable for other lock waiters.
+ */
+void proc_array_get_oldeset_readlsn(XLogRecPtr recycle_lsn, XLogRecPtr &oldest_lsn, TransactionId &oldest_xmin,
+ bool &conflict)
+{
+ ProcArrayStruct* proc_array = g_instance.proc_array_idx;
+ conflict = false;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+ for (int index = 0; index < proc_array->numProcs; index++) {
+ int pg_proc_no = proc_array->pgprocnos[index];
+ PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no];
+ PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no];
+ XLogRecPtr read_lsn = pg_proc->exrto_read_lsn;
+ TransactionId pxmin = pg_xact->xmin;
+
+ if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) {
+ continue;
+ }
+
+ Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM));
+ /*
+ * Backend is doing logical decoding which manages xmin
+ * separately, check below.
+ */
+ if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
+ continue;
+ }
+
+ /* cancel query when its read_lsn < recycle_lsn or its runtime > standby_max_query_time */
+ if (XLByteLT(read_lsn, recycle_lsn) || exceed_standby_max_query_time(pg_proc->exrto_gen_snap_time)) {
+ pg_proc->recoveryConflictPending = true;
+ conflict = true;
+ if (pg_proc->pid != 0) {
+ /*
+ * Kill the pid if it's still here. If not, that's what we
+ * wanted so ignore any errors.
+ */
+ (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId);
+ /*
+ * Wait a little bit for it to die so that we avoid flooding
+ * an unresponsive backend when system is heavily loaded.
+ */
+ pg_usleep(5000L);
+ }
+ continue;
+ }
+
+ if (XLogRecPtrIsInvalid(oldest_lsn) ||
+ (XLogRecPtrIsValid(read_lsn) && XLByteLT(read_lsn, oldest_lsn))) {
+ oldest_lsn = read_lsn;
+ }
+
+ if (!TransactionIdIsValid(oldest_xmin) ||
+ (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) {
+ oldest_xmin = pxmin;
+ }
+ }
+ LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Lower oldest_xmin (in/out) to the oldest valid xmin advertised by any
+ * backend in the proc array.
+ *
+ * Runs under ProcArrayLock (shared). Entries with pid 0 or an invalid xmin,
+ * and backends doing logical decoding, are ignored. oldest_xmin is left
+ * untouched when no backend qualifies.
+ */
+void proc_array_get_oldeset_xmin_for_undo(TransactionId &oldest_xmin)
+{
+    ProcArrayStruct* procs = g_instance.proc_array_idx;
+
+    LWLockAcquire(ProcArrayLock, LW_SHARED);
+    for (int slot = 0; slot < procs->numProcs; slot++) {
+        int proc_no = procs->pgprocnos[slot];
+        PGPROC* backend = g_instance.proc_base_all_procs[proc_no];
+        PGXACT* backend_xact = &g_instance.proc_base_all_xacts[proc_no];
+        TransactionId backend_xmin = backend_xact->xmin;
+
+        if (backend->pid != 0 && TransactionIdIsValid(backend_xmin)) {
+            Assert(!(backend_xact->vacuumFlags & PROC_IN_VACUUM));
+            /*
+             * A backend doing logical decoding manages its xmin separately,
+             * so it does not take part in this calculation.
+             */
+            if (!(backend_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING)) {
+                bool no_candidate_yet = !TransactionIdIsValid(oldest_xmin);
+                if (no_candidate_yet ||
+                    (TransactionIdIsValid(backend_xmin) && TransactionIdFollows(oldest_xmin, backend_xmin))) {
+                    oldest_xmin = backend_xmin;
+                }
+            }
+        }
+    }
+    LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Work out the LSN up to which extreme-RTO standby-read data may be recycled.
+ *
+ * Starts from the global recycle LSN; with force_recyle set, it is first
+ * raised by calculate_force_recycle_lsn(). Backends that conflict with that
+ * LSN are then cancelled via proc_array_get_oldeset_readlsn(), rescanning
+ * while a conflict is still reported, up to max_check_times scans. If no
+ * backend supplied a read LSN, the published exrto snapshot's read_lsn and
+ * xmin are used instead (WARNING if that snapshot is not valid yet).
+ *
+ * Returns the larger of the starting recycle LSN and the oldest read LSN.
+ * Must not be called from a PAGEREDO thread.
+ */
+XLogRecPtr exrto_calculate_recycle_position(bool force_recyle)
+{
+    Assert(t_thrd.role != PAGEREDO);
+    Assert(IS_EXRTO_READ);
+
+    /* global_recycle_lsn is shared between threads, so read it atomically. */
+    XLogRecPtr recycle_lsn = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn);
+    XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
+    TransactionId oldest_xmin = InvalidTransactionId;
+    bool conflict = false;
+    const int max_check_times = 1000;
+    int check_times = 0;
+
+    if (force_recyle) {
+        calculate_force_recycle_lsn(recycle_lsn);
+    }
+
+    /* Loop checks to avoid conflicting queries that were not successfully canceled. */
+    do {
+        RedoInterruptCallBack();
+        proc_array_get_oldeset_readlsn(recycle_lsn, oldest_lsn, oldest_xmin, conflict);
+        check_times++;
+    } while (conflict && check_times < max_check_times);
+
+    /*
+     * If there is no backend read threads, set read oldest lsn to snapshot lsn.
+     */
+    if (XLogRecPtrIsInvalid(oldest_lsn)) {
+        ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+        (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+        if (XLByteEQ(exrto_snapshot->read_lsn, 0)) {
+            ereport(WARNING,
+                (errmsg("could not get a valid snapshot with extreme rto")));
+        } else {
+            oldest_lsn = exrto_snapshot->read_lsn;
+            oldest_xmin = exrto_snapshot->xmin;
+        }
+
+        LWLockRelease(ExrtoSnapshotLock);
+    }
+    recycle_lsn = rtl::max(recycle_lsn, oldest_lsn);
+
+    ereport(
+        LOG,
+        (errmsg(
+            EXRTOFORMAT(
+                "[exrto_recycle] calculate recycle position, oldestlsn: %08X/%08X, snapshot read_lsn: %08X/%08X, try "
+                "recycle lsn: %08X/%08X"),
+            (uint32)(oldest_lsn >> UINT64_HALF), (uint32)oldest_lsn,
+            (uint32)(g_dispatcher->exrto_snapshot.read_lsn >> UINT64_HALF),
+            (uint32)g_dispatcher->exrto_snapshot.read_lsn, (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn)));
+
+    return recycle_lsn;
+}
+
+/*
+ * Return the xmin that bounds undo recycling on an extreme-RTO read standby.
+ *
+ * Uses the oldest xmin advertised by any backend. When no backend supplies
+ * one, falls back to the xmin of the published exrto snapshot; if that is
+ * also invalid, a WARNING is logged and InvalidTransactionId is returned.
+ * Must not be called from a PAGEREDO thread.
+ */
+TransactionId exrto_calculate_recycle_xmin_for_undo()
+{
+    Assert(t_thrd.role != PAGEREDO);
+    Assert(IS_EXRTO_READ);
+    TransactionId oldest_xmin = InvalidTransactionId;
+    TransactionId snapshot_xmin = InvalidTransactionId;
+    proc_array_get_oldeset_xmin_for_undo(oldest_xmin);
+
+    /*
+     * If there are no backend read threads, fall back to the xmin of the
+     * published exrto snapshot.
+     */
+    if (!TransactionIdIsValid(oldest_xmin)) {
+        ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot;
+        (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED);
+        if (!TransactionIdIsValid(exrto_snapshot->xmin)) {
+            ereport(
+                WARNING,
+                (errmsg("exrto_calculate_recycle_xmin_for_undo: could not get a valid snapshot in exrto_snapshot")));
+        } else {
+            snapshot_xmin = exrto_snapshot->xmin;
+        }
+
+        LWLockRelease(ExrtoSnapshotLock);
+    }
+    ereport(DEBUG1, (errmodule(MOD_UNDO),
+        errmsg(UNDOFORMAT("exrto_calculate_recycle_xmin_for_undo: oldest_xmin: %lu, snapshot_xmin: %lu."),
+            oldest_xmin, snapshot_xmin)));
+
+    return TransactionIdIsValid(oldest_xmin) ? oldest_xmin : snapshot_xmin;
+}
+
} // namespace extreme_rto
\ No newline at end of file
diff --git a/src/gausskernel/storage/access/transam/multi_redo_api.cpp b/src/gausskernel/storage/access/transam/multi_redo_api.cpp
index 2d70a75a657454e7f3d5c183581f889438bdeace..7b2b564443b4ec49b1812f0695499a90c599cd2f 100644
--- a/src/gausskernel/storage/access/transam/multi_redo_api.cpp
+++ b/src/gausskernel/storage/access/transam/multi_redo_api.cpp
@@ -66,9 +66,9 @@ void DispatchRedoRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz
g_instance.comm_cxt.localinfo_cxt.term_from_xlog = term;
}
- long readbufcountbefore = u_sess->instr_cxt.pg_buffer_usage->local_blks_read;
+ long readbufcountbefore = u_sess->instr_cxt.pg_buffer_usage->shared_blks_read;
ApplyRedoRecord(record);
- record->readblocks = u_sess->instr_cxt.pg_buffer_usage->local_blks_read - readbufcountbefore;
+ record->readblocks = u_sess->instr_cxt.pg_buffer_usage->shared_blks_read - readbufcountbefore;
CountXLogNumbers(record);
if (XLogRecGetRmid(record) == RM_XACT_ID)
SetLatestXTime(recordXTime);
@@ -134,6 +134,10 @@ bool IsAllPageWorkerExit()
}
g_instance.comm_cxt.predo_cxt.totalNum = 0;
}
+
+ if (g_instance.pid_cxt.exrto_recycler_pid != 0) {
+ return false;
+ }
ereport(LOG,
(errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("page workers all exit or not open parallel redo")));
diff --git a/src/gausskernel/storage/access/transam/xact.cpp b/src/gausskernel/storage/access/transam/xact.cpp
index aae577d6eb055083ecff5363c9cdbeee689c7c49..68e632abbe99ade5a7462589a01a3d2c98f95847 100755
--- a/src/gausskernel/storage/access/transam/xact.cpp
+++ b/src/gausskernel/storage/access/transam/xact.cpp
@@ -46,6 +46,7 @@
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "access/multi_redo_api.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
@@ -7206,6 +7207,15 @@ static void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels)
smgrdounlink(srel, true);
smgrclose(srel);
+ /*
+ * recycle exrto files when dropping table occurs.
+ */
+ if (IS_EXRTO_READ) {
+ RelFileNode block_meta_file = relFileNode;
+ block_meta_file.spcNode = EXRTO_BLOCK_INFO_SPACE_OID;
+ extreme_rto_standby_read::remove_one_block_info_file(block_meta_file);
+ }
+
UnlockRelFileNode(relFileNode, AccessExclusiveLock);
/*
diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp
index 3a85240b58eb8502e359921f9d7f93b95124b018..28fe664273eeb4d0b4867f2a0594ce97997cb657 100755
--- a/src/gausskernel/storage/access/transam/xlog.cpp
+++ b/src/gausskernel/storage/access/transam/xlog.cpp
@@ -38,6 +38,7 @@
#include "access/double_write.h"
#include "access/heapam.h"
#include "access/multixact.h"
+#include "access/multi_redo_api.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/transam.h"
@@ -10424,6 +10425,12 @@ void StartupXLOG(void)
} else {
Insert->fullPageWrites = t_thrd.xlog_cxt.lastFullPageWrites;
}
+
+ if (IS_EXRTO_READ) {
+ /* we are going to be master, we need to recycle residual_undo_file again */
+ g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = false;
+ }
+
LocalSetXLogInsertAllowed();
UpdateFullPageWrites();
t_thrd.xlog_cxt.LocalXLogInsertAllowed = -1;
@@ -13182,6 +13189,16 @@ static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr curIns
segno = mainStandbySegNo;
}
}
+
+ if (IS_EXRTO_READ) {
+ XLogRecPtr recycle_recptr = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn);
+ XLogSegNo recyle_segno;
+ XLByteToSeg(recycle_recptr, recyle_segno);
+ if (recyle_segno < segno && recyle_segno > 0) {
+ segno = recyle_segno;
+ }
+ }
+
/* don't delete WAL segments newer than the calculated segment */
if (segno < *logSegNo && segno > 0) {
*logSegNo = segno;
@@ -16019,8 +16036,8 @@ void SetXLogReplayRecPtr(XLogRecPtr readRecPtr, XLogRecPtr endRecPtr)
SpinLockRelease(&xlogctl->info_lck);
if (isUpdated) {
RedoSpeedDiag(readRecPtr, endRecPtr);
+ update_dirty_page_queue_rec_lsn(readRecPtr);
}
- update_dirty_page_queue_rec_lsn(readRecPtr);
#ifndef ENABLE_MULTIPLE_NODES
if (g_instance.attr.attr_storage.dcf_attr.enable_dcf) {
int ret = dcf_set_election_priority(1, endRecPtr);
diff --git a/src/gausskernel/storage/access/transam/xlogfuncs.cpp b/src/gausskernel/storage/access/transam/xlogfuncs.cpp
index 2636fcfae660efdeaea2d3ff46ddc759a4c1b197..9533951c6ef95377a30e389607fa4712938d7694 100755
--- a/src/gausskernel/storage/access/transam/xlogfuncs.cpp
+++ b/src/gausskernel/storage/access/transam/xlogfuncs.cpp
@@ -2046,6 +2046,23 @@ Datum gs_streaming_dr_in_switchover(PG_FUNCTION_ARGS)
Datum gs_streaming_dr_service_truncation_check(PG_FUNCTION_ARGS)
{
#ifndef ENABLE_LITE_MODE
+ int dr_sender_num = 0;
+
+ for (int i = 1; i < MAX_REPLNODE_NUM; i++) {
+ ReplConnInfo *replConnInfo = NULL;
+ replConnInfo = t_thrd.postmaster_cxt.ReplConnArray[i];
+
+ /* Number of DR replconninfo */
+ if (replConnInfo != NULL && replConnInfo->isCrossRegion) {
+ dr_sender_num++;
+ }
+ }
+ if (IS_PGXC_COORDINATOR) {
+ g_instance.streaming_dr_cxt.hadrWalSndNum = dr_sender_num;
+ } else {
+ g_instance.streaming_dr_cxt.hadrWalSndNum = dr_sender_num > 0 ? 1 : 0;
+ }
+
for (int i = 0; i < g_instance.attr.attr_storage.max_wal_senders; i++) {
/* use volatile pointer to prevent code rearrangement */
volatile WalSnd *walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i];
@@ -2057,7 +2074,6 @@ Datum gs_streaming_dr_service_truncation_check(PG_FUNCTION_ARGS)
SpinLockAcquire(&walsnd->mutex);
if (walsnd->interactiveState == SDRS_DEFAULT) {
walsnd->interactiveState = SDRS_INTERACTION_BEGIN;
- g_instance.streaming_dr_cxt.hadrWalSndNum++;
}
SpinLockRelease(&walsnd->mutex);
}
diff --git a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp
index 6e70f321f1a729b17d8bf7fea1dade541ed2e000..9149870fe0c48809584715497f535acd5f32aace 100644
--- a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp
+++ b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp
@@ -1148,9 +1148,6 @@ void UHeapXlogFreezeTDOperatorPage(RedoBufferInfo *buffer, void *recorddata)
UHeapPageTDData *tdPtr = (UHeapPageTDData *)PageGetTDPointer(page);
TD *transinfo = tdPtr->td_info;
- if (InHotStandby && TransactionIdIsValid(xlrec->latestFrozenXid))
- ResolveRecoveryConflictWithSnapshot(xlrec->latestFrozenXid, buffer->blockinfo.rnode, buffer->lsn);
-
UHeapFreezeOrInvalidateTuples(buffer->buf, nFrozen, frozenSlots, true);
for (int i = 0; i < nFrozen; i++) {
@@ -1520,12 +1517,6 @@ void UHeapRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatar
}
}
-#ifdef ENABLE_MULTIPLE_NODES
-const static bool SUPPORT_HOT_STANDBY = false; /* don't support consistency view */
-#else
-const static bool SUPPORT_HOT_STANDBY = true;
-#endif
-
void UHeap2XlogFreezeOperatorPage(RedoBufferInfo *buffer, void *recorddata, void *blkdata, Size datalen)
{
XlUHeapFreeze *xlrec = (XlUHeapFreeze *)recorddata;
@@ -1536,14 +1527,6 @@ void UHeap2XlogFreezeOperatorPage(RedoBufferInfo *buffer, void *recorddata, void
OffsetNumber *offsetsEnd = NULL;
UHeapTupleData utuple;
- /*
- * In Hot Standby mode, ensure that there's no queries running which still
- * consider the frozen xids as running.
- */
- if (InHotStandby && SUPPORT_HOT_STANDBY) {
- ResolveRecoveryConflictWithSnapshot(cutoffXid, buffer->blockinfo.rnode, buffer->lsn);
- }
-
if (datalen > 0) {
offsetsEnd = (OffsetNumber *)((char *)offsets + datalen);
@@ -2019,12 +2002,18 @@ static void RedoUndoDiscardBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *b
XLogRecPtr lsn = blockdatarec->undoDiscardParse.lsn;
UndoZone *zone = UndoZoneGroup::GetUndoZone(zoneId);
+    /*
+     * zone may be NULL here (it is only checked just below), so its LSN must
+     * not be read unconditionally while building the log arguments.
+     */
+    ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+        "redo_undo_discard_block zid=%d, isZoneNull:%d, zone_lsn:%lu, lsn:%lu, end_slot:%lu, end_undo_ptr:%lu, "
+        "recycled_xid:%lu."), zoneId, (int)(zone == NULL), (zone == NULL) ? InvalidXLogRecPtr : zone->GetLSN(),
+        lsn, endSlot, endUndoPtr, recycledXid)));
if (zone == NULL) {
return;
}
if (zone->GetLSN() < lsn) {
zone->LockUndoZone();
Assert(blockdatarec->undoDiscardParse.startSlot == zone->GetRecycleTSlotPtr());
+ if (IS_EXRTO_READ && (!g_instance.undo_cxt.is_exrto_residual_undo_file_recycled)) {
+ zone->set_recycle_tslot_ptr_exrto(endSlot);
+ }
zone->SetRecycleTSlotPtr(endSlot);
zone->SetDiscardURecPtr(endUndoPtr);
zone->SetForceDiscardURecPtr(endUndoPtr);
@@ -2048,12 +2037,19 @@ static void RedoUndoUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl
XLogRecPtr unlinkLsn = blockdatarec->undoUnlinkParse.unlinkLsn;
UndoLogOffset newHead = blockdatarec->undoUnlinkParse.headOffset;
UndoLogOffset head = usp->Head();
+ ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+ "redo_undo_unlink_block, zid=%d, usp_lsn:%lu, unlink_lsn:%lu, head:%lu, new_head:%lu."),
+ zoneId, usp->LSN(), unlinkLsn, head, newHead)));
if (usp->LSN() < unlinkLsn) {
zone->ForgetUndoBuffer(head, newHead, UNDO_DB_OID);
usp->LockSpace();
usp->MarkDirty();
- usp->UnlinkUndoLog(zoneId, newHead, UNDO_DB_OID);
+ if (IS_EXRTO_STANDBY_READ) {
+ usp->SetHead(newHead);
+ } else {
+ usp->UnlinkUndoLog(zoneId, newHead, UNDO_DB_OID);
+ }
usp->SetLSN(unlinkLsn);
usp->UnlockSpace();
}
@@ -2071,12 +2067,19 @@ static void RedoSlotUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl
XLogRecPtr unlinkLsn = blockdatarec->undoUnlinkParse.unlinkLsn;
UndoLogOffset newHead = blockdatarec->undoUnlinkParse.headOffset;
UndoLogOffset head = usp->Head();
+ ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+ "redo_slot_unlink_block, zid=%d, usp_lsn:%lu, unlink_lsn:%lu, head:%lu, new_head:%lu."),
+ zoneId, usp->LSN(), unlinkLsn, head, newHead)));
if (usp->LSN() < unlinkLsn) {
zone->ForgetUndoBuffer(head, newHead, UNDO_SLOT_DB_OID);
usp->LockSpace();
usp->MarkDirty();
- usp->UnlinkUndoLog(zoneId, newHead, UNDO_SLOT_DB_OID);
+ if (IS_EXRTO_STANDBY_READ) {
+ usp->SetHead(newHead);
+ } else {
+ usp->UnlinkUndoLog(zoneId, newHead, UNDO_SLOT_DB_OID);
+ }
usp->SetLSN(unlinkLsn);
usp->UnlockSpace();
}
diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp
index bb72f6e2791d78127a9bc44da9fd2a913353d302..87acf77e598f4fa704e2bb1fe0df169ff16d8ace 100644
--- a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp
+++ b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp
@@ -22,6 +22,7 @@
#include "access/ustore/undo/knl_uundoxlog.h"
#include "access/ustore/knl_whitebox_test.h"
#include "access/transam.h"
+#include "access/multi_redo_api.h"
#include "catalog/pg_class.h"
#include "knl/knl_session.h"
#include "knl/knl_thread.h"
@@ -281,9 +282,14 @@ UndoRecordState CheckUndoRecordValid(UndoRecPtr urp, bool checkForceRecycle, Tra
UndoZone *uzone = UndoZoneGroup::GetUndoZone(zid, false);
if (uzone == NULL) {
return UNDO_RECORD_INVALID;
- } else {
- return uzone->CheckUndoRecordValid(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid);
}
+
+ if (IS_EXRTO_STANDBY_READ) {
+ return uzone->check_record_valid_exrto(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid);
+ }
+
+
+ return uzone->CheckUndoRecordValid(UNDO_PTR_GET_OFFSET(urp), checkForceRecycle, lastXid);
}
/*
@@ -640,6 +646,7 @@ void RecoveryUndoSystemMeta(void)
/* Close fd. */
close(fd);
+ ereport(LOG, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("recovery_meta: undo recovery finish.")));
#endif
}
}
diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp
index 75ac16a3205048e1fb5b0524462fb817845b489b..44debc23e594d6c458400b2b666d5a3dec673342 100755
--- a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp
+++ b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp
@@ -31,6 +31,8 @@
#include "access/ustore/undo/knl_uundoxlog.h"
#include "access/ustore/knl_undorequest.h"
#include "access/ustore/knl_whitebox_test.h"
+#include "access/multi_redo_api.h"
+#include "access/extreme_rto/page_redo.h"
#include "gssignal/gs_signal.h"
#include "knl/knl_thread.h"
#include "storage/ipc.h"
@@ -528,6 +530,162 @@ static void RecycleWaitIfNotUsed()
}
}
+/*
+ * Record a completed recycle step in the zone's extreme-RTO (exrto) state and
+ * release the corresponding undo space.
+ *
+ * zone: undo zone being recycled.
+ * recycle_xid: stored as the zone's exrto recycle xid; asserted to be a valid
+ * xid greater than the one currently stored.
+ * start_undo_ptr, end_undo_ptr: undo record range; checked with CheckRecycle()
+ * and passed on to ReleaseSpace().
+ * recycle_exrto: stored as the exrto recycle slot pointer and passed to
+ * ReleaseSlotSpace() as the end slot pointer.
+ *
+ * Reports PANIC when CheckRecycle() rejects the range.
+ */
+void exrto_standby_release_space(UndoZone *zone, TransactionId recycle_xid, UndoRecPtr start_undo_ptr,
+ UndoRecPtr end_undo_ptr, UndoSlotPtr recycle_exrto)
+{
+ UndoRecPtr oldest_end_undo_ptr = end_undo_ptr;
+ Assert(TransactionIdIsValid(recycle_xid) && (zone->get_recycle_xid_exrto() < recycle_xid));
+ /* The exrto fields below are updated while the zone lock is held. */
+ zone->LockUndoZone();
+ if (!zone->CheckRecycle(start_undo_ptr, end_undo_ptr)) {
+ ereport(PANIC, (errmodule(MOD_UNDO),
+ errmsg(UNDOFORMAT("zone %d recycle start %lu >= recycle end %lu."),
+ zone->GetZoneId(), start_undo_ptr, end_undo_ptr)));
+ }
+ if (IS_VALID_UNDO_REC_PTR(oldest_end_undo_ptr)) {
+ int start_zid = UNDO_PTR_GET_ZONE_ID(start_undo_ptr);
+ int end_zid = UNDO_PTR_GET_ZONE_ID(oldest_end_undo_ptr);
+ if (unlikely(start_zid != end_zid)) {
+ /* End pointer names another zone: use the maximum offset of the start zone instead. */
+ oldest_end_undo_ptr = MAKE_UNDO_PTR(start_zid, UNDO_LOG_MAX_SIZE);
+ }
+ zone->set_discard_urec_ptr_exrto(oldest_end_undo_ptr);
+ }
+
+ zone->set_recycle_xid_exrto(recycle_xid);
+ /* The force discard pointer takes the caller's end pointer, not the adjusted one. */
+ zone->set_force_discard_urec_ptr_exrto(end_undo_ptr);
+ zone->set_recycle_tslot_ptr_exrto(recycle_exrto);
+ zone->UnlockUndoZone();
+ /* Space is released only after the zone lock has been dropped. */
+ zone->ReleaseSpace(start_undo_ptr, end_undo_ptr, &g_forceRecycleSize);
+ zone->ReleaseSlotSpace(0, recycle_exrto, &g_forceRecycleSize);
+}
+
+/*
+ * Recycle undo space of one zone on an extreme-RTO (exrto) standby.
+ *
+ * Transaction slots are scanned from the zone's exrto recycle slot pointer
+ * towards the slot pointer returned by GetRecycleTSlotPtr(). A slot is
+ * recycled only if it has a valid xid, a valid start undo pointer and an xid
+ * that precedes recycle_xmin; the scan of a block stops at the first slot that
+ * fails one of these checks. Recyclable slots are handed to
+ * exrto_standby_release_space() in batches, one batch per slot buffer block.
+ *
+ * Returns true if at least one batch was released, false otherwise.
+ */
+bool exrto_standby_recycle_space(UndoZone *zone, TransactionId recycle_xmin)
+{
+ UndoSlotPtr recycle_exrto = zone->get_recycle_tslot_ptr_exrto();
+ UndoSlotPtr recycle_primary = zone->GetRecycleTSlotPtr();
+ undo::TransactionSlot *slot = NULL;
+ UndoRecPtr end_undo_ptr = INVALID_UNDO_REC_PTR;
+ TransactionId recycle_xid = InvalidTransactionId;
+ bool undo_recycled = false;
+ bool result = false;
+ UndoSlotPtr start = INVALID_UNDO_SLOT_PTR;
+ ereport(DEBUG1, (errmodule(MOD_UNDO),
+ errmsg(UNDOFORMAT("exrto_standby_recycle_space zone_id:%d, recycle_xmin:%lu, recycle_exrto:%lu, "
+ "recycle_primary:%lu."),
+ zone->GetZoneId(), recycle_xmin, recycle_exrto, recycle_primary)));
+
+ /* Each outer iteration processes the slots that live in one slot buffer block. */
+ while (recycle_exrto < recycle_primary) {
+ UndoSlotBuffer& slot_buf = g_slotBufferCache->FetchTransactionBuffer(recycle_exrto);
+ UndoRecPtr start_undo_ptr = INVALID_UNDO_REC_PTR;
+ /* Remember the first slot of this batch; it is the key passed to RemoveSlotBuffer() below. */
+ start = recycle_exrto;
+ slot_buf.PrepareTransactionSlot(recycle_exrto);
+ undo_recycled = false;
+ Assert(slot_buf.BufBlock() == UNDO_PTR_GET_BLOCK_NUM(recycle_exrto));
+ while (slot_buf.BufBlock() == UNDO_PTR_GET_BLOCK_NUM(recycle_exrto) && (recycle_exrto < recycle_primary)) {
+ slot = slot_buf.FetchTransactionSlot(recycle_exrto);
+ /* Stop at a slot without a valid xid. */
+ if (!TransactionIdIsValid(slot->XactId())) {
+ break;
+ }
+ /* Stop at a slot without a valid start undo pointer. */
+ if (slot->StartUndoPtr() == INVALID_UNDO_REC_PTR) {
+ break;
+ }
+
+ /* Stop at the first slot whose xid is not older than recycle_xmin. */
+ if (TransactionIdFollowsOrEquals(slot->XactId(), recycle_xmin)) {
+ break;
+ }
+ ereport(DEBUG1, (errmodule(MOD_UNDO),
+ errmsg(UNDOFORMAT("recycle zone %d, exrto transaction slot %lu xid %lu start ptr %lu end ptr %lu."),
+ zone->GetZoneId(), recycle_exrto, slot->XactId(),
+ slot->StartUndoPtr(), slot->EndUndoPtr())));
+ /* The first recyclable slot of the batch fixes the start of the released range. */
+ if (!start_undo_ptr) {
+ start_undo_ptr = slot->StartUndoPtr();
+ }
+ /* End pointer and xid always follow the most recently accepted slot. */
+ end_undo_ptr = slot->EndUndoPtr();
+ recycle_xid = slot->XactId();
+ undo_recycled = true;
+ recycle_exrto = GetNextSlotPtr(recycle_exrto);
+ /* if next recycle_exrto is in different slot_buf, release current slot_buf. */
+ if (slot_buf.BufBlock() != UNDO_PTR_GET_BLOCK_NUM(recycle_exrto)) {
+ g_slotBufferCache->RemoveSlotBuffer(start);
+ slot_buf.Release();
+ }
+ }
+ if (undo_recycled) {
+ /* recycle_exrto now points at the first slot that was not recycled. */
+ exrto_standby_release_space(zone, recycle_xid, start_undo_ptr, end_undo_ptr, recycle_exrto);
+ result = true;
+ } else {
+ /* zone has nothing to recycle. */
+ break;
+ }
+ }
+ return result;
+}
+
+/*
+ * Run one extreme-RTO standby recycle pass over the persistent undo zones.
+ *
+ * The recycle xmin is computed once via exrto_calculate_recycle_xmin_for_undo()
+ * and applied to every non-NULL zone for which Used_exrto() is non-zero. The
+ * pass stops early once the undo recycler thread has a shutdown request.
+ *
+ * Returns true if exrto_standby_recycle_space() reported progress for any
+ * zone; returns false as well when no undo zones have been set up.
+ */
+bool exrto_standby_recycle_undo_zone()
+{
+    if (g_instance.undo_cxt.uZoneCount == 0 || g_instance.undo_cxt.uZones == NULL) {
+        return false;
+    }
+
+    bool any_recycled = false;
+    TransactionId recycle_xmin = extreme_rto::exrto_calculate_recycle_xmin_for_undo();
+    for (uint32 i = 0; i < PERSIST_ZONE_COUNT && !t_thrd.undorecycler_cxt.shutdown_requested; i++) {
+        UndoZone *zone = (UndoZone *)g_instance.undo_cxt.uZones[i];
+        if (zone == NULL || !zone->Used_exrto()) {
+            continue;
+        }
+        if (exrto_standby_recycle_space(zone, recycle_xmin)) {
+            any_recycled = true;
+        }
+    }
+    /* Close every smgr relation once the pass is over. */
+    smgrcloseall();
+    return any_recycled;
+}
+
+/*
+ * Remove residual undo files that extreme-RTO standby read may have left
+ * behind. The work is guarded by
+ * g_instance.undo_cxt.is_exrto_residual_undo_file_recycled: once that flag is
+ * set, later calls return immediately. For every non-NULL persistent zone the
+ * residual record space and slot space are released and the released file
+ * counts are logged.
+ */
+void exrto_recycle_residual_undo_file()
+{
+    if (g_instance.undo_cxt.is_exrto_residual_undo_file_recycled) {
+        return;
+    }
+    ereport(LOG, (errmodule(MOD_UNDO),
+        errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file begin uZoneCount is %u."),
+            g_instance.undo_cxt.uZoneCount)));
+
+    bool no_zones = (g_instance.undo_cxt.uZoneCount == 0 || g_instance.undo_cxt.uZones == NULL);
+    if (no_zones) {
+        /* Nothing to scan; still mark the job as done. */
+        g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true;
+        ereport(LOG, (errmodule(MOD_UNDO),
+            errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file uZoneCount is zero or uZones is null."))));
+        return;
+    }
+
+    uint64 released_record_files = 0;
+    uint64 released_slot_files = 0;
+    for (uint32 i = 0; i < PERSIST_ZONE_COUNT && !t_thrd.undorecycler_cxt.shutdown_requested; i++) {
+        UndoZone *zone = (UndoZone *)g_instance.undo_cxt.uZones[i];
+        if (zone != NULL) {
+            released_record_files += zone->release_residual_record_space();
+            released_slot_files += zone->release_residual_slot_space();
+        }
+    }
+    smgrcloseall();
+    ereport(LOG, (errmodule(MOD_UNDO),
+        errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file release record_file_cnt:%lu, "
+            "slot_file_cnt:%lu."), released_record_files, released_slot_files)));
+    g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true;
+}
+
+/*
+ * Back-off helper for the undo recycle loop.
+ *
+ * recycled: whether the last pass recycled anything.
+ * non_recycled: in/out accumulator. It is reset to 0 after a productive pass;
+ * otherwise it grows by UNDO_RECYCLE_TIMEOUT_DELTA and the new value is handed
+ * to WaitRecycleThread().
+ */
+void recycle_wait(bool recycled, uint64 *non_recycled)
+{
+    if (recycled) {
+        *non_recycled = 0;
+        return;
+    }
+    *non_recycled += UNDO_RECYCLE_TIMEOUT_DELTA;
+    WaitRecycleThread(*non_recycled);
+}
+
void UndoRecycleMain()
{
sigjmp_buf localSigjmpBuf;
@@ -646,6 +804,10 @@ void UndoRecycleMain()
t_thrd.undorecycler_cxt.got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
+ if (t_thrd.undorecycler_cxt.shutdown_requested) {
+ ShutDownRecycle(recycleMaxXIDs);
+ }
+ exrto_recycle_residual_undo_file();
if (!RecoveryInProgress()) {
TransactionId recycleXmin = InvalidTransactionId;
TransactionId oldestXmin = GetOldestXminForUndo(&recycleXmin);
@@ -751,15 +913,10 @@ void UndoRecycleMain()
pg_atomic_write_u64(&g_instance.undo_cxt.globalRecycleXid, oldestXidHavingUndo);
}
}
- if (!recycled) {
- nonRecycled += UNDO_RECYCLE_TIMEOUT_DELTA;
- WaitRecycleThread(nonRecycled);
- } else {
- nonRecycled = 0;
- }
- } else {
- WaitRecycleThread(nonRecycled);
+ } else if (IS_EXRTO_STANDBY_READ) {
+ recycled = exrto_standby_recycle_undo_zone();
}
+ recycle_wait(recycled, &nonRecycled);
}
ShutDownRecycle(recycleMaxXIDs);
}
diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp
index 24a29e01e10769a408ef007f95ff77aa63613c29..3d60a8a4d75a315459e3563215173df61eb59e74 100644
--- a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp
+++ b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp
@@ -20,6 +20,7 @@
#include "access/ustore/knl_whitebox_test.h"
#include "storage/lock/lwlock.h"
#include "storage/smgr/smgr.h"
+#include "access/multi_redo_api.h"
namespace undo {
static uint64 USEG_SIZE(uint32 dbId)
@@ -49,6 +50,26 @@ uint32 UndoSpace::Used(void)
return (uint32)((tail_ - head_) / BLCKSZ);
}
+/*
+ * Starting at head_, step backwards one segment (USEG_SIZE(db_id)) at a time
+ * for as long as smgrexists() reports the first block of the preceding
+ * segment, and return the lowest offset reached. The result equals head_ when
+ * no earlier segment is found.
+ */
+UndoLogOffset UndoSpace::find_oldest_offset(int zid, uint32 db_id) const
+{
+    UndoLogOffset oldest = head_;
+    RelFileNode rnode;
+    UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, oldest), db_id);
+    SMgrRelation reln = smgropen(rnode, InvalidBackendId);
+    uint64 seg_size = USEG_SIZE(db_id);
+
+    while (oldest >= seg_size) {
+        UndoLogOffset candidate = oldest - seg_size;
+        if (!smgrexists(reln, MAIN_FORKNUM, (BlockNumber)(candidate / BLCKSZ))) {
+            /* The preceding segment is gone: 'oldest' is the answer. */
+            break;
+        }
+        oldest = candidate;
+    }
+    smgrclose(reln);
+    return oldest;
+}
+
/* Create segments needed to increase end_ to newEnd. */
void UndoSpace::ExtendUndoLog(int zid, UndoLogOffset offset, uint32 dbId)
{
@@ -91,7 +112,17 @@ void UndoSpace::ExtendUndoLog(int zid, UndoLogOffset offset, uint32 dbId)
void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId)
{
RelFileNode rnode;
- UndoLogOffset head = head_;
+ UndoLogOffset head;
+ UndoLogOffset old_head;
+ if (IS_EXRTO_STANDBY_READ) {
+ head = head_exrto;
+ old_head = head_exrto;
+ set_head_exrto(offset);
+ } else {
+ head = head_;
+ old_head = head_;
+ SetHead(offset);
+ }
Assert(head < offset && head_ <= tail_);
UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, offset), dbId);
SMgrRelation reln = smgropen(rnode, InvalidBackendId);
@@ -104,6 +135,9 @@ void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId)
while (head < offset) {
/* Create a new undo segment. */
smgrdounlink(reln, t_thrd.xlog_cxt.InRecovery, (head / BLCKSZ));
+ ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+ "unlink undo log, zid=%d, dbid=%u, new_head=%lu, segId:%lu."),
+ zid, dbId, offset, head/segSize)));
if (g_instance.undo_cxt.undoTotalSize < segBlocks) {
ereport(PANIC, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
"unlink undo log, total blocks=%u < segment size."),
@@ -114,8 +148,32 @@ void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId)
}
smgrclose(reln);
ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
- "unlink undo log, total blocks=%u, zid=%d, dbid=%u, head=%lu."),
- g_instance.undo_cxt.undoTotalSize, zid, dbId, offset)));
+ "unlink undo log, total blocks=%u, zid=%d, dbid=%u, head=%lu, old_head:%lu."),
+ g_instance.undo_cxt.undoTotalSize, zid, dbId, offset, old_head)));
+ return;
+}
+
+/*
+ * Unlink residual undo segment files left behind by extreme RTO standby read.
+ * Walks from 'start' in steps of USEG_SIZE(db_id) and unlinks one segment per
+ * step while the segment index of 'start' is below the segment index of 'end';
+ * the segment that contains 'end' itself is not unlinked. Nothing is unlinked
+ * when 'start' already lies in the same segment as 'end' or beyond it.
+ */
+void UndoSpace::unlink_residual_log(int zid, UndoLogOffset start, UndoLogOffset end, uint32 db_id) const
+{
+ RelFileNode rnode;
+ UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, MAKE_UNDO_PTR(zid, start), db_id);
+ SMgrRelation reln = smgropen(rnode, InvalidBackendId);
+ uint64 seg_size = USEG_SIZE(db_id);
+
+ while (start/seg_size < end/seg_size) {
+ /* Unlink the residual undo segment addressed by the block number of 'start'. */
+ BlockNumber block = (BlockNumber)(start / BLCKSZ);
+ smgrdounlink(reln, t_thrd.xlog_cxt.InRecovery, block);
+ ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT(
+ "unlink_residual_log, zid=%d, dbid=%u, start=%lu, end=%lu, segId:%lu, endSegId:%lu."),
+ zid, db_id, start, end, start/seg_size, end/seg_size)));
+ start += seg_size;
+ }
+ smgrclose(reln);
return;
}
@@ -383,6 +441,7 @@ void UndoSpace::RecoveryUndoSpace(int fd, UndoSpaceType type)
usp->MarkClean();
usp->SetLSN(uspMetaInfo->lsn);
usp->SetHead(uspMetaInfo->head);
+ usp->set_head_exrto(uspMetaInfo->head);
usp->SetTail(uspMetaInfo->tail);
if (type == UNDO_LOG_SPACE) {
usp->CreateNonExistsUndoFile(zoneId, UNDO_DB_OID);
@@ -390,6 +449,9 @@ void UndoSpace::RecoveryUndoSpace(int fd, UndoSpaceType type)
usp->CreateNonExistsUndoFile(zoneId, UNDO_SLOT_DB_OID);
}
pg_atomic_fetch_add_u32(&g_instance.undo_cxt.undoTotalSize, usp->Used());
+ ereport(DEBUG1, (errmsg(UNDOFORMAT("recovery_space_meta, zone_id:%u, type:%u, "
+ "lsn:%lu, head:%lu, tail:%lu."),
+ zoneId, type, uspMetaInfo->lsn, uspMetaInfo->head, uspMetaInfo->tail)));
}
pfree(persistBlock);
}
diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp
index 288f12ad5510aa3fce127a8890864d7da0643dc8..7ac14bfd4c3dcf0c702decbb259ef0a0acc9dde8 100644
--- a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp
+++ b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp
@@ -21,6 +21,7 @@
#include "access/ustore/undo/knl_uundotxn.h"
#include "access/ustore/undo/knl_uundospace.h"
#include "access/ustore/knl_whitebox_test.h"
+#include "access/multi_redo_api.h"
#include "knl/knl_thread.h"
#include "miscadmin.h"
#include "storage/smgr/fd.h"
@@ -43,11 +44,15 @@ UndoZone::UndoZone()
SetLSN(0);
SetInsertURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
SetDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ set_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
SetForceDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ set_force_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
SetAllocateTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
SetRecycleTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ set_recycle_tslot_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
SetFrozenSlotPtr(INVALID_UNDO_SLOT_PTR);
SetRecycleXid(InvalidTransactionId);
+ set_recycle_xid_exrto(InvalidTransactionId);
SetFrozenXid(InvalidTransactionId);
InitSlotBuffer();
SetAttachPid(0);
@@ -56,12 +61,14 @@ UndoZone::UndoZone()
undoSpace_.LockInit();
undoSpace_.SetLSN(0);
undoSpace_.SetHead(0);
+ undoSpace_.set_head_exrto(0);
undoSpace_.SetTail(0);
slotSpace_.MarkClean();
slotSpace_.LockInit();
slotSpace_.SetLSN(0);
slotSpace_.SetHead(0);
+ slotSpace_.set_head_exrto(0);
slotSpace_.SetTail(0);
}
@@ -80,10 +87,16 @@ bool UndoZone::CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp)
int endZid = UNDO_PTR_GET_ZONE_ID(endurp);
UndoLogOffset start = UNDO_PTR_GET_OFFSET(starturp);
UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp);
- Assert(start == forceDiscardURecPtr_);
+ UndoLogOffset force_discard_urec_ptr;
+ if (IS_EXRTO_STANDBY_READ) {
+ force_discard_urec_ptr = force_discard_urec_ptr_exrto;
+ } else {
+ force_discard_urec_ptr = forceDiscardURecPtr_;
+ }
+ Assert(start == force_discard_urec_ptr);
WHITEBOX_TEST_STUB(UNDO_CHECK_RECYCLE_FAILED, WhiteboxDefaultErrorEmit);
- if ((startZid == endZid) && (forceDiscardURecPtr_ <= insertURecPtr_) && (end <= insertURecPtr_)
+ if ((startZid == endZid) && (force_discard_urec_ptr <= insertURecPtr_) && (end <= insertURecPtr_)
&& (start < end)) {
return true;
}
@@ -130,6 +143,48 @@ UndoRecordState UndoZone::CheckUndoRecordValid(UndoLogOffset offset, bool checkF
return UNDO_RECORD_DISCARD;
}
+/*
+ * Classify an undo record offset against this zone's extreme-RTO (exrto)
+ * discard state and return the matching UndoRecordState:
+ * - UNDO_RECORD_NOT_INSERT if offset >= insertURecPtr_;
+ * - UNDO_RECORD_NORMAL if offset >= force_discard_urec_ptr_exrto;
+ * - otherwise, when check_force_recycle is set and
+ * offset >= discard_urec_ptr_exrto: UNDO_RECORD_DISCARD if
+ * recycle_xid_exrto precedes the recycle xmin from GetOldestXminForUndo(),
+ * else UNDO_RECORD_FORCE_DISCARD;
+ * - UNDO_RECORD_DISCARD in every remaining case.
+ * If last_xid is not NULL it receives recycle_xid_exrto whenever the offset is
+ * below force_discard_urec_ptr_exrto.
+ * NOTE(review): the comment this replaces required the caller to hold the
+ * space discardLock_; nothing in this function checks that - confirm.
+ */
+UndoRecordState UndoZone::check_record_valid_exrto(UndoLogOffset offset, bool check_force_recycle,
+ TransactionId *last_xid) const
+{
+ Assert((offset < UNDO_LOG_MAX_SIZE) && (offset >= UNDO_LOG_BLOCK_HEADER_SIZE));
+ Assert(force_discard_urec_ptr_exrto <= insertURecPtr_);
+
+ /* At or beyond the insert pointer: nothing has been written there yet. */
+ if (offset >= this->insertURecPtr_) {
+ ereport(DEBUG1, (errmsg(UNDOFORMAT("The undo record not insert yet: zid=%d, "
+ "insert=%lu, offset=%lu."),
+ this->zid_, this->insertURecPtr_, offset)));
+ return UNDO_RECORD_NOT_INSERT;
+ }
+ /* Not yet reached by the exrto force discard pointer: record is usable. */
+ if (offset >= this->force_discard_urec_ptr_exrto) {
+ return UNDO_RECORD_NORMAL;
+ }
+ /* From here on the record is below the force discard pointer. */
+ if (last_xid != NULL) {
+ *last_xid = recycle_xid_exrto;
+ }
+ /* Between the discard pointer and the force discard pointer. */
+ if (offset >= this->discard_urec_ptr_exrto && check_force_recycle) {
+ TransactionId recycle_xmin;
+ TransactionId oldest_xmin = GetOldestXminForUndo(&recycle_xmin);
+ if (TransactionIdPrecedes(recycle_xid_exrto, recycle_xmin)) {
+ ereport(DEBUG1, (errmsg(
+ UNDOFORMAT("oldestxmin %lu, recycle_xmin %lu > recyclexid_exrto %lu: zid=%d,"
+ "force_discard_urec_ptr_exrto=%lu, discard_urec_ptr_exrto=%lu, offset=%lu."),
+ oldest_xmin, recycle_xmin, recycle_xid_exrto, this->zid_, this->force_discard_urec_ptr_exrto,
+ this->discard_urec_ptr_exrto, offset)));
+ return UNDO_RECORD_DISCARD;
+ }
+ ereport(DEBUG1, (errmsg(UNDOFORMAT("The record has been force recycled: zid=%d, "
+ "force_discard_urec_ptr_exrto=%lu, "
+ "discard_urec_ptr_exrto=%lu, offset=%lu."),
+ this->zid_, this->force_discard_urec_ptr_exrto, this->discard_urec_ptr_exrto, offset)));
+ return UNDO_RECORD_FORCE_DISCARD;
+ }
+ return UNDO_RECORD_DISCARD;
+}
+
/*
* Drop all buffers for the given undo log, from the start to end.
*/
@@ -220,7 +275,14 @@ UndoSlotPtr UndoZone::AllocateSlotSpace(void)
void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRecycleSize)
{
UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp);
- int startSegno = (int)(undoSpace_.Head() / UNDO_LOG_SEGMENT_SIZE);
+ int startSegno;
+ UndoLogOffset head;
+ if (IS_EXRTO_STANDBY_READ) {
+ head = undoSpace_.Head_exrto();
+ } else {
+ head = undoSpace_.Head();
+ }
+ startSegno = (int)(head / UNDO_LOG_SEGMENT_SIZE);
int endSegno = (int)(end / UNDO_LOG_SEGMENT_SIZE);
if (unlikely(startSegno < endSegno)) {
@@ -229,10 +291,10 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe
}
ForgetUndoBuffer(startSegno * UNDO_LOG_SEGMENT_SIZE, endSegno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID);
undoSpace_.LockSpace();
- UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, undoSpace_.Head());
+ UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head);
undoSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID);
Assert(undoSpace_.Head() <= insertURecPtr_);
- if (pLevel_ == UNDO_PERMANENT) {
+ if (pLevel_ == UNDO_PERMANENT && (!IS_EXRTO_STANDBY_READ)) {
START_CRIT_SECTION();
undoSpace_.MarkDirty();
XlogUndoUnlink undoUnlink;
@@ -247,11 +309,35 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe
return;
}
+/*
+ * Unlink residual undo record segment files of this zone, from the offset
+ * reported by undoSpace_.find_oldest_offset() up to undoSpace_.Head(). Lookup
+ * and unlink both run between LockSpace() and UnlockSpace().
+ *
+ * Returns the difference between the UNDO_LOG_SEGMENT_SIZE segment indexes of
+ * the two offsets, or 0 (after a WARNING) if the start offset exceeds the end.
+ */
+uint64 UndoZone::release_residual_record_space()
+{
+    undoSpace_.LockSpace();
+    UndoLogOffset oldest = undoSpace_.find_oldest_offset(zid_, UNDO_DB_OID);
+    UndoLogOffset head = undoSpace_.Head();
+    undoSpace_.unlink_residual_log(zid_, oldest, head, UNDO_DB_OID);
+    undoSpace_.UnlockSpace();
+
+    if (oldest > head) {
+        ereport(WARNING, (errmsg(UNDOFORMAT("release_residual_record_space start:%lu "
+            "is bigger than end:%lu."),
+            oldest, head)));
+        return 0;
+    }
+    return (head / UNDO_LOG_SEGMENT_SIZE) - (oldest / UNDO_LOG_SEGMENT_SIZE);
+}
+
/* Release slot space from starturp to endurp and advance discard. */
void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, int *forceRecycleSize)
{
UndoLogOffset end = UNDO_PTR_GET_OFFSET(endSlotPtr);
- int startSegno = (int)(slotSpace_.Head() / UNDO_META_SEGMENT_SIZE);
+ UndoLogOffset head;
+ if (IS_EXRTO_STANDBY_READ) {
+ head = slotSpace_.Head_exrto();
+ } else {
+ head = slotSpace_.Head();
+ }
+ int startSegno = (int)(head / UNDO_META_SEGMENT_SIZE);
int endSegno = (int)(end / UNDO_META_SEGMENT_SIZE);
if (unlikely(startSegno < endSegno)) {
@@ -260,10 +346,10 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr,
}
ForgetUndoBuffer(startSegno * UNDO_META_SEGMENT_SIZE, endSegno * UNDO_META_SEGMENT_SIZE, UNDO_SLOT_DB_OID);
slotSpace_.LockSpace();
- UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, slotSpace_.Head());
+ UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head);
slotSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_META_SEGMENT_SIZE, UNDO_SLOT_DB_OID);
Assert(slotSpace_.Head() <= allocateTSlotPtr_);
- if (pLevel_ == UNDO_PERMANENT) {
+ if (pLevel_ == UNDO_PERMANENT && !(IS_EXRTO_STANDBY_READ)) {
START_CRIT_SECTION();
slotSpace_.MarkDirty();
XlogUndoUnlink undoUnlink;
@@ -278,6 +364,24 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr,
return;
}
+/*
+ * Unlink residual transaction slot segment files of this zone, from the offset
+ * reported by slotSpace_.find_oldest_offset() up to slotSpace_.Head(). Lookup
+ * and unlink both run between LockSpace() and UnlockSpace().
+ *
+ * Returns the difference between the UNDO_META_SEGMENT_SIZE segment indexes of
+ * the two offsets, or 0 (after a WARNING) if the start offset exceeds the end.
+ */
+uint64 UndoZone::release_residual_slot_space()
+{
+    slotSpace_.LockSpace();
+    UndoLogOffset oldest = slotSpace_.find_oldest_offset(zid_, UNDO_SLOT_DB_OID);
+    UndoLogOffset head = slotSpace_.Head();
+    slotSpace_.unlink_residual_log(zid_, oldest, head, UNDO_SLOT_DB_OID);
+    slotSpace_.UnlockSpace();
+
+    if (oldest > head) {
+        ereport(WARNING, (errmsg(UNDOFORMAT("release_residual_slot_space start:%lu is bigger "
+            "than end:%lu."),
+            oldest, head)));
+        return 0;
+    }
+    return (head / UNDO_META_SEGMENT_SIZE) - (oldest / UNDO_META_SEGMENT_SIZE);
+}
+
void UndoZone::PrepareSwitch(void)
{
WHITEBOX_TEST_STUB(UNDO_PREPARE_SWITCH_FAILED, WhiteboxDefaultErrorEmit);
@@ -513,10 +617,19 @@ static void RecoveryZone(UndoZone *uzone,
uzone->SetLSN(uspMetaInfo->lsn);
uzone->SetInsertURecPtr(uspMetaInfo->insertURecPtr);
uzone->SetDiscardURecPtr(uspMetaInfo->discardURecPtr);
+ uzone->set_discard_urec_ptr_exrto(uspMetaInfo->discardURecPtr);
uzone->SetForceDiscardURecPtr(uspMetaInfo->forceDiscardURecPtr);
+ uzone->set_force_discard_urec_ptr_exrto(uspMetaInfo->forceDiscardURecPtr);
uzone->SetAllocateTSlotPtr(uspMetaInfo->allocateTSlotPtr);
uzone->SetRecycleTSlotPtr(uspMetaInfo->recycleTSlotPtr);
+ uzone->set_recycle_tslot_ptr_exrto(uspMetaInfo->recycleTSlotPtr);
uzone->SetRecycleXid(uspMetaInfo->recycleXid);
+ uzone->set_recycle_xid_exrto(uspMetaInfo->recycleXid);
+ ereport(DEBUG1, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("recovery_zone id:%d, lsn:%lu, "
+ "insert_urec_ptr:%lu, discard_urec_ptr:%lu, force_discard_urec_ptr:%lu, allocate_tslot_ptr:%lu, "
+ "recycle_tslot_ptr:%lu, recycle_xid:%lu."), zoneId, uspMetaInfo->lsn, uspMetaInfo->insertURecPtr,
+ uspMetaInfo->discardURecPtr, uspMetaInfo->forceDiscardURecPtr, uspMetaInfo->allocateTSlotPtr,
+ uspMetaInfo->recycleTSlotPtr, uspMetaInfo->recycleXid)));
}
/* Initialize parameters in the undo zone. */
@@ -528,11 +641,15 @@ void InitZone(UndoZone *uzone, const int zoneId, UndoPersistence upersistence)
uzone->SetLSN(0);
uzone->SetInsertURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
uzone->SetDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ uzone->set_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
uzone->SetForceDiscardURecPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ uzone->set_force_discard_urec_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
uzone->SetAllocateTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
uzone->SetRecycleTSlotPtr(UNDO_LOG_BLOCK_HEADER_SIZE);
+ uzone->set_recycle_tslot_ptr_exrto(UNDO_LOG_BLOCK_HEADER_SIZE);
uzone->SetFrozenSlotPtr(INVALID_UNDO_SLOT_PTR);
uzone->SetRecycleXid(InvalidTransactionId);
+ uzone->set_recycle_xid_exrto(InvalidTransactionId);
uzone->SetFrozenXid(InvalidTransactionId);
uzone->SetAttachPid(0);
}
@@ -544,6 +661,7 @@ void InitUndoSpace(UndoZone *uzone, UndoSpaceType type)
usp->MarkClean();
usp->SetLSN(0);
usp->SetHead(0);
+ usp->set_head_exrto(0);
usp->SetTail(0);
}
diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp
index 311a80eb8980c9c5c70f56e36959ed33c854943d..63cf83f6f80b7cea7442d6ca98abd20958f590f4 100644
--- a/src/gausskernel/storage/buffer/bufmgr.cpp
+++ b/src/gausskernel/storage/buffer/bufmgr.cpp
@@ -129,8 +129,7 @@ static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg);
static bool ReadBuffer_common_ReadBlock(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode, bool isExtend, Block bufBlock, const XLogPhyBlock *pblk,
bool *need_repair);
-static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
- ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk);
+
static void TerminateBufferIO_common(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits);
/*
@@ -351,8 +350,6 @@ void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
static void BufferSync(int flags);
static void TerminateBufferIO_common(BufferDesc* buf, bool clear_dirty, uint32 set_flag_bits);
void shared_buffer_write_error_callback(void* arg);
-static BufferDesc* BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
- BufferAccessStrategy strategy, bool* foundPtr, const XLogPhyBlock *pblk);
static int rnode_comparator(const void* p1, const void* p2);
@@ -1676,6 +1673,10 @@ Buffer ReadBuffer(Relation reln, BlockNumber block_num)
Buffer ReadBufferExtended(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode,
BufferAccessStrategy strategy)
{
+ if (IsExtremeRtoRunning() && !AmPageRedoWorker()) {
+ return standby_read_buf(reln, fork_num, block_num, mode, strategy);
+ }
+
bool hit = false;
Buffer buf;
@@ -2227,7 +2228,7 @@ static inline void BufferDescSetPBLK(BufferDesc *buf, const XLogPhyBlock *pblk)
*
* *hit is set to true if the request was satisfied from shared buffer cache.
*/
-static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
+Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk)
{
BufferDesc *bufHdr = NULL;
@@ -2288,7 +2289,7 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
* not currently in memory.
*/
- bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, strategy, &found, pblk);
+ bufHdr = BufferAlloc(smgr->smgr_rnode.node, relpersistence, forkNum, blockNum, strategy, &found, pblk);
if (g_instance.attr.attr_security.enable_tde && IS_PGXC_DATANODE) {
bufHdr->extra->encrypt = smgr->encrypt ? true : false; /* set tde flag */
}
@@ -2670,14 +2671,15 @@ void PageCheckWhenChosedElimination(const BufferDesc *buf, uint32 oldFlags)
*
* No locks are held either at entry or exit.
*/
-static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num, BlockNumber block_num,
- BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk)
+BufferDesc *BufferAlloc(const RelFileNode &rel_file_node, char relpersistence, ForkNumber fork_num,
+ BlockNumber block_num, BufferAccessStrategy strategy, bool *found,
+ const XLogPhyBlock *pblk)
{
if (g_instance.attr.attr_storage.nvm_attr.enable_nvm) {
- return NvmBufferAlloc(smgr, relpersistence, fork_num, block_num, strategy, found, pblk);
+ return NvmBufferAlloc(rel_file_node, relpersistence, fork_num, block_num, strategy, found, pblk);
}
- Assert(!IsSegmentPhysicalRelNode(smgr->smgr_rnode.node));
+ Assert(!IsSegmentPhysicalRelNode(rel_file_node));
BufferTag new_tag; /* identity of requested block */
uint32 new_hash; /* hash value for newTag */
@@ -2692,7 +2694,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe
uint32 buf_state;
/* create a tag so we can lookup the buffer */
- INIT_BUFFERTAG(new_tag, smgr->smgr_rnode.node, fork_num, block_num);
+ INIT_BUFFERTAG(new_tag, rel_file_node, fork_num, block_num);
/* determine its hash code and partition lock ID */
new_hash = BufTableHashCode(&new_tag);
@@ -2844,8 +2846,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe
}
/* OK, do the I/O */
- TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(fork_num, block_num, smgr->smgr_rnode.node.spcNode,
- smgr->smgr_rnode.node.dbNode, smgr->smgr_rnode.node.relNode);
+ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(fork_num, block_num, rel_file_node.spcNode,
+ rel_file_node.dbNode, rel_file_node.relNode);
/* during initdb, not need flush dw file */
if (dw_enabled() && pg_atomic_read_u32(&g_instance.ckpt_cxt_ctl->current_page_writer_count) > 0) {
@@ -2868,8 +2870,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe
ScheduleBufferTagForWriteback(t_thrd.storage_cxt.BackendWritebackContext, &buf->tag);
- TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(fork_num, block_num, smgr->smgr_rnode.node.spcNode,
- smgr->smgr_rnode.node.dbNode, smgr->smgr_rnode.node.relNode);
+ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(fork_num, block_num, rel_file_node.spcNode,
+ rel_file_node.dbNode, rel_file_node.relNode);
} else {
/*
* Someone else has locked the buffer, so give it up and loop
@@ -5422,6 +5424,31 @@ void DropDatabaseBuffers(Oid dbid)
gstrace_exit(GS_TRC_ID_DropDatabaseBuffers);
}
+/*
+ * Invalidate every buffer in the pool whose tag satisfies
+ * IS_EXRTO_RELFILENODE, i.e. buffers that belong to extreme-RTO standby read.
+ */
+void buffer_drop_exrto_standby_read_buffers()
+{
+    ereport(LOG, (errmsg("buffer_drop_exrto_standby_read_buffers: start to drop buffers.")));
+    for (int buf_id = 0; buf_id < TOTAL_BUFFER_NUM; buf_id++) {
+        BufferDesc *buf_desc = GetBufferDescriptor(buf_id);
+
+        /* Unlocked pre-check: cheaply skip buffers that are not exrto buffers. */
+        if (!IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) {
+            continue;
+        }
+
+        /* The unlocked check is only a hint; decide again under the header lock. */
+        uint32 buf_state = LockBufHdr(buf_desc);
+        if (!IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) {
+            UnlockBufHdr(buf_desc, buf_state);
+            continue;
+        }
+        InvalidateBuffer(buf_desc); /* with buffer head lock released */
+    }
+}
+
/* -----------------------------------------------------------------
* PrintBufferDescs
*
@@ -5690,6 +5717,12 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
buf_desc = GetBufferDescriptor(buffer - 1);
Assert(GetPrivateRefCount(buffer) > 0);
+
+ // temp buf just for old page version, could not write to disk
+ if (pg_atomic_read_u32(&buf_desc->state) & BM_IS_TMP_BUF) {
+ return;
+ }
+
/* here, either share or exclusive lock is OK */
if (!LWLockHeldByMe(buf_desc->content_lock))
ereport(PANIC, (errcode(ERRCODE_INVALID_BUFFER),
@@ -5723,8 +5756,9 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
* The incremental checkpoint is protected by the doublewriter, the
* half-write problem does not occur.
*/
- if (!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() &&
- (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT)) {
+ bool need_write_wal =
+ (!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() && (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT));
+ if (need_write_wal) {
/*
* If we're in recovery we cannot dirty a page because of a hint.
* We can set the hint, just not dirty the page as a result so the
@@ -7352,3 +7386,18 @@ bool IsPageHitBufferPool(RelFileNode& node, ForkNumber forkNum, BlockNumber bloc
}
return false;
}
+
+/*
+ * Stash the thread's current in-progress buffer state (InProgressBuf and
+ * IsForInput) into the Parent* fields and clear InProgressBuf. The Parent
+ * slot must be empty on entry (asserted).
+ */
+void buffer_in_progress_pop()
+{
+    Assert(t_thrd.storage_cxt.ParentInProgressBuf == NULL);
+    /* Save the direction flag and the buffer before the buffer is cleared. */
+    t_thrd.storage_cxt.ParentIsForInput = t_thrd.storage_cxt.IsForInput;
+    t_thrd.storage_cxt.ParentInProgressBuf = t_thrd.storage_cxt.InProgressBuf;
+    t_thrd.storage_cxt.InProgressBuf = NULL;
+}
+
+/*
+ * Restore the thread's in-progress buffer state (InProgressBuf and
+ * IsForInput) from the Parent* fields and empty the Parent slot again.
+ */
+void buffer_in_progress_push()
+{
+    t_thrd.storage_cxt.IsForInput = t_thrd.storage_cxt.ParentIsForInput;
+    t_thrd.storage_cxt.InProgressBuf = t_thrd.storage_cxt.ParentInProgressBuf;
+    t_thrd.storage_cxt.ParentInProgressBuf = NULL;
+}
diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp
index a2bec4d0e7a7da5aea991dc0bd7bcbdbad410274..b23a6b590a5ea7f9440c5a2f67e91771c604e6b5 100755
--- a/src/gausskernel/storage/ipc/procarray.cpp
+++ b/src/gausskernel/storage/ipc/procarray.cpp
@@ -76,6 +76,7 @@
#include "access/clog.h"
#include "access/csnlog.h"
+#include "access/extreme_rto/page_redo.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "access/twophase.h"
@@ -541,6 +542,8 @@ void ProcArrayEndTransaction(PGPROC* proc, TransactionId latestXid, bool isCommi
pgxact->xmin = InvalidTransactionId;
proc->snapXmax = InvalidTransactionId;
proc->snapCSN = InvalidCommitSeqNo;
+ proc->exrto_read_lsn = 0;
+ proc->exrto_gen_snap_time = 0;
pgxact->csn_min = InvalidCommitSeqNo;
pgxact->csn_dr = InvalidCommitSeqNo;
/* must be cleared with xid/xmin: */
@@ -585,6 +588,8 @@ static inline void ProcArrayEndTransactionInternal(PGPROC* proc, PGXACT* pgxact,
pgxact->xmin = InvalidTransactionId;
proc->snapXmax = InvalidTransactionId;
proc->snapCSN = InvalidCommitSeqNo;
+ proc->exrto_read_lsn = 0;
+ proc->exrto_gen_snap_time = 0;
pgxact->csn_min = InvalidCommitSeqNo;
pgxact->csn_dr = InvalidCommitSeqNo;
/* must be cleared with xid/xmin: */
@@ -827,6 +832,8 @@ void ProcArrayClearTransaction(PGPROC* proc)
/* Clear the subtransaction-XID cache too */
pgxact->nxids = 0;
+ proc->exrto_read_lsn = 0;
+ proc->exrto_gen_snap_time = 0;
/* Free xid cache memory if needed */
ResetProcXidCache(proc, true);
}
@@ -2107,7 +2114,7 @@ RETRY:
/* reset xmin before acquiring lwlock, in case blocking redo */
t_thrd.pgxact->xmin = InvalidTransactionId;
RETRY_GET:
- if (snapshot->takenDuringRecovery && !StreamThreadAmI() &&
+ if (snapshot->takenDuringRecovery && !StreamThreadAmI() && !IS_EXRTO_READ &&
!u_sess->proc_cxt.clientIsCMAgent) {
if (InterruptPending) {
(void)pgstat_report_waitstatus(oldStatus);
@@ -2429,6 +2436,10 @@ GROUP_GET_SNAPSHOT:
(void)pgstat_report_waitstatus(oldStatus);
}
+ if (IsExtremeRtoRunning() && pmState == PM_HOT_STANDBY) {
+ extreme_rto::exrto_read_snapshot(snapshot);
+ }
+
return snapshot;
}
@@ -3200,6 +3211,59 @@ ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalRe
return pid;
}
+/*
+ * proc_array_cancel_conflicting_proc --- signal backends whose snapshot
+ * conflicts with redo.
+ *
+ * Scans the proc array while holding ProcArrayLock in shared mode. An entry
+ * is skipped when its pid is 0, its xmin is invalid, its exrto_read_lsn is
+ * invalid, or it is flagged PROC_IN_LOGICAL_DECODING. Each remaining backend
+ * whose xmin precedes or equals latest_removed_xid is marked
+ * recoveryConflictPending and sent PROCSIG_RECOVERY_CONFLICT_SNAPSHOT.
+ *
+ * latest_removed_xid: backends with xmin <= this xid are treated as conflicting.
+ * reach_max_check_times: when true, a WARNING is logged for each backend that
+ *      is still conflicting.
+ *
+ * Returns true if at least one conflicting backend was found.
+ */
+bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool reach_max_check_times)
+{
+    ProcArrayStruct* proc_array = g_instance.proc_array_idx;
+    bool conflict = false;
+
+    LWLockAcquire(ProcArrayLock, LW_SHARED);
+    for (int index = 0; index < proc_array->numProcs; index++) {
+        int pg_proc_no = proc_array->pgprocnos[index];
+        PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no];
+        PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no];
+        XLogRecPtr read_lsn = pg_proc->exrto_read_lsn;
+        TransactionId pxmin = pg_xact->xmin;
+
+        /* Only backends that currently hold a snapshot and a read LSN are of interest. */
+        if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) {
+            continue;
+        }
+
+        Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM));
+        /*
+         * Backends doing logical decoding manage xmin separately, so they
+         * are not considered here.
+         */
+        if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
+            continue;
+        }
+
+        /* cancel query when its xmin <= latest_removed_xid */
+        if (TransactionIdPrecedesOrEquals(pxmin, latest_removed_xid)) {
+            conflict = true;
+            pg_proc->recoveryConflictPending = true;
+            if (pg_proc->pid != 0) {
+                /*
+                 * Kill the pid if it's still here. If not, that's what we
+                 * wanted so ignore any errors.
+                 */
+                (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId);
+                /*
+                 * Wait a little bit for it to die so that we avoid flooding
+                 * an unresponsive backend when system is heavily loaded.
+                 * NOTE(review): this sleep happens with ProcArrayLock held.
+                 */
+                pg_usleep(5000L);
+            }
+            /* Report only the backends that are actually still in conflict. */
+            if (reach_max_check_times) {
+                ereport(WARNING, (
+                    errmsg("can not cancel thread while redo truncate, thread id = %lu", pg_proc->pid)));
+            }
+        }
+    }
+    LWLockRelease(ProcArrayLock);
+
+    return conflict;
+}
+
/*
* MinimumActiveBackends --- count backends (other than myself) that are
* in active transactions. Return true if the count exceeds the
diff --git a/src/gausskernel/storage/lmgr/lwlocknames.txt b/src/gausskernel/storage/lmgr/lwlocknames.txt
index dffbc1a6ef32e12e708cfd1d3b73bd0692c081e1..14a5191734a57a024f8e864f34af4234a5a68bdf 100755
--- a/src/gausskernel/storage/lmgr/lwlocknames.txt
+++ b/src/gausskernel/storage/lmgr/lwlocknames.txt
@@ -140,3 +140,4 @@ DropArchiveSlotLock 130
AboCacheLock 131
OndemandXLogMemAllocLock 132
OndemandXLogFileHandleLock 133
+ExrtoSnapshotLock 134
diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp
index 8830f5863701387ac2359c261da00d96caaac709..29ee4bf0b2a7fa2db586ece38c45e6556ef53935 100755
--- a/src/gausskernel/storage/lmgr/proc.cpp
+++ b/src/gausskernel/storage/lmgr/proc.cpp
@@ -950,6 +950,8 @@ void InitProcess(void)
t_thrd.proc->snap_refcnt_bitmap = 0;
#endif
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
/* Check that group locking fields are in a proper initial state. */
Assert(t_thrd.proc->lockGroupLeader == NULL);
Assert(dlist_is_empty(&t_thrd.proc->lockGroupMembers));
@@ -1109,6 +1111,8 @@ void InitAuxiliaryProcess(void)
t_thrd.pgxact->xmin = InvalidTransactionId;
t_thrd.proc->snapXmax = InvalidTransactionId;
t_thrd.proc->snapCSN = InvalidCommitSeqNo;
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
t_thrd.pgxact->csn_min = InvalidCommitSeqNo;
t_thrd.pgxact->csn_dr = InvalidCommitSeqNo;
t_thrd.proc->backendId = InvalidBackendId;
diff --git a/src/gausskernel/storage/nvm/nvmbuffer.cpp b/src/gausskernel/storage/nvm/nvmbuffer.cpp
index 5ade48cab920ea88cb199cbb2cac30de96ec157f..0b3d5918cd0408554ab951bc4f0bcd4f109fb9e4 100644
--- a/src/gausskernel/storage/nvm/nvmbuffer.cpp
+++ b/src/gausskernel/storage/nvm/nvmbuffer.cpp
@@ -255,10 +255,10 @@ restart:
return;
}
-BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num,
+BufferDesc *NvmBufferAlloc(const RelFileNode& rel_file_node, char relpersistence, ForkNumber fork_num,
BlockNumber block_num, BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk)
{
- Assert(!IsSegmentPhysicalRelNode(smgr->smgr_rnode.node));
+ Assert(!IsSegmentPhysicalRelNode(rel_file_node));
BufferTag new_tag; /* identity of requested block */
uint32 new_hash; /* hash value for newTag */
@@ -276,7 +276,7 @@ BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fo
errno_t rc;
/* create a tag so we can lookup the buffer */
- INIT_BUFFERTAG(new_tag, smgr->smgr_rnode.node, fork_num, block_num);
+ INIT_BUFFERTAG(new_tag, rel_file_node, fork_num, block_num);
/* determine its hash code and partition lock ID */
new_hash = BufTableHashCode(&new_tag);
diff --git a/src/gausskernel/storage/page/bufpage.cpp b/src/gausskernel/storage/page/bufpage.cpp
index 153ab6beecb69e83f066c2448e8a15f902d2a5ab..4b1a456c23d9e961ad3de1e4d9a1aba99c4515de 100644
--- a/src/gausskernel/storage/page/bufpage.cpp
+++ b/src/gausskernel/storage/page/bufpage.cpp
@@ -60,6 +60,7 @@ bool PageIsVerified(Page page, BlockNumber blkno)
bool header_sane = false;
bool all_zeroes = false;
uint16 checksum = 0;
+ bool is_exrto_page = bool(p->pd_flags & PD_EXRTO_PAGE);
/*
* Don't verify page data unless the page passes basic non-zero test
@@ -76,8 +77,8 @@ bool PageIsVerified(Page page, BlockNumber blkno)
* the block can still reveal problems, which is why we offer the
* checksum option.
*/
- if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && p->pd_lower <= p->pd_upper && p->pd_upper <= p->pd_special &&
- p->pd_special <= BLCKSZ && p->pd_special == MAXALIGN(p->pd_special)) {
+ if (is_exrto_page || ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && p->pd_lower <= p->pd_upper &&
+ p->pd_upper <= p->pd_special && p->pd_special <= BLCKSZ && p->pd_special == MAXALIGN(p->pd_special))) {
header_sane = true;
}
diff --git a/src/gausskernel/storage/replication/basebackup.cpp b/src/gausskernel/storage/replication/basebackup.cpp
index 9542a425e565923ed1a3dcb2d91a6ed14c351efb..e8523cfc6f5a12d2ea66a713ce1a4034f244b0c7 100755
--- a/src/gausskernel/storage/replication/basebackup.cpp
+++ b/src/gausskernel/storage/replication/basebackup.cpp
@@ -19,6 +19,7 @@
#include "access/xlog_internal.h" /* for pg_start/stop_backup */
#include "access/cbmparsexlog.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
#include "catalog/catalog.h"
#include "catalog/pg_type.h"
#include "gs_thread.h"
@@ -1216,6 +1217,9 @@ bool IsSkipDir(const char * dirName)
/* Skip temporary files */
if (strncmp(dirName, PG_TEMP_FILE_PREFIX, strlen(PG_TEMP_FILE_PREFIX)) == 0)
return true;
+ if (strncmp(dirName, EXRTO_FILE_DIR, strlen(EXRTO_FILE_DIR)) == 0) {
+ return true;
+ }
/*
* If there's a backup_label file, it belongs to a backup started by
diff --git a/src/gausskernel/storage/replication/slot.cpp b/src/gausskernel/storage/replication/slot.cpp
index c26f39ceb6152af3a62d0d21ab6bacfe3c56e886..587929995039a488d3f3544bed8243945794086f 100644
--- a/src/gausskernel/storage/replication/slot.cpp
+++ b/src/gausskernel/storage/replication/slot.cpp
@@ -691,6 +691,8 @@ void ReplicationSlotRelease(void)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
t_thrd.pgxact->xmin = InvalidTransactionId;
t_thrd.pgxact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING;
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
LWLockRelease(ProcArrayLock);
}
diff --git a/src/gausskernel/storage/replication/walreceiver.cpp b/src/gausskernel/storage/replication/walreceiver.cpp
index 9a21024213b5de33ddfc1f0c259972a165fa4349..ba49b9ae1fc32bef0d201dc13f1f17ba05042af4 100755
--- a/src/gausskernel/storage/replication/walreceiver.cpp
+++ b/src/gausskernel/storage/replication/walreceiver.cpp
@@ -1699,6 +1699,8 @@ static void XLogWalRcvSendHSFeedback(void)
else
xmin = InvalidTransactionId;
t_thrd.pgxact->xmin = InvalidTransactionId;
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
/*
* Always send feedback message.
*/
diff --git a/src/gausskernel/storage/replication/walsender.cpp b/src/gausskernel/storage/replication/walsender.cpp
index 6d4a2ed3ea6b158a0a1a791f86b6d9e116d455d9..f15b692f663b78be65914519abc4c1f6c0bf8fe8 100755
--- a/src/gausskernel/storage/replication/walsender.cpp
+++ b/src/gausskernel/storage/replication/walsender.cpp
@@ -2945,6 +2945,8 @@ static void PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin)
SpinLockAcquire(&slot->mutex);
t_thrd.pgxact->xmin = InvalidTransactionId;
+ t_thrd.proc->exrto_read_lsn = 0;
+ t_thrd.proc->exrto_gen_snap_time = 0;
/*
* For physical replication we don't need the the interlock provided
* by xmin and effective_xmin since the consequences of a missed increase
diff --git a/src/gausskernel/storage/smgr/Makefile b/src/gausskernel/storage/smgr/Makefile
index 0e7ef320b1275bfa93db8f4a9ce5b72adb5557dd..b6bca4a44c19f6e12aec56e466dcfd0e6d5b3fd8 100644
--- a/src/gausskernel/storage/smgr/Makefile
+++ b/src/gausskernel/storage/smgr/Makefile
@@ -9,7 +9,7 @@ ifneq "$(MAKECMDGOALS)" "clean"
endif
endif
endif
-OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o page_compression.o
+OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o page_compression.o storage_exrto_file.o
SUBDIRS = segment cfs
diff --git a/src/gausskernel/storage/smgr/smgr.cpp b/src/gausskernel/storage/smgr/smgr.cpp
index f4b4c60b3c14e519ae25c35b4ff940b147b23a2d..b7ff7a4ffd48b6b4209ebab100acf36efa305f53 100755
--- a/src/gausskernel/storage/smgr/smgr.cpp
+++ b/src/gausskernel/storage/smgr/smgr.cpp
@@ -122,14 +122,37 @@ static const f_smgr smgrsw[] = {
seg_async_write,
seg_move_buckets
},
+
+ /* extreme-rto standby read */
+ {
+ exrto_init,
+ NULL,
+ exrto_close,
+ NULL,
+ exrto_exists,
+ exrto_unlink,
+ exrto_extend,
+ NULL,
+ exrto_read,
+ exrto_write,
+ exrto_writeback,
+ exrto_nblocks,
+ exrto_truncate,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ }
};
static const int NSmgr = lengthof(smgrsw);
static void push_unlink_rel_one_fork_to_hashtbl(RelFileNode node, ForkNumber forkNum);
-static inline int ChooseSmgrManager(RelFileNode rnode)
+static inline int ChooseSmgrManager(const RelFileNode& rnode)
{
- if (rnode.dbNode == UNDO_DB_OID || rnode.dbNode == UNDO_SLOT_DB_OID) {
+ if (IS_EXRTO_RELFILENODE(rnode)) {
+ return EXRTO_MANAGER;
+ } else if (rnode.dbNode == UNDO_DB_OID || rnode.dbNode == UNDO_SLOT_DB_OID) {
return UNDO_MANAGER;
} else if (IsSegmentFileNode(rnode)) {
return SEGMENT_MANAGER;
@@ -313,7 +336,7 @@ SMgrRelation smgropen(const RelFileNode& rnode, BackendId backend, int col /* =
reln->smgr_bcm_nblocks[colnum] = InvalidBlockNumber;
}
- if (reln->smgr_which == UNDO_MANAGER) {
+ if (reln->smgr_which == UNDO_MANAGER || reln->smgr_which == EXRTO_MANAGER) {
fdNeeded = 1;
}
@@ -411,8 +434,15 @@ void smgrclose(SMgrRelation reln, BlockNumber blockNum)
ereport(DEBUG5, (errmsg("smgr close %p", reln)));
SMgrRelation* owner = NULL;
int forknum;
+ int max_forknum;
+
+ if (reln->smgr_which == EXRTO_MANAGER && reln->smgr_rnode.node.spcNode == EXRTO_BLOCK_INFO_SPACE_OID) {
+ max_forknum = EXRTO_FORK_NUM;
+ } else {
+ max_forknum = reln->md_fdarray_size;
+ }
- for (forknum = 0; forknum < (int)(reln->md_fdarray_size); forknum++) {
+ for (forknum = 0; forknum < max_forknum; forknum++) {
(*(smgrsw[reln->smgr_which].smgr_close))(reln, (ForkNumber)forknum, blockNum);
}
owner = reln->smgr_owner;
@@ -567,12 +597,19 @@ void smgrdounlink(SMgrRelation reln, bool isRedo, BlockNumber blockNum)
RelFileNodeBackend rnode = reln->smgr_rnode;
int which = reln->smgr_which;
int forknum;
+ int max_forknum;
HTAB *unlink_rel_hashtbl = g_instance.bgwriter_cxt.unlink_rel_hashtbl;
DelFileTag *entry = NULL;
bool found = false;
+ if (which == EXRTO_MANAGER && reln->smgr_rnode.node.spcNode == EXRTO_BLOCK_INFO_SPACE_OID) {
+ max_forknum = EXRTO_FORK_NUM;
+ } else {
+ max_forknum = reln->md_fdarray_size;
+ }
+
/* Close the forks at smgr level */
- for (forknum = 0; forknum < (int)(reln->md_fdarray_size); forknum++) {
+ for (forknum = 0; forknum < max_forknum; forknum++) {
(*(smgrsw[which].smgr_close))(reln, (ForkNumber)forknum, blockNum);
}
if (which == UNDO_MANAGER) {
diff --git a/src/gausskernel/storage/smgr/storage_exrto_file.cpp b/src/gausskernel/storage/smgr/storage_exrto_file.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a3234cee4fd2310deffa59b3fcdcc208964561e6
--- /dev/null
+++ b/src/gausskernel/storage/smgr/storage_exrto_file.cpp
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * storage_exrto_file.cpp
+ *
+ * IDENTIFICATION
+ * src/gausskernel/storage/smgr/storage_exrto_file.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "miscadmin.h"
+#include "storage/smgr/fd.h"
+#include "storage/vfd.h"
+#include "storage/smgr/smgr.h"
+#include "utils/memutils.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+
+/* Number of BLCKSZ-sized blocks that fit in one file of each standby-read file kind. */
+const uint32 EXRTO_BASE_PAGE_FILE_BLOCKS = EXRTO_BASE_PAGE_FILE_MAXSIZE / BLCKSZ;
+const uint32 EXRTO_LSN_INFO_FILE_BLOCKS = EXRTO_LSN_INFO_FILE_MAXSIZE / BLCKSZ;
+const uint32 EXRTO_BLOCK_INFO_FILE_BLOCKS = RELSEG_SIZE;
+
+/* Growth step, in blocks, that exrto_extend() uses as its lower bound when enlarging a file. */
+const int EXTEND_BLOCKS_NUM = 16;
+/* Sentinel total block number; extend/read/write report an error when they compute it. */
+const uint64 EXRTO_INVALID_BLOCK_NUMBER = 0xFFFFFFFFFFFFFFFFL;
+
+/*
+ * Per-kind maximum file size in bytes and in blocks. Both tables are indexed
+ * with an ExRTOFileType value; presumably the enum order is BASE_PAGE,
+ * LSN_INFO_META, BLOCK_INFO_META -- confirm against the enum declaration.
+ */
+const uint32 EXRTO_FILE_SIZE[] = {
+    EXRTO_BASE_PAGE_FILE_MAXSIZE, EXRTO_LSN_INFO_FILE_MAXSIZE, EXRTO_BLOCK_INFO_FILE_MAXSIZE};
+const uint32 EXRTO_FILE_BLOCKS[] = {
+    EXRTO_BASE_PAGE_FILE_BLOCKS, EXRTO_LSN_INFO_FILE_BLOCKS, EXRTO_BLOCK_INFO_FILE_BLOCKS};
+
+/*
+ * Per-relation open-file cache kept in SMgrRelation->fileState.
+ * For every fork it remembers at most one open file and the segment number
+ * that file belongs to; file[] holds -1 when no file is open for that fork.
+ */
+typedef struct _ExRTOFileState {
+    uint64 segno[EXRTO_FORK_NUM]; /* segment number of the file currently open for each fork */
+    File file[EXRTO_FORK_NUM];    /* file handle per fork, -1 when not open */
+} ExRTOFileState;
+
+/*
+ * Classify a tablespace OID into the standby-read file kind it stores.
+ * Any OID that is neither the base-page nor the lsn-info space is treated
+ * as block-info meta.
+ */
+static inline ExRTOFileType exrto_file_type(uint32 space_oid)
+{
+    if (space_oid == EXRTO_BASE_PAGE_SPACE_OID) {
+        return BASE_PAGE;
+    }
+    if (space_oid == EXRTO_LSN_INFO_SPACE_OID) {
+        return LSN_INFO_META;
+    }
+    return BLOCK_INFO_META;
+}
+
+/* Record which file handle and which segment number are cached for one fork. */
+static inline void set_file_state(ExRTOFileState *state, ForkNumber forknum, uint64 segno, File file)
+{
+    state->file[forknum] = file;
+    state->segno[forknum] = segno;
+}
+
+/*
+ * Compute the 64-bit logical block number of a standby-read page.
+ * Base-page and lsn-info files combine 'high' (upper half) with 'low'
+ * (lower half); block-info meta files use 'low' alone.
+ */
+static inline uint64 get_total_block_num(ExRTOFileType type, uint32 high, uint32 low)
+{
+    const bool uses_high_half = (type == BASE_PAGE || type == LSN_INFO_META);
+    uint64 total = (uint64)low;
+
+    if (uses_high_half) {
+        total |= ((uint64)high << UINT64_HALF);
+    }
+    return total;
+}
+
+/*
+ * Allocate a zeroed ExRTOFileState with every fork marked as "no file open".
+ * The memory comes from the local-syscache database context when the local
+ * syscache is enabled, otherwise from the session's standby-read file context.
+ */
+static ExRTOFileState *alloc_file_state(void)
+{
+    MemoryContext owner_cxt = EnableLocalSysCache() ? t_thrd.lsc_cxt.lsc->lsc_mydb_memcxt
+                                                    : u_sess->storage_cxt.exrto_standby_read_file_cxt;
+    ExRTOFileState *fresh = (ExRTOFileState *)MemoryContextAllocZero(owner_cxt, sizeof(ExRTOFileState));
+
+    int fork_idx = 0;
+    while (fork_idx < EXRTO_FORK_NUM) {
+        fresh->file[fork_idx] = -1;
+        fork_idx++;
+    }
+
+    return fresh;
+}
+
+/*
+ * Build the path of one standby-read file into 'path'.
+ *
+ * Base-page and lsn-info files are named from the batch id and worker id
+ * packed in node.dbNode followed by the segment number in hex; block-info
+ * meta files are named "<dbNode>_<relNode>_<fork name>.<segno>".
+ * The result is "<EXRTO_FILE_DIR>/<sub dir of the file kind>/<file name>".
+ *
+ * 'path' is written with a limit of EXRTO_FILE_PATH_LEN bytes, so the caller
+ * must supply a buffer of at least that size.
+ */
+static void exrto_get_file_path(const RelFileNode node, ForkNumber forknum, uint64 segno, char *path)
+{
+    ExRTOFileType type;
+    char filename[EXRTO_FILE_PATH_LEN];
+    errno_t rc = EOK;
+
+    type = exrto_file_type(node.spcNode);
+    if (type == BASE_PAGE || type == LSN_INFO_META) {
+        uint32 batch_id = node.dbNode >> LOW_WORKERID_BITS;
+        uint32 worker_id = node.dbNode & LOW_WORKERID_MASK;
+        /* segno is 64 bits wide: it needs the 'l' length modifier, plain %X expects unsigned int */
+        rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%02X%02X%016lX",
+                        batch_id, worker_id, segno);
+    } else {
+        rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%u_%u_%s.%u",
+                        node.dbNode, node.relNode, forkNames[forknum], (uint32)segno);
+    }
+    securec_check_ss(rc, "\0", "\0");
+
+    rc = snprintf_s(path, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s/%s",
+                    EXRTO_FILE_DIR, EXRTO_FILE_SUB_DIR[type], filename);
+    securec_check_ss(rc, "\0", "\0");
+
+    return;
+}
+
+/*
+ * Return the number of the segment file that contains 'blocknum' for the
+ * given relation: the logical block number divided by the per-file block
+ * count of the relation's file kind.
+ */
+static uint64 get_seg_num(const RelFileNodeBackend& smgr_rnode, BlockNumber blocknum)
+{
+    const ExRTOFileType file_type = exrto_file_type(smgr_rnode.node.spcNode);
+    const uint64 logical_block = get_total_block_num(file_type, smgr_rnode.node.relNode, blocknum);
+
+    return logical_block / EXRTO_FILE_BLOCKS[file_type];
+}
+
+/*
+ * Build the RelFileNodeForkNum identifying one segment file.
+ * The structure is zeroed first; for base-page and lsn-info files the upper
+ * half of 'segno' replaces relNode, and the lower 32 bits go into 'segno'.
+ */
+static RelFileNodeForkNum exrto_file_relfilenode_forknum_fill(const RelFileNodeBackend &rnode,
+    ForkNumber forknum, uint64 segno)
+{
+    RelFileNodeForkNum filled;
+    errno_t rc = memset_s(&filled, sizeof(RelFileNodeForkNum), 0, sizeof(RelFileNodeForkNum));
+    securec_check(rc, "", "");
+
+    filled.rnode = rnode;
+
+    const ExRTOFileType file_type = exrto_file_type(rnode.node.spcNode);
+    const bool segno_spills_into_relnode = (file_type == BASE_PAGE || file_type == LSN_INFO_META);
+    if (segno_spills_into_relnode) {
+        filled.rnode.node.relNode = segno >> UINT64_HALF;
+    }
+
+    filled.forknumber = forknum;
+    filled.segno = (uint32)segno;
+    filled.storage = ROW_STORE;
+
+    return filled;
+}
+
+/*
+ * Make sure the segment file containing 'blocknum' is open for 'forknum'
+ * and return the relation's file state.
+ *
+ * A cached file for the same segment is reused; a cached file for another
+ * segment is closed first. The file state is allocated on first use.
+ * 'behavior' controls what happens when the file cannot be opened:
+ *   EXTENSION_CREATE       - O_CREAT is added to the open flags;
+ *   EXTENSION_RETURN_NULL  - NULL is returned if the failure looks like a
+ *                            missing file (FILE_POSSIBLY_DELETED);
+ *   otherwise              - the fork is closed and an ERROR is raised.
+ */
+static ExRTOFileState *exrto_open_file(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+                                       ExtensionBehavior behavior)
+{
+    ExRTOFileState* state = (ExRTOFileState *)reln->fileState;
+    uint64 segno;
+    uint32 flags = O_RDWR | PG_BINARY;
+    char file_path[EXRTO_FILE_PATH_LEN];
+    RelFileNodeForkNum filenode;
+    File fd;
+
+    segno = get_seg_num(reln->smgr_rnode, blocknum);
+    /* No work if already open */
+    if (state != NULL) {
+        if (state->file[forknum] > 0) {
+            if (state->segno[forknum] == segno) {
+                return state;
+            }
+            /* This is not the file we're looking for. */
+            FileClose(state->file[forknum]);
+        }
+    } else {
+        state = alloc_file_state();
+        reln->fileState = state;
+    }
+    /* Mark the fork as closed until the new file has been opened successfully. */
+    set_file_state(state, forknum, 0, -1);
+
+    if (behavior == EXTENSION_CREATE) {
+        flags |= O_CREAT;
+    }
+    ADIO_RUN() {
+        flags |= O_DIRECT;
+    }
+    ADIO_END();
+
+    exrto_get_file_path(reln->smgr_rnode.node, forknum, segno, file_path);
+    filenode = exrto_file_relfilenode_forknum_fill(reln->smgr_rnode, forknum, segno);
+    fd = DataFileIdOpenFile(file_path, filenode, (int)flags, S_IRUSR | S_IWUSR);
+    if (fd < 0) {
+        /* A missing file is not an error for callers that asked for NULL. */
+        if ((behavior == EXTENSION_RETURN_NULL) && FILE_POSSIBLY_DELETED(errno)) {
+            return NULL;
+        }
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        ereport(ERROR,
+            (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", file_path)));
+    }
+
+    /* Remember the freshly opened file together with its segment number. */
+    set_file_state(state, forknum, segno, fd);
+
+    return state;
+}
+
+/*
+ * Return the size, in whole blocks, of the file currently open for 'forknum'
+ * in 'state'. A partial block at end of file is not counted.
+ *
+ * On seek failure the fork is closed and an ERROR is raised.
+ */
+BlockNumber get_single_file_nblocks(SMgrRelation reln, ForkNumber forknum, const ExRTOFileState*state)
+{
+    Assert(state != NULL);
+
+    off_t len = FileSeek(state->file[forknum], 0L, SEEK_END);
+    if (len < 0) {
+        int save_errno = errno;
+        /*
+         * The name returned by FilePathName() belongs to the open file, so
+         * take a private copy before exrto_close() closes that file.
+         */
+        char *filename = pstrdup(FilePathName(state->file[forknum]));
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        /* restore the seek failure code for %m; closing may have changed errno */
+        errno = save_errno;
+        ereport(ERROR, (errcode_for_file_access(),
+            errmsg("could not seek to end of file \"%s\": %m", filename)));
+    }
+
+    /* note that this calculation will ignore any partial block at EOF */
+    return (BlockNumber)(len / BLCKSZ);
+}
+
+/*
+ * Storage-manager init hook: create the session memory context used for
+ * standby-read file state. Nothing is done when the local syscache is enabled.
+ */
+void exrto_init(void)
+{
+    if (!EnableLocalSysCache()) {
+        Assert(u_sess->storage_cxt.exrto_standby_read_file_cxt == NULL);
+        u_sess->storage_cxt.exrto_standby_read_file_cxt =
+            AllocSetContextCreate(u_sess->top_mem_cxt, "ExrtoFileSmgr", ALLOCSET_DEFAULT_SIZES);
+    }
+}
+
+/*
+ * Close the file cached for 'forknum', if any.
+ *
+ * The file state is shared by all forks of the relation, so it is released
+ * only once no fork has an open file left. Freeing it while another fork is
+ * still open would lose that fork's file handle without closing it.
+ * 'blocknum' is unused.
+ */
+void exrto_close(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+    ExRTOFileState* state = (ExRTOFileState*)reln->fileState;
+
+    /* No work if already closed */
+    if (state == NULL) {
+        return;
+    }
+
+    /* if not closed already; fork numbers outside the cached range own no file */
+    if (forknum >= 0 && forknum < EXRTO_FORK_NUM && state->file[forknum] >= 0) {
+        File fd = state->file[forknum];
+        /* forget the handle first so the state never refers to a closed file */
+        state->file[forknum] = -1;
+        FileClose(fd);
+    }
+
+    /* keep the state while any other fork still has an open file */
+    for (int i = 0; i < EXRTO_FORK_NUM; i++) {
+        if (state->file[i] >= 0) {
+            return;
+        }
+    }
+
+    reln->fileState = NULL; /* prevent dangling pointer after error */
+    pfree(state);
+}
+
+/*
+ * Report whether the segment file holding 'blocknum' exists for 'forknum'.
+ * The fork is closed both before and after the probe.
+ */
+bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+    /*
+     * Drop any cached handle first so that a file unlinked since it was
+     * opened is noticed.
+     */
+    exrto_close(reln, forknum, blocknum);
+
+    ExRTOFileState *probe = exrto_open_file(reln, forknum, blocknum, EXTENSION_RETURN_NULL);
+    const bool found_on_disk = (probe != NULL);
+
+    exrto_close(reln, forknum, blocknum);
+    return found_on_disk;
+}
+
+/*
+ * Remove one segment file.
+ *
+ * Returns false when the file cannot be stat'ed (a WARNING is logged unless
+ * the reason is ENOENT); returns true otherwise, even if the unlink itself
+ * fails, in which case a WARNING is logged.
+ */
+bool exrto_unlink_single_file(const RelFileNodeBackend &rnode, ForkNumber forknum, uint64 segno)
+{
+    char target[EXRTO_FILE_PATH_LEN];
+    struct stat info;
+
+    exrto_get_file_path(rnode.node, forknum, segno, target);
+
+    if (stat(target, &info) >= 0) {
+        if (unlink(target) < 0) {
+            ereport(WARNING, (errcode_for_file_access(),
+                errmsg("could not remove file \"%s\": %m", target)));
+        }
+        return true;
+    }
+
+    if (errno != ENOENT) {
+        ereport(WARNING, (errcode_for_file_access(),
+            errmsg("could not stat file \"%s\" before removing: %m", target)));
+    }
+    return false;
+}
+
+/*
+ * Remove files of one fork.
+ *
+ * Block-info meta: all files of the database/relation are removed through
+ * remove_block_meta_info_files_of_db(). Base-page and lsn-info: only the
+ * segments before the one containing 'blocknum' are removed, newest first,
+ * stopping at the first segment that cannot be stat'ed.
+ */
+void exrto_unlink_file(const RelFileNodeBackend &rnode, ForkNumber forknum, BlockNumber blocknum)
+{
+    const ExRTOFileType type = exrto_file_type(rnode.node.spcNode);
+
+    if (type == BLOCK_INFO_META) {
+        /* unlink all files */
+        extreme_rto_standby_read::remove_block_meta_info_files_of_db(rnode.node.dbNode, rnode.node.relNode);
+        return;
+    }
+    if (type != BASE_PAGE && type != LSN_INFO_META) {
+        return;
+    }
+
+    /* just unlink the files before the file where blocknum is */
+    for (uint64 remaining = get_seg_num(rnode, blocknum); remaining > 0; remaining--) {
+        if (!exrto_unlink_single_file(rnode, forknum, remaining - 1)) {
+            return;
+        }
+    }
+}
+
+/*
+ * Storage-manager unlink hook. Base-page and lsn-info relations always use
+ * the main fork; for other relations InvalidForkNumber means "every fork".
+ * 'is_redo' is unused.
+ */
+void exrto_unlink(const RelFileNodeBackend &rnode, ForkNumber forknum, bool is_redo, BlockNumber blocknum)
+{
+    const ExRTOFileType type = exrto_file_type(rnode.node.spcNode);
+    if (type == BASE_PAGE || type == LSN_INFO_META) {
+        forknum = MAIN_FORKNUM;
+    }
+
+    if (forknum != InvalidForkNumber) {
+        exrto_unlink_file(rnode, forknum, blocknum);
+        return;
+    }
+
+    for (int each_fork = 0; each_fork < EXRTO_FORK_NUM; each_fork++) {
+        exrto_unlink_file(rnode, (ForkNumber)each_fork, blocknum);
+    }
+}
+
+/*
+ * Grow the segment file so that it covers 'blocknum'.
+ *
+ * Nothing is done when the file already reaches the block. Otherwise the file
+ * grows by the larger of EXTEND_BLOCKS_NUM blocks and the distance needed to
+ * include the block, capped so that the file never exceeds the maximum size
+ * of its kind. 'buffer' and 'skip_fsync' are unused.
+ *
+ * On failure the fork is closed and an ERROR is raised.
+ */
+void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skip_fsync)
+{
+    ExRTOFileState *state = NULL;
+    ExRTOFileType type;
+    uint64 total_block_num;
+    off_t seekpos;
+    int nbytes;
+    struct stat file_stat;
+    char* filename;
+
+    type = exrto_file_type(reln->smgr_rnode.node.spcNode);
+    total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum);
+    if (total_block_num == EXRTO_INVALID_BLOCK_NUMBER) {
+        ereport(ERROR,
+                (errmsg("cannot extend file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER)));
+    }
+    /* byte offset of the block inside its segment file */
+    seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]);
+
+    state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE);
+    filename = FilePathName(state->file[forknum]);
+    if (stat(filename, &file_stat) < 0) {
+        int save_errno = errno;
+        /*
+         * The name returned by FilePathName() belongs to the open file, so
+         * take a private copy before exrto_close() closes that file.
+         */
+        filename = pstrdup(filename);
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        /* restore the stat failure code for %m; closing may have changed errno */
+        errno = save_errno;
+        ereport(ERROR, (errmsg("could not stat file \"%s\": %m.", filename)));
+    }
+    Assert(file_stat.st_size % BLCKSZ == 0);
+    Assert(file_stat.st_size <= EXRTO_FILE_SIZE[type]);
+
+    if (seekpos < file_stat.st_size) {
+        /* no need to extend */
+        return;
+    }
+
+    int extend_size = rtl::min(rtl::max(EXTEND_BLOCKS_NUM * BLCKSZ, (int)((seekpos - file_stat.st_size) + BLCKSZ)),
+                               (int)(EXRTO_FILE_SIZE[type] - file_stat.st_size));
+    /* NOTE(review): a NULL buffer is passed here; confirm that FilePWrite() treats it as "write zeros". */
+    nbytes = FilePWrite(state->file[forknum], NULL, extend_size, file_stat.st_size);
+    if (nbytes != extend_size) {
+        int save_errno = errno;
+        /* same reason as above: copy the name before the file is closed */
+        filename = pstrdup(filename);
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        if (nbytes < 0) {
+            errno = save_errno;
+            ereport(ERROR, (errmsg("could not extend file \"%s\": %m.", filename)));
+        }
+        ereport(ERROR,
+            (errmsg("could not extend file \"%s\": wrote only %d of %d bytes.", filename, nbytes, extend_size)));
+    }
+
+    Assert(get_single_file_nblocks(reln, forknum, state) <= ((BlockNumber)EXRTO_FILE_BLOCKS[type]));
+}
+
+/*
+ * Read one block into 'buffer'.
+ *
+ * For lsn-info and block-info meta pages a missing file or a read that
+ * returns 0 bytes yields an all-zero page and SMGR_RD_OK. Any other short or
+ * failed read closes the fork and raises an ERROR.
+ *
+ * Returns SMGR_RD_OK when PageIsVerified() accepts the page, otherwise
+ * SMGR_RD_CRC_ERROR.
+ */
+SMGR_READ_STATUS exrto_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
+{
+    ExRTOFileState *state = NULL;
+    ExRTOFileType type;
+    ExtensionBehavior behavior;
+    uint64 total_block_num;
+    off_t seekpos;
+    int nbytes;
+    errno_t rc;
+
+    type = exrto_file_type(reln->smgr_rnode.node.spcNode);
+    if (type == LSN_INFO_META || type == BLOCK_INFO_META) {
+        behavior = EXTENSION_RETURN_NULL;
+    } else {
+        behavior = EXTENSION_FAIL;
+    }
+
+    total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum);
+    if (total_block_num == EXRTO_INVALID_BLOCK_NUMBER) {
+        ereport(ERROR,
+                (errmsg("cannot read file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER)));
+    }
+    /* byte offset of the block inside its segment file */
+    seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]);
+
+    state = exrto_open_file(reln, forknum, blocknum, behavior);
+    if (state == NULL) {
+        /* For lsn info and block info page, just set buffer to all zeros when not found on disk. */
+        rc = memset_s(buffer, BLCKSZ, 0, BLCKSZ);
+        securec_check(rc, "\0", "\0");
+        return SMGR_RD_OK;
+    }
+
+    nbytes = FilePRead(state->file[forknum], buffer, BLCKSZ, seekpos);
+    if (nbytes == 0 && (type == LSN_INFO_META || type == BLOCK_INFO_META)) {
+        /* reading at or past end of file: treat the page as not yet written */
+        rc = memset_s(buffer, BLCKSZ, 0, BLCKSZ);
+        securec_check(rc, "\0", "\0");
+        return SMGR_RD_OK;
+    }
+    if (nbytes != BLCKSZ) {
+        int save_errno = errno;
+        /*
+         * The name returned by FilePathName() belongs to the open file, so
+         * take a private copy before exrto_close() closes that file.
+         */
+        char *filename = pstrdup(FilePathName(state->file[forknum]));
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        if (nbytes < 0) {
+            /* restore the read failure code for %m; closing may have changed errno */
+            errno = save_errno;
+            ereport(ERROR,
+                (errmsg("could not read block %u in file \"%s\": %m.", blocknum, filename)));
+        }
+        ereport(ERROR,
+            (errmsg("could not read block %u in file \"%s\": read only %d of %d bytes.", blocknum, filename,
+            nbytes, BLCKSZ)));
+    }
+
+    if (PageIsVerified((Page)buffer, blocknum)) {
+        return SMGR_RD_OK;
+    } else {
+        return SMGR_RD_CRC_ERROR;
+    }
+}
+
+/*
+ * Write one block from 'buffer' at its position inside the segment file,
+ * creating the file if needed. 'skip_fsync' is unused.
+ *
+ * A short or failed write closes the fork and raises an ERROR.
+ */
+void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool skip_fsync)
+{
+    ExRTOFileState *state = NULL;
+    ExRTOFileType type;
+    uint64 total_block_num;
+    off_t seekpos;
+    int nbytes;
+
+    type = exrto_file_type(reln->smgr_rnode.node.spcNode);
+    total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum);
+    if (total_block_num == EXRTO_INVALID_BLOCK_NUMBER) {
+        ereport(ERROR,
+                (errmsg("cannot write file beyond %lu blocks.", EXRTO_INVALID_BLOCK_NUMBER)));
+    }
+    /* byte offset of the block inside its segment file */
+    seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]);
+
+    Assert(seekpos < (off_t)EXRTO_FILE_SIZE[type]);
+
+    state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE);
+    nbytes = FilePWrite(state->file[forknum], buffer, BLCKSZ, seekpos);
+    if (nbytes != BLCKSZ) {
+        int save_errno = errno;
+        /*
+         * The name returned by FilePathName() belongs to the open file, so
+         * take a private copy before exrto_close() closes that file.
+         */
+        char *filename = pstrdup(FilePathName(state->file[forknum]));
+        exrto_close(reln, forknum, InvalidBlockNumber);
+        if (nbytes < 0) {
+            /* restore the write failure code for %m; closing may have changed errno */
+            errno = save_errno;
+            ereport(ERROR,
+                (errmsg("could not write block %u in file \"%s\": %m.", blocknum, filename)));
+        }
+        ereport(ERROR,
+            (errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes.",
+            blocknum, filename, nbytes, BLCKSZ)));
+    }
+}
+
+/*
+ * Storage-manager nblocks hook. The real file size is not consulted:
+ * both arguments are ignored and MaxBlockNumber is always reported.
+ */
+BlockNumber exrto_nblocks(SMgrRelation, ForkNumber)
+{
+    const BlockNumber reported_size = MaxBlockNumber;
+    return reported_size;
+}
+
+/*
+ * Truncate a block-info meta relation fork to 'nblocks' blocks.
+ *
+ * Walks the segment files in order until one is missing. Segments that start
+ * beyond 'nblocks' are truncated to zero length; the segment that contains
+ * 'nblocks' is cut at the matching offset; earlier segments are left alone.
+ * Truncate failures are only logged at DEBUG1.
+ *
+ * NOTE(review): exrto_nblocks() always returns MaxBlockNumber, so the
+ * "curnblk == 0" and "nblocks == curnblk" shortcuts depend on that constant
+ * rather than on the real file size.
+ */
+void exrto_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+{
+    ExRTOFileType type = exrto_file_type(reln->smgr_rnode.node.spcNode);
+    Assert(type == BLOCK_INFO_META);
+
+    BlockNumber curnblk = exrto_nblocks(reln, forknum);
+    if (curnblk == 0) {
+        return;
+    }
+
+    if (nblocks > curnblk) {
+        ereport(ERROR,
+            (errcode_for_file_access(), errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
+            relpath(reln->smgr_rnode, forknum), nblocks, curnblk)));
+    }
+    if (nblocks == curnblk) {
+        return;
+    }
+
+    uint32 blocks_per_file = EXRTO_FILE_BLOCKS[type];
+    /* prior_blocks is the first block number of the segment being visited */
+    for (BlockNumber prior_blocks = 0;; prior_blocks += blocks_per_file) {
+        struct stat stat_buf;
+        char segpath[EXRTO_FILE_PATH_LEN];
+        uint64 segno = get_seg_num(reln->smgr_rnode, prior_blocks);
+        exrto_get_file_path(reln->smgr_rnode.node, forknum, segno, segpath);
+        /* the walk ends at the first segment file that cannot be stat'ed */
+        if (stat(segpath, &stat_buf) < 0) {
+            if (errno != ENOENT) {
+                ereport(
+                    WARNING,
+                    (errcode_for_file_access(), errmsg("could not stat file \"%s\" before truncate: %m", segpath)));
+            }
+            break;
+        }
+
+        ExRTOFileState *state = exrto_open_file(reln, forknum, prior_blocks, EXTENSION_FAIL);
+        if (prior_blocks > nblocks) {
+            /* the whole segment lies past the new end: empty it */
+            if (FileTruncate(state->file[forknum], 0) < 0) {
+                ereport(DEBUG1,
+                    (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", segpath)));
+            }
+        } else if (prior_blocks + ((BlockNumber)blocks_per_file) > nblocks) {
+            /* the new end falls inside this segment: keep only the leading blocks */
+            BlockNumber last_seg_block = nblocks - prior_blocks;
+            off_t truncate_offset = (off_t)last_seg_block * BLCKSZ;
+
+            if (FileTruncate(state->file[forknum], truncate_offset) < 0) {
+                ereport(DEBUG1,
+                    (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", segpath)));
+            }
+        }
+        exrto_close(reln, forknum, InvalidBlockNumber);
+    }
+}
+
+/*
+ * Issue FileWriteback() for 'nblocks' blocks starting at 'blocknum'.
+ *
+ * The range is processed one segment file at a time: each iteration opens
+ * the file holding the current block (creating it if needed) and passes
+ * FileWriteback() the part of the range that lies inside that file.
+ */
+void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
+{
+    ExRTOFileType type;
+    uint64 total_block_num;
+    type = exrto_file_type(reln->smgr_rnode.node.spcNode);
+    total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum);
+
+    while (nblocks > 0) {
+        BlockNumber nflush = nblocks;
+        off_t seekpos;
+        ExRTOFileState *state = NULL;
+        uint64 segnum_start, segnum_end;
+        state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE);
+        segnum_start = total_block_num / EXRTO_FILE_BLOCKS[type];
+        segnum_end = (total_block_num + nblocks - 1) / EXRTO_FILE_BLOCKS[type];
+
+        /* the range crosses a file boundary: flush only up to the end of this file */
+        if (segnum_start != segnum_end) {
+            nflush = EXRTO_FILE_BLOCKS[type] - (uint32)(total_block_num % EXRTO_FILE_BLOCKS[type]);
+        }
+
+        Assert(nflush >= 1);
+        Assert(nflush <= nblocks);
+
+        seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]);
+        FileWriteback(state->file[forknum], seekpos, (off_t)BLCKSZ * nflush);
+
+        nblocks -= nflush;
+        /* ensure that the relnode is not changed */
+        Assert(((total_block_num + nflush) >> UINT64_HALF) == (total_block_num >> UINT64_HALF));
+        total_block_num += nflush;
+        /* continue with the low 32 bits of the advanced position */
+        blocknum = (BlockNumber)total_block_num;
+    }
+}
diff --git a/src/include/access/extreme_rto/batch_redo.h b/src/include/access/extreme_rto/batch_redo.h
index 54d2a5be07a336eb2cba3f0137a84eb72f06c174..55b0c1f5dd2a2e041d871a277490b3c7cd207088 100644
--- a/src/include/access/extreme_rto/batch_redo.h
+++ b/src/include/access/extreme_rto/batch_redo.h
@@ -63,6 +63,26 @@ typedef struct redoitemhashentry {
int redoItemNum;
} RedoItemHashEntry;
+/*
+ * Copy the block identity stored in a parsed record's block head into the
+ * caller's output arguments. Any output pointer may be NULL, in which case
+ * that part is not copied.
+ */
+inline void PRXLogRecGetBlockTag(XLogRecParseState *recordBlockState, RelFileNode *rnode, BlockNumber *blknum,
+                                 ForkNumber *forknum)
+{
+    XLogBlockParse *parsed = &(recordBlockState->blockparse);
+
+    if (rnode) {
+        rnode->spcNode = parsed->blockhead.spcNode;
+        rnode->dbNode = parsed->blockhead.dbNode;
+        rnode->relNode = parsed->blockhead.relNode;
+        rnode->bucketNode = parsed->blockhead.bucketNode;
+        rnode->opt = parsed->blockhead.opt;
+    }
+
+    if (blknum) {
+        *blknum = parsed->blockhead.blkno;
+    }
+
+    if (forknum) {
+        *forknum = parsed->blockhead.forknum;
+    }
+}
+
extern void PRPrintRedoItemHashTab(HTAB *redoItemHash);
extern HTAB *PRRedoItemHashInitialize(MemoryContext context);
extern void PRTrackClearBlock(XLogRecParseState *recordBlockState, HTAB *redoItemHash);
diff --git a/src/include/access/extreme_rto/dispatcher.h b/src/include/access/extreme_rto/dispatcher.h
index 70b3a5b48904097990f72f1bfdea16d53606cba6..ed5e61058155f78a1cf5ea438244f8093f80862a 100644
--- a/src/include/access/extreme_rto/dispatcher.h
+++ b/src/include/access/extreme_rto/dispatcher.h
@@ -165,6 +165,7 @@ typedef struct {
volatile bool recoveryStop;
volatile XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM];
RedoTimeCost *startupTimeCost;
+ ExrtoSnapshotData exrto_snapshot;
} LogDispatcher;
typedef struct {
diff --git a/src/include/access/extreme_rto/page_redo.h b/src/include/access/extreme_rto/page_redo.h
index 3ffa739e6ffd63918379525952f6b014a45e694b..7d789f85871bac495c5314407fa87a345994c225 100644
--- a/src/include/access/extreme_rto/page_redo.h
+++ b/src/include/access/extreme_rto/page_redo.h
@@ -33,8 +33,10 @@
#include "nodes/pg_list.h"
#include "storage/proc.h"
+#include "access/extreme_rto/batch_redo.h"
#include "access/extreme_rto/posix_semaphore.h"
#include "access/extreme_rto/spsc_blocking_queue.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
#include "access/xlogproc.h"
#include "postmaster/pagerepair.h"
@@ -185,6 +187,7 @@ struct PageRedoWorker {
HTAB *badPageHashTbl;
char page[BLCKSZ];
XLogBlockDataParse *curRedoBlockState;
+ StandbyReadMetaInfo standby_read_meta_info;
};
@@ -240,6 +243,7 @@ void DispatchClosefdMarkToAllRedoWorker();
void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key);
const char *RedoWokerRole2Str(RedoRole role);
+uint32 GetWorkerId(const RedoItemTag *redo_item_tag, uint32 worker_count);
/* block or file repair function */
@@ -253,6 +257,9 @@ void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode);
void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type,
XLogRecPtr old_lsn, XLogPhyBlock pblk);
void SeqCheckRemoteReadAndRepairPage();
-
+void exrto_generate_snapshot(XLogRecPtr trxn_lsn);
+void exrto_read_snapshot(Snapshot snapshot);
+XLogRecPtr exrto_calculate_recycle_position(bool force_recyle);
+TransactionId exrto_calculate_recycle_xmin_for_undo();
} // namespace extreme_rto
#endif
diff --git a/src/include/access/extreme_rto/standby_read.h b/src/include/access/extreme_rto/standby_read.h
new file mode 100644
index 0000000000000000000000000000000000000000..d54e3cc42609bd30e643769f0edadbb6e0f7d25e
--- /dev/null
+++ b/src/include/access/extreme_rto/standby_read.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ---------------------------------------------------------------------------------------
+ *
+ * standby_read.h
+ *
+ * IDENTIFICATION
+ * src/include/access/extreme_rto/standby_read.h
+ *
+ * ---------------------------------------------------------------------------------------
+ */
+
+#ifndef EXTREME_RTO_STANDBY_READ_H
+#define EXTREME_RTO_STANDBY_READ_H
+
+namespace extreme_rto {
+void exrto_recycle_main();
+} /* namespace extreme_rto */
+#endif
diff --git a/src/include/access/extreme_rto/standby_read/block_info_meta.h b/src/include/access/extreme_rto/standby_read/block_info_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..b1d9eb18a34ab637c62632fb3289d75004a37697
--- /dev/null
+++ b/src/include/access/extreme_rto/standby_read/block_info_meta.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ---------------------------------------------------------------------------------------
+ *
+ * block_info_meta.h
+ *
+ *
+ *
+ * IDENTIFICATION
+ * src/include/access/extreme_rto/standby_read/block_info_meta.h
+ *
+ * ---------------------------------------------------------------------------------------
+ */
+
+#ifndef BLOCK_INFO_META_H
+#define BLOCK_INFO_META_H
+
+#include "gs_thread.h"
+#include "postgres.h"
+#include "access/xlogdefs.h"
+#include "access/extreme_rto/standby_read/lsn_info_double_list.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+#include "storage/buf/bufpage.h"
+#include "storage/buf/buf_internals.h"
+
+namespace extreme_rto_standby_read {
+
+/*
+ * Layout constants and page header of a block info meta page.
+ * NOTE(review): with an 8-byte PageXLogRecPtr the fields add up to 64 bytes, which would match
+ * BLOCK_INFO_HEAD_SIZE -- confirm.
+ */
+const static uint32 BLOCK_INFO_PAGE_HEAD_PAD_SIZE = 40;
+const static uint32 BLOCK_INFO_PAGE_VERSION = 1; // currently the first version of extreme rto standby read
+
+typedef struct _BlockInfoPageHeader {
+ PageXLogRecPtr lsn; /* LSN: next byte after last byte of wal record for last change to this page */
+ uint16 checksum; /* checksum */
+ uint16 flags; /* page flag bits; is_block_info_page_valid() tests BLOCK_INFO_PAGE_VALID_FLAG here */
+ uint32 version; /* NOTE(review): presumably BLOCK_INFO_PAGE_VERSION -- confirm against the writer */
+ uint64 total_block_num; // all blocks of this table, only update on the first page
+ uint8 pad[BLOCK_INFO_PAGE_HEAD_PAD_SIZE]; /* filler bytes, BLOCK_INFO_PAGE_HEAD_PAD_SIZE long */
+} BlockInfoPageHeader;
+
+#define BLOCK_INFO_PAGE_VALID_FLAG 0x0400
+
+/*
+ * Per-block entry of the block info meta table.
+ * NOTE(review): the fields add up to 64 bytes, which would match BLOCK_INFO_SIZE -- confirm.
+ */
+typedef struct _BlockMetaInfo {
+ uint32 timeline; /* compared with ControlFile->timeline by is_block_meta_info_valid() */
+ uint32 record_num;
+ XLogRecPtr min_lsn;
+ XLogRecPtr max_lsn;
+ uint32 flags; /* is_block_meta_info_valid() tests BLOCK_INFO_NODE_VALID_FLAG here */
+ uint32 pad;
+ LsnInfoDoubleList lsn_info_list; /* prev/next are positions in the lsn info meta table, not pointers */
+ LsnInfoDoubleList base_page_info_list; /* prev/next are positions in the lsn info meta table, not pointers */
+} BlockMetaInfo;
+
+/*
+ * Flag bits for a block info node. BLOCK_INFO_NODE_VALID_FLAG is tested against BlockMetaInfo::flags.
+ * NOTE(review): BLOCK_INFO_NODE_REFCOUNT_MASK presumably selects a reference count in the low 20 bits -- confirm.
+ */
+#define BLOCK_INFO_NODE_VALID_FLAG (1 << 24)
+#define BLOCK_INFO_NODE_UPDATE_FLAG (1 << 25)
+#define BLOCK_INFO_NODE_REFCOUNT_MASK 0xFFFFF
+/*
+ * True when the update bit is set in _flags. The argument is parenthesized so that a compound
+ * expression such as (a | b) is masked as a whole instead of being split by operator precedence.
+ */
+#define IS_BLOCK_INFO_UPDATING(_flags) (((_flags) & BLOCK_INFO_NODE_UPDATE_FLAG) == BLOCK_INFO_NODE_UPDATE_FLAG)
+
+const static uint32 BLOCK_INFO_HEAD_SIZE = 64; // do not modify
+const static uint32 BLOCK_INFO_SIZE = 64; // do not modify
+
+static const uint32 BLOCK_INFO_NUM_PER_PAGE = (BLCKSZ - BLOCK_INFO_HEAD_SIZE) / BLOCK_INFO_SIZE;
+
+/*
+ * Result type of recyle_block_info().
+ * NOTE(review): "RECLYE"/"Recyle" look like misspellings of "RECYCLE"/"Recycle"; the identifiers are
+ * part of the interface, so they are left as they are.
+ */
+typedef enum {
+ STANDBY_READ_RECLYE_NONE,
+ STANDBY_READ_RECLYE_UPDATE,
+ STANDBY_READ_RECLYE_ALL,
+} StandbyReadRecyleState;
+
+BlockMetaInfo* get_block_meta_info_by_relfilenode(
+ const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer);
+void insert_lsn_to_block_info(
+ StandbyReadMetaInfo* mete_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn);
+StandbyReadRecyleState recyle_block_info(
+ const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn);
+bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, XLogRecPtr read_lsn,
+ StandbyReadLsnInfoArray* lsn_info);
+/* Report whether the page header carries the (single-bit) page valid flag. */
+static inline bool is_block_info_page_valid(BlockInfoPageHeader* header)
+{
+    const bool valid_bit_set = (header->flags & BLOCK_INFO_PAGE_VALID_FLAG) != 0;
+    return valid_bit_set;
+}
+
+/* A node is valid when its valid flag is set and its timeline equals the control file's timeline. */
+static inline bool is_block_meta_info_valid(BlockMetaInfo* meta_info)
+{
+    if ((meta_info->flags & BLOCK_INFO_NODE_VALID_FLAG) != BLOCK_INFO_NODE_VALID_FLAG) {
+        return false;
+    }
+    return meta_info->timeline == t_thrd.shemem_ptr_cxt.ControlFile->timeline;
+}
+
+void remove_one_block_info_file(const RelFileNode rnode);
+
+void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid = InvalidOid);
+
+} // namespace extreme_rto_standby_read
+
+#endif
\ No newline at end of file
diff --git a/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h b/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h
new file mode 100644
index 0000000000000000000000000000000000000000..c3df271e468ab0d50cfd926f3e3b9b8a688267cf
--- /dev/null
+++ b/src/include/access/extreme_rto/standby_read/lsn_info_double_list.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ---------------------------------------------------------------------------------------
+ *
+ * lsn_info_double_list.h
+ *
+ *
+ *
+ * IDENTIFICATION
+ * src/include/access/extreme_rto/standby_read/lsn_info_double_list.h
+ *
+ * ---------------------------------------------------------------------------------------
+ */
+
+#ifndef LSN_INFO_DOUBLE_LIST_H
+#define LSN_INFO_DOUBLE_LIST_H
+
+#include "gs_thread.h"
+#include "postgres.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+
+namespace extreme_rto_standby_read {
+/* Position of a node in the lsn info meta table; used where a pointer would normally link nodes. */
+typedef uint64 LsnInfoPosition;
+
+/* All-ones sentinel position. NOTE(review): presumably marks the list head / "no node" -- confirm. */
+static const LsnInfoPosition LSN_INFO_LIST_HEAD = 0xFFFFFFFFFFFFFFFFL;
+
+/* A position counts as invalid exactly when it equals the all-ones value. */
+#define INFO_POSITION_IS_VALID(p) ((p) != 0xFFFFFFFFFFFFFFFFL)
+#define INFO_POSITION_IS_INVALID(p) ((p) == 0xFFFFFFFFFFFFFFFFL)
+/* Doubly linked list links expressed as table positions. */
+typedef struct _LsnInfoDoubleList {
+ LsnInfoPosition prev; // not pointer, is position in lsn info meta table
+ LsnInfoPosition next; // not pointer, is position in lsn info meta table
+} LsnInfoDoubleList;
+
+void lsn_info_list_init(LsnInfoDoubleList* node);
+void info_list_modify_old_tail(StandbyReadMetaInfo *meta_info, LsnInfoPosition old_tail_pos,
+ LsnInfoPosition insert_pos, XLogRecPtr current_page_lsn, XLogRecPtr next_lsn, bool is_lsn_info);
+} // namespace extreme_rto_standby_read
+#endif
\ No newline at end of file
diff --git a/src/include/access/extreme_rto/standby_read/lsn_info_meta.h b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..7694bb984afa4ff2a9854e03475553dac3f544fe
--- /dev/null
+++ b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ---------------------------------------------------------------------------------------
+ *
+ * lsn_info_meta.h
+ *
+ *
+ *
+ * IDENTIFICATION
+ * src/include/access/extreme_rto/standby_read/lsn_info_meta.h
+ *
+ * ---------------------------------------------------------------------------------------
+ */
+
+#ifndef LSN_INFO_META_H
+#define LSN_INFO_META_H
+
+#include "gs_thread.h"
+#include "postgres.h"
+#include "storage/buf/bufpage.h"
+#include "storage/buf/buf_internals.h"
+#include "access/extreme_rto/standby_read/lsn_info_double_list.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
+
+namespace extreme_rto_standby_read {
+/* Layout constants of the lsn info meta table. */
+const static uint32 BASE_PAGE_MAP_SIZE = 16; /* bytes in LsnInfoPageHeader::base_page_map */
+const static uint32 LSN_INFO_PAGE_HEAD_PAD_SIZE = 32; /* bytes in LsnInfoPageHeader::pad */
+const static uint32 LSN_INFO_PAGE_VERSION = 1; /* currently the first version of extreme rto standby read */
+const static uint32 LSN_NUM_PER_NODE = 5; /* entries in LsnInfoNode::lsn */
+const static uint32 BYTE_BITS = 8;
+
+/*
+ * Header of an lsn info meta page.
+ * NOTE(review): with an 8-byte PageXLogRecPtr the fields add up to 64 bytes, which would match
+ * LSN_INFO_HEAD_SIZE -- confirm.
+ */
+typedef struct _LsnInfoPageHeader {
+ PageXLogRecPtr lsn; /* LSN: next byte after last byte of wal record for last change to this page */
+ uint16 checksum; /* checksum */
+ uint16 flags; /* page flag bits; is_lsn_info_page_valid() tests LSN_INFO_PAGE_VALID_FLAG here */
+ uint32 version;
+ /* NOTE(review): presumably the bitmap behind set_base_page_map_bit()/is_base_page_map_bit_set() -- confirm */
+ uint8 base_page_map[BASE_PAGE_MAP_SIZE];
+ uint8 pad[LSN_INFO_PAGE_HEAD_PAD_SIZE];
+} LsnInfoPageHeader;
+
+/*
+ * One node of the lsn info list, holding up to LSN_NUM_PER_NODE LSNs.
+ * NOTE(review): the fields add up to 64 bytes, which would match LSN_INFO_NODE_SIZE -- confirm.
+ */
+typedef struct _LsnInfoNode {
+ LsnInfoDoubleList lsn_list; /* prev/next positions of neighbouring nodes */
+ uint32 flags; /* NOTE(review): presumably LSN_INFO_NODE_VALID_FLAG / LSN_INFO_NODE_UPDATE_FLAG bits -- confirm */
+ uint16 type; /* NOTE(review): presumably an LsnInfoType value -- confirm */
+ uint16 used; /* NOTE(review): presumably the number of occupied lsn[] slots -- confirm */
+ XLogRecPtr lsn[LSN_NUM_PER_NODE];
+} LsnInfoNode;
+
+/*
+ * Node describing one base page; it embeds an LsnInfoNode as its first member and adds a second
+ * list link plus the identity and location of the page.
+ * NOTE(review): presumably sized to BASE_PAGE_INFO_NODE_SIZE (128 bytes) -- confirm.
+ */
+typedef struct _BasePageInfoNode {
+ LsnInfoNode lsn_info_node;
+ LsnInfoDoubleList base_page_list; /* prev/next positions, see LsnInfoDoubleList */
+ XLogRecPtr cur_page_lsn;
+ RelFileNode relfilenode;
+ ForkNumber fork_num;
+ BlockNumber block_num;
+ XLogRecPtr next_base_page_lsn;
+ BasePagePosition base_page_position; /* NOTE(review): presumably where the page copy is stored -- confirm */
+} BasePageInfoNode;
+
+typedef LsnInfoNode* LsnInfo;
+typedef BasePageInfoNode* BasePageInfo;
+
+/* Fixed sizes, in bytes, used by the lsn info meta table layout (bit_to_offset() multiplies by LSN_INFO_NODE_SIZE). */
+const static uint32 LSN_INFO_HEAD_SIZE = 64; // do not modify
+const static uint32 LSN_INFO_NODE_SIZE = 64; // do not modify
+const static uint32 BASE_PAGE_INFO_NODE_SIZE = 128; // do not modify
+
+/* Flag bits tested by is_lsn_info_node_valid(), is_lsn_info_node_updating() and is_lsn_info_page_valid(). */
+#define LSN_INFO_NODE_VALID_FLAG (1 << 24)
+#define LSN_INFO_NODE_UPDATE_FLAG (1 << 25)
+#define LSN_INFO_PAGE_VALID_FLAG 0x0400
+
+/* Node kinds; is_base_page_type() and is_lsn_type() compare a uint16 type value against these. */
+typedef enum {
+ LSN_INFO_TYPE_BASE_PAGE = 1,
+ LSN_INFO_TYPE_LSNS,
+} LsnInfoType;
+
+/* True when the (single-bit) node valid flag is present in flags. */
+static inline bool is_lsn_info_node_valid(uint32 flags)
+{
+    const bool valid_bit_set = (flags & LSN_INFO_NODE_VALID_FLAG) != 0;
+    return valid_bit_set;
+}
+
+/* True when the (single-bit) node update flag is present in flags. */
+static inline bool is_lsn_info_node_updating(uint32 flags)
+{
+    const bool update_bit_set = (flags & LSN_INFO_NODE_UPDATE_FLAG) != 0;
+    return update_bit_set;
+}
+
+/* True when the page header carries the (single-bit) page valid flag. */
+static inline bool is_lsn_info_page_valid(LsnInfoPageHeader *header)
+{
+    const bool valid_bit_set = (header->flags & LSN_INFO_PAGE_VALID_FLAG) != 0;
+    return valid_bit_set;
+}
+
+/* True when the raw type value is LSN_INFO_TYPE_BASE_PAGE. */
+static inline bool is_base_page_type(uint16 type)
+{
+    const bool matches = (LSN_INFO_TYPE_BASE_PAGE == type);
+    return matches;
+}
+
+/* True when the raw type value is LSN_INFO_TYPE_LSNS. */
+static inline bool is_lsn_type(uint16 type)
+{
+    const bool matches = (LSN_INFO_TYPE_LSNS == type);
+    return matches;
+}
+
+/* Reduce a table position to its remainder modulo BLCKSZ. */
+inline uint32 lsn_info_postion_to_offset(LsnInfoPosition position)
+{
+    const uint32 in_page_offset = position % BLCKSZ;
+    return in_page_offset;
+}
+
+/* Scale a bit index by LSN_INFO_NODE_SIZE. */
+static inline uint32 bit_to_offset(uint32 which_bit)
+{
+    const uint32 scaled = which_bit * LSN_INFO_NODE_SIZE;
+    return scaled;
+}
+
+Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition position, ReadBufferMode mode,
+ Buffer* buffer);
+void read_lsn_info_before(uint64 start_position, XLogRecPtr *readed_array, XLogRecPtr end_lsn);
+LsnInfoDoubleList* lsn_info_position_to_node_ptr(LsnInfoPosition pos);
+
+// block meta table's page lock is held
+void insert_lsn_to_lsn_info(StandbyReadMetaInfo* mete_info, LsnInfoDoubleList* head,
+ XLogRecPtr next_lsn);
+
+// block meta table's page lock is held
+void insert_base_page_to_lsn_info(StandbyReadMetaInfo* meta_info, LsnInfoDoubleList* lsn_head,
+ LsnInfoDoubleList* base_page_head, const BufferTag& buf_tag, const Page base_page, XLogRecPtr curent_page_lsn,
+ XLogRecPtr next_lsn);
+
+void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_base_page_pos,
+ StandbyReadLsnInfoArray* lsn_info_list, XLogRecPtr read_lsn);
+
+Buffer buffer_read_base_page(uint32 batch_id, uint32 redo_id, BasePagePosition position, ReadBufferMode mode);
+void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page);
+void read_base_page(const BufferTag& buf_tag, BasePagePosition position, BufferDesc* dest_buf_desc);
+void recycle_base_page_file(uint32 batch_id, uint32 redo_id, BasePagePosition recycle_pos);
+
+void set_base_page_map_bit(Page page, uint32 base_page_loc);
+bool is_base_page_map_bit_set(Page page, uint32 which_bit);
+void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_info_pos,
+ XLogRecPtr recycle_lsn, LsnInfoPosition *min_page_info_pos, XLogRecPtr *min_lsn);
+void standby_read_recyle_per_workers(StandbyReadMetaInfo *standby_read_meta_info, XLogRecPtr recycle_lsn);
+
+} // namespace extreme_rto_standby_read
+#endif
\ No newline at end of file
diff --git a/src/include/access/extreme_rto/standby_read/standby_read_base.h b/src/include/access/extreme_rto/standby_read/standby_read_base.h
new file mode 100644
index 0000000000000000000000000000000000000000..714b475068d77b4cfb8a1a9d6d4dd87fc46f2011
--- /dev/null
+++ b/src/include/access/extreme_rto/standby_read/standby_read_base.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ---------------------------------------------------------------------------------------
+ *
+ * standby_read_base.h
+ *
+ *
+ *
+ * IDENTIFICATION
+ * src/include/access/extreme_rto/standby_read/standby_read_base.h
+ *
+ * ---------------------------------------------------------------------------------------
+ */
+
+#ifndef STANDBY_READ_BASE_H
+#define STANDBY_READ_BASE_H
+
+#include "gs_thread.h"
+#include "postgres.h"
+#include "storage/buf/bufpage.h"
+#include "postmaster/alarmchecker.h"
+
+#define EXRTO_FILE_DIR "standby_read"
+#define EXRTO_OLD_FILE_DIR "standby_read_old"
+
+static const uint32 EXRTO_BASE_PAGE_FILE_MAXSIZE = 64 * 1024 * 1024; /* 64MB */
+static const uint32 EXRTO_LSN_INFO_FILE_MAXSIZE = 16 * 1024 * 1024; /* 16MB */
+static const uint32 EXRTO_BLOCK_INFO_FILE_MAXSIZE = RELSEG_SIZE * BLCKSZ;
+
+extern const char* EXRTO_FILE_SUB_DIR[];
+extern const uint32 EXRTO_FILE_PATH_LEN;
+
+#define UINT64_HALF 32 /* half the bit width of a uint64 */
+#define LOW_WORKERID_BITS 16
+#define LOW_WORKERID_MASK ((1U << LOW_WORKERID_BITS) - 1) /* the low LOW_WORKERID_BITS bits set (0xFFFF) */
+
+/*
+ * Log format helpers. EXRTOFORMAT("fmt") expands to  "[%s:%d]" "fmt" , __FUNCTION__, __LINE__
+ * so the format string gains a "[function:line]" prefix and the two matching arguments are placed
+ * directly after it; arguments written by the caller after the macro follow them.
+ */
+#define EXRTODEBUGINFO , __FUNCTION__, __LINE__
+#define EXRTODEBUGSTR "[%s:%d]"
+#define EXRTOFORMAT(f) EXRTODEBUGSTR f EXRTODEBUGINFO
+
+/*
+ * Kinds of extreme-RTO standby-read files, numbered from 0.
+ * NOTE(review): presumably used as an index into EXRTO_FILE_SUB_DIR[] -- confirm.
+ */
+enum ExRTOFileType {
+ BASE_PAGE = 0,
+ LSN_INFO_META,
+ BLOCK_INFO_META,
+};
+
+typedef uint64 BasePagePosition;
+
+/*
+ * Bookkeeping for one set of standby-read files: recycle and next-insert positions for the lsn info
+ * table and the base page file.
+ * NOTE(review): batch_id / redo_id presumably identify the owning page redo worker -- confirm.
+ */
+typedef struct _StandbyReadMetaInfo {
+ uint32 batch_id;
+ uint32 redo_id;
+ uint64 lsn_table_recyle_position;
+ uint64 lsn_table_next_position; // next position can insert node, should jump page header before use
+ BasePagePosition base_page_recyle_position;
+ BasePagePosition base_page_next_position; // next position can insert page
+ XLogRecPtr recycle_lsn_per_worker;
+} StandbyReadMetaInfo;
+
+/* Stamp the page with LSN unless LSN is lower than the page's current LSN. */
+inline void standby_read_meta_page_set_lsn(Page page, XLogRecPtr LSN)
+{
+    if (!XLByteLT(LSN, PageGetLSN(page))) {
+        PageSetLSNInternal(page, LSN);
+    }
+}
+
+void exrto_clean_dir(void);
+void exrto_recycle_old_dir(void);
+void exrto_standby_read_init();
+#endif
\ No newline at end of file
diff --git a/src/include/access/multi_redo_api.h b/src/include/access/multi_redo_api.h
index 750e5b4518f49c7593eb5d45709d1ca13aee704b..331bcd897f10898809ea5aa4ea62b1399b693f9c 100644
--- a/src/include/access/multi_redo_api.h
+++ b/src/include/access/multi_redo_api.h
@@ -34,6 +34,7 @@
#include "storage/proc.h"
#include "access/redo_statistic.h"
#include "access/extreme_rto_redo_api.h"
+#include "postmaster/postmaster.h"
#ifdef ENABLE_LITE_MODE
#define ENABLE_ONDEMAND_RECOVERY false
@@ -65,6 +66,9 @@ static const uint32 PAGE_REDO_WORKER_READY = 2;
static const uint32 PAGE_REDO_WORKER_EXIT = 3;
static const uint32 BIG_RECORD_LENGTH = XLOG_BLCKSZ * 16;
+/* True when the EnableHotStandby setting is on and IsExtremeRedo() holds. */
+#define IS_EXRTO_READ (g_instance.attr.attr_storage.EnableHotStandby && IsExtremeRedo())
+/* IS_EXRTO_READ and pm_state_is_hot_standby(); the latter is only evaluated when IS_EXRTO_READ is true. */
+#define IS_EXRTO_STANDBY_READ (IS_EXRTO_READ && pm_state_is_hot_standby())
+
static inline int get_real_recovery_parallelism()
{
return g_instance.attr.attr_storage.real_recovery_parallelism;
diff --git a/src/include/access/ustore/undo/knl_uundospace.h b/src/include/access/ustore/undo/knl_uundospace.h
index e5cc112d7cde6226c47c1bb6dc7f640f11b35cbb..cd9832e79059370594cc8e9c897740536564327b 100644
--- a/src/include/access/ustore/undo/knl_uundospace.h
+++ b/src/include/access/ustore/undo/knl_uundospace.h
@@ -51,6 +51,10 @@ public:
{
return this->head_;
}
+    /* Return the current value of head_exrto. */
+    inline UndoLogOffset Head_exrto(void)
+    {
+        const UndoLogOffset exrto_head = head_exrto;
+        return exrto_head;
+    }
inline UndoLogOffset Tail(void)
{
return this->tail_;
@@ -66,6 +70,10 @@ public:
{
this->head_ = head;
}
+    /* Store head into head_exrto. */
+    inline void set_head_exrto(UndoRecPtr head)
+    {
+        head_exrto = head;
+    }
inline void SetTail(UndoRecPtr tail)
{
this->tail_ = tail;
@@ -109,10 +117,14 @@ public:
void CreateNonExistsUndoFile(int zid, uint32 dbId);
static void CheckPointUndoSpace(int fd, UndoSpaceType type);
static void RecoveryUndoSpace(int fd, UndoSpaceType type);
+ UndoLogOffset find_oldest_offset(int zid, uint32 db_id) const;
+ void unlink_residual_log(int zid, UndoLogOffset start, UndoLogOffset end, uint32 db_id) const;
private:
/* next insertion point (head), this backend is the only one that can modify insert. */
UndoLogOffset head_;
+ /* real next insertion point (head), this backend is the only one that can modify insert. */
+ UndoLogOffset head_exrto;
/* one past end of highest segment, need lock befor modify end. */
UndoLogOffset tail_;
diff --git a/src/include/access/ustore/undo/knl_uundozone.h b/src/include/access/ustore/undo/knl_uundozone.h
index a2402299f3a00228e79cb157abff33f37fdfc39f..5d87696ae10ada237b63c196fd355722cc3f599b 100644
--- a/src/include/access/ustore/undo/knl_uundozone.h
+++ b/src/include/access/ustore/undo/knl_uundozone.h
@@ -119,6 +119,10 @@ public:
{
return MAKE_UNDO_PTR(zid_, recycleTSlotPtr_);
}
+    /* Build an undo pointer from this zone's id and recycle_tslot_ptr_exrto. */
+    inline UndoSlotPtr get_recycle_tslot_ptr_exrto(void)
+    {
+        const UndoSlotPtr slot_ptr = MAKE_UNDO_PTR(zid_, recycle_tslot_ptr_exrto);
+        return slot_ptr;
+    }
inline UndoSlotPtr GetFrozenSlotPtr(void)
{
return frozenSlotPtr_;
@@ -127,6 +131,10 @@ public:
{
return recycleXid_;
}
+    /* Return the current value of recycle_xid_exrto. */
+    inline TransactionId get_recycle_xid_exrto(void)
+    {
+        const TransactionId xid = recycle_xid_exrto;
+        return xid;
+    }
inline TransactionId GetFrozenXid(void)
{
return frozenXid_;
@@ -156,10 +164,18 @@ public:
{
discardURecPtr_ = UNDO_PTR_GET_OFFSET(discard);
}
+    /* Keep only the offset part of discard in discard_urec_ptr_exrto. */
+    inline void set_discard_urec_ptr_exrto(UndoRecPtr discard)
+    {
+        const UndoLogOffset discard_offset = UNDO_PTR_GET_OFFSET(discard);
+        discard_urec_ptr_exrto = discard_offset;
+    }
inline void SetForceDiscardURecPtr(UndoRecPtr discard)
{
forceDiscardURecPtr_ = UNDO_PTR_GET_OFFSET(discard);
- }
+ }
+    /* Keep only the offset part of discard in force_discard_urec_ptr_exrto. */
+    inline void set_force_discard_urec_ptr_exrto(UndoRecPtr discard)
+    {
+        const UndoLogOffset discard_offset = UNDO_PTR_GET_OFFSET(discard);
+        force_discard_urec_ptr_exrto = discard_offset;
+    }
inline void SetAttachPid(ThreadId attachPid)
{
attachPid_ = attachPid;
@@ -176,6 +192,10 @@ public:
{
recycleTSlotPtr_ = UNDO_PTR_GET_OFFSET(recycle);
}
+    /* Keep only the offset part of recycle in recycle_tslot_ptr_exrto. */
+    inline void set_recycle_tslot_ptr_exrto(UndoSlotPtr recycle)
+    {
+        const UndoSlotOffset slot_offset = UNDO_PTR_GET_OFFSET(recycle);
+        recycle_tslot_ptr_exrto = slot_offset;
+    }
inline void SetLSN(XLogRecPtr lsn)
{
lsn_ = lsn;
@@ -188,6 +208,10 @@ public:
{
recycleXid_ = recycleXid;
}
+    /* Store recycle_xid into recycle_xid_exrto. */
+    inline void set_recycle_xid_exrto(TransactionId recycle_xid)
+    {
+        this->recycle_xid_exrto = recycle_xid;
+    }
inline void SetFrozenXid(TransactionId frozenXid)
{
frozenXid_ = frozenXid;
@@ -200,6 +224,10 @@ public:
{
return insertURecPtr_ != forceDiscardURecPtr_;
}
+    /* True when the insert position differs from force_discard_urec_ptr_exrto. */
+    inline bool Used_exrto(void)
+    {
+        const bool differs = (insertURecPtr_ != force_discard_urec_ptr_exrto);
+        return differs;
+    }
/* Lock and unlock undozone. */
void InitLock(void)
{
@@ -300,6 +328,10 @@ public:
/* Recovery undospace info from persistent file. */
static void RecoveryUndoZone(int fd);
+ UndoRecordState check_record_valid_exrto(UndoLogOffset offset, bool check_force_recycle,
+ TransactionId *last_xid) const;
+ uint64 release_residual_record_space();
+ uint64 release_residual_slot_space();
private:
static const uint32 UNDO_ZONE_ATTACHED = 1;
@@ -316,6 +348,13 @@ private:
TransactionId recycleXid_;
TransactionId frozenXid_;
ThreadId attachPid_;
+
+ /* for extreme RTO read. */
+ UndoSlotOffset recycle_tslot_ptr_exrto; /* offset part only; set via set_recycle_tslot_ptr_exrto() */
+ UndoLogOffset discard_urec_ptr_exrto; /* offset part only; set via set_discard_urec_ptr_exrto() */
+ UndoLogOffset force_discard_urec_ptr_exrto; /* offset part only; compared with insertURecPtr_ in Used_exrto() */
+ TransactionId recycle_xid_exrto; /* read/written via get_recycle_xid_exrto()/set_recycle_xid_exrto() */
+
/* Need Lock undo zone before alloc, preventing from checkpoint. */
LWLock *lock_;
/* Lsn for undo zone meta. */
diff --git a/src/include/access/xlogproc.h b/src/include/access/xlogproc.h
index fba9b6e3fd385d08ea0b2b3f4f63b0c5c8d38604..ed9d926e7fce5ff67df49e0d21953eb0f5183e12 100755
--- a/src/include/access/xlogproc.h
+++ b/src/include/access/xlogproc.h
@@ -216,6 +216,7 @@ typedef enum {
typedef struct {
uint32 blockddltype;
int rels;
+ uint32 mainDataLen;
char *mainData;
bool compress;
} XLogBlockDdlParse;
@@ -947,6 +948,10 @@ static inline Buffer AtomicExchangeBuffer(volatile Buffer *ptr, Buffer newval)
return old;
}
+/* this is an estimated value: one entry for every block id in 0..XLR_MAX_BLOCK_ID */
+static const uint32 MAX_BUFFER_NUM_PER_WAL_RECORD = XLR_MAX_BLOCK_ID + 1;
+/* NOTE(review): the name suggests a 32-bit shift but the value is 10 -- confirm the intended meaning */
+static const uint32 LSN_MOVE32 = 10;
+
void HeapXlogCleanOperatorPage(
RedoBufferInfo* buffer, void* recorddata, void* blkdata, Size datalen, Size* freespace, bool repairFragmentation);
void HeapXlogFreezeOperatorPage(RedoBufferInfo* buffer, void* recorddata, void* blkdata, Size datalen,
@@ -1117,7 +1122,7 @@ void SegPageRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdat
extern void xlog_redo_data_block(
XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo);
extern void XLogRecSetBlockDdlState(XLogBlockDdlParse* blockddlstate, uint32 blockddltype, char *mainData,
- int rels = 1, bool compress = false);
+ int rels = 1, bool compress = false, uint32 main_data_len = 0);
XLogRedoAction XLogCheckBlockDataRedoAction(XLogBlockDataParse* datadecode, RedoBufferInfo* bufferinfo);
void BtreeRedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo);
@@ -1275,5 +1280,6 @@ extern bool IsCheckPoint(const XLogRecParseState *parseState);
void redo_atomic_xlog_dispatch(uint8 opCode, RedoBufferInfo *redo_buf, const char *data);
void seg_redo_new_page_copy_and_flush(BufferTag *tag, char *data, XLogRecPtr lsn);
+void redo_target_page(const BufferTag& buf_tag, StandbyReadLsnInfoArray* lsn_info, Buffer base_page_buf);
#endif
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 66dcbb71c8d56b9650348330c2cf89b1dbcaad6d..200f0c189aac34ffe0c8c349d110df0a1fcc8e2c 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -27,8 +27,8 @@ extern void RelationCreateStorage(RelFileNode rnode, char relpersistence, Oid ow
Relation rel = NULL);
extern void RelationDropStorage(Relation rel, bool isDfsTruncate = false);
extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit);
-extern void RelationTruncate(Relation rel, BlockNumber nblocks);
-extern void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks);
+extern void RelationTruncate(Relation rel, BlockNumber nblocks, TransactionId latest_removed_xid = InvalidTransactionId);
+extern void PartitionTruncate(Relation parent, Partition part, BlockNumber nblocks, TransactionId latest_removed_xid = InvalidTransactionId);
extern void PartitionDropStorage(Relation rel, Partition part);
extern void BucketCreateStorage(RelFileNode rnode, Oid bucketOid, Oid ownerid);
extern void InsertStorageIntoPendingList(_in_ const RelFileNode* rnode, _in_ AttrNumber attrnum, _in_ BackendId backend,
diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h
index 841c42fd5ded25a053fd88919945c4535661e160..1f3ce16b6cb04b3c2f01e3e9ae773b970354cdab 100644
--- a/src/include/catalog/storage_xlog.h
+++ b/src/include/catalog/storage_xlog.h
@@ -51,8 +51,11 @@ typedef struct xl_smgr_truncate {
typedef struct xl_smgr_truncate_compress {
xl_smgr_truncate xlrec;
uint2 pageCompressOpts;
+ TransactionId latest_removed_xid;
} xl_smgr_truncate_compress;
+/*
+ * Number of bytes of xl_smgr_truncate_compress from its start through the end of the
+ * latest_removed_xid member (any trailing struct padding is not counted).
+ */
+#define TRUNCATE_CONTAIN_XID_SIZE (offsetof(xl_smgr_truncate_compress, latest_removed_xid) + sizeof(TransactionId))
+
extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
extern void smgr_redo(XLogReaderState *record);
@@ -60,7 +63,8 @@ extern void smgr_desc(StringInfo buf, XLogReaderState *record);
extern const char* smgr_type_name(uint8 subtype);
extern void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data);
-extern void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn);
+extern void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn,
+ TransactionId latest_removed_xid);
/* An xlog combined by multiply sub-xlog, it will be decoded again */
#define XLOG_SEG_ATOMIC_OPERATION 0x00
diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql
new file mode 100644
index 0000000000000000000000000000000000000000..f1bdb839d2d0f383a558f4a9580335fd38c69ea0
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_908.sql
@@ -0,0 +1,19 @@
+-- ----------------------------------------------------------------
+-- rollback pg_catalog.pg_conversion
+-- ----------------------------------------------------------------
+
+-- Remove the gb18030_2022 conversions and their conversion functions.
+delete from pg_catalog.pg_conversion where conname = 'gb18030_2022_to_utf8';
+delete from pg_catalog.pg_conversion where conname = 'utf8_to_gb18030_2022';
+DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE;
+DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE;
+
+-- Reset the encoding ids of the sjis/big5/uhc/johab/shift_jis_2004 conversions to 37..41.
+-- NOTE(review): in LIKE patterns '_' matches any single character; the johab and shift_jis_2004
+-- "to" patterns omit the '_' before "to" that the other patterns have -- confirm this is intended.
+UPDATE pg_catalog.pg_conversion SET conforencoding=37 WHERE conname like 'sjis_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=37 WHERE conname like '%_to_sjis';
+UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'big5_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_big5';
+UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'uhc_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_uhc';
+UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'johab_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%to_johab';
+UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'shift_jis_2004_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_shift_jis_2004';
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql
new file mode 100644
index 0000000000000000000000000000000000000000..1ceaa4bdff9f9a08f2a54ea97db462bb66026333
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_909.sql
@@ -0,0 +1 @@
+DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade;
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql
new file mode 100644
index 0000000000000000000000000000000000000000..f1bdb839d2d0f383a558f4a9580335fd38c69ea0
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_908.sql
@@ -0,0 +1,19 @@
+-- ----------------------------------------------------------------
+-- rollback pg_catalog.pg_conversion
+-- ----------------------------------------------------------------
+
+-- Remove the gb18030_2022 conversions and their conversion functions.
+delete from pg_catalog.pg_conversion where conname = 'gb18030_2022_to_utf8';
+delete from pg_catalog.pg_conversion where conname = 'utf8_to_gb18030_2022';
+DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE;
+DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE;
+
+-- Reset the encoding ids of the sjis/big5/uhc/johab/shift_jis_2004 conversions to 37..41.
+-- NOTE(review): in LIKE patterns '_' matches any single character; the johab and shift_jis_2004
+-- "to" patterns omit the '_' before "to" that the other patterns have -- confirm this is intended.
+UPDATE pg_catalog.pg_conversion SET conforencoding=37 WHERE conname like 'sjis_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=37 WHERE conname like '%_to_sjis';
+UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'big5_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_big5';
+UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'uhc_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_uhc';
+UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'johab_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%to_johab';
+UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'shift_jis_2004_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_shift_jis_2004';
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql
new file mode 100644
index 0000000000000000000000000000000000000000..1ceaa4bdff9f9a08f2a54ea97db462bb66026333
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_909.sql
@@ -0,0 +1 @@
+DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade;
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql
new file mode 100644
index 0000000000000000000000000000000000000000..109cf32fcefdb1ddefbf24ad264a46977981e9f9
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_908.sql
@@ -0,0 +1,36 @@
+-- ----------------------------------------------------------------
+-- upgrade pg_catalog.pg_conversion
+-- ----------------------------------------------------------------
+
+DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE;
+CREATE OR REPLACE FUNCTION pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer)
+RETURNS void
+LANGUAGE c
+STRICT NOT FENCED NOT SHIPPABLE
+AS '$libdir/utf8_and_gb18030', $function$gb18030_2022_to_utf8$function$;
+COMMENT ON FUNCTION pg_catalog.gb18030_2022_to_utf8(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER)
+IS 'internal conversion function for GB18030_2022 to UTF8';
+
+insert into pg_catalog.pg_conversion values ('gb18030_2022_to_utf8', 11, 10, 37, 7, 'gb18030_2022_to_utf8', true);
+
+DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE;
+CREATE OR REPLACE FUNCTION pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer)
+RETURNS void
+LANGUAGE c
+STRICT NOT FENCED NOT SHIPPABLE
+AS '$libdir/utf8_and_gb18030', $function$utf8_to_gb18030_2022$function$;
+COMMENT ON FUNCTION pg_catalog.utf8_to_gb18030_2022(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER)
+IS 'internal conversion function for UTF8 to GB18030_2022';
+
+insert into pg_catalog.pg_conversion values ('utf8_to_gb18030_2022', 11, 10, 7, 37, 'utf8_to_gb18030_2022', true);
+
+UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'sjis_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_sjis';
+UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'big5_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_big5';
+UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'uhc_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%_to_uhc';
+UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'johab_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_johab';
+UPDATE pg_catalog.pg_conversion SET conforencoding=42 WHERE conname like 'shift_jis_2004_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=42 WHERE conname like '%to_shift_jis_2004';
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql
new file mode 100644
index 0000000000000000000000000000000000000000..9317bbf1fc2cb03f52715cf92c5d4062b7e939a4
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_909.sql
@@ -0,0 +1,11 @@
+DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade;
+SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 6218;
+CREATE OR REPLACE FUNCTION pg_catalog.gs_hot_standby_space_info
+( OUT base_page_file_num xid,
+ OUT base_page_total_size xid,
+ OUT lsn_info_meta_file_num xid,
+ OUT lsn_info_meta_total_size xid,
+ OUT block_info_meta_file_num xid,
+ OUT block_info_meta_total_size xid
+ )
+RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_hot_standby_space_info';
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql
new file mode 100644
index 0000000000000000000000000000000000000000..109cf32fcefdb1ddefbf24ad264a46977981e9f9
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_908.sql
@@ -0,0 +1,36 @@
+-- ----------------------------------------------------------------
+-- upgrade pg_catalog.pg_conversion
+-- ----------------------------------------------------------------
+
+DROP FUNCTION IF EXISTS pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer) CASCADE;
+CREATE OR REPLACE FUNCTION pg_catalog.gb18030_2022_to_utf8(integer, integer, cstring, internal, integer)
+RETURNS void
+LANGUAGE c
+STRICT NOT FENCED NOT SHIPPABLE
+AS '$libdir/utf8_and_gb18030', $function$gb18030_2022_to_utf8$function$;
+COMMENT ON FUNCTION pg_catalog.gb18030_2022_to_utf8(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER)
+IS 'internal conversion function for GB18030_2022 to UTF8';
+
+insert into pg_catalog.pg_conversion values ('gb18030_2022_to_utf8', 11, 10, 37, 7, 'gb18030_2022_to_utf8', true);
+
+DROP FUNCTION IF EXISTS pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer) CASCADE;
+CREATE OR REPLACE FUNCTION pg_catalog.utf8_to_gb18030_2022(integer, integer, cstring, internal, integer)
+RETURNS void
+LANGUAGE c
+STRICT NOT FENCED NOT SHIPPABLE
+AS '$libdir/utf8_and_gb18030', $function$utf8_to_gb18030_2022$function$;
+COMMENT ON FUNCTION pg_catalog.utf8_to_gb18030_2022(INTEGER, INTEGER, CSTRING, INTERNAL, INTEGER)
+IS 'internal conversion function for UTF8 to GB18030_2022';
+
+insert into pg_catalog.pg_conversion values ('utf8_to_gb18030_2022', 11, 10, 7, 37, 'utf8_to_gb18030_2022', true);
+
+UPDATE pg_catalog.pg_conversion SET conforencoding=38 WHERE conname like 'sjis_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=38 WHERE conname like '%_to_sjis';
+UPDATE pg_catalog.pg_conversion SET conforencoding=39 WHERE conname like 'big5_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=39 WHERE conname like '%_to_big5';
+UPDATE pg_catalog.pg_conversion SET conforencoding=40 WHERE conname like 'uhc_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=40 WHERE conname like '%_to_uhc';
+UPDATE pg_catalog.pg_conversion SET conforencoding=41 WHERE conname like 'johab_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=41 WHERE conname like '%to_johab';
+UPDATE pg_catalog.pg_conversion SET conforencoding=42 WHERE conname like 'shift_jis_2004_to_%';
+UPDATE pg_catalog.pg_conversion SET contoencoding=42 WHERE conname like '%to_shift_jis_2004';
\ No newline at end of file
diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql
new file mode 100644
index 0000000000000000000000000000000000000000..9317bbf1fc2cb03f52715cf92c5d4062b7e939a4
--- /dev/null
+++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_909.sql
@@ -0,0 +1,11 @@
+DROP FUNCTION IF EXISTS pg_catalog.gs_hot_standby_space_info() cascade;
+SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 6218;
+CREATE OR REPLACE FUNCTION pg_catalog.gs_hot_standby_space_info
+( OUT base_page_file_num xid,
+ OUT base_page_total_size xid,
+ OUT lsn_info_meta_file_num xid,
+ OUT lsn_info_meta_total_size xid,
+ OUT block_info_meta_file_num xid,
+ OUT block_info_meta_total_size xid
+ )
+RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_hot_standby_space_info';
\ No newline at end of file
diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h
index b8caca6ae2476db7da7d16a2b435ac61f7ea6883..8427048beadd545feb0da18e292055bbf19db7b5 100755
--- a/src/include/gs_thread.h
+++ b/src/include/gs_thread.h
@@ -124,6 +124,7 @@ typedef enum knl_thread_role {
APPLY_WORKER,
STACK_PERF_WORKER,
DMS_AUXILIARY_THREAD,
+ EXRTO_RECYCLER,
BARRIER_PREPARSE,
TS_COMPACTION,
TS_COMPACTION_CONSUMER,
diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h
index 7161e8d1152ba728fe5a21ec00b5b3fa0eb21e1b..b9c572b816da4d9d6d412f2b8d0a6072fadd34a4 100755
--- a/src/include/knl/knl_guc/knl_instance_attr_storage.h
+++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h
@@ -208,6 +208,14 @@ typedef struct knl_instance_attr_storage {
int max_logical_replication_workers;
char *redo_bind_cpu_attr;
int max_active_gtt;
+
+ /* extreme-rto standby read */
+ int64 max_standby_base_page_size;
+ int64 max_standby_lsn_info_size;
+ int base_page_saved_interval;
+ double standby_force_recyle_ratio;
+ int standby_recycle_interval;
+ int standby_max_query_time;
#ifndef ENABLE_MULTIPLE_NODES
bool enable_save_confirmed_lsn;
#endif
diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h
index b5ae35361618172054306bfd912aaf7a4cf8602f..592d4e0339c38ec31e508f7270036f444c64e8bd 100755
--- a/src/include/knl/knl_instance.h
+++ b/src/include/knl/knl_instance.h
@@ -113,6 +113,8 @@ enum knl_parallel_redo_state {
REDO_DONE,
};
+typedef struct ExrtoSnapshotData* ExrtoSnapshot;
+
/* all process level attribute which expose to user */
typedef struct knl_instance_attr {
@@ -216,6 +218,7 @@ typedef struct knl_g_pid_context {
ThreadId LogicalReadWorkerPID;
ThreadId LogicalDecoderWorkerPID;
ThreadId BarrierPreParsePID;
+ ThreadId exrto_recycler_pid;
ThreadId ApplyLauncerPID;
ThreadId StackPerfPID;
ThreadId CfsShrinkerPID;
@@ -746,7 +749,7 @@ typedef struct knl_g_parallel_redo_context {
char* ali_buf;
XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM];
RedoCpuBindControl redoCpuBindcontrl;
-
+ XLogRecPtr global_recycle_lsn; /* extreme-rto standby read */
HTAB **redoItemHash; /* used in ondemand extreme RTO */
} knl_g_parallel_redo_context;
@@ -918,6 +921,7 @@ typedef struct knl_g_undo_context {
pg_atomic_uint64 globalFrozenXid;
/* Oldest transaction id which is having undo. */
pg_atomic_uint64 globalRecycleXid;
+ bool is_exrto_residual_undo_file_recycled;
} knl_g_undo_context;
typedef struct knl_g_flashback_context {
diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h
index 0112d3a7b169dd2b66617b2ab8a3f4b81083ca02..aaadd977a292fc95e5deb8635801d561e100bfa2 100644
--- a/src/include/knl/knl_session.h
+++ b/src/include/knl/knl_session.h
@@ -685,6 +685,9 @@ typedef struct knl_u_utils_context {
HTAB* set_user_params_htab;
DestReceiver* spi_printtupDR;
+
+ /* backend read lsn for read on standby in extreme rto */
+ XLogRecPtr exrto_read_lsn;
} knl_u_utils_context;
typedef struct knl_u_security_context {
@@ -1851,6 +1854,9 @@ typedef struct knl_u_storage_context {
/* md.cpp */
MemoryContext MdCxt; /* context for all md.c allocations */
+ /* exrto_file.cpp */
+ MemoryContext exrto_standby_read_file_cxt;
+
/* sync.cpp */
MemoryContext pendingOpsCxt;
struct HTAB *pendingOps;
diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h
index 12d8a18f9ed9ab069c5a965f557dad4bc6cc7429..088c655bbf0a3430ffcdb8ea42d2990798a10723 100755
--- a/src/include/knl/knl_thread.h
+++ b/src/include/knl/knl_thread.h
@@ -77,6 +77,7 @@
#include "port/pg_crc32c.h"
#include "ddes/dms/ss_common_attr.h"
#include "ddes/dms/ss_txnstatus.h"
+#include "access/extreme_rto/standby_read/standby_read_base.h"
#define MAX_PATH_LEN 1024
extern const int g_reserve_param_num;
@@ -1944,8 +1945,22 @@ typedef struct {
volatile sig_atomic_t got_SIGHUP;
volatile sig_atomic_t sleep_long;
volatile sig_atomic_t check_repair;
+ void *redo_worker_ptr;
} knl_t_page_redo_context;
+typedef struct _StandbyReadLsnInfoArray {
+ XLogRecPtr *lsn_array;
+ uint32 lsn_num;
+ XLogRecPtr base_page_lsn;
+ BasePagePosition base_page_pos;
+} StandbyReadLsnInfoArray;
+
+typedef struct {
+ volatile sig_atomic_t shutdown_requested;
+ volatile sig_atomic_t got_SIGHUP;
+ StandbyReadLsnInfoArray lsn_info;
+} knl_t_exrto_recycle_context;
+
typedef struct knl_t_startup_context {
/*
* Flags set by interrupt handlers for later service in the redo loop.
@@ -2564,8 +2579,10 @@ typedef struct knl_t_storage_context {
struct HTAB* SharedBufHash;
struct HTAB* BufFreeListHash;
struct BufferDesc* InProgressBuf;
+ struct BufferDesc* ParentInProgressBuf;
/* local state for StartBufferIO and related functions */
volatile bool IsForInput;
+ volatile bool ParentIsForInput;
/* local state for LockBufferForCleanup */
struct BufferDesc* PinCountWaitBuf;
/* local state for aio clean up resource */
@@ -3483,6 +3500,7 @@ typedef struct knl_thrd_context {
knl_t_percentile_context percentile_cxt;
knl_t_perf_snap_context perf_snap_cxt;
knl_t_page_redo_context page_redo_cxt;
+ knl_t_exrto_recycle_context exrto_recycle_cxt;
knl_t_parallel_decode_worker_context parallel_decode_cxt;
knl_t_logical_read_worker_context logicalreadworker_cxt;
knl_t_heartbeat_context heartbeat_cxt;
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index dadc6b8f9fb51d2ffd678e783b24eccfa51ac1c8..22c553a2ca3ce36bdc6c2be9cf62b7214d979095 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -223,6 +223,7 @@ typedef enum pg_enc {
PG_WIN1257, /* windows-1257 */
PG_KOI8U, /* KOI8-U */
PG_GB18030, /* GB18030 */
+ PG_GB18030_2022, /* GB18030-2022 */
/* PG_ENCODING_BE_LAST points to the above entry */
/* followings are for client encoding only */
@@ -235,7 +236,7 @@ typedef enum pg_enc {
} pg_enc;
-#define PG_ENCODING_BE_LAST PG_GB18030
+#define PG_ENCODING_BE_LAST PG_GB18030_2022
/*
* Please use these tests before access to pg_encconv_tbl[]
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 056ebc32a25ef6a111bcdf0528e676807f6caf09..abe48258f369469f9176744fd0fc3e5c77eec225 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -135,6 +135,7 @@ extern const uint32 CREATE_INDEX_IF_NOT_EXISTS_VERSION_NUM;
extern const uint32 SLOW_SQL_VERSION_NUM;
extern const uint32 INDEX_HINT_VERSION_NUM;
extern const uint32 CREATE_TABLE_AS_VERSION_NUM;
+extern const uint32 GB18030_2022_VERSION_NUM;
extern void register_backend_version(uint32 backend_version);
extern bool contain_backend_version(uint32 version_number);
@@ -565,6 +566,7 @@ typedef enum {
XlogCopyBackendProcess,
BarrierPreParseBackendProcess,
DmsAuxiliaryProcess,
+ ExrtoRecyclerProcess,
NUM_SINGLE_AUX_PROC, /* Sentry for auxiliary type with single thread. */
/*
@@ -609,6 +611,7 @@ typedef enum {
#define AmTsCompactionAuxiliaryProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == TsCompactionAuxiliaryProcess)
#define AmPageRedoWorker() (t_thrd.bootstrap_cxt.MyAuxProcType == PageRedoProcess)
#define AmDmsReformProcProcess() (t_thrd.role == DMS_WORKER && t_thrd.dms_cxt.is_reform_proc)
+#define AmErosRecyclerProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == ExrtoRecyclerProcess)
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 724c7895d923712f62cb577c5d78b33781ad1aa0..429da1baabc9b3155ceefde345e6b4604bed2c5b 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -1026,4 +1026,6 @@ extern void exec_describe_statement_message(const char* stmt_name);
extern void exec_get_ddl_params(StringInfo input_message);
#endif
+#define STRUCT_CONTAINER(type, membername, ptr) ((type *)((char *)(ptr)-offsetof(type, membername)))
+
#endif /* POSTGRES_H */
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index 82e7ad1d25b79a45d0a0705ec564bd955389e486..0902eae8d4dfcb856e047843d8ed768f512c5778 100755
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -245,6 +245,9 @@ extern bool SetDBStateFileState(DbState state, bool optional);
extern void GPCResetAll();
extern void initRandomState(TimestampTz start_time, TimestampTz stop_time);
extern bool PMstateIsRun(void);
+extern bool pm_state_is_startup();
+extern bool pm_state_is_recovery();
+extern bool pm_state_is_hot_standby();
extern ServerMode GetHaShmemMode(void);
extern void InitProcessAndShareMemory();
extern void InitShmemForDcfCallBack();
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 26d889f5d82a2018eef516dcddaff926109266ac..ea1fb952f672161c7989fa6ad346f4107644867d 100755
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -41,6 +41,9 @@
#define IS_PAUSE_BY_TARGET_BARRIER 0x00000001
#define IS_CANCEL_LOG_CTRL 0x00000010
+#define IS_DISASTER_RECOVER_MODE /* true when stream_cluster_run_mode is RUN_MODE_STANDBY */ \
+    (static_cast<ClusterRunMode>(g_instance.attr.attr_common.stream_cluster_run_mode) == RUN_MODE_STANDBY)
+
#ifdef ENABLE_MULTIPLE_NODES
#define AM_HADR_CN_WAL_RECEIVER (t_thrd.postmaster_cxt.HaShmData->is_cross_region && \
t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE && IS_PGXC_COORDINATOR)
diff --git a/src/include/storage/buf/buf_internals.h b/src/include/storage/buf/buf_internals.h
index b5a2d24b6314b42b9ac82f8a340fe25704480f81..c233d89a18f9cbba0dd339c72bdbab84b5a0a2c2 100644
--- a/src/include/storage/buf/buf_internals.h
+++ b/src/include/storage/buf/buf_internals.h
@@ -57,6 +57,7 @@
*/
#define BM_IN_MIGRATE (1U << 16) /* buffer is migrating */
#define BM_IS_META (1U << 17)
+#define BM_IS_TMP_BUF (1U << 21) /* temp buf, can not write to disk */
#define BM_LOCKED (1U << 22) /* buffer header is locked */
#define BM_DIRTY (1U << 23) /* data needs writing */
#define BM_VALID (1U << 24) /* data is valid */
@@ -285,6 +286,23 @@ extern "C" {
pg_atomic_write_u32(&(desc)->state, (s) & (~BM_LOCKED)); \
} while (0)
+#define FIX_SEG_BUFFER_TAG(node, tag, rel_node, block_num) /* re-point tag when node is a segment file node */ \
+    do { \
+        if (IsSegmentFileNode(node)) { \
+            (tag).rnode.relnode = (rel_node); \
+            (tag).blocknum = (block_num); \
+            (tag).rnode.bucketnode = SegmentBktId; \
+        } \
+    } while (0)
+
+#define FIX_BUFFER_DESC(buf, pblk) /* copy pblk's rel_node/block/lsn into buf's seg_* fields */ \
+    do { \
+        Assert(PhyBlockIsValid(*(pblk))); \
+        (buf)->seg_fileno = (pblk)->rel_node; \
+        (buf)->seg_blockno = (pblk)->block; \
+        (buf)->seg_lsn = (pblk)->lsn; \
+    } while (0)
+
extern bool retryLockBufHdr(BufferDesc* desc, uint32* buf_state);
/*
* The PendingWriteback & WritebackContext structure are used to keep
diff --git a/src/include/storage/buf/bufmgr.h b/src/include/storage/buf/bufmgr.h
index 5581d23f6e9133308cc0047dc054c00db8266d11..61aa35e549168cf5771b11650577f8231c455c88 100644
--- a/src/include/storage/buf/bufmgr.h
+++ b/src/include/storage/buf/bufmgr.h
@@ -320,6 +320,7 @@ extern void DropRelFileNodeAllBuffersUsingScan(RelFileNode* rnode, int rnode_len
extern void DropRelFileNodeOneForkAllBuffersUsingHash(HTAB *relfilenode_hashtbl);
extern void DropDatabaseBuffers(Oid dbid);
+extern void buffer_drop_exrto_standby_read_buffers();
extern BlockNumber PartitionGetNumberOfBlocksInFork(Relation relation, Partition partition, ForkNumber forkNum,
bool estimate = false);
@@ -423,4 +424,13 @@ extern void ReadBuffer_common_for_check(ReadBufferMode readmode, BufferDesc* buf
const XLogPhyBlock *pblk, Block bufBlock);
extern BufferDesc *RedoForOndemandExtremeRTOQuery(BufferDesc *bufHdr, char relpersistence,
ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode);
+extern Buffer standby_read_buf(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode,
+ BufferAccessStrategy strategy);
+typedef struct SMgrRelationData *SMgrRelation;
+BufferDesc *BufferAlloc(const RelFileNode &rel_file_node, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
+ BufferAccessStrategy strategy, bool *foundPtr, const XLogPhyBlock *pblk);
+Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum,
+ ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk);
+void buffer_in_progress_pop();
+void buffer_in_progress_push();
#endif
diff --git a/src/include/storage/buf/bufpage.h b/src/include/storage/buf/bufpage.h
index cb0fbcc98b7aed0df32cc554e6394c1d129b6052..5384af391fbdf74451227c6696740ad5df3de709 100644
--- a/src/include/storage/buf/bufpage.h
+++ b/src/include/storage/buf/bufpage.h
@@ -206,6 +206,7 @@ typedef HeapPageHeaderData* HeapPageHeader;
#define PD_ENCRYPT_PAGE 0x0020 /* is a encryt cluster */
#define PD_CHECKSUM_FNV1A 0x0040 /* page checksum using FNV-1a hash */
#define PD_JUST_AFTER_FPW 0x0080 /* page just after redo full page write */
+#define PD_EXRTO_PAGE 0x0400 /* is a rto file page */
#define PD_TDE_PAGE 0x0100 /* there is TdePageInfo at the end of a page */
#define PD_VALID_FLAG_BITS 0x01FF /* OR of all valid pd_flags bits */
diff --git a/src/include/storage/nvm/nvm.h b/src/include/storage/nvm/nvm.h
index 5501081f052852e9fd4cb15198551a3412d608bb..bc5db0fe2f385c4d7bc866e2872b9b6ad9c6fa03 100644
--- a/src/include/storage/nvm/nvm.h
+++ b/src/include/storage/nvm/nvm.h
@@ -28,7 +28,7 @@
void nvm_init(void);
-BufferDesc *NvmBufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber fork_num,
+BufferDesc *NvmBufferAlloc(const RelFileNode& rel_file_node, char relpersistence, ForkNumber fork_num,
BlockNumber block_num, BufferAccessStrategy strategy, bool *found, const XLogPhyBlock *pblk);
#endif
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index bc122ff3f5efe361adc69eec163ff96cec438858..b1abe2d8bcd1236d1bc62a769d3b9053d42ef8ff 100755
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -272,6 +272,8 @@ struct PGPROC {
uint64 snap_refcnt_bitmap;
#endif
+ XLogRecPtr exrto_read_lsn; /* calculate recycle lsn for read on standby in extreme rto */
+ TimestampTz exrto_gen_snap_time;
LWLock* subxidsLock;
struct XidCache subxids; /* cache for subtransaction XIDs */
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 91f114628412fea74197376af90332b4e4605baa..290d88c504e94e0c5feb0755d0f5fd87553e2cb4 100755
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -115,6 +115,8 @@ extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin,
CommitSeqNo limitXminCSN = InvalidCommitSeqNo,
TransactionId* xminArray = NULL);
extern ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalReason sigmode);
+extern bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid,
+ bool reach_max_check_times);
extern bool MinimumActiveBackends(int min);
extern int CountDBBackends(Oid database_oid);
diff --git a/src/include/storage/smgr/relfilenode.h b/src/include/storage/smgr/relfilenode.h
index 89a3725c788b2cdcbc79722c763a249b71d8856c..c06955bd04ca3a5fe91e73de4675802f4a206452 100644
--- a/src/include/storage/smgr/relfilenode.h
+++ b/src/include/storage/smgr/relfilenode.h
@@ -116,7 +116,7 @@ typedef struct RelFileNodeV2 {
} RelFileNodeV2;
-#define IsSegmentFileNode(rnode) ((rnode).bucketNode > InvalidBktId)
+#define IsSegmentFileNode(rnode) ((rnode).bucketNode > InvalidBktId && (rnode).spcNode != EXRTO_BLOCK_INFO_SPACE_OID)
#define IsHeapFileNode(rnode) (!IsSegmentFileNode(rnode))
#define IsSegmentPhysicalRelNode(rNode) (IsSegmentFileNode(rNode) && (rNode).relNode <= 5)
diff --git a/src/include/storage/smgr/smgr.h b/src/include/storage/smgr/smgr.h
index 9a68cb4be44c4099005d7d5819e1d6bec44b38db..9acbb1643392fae636d70d249ba5897c09d7827d 100644
--- a/src/include/storage/smgr/smgr.h
+++ b/src/include/storage/smgr/smgr.h
@@ -125,12 +125,20 @@ enum SMGR_READ_STATUS {
#define UNDO_DB_OID (9)
#define UNDO_SLOT_DB_OID (10)
+#define EXRTO_BASE_PAGE_SPACE_OID (6)
+#define EXRTO_LSN_INFO_SPACE_OID (7)
+#define EXRTO_BLOCK_INFO_SPACE_OID (8)
+#define EXRTO_FORK_NUM 3
+
#define MD_MANAGER (0)
#define UNDO_MANAGER (1)
#define SEGMENT_MANAGER (2)
+#define EXRTO_MANAGER (3)
#define IS_UNDO_RELFILENODE(rnode) ((rnode).dbNode == UNDO_DB_OID || (rnode).dbNode == UNDO_SLOT_DB_OID)
-
+#define IS_EXRTO_RELFILENODE(rnode) ((rnode).spcNode == EXRTO_BASE_PAGE_SPACE_OID || \
+ (rnode).spcNode == EXRTO_LSN_INFO_SPACE_OID || \
+ (rnode).spcNode == EXRTO_BLOCK_INFO_SPACE_OID)
/*
* On Windows, we have to interpret EACCES as possibly meaning the same as
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
@@ -250,4 +258,16 @@ extern void partition_create_new_storage(Relation rel, Partition part, const Rel
extern ScalarToDatum GetTransferFuncByTypeOid(Oid attTypeOid);
extern bool check_unlink_rel_hashtbl(RelFileNode rnode, ForkNumber forknum);
+/* storage_exrto_file.cpp */
+void exrto_init(void);
+void exrto_close(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum);
+bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum);
+void exrto_unlink(const RelFileNodeBackend& rnode, ForkNumber forknum, bool is_redo, BlockNumber blocknum);
+void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skip_fsync);
+SMGR_READ_STATUS exrto_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer);
+void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool skip_fsync);
+BlockNumber exrto_nblocks(SMgrRelation reln, ForkNumber forknum);
+void exrto_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks);
+void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks);
+
#endif /* SMGR_H */
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index de5b0d85a0be78daf745e2938c6b30729189b67e..1c38951e6bed28b87833f51b697eed5bc80b1ab5 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -265,6 +265,32 @@ typedef struct SnapshotData {
GTM_SnapshotType gtm_snapshot_type;
} SnapshotData;
+typedef struct ExrtoSnapshotData* ExrtoSnapshot;
+
+typedef struct ExrtoSnapshotData {
+ /*
+ * The remaining fields are used only for MVCC snapshots, and are normally
+ * just zeroes in special snapshots. (But xmin and xmax are used
+ * specially by HeapTupleSatisfiesDirty.)
+ *
+ * An MVCC snapshot can never see the effects of XIDs >= xmax. It can see
+ * the effects of all older XIDs except those listed in the snapshot. xmin
+ * is stored as an optimization to avoid needing to search the XID arrays
+ * for most tuples.
+ */
+ TransactionId xmin; /* all XID < xmin are visible to me */
+ TransactionId xmax; /* all XID >= xmax are invisible to me */
+
+ /*
+ * This snapshot can see the effects of all transactions with CSN <=
+ * snapshotcsn.
+ */
+ CommitSeqNo snapshot_csn;
+
+ XLogRecPtr read_lsn; /* xact lsn when generate snapshot */
+ TimestampTz gen_snap_time;
+} ExrtoSnapshotData;
+
/*
* Result codes for AM API tuple_{update,delete,lock}, and for visibility.
*/
diff --git a/src/test/regress/input/ts_gb18030_utf8.source b/src/test/regress/input/ts_gb18030_utf8.source
new file mode 100644
index 0000000000000000000000000000000000000000..d24e74de0f7c82d0a0944ee594ed0be92a0de15b
--- /dev/null
+++ b/src/test/regress/input/ts_gb18030_utf8.source
@@ -0,0 +1,414 @@
+create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0;
+\c gb18030_2022
+
+show server_encoding;
+set client_encoding = 'UTF8';
+show client_encoding;
+--1. 编码映射关系发生改变的字符:
+-- 查看未发生变化的字符串:
+select convert_to('中国', 'GB18030-2022');
+
+--插入了涉及GB18030-2000升级GB18030-2022后GB18030与UTF-8转换关系变更的19个字符。
+create table tb_test(id int, content text);
+
+insert into tb_test (id , content)
+select 1, convert_from('\xA8BC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 2, convert_from('\xA6D9', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 3, convert_from('\xA6DA', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 4, convert_from('\xA6DB', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 5, convert_from('\xA6DC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 6, convert_from('\xA6DD', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 7, convert_from('\xA6DE', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 8, convert_from('\xA6DF', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 9, convert_from('\xA6EC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 10, convert_from('\xA6ED', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 11, convert_from('\xA6F3', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 12, convert_from('\xFE59', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 13, convert_from('\xFE61', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 14, convert_from('\xFE66', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 15, convert_from('\xFE67', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 16, convert_from('\xFE6D', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 17, convert_from('\xFE7E', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 18, convert_from('\xFE90', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 19, convert_from('\xFEA0', 'GB18030-2022');
+
+insert into tb_test (id , content) select 20, convert_from('\x8135F437', 'GB18030-2022');
+insert into tb_test (id , content) select 21, convert_from('\x84318236', 'GB18030-2022');
+insert into tb_test (id , content) select 22, convert_from('\x84318238', 'GB18030-2022');
+insert into tb_test (id , content) select 23, convert_from('\x84318237', 'GB18030-2022');
+insert into tb_test (id , content) select 24, convert_from('\x84318239', 'GB18030-2022');
+insert into tb_test (id , content) select 25, convert_from('\x84318330', 'GB18030-2022');
+insert into tb_test (id , content) select 26, convert_from('\x84318331', 'GB18030-2022');
+insert into tb_test (id , content) select 27, convert_from('\x84318332', 'GB18030-2022');
+insert into tb_test (id , content) select 28, convert_from('\x84318333', 'GB18030-2022');
+insert into tb_test (id , content) select 29, convert_from('\x84318334', 'GB18030-2022');
+insert into tb_test (id , content) select 30, convert_from('\x84318335', 'GB18030-2022');
+insert into tb_test (id , content) select 31, convert_from('\x82359037', 'GB18030-2022');
+insert into tb_test (id , content) select 32, convert_from('\x82359038', 'GB18030-2022');
+insert into tb_test (id , content) select 33, convert_from('\x82359039', 'GB18030-2022');
+insert into tb_test (id , content) select 34, convert_from('\x82359130', 'GB18030-2022');
+insert into tb_test (id , content) select 35, convert_from('\x82359131', 'GB18030-2022');
+insert into tb_test (id , content) select 36, convert_from('\x82359132', 'GB18030-2022');
+insert into tb_test (id , content) select 37, convert_from('\x82359133', 'GB18030-2022');
+insert into tb_test (id , content) select 38, convert_from('\x82359134', 'GB18030-2022');
+
+--显示这19个字符
+select * from tb_test order by id;
+--查看GB18030-2022编码
+select convert_to(content, 'GB18030-2022') from tb_test order by id;
+--查看GB18030-2000编码
+select convert_to(content, 'GB18030') from tb_test order by id;
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。
+select convert_to(content, 'utf8') from tb_test order by id;
+
+
+--2. 新增字符举例
+--CBJ统一汉字扩充B
+select convert_from('\x95328236', 'GB18030-2022');
+select convert_from('\x9835F336', 'GB18030-2022');
+--CJK统一汉字
+select convert_from('\x82358F33', 'GB18030-2022');
+select convert_from('\x82359636', 'GB18030-2022');
+--CJK统一汉子扩充C
+select convert_from('\x9835F738', 'GB18030-2022');
+select convert_from('\x98399E36', 'GB18030-2022');
+--CJK统一汉子扩充D
+select convert_from('\x98399F38', 'GB18030-2022');
+select convert_from('\x9839B539', 'GB18030-2022');
+--CJK统一汉子扩充E
+select convert_from('\x9839B632', 'GB18030-2022');
+select convert_from('\x9933FE33', 'GB18030-2022');
+--CJK统一汉子扩充F
+select convert_from('\x99348138', 'GB18030-2022');
+select convert_from('\x9939F730', 'GB18030-2022');
+--康熙部首
+select convert_from('\x81398B32', 'GB18030-2022');
+select convert_from('\x8139A035', 'GB18030-2022');
+--西双版纳新傣文字符
+select convert_from('\x8134F932', 'GB18030-2022');
+select convert_from('\x81358437', 'GB18030-2022');
+--西双版纳老傣文字符
+select convert_from('\x81358B32', 'GB18030-2022');
+select convert_from('\x81359933', 'GB18030-2022');
+--傈僳文字符
+select convert_from('\x82369535', 'GB18030-2022');
+select convert_from('\x82369A32', 'GB18030-2022');
+--蒙古文BIRGA符号
+select convert_from('\x9034C538', 'GB18030-2022');
+select convert_from('\x9034C730', 'GB18030-2022');
+--滇东北苗文字符
+select convert_from('\x9232C636', 'GB18030-2022');
+select convert_from('\x9232D625', 'GB18030-2022');
+
+--插入了涉及2000升级到2022新增的字符举例
+create table in_test(id int, content text);
+
+--CBJ统一汉字扩充B
+insert into in_test (id , content)
+select 1,convert_from('\x95328236', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 2,convert_from('\x9835F336', 'GB18030-2022');
+
+--CJK统一汉字
+insert into in_test (id , content)
+select 3,convert_from('\x82358F33', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 4,convert_from('\x82359636', 'GB18030-2022');
+
+--CJK统一汉子扩充C
+insert into in_test (id , content)
+select 5,convert_from('\x9835F738', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 6,convert_from('\x98399E36', 'GB18030-2022');
+
+--CJK统一汉子扩充D
+insert into in_test (id , content)
+select 7,convert_from('\x98399F38', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 8,convert_from('\x9839B539', 'GB18030-2022');
+
+--CJK统一汉子扩充E
+insert into in_test (id , content)
+select 9,convert_from('\x9839B632', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 10,convert_from('\x9933FE33', 'GB18030-2022');
+
+--CJK统一汉子扩充F
+insert into in_test (id , content)
+select 11,convert_from('\x99348138', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 12,convert_from('\x9939F730', 'GB18030-2022');
+
+--康熙部首
+insert into in_test (id , content)
+select 13,convert_from('\x81398B32', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 14,convert_from('\x8139A035', 'GB18030-2022');
+
+--西双版纳新傣文字符
+insert into in_test (id , content)
+select 15,convert_from('\x8134F932', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 16,convert_from('\x81358437', 'GB18030-2022');
+
+--西双版纳老傣文字符
+insert into in_test (id , content)
+select 17,convert_from('\x81358B32', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 18,convert_from('\x81359933', 'GB18030-2022');
+
+--傈僳文字符
+insert into in_test (id , content)
+select 19,convert_from('\x82369535', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 20,convert_from('\x82369A32', 'GB18030-2022');
+
+--蒙古文BIRGA符号
+insert into in_test (id , content)
+select 21,convert_from('\x9034C538', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 22,convert_from('\x9034C730', 'GB18030-2022');
+
+--滇东北苗文字符
+insert into in_test (id , content)
+select 23,convert_from('\x9232C636', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 24,convert_from('\x9232D625', 'GB18030-2022');
+
+--显示这24个字符
+select * from in_test order by id;
+--查看GB18030-2022编码
+select convert_to(content, 'GB18030-2022') from in_test order by id;
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。
+select convert_to(content, 'utf8') from in_test order by id;
+
+drop table in_test;
+drop table tb_test;
+
+--3. 正常的增删改查语句测试
+create table 表1(id int, 人名 name);
+
+insert into 表1(id, 人名) values(1, '小明!');
+select * from 表1;
+
+alter table 表1 drop 人名;
+select * from 表1;
+
+alter table 表1 add 学校 text;
+insert into 表1(id , 学校) select 2, convert_to('@华为大学¥', 'GB18030-2022');
+select * from 表1;
+
+drop table 表1;
+
+--4. 不存在映射关系时
+select convert('\xFD308130', 'GB18030-2022', 'UTF8');
+select convert('\xFE39FE39', 'GB18030-2022', 'UTF8');
+
+--5. 测试gb18030_2022数据库中的字符串相关
+-- E021-03 character string literals
+SELECT 'first line'
+' - next line'
+ ' - third line'
+ AS "Three lines to one";
+
+-- illegal string continuation syntax
+SELECT 'first line'
+' - next line' /* this comment is not allowed here */
+' - third line'
+ AS "Illegal comment within continuation";
+
+-- Unicode escapes
+SET standard_conforming_strings TO on;
+
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+
+-- bytea
+SET bytea_output TO hex;
+SELECT E'\\xDeAdBeEf'::bytea;
+SELECT E'\\x De Ad Be Ef '::bytea;
+SELECT E'\\xDeAdBeE'::bytea;
+SELECT E'\\xDeAdBeEx'::bytea;
+SELECT E'\\xDe00BeEf'::bytea;
+SELECT E'DeAdBeEf'::bytea;
+SELECT E'De\\000dBeEf'::bytea;
+SELECT E'De\123dBeEf'::bytea;
+SELECT E'De\\123dBeEf'::bytea;
+SELECT E'De\\678dBeEf'::bytea;
+
+SET bytea_output TO escape;
+SELECT E'\\xDeAdBeEf'::bytea;
+SELECT E'\\x De Ad Be Ef '::bytea;
+SELECT E'\\xDe00BeEf'::bytea;
+SELECT E'DeAdBeEf'::bytea;
+SELECT E'De\\000dBeEf'::bytea;
+SELECT E'De\\123dBeEf'::bytea;
+
+SET bytea_output TO hex;
+
+SELECT CAST(name 'namefield' AS text) AS "text(name)";
+
+-- E021-09 trim function
+SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks";
+
+-- E021-06 substring expression
+SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
+
+-- PostgreSQL extension to allow using back reference in replace string;
+SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
+
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+
+-- change NULL-display back
+\pset null ''
+
+-- E021-11 position expression
+SELECT POSITION('4' IN '1234567890') = '4' AS "4";
+
+SELECT POSITION('5' IN '1234567890') = '5' AS "5";
+
+-- T312 character overlay function
+SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
+
+-- E061-04 like predicate
+SELECT 'hawkeye' LIKE 'h%' AS "true";
+SELECT 'hawkeye' NOT LIKE 'h%' AS "false";
+
+-- unused escape character
+SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true";
+SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS "false";
+
+--
+-- test ILIKE (case-insensitive LIKE)
+-- Be sure to form every test as an ILIKE/NOT ILIKE pair.
+--
+
+SELECT 'hawkeye' ILIKE 'h%' AS "true";
+SELECT 'hawkeye' NOT ILIKE 'h%' AS "false";
+
+--6. 使用字符串相关函数
+--重复字符串
+select repeat('中国', 3);
+
+--返回字符串的前n个字符
+select left('中国!number1', 7);
+
+--返回长度
+select length('中国!number1');
+
+--反转字符串
+select reverse('中国!number1');
+
+--md5算法加密
+select md5('中国!number1');
+
+-- test strpos
+SELECT strpos('abcdef', 'cd') AS "pos_3";
+SELECT strpos('abcdef', 'xy') AS "pos_0";
+
+SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
+
+select split_part('joeuser@mydatabase','@',3) AS "empty string";
+
+select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
+
+--返回字符串中第一个字符的十进制表示形式
+select ascii('xyz');
+select ascii('中xyz');
+select ascii('ḿxyz');
+
+-- 7. 检查GB18030-2022与GB18030的关系
+select convert('中国', 'GB18030', 'GB18030-2022');
+
+select convert('中国', 'GB18030-2022', 'GB18030');
+
+select convert('\xA8BC', 'GB18030-2022', 'UTF8');
+
+select convert('\xA8BC', 'GB18030', 'UTF8');
+
+\c regression
+clean connection to all force for database gb18030_2022;
+drop database gb18030_2022;
+
+-- 8. 在UTF8环境下检测GB18030-2022与UTF8的转换
+select convert('中国&华为*GaussDB', 'UTF8', 'GB18030-2022');
+
+select convert('ḿ', 'UTF8', 'GB18030-2022');
+
+-- 9. 测试create database时encoding与本地设置不匹配
+create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='en_US.utf-8' LC_CTYPE ='en_US.utf-8' TEMPLATE=template0;
+
+--10. 测试initdb
+\! rm -f @abs_bindir@/test_initdb.log
+\! mkdir -p @testtablespace@/test2
+\! mkdir -p @testtablespace@/test2/pg_location
+\! @abs_bindir@/gs_initdb -S -D @testtablespace@/test2 --nodename coorn2 -U test_initdb -w test@123 --locale=zh_CN.gb18030 -E GB18030_2022 >> @abs_bindir@/test_initdb2.log 2>&1
+\! cat @abs_bindir@/test_initdb2.log | grep ok
+\! rm -f @abs_bindir@/test_initdb2.log
+\! rm -rf @testtablespace@/test2
+
+--11. 测试升级回滚
+select oid, * from pg_conversion where conname like '%gb18030%' order by conname;
+
+select oid, * from pg_proc where proname like '%gb18030%' order by proname;
+
+--12. 看护client_encoding不能设置为GB18030_2022
+set client_encoding = GB18030_2022;
+
+ALTER SESSION SET NAMES 'GB18030_2022';
\ No newline at end of file
diff --git a/src/test/regress/output/recovery_2pc_tools.source b/src/test/regress/output/recovery_2pc_tools.source
index 6b0ac7591b0481f91dd21e1633c5863fbdb0a702..2fd58569c7f50efda606ffaa9ee9b5351da45919 100644
--- a/src/test/regress/output/recovery_2pc_tools.source
+++ b/src/test/regress/output/recovery_2pc_tools.source
@@ -78,6 +78,7 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c
backtrace_min_messages | enum | | |
backwrite_quantity | integer | 8kB | 128 | 131072
basebackup_timeout | integer | s | 0 | 2147483647
+ base_page_saved_interval | integer | | 5 | 2000
bbox_blanklist_items | string | | |
bbox_dump_count | integer | | 1 | 20
bbox_dump_path | string | | |
@@ -487,6 +488,8 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c
max_size_for_xlog_prune | integer | kB | 0 | 2147483647
max_stack_depth | integer | kB | 100 | 2147483647
max_standby_archive_delay | integer | ms | -1 | 2147483647
+ max_standby_base_page_size | int64 | | 0 | 576460752303423487
+ max_standby_lsn_info_size | int64 | | 0 | 576460752303423487
max_standby_streaming_delay | integer | ms | -1 | 2147483647
max_sync_workers_per_subscription | integer | | 0 | 262143
max_undo_workers | integer | | 1 | 100
@@ -660,6 +663,9 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c
ss_txnstatus_cache_size | integer | | 0 | 524288
ss_work_thread_count | integer | | 16 | 128
standard_conforming_strings | bool | | |
+ standby_force_recyle_ratio | real | | 0 | 1
+ standby_max_query_time | integer | s | 0 | 86400
+ standby_recycle_interval | integer | s | 0 | 86400
standby_shared_buffers_fraction | real | | 0.1 | 1
statement_timeout | integer | ms | 0 | 2147483647
stats_temp_directory | string | | |
diff --git a/src/test/regress/output/ts_gb18030_utf8.source b/src/test/regress/output/ts_gb18030_utf8.source
new file mode 100644
index 0000000000000000000000000000000000000000..d712a77da683068dd0837768576fd9e96f2b9063
--- /dev/null
+++ b/src/test/regress/output/ts_gb18030_utf8.source
@@ -0,0 +1,1142 @@
+create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0;
+\c gb18030_2022
+
+show server_encoding;
+ server_encoding
+-----------------
+ GB18030_2022
+(1 row)
+
+set client_encoding = 'UTF8';
+show client_encoding;
+ client_encoding
+-----------------
+ UTF8
+(1 row)
+
+--1. 编码映射关系发生改变的字符:
+-- 查看未发生变化的字符串:
+select convert_to('中国', 'GB18030-2022');
+ convert_to
+------------
+ \xd6d0b9fa
+(1 row)
+
+
+--插入了涉及GB18030-2000升级GB18030-2022后GB18030与UTF-8转换关系变更的19个字符。
+create table tb_test(id int, content text);
+
+insert into tb_test (id , content)
+select 1, convert_from('\xA8BC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 2, convert_from('\xA6D9', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 3, convert_from('\xA6DA', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 4, convert_from('\xA6DB', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 5, convert_from('\xA6DC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 6, convert_from('\xA6DD', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 7, convert_from('\xA6DE', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 8, convert_from('\xA6DF', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 9, convert_from('\xA6EC', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 10, convert_from('\xA6ED', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 11, convert_from('\xA6F3', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 12, convert_from('\xFE59', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 13, convert_from('\xFE61', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 14, convert_from('\xFE66', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 15, convert_from('\xFE67', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 16, convert_from('\xFE6D', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 17, convert_from('\xFE7E', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 18, convert_from('\xFE90', 'GB18030-2022');
+
+insert into tb_test (id , content)
+select 19, convert_from('\xFEA0', 'GB18030-2022');
+
+insert into tb_test (id , content) select 20, convert_from('\x8135F437', 'GB18030-2022');
+insert into tb_test (id , content) select 21, convert_from('\x84318236', 'GB18030-2022');
+insert into tb_test (id , content) select 22, convert_from('\x84318238', 'GB18030-2022');
+insert into tb_test (id , content) select 23, convert_from('\x84318237', 'GB18030-2022');
+insert into tb_test (id , content) select 24, convert_from('\x84318239', 'GB18030-2022');
+insert into tb_test (id , content) select 25, convert_from('\x84318330', 'GB18030-2022');
+insert into tb_test (id , content) select 26, convert_from('\x84318331', 'GB18030-2022');
+insert into tb_test (id , content) select 27, convert_from('\x84318332', 'GB18030-2022');
+insert into tb_test (id , content) select 28, convert_from('\x84318333', 'GB18030-2022');
+insert into tb_test (id , content) select 29, convert_from('\x84318334', 'GB18030-2022');
+insert into tb_test (id , content) select 30, convert_from('\x84318335', 'GB18030-2022');
+insert into tb_test (id , content) select 31, convert_from('\x82359037', 'GB18030-2022');
+insert into tb_test (id , content) select 32, convert_from('\x82359038', 'GB18030-2022');
+insert into tb_test (id , content) select 33, convert_from('\x82359039', 'GB18030-2022');
+insert into tb_test (id , content) select 34, convert_from('\x82359130', 'GB18030-2022');
+insert into tb_test (id , content) select 35, convert_from('\x82359131', 'GB18030-2022');
+insert into tb_test (id , content) select 36, convert_from('\x82359132', 'GB18030-2022');
+insert into tb_test (id , content) select 37, convert_from('\x82359133', 'GB18030-2022');
+insert into tb_test (id , content) select 38, convert_from('\x82359134', 'GB18030-2022');
+
+--显示这19个字符
+select * from tb_test order by id;
+ id | content
+----+---------
+ 1 | ḿ
+ 2 | ︐
+ 3 | ︒
+ 4 | ︑
+ 5 | ︓
+ 6 | ︔
+ 7 | ︕
+ 8 | ︖
+ 9 | ︗
+ 10 | ︘
+ 11 | ︙
+ 12 | 龴
+ 13 | 龵
+ 14 | 龶
+ 15 | 龷
+ 16 | 龸
+ 17 | 龹
+ 18 | 龺
+ 19 | 龻
+ 20 |
+ 21 |
+ 22 |
+ 23 |
+ 24 |
+ 25 |
+ 26 |
+ 27 |
+ 28 |
+ 29 |
+ 30 |
+ 31 |
+ 32 |
+ 33 |
+ 34 |
+ 35 |
+ 36 |
+ 37 |
+ 38 |
+(38 rows)
+
+--查看GB18030-2022编码
+select convert_to(content, 'GB18030-2022') from tb_test order by id;
+ convert_to
+------------
+ \xa8bc
+ \xa6d9
+ \xa6da
+ \xa6db
+ \xa6dc
+ \xa6dd
+ \xa6de
+ \xa6df
+ \xa6ec
+ \xa6ed
+ \xa6f3
+ \xfe59
+ \xfe61
+ \xfe66
+ \xfe67
+ \xfe6d
+ \xfe7e
+ \xfe90
+ \xfea0
+ \x8135f437
+ \x84318236
+ \x84318238
+ \x84318237
+ \x84318239
+ \x84318330
+ \x84318331
+ \x84318332
+ \x84318333
+ \x84318334
+ \x84318335
+ \x82359037
+ \x82359038
+ \x82359039
+ \x82359130
+ \x82359131
+ \x82359132
+ \x82359133
+ \x82359134
+(38 rows)
+
+--查看GB18030-2000编码
+select convert_to(content, 'GB18030') from tb_test order by id;
+ convert_to
+------------
+ \xa8bc
+ \xa6d9
+ \xa6da
+ \xa6db
+ \xa6dc
+ \xa6dd
+ \xa6de
+ \xa6df
+ \xa6ec
+ \xa6ed
+ \xa6f3
+ \xfe59
+ \xfe61
+ \xfe66
+ \xfe67
+ \xfe6d
+ \xfe7e
+ \xfe90
+ \xfea0
+ \x8135f437
+ \x84318236
+ \x84318238
+ \x84318237
+ \x84318239
+ \x84318330
+ \x84318331
+ \x84318332
+ \x84318333
+ \x84318334
+ \x84318335
+ \x82359037
+ \x82359038
+ \x82359039
+ \x82359130
+ \x82359131
+ \x82359132
+ \x82359133
+ \x82359134
+(38 rows)
+
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。
+select convert_to(content, 'utf8') from tb_test order by id;
+ convert_to
+------------
+ \xe1b8bf
+ \xefb890
+ \xefb892
+ \xefb891
+ \xefb893
+ \xefb894
+ \xefb895
+ \xefb896
+ \xefb897
+ \xefb898
+ \xefb899
+ \xe9beb4
+ \xe9beb5
+ \xe9beb6
+ \xe9beb7
+ \xe9beb8
+ \xe9beb9
+ \xe9beba
+ \xe9bebb
+ \xee9f87
+ \xee9e8d
+ \xee9e8e
+ \xee9e8f
+ \xee9e90
+ \xee9e91
+ \xee9e92
+ \xee9e93
+ \xee9e94
+ \xee9e95
+ \xee9e96
+ \xeea09e
+ \xeea0a6
+ \xeea0ab
+ \xeea0ac
+ \xeea0b2
+ \xeea183
+ \xeea194
+ \xeea1a4
+(38 rows)
+
+
+
+--2. 新增字符举例
+--CBJ统一汉字扩充B
+select convert_from('\x95328236', 'GB18030-2022');
+ convert_from
+--------------
+ 𠀀
+(1 row)
+
+select convert_from('\x9835F336', 'GB18030-2022');
+ convert_from
+--------------
+ 𪛖
+(1 row)
+
+--CJK统一汉字
+select convert_from('\x82358F33', 'GB18030-2022');
+ convert_from
+--------------
+ 龦
+(1 row)
+
+select convert_from('\x82359636', 'GB18030-2022');
+ convert_from
+--------------
+ 鿯
+(1 row)
+
+--CJK统一汉子扩充C
+select convert_from('\x9835F738', 'GB18030-2022');
+ convert_from
+--------------
+ 𪜀
+(1 row)
+
+select convert_from('\x98399E36', 'GB18030-2022');
+ convert_from
+--------------
+ 𫜴
+(1 row)
+
+--CJK统一汉子扩充D
+select convert_from('\x98399F38', 'GB18030-2022');
+ convert_from
+--------------
+ 𫝀
+(1 row)
+
+select convert_from('\x9839B539', 'GB18030-2022');
+ convert_from
+--------------
+ 𫠝
+(1 row)
+
+--CJK统一汉子扩充E
+select convert_from('\x9839B632', 'GB18030-2022');
+ convert_from
+--------------
+ 𫠠
+(1 row)
+
+select convert_from('\x9933FE33', 'GB18030-2022');
+ convert_from
+--------------
+ 𬺡
+(1 row)
+
+--CJK统一汉子扩充F
+select convert_from('\x99348138', 'GB18030-2022');
+ convert_from
+--------------
+ 𬺰
+(1 row)
+
+select convert_from('\x9939F730', 'GB18030-2022');
+ convert_from
+--------------
+ 𮯠
+(1 row)
+
+--康熙部首
+select convert_from('\x81398B32', 'GB18030-2022');
+ convert_from
+--------------
+ ⼀
+(1 row)
+
+select convert_from('\x8139A035', 'GB18030-2022');
+ convert_from
+--------------
+ ⿕
+(1 row)
+
+--西双版纳新傣文字符
+select convert_from('\x8134F932', 'GB18030-2022');
+ convert_from
+--------------
+ ᦀ
+(1 row)
+
+select convert_from('\x81358437', 'GB18030-2022');
+ convert_from
+--------------
+ ᧟
+(1 row)
+
+--西双版纳老傣文字符
+select convert_from('\x81358B32', 'GB18030-2022');
+ convert_from
+--------------
+ ᨠ
+(1 row)
+
+select convert_from('\x81359933', 'GB18030-2022');
+ convert_from
+--------------
+ ᪭
+(1 row)
+
+--傈僳文字符
+select convert_from('\x82369535', 'GB18030-2022');
+ convert_from
+--------------
+ ꓐ
+(1 row)
+
+select convert_from('\x82369A32', 'GB18030-2022');
+ convert_from
+--------------
+ ꓿
+(1 row)
+
+--蒙古文BIRGA符号
+select convert_from('\x9034C538', 'GB18030-2022');
+ convert_from
+--------------
+ 𑙠
+(1 row)
+
+select convert_from('\x9034C730', 'GB18030-2022');
+ convert_from
+--------------
+ 𑙬
+(1 row)
+
+--滇东北苗文字符
+select convert_from('\x9232C636', 'GB18030-2022');
+ convert_from
+--------------
+ 𖼀
+(1 row)
+
+select convert_from('\x9232D625', 'GB18030-2022');
+ convert_from
+--------------
+ 𖾏
+(1 row)
+
+
+--插入了涉及2000升级到2022新增的字符举例
+create table in_test(id int, content text);
+
+--CBJ统一汉字扩充B
+insert into in_test (id , content)
+select 1,convert_from('\x95328236', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 2,convert_from('\x9835F336', 'GB18030-2022');
+
+--CJK统一汉字
+insert into in_test (id , content)
+select 3,convert_from('\x82358F33', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 4,convert_from('\x82359636', 'GB18030-2022');
+
+--CJK统一汉子扩充C
+insert into in_test (id , content)
+select 5,convert_from('\x9835F738', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 6,convert_from('\x98399E36', 'GB18030-2022');
+
+--CJK统一汉子扩充D
+insert into in_test (id , content)
+select 7,convert_from('\x98399F38', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 8,convert_from('\x9839B539', 'GB18030-2022');
+
+--CJK统一汉子扩充E
+insert into in_test (id , content)
+select 9,convert_from('\x9839B632', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 10,convert_from('\x9933FE33', 'GB18030-2022');
+
+--CJK统一汉子扩充F
+insert into in_test (id , content)
+select 11,convert_from('\x99348138', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 12,convert_from('\x9939F730', 'GB18030-2022');
+
+--康熙部首
+insert into in_test (id , content)
+select 13,convert_from('\x81398B32', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 14,convert_from('\x8139A035', 'GB18030-2022');
+
+--西双版纳新傣文字符
+insert into in_test (id , content)
+select 15,convert_from('\x8134F932', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 16,convert_from('\x81358437', 'GB18030-2022');
+
+--西双版纳老傣文字符
+insert into in_test (id , content)
+select 17,convert_from('\x81358B32', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 18,convert_from('\x81359933', 'GB18030-2022');
+
+--傈僳文字符
+insert into in_test (id , content)
+select 19,convert_from('\x82369535', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 20,convert_from('\x82369A32', 'GB18030-2022');
+
+--蒙古文BIRGA符号
+insert into in_test (id , content)
+select 21,convert_from('\x9034C538', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 22,convert_from('\x9034C730', 'GB18030-2022');
+
+--滇东北苗文字符
+insert into in_test (id , content)
+select 23,convert_from('\x9232C636', 'GB18030-2022');
+
+insert into in_test (id , content)
+select 24,convert_from('\x9232D625', 'GB18030-2022');
+
+--显示这24个字符
+select * from in_test order by id;
+ id | content
+----+---------
+ 1 | 𠀀
+ 2 | 𪛖
+ 3 | 龦
+ 4 | 鿯
+ 5 | 𪜀
+ 6 | 𫜴
+ 7 | 𫝀
+ 8 | 𫠝
+ 9 | 𫠠
+ 10 | 𬺡
+ 11 | 𬺰
+ 12 | 𮯠
+ 13 | ⼀
+ 14 | ⿕
+ 15 | ᦀ
+ 16 | ᧟
+ 17 | ᨠ
+ 18 | ᪭
+ 19 | ꓐ
+ 20 | ꓿
+ 21 | 𑙠
+ 22 | 𑙬
+ 23 | 𖼀
+ 24 | 𖾏
+(24 rows)
+
+--查看GB18030-2022编码
+select convert_to(content, 'GB18030-2022') from in_test order by id;
+ convert_to
+------------
+ \x95328236
+ \x9835f336
+ \x82358f33
+ \x82359636
+ \x9835f738
+ \x98399e36
+ \x98399f38
+ \x9839b539
+ \x9839b632
+ \x9933fe33
+ \x99348138
+ \x9939f730
+ \x81398b32
+ \x8139a035
+ \x8134f932
+ \x81358437
+ \x81358b32
+ \x81359933
+ \x82369535
+ \x82369a32
+ \x9034c538
+ \x9034c730
+ \x9232c636
+ \x9232d625
+(24 rows)
+
+--转换为UTF-8编码,UTF-8是Unicode的计算机编码形式,想显示为Unicode编码还需进一步转换,数据库中不支持此功能。
+select convert_to(content, 'utf8') from in_test order by id;
+ convert_to
+------------
+ \xf0a08080
+ \xf0aa9b96
+ \xe9bea6
+ \xe9bfaf
+ \xf0aa9c80
+ \xf0ab9cb4
+ \xf0ab9d80
+ \xf0aba09d
+ \xf0aba0a0
+ \xf0acbaa1
+ \xf0acbab0
+ \xf0aeafa0
+ \xe2bc80
+ \xe2bf95
+ \xe1a680
+ \xe1a79f
+ \xe1a8a0
+ \xe1aaad
+ \xea9390
+ \xea93bf
+ \xf09199a0
+ \xf09199ac
+ \xf096bc80
+ \xf096be8f
+(24 rows)
+
+
+drop table in_test;
+drop table tb_test;
+
+--3. 正常的增删改查语句测试
+create table 表1(id int, 人名 name);
+
+insert into 表1(id, 人名) values(1, '小明!');
+select * from 表1;
+ id | 人名
+----+--------
+ 1 | 小明!
+(1 row)
+
+
+alter table 表1 drop 人名;
+select * from 表1;
+ id
+----
+ 1
+(1 row)
+
+
+alter table 表1 add 学校 text;
+insert into 表1(id , 学校) select 2, convert_to('@华为大学¥', 'GB18030-2022');
+select * from 表1;
+ id | 学校
+----+--------------------------
+ 1 |
+ 2 | \x40bbaaceaab4f3d1a7a3a4
+(2 rows)
+
+
+drop table 表1;
+
+--4. 不存在映射关系时
+select convert('\xFD308130', 'GB18030-2022', 'UTF8');
+ERROR: character with byte sequence 0xfd 0x30 0x81 0x30 in encoding "GB18030_2022" has no equivalent in encoding "UTF8"
+CONTEXT: referenced column: convert
+select convert('\xFE39FE39', 'GB18030-2022', 'UTF8');
+ERROR: character with byte sequence 0xfe 0x39 0xfe 0x39 in encoding "GB18030_2022" has no equivalent in encoding "UTF8"
+CONTEXT: referenced column: convert
+
+--5. 测试gb18030_2022数据库中的字符串相关
+-- E021-03 character string literals
+SELECT 'first line'
+' - next line'
+ ' - third line'
+ AS "Three lines to one";
+ Three lines to one
+-------------------------------------
+ first line - next line - third line
+(1 row)
+
+
+-- illegal string continuation syntax
+SELECT 'first line'
+' - next line' /* this comment is not allowed here */
+' - third line'
+ AS "Illegal comment within continuation";
+ERROR: syntax error at or near "' - third line'"
+LINE 3: ' - third line'
+ ^
+
+-- Unicode escapes
+SET standard_conforming_strings TO on;
+
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+ data
+------
+ data
+(1 row)
+
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+ dat\+000061
+-------------
+ dat\+000061
+(1 row)
+
+
+-- bytea
+SET bytea_output TO hex;
+SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+------------
+ \xdeadbeef
+(1 row)
+
+SELECT E'\\x De Ad Be Ef '::bytea;
+ bytea
+------------
+ \xdeadbeef
+(1 row)
+
+SELECT E'\\xDeAdBeE'::bytea;
+ERROR: invalid hexadecimal data: odd number of digits
+LINE 1: SELECT E'\\xDeAdBeE'::bytea;
+ ^
+CONTEXT: referenced column: bytea
+SELECT E'\\xDeAdBeEx'::bytea;
+ERROR: invalid hexadecimal digit: "x"
+LINE 1: SELECT E'\\xDeAdBeEx'::bytea;
+ ^
+CONTEXT: referenced column: bytea
+SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+------------
+ \xde00beef
+(1 row)
+
+SELECT E'DeAdBeEf'::bytea;
+ bytea
+--------------------
+ \x4465416442654566
+(1 row)
+
+SELECT E'De\\000dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465006442654566
+(1 row)
+
+SELECT E'De\123dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465536442654566
+(1 row)
+
+SELECT E'De\\123dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465536442654566
+(1 row)
+
+SELECT E'De\\678dBeEf'::bytea;
+ERROR: invalid input syntax for type bytea
+LINE 1: SELECT E'De\\678dBeEf'::bytea;
+ ^
+CONTEXT: referenced column: bytea
+
+SET bytea_output TO escape;
+SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+------------------
+ \336\255\276\357
+(1 row)
+
+SELECT E'\\x De Ad Be Ef '::bytea;
+ bytea
+------------------
+ \336\255\276\357
+(1 row)
+
+SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+------------------
+ \336\000\276\357
+(1 row)
+
+SELECT E'DeAdBeEf'::bytea;
+ bytea
+----------
+ DeAdBeEf
+(1 row)
+
+SELECT E'De\\000dBeEf'::bytea;
+ bytea
+-------------
+ De\000dBeEf
+(1 row)
+
+SELECT E'De\\123dBeEf'::bytea;
+ bytea
+----------
+ DeSdBeEf
+(1 row)
+
+
+SET bytea_output TO hex;
+
+SELECT CAST(name 'namefield' AS text) AS "text(name)";
+ text(name)
+------------
+ namefield
+(1 row)
+
+
+-- E021-09 trim function
+SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks";
+ bunch o blanks
+----------------
+ t
+(1 row)
+
+
+-- E021-06 substring expression
+SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
+ 34567890
+----------
+ t
+(1 row)
+
+
+-- PostgreSQL extension to allow using back reference in replace string;
+SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
+ regexp_replace
+----------------
+ (111) 222-3333
+(1 row)
+
+
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+ regexp_matches
+----------------
+ {bar,beque}
+(1 row)
+
+
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+ foo | length
+--------+--------
+ the | 3
+ quick | 5
+ brown | 5
+ fox | 3
+ jumped | 6
+ over | 4
+ the | 3
+ lazy | 4
+ dog | 3
+(9 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+ regexp_split_to_array
+------------------------------------------------
+ {the,quick,brown,fox,jumped,over,the,lazy,dog}
+(1 row)
+
+
+-- change NULL-display back
+\pset null ''
+
+-- E021-11 position expression
+SELECT POSITION('4' IN '1234567890') = '4' AS "4";
+ 4
+---
+ t
+(1 row)
+
+
+SELECT POSITION('5' IN '1234567890') = '5' AS "5";
+ 5
+---
+ t
+(1 row)
+
+
+-- T312 character overlay function
+SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
+ abc45f
+--------
+ abc45f
+(1 row)
+
+
+-- E061-04 like predicate
+SELECT 'hawkeye' LIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye' NOT LIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+
+-- unused escape character
+SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+
+--
+-- test ILIKE (case-insensitive LIKE)
+-- Be sure to form every test as an ILIKE/NOT ILIKE pair.
+--
+
+SELECT 'hawkeye' ILIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye' NOT ILIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+
+--6. 使用字符串相关函数
+--重复字符串
+select repeat('中国', 3);
+ repeat
+--------------
+ 中国中国中国
+(1 row)
+
+
+--返回字符串的前n个字符
+select left('中国!number1', 7);
+ left
+------------
+ 中国!numb
+(1 row)
+
+
+--返回长度
+select length('中国!number1');
+ length
+--------
+ 10
+(1 row)
+
+
+--反转字符串
+select reverse('中国!number1');
+ reverse
+---------------
+ 1rebmun!国中
+(1 row)
+
+
+--md5算法加密
+select md5('中国!number1');
+ md5
+----------------------------------
+ 764c69059680eb8f52946f9f4936737a
+(1 row)
+
+
+-- test strpos
+SELECT strpos('abcdef', 'cd') AS "pos_3";
+ pos_3
+-------
+ 3
+(1 row)
+
+SELECT strpos('abcdef', 'xy') AS "pos_0";
+ pos_0
+-------
+ 0
+(1 row)
+
+
+SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
+ ya123da123doo
+---------------
+ ya123da123doo
+(1 row)
+
+
+select split_part('joeuser@mydatabase','@',3) AS "empty string";
+ empty string
+--------------
+
+(1 row)
+
+
+select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
+ ffffffff
+----------
+ ffffffff
+(1 row)
+
+
+--返回字符串中第一个字符的十进制表示形式
+select ascii('xyz');
+ ascii
+-------
+ 120
+(1 row)
+
+select ascii('中xyz');
+ERROR: requested character too large
+CONTEXT: referenced column: ascii
+select ascii('ḿxyz');
+ERROR: requested character too large
+CONTEXT: referenced column: ascii
+
+-- 7. 检查GB18030-2022与GB18030的关系
+select convert('中国', 'GB18030', 'GB18030-2022');
+ convert
+------------
+ \xd6d0b9fa
+(1 row)
+
+
+select convert('中国', 'GB18030-2022', 'GB18030');
+ convert
+------------
+ \xd6d0b9fa
+(1 row)
+
+
+select convert('\xA8BC', 'GB18030-2022', 'UTF8');
+ convert
+----------
+ \xe1b8bf
+(1 row)
+
+
+select convert('\xA8BC', 'GB18030', 'UTF8');
+ convert
+----------
+ \xee9f87
+(1 row)
+
+
+\c regression
+clean connection to all force for database gb18030_2022;
+drop database gb18030_2022;
+
+-- 8. 在UTF8环境下检测GB18030-2022与UTF8的转换
+select convert('中国&华为*GaussDB', 'UTF8', 'GB18030-2022');
+ convert
+--------------------------------------
+ \xd6d0b9fa26bbaaceaa2a47617573734442
+(1 row)
+
+
+select convert('ḿ', 'UTF8', 'GB18030-2022');
+ convert
+---------
+ \xa8bc
+(1 row)
+
+
+-- 9. 测试create database时encoding与本地设置不匹配
+create database gb18030_2022 encoding='gb18030-2022' LC_COLLATE='en_US.utf-8' LC_CTYPE ='en_US.utf-8' TEMPLATE=template0;
+ERROR: encoding "GB18030_2022" does not match locale "en_US.utf-8"
+DETAIL: The chosen LC_CTYPE setting requires encoding "UTF8".
+
+--10. 测试initdb
+\! rm -f @abs_bindir@/test_initdb.log
+\! mkdir -p @testtablespace@/test2
+\! mkdir -p @testtablespace@/test2/pg_location
+\! @abs_bindir@/gs_initdb -S -D @testtablespace@/test2 --nodename coorn2 -U test_initdb -w test@123 --locale=zh_CN.gb18030 -E GB18030_2022 >> @abs_bindir@/test_initdb2.log 2>&1
+\! cat @abs_bindir@/test_initdb2.log | grep ok
+ok
+creating subdirectories ... in ordinary occasionok
+creating configuration files ... ok
+ok
+initializing pg_authid ... ok
+setting password ... ok
+initializing dependencies ... ok
+loading PL/pgSQL server-side language ... ok
+creating system views ... ok
+creating performance views ... ok
+loading system objects' descriptions ... ok
+creating collations ... ok
+creating conversions ... ok
+creating dictionaries ... ok
+setting privileges on built-in objects ... ok
+initialize global configure for bucketmap length ... ok
+creating information schema ... ok
+loading foreign-data wrapper for distfs access ... ok
+loading foreign-data wrapper for log access ... ok
+loading hstore extension ... ok
+loading security plugin ... ok
+update system tables ... ok
+creating snapshots catalog ... ok
+vacuuming database template1 ... ok
+copying template1 to template0 ... ok
+copying template1 to postgres ... ok
+freezing database template0 ... ok
+freezing database template1 ... ok
+freezing database postgres ... ok
+\! rm -f @abs_bindir@/test_initdb2.log
+\! rm -rf @testtablespace@/test2
+
+--11. 测试升级回滚
+select oid, * from pg_conversion where conname like '%gb18030%' order by conname;
+ oid | conname | connamespace | conowner | conforencoding | contoencoding | conproc | condefault
+-------+----------------------+--------------+----------+----------------+---------------+----------------------+------------
+--? .* | gb18030_2022_to_utf8 | 11 | 10 | 37 | 7 | gb18030_2022_to_utf8 | t
+--? .* | gb18030_to_utf8 | 11 | 10 | 36 | 7 | gb18030_to_utf8 | t
+--? .* | utf8_to_gb18030 | 11 | 10 | 7 | 36 | utf8_to_gb18030 | t
+--? .* | utf8_to_gb18030_2022 | 11 | 10 | 7 | 37 | utf8_to_gb18030_2022 | t
+(4 rows)
+
+
+select oid, * from pg_proc where proname like '%gb18030%' order by proname;
+ oid | proname | pronamespace | proowner | prolang | procost | prorows | provariadic | protransform | proisagg | proiswindow | prosecdef | proleakproof | proisstrict | proretset | provolatile | pronargs | pronargdefaults | prorettype | proargtypes | proallargtypes | proargmodes | proargnames | proargdefaults | prosrc | probin | proconfig | proacl | prodefaultargpos | fencedmode | proshippable | propackage | prokind | proargsrc | propackageid | proisprivate | proargtypesext | prodefaultargposext | allargtypes | allargtypesext
+-------+----------------------+--------------+----------+---------+---------+---------+-------------+--------------+----------+-------------+-----------+--------------+-------------+-----------+-------------+----------+-----------------+------------+--------------------+----------------+-------------+-------------+----------------+----------------------+--------------------------+-----------+--------+------------------+------------+--------------+------------+---------+-----------+--------------+--------------+----------------+---------------------+--------------------+----------------
+--? .* | gb18030_2022_to_utf8 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | gb18030_2022_to_utf8 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 |
+--? .* | gb18030_to_utf8 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | gb18030_to_utf8 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 |
+--? .* | utf8_to_gb18030 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | utf8_to_gb18030 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 |
+--? .* | utf8_to_gb18030_2022 | 11 | 10 | 13 | 1 | 0 | 0 | - | f | f | f | f | t | f | v | 5 | 0 | 2278 | 23 23 2275 2281 23 | | | | | utf8_to_gb18030_2022 | $libdir/utf8_and_gb18030 | | | | f | f | f | f | | 0 | f | | | 23 23 2275 2281 23 |
+(4 rows)
+
+
+--12. 看护client_encoding不能设置为GB18030_2022
+set client_encoding = GB18030_2022;
+ERROR: invalid value for parameter "client_encoding": "gb18030_2022"
+
+ALTER SESSION SET NAMES 'GB18030_2022';
+ERROR: invalid value for parameter "client_encoding": "GB18030_2022"
diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0
index d3135fa2d2e1e366d5085dc09888c42d1f07528a..90cdb60ca015961b513ffb8da8804aa5e46e9552 100644
--- a/src/test/regress/parallel_schedule0
+++ b/src/test/regress/parallel_schedule0
@@ -1099,3 +1099,4 @@ test: enable_expr_fusion_flatten
# test for on update timestamp and generated column
test: on_update_session1 on_update_session2
+test: ts_gb18030_utf8
\ No newline at end of file
diff --git a/src/test/regress/sql/ts_gb18030_utf8.sql b/src/test/regress/sql/ts_gb18030_utf8.sql
deleted file mode 100644
index 1b14f27692ca76ade6d55327de968f7a871abfe4..0000000000000000000000000000000000000000
--- a/src/test/regress/sql/ts_gb18030_utf8.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-create database gb18030 encoding='gb18030' LC_COLLATE='zh_CN.GB18030' LC_CTYPE ='zh_CN.GB18030' TEMPLATE=template0;
-\c gb18030
-
-show server_encoding;
-create table tb_test(id int, content text);
-
-insert into tb_test values(1, 'abcdefghigkABCDEFGHIJK');
-insert into tb_test values(2, '12');
-insert into tb_test values(3, 'ĺ');
-insert into tb_test values(4, '019808');
-insert into tb_test values(5, '94 95 92 94 97 98 99 90');
-insert into tb_test values(5, '25');
-
-select * from tb_test order by id;
-select convert_to(content, 'utf8') from tb_test order by id;
-
-drop table tb_test;
-\c regression
-clean connection to all force for database gb18030;
-drop database gb18030;