From fedcde61f37e1c292f0578746a17d54cb335617d Mon Sep 17 00:00:00 2001 From: openGaussDev Date: Wed, 30 Aug 2023 11:48:40 +0800 Subject: [PATCH] fix problems of RTO STANDBY READ Offering: openGaussDevMore detail: fix problems of RTO STANDBY READ --- contrib/ndpplugin/common.h | 2 + contrib/pagehack/pagehack.cpp | 9 +- contrib/pg_xlogdump/pg_xlogdump.cpp | 7 +- src/bin/gs_guc/cluster_guc.conf | 8 +- src/common/backend/catalog/pg_partition.cpp | 3 +- src/common/backend/catalog/storage.cpp | 2 + src/common/backend/utils/cache/inval.cpp | 27 +- .../utils/cache/knl_globalsysdbcache.cpp | 4 +- src/common/backend/utils/cache/relcache.cpp | 6 +- src/common/backend/utils/cache/relmapper.cpp | 13 +- src/common/backend/utils/error/be_module.cpp | 2 +- .../backend/utils/misc/guc/guc_storage.cpp | 14 +- src/common/backend/utils/time/snapmgr.cpp | 52 +- .../optimizer/commands/dbcommands.cpp | 9 +- .../optimizer/commands/tablespace.cpp | 7 +- .../process/postmaster/postmaster.cpp | 48 +- src/gausskernel/process/stream/streamMain.cpp | 15 + src/gausskernel/process/tcop/postgres.cpp | 2 +- .../process/threadpool/knl_instance.cpp | 9 +- .../process/threadpool/knl_session.cpp | 3 + .../process/threadpool/knl_thread.cpp | 1 + .../process/threadpool/threadpool_worker.cpp | 4 + .../storage/access/heap/heapam.cpp | 7 +- .../storage/access/heap/tuptoaster.cpp | 6 +- .../storage/access/nbtree/nbtpage.cpp | 1 + .../storage/access/nbtree/nbtxlog.cpp | 2 +- .../storage/access/redo/redo_storage.cpp | 1 + .../storage/access/redo/redo_xlogutils.cpp | 27 +- .../storage/access/redo/standby_read/Makefile | 4 +- .../redo/standby_read/base_page_proc.cpp | 21 +- .../redo/standby_read/block_info_proc.cpp | 151 +++--- .../standby_read/lsn_info_double_list.cpp | 1 - .../redo/standby_read/lsn_info_proc.cpp | 71 ++- .../standby_read/standby_read_delay_ddl.cpp | 448 +++++++++++++++++ .../standby_read/standby_read_interface.cpp | 475 +++++++++++++++++- .../storage/access/spgist/spgxlog.cpp | 2 +- 
.../storage/access/transam/clog.cpp | 1 + .../access/transam/extreme_rto/dispatcher.cpp | 39 +- .../transam/extreme_rto/exrto_recycle.cpp | 15 +- .../access/transam/extreme_rto/page_redo.cpp | 309 ++++++++---- .../storage/access/transam/multi_redo_api.cpp | 3 +- .../access/transam/multi_redo_settings.cpp | 5 +- .../ondemand_extreme_rto/page_redo.cpp | 5 +- .../transam/parallel_recovery/dispatcher.cpp | 12 +- .../storage/access/transam/xact.cpp | 118 ++++- .../storage/access/transam/xlog.cpp | 84 ++-- .../storage/access/transam/xlogutils.cpp | 16 +- .../storage/access/ubtree/ubtxlog.cpp | 4 +- .../access/ustore/knl_uextremeredo.cpp | 38 +- .../storage/access/ustore/knl_utuptoaster.cpp | 4 +- .../storage/access/ustore/knl_uvisibility.cpp | 8 + .../access/ustore/undo/knl_uundoapi.cpp | 1 + .../access/ustore/undo/knl_uundorecycle.cpp | 25 +- .../access/ustore/undo/knl_uundozone.cpp | 39 +- src/gausskernel/storage/buffer/bufmgr.cpp | 132 +++-- src/gausskernel/storage/ipc/procarray.cpp | 60 ++- src/gausskernel/storage/ipc/sinval.cpp | 10 +- src/gausskernel/storage/ipc/sinvaladt.cpp | 24 +- src/gausskernel/storage/lmgr/lmgr.cpp | 8 +- src/gausskernel/storage/lmgr/lwlocknames.txt | 3 + src/gausskernel/storage/lmgr/proc.cpp | 7 +- src/gausskernel/storage/page/gs_xlogdump.cpp | 4 +- .../storage/replication/basebackup.cpp | 18 + .../heartbeat/libpq/fe-connect.cpp | 2 + src/gausskernel/storage/replication/slot.cpp | 1 + .../storage/replication/walreceiver.cpp | 1 + .../storage/replication/walsender.cpp | 1 + src/gausskernel/storage/smgr/md.cpp | 3 +- .../storage/smgr/storage_exrto_file.cpp | 181 ++++--- src/include/access/extreme_rto/dispatcher.h | 2 +- .../standby_read/block_info_meta.h | 6 +- .../standby_read/standby_read_base.h | 46 ++ .../standby_read/standby_read_delay_ddl.h | 40 ++ src/include/access/multi_redo_api.h | 9 +- src/include/access/rmgrlist.h | 2 +- .../access/ustore/undo/knl_uundozone.h | 4 +- src/include/access/xact.h | 6 +- src/include/access/xlogproc.h | 4 
+- src/include/commands/dbcommands.h | 1 + .../knl/knl_guc/knl_instance_attr_storage.h | 2 +- src/include/knl/knl_instance.h | 15 +- src/include/knl/knl_session.h | 3 - src/include/knl/knl_thread.h | 15 +- src/include/storage/buf/bufmgr.h | 4 +- src/include/storage/lmgr.h | 3 +- src/include/storage/proc.h | 8 +- src/include/storage/procarray.h | 5 +- src/include/storage/sinval.h | 8 +- src/include/storage/sinvaladt.h | 2 +- src/include/storage/smgr/smgr.h | 2 +- src/include/utils/be_module.h | 2 + src/include/utils/inval.h | 3 +- src/include/utils/snapmgr.h | 3 + src/include/utils/snapshot.h | 7 +- src/test/ha/GNUmakefile | 3 + src/test/ha/ha_exrto_standby_read | 2 + src/test/ha/ha_schedule_single_standby_read | 1 + .../ha/results/exrtostandbyread/.gitignore | 4 + src/test/ha/run_ha_exrto_standby_read.sh | 59 +++ src/test/ha/run_ha_single_standby_read.sh | 50 ++ src/test/ha/standby_env.sh | 2 +- .../single_standby_read_base.sh | 144 ++++++ .../start_exrto_standby_read.sh | 81 +++ .../start_exrto_standby_read_multi_data.sh | 91 ++++ src/test/regress/CMakeLists.txt | 1 + src/test/regress/parallel_schedule0 | 2 +- src/test/regress/parallel_schedule0A | 2 +- src/test/regress/single_check.sh | 4 +- 108 files changed, 2652 insertions(+), 641 deletions(-) create mode 100644 src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp create mode 100644 src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h create mode 100644 src/test/ha/ha_exrto_standby_read create mode 100644 src/test/ha/ha_schedule_single_standby_read create mode 100644 src/test/ha/results/exrtostandbyread/.gitignore create mode 100644 src/test/ha/run_ha_exrto_standby_read.sh create mode 100644 src/test/ha/run_ha_single_standby_read.sh create mode 100644 src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh create mode 100644 src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read.sh create mode 100644 
src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read_multi_data.sh diff --git a/contrib/ndpplugin/common.h b/contrib/ndpplugin/common.h index 7291d0615a..0631b34b2b 100644 --- a/contrib/ndpplugin/common.h +++ b/contrib/ndpplugin/common.h @@ -242,6 +242,8 @@ typedef unsigned int Oid; #define InvalidOid 0 #define InvalidBktId (-1) +#define ExrtoReadStartLSNBktId (-5) +#define ExrtoReadEndLSNBktId (-6) typedef Oid regproc; typedef regproc RegProcedure; diff --git a/contrib/pagehack/pagehack.cpp b/contrib/pagehack/pagehack.cpp index 9c527326ff..662b905c23 100644 --- a/contrib/pagehack/pagehack.cpp +++ b/contrib/pagehack/pagehack.cpp @@ -5172,7 +5172,7 @@ static bool parse_lsn_info_meta(const char *filename) } long pagenum = size / BLCKSZ; - fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum); + fprintf(stdout, "file length is %ld, pagenum is %ld\n", size, pagenum); for (loop = 1; loop <= pagenum; loop++) { fprintf(stdout, "Page %u information:\n", loop); @@ -5231,8 +5231,9 @@ static void parse_block_info_content(BlockMetaInfo *blockInfo) indents[indentLevel], blockInfo->timeline, blockInfo->record_num); fprintf(stdout, "%smin_lsn: %lu, max_lsn: %lu, flags: %u\n", indents[indentLevel], blockInfo->min_lsn, blockInfo->max_lsn, blockInfo->flags); - fprintf(stdout, "%slsn_info_list: prev %lu, next: %lu\n", - indents[indentLevel], blockInfo->lsn_info_list.prev, blockInfo->lsn_info_list.next); + fprintf(stdout, "%slsn_info_list: prev %lu, next: %lu. 
base_page_info_list: prev %lu, next: %lu\n", + indents[indentLevel], blockInfo->lsn_info_list.prev, blockInfo->lsn_info_list.next, + blockInfo->base_page_info_list.prev, blockInfo->base_page_info_list.next); } static bool parse_block_info_meta(const char *filename) @@ -5257,7 +5258,7 @@ static bool parse_block_info_meta(const char *filename) return false; } long pagenum = size / BLCKSZ; - fprintf(stdout, "file length is %ld, blknum is %ld\n", size, pagenum); + fprintf(stdout, "file length is %ld, pagenum is %ld\n", size, pagenum); for (loop = 0; loop < pagenum; loop++) { fprintf(stdout, "Page %u information:\n", loop); diff --git a/contrib/pg_xlogdump/pg_xlogdump.cpp b/contrib/pg_xlogdump/pg_xlogdump.cpp index 5d1ce64c9e..9a33e96858 100644 --- a/contrib/pg_xlogdump/pg_xlogdump.cpp +++ b/contrib/pg_xlogdump/pg_xlogdump.cpp @@ -85,7 +85,7 @@ static void XLogDumpXLogRead(char* directory, TimeLineID timeline_id, XLogRecPtr static int XLogDumpReadPage(XLogReaderState* state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetPtr, char* readBuff, TimeLineID* curFileTLI, char* xlog_path = NULL); static void XLogDumpCountRecord(XLogDumpConfig* config, XLogDumpStats* stats, XLogReaderState* record); -static void XLogDumpDisplayRecord(XLogDumpConfig* config, XLogReaderState* record); +void XLogDumpDisplayRecord(XLogDumpConfig* config, XLogReaderState* record); static void XLogDumpStatsRow(const char* name, uint64 n, uint64 total_count, uint64 rec_len, uint64 total_rec_len, uint64 fpi_len, uint64 total_fpi_len, uint64 tot_len, uint64 total_len); static void XLogDumpDisplayStats(XLogDumpConfig* config, XLogDumpStats* stats); @@ -534,7 +534,7 @@ static const char* XLogGetForkNames(ForkNumber forknum) /* * Print a record to stdout */ -static void XLogDumpDisplayRecord(XLogDumpConfig* config, XLogReaderState* record) +void XLogDumpDisplayRecord(XLogDumpConfig* config, XLogReaderState* record) { const RmgrDescData* desc = &RmgrDescTable[XLogRecGetRmid(record)]; RelFileNode 
rnode; @@ -577,7 +577,8 @@ static void XLogDumpDisplayRecord(XLogDumpConfig* config, XLogReaderState* recor XLogRecGetPhysicalBlock(record, block_id, &seg_fileno, &seg_blockno); // output format: ", blkref #%u: rel %u/%u/%u/%d storage %s fork %s blk %u (phy loc %u/%u) lastlsn %X/%X" - printf(", blkref #%d: rel %u/%u/%u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode); + printf(", blkref #%d: rel %u/%u/%u/%d/%d, forknum:%d", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, + rnode.bucketNode, rnode.opt, forknum); if (IsBucketFileNode(rnode)) { printf("/%d", rnode.bucketNode); } diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index b52f72f7f3..68e265ef25 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -104,7 +104,7 @@ dirty_page_percent_max|real|0.1,1|NULL|NULL| group_concat_max_len|int64|0,9223372036854775807|NULL|NULL check_function_bodies|bool|0,0|NULL|NULL| checkpoint_completion_target|real|0,1|NULL|NULL| -standby_force_recyle_ratio|real|0,1|NULL|NULL| +standby_force_recycle_ratio|real|0,1|NULL|NULL| checkpoint_segments|int|1,2147483646|NULL|NULL| checkpoint_timeout|int|30,3600|s|NULL| checkpoint_warning|int|0,2147483647|s|NULL| @@ -272,7 +272,7 @@ hadr_max_size_for_xlog_receiver|int|0,2147483647|kB|NULL| hadr_recovery_time_target|int|0,3600|NULL|NULL| standby_recycle_interval|int|0,86400|s|NULL| standby_max_query_time|int|0,86400|s|NULL| -base_page_saved_interval|int|4,2000|NULL|NULL| +base_page_saved_interval|int|5,2000|NULL|NULL| hadr_recovery_point_target|int|0,3600|NULL|NULL| hadr_super_user_record_path|string|0,0|NULL|NULL| hll_default_log2m|int|10,16|NULL|NULL| @@ -713,8 +713,8 @@ undo_zone_count|int|0,1048576|NULL|NULL| stream_cluster_run_mode|enum|cluster_primary,cluster_standby|NULL|NULL| xlog_file_size|int64|1048576,576460752303423487|B|The value must be an integer multiple of 16777216(16M)| xlog_file_path|string|0,0|NULL|NULL| 
-max_standby_base_page_size|int64|0,576460752303423487|B|NULL| -max_standby_lsn_info_size|int64|0,576460752303423487|B|NULL| +max_standby_base_page_size|int64|1073741824,576460752303423487|B|NULL| +max_standby_lsn_info_size|int64|1073741824,576460752303423487|B|NULL| plsql_show_all_error|bool|0,0|NULL|NULL| partition_page_estimation|bool|0,0|NULL|NULL| enable_auto_clean_unique_sql|bool|0,0|NULL|NULL| diff --git a/src/common/backend/catalog/pg_partition.cpp b/src/common/backend/catalog/pg_partition.cpp index ed1dc256d4..87a5401d27 100644 --- a/src/common/backend/catalog/pg_partition.cpp +++ b/src/common/backend/catalog/pg_partition.cpp @@ -25,6 +25,7 @@ */ #include "access/sysattr.h" +#include "access/multi_redo_api.h" #include "catalog/namespace.h" #include "catalog/pg_partition_fn.h" #include "catalog/pg_partition.h" @@ -1007,7 +1008,7 @@ List* searchPgPartitionByParentId(char parttype, Oid parentId, ScanDirection dir */ Snapshot snapshot = NULL; snapshot = SnapshotNow; - if (HistoricSnapshotActive()) { + if (HistoricSnapshotActive() || IS_EXRTO_RECOVERY_IN_PROGRESS) { snapshot = GetCatalogSnapshot(); } diff --git a/src/common/backend/catalog/storage.cpp b/src/common/backend/catalog/storage.cpp index 6d6d559fb6..d27c6aa14a 100644 --- a/src/common/backend/catalog/storage.cpp +++ b/src/common/backend/catalog/storage.cpp @@ -1269,7 +1269,9 @@ void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRec smgrcreate(reln, MAIN_FORKNUM, true); UpdateMinRecoveryPoint(lsn, false); LockRelFileNode(rnode, AccessExclusiveLock); + (void)LWLockAcquire(RedoTruncateLock, LW_EXCLUSIVE); smgrtruncate(reln, MAIN_FORKNUM, blkno); + LWLockRelease(RedoTruncateLock); XLogTruncateRelation(rnode, MAIN_FORKNUM, blkno); Relation rel = CreateFakeRelcacheEntry(rnode); if (smgrexists(reln, FSM_FORKNUM)) diff --git a/src/common/backend/utils/cache/inval.cpp b/src/common/backend/utils/cache/inval.cpp index f50ca6122d..e15dcdd280 100644 --- 
a/src/common/backend/utils/cache/inval.cpp +++ b/src/common/backend/utils/cache/inval.cpp @@ -117,6 +117,7 @@ #include "utils/syscache.h" #include "access/heapam.h" #include "catalog/pgxc_class.h" +#include "access/multi_redo_api.h" /* * To minimize palloc traffic, we keep pending requests in successively- @@ -909,6 +910,20 @@ void AcceptInvalidationMessages() --u_sess->inval_cxt.DeepthInAcceptInvalidationMessage; } +void reset_invalidation_cache() +{ + if (EnableLocalSysCache()) { + if (!IS_THREAD_POOL_WORKER) { + InvalidateSystemCaches(); + } else { + InvalidateThreadSystemCaches(); + InvalidateSessionSystemCaches(); + } + return; + } + InvalidateSystemCaches(); +} + /* * AtStart_Inval * Initialize inval lists at start of a main transaction. @@ -1057,12 +1072,16 @@ int xactGetCommittedInvalidationMessages(SharedInvalidationMessage** msgs, bool* * before and after we send the SI messages. See AtEOXact_Inval() */ void ProcessCommittedInvalidationMessages( - SharedInvalidationMessage* msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid) + SharedInvalidationMessage* msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid, XLogRecPtr lsn) { if (nmsgs <= 0) { return; } + if (!IS_EXRTO_READ) { + lsn = 0; + } + ereport(trace_recovery(DEBUG4), (errmsg("replaying commit with %d messages%s", nmsgs, @@ -1083,7 +1102,7 @@ void ProcessCommittedInvalidationMessages( u_sess->proc_cxt.DatabasePath = NULL; } - SendSharedInvalidMessages(msgs, nmsgs); + send_shared_invalid_messages(msgs, nmsgs, lsn); if (RelcacheInitFileInval) { RelationCacheInitFilePostInvalidate(); @@ -1565,13 +1584,13 @@ void CacheInvalidateSmgr(RelFileNodeBackend rnode) * should happen in low-level relmapper.c routines, which are executed while * replaying WAL as well as when creating it. 
*/ -void CacheInvalidateRelmap(Oid databaseId) +void CacheInvalidateRelmap(Oid databaseId, XLogRecPtr lsn) { SharedInvalidationMessage msg; msg.rm.id = SHAREDINVALRELMAP_ID; msg.rm.dbId = databaseId; - SendSharedInvalidMessages(&msg, 1); + send_shared_invalid_messages(&msg, 1, lsn); } /* diff --git a/src/common/backend/utils/cache/knl_globalsysdbcache.cpp b/src/common/backend/utils/cache/knl_globalsysdbcache.cpp index 7f536f6c69..f740eebac4 100644 --- a/src/common/backend/utils/cache/knl_globalsysdbcache.cpp +++ b/src/common/backend/utils/cache/knl_globalsysdbcache.cpp @@ -18,6 +18,7 @@ #include "utils/memutils.h" #include "utils/builtins.h" #include "access/xlog.h" +#include "access/multi_redo_api.h" #include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_database.h" @@ -682,7 +683,8 @@ void GlobalSysDBCache::InitSysCacheRelIds() void GlobalSysDBCache::RefreshHotStandby() { Assert(EnableGlobalSysCache()); - hot_standby = (t_thrd.postmaster_cxt.HaShmData->current_mode != STANDBY_MODE || XLogStandbyInfoActive()); + hot_standby = (t_thrd.postmaster_cxt.HaShmData->current_mode != STANDBY_MODE || (XLogStandbyInfoActive() && + !IsExtremeRedo())); if (hot_standby || !m_is_inited) { return; } diff --git a/src/common/backend/utils/cache/relcache.cpp b/src/common/backend/utils/cache/relcache.cpp index df44246d9a..fa87ca9efd 100755 --- a/src/common/backend/utils/cache/relcache.cpp +++ b/src/common/backend/utils/cache/relcache.cpp @@ -40,6 +40,7 @@ #include "access/xact.h" #include "access/xlog.h" #include "access/multixact.h" +#include "access/multi_redo_api.h" #include "catalog/catalog.h" #include "catalog/heap.h" #include "catalog/catversion.h" @@ -1366,7 +1367,7 @@ HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic) * relfilenode of non mapped system relations during decoding. 
*/ snapshot = SnapshotNow; - if (HistoricSnapshotActive() && !force_non_historic) { + if (HistoricSnapshotActive() && !force_non_historic || IS_EXRTO_RECOVERY_IN_PROGRESS) { snapshot = GetCatalogSnapshot(); } @@ -1601,7 +1602,7 @@ static void RelationBuildTupleDesc(Relation relation, bool onlyLoadInitDefVal) */ Snapshot snapshot = NULL; snapshot = SnapshotNow; - if (HistoricSnapshotActive()) { + if (HistoricSnapshotActive() || IS_EXRTO_RECOVERY_IN_PROGRESS) { snapshot = GetCatalogSnapshot(); } @@ -8741,4 +8742,3 @@ bool IsRelationReplidentKey(Relation r, int attno) RelationClose(idx_rel); return false; } - diff --git a/src/common/backend/utils/cache/relmapper.cpp b/src/common/backend/utils/cache/relmapper.cpp index 594ca88011..f8e4823704 100644 --- a/src/common/backend/utils/cache/relmapper.cpp +++ b/src/common/backend/utils/cache/relmapper.cpp @@ -47,6 +47,7 @@ #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "access/multi_redo_api.h" #include "catalog/catalog.h" #include "catalog/pg_tablespace.h" #include "catalog/storage.h" @@ -61,7 +62,7 @@ static void apply_map_update(RelMapFile* map, Oid relationId, Oid fileNode, bool add_okay); static void merge_map_updates(RelMapFile* map, const RelMapFile* updates, bool add_okay); static void write_relmap_file(bool shared, RelMapFile* newmap, bool write_wal, bool send_sinval, bool preserve_files, - Oid dbid, Oid tsid, const char* dbpath); + Oid dbid, Oid tsid, const char* dbpath, XLogRecPtr redo_lsn = 0); static void perform_relmap_update(bool shared, const RelMapFile* updates); static int WriteOldVersionRelmap(RelMapFile* map, int fd); static int ReadOldVersionRelmap(RelMapFile* map, int fd); @@ -742,7 +743,7 @@ loop: * map update could be happening. 
*/ static void write_relmap_file(bool shared, RelMapFile* newmap, bool write_wal, bool send_sinval, bool preserve_files, - Oid dbid, Oid tsid, const char* dbpath) + Oid dbid, Oid tsid, const char* dbpath, XLogRecPtr redo_lsn) { int fd; RelMapFile* real_map = NULL; @@ -870,7 +871,7 @@ static void write_relmap_file(bool shared, RelMapFile* newmap, bool write_wal, b * as soon as others began to use the now-committed data. */ if (send_sinval) { - CacheInvalidateRelmap(dbid); + CacheInvalidateRelmap(dbid, redo_lsn); } /* * Make sure that the files listed in the map are not deleted if the outer @@ -1101,7 +1102,11 @@ void relmap_redo(XLogReaderState* record) */ XLogRecPtr lsn = record->EndRecPtr; UpdateMinRecoveryPoint(lsn, false); - write_relmap_file((xlrec->dbid == InvalidOid), &new_map, false, true, false, xlrec->dbid, xlrec->tsid, dbpath); + if (!IS_EXRTO_READ) { + lsn = 0; + } + write_relmap_file((xlrec->dbid == InvalidOid), &new_map, false, true, false, xlrec->dbid, xlrec->tsid, dbpath, + lsn); pfree_ext(dbpath); } else { ereport(PANIC, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("relmap_redo: unknown op code %u", info))); diff --git a/src/common/backend/utils/error/be_module.cpp b/src/common/backend/utils/error/be_module.cpp index 6d2d0d7e43..c564d52647 100755 --- a/src/common/backend/utils/error/be_module.cpp +++ b/src/common/backend/utils/error/be_module.cpp @@ -129,6 +129,7 @@ const module_data module_map[] = {{MOD_ALL, "ALL"}, {MOD_LOGICAL_DECODE, "LOGICAL_DECODE"}, {MOD_GPRC, "GPRC"}, {MOD_DISASTER_READ, "DISASTER_READ"}, + {MOD_STANDBY_READ, "STANDBY_READ"}, {MODE_REPSYNC, "REPSYNC"}, {MOD_SQLPATCH, "SQLPATCH"}, {MOD_DMS, "DMS"}, @@ -272,4 +273,3 @@ module_logging_enable_comm(ModuleId module_id) { enable_module_logging(module_id); } - diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index f8f7b9ee18..add3f82219 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp +++ 
b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -3263,6 +3263,7 @@ static void InitStorageConfigureNamesInt() NULL, NULL, NULL}, +#ifndef ENABLE_LITE_MODE {{"standby_recycle_interval", PGC_SIGHUP, NODE_ALL, @@ -3305,6 +3306,7 @@ static void InitStorageConfigureNamesInt() NULL, NULL, NULL}, +#endif {{"force_promote", PGC_POSTMASTER, NODE_ALL, @@ -3933,19 +3935,21 @@ static void InitStorageConfigureNamesReal() NULL, NULL, NULL}, - {{"standby_force_recyle_ratio", +#ifndef ENABLE_LITE_MODE + {{"standby_force_recycle_ratio", PGC_SIGHUP, NODE_ALL, RESOURCES_RECOVERY, gettext_noop("Sets the ratio that triggers forced recycling in extreme-rto standby read."), NULL}, - &g_instance.attr.attr_storage.standby_force_recyle_ratio, + &g_instance.attr.attr_storage.standby_force_recycle_ratio, 0.8, 0.0, 1.0, NULL, NULL, NULL}, +#endif {{"bypass_dram", PGC_SIGHUP, NODE_ALL, @@ -4096,6 +4100,7 @@ static void InitStorageConfigureNamesInt64() NULL, NULL, NULL}, +#ifndef ENABLE_LITE_MODE {{"max_standby_base_page_size", PGC_POSTMASTER, NODE_ALL, @@ -4104,7 +4109,7 @@ static void InitStorageConfigureNamesInt64() NULL}, &g_instance.attr.attr_storage.max_standby_base_page_size, INT64CONST(0x4000000000), /* 256GB */ - INT64CONST(0), + INT64CONST(0x40000000), /* 1GB */ INT64CONST(0x7FFFFFFFFFFFFFF), NULL, NULL, @@ -4117,11 +4122,12 @@ static void InitStorageConfigureNamesInt64() NULL}, &g_instance.attr.attr_storage.max_standby_lsn_info_size, INT64CONST(0x4000000000), /* 256GB */ - INT64CONST(0), + INT64CONST(0x40000000), /* 1GB */ INT64CONST(0x7FFFFFFFFFFFFFF), NULL, NULL, NULL}, +#endif /* End-of-list marker */ {{NULL, (GucContext)0, diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp index 3a288e254c..68edd053da 100644 --- a/src/common/backend/utils/time/snapmgr.cpp +++ b/src/common/backend/utils/time/snapmgr.cpp @@ -45,9 +45,11 @@ #include #include "access/csnlog.h" +#include "access/multi_redo_api.h" #include "access/transam.h" 
#include "access/twophase.h" #include "access/xact.h" +#include "access/multi_redo_api.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" @@ -248,6 +250,10 @@ loop: ereport(FATAL, (errmsg("SS xid %lu's csn %lu is still COMMITTING after Master txn waited.", xid, csn))); } if (looped) { + /* don't change csn log in recovery */ + if (snapshot->takenDuringRecovery) { + return false; + } ereport(DEBUG1, (errmsg("transaction id %lu's csn %ld is changed to ABORT after lockwait.", xid, csn))); /* recheck if transaction id is finished */ RecheckXidFinish(xid, csn); @@ -281,9 +287,9 @@ loop: *sync = true; } if (TransactionIdIsValid(parentXid)) - SyncWaitXidEnd(parentXid, buffer); + SyncWaitXidEnd(parentXid, buffer, snapshot); else - SyncWaitXidEnd(xid, buffer); + SyncWaitXidEnd(xid, buffer, snapshot); looped = true; parentXid = InvalidTransactionId; goto loop; @@ -529,7 +535,7 @@ Snapshot GetTransactionSnapshot(bool force_local_snapshot) return u_sess->utils_cxt.CurrentSnapshot; } - if (IsolationUsesXactSnapshot()) { + if (IsolationUsesXactSnapshot()|| IS_EXRTO_STANDBY_READ) { #ifdef PGXC /* * Consider this test case taken from portals.sql @@ -554,12 +560,17 @@ Snapshot GetTransactionSnapshot(bool force_local_snapshot) if (IsConnFromCoord()) SnapshotSetCommandId(GetCurrentCommandId(false)); #endif + if (IS_EXRTO_STANDBY_READ) { + t_thrd.pgxact->xmin = u_sess->utils_cxt.CurrentSnapshot->xmin; + t_thrd.proc->exrto_min = u_sess->utils_cxt.CurrentSnapshot->read_lsn; + t_thrd.proc->exrto_read_lsn = t_thrd.proc->exrto_min; + t_thrd.proc->exrto_gen_snap_time = GetCurrentTimestamp(); + } return u_sess->utils_cxt.CurrentSnapshot; } Assert(!(u_sess->utils_cxt.CurrentSnapshot != NULL && u_sess->utils_cxt.CurrentSnapshot->user_data != NULL)); u_sess->utils_cxt.CurrentSnapshot = GetSnapshotData(u_sess->utils_cxt.CurrentSnapshotData, force_local_snapshot); - return u_sess->utils_cxt.CurrentSnapshot; } @@ -615,6 +626,16 @@ Snapshot GetLatestSnapshot(void) return 
u_sess->utils_cxt.SecondarySnapshot; } +Snapshot get_standby_snapshot() +{ + if (u_sess->utils_cxt.FirstSnapshotSet) { + Assert(!(u_sess->utils_cxt.CurrentSnapshot != NULL && u_sess->utils_cxt.CurrentSnapshot->user_data != NULL)); + return u_sess->utils_cxt.CurrentSnapshot; + } + + return GetTransactionSnapshot(); +} + /* * GetCatalogSnapshot * Get a snapshot that is sufficiently up-to-date for scan of the @@ -630,9 +651,27 @@ Snapshot GetCatalogSnapshot() if (HistoricSnapshotActive()) return u_sess->utils_cxt.HistoricSnapshot; + if (IS_EXRTO_RECOVERY_IN_PROGRESS && t_thrd.role != TRACK_STMT_WORKER && !dummyStandbyMode) { + return get_standby_snapshot(); + } + return SnapshotNow; } +/* + * get_toast_snapshot + * Returns get_standby_snapshot() when IS_EXRTO_RECOVERY_IN_PROGRESS holds (excluding the + * TRACK_STMT worker and dummy standby mode); otherwise returns SnapshotToast. Takes no OID. + */ +Snapshot get_toast_snapshot() +{ + if (IS_EXRTO_RECOVERY_IN_PROGRESS && t_thrd.role != TRACK_STMT_WORKER && !dummyStandbyMode) { + return get_standby_snapshot(); + } + + return SnapshotToast; +} + /* * SnapshotSetCommandId * Propagate CommandCounterIncrement into the static snapshots, if set @@ -1225,6 +1264,11 @@ void AtEOXact_Snapshot(bool isCommit) u_sess->utils_cxt.FirstSnapshotSet = false; SnapshotResetXmin(); + t_thrd.proc->exrto_min = InvalidXLogRecPtr; + if (IS_EXRTO_STANDBY_READ && t_thrd.proc->exrto_reload_cache) { + t_thrd.proc->exrto_reload_cache = false; + reset_invalidation_cache(); + } } /* diff --git a/src/gausskernel/optimizer/commands/dbcommands.cpp b/src/gausskernel/optimizer/commands/dbcommands.cpp index 631a968368..57d17e67df 100644 --- a/src/gausskernel/optimizer/commands/dbcommands.cpp +++ b/src/gausskernel/optimizer/commands/dbcommands.cpp @@ -35,6 +35,7 @@ #include "access/multixact.h" #include "access/multi_redo_api.h" #include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include 
"catalog/indexing.h" @@ -2442,7 +2443,7 @@ void do_db_drop(Oid dbId, Oid tbSpcId) if (!rmtree(dst_path, true)) { ereport(WARNING, (errmsg("some useless files may be left behind in old database directory \"%s\"", dst_path))); } - if (IS_EXRTO_READ) { + if (RecoveryInProgress() && IS_EXRTO_READ) { /* remove file start with {db_id}_ */ extreme_rto_standby_read::remove_block_meta_info_files_of_db(dbId); } @@ -2489,7 +2490,11 @@ void xlogRemoveRemainSegsByDropDB(Oid dbId, Oid tablespaceId) void xlog_db_drop(XLogRecPtr lsn, Oid dbId, Oid tbSpcId) { UpdateMinRecoveryPoint(lsn, false); - do_db_drop(dbId, tbSpcId); + if (IS_EXRTO_READ) { + update_delay_ddl_db(dbId, tbSpcId, lsn); + } else { + do_db_drop(dbId, tbSpcId); + } xlogRemoveRemainSegsByDropDB(dbId, tbSpcId); } diff --git a/src/gausskernel/optimizer/commands/tablespace.cpp b/src/gausskernel/optimizer/commands/tablespace.cpp index 316bf02c84..dd36b8c32d 100644 --- a/src/gausskernel/optimizer/commands/tablespace.cpp +++ b/src/gausskernel/optimizer/commands/tablespace.cpp @@ -55,6 +55,8 @@ #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "access/multi_redo_api.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" @@ -2618,9 +2620,12 @@ void xlog_drop_tblspc(Oid tsId) * etc etc. There's not much we can do about that, so just remove what * we can and press on. */ + if (!destroy_tablespace_directories(tsId, true)) { ResolveRecoveryConflictWithTablespace(tsId); - + if (IS_EXRTO_READ) { + delete_by_table_space(tsId); + } /* * If we did recovery processing then hopefully the backends who * wrote temp files should have cleaned up and exited by now. 
So diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 7cb582977e..3e518e813d 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -234,6 +234,7 @@ #include "access/multi_redo_api.h" #include "postmaster/postmaster.h" #include "access/parallel_recovery/dispatcher.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" #include "access/extreme_rto/standby_read/standby_read_base.h" #include "utils/distribute_test.h" #ifdef ENABLE_MULTIPLE_NODES @@ -315,8 +316,6 @@ extern void gs_set_hs_shm_data(HaShmemData* ha_shm_data); extern void ReaperBackendMain(); extern void AdjustThreadAffinity(); -extern void exrto_standby_read_init(); - #define EXTERN_SLOTS_NUM 17 volatile PMState pmState = PM_INIT; bool dummyStandbyMode = false; @@ -3347,12 +3346,29 @@ static void CheckExtremeRtoGUCConflicts(void) errhint("recommend config \"wal_receiver_buffer_size=64MB\""))); } +#ifdef ENABLE_LITE_MODE + if ((g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) { + ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("when enabling lite mode, extreme rto could not support hot standby."), + errhint("Either turn off extreme rto, or turn off hot_standby."))); + } +#endif + #ifndef ENABLE_MULTIPLE_NODES - if (IS_DISASTER_RECOVER_MODE &&(g_instance.attr.attr_storage.recovery_parse_workers > 1) && g_instance.attr.attr_storage.EnableHotStandby) { - ereport(ERROR, - (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("For disaster standby cluster, extreme rto could not support hot standby."), - errhint("Either turn off extreme rto, or turn off hot_standby."))); + if (IS_DISASTER_RECOVER_MODE && (g_instance.attr.attr_storage.recovery_parse_workers > 1) && + g_instance.attr.attr_storage.EnableHotStandby) { + ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("For disaster standby cluster, extreme rto 
could not support hot standby."), + errhint("Either turn off extreme rto, or turn off hot_standby."))); + } + + if (g_instance.attr.attr_storage.EnableHotStandby == true) { + int base_page_saved_interval = g_instance.attr.attr_storage.base_page_saved_interval; + g_instance.attr.attr_storage.base_page_saved_interval = + (g_instance.attr.attr_storage.base_page_saved_interval / (int)extreme_rto_standby_read::LSN_NUM_PER_NODE) * + (int)extreme_rto_standby_read::LSN_NUM_PER_NODE; // Rounded down of 5 + ereport(LOG, (errmsg("base_page_saved_interval is %d, ori is %d.", + g_instance.attr.attr_storage.base_page_saved_interval, base_page_saved_interval))); } #endif @@ -4006,7 +4022,7 @@ static int ServerLoop(void) } } ADIO_END(); - + if (threadPoolActivated && (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) g_threadPoolControler->AddWorkerIfNecessary(); @@ -5094,10 +5110,11 @@ int ProcessStartupPacket(Port* port, bool SSLdone) errmsg("can not accept connection in pending mode."))); } else { #ifdef ENABLE_MULTIPLE_NODES - if (STANDBY_MODE == hashmdata->current_mode && (!IS_MULTI_DISASTER_RECOVER_MODE || GTM_FREE_MODE || - (IS_PGXC_DATANODE && !g_instance.attr.attr_storage.EnableHotStandby))) { - ereport(ERROR, (errcode(ERRCODE_CANNOT_CONNECT_NOW), - errmsg("can not accept connection in standby mode."))); + if (STANDBY_MODE == hashmdata->current_mode && + (!IS_MULTI_DISASTER_RECOVER_MODE || GTM_FREE_MODE || + (IS_PGXC_DATANODE && !g_instance.attr.attr_storage.EnableHotStandby))) { + ereport(ERROR, + (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("can not accept connection in standby mode."))); } #else if (hashmdata->current_mode == STANDBY_MODE && !g_instance.attr.attr_storage.EnableHotStandby) { @@ -6963,9 +6980,10 @@ static void reaper(SIGNAL_ARGS) g_instance.pid_cxt.WalWriterAuxiliaryPID = initialize_util_thread(WALWRITERAUXILIARY); if (g_instance.pid_cxt.CBMWriterPID == 0 && !dummyStandbyMode && - u_sess->attr.attr_storage.enable_cbm_tracking) - - + 
u_sess->attr.attr_storage.enable_cbm_tracking) { + g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER); + } + if (IS_EXRTO_READ && g_instance.pid_cxt.exrto_recycler_pid == 0) { g_instance.pid_cxt.exrto_recycler_pid = initialize_util_thread(EXRTO_RECYCLER); } diff --git a/src/gausskernel/process/stream/streamMain.cpp b/src/gausskernel/process/stream/streamMain.cpp index 222e894b8c..9cc8c2be53 100755 --- a/src/gausskernel/process/stream/streamMain.cpp +++ b/src/gausskernel/process/stream/streamMain.cpp @@ -24,7 +24,9 @@ #include "postgres.h" #include "access/gtm.h" +#include "access/multi_redo_api.h" #include "access/printtup.h" +#include "access/multi_redo_api.h" #include "distributelayer/streamMain.h" #include "distributelayer/streamProducer.h" #include "executor/exec/execStream.h" @@ -488,6 +490,19 @@ static void execute_stream_plan(StreamProducer* producer) * Start the portal. No parameters here. */ PortalStart(portal, producer->getParams(), 0, producer->getSnapShot()); + + /* The value of snapshot.read_lsn may be assigned to thread A and used on thread B. + So we should reassigned read_lsn to t_thrd of thread B */ + if (unlikely(IS_EXRTO_STANDBY_READ && producer->getSnapShot() != NULL)) { + t_thrd.proc->exrto_read_lsn = producer->getSnapShot()->read_lsn; + t_thrd.proc->exrto_min = t_thrd.proc->exrto_read_lsn; + } + + /* The value of snapshot.read_lsn may be assigned to thread A and used on thread B. 
+ So we should reassigned read_lsn to t_thrd of thread B */ + if (unlikely(IS_EXRTO_STANDBY_READ && producer->getSnapShot() != NULL)) { + t_thrd.proc->exrto_read_lsn = producer->getSnapShot()->read_lsn; + } format = 0; PortalSetResultFormat(portal, 1, &format); diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index b08806c83e..ced83182d8 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ -8583,7 +8583,7 @@ int PostgresMain(int argc, char* argv[], const char* dbname, const char* usernam lc_replan_nodegroup = InvalidOid; /* reset xmin before ReadCommand, in case blocking redo */ if (RecoveryInProgress()) { - t_thrd.pgxact->xmin = InvalidTransactionId; + } /* diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 756c322292..a8ffe95555 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -42,6 +42,7 @@ #include "regex/regex.h" #include "utils/memutils.h" #include "utils/palloc.h" +#include "utils/snapshot.h" #include "workload/workload.h" #include "instruments/instr_waitevent.h" #include "access/multi_redo_api.h" @@ -325,7 +326,14 @@ static void knl_g_parallel_redo_init(knl_g_parallel_redo_context* predo_cxt) rc = memset_s(&predo_cxt->redoCpuBindcontrl, sizeof(RedoCpuBindControl), 0, sizeof(RedoCpuBindControl)); securec_check(rc, "", ""); predo_cxt->global_recycle_lsn = InvalidXLogRecPtr; + predo_cxt->exrto_snapshot = (ExrtoSnapshot)MemoryContextAllocZero( + INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(ExrtoSnapshotData)); predo_cxt->redoItemHash = NULL; + + predo_cxt->standby_read_delay_ddl_stat.delete_stat = 0; + predo_cxt->standby_read_delay_ddl_stat.next_index_can_insert = 0; + predo_cxt->standby_read_delay_ddl_stat.next_index_need_unlink = 0; + predo_cxt->max_clog_pageno = 0; } static void 
knl_g_parallel_decode_init(knl_g_parallel_decode_context* pdecode_cxt) @@ -1071,4 +1079,3 @@ bool knl_g_get_redo_finish_status() uint32 isRedoFinish = pg_atomic_read_u32(&(g_instance.comm_cxt.predo_cxt.isRedoFinish)); return (isRedoFinish & REDO_FINISH_STATUS_CM) == REDO_FINISH_STATUS_CM; } - diff --git a/src/gausskernel/process/threadpool/knl_session.cpp b/src/gausskernel/process/threadpool/knl_session.cpp index 55b9420219..63d72c1829 100755 --- a/src/gausskernel/process/threadpool/knl_session.cpp +++ b/src/gausskernel/process/threadpool/knl_session.cpp @@ -957,6 +957,9 @@ static void knl_u_storage_init(knl_u_storage_context* storage_cxt) /* var in knl_uundofile.cpp */ storage_cxt->UndoFileCxt = NULL; + /* var in storage_exrto_file.cpp */ + storage_cxt->exrto_standby_read_file_cxt = NULL; + /* var in sync.cpp */ storage_cxt->pendingUnlinks = NIL; storage_cxt->pendingOpsCxt = NULL; diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 4a604a2390..0f46ecc62b 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -962,6 +962,7 @@ static void knl_t_page_redo_init(knl_t_page_redo_context* page_redo_cxt) page_redo_cxt->sleep_long = false; page_redo_cxt->check_repair = false; page_redo_cxt->redo_worker_ptr = NULL; + page_redo_cxt->invalid_msg.valid = false; } static void knl_t_exrto_recycle_init(knl_t_exrto_recycle_context* exrto_recycle_cxt) diff --git a/src/gausskernel/process/threadpool/threadpool_worker.cpp b/src/gausskernel/process/threadpool/threadpool_worker.cpp index 0afdacf685..43c58ae3af 100644 --- a/src/gausskernel/process/threadpool/threadpool_worker.cpp +++ b/src/gausskernel/process/threadpool/threadpool_worker.cpp @@ -36,6 +36,7 @@ #include "threadpool/threadpool.h" #include "access/xact.h" +#include "access/multi_redo_api.h" #include "commands/prepare.h" #include "commands/tablespace.h" #include "commands/vacuum.h" @@ -534,6 
+535,9 @@ void ThreadPoolWorker::CleanThread() thread_proc->workingVersionNum = pg_atomic_read_u32(&WorkingGrandVersionNum); if (m_currentSession != NULL) { + if (IS_EXRTO_STANDBY_READ) { + AtEOXact_Snapshot(false); + } DetachSessionFromThread(); } } diff --git a/src/gausskernel/storage/access/heap/heapam.cpp b/src/gausskernel/storage/access/heap/heapam.cpp index cda1455f31..5625c0d607 100755 --- a/src/gausskernel/storage/access/heap/heapam.cpp +++ b/src/gausskernel/storage/access/heap/heapam.cpp @@ -8737,7 +8737,11 @@ static void heap_xlog_cleanup_info(XLogReaderState* record) RelFileNode tmp_node; RelFileNodeCopy(tmp_node, xlrec->node, XLogRecGetBucketId(record)); - if (InHotStandby && g_supportHotStandby) { + if (IsExtremeRedo()) { + return; + } + + if (InHotStandby && g_supportHotStandby && !IS_EXRTO_READ) { XLogRecPtr lsn = record->EndRecPtr; ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, tmp_node, lsn); } @@ -10324,4 +10328,3 @@ HeapTuple heapam_index_fetch_tuple(IndexScanDesc scan, bool *all_dead, bool* has return NULL; } - diff --git a/src/gausskernel/storage/access/heap/tuptoaster.cpp b/src/gausskernel/storage/access/heap/tuptoaster.cpp index 580b08f282..b405a8954b 100644 --- a/src/gausskernel/storage/access/heap/tuptoaster.cpp +++ b/src/gausskernel/storage/access/heap/tuptoaster.cpp @@ -2380,7 +2380,7 @@ struct varlena* heap_internal_toast_fetch_datum(struct varatt_external toast_poi */ nextidx = 0; - toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); + toastscan = systable_beginscan_ordered(toastrel, toastidx, get_toast_snapshot(), 1, &toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data @@ -2559,7 +2559,7 @@ struct varlena* HeapInternalToastFetchDatumSlice(struct varatt_external toastPoi * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; - toastscan = 
systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); + toastscan = systable_beginscan_ordered(toastrel, toastidx, get_toast_snapshot(), nscankeys, toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data @@ -2775,7 +2775,7 @@ static struct varlena *toast_huge_fetch_datum_slice(struct varlena *attr, int64 Relation toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); ScanKeyInit(&toastkey, (AttrNumber)1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(large_toast_pointer.va_valueid)); - toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); + toastscan = systable_beginscan_ordered(toastrel, toastidx, get_toast_snapshot(), 1, &toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { chunk = DatumGetPointer(fastgetattr(ttup, CHUNK_DATA_ATTR, toast_tup_desc, &isnull)); struct varatt_external toast_pointer; diff --git a/src/gausskernel/storage/access/nbtree/nbtpage.cpp b/src/gausskernel/storage/access/nbtree/nbtpage.cpp index 5d6c20ee5b..8173ccb92d 100644 --- a/src/gausskernel/storage/access/nbtree/nbtpage.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtpage.cpp @@ -25,6 +25,7 @@ #include "knl/knl_variable.h" #include "access/hio.h" +#include "access/multi_redo_api.h" #include "access/nbtree.h" #include "access/transam.h" #include "access/visibilitymap.h" diff --git a/src/gausskernel/storage/access/nbtree/nbtxlog.cpp b/src/gausskernel/storage/access/nbtree/nbtxlog.cpp index 066b2e5752..b0277c08c4 100755 --- a/src/gausskernel/storage/access/nbtree/nbtxlog.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtxlog.cpp @@ -1072,7 +1072,7 @@ static void btree_xlog_reuse_page(XLogReaderState *record) RelFileNode tmp_node; RelFileNodeCopy(tmp_node, xlrec->node, XLogRecGetBucketId(record)); - if (InHotStandby && g_supportHotStandby) { + if (InHotStandby && 
g_supportHotStandby && !IS_EXRTO_READ) { XLogRecPtr lsn = record->EndRecPtr; ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, tmp_node, lsn); } diff --git a/src/gausskernel/storage/access/redo/redo_storage.cpp b/src/gausskernel/storage/access/redo/redo_storage.cpp index 73a25fbece..7288efa5b7 100644 --- a/src/gausskernel/storage/access/redo/redo_storage.cpp +++ b/src/gausskernel/storage/access/redo/redo_storage.cpp @@ -80,6 +80,7 @@ XLogRecParseState *smgr_redo_parse_to_block(XLogReaderState *record, uint32 *blo *blocknum = 0; if ((info == XLOG_SMGR_CREATE) || (info == XLOG_SMGR_TRUNCATE)) { recordstatehead = smgr_xlog_relnode_parse_to_block(record, blocknum); + recordstatehead->isFullSync = record->isFullSync; } else { ereport(PANIC, (errmsg("smgr_redo_parse_to_block: unknown op code %u", info))); } diff --git a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp index cf6feafb7d..53f86af4db 100644 --- a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp +++ b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp @@ -141,13 +141,13 @@ bool DoLsnCheck(const RedoBufferInfo *bufferinfo, bool willInit, XLogRecPtr last *needRepair = true; XLogLsnCheckLogInvalidPage(bufferinfo, LSN_CHECK_ERROR, pblk); } - ereport(elevel, - (errmsg("lsn check error, record last lsn (%X/%X) ,lsn in current page %X/%X, " - "page info:%u/%u/%u forknum %d blknum:%u lsn %X/%X", - (uint32)(lastLsn >> XLOG_LSN_SWAP), (uint32)(lastLsn), (uint32)(pageCurLsn >> XLOG_LSN_SWAP), - (uint32)(pageCurLsn), blockinfo->rnode.spcNode, blockinfo->rnode.dbNode, - blockinfo->rnode.relNode, blockinfo->forknum, blockinfo->blkno, (uint32)(lsn >> XLOG_LSN_SWAP), - (uint32)(lsn)))); + ereport(elevel, (errmsg("lsn check error, record last lsn (%X/%X) ,lsn in current page %X/%X, " + "page info:%u/%u/%u/%d/%d forknum %d blknum:%u lsn %X/%X", + (uint32)(lastLsn >> XLOG_LSN_SWAP), (uint32)(lastLsn), + (uint32)(pageCurLsn >> XLOG_LSN_SWAP), 
(uint32)(pageCurLsn), + blockinfo->rnode.spcNode, blockinfo->rnode.dbNode, blockinfo->rnode.relNode, + blockinfo->rnode.bucketNode, blockinfo->rnode.opt, blockinfo->forknum, + blockinfo->blkno, (uint32)(lsn >> XLOG_LSN_SWAP), (uint32)(lsn)))); return false; } } @@ -1769,6 +1769,9 @@ bool XLogBlockRedoForExtremeRTO(XLogRecParseState *redoblocktate, RedoBufferInfo if ((block_valid != BLOCK_DATA_UNDO_TYPE) && g_instance.attr.attr_storage.EnableHotStandby && IsDefaultExtremeRtoMode() && XLByteLT(PageGetLSN(bufferinfo->pageinfo.page), blockhead->end_ptr)) { + if (bufferinfo->blockinfo.forknum >= EXRTO_FORK_NUM) { + ereport(PANIC, (errmsg("forknum is illegal: %d", bufferinfo->blockinfo.forknum))); + } BufferTag buf_tag; INIT_BUFFERTAG(buf_tag, bufferinfo->blockinfo.rnode, bufferinfo->blockinfo.forknum, bufferinfo->blockinfo.blkno); @@ -1951,7 +1954,7 @@ void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_inf /* do we need register interrupt func here? like ProcessConfigFile */ XLogParseBufferInitFunc(&redo_pm, MAX_BUFFER_NUM_PER_WAL_RECORD, NULL, NULL); if (xlog_reader == NULL) { - ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("redo_target_page: out of memory"), errdetail("Failed while allocating an XLog reading processor."))); } @@ -1961,7 +1964,7 @@ void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_inf XLogRecord *record = XLogReadRecord(xlog_reader, lsn_info->lsn_array[i], &error_msg); if (record == NULL) { ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read two-phase state from xlog at %X/%X, errormsg: %s", + errmsg("redo_target_page: could not read wal record from xlog at %X/%X, errormsg: %s", (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i]), error_msg ? 
error_msg : " "))); } @@ -1970,12 +1973,12 @@ void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_inf XLogRecParseState *state = XLogParseToBlockCommonFunc(xlog_reader, &num); if (num == 0) { - ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain any block.", + ereport(ERROR, (errmsg("redo_target_page: internal error, xlog in lsn %X/%X doesn't contain any block.", (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i])))); } if (state == NULL) { - ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), + ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("redo_target_page: out of memory"), errdetail("Failed while wal parse to block."))); } XLogRecParseState *state_iter = state; @@ -1986,7 +1989,7 @@ void redo_target_page(const BufferTag &buf_tag, StandbyReadLsnInfoArray *lsn_inf state_iter = (XLogRecParseState *)(state_iter->nextrecord); } if (state_iter == NULL) { - ereport(ERROR, (errmsg("internal error, xlog in lsn %X/%X doesn't contain target block.", + ereport(ERROR, (errmsg("redo_target_page: internal error, xlog in lsn %X/%X doesn't contain target block.", (uint32)(lsn_info->lsn_array[i] >> LSN_MOVE32), (uint32)(lsn_info->lsn_array[i])))); } buf_info.lsn = state_iter->blockparse.blockhead.end_ptr; diff --git a/src/gausskernel/storage/access/redo/standby_read/Makefile b/src/gausskernel/storage/access/redo/standby_read/Makefile index 2367845295..9d1fc64883 100644 --- a/src/gausskernel/storage/access/redo/standby_read/Makefile +++ b/src/gausskernel/storage/access/redo/standby_read/Makefile @@ -17,7 +17,7 @@ # Makefile for access/psort # # IDENTIFICATION -# src/backend/access/psort/Makefile +# src/gausskernel/storage/access/redo/standby_read/Makefile # #------------------------------------------------------------------------- @@ -32,6 +32,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o 
lsn_info_proc.o standby_read_interface.o +OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o lsn_info_proc.o standby_read_interface.o standby_read_delay_ddl.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp index 5144571064..b7f6ee2895 100644 --- a/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/base_page_proc.cpp @@ -65,7 +65,11 @@ void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page) Buffer dest_buf = buffer_read_base_page(meta_info->batch_id, meta_info->redo_id, position, RBM_ZERO_AND_LOCK); +#ifdef ENABLE_UT + Page dest_page = get_page_from_buffer(dest_buf); +#else Page dest_page = BufferGetPage(dest_buf); +#endif errno_t rc = memcpy_s(dest_page, BLCKSZ, src_page, BLCKSZ); securec_check(rc, "\0", "\0"); MarkBufferDirty(dest_buf); @@ -77,17 +81,21 @@ void generate_base_page(StandbyReadMetaInfo* meta_info, const Page src_page) void read_base_page(const BufferTag& buf_tag, BasePagePosition position, BufferDesc* dest_buf_desc) { extreme_rto::RedoItemTag redo_item_tag; - const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); /* batch id and worker id start from 1 when reading a page */ - uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::GetBatchCount()) + 1; - INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; uint32 redo_worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; Buffer buffer = 
buffer_read_base_page(batch_id, redo_worker_id, position, RBM_NORMAL); LockBuffer(buffer, BUFFER_LOCK_SHARE); +#ifdef ENABLE_UT + Page src_page = get_page_from_buffer(buffer); +#else Page src_page = BufferGetPage(buffer); +#endif Size page_size = BufferGetPageSize(buffer); Page dest_page = (Page)BufHdrGetBlock(dest_buf_desc); errno_t rc = memcpy_s(dest_page, page_size, src_page, page_size); @@ -103,5 +111,12 @@ void recycle_base_page_file(uint32 batch_id, uint32 redo_id, BasePagePosition re smgrdounlink(smgr, true, (BlockNumber)(recycle_pos / BLCKSZ)); } +#ifdef ENABLE_UT +Page get_page_from_buffer(Buffer buf) +{ + return BufferGetPage(buf); +} +#endif + } // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp index 356a6687b2..08fefe6ceb 100644 --- a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp @@ -16,7 +16,7 @@ * block_info_proc.cpp * * IDENTIFICATION - * src/gausskernel/storage/recovery/parallel/blocklevel/standby_read/block_info_proc.cpp + * src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp * * ------------------------------------------------------------------------- */ @@ -24,6 +24,7 @@ #include #include "access/extreme_rto/standby_read/block_info_meta.h" #include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "storage/smgr/relfilenode.h" namespace extreme_rto_standby_read { @@ -33,7 +34,7 @@ void block_info_page_init(Page page) static_assert(sizeof(BlockInfoPageHeader) == BLOCK_INFO_HEAD_SIZE, "BlockInfoPageHeader size is not 64 bytes"); static_assert(sizeof(BlockMetaInfo) == BLOCK_INFO_SIZE, "BlockMetaInfo size is not 64 bytes"); - BlockInfoPageHeader* page_header = (BlockInfoPageHeader*)page; + BlockInfoPageHeader *page_header = 
(BlockInfoPageHeader *)page; errno_t ret = memset_s(page_header, BLCKSZ, 0, BLCKSZ); securec_check(ret, "", ""); page_header->flags |= BLOCK_INFO_PAGE_VALID_FLAG; @@ -44,15 +45,21 @@ inline BlockNumber data_block_number_to_meta_page_number(BlockNumber block_num) { return block_num / BLOCK_INFO_NUM_PER_PAGE; } - +#ifdef ENABLE_UT +uint32 block_info_meta_page_offset(BlockNumber block_num) +{ + return (block_num % BLOCK_INFO_NUM_PER_PAGE) * BLOCK_INFO_SIZE + BLOCK_INFO_HEAD_SIZE; +} +#else inline uint32 block_info_meta_page_offset(BlockNumber block_num) { return (block_num % BLOCK_INFO_NUM_PER_PAGE) * BLOCK_INFO_SIZE + BLOCK_INFO_HEAD_SIZE; } +#endif // get page, just have pin, no lock BlockMetaInfo* get_block_meta_info_by_relfilenode( - const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer) + const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer, bool need_share_lock) { RelFileNode standby_read_rnode = buf_tag.rnode; standby_read_rnode.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; @@ -63,25 +70,47 @@ BlockMetaInfo* get_block_meta_info_by_relfilenode( *buffer = ReadBuffer_common(smgr, 0, buf_tag.forkNum, meta_block_num, mode, strategy, &hit, NULL); if (*buffer == InvalidBuffer) { + ereport(DEBUG1, (errmodule(MOD_STANDBY_READ), + errmsg("get block meta info failed, buffer invalid %u/%u/%u %d %u, meta_block_num %u", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, meta_block_num))); return NULL; } + if (need_share_lock) { + LockBuffer(*buffer, BUFFER_LOCK_SHARE); + } else { + LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + } + +#ifdef ENABLE_UT + Page page = get_page_from_buffer(*buffer); +#else Page page = BufferGetPage(*buffer); - if (!is_block_info_page_valid((BlockInfoPageHeader*)page)) { +#endif + if (!is_block_info_page_valid((BlockInfoPageHeader *)page)) { if (mode == RBM_NORMAL) { - ReleaseBuffer(*buffer); + UnlockReleaseBuffer(*buffer); 
+ ereport(DEBUG1, (errmodule(MOD_STANDBY_READ), + errmsg("get block meta info failed, page invalid %u/%u/%u %d %u, meta_block_num %u", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, meta_block_num))); return NULL; } } uint32 offset = block_info_meta_page_offset(buf_tag.blockNum); - BlockMetaInfo *block_info = ((BlockMetaInfo*)(page + offset)); + BlockMetaInfo *block_info = ((BlockMetaInfo *)(page + offset)); if (!is_block_meta_info_valid(block_info) && mode == RBM_NORMAL) { - ReleaseBuffer(*buffer); - + ereport(DEBUG1, + (errmsg("block_info is invalid %u/%u/%u %d %u min lsn %08X/%08X max lsn %08X/%08X flags:%u", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, (uint32)(block_info->min_lsn >> UINT64_HALF), (uint32)block_info->min_lsn, + (uint32)(block_info->max_lsn >> UINT64_HALF), (uint32)block_info->max_lsn, block_info->flags))); + UnlockReleaseBuffer(*buffer); return NULL; } - + Assert(block_info != NULL); return block_info; } @@ -106,11 +135,14 @@ void insert_lsn_to_block_info( ereport(PANIC, (errmsg("insert lsn failed,block invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); } - LockBuffer(block_info_buf, BUFFER_LOCK_EXCLUSIVE); +#ifdef ENABLE_UT + Page page = get_page_from_buffer(block_info_buf); +#else Page page = BufferGetPage(block_info_buf); +#endif XLogRecPtr current_page_lsn = PageGetLSN(base_page); if (!is_block_meta_info_valid(block_info)) { - if (!is_block_info_page_valid((BlockInfoPageHeader*)page)) { + if (!is_block_info_page_valid((BlockInfoPageHeader *)page)) { block_info_page_init(page); } @@ -142,14 +174,19 @@ StandbyReadRecyleState recyle_block_info( BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_NORMAL, &buffer); if ((block_meta_info == NULL) || (buffer == InvalidBuffer)) { // no block info, should not at this 
branch - ereport(WARNING, (errmsg("block meta is invalid %u/%u/%u %d %u", buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, - buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); + ereport(WARNING, (errmodule(MOD_STANDBY_READ), errmsg("block meta is invalid %u/%u/%u %d %u", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, + buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); return STANDBY_READ_RECLYE_ALL; } StandbyReadRecyleState stat = STANDBY_READ_RECLYE_NONE; - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); Assert(((block_meta_info->flags & BLOCK_INFO_NODE_VALID_FLAG) == BLOCK_INFO_NODE_VALID_FLAG)); if (XLByteLT(block_meta_info->max_lsn, recyle_lsn)) { + ereport(DEBUG1, + (errmsg(EXRTOFORMAT("block meta recycle all %u/%u/%u %d %u, max lsn %08X/%08X, recycle lsn %08X/%08X"), + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, (uint32)(block_meta_info->max_lsn >> UINT64_HALF), + (uint32)block_meta_info->max_lsn, (uint32)(recyle_lsn >> UINT64_HALF), (uint32)recyle_lsn))); block_meta_info->flags &= ~BLOCK_INFO_NODE_VALID_FLAG; stat = STANDBY_READ_RECLYE_ALL; MarkBufferDirty(buffer); @@ -186,13 +223,11 @@ bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, StandbyReadLsnInfoArray* lsn_info) { Buffer buf; - BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, strategy, RBM_NORMAL, &buf); + BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, strategy, RBM_NORMAL, &buf, true); if (block_meta_info == NULL) { return false; } - LockBuffer(buf, BUFFER_LOCK_SHARE); - if (XLByteLT(read_lsn, block_meta_info->min_lsn)) { UnlockReleaseBuffer(buf); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), @@ -203,9 +238,22 @@ bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, } Assert(block_meta_info->base_page_info_list.prev != LSN_INFO_LIST_HEAD); + if (block_meta_info->base_page_info_list.prev == LSN_INFO_LIST_HEAD) { + 
ereport(ERROR, + ((errmsg("block_meta_info->base_page_info_list.prev is invaild. timeline %u, recordnum %u , min lsn " + "%lu, max lsn %lu, read lsn %lu", + block_meta_info->timeline, block_meta_info->record_num, block_meta_info->min_lsn, + block_meta_info->max_lsn, read_lsn)))); + } reset_tmp_lsn_info_array(lsn_info); get_lsn_info_for_read(buf_tag, block_meta_info->base_page_info_list.prev, lsn_info, read_lsn); UnlockReleaseBuffer(buf); + + if (lsn_info->lsn_num == 0 && XLogRecPtrIsInvalid(lsn_info->base_page_lsn)) { + ereport(ERROR, ((errmsg("cannot find valid lsn info %u/%u/%u %d %u read lsn %lu, min lsn %lu", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, read_lsn, block_meta_info->min_lsn)))); + } return true; } @@ -227,73 +275,12 @@ void remove_one_block_info_file(const RelFileNode rnode) * recycle all relation files when drop db occurs. * db_id: database oid. */ -void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid) +void remove_block_meta_info_files_of_db(Oid db_oid) { - char pathbuf[EXRTO_FILE_PATH_LEN]; - char **filenames; - char **filename; - struct stat statbuf; - /* get block info file directory */ - char exrto_block_info_dir[EXRTO_FILE_PATH_LEN] = {0}; - int rc = snprintf_s(exrto_block_info_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR, - EXRTO_FILE_SUB_DIR[BLOCK_INFO_META]); - securec_check_ss(rc, "", ""); - /* get all files' name from block meta file directory */ - filenames = pgfnames(exrto_block_info_dir); - if (filenames == NULL) { - return; - } char target_prefix[EXRTO_FILE_PATH_LEN] = {0}; - if (rel_oid != InvalidOid) { - rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_%u_", db_oid, rel_oid); - } else { - rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_", db_oid); - } + errno_t rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_", db_oid); securec_check_ss(rc, "", ""); - /* use the prefix name to match up files we want to delete 
*/ - size_t prefix_len = strlen(target_prefix); - for (filename = filenames; *filename != NULL; filename++) { - char *fname = *filename; - size_t fname_len = strlen(fname); - /* - * the length of prefix is less than the length of file name and must be the same under the same prefix_len - */ - if (prefix_len >= fname_len || strncmp(target_prefix, fname, prefix_len) != 0) { - continue; - } - rc = - snprintf_s(pathbuf, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", exrto_block_info_dir, *filename); - securec_check_ss(rc, "", ""); - /* may be can be some error */ - if (lstat(pathbuf, &statbuf) != 0) { - if (errno != ENOENT) { -#ifndef FRONTEND - ereport(WARNING, (errmsg("could not stat file or directory \"%s\" \n", pathbuf))); -#else - fprintf(stderr, _("could not stat file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno)); -#endif - } - continue; - } - /* if the file is a directory, don't touch it */ - if (S_ISDIR(statbuf.st_mode)) { - /* skip dir */ - continue; - } - /* delete this file we found */ - if (unlink(pathbuf) != 0) { - if (errno != ENOENT) { -#ifndef FRONTEND - ereport(WARNING, (errmsg("could not remove file or directory \"%s\" ", pathbuf))); -#else - fprintf(stderr, _("could not remove file or directory \"%s\": %s\n"), pathbuf, gs_strerror(errno)); -#endif - } - } - } - pgfnames_cleanup(filenames); - return; + exrto_unlink_file_with_prefix(target_prefix, BLOCK_INFO_META); } } // namespace extreme_rto_standby_read - diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp index bffb2be173..0c5827af51 100644 --- a/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_double_list.cpp @@ -63,7 +63,6 @@ void info_list_modify_old_tail(StandbyReadMetaInfo *meta_info, LsnInfoPosition o base_page_info->base_page_list.next = insert_pos; 
base_page_info->next_base_page_lsn = current_page_lsn; Assert(is_lsn_info_node_valid(base_page_info->lsn_info_node.flags)); - Assert(XLByteLT(base_page_info->cur_page_lsn, current_page_lsn)); } standby_read_meta_page_set_lsn(page, next_lsn); diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp index 7655389554..6227742154 100644 --- a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp @@ -16,7 +16,7 @@ * lsn_info_proc.cpp * * IDENTIFICATION - * src/gausskernel/storage/recovery/parallel/blocklevel/standby_read/lsn_info_proc.cpp + * src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp * * ------------------------------------------------------------------------- */ @@ -98,9 +98,13 @@ Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition positi batch_id, worker_id))); return NULL; } - + +#ifdef ENABLE_UT + page = get_page_from_buffer(*buffer); +#else page = BufferGetPage(*buffer); - if (!is_lsn_info_page_valid((LsnInfoPageHeader*)page)) { +#endif + if (!is_lsn_info_page_valid((LsnInfoPageHeader *)page)) { if (mode == RBM_NORMAL) { ReleaseBuffer(*buffer); *buffer = InvalidBuffer; @@ -109,7 +113,7 @@ Page get_lsn_info_page(uint32 batch_id, uint32 worker_id, LsnInfoPosition positi /* make sure to make buffer dirty outside */ lsn_info_page_init(page); } - + return page; } @@ -312,17 +316,16 @@ void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_ /* get batch id and page redo worker id */ extreme_rto::RedoItemTag redo_item_tag; - const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); /* batch id and worker id start from 1 when reading a page */ - batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, 
extreme_rto::get_batch_redo_num()) + 1; - worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; + batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; + worker_id = extreme_rto::GetWorkerId(&redo_item_tag, extreme_rto::get_page_redo_worker_num_per_manager()) + 1; /* find fisrt base page whose lsn less than read lsn form tail to head */ do { /* reach the end of the list */ if (INFO_POSITION_IS_INVALID(latest_lsn_base_page_pos)) { - ereport(ERROR, ( + ereport(PANIC, ( errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum, batch_id, worker_id))); @@ -346,7 +349,7 @@ void get_lsn_info_for_read(const BufferTag& buf_tag, LsnInfoPosition latest_lsn_ Assert(is_base_page_type(base_page_info->lsn_info_node.type)); /* If we find the desired page, keep it locked */ - if (XLByteLT(page_lsn, read_lsn)) { + if (XLByteLE(page_lsn, read_lsn)) { break; } UnlockReleaseBuffer(buffer); @@ -422,7 +425,7 @@ void set_base_page_map_bit(Page page, uint32 base_page_loc) * base_page_loc must be an integer multiple of LSN_INFO_HEAD_SIZE */ check_base_page_loc_valid(base_page_loc); - + LsnInfoPageHeader *page_header = (LsnInfoPageHeader *)page; uint8 *base_page_map = page_header->base_page_map; uint32 which_bit = base_page_loc / LSN_INFO_NODE_SIZE; @@ -469,25 +472,26 @@ void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_in { /* get batch id and page redo worker id */ extreme_rto::RedoItemTag redo_item_tag; - const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + + const uint32 worker_num_per_mng = extreme_rto::get_page_redo_worker_num_per_manager(); /* batch id and worker id start from 1 when reading a page */ - uint32 batch_id = 
extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, extreme_rto::get_batch_redo_num()) + 1; + uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; uint32 worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; while (INFO_POSITION_IS_VALID(page_info_pos)) { Buffer buffer = InvalidBuffer; Page page = get_lsn_info_page(batch_id, worker_id, page_info_pos, RBM_NORMAL, &buffer); if (unlikely(page == NULL || buffer == InvalidBuffer)) { - ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), - batch_id, worker_id, page_info_pos))); + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, page_info_pos))); } LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - + uint32 offset = lsn_info_postion_to_offset(page_info_pos); BasePageInfo base_page_info = (BasePageInfo)(page + offset); Assert(is_base_page_type(base_page_info->lsn_info_node.type)); - + *min_page_info_pos = page_info_pos; *min_lsn = base_page_info->cur_page_lsn; @@ -508,7 +512,11 @@ void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_in void invalid_base_page_list(StandbyReadMetaInfo *meta_info, Buffer buffer, uint32 offset) { LsnInfoPosition page_info_pos; +#ifdef ENABLE_UT + Page page = get_page_from_buffer(buffer); +#else Page page = BufferGetPage(buffer); +#endif BasePageInfo base_page_info = (BasePageInfo)(page + offset); /* set invalid flags */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); @@ -522,12 +530,12 @@ void invalid_base_page_list(StandbyReadMetaInfo *meta_info, Buffer buffer, uint3 while (INFO_POSITION_IS_VALID(page_info_pos)) { page = get_lsn_info_page(batch_id, worker_id, page_info_pos, RBM_NORMAL, &buffer); if (unlikely(page == NULL || buffer == InvalidBuffer)) { - ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), - batch_id, worker_id, 
page_info_pos))); + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, page_info_pos))); } offset = lsn_info_postion_to_offset(page_info_pos); base_page_info = (BasePageInfo)(page + offset); - + /* unset valid flags */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); base_page_info->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG; @@ -558,10 +566,10 @@ bool recycle_one_lsn_info_page(StandbyReadMetaInfo *meta_info, XLogRecPtr recycl LsnInfoPosition recycle_pos = meta_info->lsn_table_recyle_position; Page page = get_lsn_info_page(batch_id, worker_id, recycle_pos, RBM_NORMAL, &buffer); if (unlikely(page == NULL || buffer == InvalidBuffer)) { - ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), - batch_id, worker_id, recycle_pos))); + ereport(PANIC, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id, + worker_id, recycle_pos))); } - + bool buffer_is_locked = false; /* skip page header */ for (uint32 bit = 1; bit < BASE_PAGE_MAP_SIZE * BYTE_BITS; bit++) { @@ -627,7 +635,10 @@ void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr Assert(meta_info->redo_id > 0); bool recycle_next_page = true; BasePagePosition base_page_position = meta_info->base_page_recyle_position; - + uint64 last_base_page_recyle_segno = meta_info->base_page_recyle_position / EXRTO_BASE_PAGE_FILE_MAXSIZE; + uint64 last_lsn_table_recyle_segno = meta_info->lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE; + uint64 cur_base_page_recyle_segno, cur_lsn_table_recyle_segno; + while (meta_info->lsn_table_recyle_position + BLCKSZ < meta_info->lsn_table_next_position) { recycle_next_page = recycle_one_lsn_info_page(meta_info, recycle_lsn, &base_page_position); if (!recycle_next_page) { @@ -643,8 +654,18 @@ void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr 
Assert(meta_info->base_page_recyle_position % BLCKSZ == 0); Assert(meta_info->base_page_recyle_position <= meta_info->base_page_next_position); - recycle_lsn_info_file(meta_info->batch_id, meta_info->redo_id, meta_info->lsn_table_recyle_position); - recycle_base_page_file(meta_info->batch_id, meta_info->redo_id, meta_info->base_page_recyle_position); + cur_base_page_recyle_segno = meta_info->base_page_recyle_position / EXRTO_BASE_PAGE_FILE_MAXSIZE; + cur_lsn_table_recyle_segno = meta_info->lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE; + if (cur_base_page_recyle_segno > last_base_page_recyle_segno || + cur_lsn_table_recyle_segno > last_lsn_table_recyle_segno) { + buffer_drop_exrto_standby_read_buffers(meta_info); + } + if (cur_lsn_table_recyle_segno > last_lsn_table_recyle_segno) { + recycle_lsn_info_file(meta_info->batch_id, meta_info->redo_id, meta_info->lsn_table_recyle_position); + } + if (cur_base_page_recyle_segno > last_base_page_recyle_segno) { + recycle_base_page_file(meta_info->batch_id, meta_info->redo_id, meta_info->base_page_recyle_position); + } } } // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp new file mode 100644 index 0000000000..c9adb75a65 --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp @@ -0,0 +1,448 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * standby_read_delay_ddl.cpp
+ *
+ * IDENTIFICATION
+ *    src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* NOTE(review): system header names were lost in this copy; restored from usage, confirm upstream */
+#include <sys/stat.h>
+#include <unistd.h>
+#include "access/extreme_rto/standby_read.h"
+#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h"
+#include "access/extreme_rto/standby_read/block_info_meta.h"
+#include "access/multi_redo_api.h"
+#include "commands/dbcommands.h"
+
+#define DELAY_DDL_FILE_DIR "delay_ddl"
+#define DELAY_DDL_FILE_NAME "delay_ddl/delay_delete_info_file"
+
+typedef enum {
+    DROP_DB_TYPE = 1,
+    DROP_TABLE_TYPE,
+} DropDdlType;
+
+const static uint32 MAX_NUM_PER_FILE = 0x10000;
+
+typedef struct {
+    uint8 type;
+    uint8 len;
+    uint16 resvd1;
+    uint32 resvd2;
+    ColFileNode node_info;
+    XLogRecPtr lsn;
+    pg_crc32 crc;
+} DelayDdlInfo;
+/* Create the delay_ddl directory if it is missing and IS_EXRTO_READ is set; ERROR if mkdir fails. */
+void init_delay_ddl_file()
+{
+    if (isDirExist(DELAY_DDL_FILE_DIR)) {
+        return;
+    }
+
+    if (!IS_EXRTO_READ) {
+        return;
+    }
+
+    if (mkdir(DELAY_DDL_FILE_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+        ereport(ERROR,
+            (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", DELAY_DDL_FILE_DIR)));
+    }
+}
+/* pread() wrapper: retries on EINTR and up to EIO_RETRY_TIMES times on EIO; returns pread's result. */
+int read_delay_ddl_info(int fd, void* buf, size_t size, off_t off)
+{
+    int count = 0;
+RETRY:
+    errno = 0;
+    int return_code = pread(fd, buf, size, off);
+    if (return_code < 0) {
+        /* OK to retry if interrupted */
+        if (errno == EINTR) {
+            goto RETRY;
+        }
+
+        if (errno == EIO) {
+            if (count < EIO_RETRY_TIMES) {
+                count++;
+                ereport(WARNING,
+                    (errmsg("delete_by_lsn: failed (read len %lu, offset %ld), retry:Input/Output ERROR", size, off)));
+                goto RETRY;
+            }
+        }
+    }
+
+    return return_code;
+}
+/* Open (creating if needed) file_path, pwrite size bytes at off and fsync; WARNING + false on any failure. */
+bool write_delay_ddl_info(char* file_path, void* buf, size_t size, off_t off)
+{
+    int fd = BasicOpenFile(file_path, O_CREAT | O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+    if (fd < 0) {
+        ereport(WARNING,
+            (errcode_for_file_access(), errmsg("write_delay_ddl_info:could not open file %s : %m", file_path)));
+        return false;
+    }
+    int count = 0;
+RETRY:
+    errno = 0;
+    int return_code = pwrite(fd, buf, size, off);
+    if (return_code != (int)size && errno == 0) {
+        errno = ENOSPC;
+    }
+
+    /* OK to retry if interrupted */
+    if (errno == EINTR) {
+        goto RETRY;
+    }
+    if (errno == EIO) {
+        if (count < EIO_RETRY_TIMES) {
+            count++;
+            ereport(WARNING,
+                (errmsg("write_delay_ddl_info: write %s failed, then retry: Input/Output ERROR", file_path)));
+            goto RETRY;
+        }
+    }
+
+    if (return_code != (int)size) {
+        ereport(WARNING, (errcode_for_file_access(),
+            errmsg("write_delay_ddl_info:write maybe failed %s ,write %d, need %lu, offset %ld: %m",
+                file_path, return_code, size, off)));
+        close(fd);
+        return false;
+    }
+
+    if (fsync(fd) != 0) {
+        ereport(WARNING,
+            (errcode_for_file_access(), errmsg("write_delay_ddl_info:could not fsync file %s: %m", file_path)));
+        close(fd);
+        return false;
+    }
+
+    close(fd);
+    return true;
+}
+
+/* Spin until *stat is switched from 0 to 1, calling RedoInterruptCallBack() between attempts. */
+static void enter_state(uint32 *stat)
+{
+    uint32 expected = 0;
+    while (!pg_atomic_compare_exchange_u32(stat, &expected, 1)) {
+        expected = 0;
+        RedoInterruptCallBack();
+    }
+}
+/* Release a flag taken by enter_state() by decrementing it; every enter_state() must be paired with this. */
+static void exit_state(uint32 *stat)
+{
+    (void)pg_atomic_sub_fetch_u32(stat, 1);
+}
+/* Append one DROP_DB_TYPE record for (db_id, tablespace_id, lsn); the insert index advances only on success. */
+void update_delay_ddl_db(Oid db_id, Oid tablespace_id, XLogRecPtr lsn)
+{
+    StandbyReadDelayDdlState *stat = &g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat;
+    enter_state(&stat->insert_stat);
+    uint64 insert_start = pg_atomic_read_u64(&stat->next_index_can_insert);
+
+    char path[MAXPGPATH];
+    errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_NAME "_%08X_%lX",
+        t_thrd.shemem_ptr_cxt.ControlFile->timeline, insert_start / MAX_NUM_PER_FILE);
+    securec_check_ss(errorno, "", "");
+
+    off_t off_set = (off_t)(insert_start % MAX_NUM_PER_FILE * sizeof(DelayDdlInfo));
+
+    DelayDdlInfo tmp_info = {
+        .type = DROP_DB_TYPE,
+        .len = sizeof(DelayDdlInfo),
+        .resvd1 = 0,
+        .resvd2 = 0,
+    };
+
+    tmp_info.node_info.filenode.dbNode = db_id;
+    tmp_info.node_info.filenode.spcNode = tablespace_id;
+    tmp_info.lsn = lsn;
+    INIT_CRC32C(tmp_info.crc);
+    COMP_CRC32C(tmp_info.crc, (char*)&tmp_info, offsetof(DelayDdlInfo, crc));
+    FIN_CRC32C(tmp_info.crc);
+
+    if (write_delay_ddl_info(path, &tmp_info, sizeof(DelayDdlInfo), off_set)) {
+        pg_atomic_write_u64(&stat->next_index_can_insert, insert_start + 1);
+    }
+    exit_state(&stat->insert_stat);
+}
+/* Append one DROP_TABLE_TYPE record per entry of xnodes, splitting the batch at MAX_NUM_PER_FILE boundaries. */
+void update_delay_ddl_files(ColFileNode* xnodes, int nrels, XLogRecPtr lsn)
+{
+    DelayDdlInfo* info_list = (DelayDdlInfo*)palloc(sizeof(DelayDdlInfo) * (uint32)nrels);
+    for (int i = 0; i < nrels; ++i) {
+        info_list[i].type = DROP_TABLE_TYPE;
+        info_list[i].len = sizeof(DelayDdlInfo);
+        info_list[i].resvd1 = 0;
+        info_list[i].resvd2 = 0;
+        info_list[i].node_info = xnodes[i];
+        info_list[i].lsn = lsn;
+        INIT_CRC32C(info_list[i].crc);
+        COMP_CRC32C(info_list[i].crc, (char*)&info_list[i], offsetof(DelayDdlInfo, crc));
+        FIN_CRC32C(info_list[i].crc);
+    }
+    uint32 remains = (uint32)nrels;
+    StandbyReadDelayDdlState *stat = &g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat;
+    enter_state(&stat->insert_stat);
+    uint64 insert_start = pg_atomic_read_u64(&stat->next_index_can_insert);
+
+    DelayDdlInfo *start = info_list;
+    while (remains > 0) {
+        uint32 left_size = MAX_NUM_PER_FILE - insert_start % MAX_NUM_PER_FILE;
+        uint32 copys = remains;
+        if (remains > left_size) {
+            copys = (uint32)left_size;
+        }
+
+        char path[MAXPGPATH];
+        errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_NAME "_%08X_%lX",
+            t_thrd.shemem_ptr_cxt.ControlFile->timeline, insert_start / MAX_NUM_PER_FILE);
+        securec_check_ss(errorno, "", "");
+
+        off_t off_set = (off_t)(insert_start % MAX_NUM_PER_FILE * sizeof(DelayDdlInfo));
+
+        if (write_delay_ddl_info(path, start, copys * sizeof(DelayDdlInfo), off_set)) {
+            remains -= copys;
+            start += copys;
+            insert_start += copys;
+        } else {
+            break;
+        }
+        RedoInterruptCallBack();
+    }
+    pfree(info_list);
+
+    pg_atomic_write_u64(&stat->next_index_can_insert, insert_start);
+    exit_state(&stat->insert_stat);
+}
+/* Verify the record CRC and execute the delayed drop it describes; bad CRC or unknown type only warns. */
+void do_delay_ddl(DelayDdlInfo* info)
+{
+    pg_crc32c crc_check;
+    INIT_CRC32C(crc_check);
+    COMP_CRC32C(crc_check, (char*)info, offsetof(DelayDdlInfo, crc));
+    FIN_CRC32C(crc_check);
+
+    if (!EQ_CRC32C(crc_check, info->crc)) {
+        ereport(WARNING, (errcode_for_file_access(),
+            errmsg("delay ddl ,crc(%u:%u) check error, maybe is type:%u, info %u/%u/%u lsn:%lu", crc_check, info->crc,
+                (uint32)info->type, info->node_info.filenode.spcNode, info->node_info.filenode.dbNode,
+                info->node_info.filenode.relNode, info->lsn)));
+        return;
+    }
+
+    if (info->type == DROP_TABLE_TYPE) {
+        unlink_relfiles(&info->node_info, 1);
+        xact_redo_log_drop_segs(&info->node_info, 1, info->lsn);
+    } else if (info->type == DROP_DB_TYPE) {
+        do_db_drop(info->node_info.filenode.dbNode, info->node_info.filenode.spcNode);
+    } else {
+        ereport(WARNING, (errcode_for_file_access(),
+            errmsg("delay ddl ,type error, maybe is type:%u, info %u/%u/%u lsn:%lu", (uint32)info->type,
+                info->node_info.filenode.spcNode, info->node_info.filenode.dbNode, info->node_info.filenode.relNode,
+                info->lsn)));
+    }
+}
+/* Execute pending records in index order while their lsn <= the given lsn, then store the new unlink index. */
+void delete_by_lsn(XLogRecPtr lsn)
+{
+    StandbyReadDelayDdlState *stat = &g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat;
+    enter_state(&stat->delete_stat);
+
+    uint64 next_delete = pg_atomic_read_u64(&stat->next_index_need_unlink);
+    uint64 next_insert = pg_atomic_read_u64(&stat->next_index_can_insert);
+    DelayDdlInfo* info_list = (DelayDdlInfo*)palloc(sizeof(DelayDdlInfo) * MAX_NUM_PER_FILE);
+    bool go_on_delete = true;
+    uint64 deleted_total = 0;
+    while (next_delete < next_insert && go_on_delete) {
+        uint32 cur_deleted = MAX_NUM_PER_FILE;
+
+        /* same file */
+        if ((next_delete / MAX_NUM_PER_FILE) == (next_insert / MAX_NUM_PER_FILE)) {
+            cur_deleted = next_insert - next_delete;
+        } else { /* different file */
+            cur_deleted = MAX_NUM_PER_FILE - next_delete % MAX_NUM_PER_FILE;
+        }
+
+        uint64 offset = next_delete % MAX_NUM_PER_FILE * sizeof(DelayDdlInfo);
+
+        char path[MAXPGPATH];
+        errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_NAME "_%08X_%lX",
+            t_thrd.shemem_ptr_cxt.ControlFile->timeline, next_delete / MAX_NUM_PER_FILE);
+        securec_check_ss(errorno, "", "");
+
+        int fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+        if (fd < 0) {
+            break; /* leave through the common cleanup so info_list is freed and delete_stat is released */
+        }
+        int count = read_delay_ddl_info(fd, info_list, cur_deleted * sizeof(DelayDdlInfo), (off_t)offset);
+        close(fd);
+        if (count <= 0) {
+            ereport(WARNING,
+                (errmsg("delete_by_lsn: file %s nothing deleted_total", path)));
+            go_on_delete = false;
+            continue;
+        }
+        if ((uint32)count / sizeof(DelayDdlInfo) < cur_deleted) {
+            cur_deleted = (uint32)count / sizeof(DelayDdlInfo);
+            ereport(WARNING,
+                (errmsg("delete_by_lsn: the info in file %s is less than expected, cur_deleted: %u, next_delete: "
+                        "%lu, next_insert: %lu",
+                    path, cur_deleted, next_delete, next_insert)));
+        }
+        go_on_delete = (cur_deleted > 0); /* a read shorter than one record makes no progress: stop, do not spin */
+        for (uint32 i = 0; i < cur_deleted; ++i) {
+            if (info_list[i].lsn <= lsn) {
+                do_delay_ddl(&info_list[i]);
+            } else {
+                cur_deleted = i;
+                go_on_delete = false;
+                break;
+            }
+            RedoInterruptCallBack();
+        }
+
+        next_delete += cur_deleted;
+        deleted_total += cur_deleted;
+        /* only a file that was just finished may go; with no progress, path still holds pending records */
+        if (cur_deleted > 0 && next_delete % MAX_NUM_PER_FILE == 0) {
+            (void)unlink(path);
+        }
+        RedoInterruptCallBack();
+    }
+    ereport(LOG, (errmsg("delete_by_lsn: unlink files number: %lu", deleted_total)));
+    pfree(info_list);
+    pg_atomic_write_u64(&stat->next_index_need_unlink, next_delete);
+    exit_state(&stat->delete_stat);
+}
+/* Execute every pending record whose spcNode equals tablespace_id; the stored unlink index is not advanced. */
+void delete_by_table_space(Oid tablespace_id)
+{
+    StandbyReadDelayDdlState *stat = &g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat;
+    enter_state(&stat->delete_stat);
+    uint64 next_delete = pg_atomic_read_u64(&stat->next_index_need_unlink);
+    uint64 next_insert = pg_atomic_read_u64(&stat->next_index_can_insert);
+
+    DelayDdlInfo* info_list = (DelayDdlInfo*)palloc0(sizeof(DelayDdlInfo) * MAX_NUM_PER_FILE);
+    while (next_delete < next_insert) {
+        uint32 copys = MAX_NUM_PER_FILE;
+
+        // same file
+        if ((next_delete / MAX_NUM_PER_FILE) == (next_insert / MAX_NUM_PER_FILE)) {
+            copys = next_insert - next_delete;
+        } else { /* different file */
+            copys = MAX_NUM_PER_FILE - next_delete % MAX_NUM_PER_FILE;
+        }
+
+        uint64 offset = next_delete % MAX_NUM_PER_FILE * sizeof(DelayDdlInfo);
+        char path[MAXPGPATH];
+        errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_NAME "_%08X_%lX",
+            t_thrd.shemem_ptr_cxt.ControlFile->timeline, next_delete / MAX_NUM_PER_FILE);
+        securec_check_ss(errorno, "", "");
+
+        int fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+        if (fd < 0) {
+            ereport(WARNING,
+                (errmsg("delete_by_table_space: file %s could not open:%m", path)));
+            break; /* fall through to pfree + exit_state instead of returning with delete_stat held */
+        }
+        int count = read_delay_ddl_info(fd, info_list, copys * sizeof(DelayDdlInfo), (off_t)offset);
+        close(fd); /* the descriptor is not needed past the read; close it before any early exit */
+        if (count <= 0) {
+            ereport(WARNING,
+                (errmsg("delete_by_table_space: file %s nothing deleted", path)));
+            break;
+        }
+        if ((uint32)count / sizeof(DelayDdlInfo) < copys) {
+            copys = (uint32)count / sizeof(DelayDdlInfo);
+            ereport(WARNING,
+                (errmsg("delete_by_table_space: the info in file %s is less than expected, copys: %u, next_delete: "
+                        "%lu, next_insert: %lu",
+                    path, copys, next_delete, next_insert)));
+        }
+        if (copys == 0) {
+            break; /* a read shorter than one record makes no progress: stop instead of looping forever */
+        }
+
+        for (uint32 i = 0; i < copys; ++i) {
+            if (info_list[i].node_info.filenode.spcNode == tablespace_id) {
+                do_delay_ddl(&info_list[i]);
+            }
+            RedoInterruptCallBack();
+        }
+        next_delete += copys;
+        RedoInterruptCallBack();
+    }
+    pfree(info_list);
+    exit_state(&stat->delete_stat);
+}
+/* Replay and unlink the delay_ddl files found on disk; resets the in-memory state when not in recovery. */
+void do_all_old_delay_ddl()
+{
+    DIR* file_dir = AllocateDir(DELAY_DDL_FILE_DIR);
+    struct dirent* file_dirent = NULL;
+    uint32 timeline = 0;
+    uint64 segment = 0;
+    while ((file_dirent = ReadDir(file_dir, DELAY_DDL_FILE_DIR)) != NULL) {
+        int nmatch = sscanf_s(file_dirent->d_name, "delay_delete_info_file_%08X_%lX", &timeline, &segment);
+        if (nmatch != 2) {
+            continue;
+        }
+        if (timeline >= t_thrd.shemem_ptr_cxt.ControlFile->timeline && RecoveryInProgress()) {
+            continue;
+        }
+        char path[MAXPGPATH];
+        errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_DIR "/%s", file_dirent->d_name);
+        securec_check_ss(errorno, "", "");
+        int fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+        if (fd < 0) {
+            ereport(WARNING,
+                (errcode_for_file_access(), errmsg("open_delay_store_file:could not open file \"%s\": %m", path)));
+            continue; /* never read from or close an invalid descriptor; try the next file */
+        }
+        DelayDdlInfo* info_list = (DelayDdlInfo*)palloc0(sizeof(DelayDdlInfo) * MAX_NUM_PER_FILE);
+        int count = read_delay_ddl_info(fd, info_list, MAX_NUM_PER_FILE * sizeof(DelayDdlInfo), 0);
+        close(fd);
+        if (count <= 0) {
+            pfree(info_list);
+            (void)FreeDir(file_dir);
+            return; /* same early exit as before, but without leaking the buffer and the directory handle */
+        }
+
+        for (uint32 i = 0; i < (uint32)(count / sizeof(DelayDdlInfo)); ++i) {
+            if (XLByteLE(info_list[i].lsn, t_thrd.shemem_ptr_cxt.ControlFile->checkPointCopy.redo)) {
+                do_delay_ddl(&info_list[i]);
+            }
+            RedoInterruptCallBack();
+        }
+        (void)unlink(path);
+        pfree(info_list);
+        RedoInterruptCallBack();
+    }
+    (void)FreeDir(file_dir);
+    if (!RecoveryInProgress()) {
+        g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat.next_index_can_insert = 0;
+        g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat.next_index_need_unlink = 0;
+        g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat.delete_stat = 0;
+        g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat.insert_stat = 0;
+    }
+}
\ No newline at end of file
diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
index f64492a46b..145fa84a11 100644
--- a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
+++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp
@@ -21,43 +21,54 @@
  *
------------------------------------------------------------------------- */ +#include #include #include "access/extreme_rto/page_redo.h" #include "access/extreme_rto/standby_read/block_info_meta.h" #include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" #include "access/multi_redo_api.h" #include "pgstat.h" #include "storage/smgr/relfilenode.h" #include "storage/buf/buf_internals.h" #include "storage/buf/bufmgr.h" +#include "../../../page/pageparse.h" #include "storage/smgr/segment.h" #include "utils/rel.h" #include "utils/palloc.h" #include "access/extreme_rto/dispatcher.h" #include "funcapi.h" + const char* EXRTO_BASE_PAGE_SUB_DIR = "base_page"; const char* EXRTO_LSN_INFO_SUB_DIR = "lsn_info_meta"; const char* EXRTO_BLOCK_INFO_SUB_DIR = "block_info_meta"; const char* EXRTO_FILE_SUB_DIR[] = { EXRTO_BASE_PAGE_SUB_DIR, EXRTO_LSN_INFO_SUB_DIR, EXRTO_BLOCK_INFO_SUB_DIR}; const uint32 EXRTO_FILE_PATH_LEN = 1024; +const uint32 XID_THIRTY_TWO = 32; -void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode& read_node) +void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode &read_node, bool is_start_lsn) { read_node.spcNode = (Oid)(read_lsn >> 32); read_node.dbNode = (Oid)(read_lsn); - read_node.relNode = InvalidOid; // make sure it can be InvalidOid or not + read_node.relNode = InvalidOid; // make sure it can be InvalidOid or not read_node.opt = 0; - read_node.bucketNode = InvalidBktId; + if (is_start_lsn) { + /* means read_lsn is the start ptr of xlog */ + read_node.bucketNode = ExrtoReadStartLSNBktId; + } else { + /* means read_lsn is the end ptr of xlog */ + read_node.bucketNode = ExrtoReadEndLSNBktId; + } } -BufferDesc* alloc_standby_read_buf( - const BufferTag& buf_tag, BufferAccessStrategy strategy, bool& found, XLogRecPtr read_lsn) +BufferDesc *alloc_standby_read_buf(const BufferTag &buf_tag, BufferAccessStrategy strategy, bool &found, + XLogRecPtr read_lsn, bool is_start_lsn) { 
RelFileNode read_node; - make_standby_read_node(read_lsn, read_node); - BufferDesc* buf_desc = BufferAlloc(read_node, 0, buf_tag.forkNum, buf_tag.blockNum, strategy, &found, NULL); + make_standby_read_node(read_lsn, read_node, is_start_lsn); + BufferDesc *buf_desc = BufferAlloc(read_node, 0, buf_tag.forkNum, buf_tag.blockNum, strategy, &found, NULL); return buf_desc; } @@ -87,7 +98,7 @@ Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber .blockNum = block_num, }; ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); - BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, page_lsn); + BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, page_lsn, false); if (hit) { UnlockReleaseBuffer(newest_buf); @@ -125,10 +136,12 @@ Buffer standby_read_buf( .forkNum = fork_num, .blockNum = block_num, }; - XLogRecPtr read_lsn = t_thrd.proc->exrto_read_lsn; - if (read_lsn == InvalidXLogRecPtr) { - Assert(IsSystemRelation(reln)); - read_lsn = MAX_XLOG_REC_PTR; + + XLogRecPtr read_lsn = MAX_XLOG_REC_PTR; + if (u_sess->utils_cxt.CurrentSnapshot != NULL && XLogRecPtrIsValid(u_sess->utils_cxt.CurrentSnapshot->read_lsn)) { + read_lsn = u_sess->utils_cxt.CurrentSnapshot->read_lsn; + } else if (XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)) { + read_lsn = t_thrd.proc->exrto_read_lsn; } Buffer read_buf = get_newest_page_for_read(reln, fork_num, block_num, mode, strategy, read_lsn); @@ -142,17 +155,24 @@ Buffer standby_read_buf( StandbyReadLsnInfoArray *lsn_info = &t_thrd.exrto_recycle_cxt.lsn_info; bool result = extreme_rto_standby_read::get_page_lsn_info(buf_tag, strategy, read_lsn, lsn_info); if (!result) { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - (errmsg("standby_read_buf couldnot found buf %u/%u/%u %d %u read lsn %lu", buf_tag.rnode.spcNode, - buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum, read_lsn)))); + ereport( + ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + 
(errmsg("standby_read_buf couldnot found buf %u/%u/%u %d %u read lsn %08X/%08X current_time: %ld " + "gen_snaptime:%ld thread_read_lsn:%08X/%08X", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, (uint32)(read_lsn >> XID_THIRTY_TWO), (uint32)read_lsn, GetCurrentTimestamp(), + g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time, + (uint32)(t_thrd.proc->exrto_read_lsn >> XID_THIRTY_TWO), (uint32)t_thrd.proc->exrto_read_lsn)))); return InvalidBuffer; } // read lsn info XLogRecPtr expected_lsn = InvalidXLogRecPtr; + bool is_start_lsn = true; if (lsn_info->lsn_num == 0) { expected_lsn = lsn_info->base_page_lsn; + is_start_lsn = false; } else { Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] > 0); Assert(lsn_info->lsn_array[lsn_info->lsn_num - 1] < read_lsn); @@ -160,7 +180,7 @@ Buffer standby_read_buf( expected_lsn = lsn_info->lsn_array[lsn_info->lsn_num - 1]; } - BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, expected_lsn); + BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, expected_lsn, is_start_lsn); if (hit) { return BufferDescriptorGetBuffer(buf_desc); @@ -243,10 +263,73 @@ void exrto_standby_read_init() if (IS_EXRTO_READ) { make_exrto_file_directory(); } + init_delay_ddl_file(); +} + +bool check_need_drop_buffer(StandbyReadMetaInfo *meta_info, const BufferTag tag) +{ + Assert(meta_info != NULL); + if (!IS_EXRTO_RELFILENODE(tag.rnode)) { + return false; + } + + ExRTOFileType type = exrto_file_type(tag.rnode.spcNode); + if (type == BLOCK_INFO_META) { + return false; + } + + uint32 batch_id = tag.rnode.dbNode >> LOW_WORKERID_BITS; + uint32 worker_id = tag.rnode.dbNode & LOW_WORKERID_MASK; + if (batch_id == meta_info->batch_id && worker_id == meta_info->redo_id) { + uint64 total_block_num = + get_total_block_num(type, tag.rnode.relNode, tag.blockNum); + uint64 recycle_pos = ((type == BASE_PAGE) ? 
meta_info->base_page_recyle_position + : meta_info->lsn_table_recyle_position); + return (total_block_num < (recycle_pos / BLCKSZ)); + } + + return false; +} + +void buffer_drop_exrto_standby_read_buffers(StandbyReadMetaInfo *meta_info) +{ + bool drop_all = (meta_info == NULL); + if (drop_all) { + ereport(LOG, (errmsg("buffer_drop_exrto_standby_read_buffers: start to drop buffers."))); + } + + int i = 0; + while (i < TOTAL_BUFFER_NUM) { + BufferDesc *buf_desc = GetBufferDescriptor(i); + uint32 buf_state; + bool need_drop = false; + /* + * Some safe unlocked checks can be done to reduce the number of cycle. + */ + if (!IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) { + i++; + continue; + } + + buf_state = LockBufHdr(buf_desc); + if (drop_all) { + need_drop = IS_EXRTO_RELFILENODE(buf_desc->tag.rnode); + } else { + /* only drop base page and lsn info buffers */ + need_drop = check_need_drop_buffer(meta_info, buf_desc->tag); + } + if (need_drop) { + InvalidateBuffer(buf_desc); /* with buffer head lock released */ + } else { + UnlockBufHdr(buf_desc, buf_state); + } + i++; + } } Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS) { +#ifndef ENABLE_LITE_MODE #define EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM 6 Datum values[EXRTO_HOT_STANDBY_SPACE_INFO_INFONUM]; errno_t rc; @@ -285,8 +368,8 @@ Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS) } for (uint32 i = 0; i < worker_nums; ++i) { - extreme_rto::PageRedoWorker* page_redo_worker = extreme_rto::g_dispatcher->allWorkers[i]; - if (page_redo_worker->role != extreme_rto::REDO_PAGE_WORKER) { + extreme_rto::PageRedoWorker *page_redo_worker = extreme_rto::g_dispatcher->allWorkers[i]; + if (page_redo_worker->role != extreme_rto::REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { continue; } StandbyReadMetaInfo meta_info = page_redo_worker->standby_read_meta_info; @@ -313,10 +396,10 @@ Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS) basepage_file_size += basepage_file_size_per_thread; } 
SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); - + char block_meta_file_dir[EXRTO_FILE_PATH_LEN]; char block_meta_file_name[EXRTO_FILE_PATH_LEN]; - struct dirent *de = NULL; + struct dirent *de = nullptr; struct stat st; rc = snprintf_s(block_meta_file_dir, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "./%s/%s", @@ -347,5 +430,355 @@ Datum gs_hot_standby_space_info(PG_FUNCTION_ARGS) tuple = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +#else + FEATURE_ON_LITE_MODE_NOT_SUPPORTED(); + PG_RETURN_TEXT_P(NULL); +#endif } +namespace extreme_rto_standby_read { +typedef struct _DumpLsnInfo { + XLogRecPtr lsn_info_end_lsn; + XLogRecPtr base_page_info_cur_lsn; + std::vector lsn_info_vec; +} DumpLsnInfo; +XLogRecPtr acquire_max_lsn() +{ + if (RecoveryInProgress()) { + ereport(LOG, (errmsg("Can't get local max LSN during recovery in dump."))); + } + XLogRecPtr current_recptr = GetXLogWriteRecPtr(); + return current_recptr; +} + +void exrto_xlog_dump_err_rep(XLogReaderState *xlogreader_state, char *error_msg) +{ + if (xlogreader_state == NULL) { + ereport(LOG, (errmsg("could not read WAL record, xlogreader_state is invalid in dump."))); + return; + } + + if (error_msg != nullptr) { + ereport(LOG, (errmsg("could not read WAL record in dump at %X/%X: %s", + (uint32)(xlogreader_state->ReadRecPtr >> XIDTHIRTYTWO), + (uint32)xlogreader_state->ReadRecPtr, error_msg))); + } else { + ereport(LOG, (errmsg("could not read WAL record in dump at %X/%X", + (uint32)(xlogreader_state->ReadRecPtr >> XIDTHIRTYTWO), + (uint32)xlogreader_state->ReadRecPtr))); + } +} + +void exrto_xlog_dump(char *dump_filename, DumpLsnInfo dump_lsn_info_stru) +{ + XLogRecPtr start_lsn = dump_lsn_info_stru.base_page_info_cur_lsn; + XLogRecPtr end_lsn = dump_lsn_info_stru.lsn_info_end_lsn; + ereport(LOG, (errmsg("start_lsn in dump at %X/%X. 
end_lsn in dump at %X/%X ", (uint32)(start_lsn >> XIDTHIRTYTWO), + (uint32)start_lsn, (uint32)(end_lsn >> XIDTHIRTYTWO), (uint32)end_lsn))); + /* start reading */ + errno_t rc = EOK; + WalPrivate read_private; + rc = memset_s(&read_private, sizeof(WalPrivate), 0, sizeof(WalPrivate)); + securec_check(rc, "\0", "\0"); + read_private.data_dir = t_thrd.proc_cxt.DataDir; + read_private.tli = 1; + + XLogRecPtr min_lsn = (XLogGetLastRemovedSegno() + 1) * XLogSegSize; + XLogRecPtr max_lsn = acquire_max_lsn(); + if (XLByteLT(start_lsn, min_lsn)) { + start_lsn = min_lsn; + } + if ((max_lsn > start_lsn) && (XLByteLT(max_lsn, end_lsn) || end_lsn == PG_UINT64_MAX)) { + end_lsn = max_lsn; + } + + XLogReaderState *xlogreader_state = XLogReaderAllocate(&SimpleXLogPageRead, &read_private); + if (!xlogreader_state) { + ereport(LOG, (errmsg("memory is temporarily unavailable in dump while allocate xlog reader"))); + } + /* get the first valid xlog record location */ + XLogRecPtr first_record = XLogFindNextRecord(xlogreader_state, start_lsn); + /* if we are recycling or removing log files concurrently, we can't find the next record right after. + * Hence, we need to update the min_lsn */ + if (XLByteEQ(first_record, InvalidXLogRecPtr)) { + ereport(LOG, (errmsg("XLogFindNextRecord in dump: could not find a valid record after %X/%X. 
Retry.", + (uint32)(start_lsn >> XIDTHIRTYTWO), (uint32)start_lsn))); + bool found = false; + first_record = UpdateNextLSN(start_lsn, end_lsn, xlogreader_state, &found); + if (!found) + ereport(LOG, (errmsg("XLogFindNextRecord in dump: could not find a valid record between %X/%X and %X/%X.", + (uint32)(start_lsn >> XIDTHIRTYTWO), (uint32)start_lsn, + (uint32)(end_lsn >> XIDTHIRTYTWO), (uint32)end_lsn))); + } + XLogRecPtr valid_start_lsn = first_record; + XLogRecPtr valid_end_lsn = valid_start_lsn; + + FILE *output_file = fopen(dump_filename, "a"); + CheckOpenFile(output_file, dump_filename); + char *str_output = (char *)palloc0(MAXOUTPUTLEN * sizeof(char)); + + /* valid first record is not the given one */ + if (!XLByteEQ(first_record, start_lsn) && (start_lsn % XLogSegSize) != 0) { + rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "first record is after %X/%X, at %X/%X, skipping over %lu bytes\n", + (uint32)(start_lsn >> XIDTHIRTYTWO), (uint32)start_lsn, + (uint32)(first_record >> XIDTHIRTYTWO), (uint32)first_record, + XLByteDifference(first_record, start_lsn)); + securec_check_ss(rc, "\0", "\0"); + } + CheckWriteFile(output_file, dump_filename, str_output); + pfree_ext(str_output); + + size_t count = 0; + char *error_msg = nullptr; + XLogRecord *record = NULL; + while (xlogreader_state && XLByteLE(xlogreader_state->EndRecPtr, end_lsn)) { + CHECK_FOR_INTERRUPTS(); /* Allow cancel/die interrupts */ + record = XLogReadRecord(xlogreader_state, first_record, &error_msg); + valid_end_lsn = xlogreader_state->EndRecPtr; + if (!record && XLByteLE(valid_end_lsn, end_lsn)) { + /* if we are recycling or removing log files concurrently, and we can't find the next record right + * after. In this case, we try to read from the current oldest xlog file. 
*/ + bool found = false; + XLogRecPtr temp_start_lsn = Max(xlogreader_state->EndRecPtr, start_lsn); + first_record = UpdateNextLSN(temp_start_lsn, end_lsn, xlogreader_state, &found); + if (found) { + ereport(LOG, (errmsg("We cannot read %X/%X. After retried, we jump to read the next available %X/%X. " + "The missing part might be recycled or removed.", + (uint32)(temp_start_lsn >> XIDTHIRTYTWO), (uint32)temp_start_lsn, + (uint32)(first_record >> XIDTHIRTYTWO), (uint32)first_record))); + continue; + } + exrto_xlog_dump_err_rep(xlogreader_state, error_msg); + break; + } else if (!record && !XLByteLT(valid_end_lsn, end_lsn)) { + error_msg = nullptr; + exrto_xlog_dump_err_rep(xlogreader_state, error_msg); + break; + } + + str_output = (char *)palloc(MAXOUTPUTLEN * sizeof(char)); + rc = memset_s(str_output, MAXOUTPUTLEN, 0, MAXOUTPUTLEN); + securec_check(rc, "\0", "\0"); + XLogDumpDisplayRecord(xlogreader_state, str_output); + count++; + + CheckWriteFile(output_file, dump_filename, str_output); + pfree_ext(str_output); + if (count >= dump_lsn_info_stru.lsn_info_vec.size()) { + break; + } + first_record = dump_lsn_info_stru.lsn_info_vec.at(count); + } + XLogReaderFree(xlogreader_state); + str_output = (char *)palloc(MAXOUTPUTLEN * sizeof(char)); + rc = memset_s(str_output, MAXOUTPUTLEN, 0, MAXOUTPUTLEN); + securec_check(rc, "\0", "\0"); + + /* Summary(xx total): valid start_lsn: xxx, valid end_lsn: xxx */ + rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "\nSummary (%zu total): valid start_lsn: %X/%X, valid end_lsn: %X/%X\n", count, + (uint32)(valid_start_lsn >> XIDTHIRTYTWO), (uint32)(valid_start_lsn), + (uint32)(valid_end_lsn >> XIDTHIRTYTWO), (uint32)(valid_end_lsn)); + securec_check_ss(rc, "\0", "\0"); + CheckWriteFile(output_file, dump_filename, str_output); + CheckCloseFile(output_file, dump_filename, true); + pfree_ext(str_output); + CloseXlogFile(); +} + +void dump_base_page_info(char *str_output, BasePageInfo 
base_page_info) +{ + errno_t rc = EOK; + rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "Base_page_info: cur_page_lsn=%lu, read_lsn=%lu, spcNode=%u, dbNode=%u, relNode=%u, bucketnode=%d, " + "fork_num=%d, block_num=%u, next_base_page_lsn=%lu, base_page_position=%lu, pre=%lu, next=%lu\n", + base_page_info->cur_page_lsn, t_thrd.proc->exrto_read_lsn, base_page_info->relfilenode.spcNode, + base_page_info->relfilenode.dbNode, base_page_info->relfilenode.relNode, + base_page_info->relfilenode.bucketNode, base_page_info->fork_num, base_page_info->block_num, + base_page_info->next_base_page_lsn, base_page_info->base_page_position, + base_page_info->base_page_list.prev, base_page_info->base_page_list.next); + securec_check_ss(rc, "\0", "\0"); +} + +void dump_lsn_info(char *str_output, const BasePageInfo base_page_info, DumpLsnInfo &dump_lsn_info_stru, + const BufferTag& buf_tag, Buffer buffer) +{ + LsnInfo lsn_info = &base_page_info->lsn_info_node; + LsnInfoPosition next_lsn_info_pos; + bool find_front = false; + dump_lsn_info_stru.base_page_info_cur_lsn = base_page_info->cur_page_lsn; + dump_lsn_info_stru.lsn_info_end_lsn = dump_lsn_info_stru.base_page_info_cur_lsn; + + uint32 batch_id; + uint32 worker_id; + + extreme_rto::RedoItemTag redo_item_tag; + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; + worker_id = extreme_rto::GetWorkerId(&redo_item_tag, extreme_rto::get_page_redo_worker_num_per_manager()) + 1; + + do { + errno_t rc = EOK; + rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "Lsn_info: pre=%lu, next=%lu, flags=%u, type=%u, used=%u\n", + lsn_info->lsn_list.prev, lsn_info->lsn_list.next, lsn_info->flags, lsn_info->type, + lsn_info->used); + securec_check_ss(rc, "\0", "\0"); + for (uint16 i = 0; i < lsn_info->used; ++i) { + rc = 
snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "lsn_num is %lu\n", lsn_info->lsn[i]); + securec_check_ss(rc, "\0", "\0"); + dump_lsn_info_stru.lsn_info_vec.push_back(lsn_info->lsn[i]); + if (!find_front) { + dump_lsn_info_stru.base_page_info_cur_lsn = lsn_info->lsn[i]; + find_front = true; + } + dump_lsn_info_stru.lsn_info_end_lsn = lsn_info->lsn[i]; + } + next_lsn_info_pos = lsn_info->lsn_list.next; + UnlockReleaseBuffer(buffer); + /* reach the end of the list */ + if (next_lsn_info_pos == LSN_INFO_LIST_HEAD) { + break; + } + + Page page = get_lsn_info_page(batch_id, worker_id, next_lsn_info_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(LOG, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id, + worker_id, next_lsn_info_pos))); + break; + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + uint32 offset = lsn_info_postion_to_offset(next_lsn_info_pos); + lsn_info = (LsnInfo)(page + offset); + } while (true); +} + +// dump all version of basepage info lsn info +void dump_base_page_info_lsn_info(const BufferTag &buf_tag, LsnInfoPosition head_lsn_base_page_pos, char *str_output, + DumpLsnInfo &dump_lsn_info_stru) +{ + uint32 batch_id; + uint32 worker_id; + BasePageInfo base_page_info = NULL; + Buffer buffer; + + extreme_rto::RedoItemTag redo_item_tag; + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; + worker_id = extreme_rto::GetWorkerId(&redo_item_tag, extreme_rto::get_page_redo_worker_num_per_manager()) + 1; + + /* find fisrt base page whose lsn less than read lsn form tail to head */ + do { + /* reach the end of the list */ + if (INFO_POSITION_IS_INVALID(head_lsn_base_page_pos)) { + ereport(LOG, (errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", + 
buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, batch_id, worker_id))); + break; + } + buffer = InvalidBuffer; + Page page = get_lsn_info_page(batch_id, worker_id, head_lsn_base_page_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(LOG, (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), batch_id, + worker_id, head_lsn_base_page_pos))); + break; + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + uint32 offset = lsn_info_postion_to_offset(head_lsn_base_page_pos); + base_page_info = (BasePageInfo)(page + offset); + + if (is_base_page_type(base_page_info->lsn_info_node.type) == false) { + UnlockReleaseBuffer(buffer); + break; + } + + Buffer base_page_datum_buffer = + buffer_read_base_page(batch_id, worker_id, base_page_info->base_page_position, RBM_NORMAL); + LockBuffer(base_page_datum_buffer, BUFFER_LOCK_SHARE); + + UnlockReleaseBuffer(base_page_datum_buffer); + + dump_base_page_info(str_output, base_page_info); // print info + dump_lsn_info(str_output, base_page_info, dump_lsn_info_stru, buf_tag, buffer); + head_lsn_base_page_pos = base_page_info->base_page_list.prev; + } while (true); +} + +void dump_current_lsn(char *str_output) +{ + errno_t rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "current redo position %lu, cache lsn: %lu\n", + g_instance.comm_cxt.predo_cxt.redoPf.last_replayed_end_ptr, t_thrd.proc->exrto_read_lsn); + securec_check_ss(rc, "\0", "\0"); +} + +void dump_one_block_info(char *str_output, BlockMetaInfo* block_meta_info) +{ + errno_t rc = snprintf_s(str_output + (int)strlen(str_output), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, + "Block info meta info: timeline=%u, record_num=%u, min_lsn=%lu, max_lsn=%lu, " + "lsn_info_list prev=%lu, next=%lu, base_page_info_list prev=%lu, next=%lu\n", + block_meta_info->timeline, block_meta_info->record_num, block_meta_info->min_lsn, + 
block_meta_info->max_lsn, block_meta_info->lsn_info_list.prev, + block_meta_info->lsn_info_list.next, block_meta_info->base_page_info_list.prev, + block_meta_info->base_page_info_list.next); + securec_check_ss(rc, "\0", "\0"); +} + +void dump_error_all_info(const RelFileNode &rnode, ForkNumber forknum, BlockNumber blocknum) +{ + if (!IS_EXRTO_STANDBY_READ) { + return; + } + buffer_in_progress_pop(); + BufferTag buf_tag; + INIT_BUFFERTAG(buf_tag, rnode, forknum, blocknum); + + Buffer buf; + BlockMetaInfo *block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_NORMAL, &buf, true); + if (block_meta_info == NULL) { + ereport(LOG, + (errmsg("can not find block meta info by given buftag. rnode is %u/%u/%u %d %u", buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); + buffer_in_progress_push(); + return; // it's more likely we cannot get block info than cannot alloc file, so put this first for performance. + } + + char *str_output = (char *)palloc0(MAXOUTPUTLEN * sizeof(char)); + char *dump_filename = (char *)palloc0(MAXFILENAME * sizeof(char)); + errno_t rc = snprintf_s(dump_filename + (int)strlen(dump_filename), MAXFILENAME, MAXFILENAME - 1, + "%s/%u_%u_%u_%d_%d.lsnblockinfo_dump", t_thrd.proc_cxt.DataDir, rnode.spcNode, rnode.dbNode, rnode.relNode, + forknum, blocknum); + securec_check_ss(rc, "\0", "\0"); + FILE *dump_file = AllocateFile(dump_filename, PG_BINARY_W); + if (dump_file == NULL) { + ereport(LOG, (errmsg("can not alloc file. 
rnode is %u/%u/%u %d %u", buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, buf_tag.blockNum))); + pfree_ext(str_output); + pfree_ext(dump_filename); + buffer_in_progress_push(); + return; + } + + dump_current_lsn(str_output); + dump_one_block_info(str_output, block_meta_info); + + DumpLsnInfo dump_lsn_info_stru; + dump_lsn_info_stru.lsn_info_end_lsn = PG_UINT64_MAX; + dump_base_page_info_lsn_info(buf_tag, block_meta_info->base_page_info_list.prev, str_output, dump_lsn_info_stru); + UnlockReleaseBuffer(buf); // buf was automatically locked by getting block meta info, so we need release + + uint result = fwrite(str_output, 1, strlen(str_output), dump_file); + if (result != strlen(str_output)) { + ereport(ERROR, (errcode(ERRCODE_FILE_WRITE_FAILED), errmsg("Cannot write into file %s!", dump_filename))); + } + pfree_ext(str_output); + (void)FreeFile(dump_file); + exrto_xlog_dump(dump_filename, dump_lsn_info_stru); + pfree_ext(dump_filename); + buffer_in_progress_push(); +} +} diff --git a/src/gausskernel/storage/access/spgist/spgxlog.cpp b/src/gausskernel/storage/access/spgist/spgxlog.cpp index 8f3aa009c4..b438d14b08 100755 --- a/src/gausskernel/storage/access/spgist/spgxlog.cpp +++ b/src/gausskernel/storage/access/spgist/spgxlog.cpp @@ -497,7 +497,7 @@ static void spgRedoVacuumRedirect(XLogReaderState *record) * If any redirection tuples are being removed, make sure there are no * live Hot Standby transactions that might need to see them. 
*/ - if (InHotStandby && g_supportHotStandby) { + if (InHotStandby && g_supportHotStandby && !IS_EXRTO_READ) { spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *)ptr; if (TransactionIdIsValid(xldata->newestRedirectXid)) { RelFileNode node; diff --git a/src/gausskernel/storage/access/transam/clog.cpp b/src/gausskernel/storage/access/transam/clog.cpp index a0a050d93d..0decff72fc 100644 --- a/src/gausskernel/storage/access/transam/clog.cpp +++ b/src/gausskernel/storage/access/transam/clog.cpp @@ -1130,6 +1130,7 @@ void clog_redo(XLogReaderState *record) Assert(!ClogCtl(pageno)->shared->page_dirty[slotno]); LWLockRelease(ClogCtl(pageno)->shared->control_lock); + g_instance.comm_cxt.predo_cxt.max_clog_pageno = pageno; } else if (info == CLOG_TRUNCATE) { int64 pageno; diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp index 61e853c849..d24d2fdbdd 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp @@ -118,6 +118,7 @@ static void SSDestroyRecoveryWorkers(); static void DispatchRecordWithPages(XLogReaderState *, List *); static void DispatchRecordWithoutPage(XLogReaderState *, List *); static void DispatchTxnRecord(XLogReaderState *, List *); +void dispatch_record_to_all_thread(XLogReaderState *record, List *expected_tlis); static void StartPageRedoWorkers(uint32); static void StopRecoveryWorkers(int, Datum); static bool StandbyWillChangeStandbyState(const XLogReaderState *); @@ -639,6 +640,8 @@ void SendSingalToPageWorker(int signal) for (uint32 i = 0; i < g_instance.comm_cxt.predo_cxt.totalNum; ++i) { uint32 state = pg_atomic_read_u32(&(g_instance.comm_cxt.predo_cxt.pageRedoThreadStatusList[i].threadState)); if (state == PAGE_REDO_WORKER_READY) { + ereport(LOG, (errmsg("Dispatch start to kill(pid %lu, signal %d)", + 
g_instance.comm_cxt.predo_cxt.pageRedoThreadStatusList[i].threadId, signal))); int err = gs_signal_send(g_instance.comm_cxt.predo_cxt.pageRedoThreadStatusList[i].threadId, signal); if (0 != err) { ereport(WARNING, (errmsg("Dispatch kill(pid %lu, signal %d) failed: \"%s\",", @@ -675,7 +678,7 @@ static void StopRecoveryWorkers(int code, Datum arg) if ((count & OUTPUT_WAIT_COUNT) == OUTPUT_WAIT_COUNT) { ereport(WARNING, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("StopRecoveryWorkers wait page work exit"))); - if ((count & PRINT_ALL_WAIT_COUNT) == PRINT_ALL_WAIT_COUNT) { + if ((count & STOP_WORKERS_WAIT_COUNT) == STOP_WORKERS_WAIT_COUNT) { DumpDispatcher(); ereport(PANIC, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("StopRecoveryWorkers wait too long!!!"))); @@ -700,8 +703,8 @@ static void StopRecoveryWorkers(int code, Datum arg) /* Run from the dispatcher thread. */ static void DestroyRecoveryWorkers() { + SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); if (g_dispatcher != NULL) { - SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); for (uint32 i = 0; i < g_dispatcher->pageLineNum; i++) { DestroyPageRedoWorker(g_dispatcher->pageLines[i].batchThd); DestroyPageRedoWorker(g_dispatcher->pageLines[i].managerThd); @@ -734,8 +737,8 @@ static void DestroyRecoveryWorkers() g_instance.comm_cxt.predo_cxt.parallelRedoCtx = NULL; } g_dispatcher = NULL; - SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); } + SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); } static void SSDestroyRecoveryWorkers() @@ -895,7 +898,8 @@ static void DispatchSyncTxnRecord(XLogReaderState *record, List *expectedTLIs) RedoItem *item = GetRedoItemPtr(record); ReferenceRedoItem(item); - if ((g_dispatcher->chosedPLCnt != 1) && (XLogRecGetRmid(&item->record) != RM_XACT_ID)) { + if ((g_dispatcher->chosedPLCnt != 1) && (XLogRecGetRmid(&item->record) != RM_XACT_ID) && + !(IsSmgrTruncate(&item->record))) { ereport(WARNING, 
(errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("[REDO_LOG_TRACE]DispatchSyncTxnRecord maybe some error:rmgrID:%u, info:%u, workerCount:%u", @@ -1025,6 +1029,8 @@ static bool DispatchXLogRecord(XLogReaderState *record, List *expectedTLIs, Time } else if ((info == XLOG_FPI) || (info == XLOG_FPI_FOR_HINT)) { DispatchRecordWithPages(record, expectedTLIs); + } else if (info == XLOG_BACKUP_END) { + dispatch_record_to_all_thread(record, expectedTLIs); } else { /* process in trxn thread and need to sync to other pagerredo thread */ DispatchTxnRecord(record, expectedTLIs); @@ -1033,6 +1039,17 @@ static bool DispatchXLogRecord(XLogReaderState *record, List *expectedTLIs, Time return isNeedFullSync; } +void dispatch_record_to_all_thread(XLogReaderState *record, List *expected_tlis) +{ + RedoItem *item = GetRedoItemPtr(record); + ReferenceRedoItem(item); + for (uint32 i = 0; i < g_dispatcher->pageLineNum; i++) { + ReferenceRedoItem(item); + AddPageRedoItem(g_dispatcher->pageLines[i].batchThd, item); + } + AddTxnRedoItem(g_dispatcher->trxnLine.managerThd, item); +} + /* Run from the dispatcher thread. */ static bool DispatchRelMapRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { @@ -1044,7 +1061,7 @@ static bool DispatchRelMapRecord(XLogReaderState *record, List *expectedTLIs, Ti /* Run from the dispatcher thread. 
*/ static bool DispatchXactRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { - if (XactWillRemoveRelFiles(record)) { + if (xact_has_invalid_msg_or_delete_file(record)) { bool hasSegpageRelFile = XactHasSegpageRelFiles(record); uint32 doneFlag = 0; @@ -1207,15 +1224,9 @@ static bool DispatchSmgrRecord(XLogReaderState *record, List *expectedTLIs, Time RelFileNodeCopy(rnode, xlrec->rnode, XLogRecGetBucketId(record)); rnode.opt = GetCreateXlogFileNodeOpt(record); DispatchToOnePageWorker(record, rnode, expectedTLIs); - } else if (IsSmgrTruncate(record)) { - xl_smgr_truncate *xlrec = (xl_smgr_truncate *)XLogRecGetData(record); - RelFileNode rnode; - RelFileNodeCopy(rnode, xlrec->rnode, XLogRecGetBucketId(record)); - rnode.opt = GetTruncateXlogFileNodeOpt(record); - uint32 id = GetSlotId(rnode, 0, 0, GetBatchCount()); - AddSlotToPLSet(id); - - DispatchToSpecPageWorker(record, expectedTLIs); + } else if (info == XLOG_SMGR_TRUNCATE) { + record->isFullSync = true; + dispatch_record_to_all_thread(record, expectedTLIs); } return isNeedFullSync; diff --git a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp index 30f94c2a3d..18c47d565c 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp @@ -26,6 +26,8 @@ #include "access/extreme_rto/page_redo.h" #include "access/extreme_rto/dispatcher.h" #include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/extreme_rto/standby_read.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" #include "access/multi_redo_api.h" #include "storage/ipc.h" #include "storage/smgr/smgr.h" @@ -103,12 +105,17 @@ bool check_if_need_force_recycle() PageRedoWorker** workers = g_dispatcher->allWorkers; int64 total_base_page_size = 0; int64 total_lsn_info_size = 0; - double ratio = 
g_instance.attr.attr_storage.standby_force_recyle_ratio; + double ratio = g_instance.attr.attr_storage.standby_force_recycle_ratio; + + // if standby_force_recyle_ratio is 0, the system does not recyle file. + if (ratio == 0) { + return false; + } for (uint32 i = 0; i < worker_nums; ++i) { PageRedoWorker* page_redo_worker = workers[i]; StandbyReadMetaInfo meta_info = page_redo_worker->standby_read_meta_info; - if (page_redo_worker->role != REDO_PAGE_WORKER) { + if (page_redo_worker->role != REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { continue; } total_base_page_size += (meta_info.base_page_next_position - meta_info.base_page_recyle_position); @@ -130,7 +137,7 @@ void do_standby_read_recyle(XLogRecPtr recycle_lsn) XLogRecPtr min_recycle_lsn = InvalidXLogRecPtr; for (uint32 i = 0; i < worker_nums; ++i) { PageRedoWorker* page_redo_worker = workers[i]; - if (page_redo_worker->role != REDO_PAGE_WORKER) { + if (page_redo_worker->role != REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { continue; } extreme_rto_standby_read::standby_read_recyle_per_workers(&page_redo_worker->standby_read_meta_info, recycle_lsn); @@ -145,6 +152,7 @@ void do_standby_read_recyle(XLogRecPtr recycle_lsn) (errmsg(EXRTOFORMAT("[exrto_recycle] update global recycle lsn: %08X/%08X"), (uint32)(min_recycle_lsn >> UINT64_HALF), (uint32)min_recycle_lsn))); } + delete_by_lsn(recycle_lsn); } void exrto_recycle_interrupt() @@ -197,6 +205,7 @@ void exrto_recycle_main() ereport(LOG, (errmsg("exrto recycle: standby_read_old dir not exist"))); } + do_all_old_delay_ddl(); if (!IS_EXRTO_READ || !RecoveryInProgress()) { ereport(LOG, (errmsg("exrto recycle is available only when exrto standby read is supported"))); diff --git a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp index 24f8a54f04..7d5a19da63 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp +++ 
b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp @@ -23,7 +23,7 @@ * an idle worker to "steal" work from a busy worker. * * IDENTIFICATION - * src/gausskernel/storage/access/transam/parallel_recovery/page_redo.cpp + * src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp * * ------------------------------------------------------------------------- */ @@ -41,6 +41,7 @@ #include "access/xlogutils.h" #include "access/xlogproc.h" #include "access/nbtree.h" +#include "access/multi_redo_api.h" #include "catalog/storage_xlog.h" #include "gssignal/gs_signal.h" #include "libpq/pqsignal.h" @@ -74,6 +75,7 @@ #include "replication/datareceiver.h" #include "pgxc/barrier.h" #include "storage/file/fio_device.h" +#include "utils/timestamp.h" #ifdef ENABLE_MOT #include "storage/mot/mot_fdw.h" #endif @@ -98,6 +100,7 @@ namespace extreme_rto { static const int MAX_PARSE_BUFF_NUM = PAGE_WORK_QUEUE_SIZE * 10 * 3; static const int MAX_LOCAL_BUFF_NUM = PAGE_WORK_QUEUE_SIZE * 10 * 3; +static const int MAX_CLEAR_SMGR_NUM = 100000; static const char *const PROCESS_TYPE_CMD_ARG = "--forkpageredo"; static char g_AUXILIARY_TYPE_CMD_ARG[16] = {0}; @@ -350,7 +353,7 @@ void HandlePageRedoPageRepair(RepairBlockKey key, XLogPhyBlock pblk) RecordBadBlockAndPushToRemote(g_redoWorker->curRedoBlockState, CRC_CHECK_FAIL, InvalidXLogRecPtr, pblk); } -void HandlePageRedoInterrupts() +void HandlePageRedoInterruptsImpl(uint64 clearRedoFdCountInc = 1) { if (t_thrd.page_redo_cxt.got_SIGHUP) { t_thrd.page_redo_cxt.got_SIGHUP = false; @@ -371,6 +374,23 @@ void HandlePageRedoInterrupts() proc_exit(1); } + + static uint64 clearRedoFdCount = 0; + const uint64 clearRedoFdCountMask = 0x7FFFFFF; + clearRedoFdCount += clearRedoFdCountInc; + if (clearRedoFdCount > clearRedoFdCountMask && GetSMgrRelationHash() != NULL && + (g_redoWorker->role == REDO_PAGE_WORKER || g_redoWorker->role == REDO_PAGE_MNG)) { + clearRedoFdCount = 0; + long hash_num = hash_get_num_entries(GetSMgrRelationHash()); + 
if (hash_num >= MAX_CLEAR_SMGR_NUM) { + smgrcloseall(); + } + } +} + +void HandlePageRedoInterrupts() +{ + HandlePageRedoInterruptsImpl(); } void ReferenceRedoItem(void *item) @@ -796,7 +816,16 @@ void RedoPageManagerDoSmgrAction(XLogRecParseState *recordblockstate) (void)MemoryContextSwitchTo(oldCtx); recordblockstate->nextrecord = NULL; + bool need_wait = recordblockstate->isFullSync; + if (need_wait) { + pg_atomic_write_u32(&g_redoWorker->fullSyncFlag, 1); + } XLogBlockParseStateRelease(recordblockstate); + uint32 val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag); + while (val != 0) { + RedoInterruptCallBack(); + val = pg_atomic_read_u32(&g_redoWorker->fullSyncFlag); + } } void RedoPageManagerDoDataTypeAction(XLogRecParseState *parsestate) @@ -872,6 +901,19 @@ void PageManagerProcCheckPoint(XLogRecParseState *parseState) } } +void page_manager_proc_common_type(XLogRecParseState *parse_state) +{ + if (IsCheckPoint(parse_state)) { + PageManagerProcCheckPoint(parse_state); + } else if (is_backup_end(parse_state)) { + RedoPageManagerDistributeToAllOneBlock(parse_state); + XLogBlockParseStateRelease(parse_state); + } else { + Assert(0); + XLogBlockParseStateRelease(parse_state); + } +} + void PageManagerProcCreateTableSpace(XLogRecParseState *parseState) { bool needWait = parseState->isFullSync; @@ -970,7 +1012,7 @@ void PageManagerRedoParseState(XLogRecParseState *preState) CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]); break; case BLOCK_DATA_XLOG_COMMON_TYPE: - PageManagerProcCheckPoint(preState); + page_manager_proc_common_type(preState); break; case BLOCK_DATA_NEWCU_TYPE: PageManagerDistributeBcmBlock(preState); @@ -987,6 +1029,10 @@ void PageManagerRedoParseState(XLogRecParseState *preState) RedoPageManagerDistributeToAllOneBlock(preState); WaitNextBarrier(preState); break; + case BLOCK_DATA_XACTDATA_TYPE: + RedoPageManagerDistributeToAllOneBlock(preState); + XLogBlockParseStateRelease(preState); + break; default: 
XLogBlockParseStateRelease(preState); break; @@ -998,10 +1044,10 @@ bool PageManagerRedoDistributeItems(XLogRecParseState *record_block_state) if (record_block_state == (void *)&g_redoEndMark) { return true; } else if (record_block_state == (void *)&g_GlobalLsnForwarder) { - PageManagerProcLsnForwarder((RedoItem *) record_block_state); + PageManagerProcLsnForwarder((RedoItem *)record_block_state); return false; } else if (record_block_state == (void *)&g_cleanupMark) { - PageManagerProcCleanupMark((RedoItem *) record_block_state); + PageManagerProcCleanupMark((RedoItem *)record_block_state); return false; } else if (record_block_state == (void *)&g_cleanInvalidPageMark) { forget_range_invalid_pages((void *)record_block_state); @@ -1030,28 +1076,34 @@ bool PageManagerRedoDistributeItems(XLogRecParseState *record_block_state) void RedoPageManagerMain() { - XLogRecParseState *record_block_state; - bool is_end; + XLogRecParseState *record_block_state = NULL; (void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); XLogParseBufferInitFunc(&(g_redoWorker->parseManager), MAX_PARSE_BUFF_NUM, &recordRefOperate, RedoInterruptCallBack); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); - while (true) { - if (!SPSCBlockingQueueIsEmpty(g_redoWorker->queue)) { - record_block_state = (XLogRecParseState *)SPSCBlockingQueueTake(g_redoWorker->queue); - CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]); - is_end = PageManagerRedoDistributeItems(record_block_state); - CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); - if (is_end) - break; - } + while ((record_block_state = (XLogRecParseState *)SPSCBlockingQueueTop(g_redoWorker->queue)) != + (XLogRecParseState *)&g_redoEndMark) { + ErrorContextCallback err_context; + err_context.callback = rm_redo_error_callback; + err_context.arg = (void *)record_block_state->refrecord; + err_context.previous = t_thrd.log_cxt.error_context_stack; + 
t_thrd.log_cxt.error_context_stack = &err_context; + + CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], + g_redoWorker->timeCostList[TIME_COST_STEP_2]); + PageManagerRedoDistributeItems(record_block_state); + SPSCBlockingQueuePop(g_redoWorker->queue); + CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); RedoInterruptCallBack(); ADD_ABNORMAL_POSITION(5); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); + + t_thrd.log_cxt.error_context_stack = err_context.previous; } + SPSCBlockingQueuePop(g_redoWorker->queue); RedoThrdWaitForExit(g_redoWorker); XLogParseBufferDestoryFunc(&(g_redoWorker->parseManager)); } @@ -1095,8 +1147,9 @@ bool TrxnManagerDistributeItemsBeforeEnd(RedoItem *item) } else { GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_4]); if (IsCheckPoint(&item->record) || IsTableSpaceDrop(&item->record) || IsTableSpaceCreate(&item->record) || - (IsXactXlog(&item->record) && XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) || - IsDataBaseDrop(&item->record) || IsDataBaseCreate(&item->record)) { + (IsXactXlog(&item->record) && xact_has_invalid_msg_or_delete_file(&item->record)) || + IsBarrierRelated(&item->record) || IsDataBaseDrop(&item->record) || IsDataBaseCreate(&item->record) || + IsSmgrTruncate(&item->record)) { uint32 relCount; do { RedoInterruptCallBack(); @@ -1262,6 +1315,10 @@ void TrxnWorkMain() forget_range_invalid_pages((void *)item); SPSCBlockingQueuePop(g_redoWorker->queue); } else { + if (IsSmgrTruncate(&item->record)) { + // need generate a new snapshot before truncate, and lsn is larger than the actual value + exrto_generate_snapshot(item->record.EndRecPtr); + } t_thrd.xlog_cxt.needImmediateCkp = item->needImmediateCheckpoint; bool fullSync = item->record.isFullSync; GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]); @@ -1278,7 +1335,7 @@ void TrxnWorkMain() } if (IsCheckPoint(&item->record) || (IsXactXlog(&item->record) && - 
XactWillRemoveRelFiles(&item->record)) || IsBarrierRelated(&item->record) || + xact_has_invalid_msg_or_delete_file(&item->record)) || IsBarrierRelated(&item->record) || IsDataBaseDrop(&item->record)) { exrto_generate_snapshot(g_redoWorker->lastReplayedEndRecPtr); } @@ -1394,6 +1451,13 @@ void RedoPageWorkerRedoBcmBlock(XLogRecParseState *procState) } } +void redo_page_worker_proc_common_record(XLogRecParseState *stat) +{ + if (IsCheckPoint(stat)) { + RedoPageWorkerCheckPoint(stat); + } +} + void RedoPageWorkerMain() { (void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); @@ -1437,6 +1501,12 @@ void RedoPageWorkerMain() g_redoWorker->curRedoBlockState = (XLogBlockDataParse*)(&redoblockstate->blockparse.extra_rec); procState = (XLogRecParseState *)procState->nextrecord; + ErrorContextCallback err_context; + err_context.callback = rm_redo_error_callback; + err_context.arg = (void *)redoblockstate->refrecord; + err_context.previous = t_thrd.log_cxt.error_context_stack; + t_thrd.log_cxt.error_context_stack = &err_context; + switch (XLogBlockHeadGetValidInfo(&redoblockstate->blockparse.blockhead)) { case BLOCK_DATA_MAIN_DATA_TYPE: case BLOCK_DATA_UNDO_TYPE: @@ -1449,7 +1519,7 @@ void RedoPageWorkerMain() break; case BLOCK_DATA_XLOG_COMMON_TYPE: GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); - RedoPageWorkerCheckPoint(redoblockstate); + redo_page_worker_proc_common_record(redoblockstate); SetCompletedReadEndPtr(g_redoWorker, redoblockstate->blockparse.blockhead.start_ptr, redoblockstate->blockparse.blockhead.end_ptr); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); @@ -1492,9 +1562,16 @@ void RedoPageWorkerMain() redoblockstate->blockparse.blockhead.end_ptr); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); break; + case BLOCK_DATA_XACTDATA_TYPE: + GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); + SetCompletedReadEndPtr(g_redoWorker, redoblockstate->blockparse.blockhead.start_ptr, + 
redoblockstate->blockparse.blockhead.end_ptr); + CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_6]); + break; default: break; } + t_thrd.log_cxt.error_context_stack = err_context.previous; } (void)MemoryContextSwitchTo(oldCtx); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]); @@ -1570,14 +1647,11 @@ void PushToWorkerLsn(bool force) { const uint32 max_record_count = PAGE_WORK_QUEUE_SIZE; static uint32 cur_recor_count = 0; - - cur_recor_count++; - if (!IsExtremeRtoRunning()) { return; } - - if (force) { + cur_recor_count++; + if (unlikely(force)) { uint32 refCount; do { refCount = pg_atomic_read_u32(&g_GlobalLsnForwarder.record.refcount); @@ -1586,17 +1660,36 @@ void PushToWorkerLsn(bool force) cur_recor_count = 0; SendLsnFowarder(); } else { - uint32 refCount = pg_atomic_read_u32(&g_GlobalLsnForwarder.record.refcount); - - if (refCount != 0 || cur_recor_count < max_record_count) { + if (cur_recor_count < max_record_count) { return; } + if (pg_atomic_read_u32(&g_GlobalLsnForwarder.record.refcount) != 0) { + return; + } SendLsnFowarder(); cur_recor_count = 0; } } +inline bool send_lsn_forwarder_for_check_to_hot_standby(XLogRecPtr lsn) +{ + if (t_thrd.xlog_cxt.reachedConsistency) { + // means has send lsn forwarder for consistenstcy check + return false; + } + if (XLogRecPtrIsInvalid(t_thrd.xlog_cxt.minRecoveryPoint)) { + return false; + } + + if (XLByteLT(lsn, t_thrd.xlog_cxt.minRecoveryPoint)) { + return false; + } + + t_thrd.xlog_cxt.reachedConsistency = true; + return true; +} + void ResetRtoXlogReadBuf(XLogRecPtr targetPagePtr) { uint32 startreadworker = pg_atomic_read_u32(&(g_recordbuffer->readWorkerState)); @@ -1975,14 +2068,14 @@ void XLogReadPageWorkerMain() g_redoWorker->lastReplayedReadRecPtr = xlogreader->ReadRecPtr; g_redoWorker->lastReplayedEndRecPtr = xlogreader->EndRecPtr; - + PushToWorkerLsn(send_lsn_forwarder_for_check_to_hot_standby(g_redoWorker->lastReplayedEndRecPtr)); + if (FORCE_FINISH_ENABLED) { 
CheckAndDoForceFinish(xlogreader); } CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_5], g_redoWorker->timeCostList[TIME_COST_STEP_1]); record = XLogParallelReadNextRecord(xlogreader); CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_1], g_redoWorker->timeCostList[TIME_COST_STEP_2]); - PushToWorkerLsn(false); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); RedoInterruptCallBack(); ADD_ABNORMAL_POSITION(8); @@ -2739,8 +2832,9 @@ bool XactHasSegpageRelFiles(XLogReaderState *record) if (XLogRecGetRmid(record) != RM_XACT_ID) { return false; } - bool compress; - XactGetRelFiles(record, &xnodes, &nrels, &compress); + + bool compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS); + XactGetRelFiles(record, &xnodes, &nrels); for (int32 idx = 0; idx < nrels; idx++) { ColFileNode colFileNode; @@ -3006,70 +3100,73 @@ void SeqCheckRemoteReadAndRepairPage() } } +inline void invalid_msg_leak_warning(XLogRecPtr trxn_lsn) +{ + if (t_thrd.page_redo_cxt.invalid_msg.valid) { + ereport(WARNING, (errmsg(EXRTOFORMAT("[exrto_generate_snapshot] not send invalid msg: %08X/%08X"), + (uint32)(trxn_lsn >> UINT64_HALF), (uint32)trxn_lsn))); + } +} + void exrto_generate_snapshot(XLogRecPtr trxn_lsn) { if (!g_instance.attr.attr_storage.EnableHotStandby) { return; } - ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot; + ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; /* * do not generate the same snapshot repeatedly. 
*/ if (XLByteLE(trxn_lsn, exrto_snapshot->read_lsn)) { + invalid_msg_leak_warning(trxn_lsn); return; } + + TransactionId xmin; + TransactionId xmax; + CommitSeqNo snapshot_csn; - if (XLogRecPtrIsInvalid(t_thrd.xlog_cxt.minRecoveryPoint)) { - return; - } - if (XLByteLT(trxn_lsn, exrto_snapshot->read_lsn)) { - return; - } - - SnapshotData snapshot; - - (void)GetSnapshotData(&snapshot, false); - - Assert(snapshot.takenDuringRecovery); + exrto_get_snapshot_data(xmin, xmax, snapshot_csn); (void)LWLockAcquire(ExrtoSnapshotLock, LW_EXCLUSIVE); - exrto_snapshot->snapshot_csn = snapshot.snapshotcsn; - exrto_snapshot->xmin = snapshot.xmin; - exrto_snapshot->xmax = snapshot.xmax; + exrto_snapshot->snapshot_csn = snapshot_csn; + exrto_snapshot->xmin = xmin; + exrto_snapshot->xmax = xmax; exrto_snapshot->read_lsn = trxn_lsn; - exrto_snapshot->gen_snap_time = GetCurrentTimestamp(); + send_delay_invalid_message(); LWLockRelease(ExrtoSnapshotLock); } void exrto_read_snapshot(Snapshot snapshot) { - if (t_thrd.role != WORKER && t_thrd.role != THREADPOOL_WORKER) { + if ((!is_exrto_standby_read_worker()) || u_sess->proc_cxt.clientIsCMAgent || dummyStandbyMode) { return; } - - if (g_dispatcher == NULL) { - ereport(ERROR, - (errmsg("g_dispatcher is not init")));; - } - - ExrtoSnapshot exrto_snapshot = &g_dispatcher->exrto_snapshot; + + ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { LWLockRelease(ExrtoSnapshotLock); - ereport(ERROR, - (errmsg("could not get a valid snapshot with extreme rto"))); + ereport(ERROR, (errmsg("could not get a valid snapshot with extreme rto"))); } snapshot->snapshotcsn = exrto_snapshot->snapshot_csn; snapshot->xmin = exrto_snapshot->xmin; snapshot->xmax = exrto_snapshot->xmax; - - t_thrd.pgxact->xmin = exrto_snapshot->xmin; - t_thrd.proc->exrto_read_lsn = exrto_snapshot->read_lsn; - t_thrd.proc->exrto_gen_snap_time = 
exrto_snapshot->gen_snap_time; - u_sess->utils_cxt.TransactionXmin = exrto_snapshot->xmin; - u_sess->utils_cxt.exrto_read_lsn = exrto_snapshot->read_lsn; - + snapshot->read_lsn = exrto_snapshot->read_lsn; LWLockRelease(ExrtoSnapshotLock); + if (!TransactionIdIsValid(t_thrd.pgxact->xmin) || TransactionIdPrecedes(snapshot->xmin, t_thrd.pgxact->xmin)) { + t_thrd.pgxact->xmin = snapshot->xmin; + u_sess->utils_cxt.TransactionXmin = snapshot->xmin; + } + t_thrd.proc->exrto_read_lsn = exrto_snapshot->read_lsn; + if (t_thrd.proc->exrto_min == 0 || + XLByteLT(t_thrd.proc->exrto_min, t_thrd.proc->exrto_read_lsn)) { + t_thrd.proc->exrto_min = t_thrd.proc->exrto_read_lsn; + } + + if (t_thrd.proc->exrto_gen_snap_time == 0) { + t_thrd.proc->exrto_gen_snap_time = GetCurrentTimestamp(); + } Assert(XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)); } @@ -3112,8 +3209,8 @@ XLogRecPtr calculate_force_recycle_lsn_per_worker(StandbyReadMetaInfo* meta_info meta_info->batch_id, meta_info->redo_id, lsn_info_recycle_pos))); } LockBuffer(buffer, BUFFER_LOCK_SHARE); - extreme_rto_standby_read::LsnInfo lsn_info = (extreme_rto_standby_read::LsnInfo)(page + - extreme_rto_standby_read::LSN_INFO_HEAD_SIZE); + extreme_rto_standby_read::LsnInfo lsn_info = + (extreme_rto_standby_read::LsnInfo)(page + extreme_rto_standby_read::LSN_INFO_HEAD_SIZE); lsn_info_recycle_lsn = lsn_info->lsn[0]; UnlockReleaseBuffer(buffer); } @@ -3129,7 +3226,7 @@ void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn) for (uint32 i = 0; i < worker_nums; ++i) { PageRedoWorker* page_redo_worker = workers[i]; - if (page_redo_worker->role != REDO_PAGE_WORKER) { + if (page_redo_worker->role != REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { continue; } recycle_lsn_per_worker = calculate_force_recycle_lsn_per_worker(&page_redo_worker->standby_read_meta_info); @@ -3144,6 +3241,9 @@ void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn) static inline bool exceed_standby_max_query_time(TimestampTz start_time) { + if 
(start_time == 0) { + return false; + } return TimestampDifferenceExceeds(start_time, GetCurrentTimestamp(), g_instance.attr.attr_storage.standby_max_query_time * MSECS_PER_SEC); } @@ -3161,13 +3261,18 @@ void proc_array_get_oldeset_readlsn(XLogRecPtr recycle_lsn, XLogRecPtr &oldest_l int pg_proc_no = proc_array->pgprocnos[index]; PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; - XLogRecPtr read_lsn = pg_proc->exrto_read_lsn; TransactionId pxmin = pg_xact->xmin; - - if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) { + XLogRecPtr read_lsn = pg_proc->exrto_min; + ereport( + DEBUG1, + (errmsg(EXRTOFORMAT("proc_array_get_oldeset_readlsn info, read_lsn: %08X/%08X ,xmin: %lu ,vacuum_flags: " + "%hhu ,pid: %lu"), + (uint32)(read_lsn >> UINT64_HALF), (uint32)read_lsn, pxmin, pg_xact->vacuumFlags, pg_proc->pid))); + + if (pg_proc->pid == 0 || XLogRecPtrIsInvalid(read_lsn)) { continue; } - + Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); /* * Backend is doing logical decoding which manages xmin @@ -3187,6 +3292,13 @@ void proc_array_get_oldeset_readlsn(XLogRecPtr recycle_lsn, XLogRecPtr &oldest_l * wanted so ignore any errors. */ (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId); + ereport( + LOG, + (errmsg( + EXRTOFORMAT("read_lsn is less than recycle_lsn or query time exceed max_query_time while " + "get_oldeset_readlsn, read_lsn %lu, " + "recycle_lsn: %lu, exrto_gen_snap_time: %ld, current_time: %ld, thread id = %lu\n"), + read_lsn, recycle_lsn, pg_proc->exrto_gen_snap_time, GetCurrentTimestamp(), pg_proc->pid))); /* * Wait a little bit for it to die so that we avoid flooding * an unresponsive backend when system is heavily loaded. 
@@ -3255,7 +3367,26 @@ XLogRecPtr exrto_calculate_recycle_position(bool force_recyle) if (force_recyle) { calculate_force_recycle_lsn(recycle_lsn); } - + ereport(DEBUG1, (errmsg(EXRTOFORMAT("time information of calculate recycle position, current_time: %ld, snapshot " + "read_lsn: %08X/%08X, gen_snaptime:%ld"), + GetCurrentTimestamp(), + (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), + (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, + g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time))); + + /* + * If there is no backend read threads, set read oldest lsn to snapshot lsn. + */ + ExrtoSnapshot exrto_snapshot = NULL; + exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; + (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); + if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { + ereport(WARNING, (errmsg("could not get a valid snapshot with extreme rto"))); + } else { + oldest_lsn = exrto_snapshot->read_lsn; + oldest_xmin = exrto_snapshot->xmin; + } + LWLockRelease(ExrtoSnapshotLock); /* Loop checks to avoid conflicting queries that were not successfully canceled. */ do { RedoInterruptCallBack(); @@ -3263,25 +3394,8 @@ XLogRecPtr exrto_calculate_recycle_position(bool force_recyle) check_times++; } while (conflict && check_times < max_check_times); - /* - * If there is no backend read threads, set read oldest lsn to snapshot lsn. 
- */ - if (XLogRecPtrIsInvalid(oldest_lsn)) { - ExrtoSnapshot exrto_snapshot = NULL; - exrto_snapshot = &g_dispatcher->exrto_snapshot; - (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); - if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { - ereport(WARNING, - (errmsg("could not get a valid snapshot with extreme rto"))); - } else { - oldest_lsn = exrto_snapshot->read_lsn; - oldest_xmin = exrto_snapshot->xmin; - } - - LWLockRelease(ExrtoSnapshotLock); - } recycle_lsn = rtl::max(recycle_lsn, oldest_lsn); - + ereport( LOG, (errmsg( @@ -3289,9 +3403,10 @@ XLogRecPtr exrto_calculate_recycle_position(bool force_recyle) "[exrto_recycle] calculate recycle position, oldestlsn: %08X/%08X, snapshot read_lsn: %08X/%08X, try " "recycle lsn: %08X/%08X"), (uint32)(oldest_lsn >> UINT64_HALF), (uint32)oldest_lsn, - (uint32)(g_dispatcher->exrto_snapshot.read_lsn >> UINT64_HALF), - (uint32)g_dispatcher->exrto_snapshot.read_lsn, (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn))); - + (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), + (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, + (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn))); + return recycle_lsn; } @@ -3306,9 +3421,9 @@ TransactionId exrto_calculate_recycle_xmin_for_undo() /* * If there is no backend read threads, set read oldest lsn to snapshot lsn. 
*/ - if (oldest_xmin == InvalidTransactionId) { + if ((oldest_xmin == InvalidTransactionId) && (g_dispatcher != NULL)) { ExrtoSnapshot exrto_snapshot = NULL; - exrto_snapshot = &g_dispatcher->exrto_snapshot; + exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); if (XLByteEQ(exrto_snapshot->xmin, InvalidTransactionId)) { ereport( diff --git a/src/gausskernel/storage/access/transam/multi_redo_api.cpp b/src/gausskernel/storage/access/transam/multi_redo_api.cpp index 7b2b564443..5697de461e 100644 --- a/src/gausskernel/storage/access/transam/multi_redo_api.cpp +++ b/src/gausskernel/storage/access/transam/multi_redo_api.cpp @@ -338,9 +338,8 @@ void CountXLogNumbers(XLogReaderState *record) record->readblocks); } else if (rm_id == RM_XACT_ID) { ColFileNode *xnodes = NULL; - bool compress = false; int nrels = 0; - XactGetRelFiles(record, &xnodes, &nrels, &compress); + XactGetRelFiles(record, &xnodes, &nrels); if (nrels > 0) { (void)pg_atomic_add_fetch_u64(&g_instance.comm_cxt.predo_cxt.xlogStatics[rm_id][info].extra_num, nrels); } diff --git a/src/gausskernel/storage/access/transam/multi_redo_settings.cpp b/src/gausskernel/storage/access/transam/multi_redo_settings.cpp index e26dba43ac..d0007c8fcf 100644 --- a/src/gausskernel/storage/access/transam/multi_redo_settings.cpp +++ b/src/gausskernel/storage/access/transam/multi_redo_settings.cpp @@ -56,7 +56,7 @@ void ConfigRecoveryParallelism() g_instance.attr.attr_storage.recovery_parse_workers, g_instance.attr.attr_storage.recovery_redo_workers_per_paser_worker, total_recovery_parallelism))); - g_supportHotStandby = false; + g_supportHotStandby = g_instance.attr.attr_storage.EnableHotStandby; SetConfigOption("recovery_parallelism", buf, PGC_POSTMASTER, PGC_S_OVERRIDE); } else if (g_instance.attr.attr_storage.max_recovery_parallelism > 1) { g_instance.comm_cxt.predo_cxt.redoType = PARALLEL_REDO; @@ -144,6 +144,9 @@ void ParseBindCpuInfo(RedoCpuBindControl *control) 
ptoken = TrimStr(strtok_r(attr, pdelimiter, &psave)); ptoken = pg_strtolower(ptoken); + if (ptoken == NULL) { + return; + } int bindNum = 0; if (strncmp("nobind", ptoken, strlen("nobind")) == 0) { diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp index 56ea53b338..b93a19a0a1 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp @@ -3046,8 +3046,9 @@ bool XactHasSegpageRelFiles(XLogReaderState *record) if (XLogRecGetRmid(record) != RM_XACT_ID) { return false; } - bool compress; - XactGetRelFiles(record, &xnodes, &nrels, &compress); + + bool compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS); + XactGetRelFiles(record, &xnodes, &nrels); for (int32 idx = 0; idx < nrels; idx++) { ColFileNode colFileNode; diff --git a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp index 40fb6dc16a..b8b574ace2 100755 --- a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp @@ -503,8 +503,8 @@ static void StopRecoveryWorkers(int code, Datum arg) /* Run from the dispatcher thread. 
*/ static void DestroyRecoveryWorkers() { + SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); if (g_dispatcher != NULL) { - SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); for (uint32 i = 0; i < g_dispatcher->totalWorkerCount; i++) DestroyPageRedoWorker(g_dispatcher->pageWorkers[i]); if (g_dispatcher->txnWorker != NULL) @@ -519,8 +519,8 @@ static void DestroyRecoveryWorkers() g_instance.comm_cxt.predo_cxt.parallelRedoCtx = NULL; } g_dispatcher = NULL; - SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); } + SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); } static bool RmgrRecordInfoValid(XLogReaderState *record, uint8 minInfo, uint8 maxInfo) @@ -1894,10 +1894,12 @@ uint32 GetStartupBufferPinWaitBufLen() */ void GetStartupBufferPinWaitBufId(int *bufids, uint32 len) { - for (uint32 i = 0; i < len - 1; i++) { - bufids[i] = g_dispatcher->pageWorkers[i]->bufferPinWaitBufId; + if (g_dispatcher != NULL) { + for (uint32 i = 0; i < len - 1; i++) { + bufids[i] = g_dispatcher->pageWorkers[i]->bufferPinWaitBufId; + } + bufids[len - 1] = g_instance.proc_base->startupBufferPinWaitBufId; } - bufids[len - 1] = g_instance.proc_base->startupBufferPinWaitBufId; } void GetReplayedRecPtrFromUndoWorkers(XLogRecPtr *readPtr, XLogRecPtr *endPtr) diff --git a/src/gausskernel/storage/access/transam/xact.cpp b/src/gausskernel/storage/access/transam/xact.cpp index 68e632abbe..a6ff43cd40 100755 --- a/src/gausskernel/storage/access/transam/xact.cpp +++ b/src/gausskernel/storage/access/transam/xact.cpp @@ -47,6 +47,7 @@ #include "access/xlogutils.h" #include "access/multi_redo_api.h" #include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" @@ -1539,8 +1540,9 @@ static TransactionId RecordTransactionCommit(void) if (useLocalXid || !IsPostmasterEnvironment || GTM_FREE_MODE) { 
#ifndef ENABLE_MULTIPLE_NODES /* For hot standby, set csn to commit in progress */ - CommitSeqNo csn = SetXact2CommitInProgress(xid, 0); - XLogInsertStandbyCSNCommitting(xid, csn, children, nchildren); + CommitSeqNo latestCsn = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo; + XLogInsertStandbyCSNCommitting(xid, latestCsn, children, nchildren); + (void)SetXact2CommitInProgress(xid, 0); #else /* set commit CSN and update global CSN in gtm free mode. */ SetXact2CommitInProgress(xid, 0); @@ -7082,7 +7084,7 @@ static void xact_redo_forget_alloc_segs(TransactionId xid, TransactionId *subXid remainSegsLock.unLock(); } -static void xact_redo_log_drop_segs(_in_ ColFileNode *xnodes, _in_ int nrels, XLogRecPtr lsn) +void xact_redo_log_drop_segs(_in_ ColFileNode *xnodes, _in_ int nrels, XLogRecPtr lsn) { bool isNeedLogRemainSegs = IsNeedLogRemainSegs(lsn); if (!isNeedLogRemainSegs) { @@ -7173,7 +7175,7 @@ void push_unlink_rel_to_hashtbl(ColFileNode *xnodes, int nrels) /* * XLOG support routines */ -static void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) +void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) { ColMainFileNodesCreate(); @@ -7210,7 +7212,7 @@ static void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) /* * recycle exrto files when dropping table occurs. 
*/ - if (IS_EXRTO_READ) { + if (RecoveryInProgress() && IS_EXRTO_READ) { RelFileNode block_meta_file = relFileNode; block_meta_file.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; extreme_rto_standby_read::remove_one_block_info_file(block_meta_file); @@ -7243,6 +7245,32 @@ static void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) ColMainFileNodesDestroy(); } +void send_delay_invalid_message() +{ + if (t_thrd.page_redo_cxt.invalid_msg.valid) { + ProcessCommittedInvalidationMessages( + t_thrd.page_redo_cxt.invalid_msg.inval_msgs, t_thrd.page_redo_cxt.invalid_msg.nmsgs, + t_thrd.page_redo_cxt.invalid_msg.relcache_init_file_inval, t_thrd.page_redo_cxt.invalid_msg.db_id, + t_thrd.page_redo_cxt.invalid_msg.ts_id, t_thrd.page_redo_cxt.invalid_msg.lsn); + t_thrd.page_redo_cxt.invalid_msg.valid = false; + } +} + +void record_delay_invalid_message( + SharedInvalidationMessage* msgs, int nmsgs, bool relcache_init_file_inval, Oid dbid, Oid tsid, XLogRecPtr lsn) +{ + if (nmsgs <= 0) { + return; + } + t_thrd.page_redo_cxt.invalid_msg.inval_msgs = msgs; + t_thrd.page_redo_cxt.invalid_msg.nmsgs = nmsgs; + t_thrd.page_redo_cxt.invalid_msg.relcache_init_file_inval = relcache_init_file_inval; + t_thrd.page_redo_cxt.invalid_msg.db_id = dbid; + t_thrd.page_redo_cxt.invalid_msg.ts_id = tsid; + t_thrd.page_redo_cxt.invalid_msg.lsn = lsn; + t_thrd.page_redo_cxt.invalid_msg.valid = true; +} + /* * Before 9.0 this was a fairly short function, but now it performs many * actions for which the order of execution is critical. 
@@ -7310,7 +7338,7 @@ static void xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn, Transac } if (EnableGlobalSysCache()) { ProcessCommittedInvalidationMessages(inval_msgs, nmsgs, XactCompletionRelcacheInitFileInval(xinfo), - dbId, tsId); + dbId, tsId, lsn); } #endif } else { @@ -7370,8 +7398,13 @@ static void xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn, Transac * maintain the same order of invalidation then release locks as * occurs in CommitTransaction(). */ - ProcessCommittedInvalidationMessages(inval_msgs, nmsgs, XactCompletionRelcacheInitFileInval(xinfo), dbId, tsId); - + if (IS_EXRTO_READ) { + record_delay_invalid_message(inval_msgs, nmsgs, XactCompletionRelcacheInitFileInval(xinfo), dbId, + tsId, lsn); + } else { + ProcessCommittedInvalidationMessages(inval_msgs, nmsgs, XactCompletionRelcacheInitFileInval(xinfo), dbId, + tsId, lsn); + } /* * Release locks, if any. We do this for both two phase and normal one * phase transactions. In effect we are ignoring the prepare phase and @@ -7417,8 +7450,12 @@ static void xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn, Transac ColFileNodeCopy(&newColFileNodes[i], &colFileNodeRel[i]); } } - unlink_relfiles(newColFileNodes, nrels); - xact_redo_log_drop_segs(newColFileNodes, nrels, lsn); + if (IS_EXRTO_READ) { + update_delay_ddl_files(newColFileNodes, nrels, lsn); + } else { + unlink_relfiles(newColFileNodes, nrels); + xact_redo_log_drop_segs(newColFileNodes, nrels, lsn); + } if (unlikely((long)!compress)) { pfree(newColFileNodes); } @@ -7619,7 +7656,11 @@ static void xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid, XLogRecPtr newColFileNodes[i].filenode.opt = 0; } } - unlink_relfiles(newColFileNodes, xlrec->nrels); + if (IS_EXRTO_READ) { + update_delay_ddl_files(newColFileNodes, xlrec->nrels, lsn); + } else { + unlink_relfiles(newColFileNodes, xlrec->nrels); + } xact_redo_log_drop_segs(newColFileNodes, xlrec->nrels, lsn); if (unlikely((long)!compress)) { 
pfree(newColFileNodes); @@ -7727,12 +7768,52 @@ void xact_redo(XLogReaderState *record) } } -void XactGetRelFiles(XLogReaderState *record, ColFileNode **xnodesPtr, int *nrelsPtr, bool *compress) +bool xact_has_invalid_msg_or_delete_file(XLogReaderState *record) +{ + Assert(XLogRecGetRmid(record) == RM_XACT_ID); + + uint8 info = (XLogRecGetInfo(record) & (~XLR_INFO_MASK)); + xl_xact_commit *commit = NULL; + xl_xact_abort *abort = NULL; + int msg_files = 0; + + switch (info) { + case XLOG_XACT_COMMIT_COMPACT: + case XLOG_XACT_PREPARE: + case XLOG_XACT_ASSIGNMENT: + break; + case XLOG_XACT_COMMIT: + commit = (xl_xact_commit *)XLogRecGetData(record); + msg_files = commit->nmsgs + commit->nrels; + break; + case XLOG_XACT_ABORT_WITH_XID: + case XLOG_XACT_ABORT: + abort = (xl_xact_abort *)XLogRecGetData(record); + msg_files = abort->nrels; + break; + case XLOG_XACT_COMMIT_PREPARED: + commit = &(((xl_xact_commit_prepared *)XLogRecGetData(record))->crec); + msg_files = commit->nmsgs + commit->nrels; + break; + case XLOG_XACT_ABORT_PREPARED: + abort = &(((xl_xact_abort_prepared *)XLogRecGetData(record))->arec); + msg_files = abort->nrels; + break; + default: + ereport(PANIC, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), + errmsg("xactWillRemoveRelFiles: unknown op code %u", (uint32)info))); + } + + return (msg_files > 0); +} + + +void XactGetRelFiles(XLogReaderState *record, ColFileNode **xnodesPtr, int *nrelsPtr) { Assert(XLogRecGetRmid(record) == RM_XACT_ID); uint8 info = (XLogRecGetInfo(record) & (~XLR_INFO_MASK)); - *compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS); + xl_xact_commit *commit = NULL; xl_xact_abort *abort = NULL; @@ -7781,13 +7862,12 @@ bool XactWillRemoveRelFiles(XLogReaderState *record) */ int nrels = 0; ColFileNode *xnodes = NULL; - bool compress = false; if (XLogRecGetRmid(record) != RM_XACT_ID) { return false; } - XactGetRelFiles(record, &xnodes, &nrels, &compress); + XactGetRelFiles(record, &xnodes, &nrels); return (nrels > 0); } @@ -7798,8 
+7878,7 @@ bool xactWillRemoveRelFiles(XLogReaderState *record) ColFileNode *xnodes = NULL; Assert(XLogRecGetRmid(record) == RM_XACT_ID); - bool compress; - XactGetRelFiles(record, &xnodes, &nrels, &compress); + XactGetRelFiles(record, &xnodes, &nrels); return nrels > 0; } @@ -7807,8 +7886,9 @@ void xactApplyXLogDropRelation(XLogReaderState *record) { int nrels = 0; ColFileNode *xnodes = NULL; - bool compress; - XactGetRelFiles(record, &xnodes, &nrels, &compress); + + bool compress = (bool)(XLogRecGetInfo(record) & XLR_REL_COMPRESS); + XactGetRelFiles(record, &xnodes, &nrels); for (int i = 0; i < nrels; i++) { RelFileNodeBackend rbnode; ColFileNode node; diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 66c6aba578..6d121a1793 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -54,6 +54,7 @@ #include "access/hash.h" #include "access/xlogproc.h" #include "access/parallel_recovery/dispatcher.h" +#include "access/extreme_rto/page_redo.h" #include "commands/tablespace.h" #include "commands/matview.h" @@ -3157,37 +3158,11 @@ void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force) if (t_thrd.xlog_cxt.minRecoveryPoint == 0) { t_thrd.xlog_cxt.updateMinRecoveryPoint = false; } else if (force || XLByteLT(t_thrd.xlog_cxt.minRecoveryPoint, lsn)) { - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = t_thrd.shemem_ptr_cxt.XLogCtl; - XLogRecPtr newMinRecoveryPoint; - - /* - * To avoid having to update the control file too often, we update it - * all the way to the last record being replayed, even though 'lsn' - * would suffice for correctness. This also allows the 'force' case - * to not need a valid 'lsn' value. - * - * Another important reason for doing it this way is that the passed - * 'lsn' value could be bogus, i.e., past the end of available WAL, if - * the caller got it from a corrupted heap page. 
Accepting such a - * value as the min recovery point would prevent us from coming up at - * all. Instead, we just log a warning and continue with recovery. - * (See also the comments about corrupt LSNs in XLogFlush.) - */ - SpinLockAcquire(&xlogctl->info_lck); - newMinRecoveryPoint = xlogctl->lastReplayedEndRecPtr; - SpinLockRelease(&xlogctl->info_lck); - - if (!force && XLByteLT(newMinRecoveryPoint, lsn) && !enable_heap_bcm_data_replication()) { - ereport(DEBUG1, (errmsg("xlog min recovery request %X/%X is past current point %X/%X", (uint32)(lsn >> 32), - (uint32)lsn, (uint32)(newMinRecoveryPoint >> 32), (uint32)newMinRecoveryPoint))); - } - /* update control file */ - if (XLByteLT(t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint, newMinRecoveryPoint)) { - t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint = newMinRecoveryPoint; + if (XLByteLT(t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint, lsn)) { + t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint = lsn; UpdateControlFile(); - t_thrd.xlog_cxt.minRecoveryPoint = newMinRecoveryPoint; + t_thrd.xlog_cxt.minRecoveryPoint = lsn; SetMinRecoverPointForStats(t_thrd.xlog_cxt.minRecoveryPoint); ereport(DEBUG1, (errmsg("updated min recovery point to %X/%X", (uint32)(t_thrd.xlog_cxt.minRecoveryPoint >> 32), @@ -5380,7 +5355,7 @@ static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, in * So err on the safe side and give up. 
*/ if (!t_thrd.xlog_cxt.InArchiveRecovery && t_thrd.xlog_cxt.ArchiveRecoveryRequested && !fetching_ckpt) { - ProcTxnWorkLoad(false); + ProcTxnWorkLoad(true); volatile XLogCtlData *xlogctl = t_thrd.shemem_ptr_cxt.XLogCtl; XLogRecPtr newMinRecoveryPoint; ereport(DEBUG1, (errmsg_internal("reached end of WAL in pg_xlog, entering archive recovery"))); @@ -5435,7 +5410,7 @@ void UpdateMinrecoveryInAchive() { volatile XLogCtlData *xlogctl = t_thrd.shemem_ptr_cxt.XLogCtl; XLogRecPtr newMinRecoveryPoint; - + extreme_rto::PushToWorkerLsn(true); /* initialize minRecoveryPoint to this record */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_ARCHIVE_RECOVERY; @@ -5453,7 +5428,7 @@ void UpdateMinrecoveryInAchive() UpdateControlFile(); LWLockRelease(ControlFileLock); MultiRedoUpdateMinRecovery(t_thrd.xlog_cxt.minRecoveryPoint); - + t_thrd.xlog_cxt.updateMinRecoveryPoint = true; // for extreme rto xlog page read worker, no need send lsn forwarder ereport(LOG, (errmsg("update minrecovery point to %X/%X in archive recovery", (uint32)(t_thrd.xlog_cxt.minRecoveryPoint >> 32), (uint32)(t_thrd.xlog_cxt.minRecoveryPoint)))); @@ -8832,6 +8807,27 @@ static inline void UpdateTermFromXLog(uint32 xlTerm) } } +void init_extreme_rto_standby_read_first_snapshot(const XLogRecPtr checkpoint_loc) +{ + if (!IsExtremeRedo()) { + return; + } + + if (!g_instance.attr.attr_storage.EnableHotStandby) { + return; + } + + g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time = 0; + g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn = checkpoint_loc; + g_instance.comm_cxt.predo_cxt.exrto_snapshot->snapshot_csn = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo; + g_instance.comm_cxt.predo_cxt.exrto_snapshot->xmin = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid; + g_instance.comm_cxt.predo_cxt.exrto_snapshot->xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid; + if 
(TransactionIdIsValid(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin) && + t_thrd.xact_cxt.ShmemVariableCache->standbyXmin <= t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid) { + g_instance.comm_cxt.predo_cxt.exrto_snapshot->xmin = t_thrd.xact_cxt.ShmemVariableCache->standbyXmin; + } +} + /* * This must be called ONCE during postmaster or standalone-backend startup */ @@ -9914,6 +9910,8 @@ void StartupXLOG(void) (uint32)t_thrd.shemem_ptr_cxt.ControlFile->backupStartPoint, t_thrd.shemem_ptr_cxt.ControlFile->backupEndRequired ? "TRUE" : "FALSE"))); + + init_extreme_rto_standby_read_first_snapshot(checkPoint.redo); pg_atomic_write_u32(&t_thrd.walreceiverfuncs_cxt.WalRcv->rcvDoneFromShareStorage, false); // Allow read-only connections immediately if we're consistent already. CheckRecoveryConsistency(); @@ -10437,7 +10435,9 @@ void StartupXLOG(void) if (IS_EXRTO_READ) { /* we are going to be master, we need to recycle residual_undo_file again */ + (void)LWLockAcquire(ExrtoRecycleResidualUndoLock, LW_EXCLUSIVE); g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = false; + LWLockRelease(ExrtoRecycleResidualUndoLock); } LocalSetXLogInsertAllowed(); @@ -12766,7 +12766,8 @@ bool CreateRestartPoint(int flags) return false; } } - + (void)LWLockAcquire(RedoTruncateLock, LW_SHARED); + LWLockRelease(RedoTruncateLock); /* * Update pg_control, using current time. 
Check that it still shows * IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing; @@ -13199,11 +13200,13 @@ static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr curIns } } - if (IS_EXRTO_READ) { + if (RecoveryInProgress() && IS_EXRTO_READ) { XLogRecPtr recycle_recptr = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn); XLogSegNo recyle_segno; XLByteToSeg(recycle_recptr, recyle_segno); - if (recyle_segno < segno && recyle_segno > 0) { + if (recyle_segno == 0) { + segno = 1; + } else if (recyle_segno < segno) { segno = recyle_segno; } } @@ -13409,6 +13412,14 @@ bool IsCheckPoint(const XLogRecParseState *parseState) return rmid == RM_XLOG_ID && (info == XLOG_CHECKPOINT_SHUTDOWN || info == XLOG_CHECKPOINT_ONLINE); } +bool is_backup_end(const XLogRecParseState *parse_state) +{ + uint8 info = parse_state->blockparse.blockhead.xl_info & (~XLR_INFO_MASK); + RmgrId rmid = parse_state->blockparse.blockhead.xl_rmid; + + return rmid == RM_XLOG_ID && (info == XLOG_BACKUP_END); +} + bool HasTimelineUpdate(XLogReaderState *record) { uint8 info; @@ -16364,6 +16375,9 @@ static bool read_tablespace_map(List **tablespaces) /* * Error context callback for errors occurring during rm_redo(). 
*/ void rm_redo_error_callback(void *arg) { + if (arg == NULL) { + return; + } XLogReaderState *record = (XLogReaderState *)arg; StringInfoData buf; diff --git a/src/gausskernel/storage/access/transam/xlogutils.cpp b/src/gausskernel/storage/access/transam/xlogutils.cpp index c5db30a85f..7bd8aee2e0 100644 --- a/src/gausskernel/storage/access/transam/xlogutils.cpp +++ b/src/gausskernel/storage/access/transam/xlogutils.cpp @@ -1380,6 +1380,13 @@ void XlogDropRowReation(RelFileNode rnode) rbnode.node = rnode; rbnode.backend = InvalidBackendId; smgrclosenode(rbnode); + if (IS_EXRTO_READ) { + RelFileNodeBackend standbyReadRnode; + standbyReadRnode.node = rnode; + standbyReadRnode.node.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; + standbyReadRnode.backend = InvalidBackendId; + smgrclosenode(standbyReadRnode); + } } void XLogForgetDDLRedo(XLogRecParseState *redoblockstate) @@ -1437,6 +1444,10 @@ void XLogDropSpaceShrink(XLogRecParseState *redoblockstate) */ void XLogDropRelation(const RelFileNode &rnode, ForkNumber forknum) { + if (AmErosRecyclerProcess()) { + return; + } + forget_invalid_pages(rnode, forknum, 0, false); /* clear relfilenode match entry of recovery thread hashtbl */ @@ -1514,6 +1525,10 @@ void XLogDropDatabase(Oid dbid) forget_invalid_pages_batch(InvalidOid, dbid); + if (AmErosRecyclerProcess()) { + return; + } + /* clear dbNode match entry of recovery thread hashtbl */ if (IsExtremeRedo()) { ExtremeBatchClearRecoveryThreadHashTbl(InvalidOid, dbid); @@ -1954,4 +1969,3 @@ XLogRecParseState *multixact_redo_parse_to_block(XLogReaderState *record, uint32 } return recordstatehead; } - diff --git a/src/gausskernel/storage/access/ubtree/ubtxlog.cpp b/src/gausskernel/storage/access/ubtree/ubtxlog.cpp index 798549cc2d..78fedf8887 100644 --- a/src/gausskernel/storage/access/ubtree/ubtxlog.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtxlog.cpp @@ -970,7 +970,7 @@ static void UBTreeXlogReusePage(XLogReaderState *record) RelFileNode tmp_node; RelFileNodeCopy(tmp_node, 
xlrec->node, XLogRecGetBucketId(record)); - if (InHotStandby && g_supportHotStandby) { + if (InHotStandby && g_supportHotStandby && !IS_EXRTO_READ) { ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, tmp_node, lsn); } } @@ -1000,7 +1000,7 @@ static void UBTreeXlogPrunePage(XLogReaderState* record) /* Caller specified a bogus block_id */ ereport(PANIC, (errmsg("failed to locate backup block with ID %d", 0))); } - if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid)) + if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid) && !IS_EXRTO_READ) ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, rnode, lsn); if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { diff --git a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp index 9149870fe0..5797e42a4e 100644 --- a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp @@ -1247,17 +1247,6 @@ void UHeapXlogCleanOperatorPage(RedoBufferInfo *buffer, void *recorddata, void * uint16 *nfixed = &tmpfixed; char *unused = (char *)xlrec + SizeOfUHeapClean; - /* - * We're about to remove tuples. In Hot Standby mode, ensure that there's - * no queries running for which the removed tuples are still visible. - * - * Not all UHEAP_CLEAN records remove tuples with xids, so we only want to - * conflict on the records that cause MVCC failures for user queries. If - * latestRemovedXid is invalid, skip conflict processing. 
- */ - if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid)) - ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, buffer->blockinfo.rnode, buffer->lsn); - /* Update all item pointers per the record, and repair fragmentation */ if (xlrec->flags & XLZ_CLEAN_CONTAINS_OFFSET) { targetOffnum = (OffsetNumber *)((char *)xlrec + SizeOfUHeapClean); @@ -2011,12 +2000,13 @@ static void RedoUndoDiscardBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *b if (zone->GetLSN() < lsn) { zone->LockUndoZone(); Assert(blockdatarec->undoDiscardParse.startSlot == zone->GetRecycleTSlotPtr()); - if (IS_EXRTO_READ && (!g_instance.undo_cxt.is_exrto_residual_undo_file_recycled)) { - zone->set_recycle_tslot_ptr_exrto(endSlot); - } zone->SetRecycleTSlotPtr(endSlot); zone->SetDiscardURecPtr(endUndoPtr); zone->SetForceDiscardURecPtr(endUndoPtr); + if (!IS_EXRTO_READ) { + zone->set_discard_urec_ptr_exrto(endUndoPtr); + zone->set_force_discard_urec_ptr_exrto(endUndoPtr); + } zone->SetRecycleXid(recycledXid); zone->MarkDirty(); zone->SetLSN(lsn); @@ -2042,10 +2032,16 @@ static void RedoUndoUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl zoneId, usp->LSN(), unlinkLsn, head, newHead))); if (usp->LSN() < unlinkLsn) { - zone->ForgetUndoBuffer(head, newHead, UNDO_DB_OID); + /* + * before hot_standby mode, we don't know we will be primary or standby, + * so before hot standby we better do unlinklog.
+ */ + if (!IS_EXRTO_READ) { + zone->ForgetUndoBuffer(head, newHead, UNDO_DB_OID); + } usp->LockSpace(); usp->MarkDirty(); - if (IS_EXRTO_STANDBY_READ) { + if (IS_EXRTO_READ) { usp->SetHead(newHead); } else { usp->UnlinkUndoLog(zoneId, newHead, UNDO_DB_OID); @@ -2072,10 +2068,16 @@ static void RedoSlotUnlinkBlock(XLogBlockHead *blockhead, XLogBlockUndoParse *bl zoneId, usp->LSN(), unlinkLsn, head, newHead))); if (usp->LSN() < unlinkLsn) { - zone->ForgetUndoBuffer(head, newHead, UNDO_SLOT_DB_OID); + /* + * before hot_standby mode, we don't know we will be primary or standby, + * so before hot standby we better do unlinklog. + */ + if (!IS_EXRTO_READ) { + zone->ForgetUndoBuffer(head, newHead, UNDO_SLOT_DB_OID); + } usp->LockSpace(); usp->MarkDirty(); - if (IS_EXRTO_STANDBY_READ) { + if (IS_EXRTO_READ) { usp->SetHead(newHead); } else { usp->UnlinkUndoLog(zoneId, newHead, UNDO_SLOT_DB_OID); diff --git a/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp b/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp index 8240de04d7..2bb1448e6f 100644 --- a/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp +++ b/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp @@ -977,7 +977,7 @@ struct varlena *UHeapInternalToastFetchDatum(struct varatt_external toastPointer */ nextidx = 0; - toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); + toastscan = systable_beginscan_ordered(toastrel, toastidx, get_toast_snapshot(), 1, &toastkey); while (UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot)) { /* * Have a chunk, extract the sequence number and the data @@ -1163,7 +1163,7 @@ struct varlena *UHeapInternalToastFetchDatumSlice(struct varatt_external toastPo * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; - toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); + toastscan = systable_beginscan_ordered(toastrel, toastidx,
get_toast_snapshot(), nscankeys, toastkey); while (UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot)) { /* * Have a chunk, extract the sequence number and the data diff --git a/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp b/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp index 48beaf0e12..a12abb5563 100644 --- a/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp @@ -249,6 +249,10 @@ bool UHeapTupleSatisfiesVisibility(UHeapTuple uhtup, Snapshot snapshot, Buffer b } uint64 globalFrozenXid = isFlashBack ? pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid) : pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); + if (pm_state_is_hot_standby()) { + /* in hot standby mode, if globalRecycleXid advance during query, it may cause data inconsistency */ + globalFrozenXid = 0; + } if (TransactionIdIsValid(tdinfo.xid) && TransactionIdPrecedes(tdinfo.xid, globalFrozenXid)) { /* The slot is old enough that we can treat it as frozen. */ tdinfo.td_slot = UHEAPTUP_SLOT_FROZEN; @@ -813,6 +817,10 @@ bool UHeapTupleFetch(Relation rel, Buffer buffer, OffsetNumber offnum, Snapshot uint64 oldestRecycleXidHavingUndo = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid); uint64 oldestXidHavingUndo = (isFlashBack || isLogical) ? 
oldestRecycleXidHavingUndo : pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); + if (pm_state_is_hot_standby()) { + /* in hot standby mode, if globalRecycleXid advance during query, it may cause data inconsistency */ + oldestXidHavingUndo = 0; + } if (TransactionIdIsValid(tdinfo.xid) && TransactionIdPrecedes(tdinfo.xid, oldestXidHavingUndo)) { if (TransactionIdOlderThanAllUndo(tdinfo.xid)) { isFrozen = true; diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp index 87acf77e59..4cbad291ae 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundoapi.cpp @@ -646,6 +646,7 @@ void RecoveryUndoSystemMeta(void) /* Close fd. */ close(fd); + exrto_recycle_residual_undo_file("recovery_meta"); ereport(LOG, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("recovery_meta: undo recovery finish."))); #endif } diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp index 44debc23e5..b6201db3ea 100755 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp @@ -370,7 +370,7 @@ bool RecycleUndoSpace(UndoZone *zone, TransactionId recycleXmin, TransactionId f if (undoRecycled) { Assert(TransactionIdIsValid(recycleXid) && (zone->GetRecycleXid() < recycleXid)); zone->LockUndoZone(); - if (!zone->CheckRecycle(startUndoPtr, endUndoPtr)) { + if (!zone->CheckRecycle(startUndoPtr, endUndoPtr, false)) { ereport(PANIC, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("zone %d recycle start %lu >= recycle end %lu."), zone->GetZoneId(), startUndoPtr, endUndoPtr))); } @@ -536,7 +536,7 @@ void exrto_standby_release_space(UndoZone *zone, TransactionId recycle_xid, Undo UndoRecPtr oldest_end_undo_ptr = end_undo_ptr; Assert(TransactionIdIsValid(recycle_xid) && 
(zone->get_recycle_xid_exrto() < recycle_xid)); zone->LockUndoZone(); - if (!zone->CheckRecycle(start_undo_ptr, end_undo_ptr)) { + if (!zone->CheckRecycle(start_undo_ptr, end_undo_ptr, true)) { ereport(PANIC, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("zone %d recycle start %lu >= recycle end %lu."), zone->GetZoneId(), start_undo_ptr, end_undo_ptr))); @@ -549,7 +549,11 @@ void exrto_standby_release_space(UndoZone *zone, TransactionId recycle_xid, Undo } zone->set_discard_urec_ptr_exrto(oldest_end_undo_ptr); } - + + ereport(DEBUG1, (errmodule(MOD_STANDBY_READ), + errmsg("exrto_standby_release_space: zone %d recycle_xid %lu recycle start " + "%lu recycle end %lu recycle_tslot %lu.", + zone->GetZoneId(), recycle_xid, start_undo_ptr, end_undo_ptr, recycle_exrto))); zone->set_recycle_xid_exrto(recycle_xid); zone->set_force_discard_urec_ptr_exrto(end_undo_ptr); zone->set_recycle_tslot_ptr_exrto(recycle_exrto); @@ -644,17 +648,23 @@ bool exrto_standby_recycle_undo_zone() } /* recycle residual_undo_file which may be leftover by exrto read in standby */ -void exrto_recycle_residual_undo_file() +void exrto_recycle_residual_undo_file(char *FuncName) { uint32 idx = 0; uint64 record_file_cnt = 0; uint64 slot_file_cnt = 0; + (void)LWLockAcquire(ExrtoRecycleResidualUndoLock, LW_EXCLUSIVE); if (g_instance.undo_cxt.is_exrto_residual_undo_file_recycled) { + LWLockRelease(ExrtoRecycleResidualUndoLock); + ereport(LOG, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file skip, FuncName:%s."), FuncName))); return; } + g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true; + LWLockRelease(ExrtoRecycleResidualUndoLock); ereport(LOG, (errmodule(MOD_UNDO), - errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file begin uZoneCount is %u."), - g_instance.undo_cxt.uZoneCount))); + errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file begin uZoneCount is %u, FuncName:%s."), + g_instance.undo_cxt.uZoneCount, FuncName))); if (g_instance.undo_cxt.uZoneCount == 0 || 
g_instance.undo_cxt.uZones == NULL) { g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true; ereport(LOG, (errmodule(MOD_UNDO), @@ -673,7 +683,6 @@ void exrto_recycle_residual_undo_file() ereport(LOG, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("exrto_recycle_residual_undo_file release record_file_cnt:%lu, " "slot_file_cnt:%lu."), record_file_cnt, slot_file_cnt))); - g_instance.undo_cxt.is_exrto_residual_undo_file_recycled = true; } void recycle_wait(bool recycled, uint64 *non_recycled) @@ -799,6 +808,7 @@ void UndoRecycleMain() pg_usleep(10000000L); ereport(LOG, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("sleep 10s, ensure the snapcapturer can give the undorecyclemain a valid recycleXmin.")))); + exrto_recycle_residual_undo_file("recycle_main"); while (true) { if (t_thrd.undorecycler_cxt.got_SIGHUP) { t_thrd.undorecycler_cxt.got_SIGHUP = false; @@ -807,7 +817,6 @@ void UndoRecycleMain() if (t_thrd.undorecycler_cxt.shutdown_requested) { ShutDownRecycle(recycleMaxXIDs); } - exrto_recycle_residual_undo_file(); if (!RecoveryInProgress()) { TransactionId recycleXmin = InvalidTransactionId; TransactionId oldestXmin = GetOldestXminForUndo(&recycleXmin); diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp index 7ac14bfd4c..6ae2a87005 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp @@ -81,25 +81,28 @@ bool UndoZone::CheckNeedSwitch(UndoRecordSize size) return false; } -bool UndoZone::CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp) +bool UndoZone::CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp, bool isexrto) { int startZid = UNDO_PTR_GET_ZONE_ID(starturp); int endZid = UNDO_PTR_GET_ZONE_ID(endurp); UndoLogOffset start = UNDO_PTR_GET_OFFSET(starturp); UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp); - UndoLogOffset force_discard_urec_ptr; - if (IS_EXRTO_STANDBY_READ) { - 
force_discard_urec_ptr = force_discard_urec_ptr_exrto; - } else { - force_discard_urec_ptr = forceDiscardURecPtr_; - } - Assert(start == force_discard_urec_ptr); WHITEBOX_TEST_STUB(UNDO_CHECK_RECYCLE_FAILED, WhiteboxDefaultErrorEmit); - - if ((startZid == endZid) && (force_discard_urec_ptr <= insertURecPtr_) && (end <= insertURecPtr_) - && (start < end)) { - return true; + if (isexrto) { + if ((startZid == endZid) && (forceDiscardURecPtr_ <= insertURecPtr_) && (end <= insertURecPtr_) && + (start < end)) { + return true; + } + } else { + if ((startZid == endZid) && (forceDiscardURecPtr_ <= insertURecPtr_) && (end <= insertURecPtr_) && + (start < end) && (start == forceDiscardURecPtr_)) { + return true; + } } + ereport(WARNING, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("check_recycle: zone:%d, startZid:%d, endZid:%d, start:%lu, end:%lu, " + "forceDiscardURecPtr_:%lu, insertURecPtr_:%lu."), + zid_, startZid, endZid, start, end, forceDiscardURecPtr_, insertURecPtr_))); return false; } @@ -312,9 +315,14 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe /* Release undo space from starturp to endurp and advance discard. 
*/ uint64 UndoZone::release_residual_record_space() { - undoSpace_.LockSpace(); UndoLogOffset unlink_start = undoSpace_.find_oldest_offset(zid_, UNDO_DB_OID); UndoLogOffset unlink_end = undoSpace_.Head(); + uint64 start_segno = unlink_start / UNDO_LOG_SEGMENT_SIZE; + uint64 end_segno = unlink_end / UNDO_LOG_SEGMENT_SIZE; + ereport(DEBUG1, (errmodule(MOD_STANDBY_READ), + errmsg("release_residual_record_space start_segno:%lu end_segno:%lu.", start_segno, end_segno))); + ForgetUndoBuffer(start_segno * UNDO_LOG_SEGMENT_SIZE, end_segno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID); + undoSpace_.LockSpace(); undoSpace_.unlink_residual_log(zid_, unlink_start, unlink_end, UNDO_DB_OID); undoSpace_.UnlockSpace(); if (unlink_start > unlink_end) { @@ -367,9 +375,12 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, /* Release slot space from starturp to endurp and advance discard. */ uint64 UndoZone::release_residual_slot_space() { - slotSpace_.LockSpace(); UndoLogOffset unlink_start = slotSpace_.find_oldest_offset(zid_, UNDO_SLOT_DB_OID); UndoLogOffset unlink_end = slotSpace_.Head(); + uint64 start_segno = unlink_start / UNDO_LOG_SEGMENT_SIZE; + uint64 end_segno = unlink_end / UNDO_LOG_SEGMENT_SIZE; + ForgetUndoBuffer(start_segno * UNDO_LOG_SEGMENT_SIZE, end_segno * UNDO_LOG_SEGMENT_SIZE, UNDO_SLOT_DB_OID); + slotSpace_.LockSpace(); slotSpace_.unlink_residual_log(zid_, unlink_start, unlink_end, UNDO_SLOT_DB_OID); slotSpace_.UnlockSpace(); if (unlink_start > unlink_end) { diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index c52978bda4..a84d2bcb81 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -1628,6 +1628,47 @@ Buffer ReadBuffer(Relation reln, BlockNumber block_num) return ReadBufferExtended(reln, MAIN_FORKNUM, block_num, RBM_NORMAL, NULL); } +Buffer buffer_read_extended_internal(Relation reln, ForkNumber fork_num, BlockNumber block_num, 
ReadBufferMode mode, + BufferAccessStrategy strategy) +{ + bool hit = false; + Buffer buf; + + if (block_num == P_NEW) { + STORAGE_SPACE_OPERATION(reln, BLCKSZ); + } + + /* Open it at the smgr level */ + RelationOpenSmgr(reln); + + /* + * * Test for a temporary relation that belongs to some other session. + */ + if (RELATION_IS_OTHER_TEMP(reln) && fork_num <= INIT_FORKNUM) + /* + * We would be likely to get wrong data since we have no visibility into the owning session's local buffers. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); + + /* + * Read the buffer. + */ + pgstat_count_buffer_read(reln); + pgstatCountBlocksFetched4SessionLevel(); + + if (RelationisEncryptEnable(reln)) { + reln->rd_smgr->encrypt = true; + } + buf = + ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &hit, NULL); + if (hit) { + /* Update pgstat counters to reflect a cache hit */ + pgstat_count_buffer_hit(reln); + } + return buf; +} + /* * ReadBufferExtended -- returns a buffer containing the requested * block of the requested relation. 
If the blknum @@ -1673,45 +1714,12 @@ Buffer ReadBuffer(Relation reln, BlockNumber block_num) Buffer ReadBufferExtended(Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy) { - if (IsDefaultExtremeRtoMode() && IsExtremeRtoRunning() && !AmPageRedoWorker()) { + if (IsDefaultExtremeRtoMode() && + (!RecoveryInProgress() || !IsExtremeRtoRunning() || !is_exrto_standby_read_worker())) { + return buffer_read_extended_internal(reln, fork_num, block_num, mode, strategy); + } else { return standby_read_buf(reln, fork_num, block_num, mode, strategy); } - - bool hit = false; - Buffer buf; - - if (block_num == P_NEW) { - STORAGE_SPACE_OPERATION(reln, BLCKSZ); - } - - /* Open it at the smgr level if not already done */ - RelationOpenSmgr(reln); - - /* - * Reject attempts to read non-local temporary relations; we would be - * likely to get wrong data since we have no visibility into the owning - * session's local buffers. - */ - if (RELATION_IS_OTHER_TEMP(reln) && fork_num <= INIT_FORKNUM) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); - - /* - * Read the buffer, and update pgstat counters to reflect a cache hit or - * miss. 
- */ - pgstat_count_buffer_read(reln); - pgstatCountBlocksFetched4SessionLevel(); - - if (RelationisEncryptEnable(reln)) { - reln->rd_smgr->encrypt = true; - } - buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, - block_num, mode, strategy, &hit, NULL); - if (hit) { - pgstat_count_buffer_hit(reln); - } - return buf; } /* @@ -5424,31 +5432,6 @@ void DropDatabaseBuffers(Oid dbid) gstrace_exit(GS_TRC_ID_DropDatabaseBuffers); } -void buffer_drop_exrto_standby_read_buffers() -{ - int i = 0; - ereport(LOG, (errmsg("buffer_drop_exrto_standby_read_buffers: start to drop buffers."))); - while (i < TOTAL_BUFFER_NUM) { - BufferDesc *buf_desc = GetBufferDescriptor(i); - uint32 buf_state; - /* - * Some safe unlocked checks can be done to reduce the number of cycle. - */ - if (!IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) { - i++; - continue; - } - - buf_state = LockBufHdr(buf_desc); - if (IS_EXRTO_RELFILENODE(buf_desc->tag.rnode)) { - InvalidateBuffer(buf_desc); /* with buffer head lock released */ - } else { - UnlockBufHdr(buf_desc, buf_state); - } - i++; - } -} - /* ----------------------------------------------------------------- * PrintBufferDescs * @@ -5718,11 +5701,6 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) Assert(GetPrivateRefCount(buffer) > 0); - // temp buf just for old page version, could not write to disk - if (pg_atomic_read_u32(&buf_desc->state) & BM_IS_TMP_BUF) { - return; - } - /* here, either share or exclusive lock is OK */ if (!LWLockHeldByMe(buf_desc->content_lock)) ereport(PANIC, (errcode(ERRCODE_INVALID_BUFFER), @@ -5744,6 +5722,11 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) bool delayChkpt = false; uint32 buf_state; uint32 old_buf_state; + buf_state = pg_atomic_read_u32(&buf_desc->state); + // temp buf just for old page version, could not write to disk + if (IS_EXRTO_READ && (buf_state & BM_IS_TMP_BUF)) { + return; + } /* * If we need to protect hint bit updates from torn writes, WAL-log a @@ 
-5756,9 +5739,8 @@ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) * The incremental checkpoint is protected by the doublewriter, the * half-write problem does not occur. */ - bool need_write_wal = - (!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() && (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT)); - if (need_write_wal) { + if (unlikely(!ENABLE_INCRE_CKPT && XLogHintBitIsNeeded() && + (pg_atomic_read_u32(&buf_desc->state) & BM_PERMANENT))) { /* * If we're in recovery we cannot dirty a page because of a hint. * We can set the hint, just not dirty the page as a result so the @@ -6197,10 +6179,16 @@ void LockBufferForCleanup(Buffer buffer) */ bool HoldingBufferPinThatDelaysRecovery(void) { - uint32 bufLen = parallel_recovery::GetStartupBufferPinWaitBufLen(); + if (IS_EXRTO_READ) { + return false; + } + SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); int bufids[MAX_RECOVERY_THREAD_NUM + 1]; + errno_t rc = memset_s(bufids, sizeof(bufids), -1, sizeof(bufids)); + securec_check(rc, "\0", "\0"); + uint32 bufLen = parallel_recovery::GetStartupBufferPinWaitBufLen(); parallel_recovery::GetStartupBufferPinWaitBufId(bufids, bufLen); - + SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); for (uint32 i = 0; i < bufLen; i++) { /* diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp index b1bd477c9a..ddcbd8f702 100755 --- a/src/gausskernel/storage/ipc/procarray.cpp +++ b/src/gausskernel/storage/ipc/procarray.cpp @@ -543,6 +543,7 @@ void ProcArrayEndTransaction(PGPROC* proc, TransactionId latestXid, bool isCommi proc->snapXmax = InvalidTransactionId; proc->snapCSN = InvalidCommitSeqNo; proc->exrto_read_lsn = 0; + proc->exrto_min = 0; proc->exrto_gen_snap_time = 0; pgxact->csn_min = InvalidCommitSeqNo; pgxact->csn_dr = InvalidCommitSeqNo; @@ -589,6 +590,7 @@ static inline void ProcArrayEndTransactionInternal(PGPROC* proc, PGXACT* pgxact, proc->snapXmax = InvalidTransactionId; proc->snapCSN = 
InvalidCommitSeqNo; proc->exrto_read_lsn = 0; + proc->exrto_min = 0; proc->exrto_gen_snap_time = 0; pgxact->csn_min = InvalidCommitSeqNo; pgxact->csn_dr = InvalidCommitSeqNo; @@ -819,6 +821,8 @@ void ProcArrayClearTransaction(PGPROC* proc) pgxact->xmin = InvalidTransactionId; proc->snapXmax = InvalidTransactionId; proc->snapCSN = InvalidCommitSeqNo; + proc->exrto_read_lsn = 0; + proc->exrto_gen_snap_time = 0; pgxact->csn_min = InvalidCommitSeqNo; pgxact->csn_dr = InvalidCommitSeqNo; proc->recoveryConflictPending = false; @@ -832,8 +836,7 @@ void ProcArrayClearTransaction(PGPROC* proc) /* Clear the subtransaction-XID cache too */ pgxact->nxids = 0; - proc->exrto_read_lsn = 0; - proc->exrto_gen_snap_time = 0; + proc->exrto_min = 0; /* Free xid cache memory if needed */ ResetProcXidCache(proc, true); } @@ -2111,8 +2114,6 @@ RETRY: bool retry_get = false; uint64 retry_count = 0; const static uint64 WAIT_COUNT = 0x7FFFF; - /* reset xmin before acquiring lwlock, in case blocking redo */ - t_thrd.pgxact->xmin = InvalidTransactionId; RETRY_GET: if (snapshot->takenDuringRecovery && !StreamThreadAmI() && !IS_EXRTO_READ && !u_sess->proc_cxt.clientIsCMAgent) { @@ -2433,15 +2434,40 @@ GROUP_GET_SNAPSHOT: if (snapshot->takenDuringRecovery) { (void)pgstat_report_waitstatus(oldStatus); - } - - if (IsDefaultExtremeRtoMode() && IsExtremeRtoRunning() && pmState == PM_HOT_STANDBY) { - extreme_rto::exrto_read_snapshot(snapshot); + if (IsDefaultExtremeRtoMode() && IS_EXRTO_STANDBY_READ) { + extreme_rto::exrto_read_snapshot(snapshot); + if (t_thrd.proc->exrto_reload_cache) { + t_thrd.proc->exrto_reload_cache = false; + reset_invalidation_cache(); + } + AcceptInvalidationMessages(); + } } return snapshot; } +void exrto_get_snapshot_data(TransactionId &xmin, TransactionId &xmax, CommitSeqNo &snapshot_csn) +{ + LWLockAcquire(ProcArrayLock, LW_SHARED); + + /* xmax is always latest_completed_xid + 1 */ + xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid; + + 
Assert(TransactionIdIsNormal(xmax)); + TransactionIdAdvance(xmax); + /* initialize xmin calculation with xmax */ + xmin = xmax; + if (TransactionIdIsValid(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin)) { + if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin, xmin)) { + xmin = t_thrd.xact_cxt.ShmemVariableCache->standbyXmin; + } + } + + LWLockRelease(ProcArrayLock); + snapshot_csn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo); +} + /* * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin * @@ -3220,10 +3246,10 @@ bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool r int pg_proc_no = proc_array->pgprocnos[index]; PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; - XLogRecPtr read_lsn = pg_proc->exrto_read_lsn; + XLogRecPtr read_lsn = pg_proc->exrto_min; TransactionId pxmin = pg_xact->xmin; - if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin) || XLogRecPtrIsInvalid(read_lsn)) { + if (pg_proc->pid == 0 || XLogRecPtrIsInvalid(read_lsn)) { continue; } @@ -3250,6 +3276,10 @@ bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool r * Wait a little bit for it to die so that we avoid flooding * an unresponsive backend when system is heavily loaded. */ + ereport(LOG, + (errmsg(EXRTOFORMAT("exrto_gen_snap_time: %ld, current_timestamp: %ld, cancel thread while " + "redo truncate, thread id = %lu\n"), + pg_proc->exrto_gen_snap_time, GetCurrentTimestamp(), pg_proc->pid))); pg_usleep(5000L); } } @@ -4457,11 +4487,11 @@ TransactionId GetGlobal2pcXmin() * Wait for the transaction which modify the tuple to finish. * First release the buffer lock. After waiting, re-acquire the buffer lock. 
*/ -void SyncWaitXidEnd(TransactionId xid, Buffer buffer) +void SyncWaitXidEnd(TransactionId xid, Buffer buffer, const Snapshot snapshot) { if (!BufferIsValid(buffer)) { /* Wait local transaction finish */ - SyncLocalXidWait(xid); + SyncLocalXidWait(xid, snapshot); return; } @@ -4473,7 +4503,7 @@ void SyncWaitXidEnd(TransactionId xid, Buffer buffer) /* Release buffer lock */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Wait local transaction finish */ - SyncLocalXidWait(xid); + SyncLocalXidWait(xid, snapshot); /* Re-acqure buffer lock, need transform lwlock mode to buffer lock mode */ LockBuffer(buffer, mode == LW_EXCLUSIVE ? BUFFER_LOCK_EXCLUSIVE : BUFFER_LOCK_SHARE); } @@ -4482,7 +4512,7 @@ void SyncWaitXidEnd(TransactionId xid, Buffer buffer) /* * Wait local transaction finish, if transaction wait time exceed transaction_sync_naptime, call gs_clean. */ -void SyncLocalXidWait(TransactionId xid) +void SyncLocalXidWait(TransactionId xid, const Snapshot snapshot) { ReleaseAllGSCRdConcurrentLock(); @@ -4492,7 +4522,7 @@ void SyncLocalXidWait(TransactionId xid) WaitState oldStatus = pgstat_report_waitstatus(STATE_WAIT_UNDEFINED, true); gstrace_entry(GS_TRC_ID_SyncLocalXidWait); - while (!ConditionalXactLockTableWait(xid)) { + while (!ConditionalXactLockTableWait(xid, snapshot)) { /* type of transaction id is same as node id, reuse the second param for waited transaction id */ pgstat_report_waitstatus_xid(STATE_WAIT_XACTSYNC, xid); diff --git a/src/gausskernel/storage/ipc/sinval.cpp b/src/gausskernel/storage/ipc/sinval.cpp index 5954ffc5eb..0f82dde70c 100644 --- a/src/gausskernel/storage/ipc/sinval.cpp +++ b/src/gausskernel/storage/ipc/sinval.cpp @@ -74,7 +74,7 @@ void GlobalExecuteSharedInvalidMessages(const SharedInvalidationMessage* msgs, i * SendSharedInvalidMessages * Add shared-cache-invalidation message(s) to the global SI message queue. 
*/ -void SendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n) +void send_shared_invalid_messages(const SharedInvalidationMessage* msgs, int n, XLogRecPtr lsn) { if (ENABLE_DMS && SS_PRIMARY_MODE && !RecoveryInProgress()) { SSSendSharedInvalidMessages(msgs, n); @@ -84,12 +84,17 @@ void SendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n) if (EnableGlobalSysCache()) { GlobalInvalidSharedInvalidMessages(msgs, n, true); } - SIInsertDataEntries(msgs, n); + SIInsertDataEntries(msgs, n, lsn); if (ENABLE_GPC && g_instance.plan_cache != NULL) { g_instance.plan_cache->InvalMsg(msgs, n); } } +void SendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n) +{ + send_shared_invalid_messages(msgs, n, 0); +} + static bool SkipRedundantInvalMsg(SharedInvalidationMessage *msg) { if (msg->id != SHAREDINVALRELCACHE_ID || unlikely(u_sess->proc_cxt.MyDatabaseId == InvalidOid) || @@ -171,6 +176,7 @@ void ReceiveSharedInvalidMessages(void (*invalFunction)(SharedInvalidationMessag ereport(DEBUG4, (errmsg("cache state reset"))); inval_cxt->SIMCounter++; resetFunction(); + t_thrd.proc->exrto_reload_cache = true; break; /* nothing more to do */ } diff --git a/src/gausskernel/storage/ipc/sinvaladt.cpp b/src/gausskernel/storage/ipc/sinvaladt.cpp index 630a3155d1..490f582c46 100644 --- a/src/gausskernel/storage/ipc/sinvaladt.cpp +++ b/src/gausskernel/storage/ipc/sinvaladt.cpp @@ -179,7 +179,7 @@ typedef struct SISeg { /* * Circular buffer holding shared-inval messages */ - SharedInvalidationMessage buffer[MAXNUMMESSAGES]; + SharedInvalidationMessageEx buffer[MAXNUMMESSAGES]; /* * Per-backend state info. @@ -546,7 +546,7 @@ PGPROC* BackendIdGetProc(int backendID) * SIInsertDataEntries * Add new invalidation message(s) to the buffer. 
*/ -void SIInsertDataEntries(const SharedInvalidationMessage* data, int n) +void SIInsertDataEntries(const SharedInvalidationMessage* data, int n, XLogRecPtr lsn) { SISeg* segP = t_thrd.shemem_ptr_cxt.shmInvalBuffer; @@ -592,7 +592,9 @@ void SIInsertDataEntries(const SharedInvalidationMessage* data, int n) max = segP->maxMsgNum; while (nthistime-- > 0) { - segP->buffer[max % MAXNUMMESSAGES] = *data++; + int index = max % MAXNUMMESSAGES; + segP->buffer[index].msg = *data++; + segP->buffer[index].lsn = lsn; max++; } @@ -735,9 +737,23 @@ int SIGetDataEntries(SharedInvalidationMessage* data, int datasize, bool workses * from the queue. */ n = 0; + + XLogRecPtr read_lsn = InvalidXLogRecPtr; + if (u_sess->utils_cxt.CurrentSnapshot != NULL && + XLogRecPtrIsValid(u_sess->utils_cxt.CurrentSnapshot->read_lsn)) { + read_lsn = u_sess->utils_cxt.CurrentSnapshot->read_lsn; + } else if (XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)) { + read_lsn = t_thrd.proc->exrto_read_lsn; + } while (n < datasize && stateP->nextMsgNum < max) { - data[n++] = segP->buffer[stateP->nextMsgNum % MAXNUMMESSAGES]; + int index = stateP->nextMsgNum % MAXNUMMESSAGES; + if (read_lsn != InvalidXLogRecPtr && segP->buffer[index].lsn != InvalidXLogRecPtr) { + if (XLByteLT(read_lsn, segP->buffer[index].lsn)) { + break; + } + } + data[n++] = segP->buffer[index].msg; stateP->nextMsgNum++; } diff --git a/src/gausskernel/storage/lmgr/lmgr.cpp b/src/gausskernel/storage/lmgr/lmgr.cpp index 3154d85bf7..00570fb5ab 100755 --- a/src/gausskernel/storage/lmgr/lmgr.cpp +++ b/src/gausskernel/storage/lmgr/lmgr.cpp @@ -627,14 +627,18 @@ void XactLockTableWait(TransactionId xid, bool allow_con_update, int waitSec) * As above, but only lock if we can get the lock without blocking. * Returns TRUE if the lock was acquired. 
*/ -bool ConditionalXactLockTableWait(TransactionId xid, bool waitparent, bool bcareNextXid) +bool ConditionalXactLockTableWait(TransactionId xid, const Snapshot snapshot, bool waitparent, bool bcareNextXid) { LOCKTAG tag; CLogXidStatus status = CLOG_XID_STATUS_IN_PROGRESS; + bool takenDuringRecovery = false; + if (snapshot != NULL) { + takenDuringRecovery = snapshot->takenDuringRecovery; + } for (;;) { Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()) || status == CLOG_XID_STATUS_COMMITTED || - status == CLOG_XID_STATUS_ABORTED); + status == CLOG_XID_STATUS_ABORTED || takenDuringRecovery); if (!TransactionIdIsValid(xid)) break; diff --git a/src/gausskernel/storage/lmgr/lwlocknames.txt b/src/gausskernel/storage/lmgr/lwlocknames.txt index 14a5191734..06c70bce86 100755 --- a/src/gausskernel/storage/lmgr/lwlocknames.txt +++ b/src/gausskernel/storage/lmgr/lwlocknames.txt @@ -141,3 +141,6 @@ AboCacheLock 131 OndemandXLogMemAllocLock 132 OndemandXLogFileHandleLock 133 ExrtoSnapshotLock 134 +RedoTruncateLock 135 + +ExrtoRecycleResidualUndoLock 137 \ No newline at end of file diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp index 29ee4bf0b2..a4d2050fae 100755 --- a/src/gausskernel/storage/lmgr/proc.cpp +++ b/src/gausskernel/storage/lmgr/proc.cpp @@ -843,6 +843,9 @@ void InitProcess(void) t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.proc->snapXmax = InvalidTransactionId; t_thrd.proc->snapCSN = InvalidCommitSeqNo; + t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_min = 0; + t_thrd.proc->exrto_gen_snap_time = 0; t_thrd.pgxact->csn_min = InvalidCommitSeqNo; t_thrd.pgxact->csn_dr = InvalidCommitSeqNo; t_thrd.pgxact->prepare_xid = InvalidTransactionId; @@ -950,8 +953,6 @@ void InitProcess(void) t_thrd.proc->snap_refcnt_bitmap = 0; #endif - t_thrd.proc->exrto_read_lsn = 0; - t_thrd.proc->exrto_gen_snap_time = 0; /* Check that group locking fields are in a proper initial state. 
*/ Assert(t_thrd.proc->lockGroupLeader == NULL); Assert(dlist_is_empty(&t_thrd.proc->lockGroupMembers)); @@ -1112,7 +1113,9 @@ void InitAuxiliaryProcess(void) t_thrd.proc->snapXmax = InvalidTransactionId; t_thrd.proc->snapCSN = InvalidCommitSeqNo; t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_min = 0; t_thrd.proc->exrto_gen_snap_time = 0; + t_thrd.proc->exrto_reload_cache = true; t_thrd.pgxact->csn_min = InvalidCommitSeqNo; t_thrd.pgxact->csn_dr = InvalidCommitSeqNo; t_thrd.proc->backendId = InvalidBackendId; diff --git a/src/gausskernel/storage/page/gs_xlogdump.cpp b/src/gausskernel/storage/page/gs_xlogdump.cpp index f76b087ae6..852d6be78a 100644 --- a/src/gausskernel/storage/page/gs_xlogdump.cpp +++ b/src/gausskernel/storage/page/gs_xlogdump.cpp @@ -107,7 +107,7 @@ static XLogRecPtr GetMaxLSN() return current_recptr; } -static void XLogDumpDisplayRecord(XLogReaderState *record, char *strOutput) +void XLogDumpDisplayRecord(XLogReaderState *record, char *strOutput) { errno_t rc = snprintf_s(strOutput + (int)strlen(strOutput), MAXOUTPUTLEN, MAXOUTPUTLEN - 1, "start_lsn: %X/%X \nend_lsn: %X/%X \nxid: " XID_FMT " \nterm: %u \ntotal length: %u \ndesc: %s - ", @@ -209,7 +209,7 @@ static bool CheckValidRecord(XLogReaderState *xlogreader_state, XLogFilter *filt return found; } -static XLogRecPtr UpdateNextLSN(XLogRecPtr cur_lsn, XLogRecPtr end_lsn, XLogReaderState *xlogreader_state, bool *found) +XLogRecPtr UpdateNextLSN(XLogRecPtr cur_lsn, XLogRecPtr end_lsn, XLogReaderState *xlogreader_state, bool *found) { XLogRecPtr next_record = InvalidXLogRecPtr; for (int tryTimes = 0; tryTimes < FIVE; tryTimes++) { diff --git a/src/gausskernel/storage/replication/basebackup.cpp b/src/gausskernel/storage/replication/basebackup.cpp index e8523cfc6f..9432b444fd 100755 --- a/src/gausskernel/storage/replication/basebackup.cpp +++ b/src/gausskernel/storage/replication/basebackup.cpp @@ -1780,6 +1780,24 @@ static bool check_data_filename(char *filename, int *segNo) token = 
strtok_r(filename, "_", &tmptoken); if ('\0' == tmptoken[0]) { + uint dot_count = 0; + int filename_idx = static_cast<int>(strlen(filename) - 1); + // check the last word must be num + if (isdigit(filename[filename_idx]) == false) { + *segNo = 0; + return false; + } + while (filename_idx >= 0) { + if (filename[filename_idx] == '.') { + dot_count++; + } + /* if the char is not num/'.' or dot_count > 1, then break */ + if ((isdigit(filename[filename_idx]) == false && filename[filename_idx] != '.') || dot_count > 1) { + *segNo = 0; + return false; + } + filename_idx--; + } /* MAIN_FORK */ nmatch = sscanf_s(filename, "%u.%d", &relNode, segNo); return (nmatch == 1 || nmatch == 2); diff --git a/src/gausskernel/storage/replication/heartbeat/libpq/fe-connect.cpp b/src/gausskernel/storage/replication/heartbeat/libpq/fe-connect.cpp index e25e12941d..2c06069b7e 100644 --- a/src/gausskernel/storage/replication/heartbeat/libpq/fe-connect.cpp +++ b/src/gausskernel/storage/replication/heartbeat/libpq/fe-connect.cpp @@ -34,12 +34,14 @@ #define SOCK_ERRNO errno #define SOCK_ERRNO_SET(e) (errno = (e)) +#ifndef FREE_AND_RESET #define FREE_AND_RESET(ptr) do { \ if (NULL != (ptr)) { \ free(ptr); \ (ptr) = NULL; \ } \ } while (0) +#endif namespace PureLibpq { typedef struct PQconninfoOption { diff --git a/src/gausskernel/storage/replication/slot.cpp b/src/gausskernel/storage/replication/slot.cpp index 5879299950..7eae7497a0 100644 --- a/src/gausskernel/storage/replication/slot.cpp +++ b/src/gausskernel/storage/replication/slot.cpp @@ -692,6 +692,7 @@ void ReplicationSlotRelease(void) t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.pgxact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING; t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_min = 0; t_thrd.proc->exrto_gen_snap_time = 0; LWLockRelease(ProcArrayLock); } diff --git a/src/gausskernel/storage/replication/walreceiver.cpp b/src/gausskernel/storage/replication/walreceiver.cpp index ba49b9ae1f..b3a7009bfe 100755 ---
a/src/gausskernel/storage/replication/walreceiver.cpp +++ b/src/gausskernel/storage/replication/walreceiver.cpp @@ -1700,6 +1700,7 @@ static void XLogWalRcvSendHSFeedback(void) xmin = InvalidTransactionId; t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_min = 0; t_thrd.proc->exrto_gen_snap_time = 0; /* * Always send feedback message. diff --git a/src/gausskernel/storage/replication/walsender.cpp b/src/gausskernel/storage/replication/walsender.cpp index f15b692f66..738d136e46 100755 --- a/src/gausskernel/storage/replication/walsender.cpp +++ b/src/gausskernel/storage/replication/walsender.cpp @@ -2946,6 +2946,7 @@ static void PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin) SpinLockAcquire(&slot->mutex); t_thrd.pgxact->xmin = InvalidTransactionId; t_thrd.proc->exrto_read_lsn = 0; + t_thrd.proc->exrto_min = 0; t_thrd.proc->exrto_gen_snap_time = 0; /* * For physical replication we don't need the the interlock provided diff --git a/src/gausskernel/storage/smgr/md.cpp b/src/gausskernel/storage/smgr/md.cpp index a7e378ee5c..0826059091 100644 --- a/src/gausskernel/storage/smgr/md.cpp +++ b/src/gausskernel/storage/smgr/md.cpp @@ -25,6 +25,7 @@ #include "miscadmin.h" #include "access/transam.h" #include "access/xlog.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "catalog/catalog.h" #include "portability/instr_time.h" #include "postmaster/bgwriter.h" @@ -1307,7 +1308,7 @@ SMGR_READ_STATUS mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber block } else { check_file_stat(FilePathName(v->mdfd_vfd)); force_backtrace_messages = true; - + extreme_rto_standby_read::dump_error_all_info(reln->smgr_rnode.node, forknum, blocknum); ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("could not read block %u in file \"%s\": read only %d of %d bytes", blocknum, FilePathName(v->mdfd_vfd), nbytes, BLCKSZ))); diff --git a/src/gausskernel/storage/smgr/storage_exrto_file.cpp 
b/src/gausskernel/storage/smgr/storage_exrto_file.cpp index a3234cee4f..46ee9e494e 100644 --- a/src/gausskernel/storage/smgr/storage_exrto_file.cpp +++ b/src/gausskernel/storage/smgr/storage_exrto_file.cpp @@ -47,32 +47,12 @@ typedef struct _ExRTOFileState { File file[EXRTO_FORK_NUM]; } ExRTOFileState; -static inline ExRTOFileType exrto_file_type(uint32 space_oid) -{ - if (space_oid == EXRTO_BASE_PAGE_SPACE_OID) { - return BASE_PAGE; - } else if (space_oid == EXRTO_LSN_INFO_SPACE_OID) { - return LSN_INFO_META; - } else { - return BLOCK_INFO_META; - } -} - static inline void set_file_state(ExRTOFileState *state, ForkNumber forknum, uint64 segno, File file) { state->segno[forknum] = segno; state->file[forknum] = file; } -static inline uint64 get_total_block_num(ExRTOFileType type, uint32 high, uint32 low) -{ - if (type == BASE_PAGE || type == LSN_INFO_META) { - return ((uint64)high << UINT64_HALF) | low; - } else { - return (uint64)low; - } -} - static ExRTOFileState *alloc_file_state(void) { MemoryContext current; @@ -100,7 +80,7 @@ static void exrto_get_file_path(const RelFileNode node, ForkNumber forknum, uint if (type == BASE_PAGE || type == LSN_INFO_META) { uint32 batch_id = node.dbNode >> LOW_WORKERID_BITS; uint32 worker_id = node.dbNode & LOW_WORKERID_MASK; - rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%02X%02X%016X", + rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%02x%02x%016lX", batch_id, worker_id, segno); } else { rc = snprintf_s(filename, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%u_%u_%s.%u", @@ -201,16 +181,18 @@ static ExRTOFileState *exrto_open_file(SMgrRelation reln, ForkNumber forknum, Bl return state; } -BlockNumber get_single_file_nblocks(SMgrRelation reln, ForkNumber forknum, const ExRTOFileState*state) +BlockNumber get_single_file_nblocks(SMgrRelation reln, ForkNumber forknum, const ExRTOFileState *state) { Assert(state != NULL); char *filename = FilePathName(state->file[forknum]); 
off_t len = FileSeek(state->file[forknum], 0L, SEEK_END); if (len < 0) { + char filepath[EXRTO_FILE_PATH_LEN]; + errno_t rc = strcpy_s(filepath, EXRTO_FILE_PATH_LEN, filename); + securec_check(rc, "\0", "\0"); exrto_close(reln, forknum, InvalidBlockNumber); - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek to end of file \"%s\": %m", filename))); + ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek to end of file \"%s\": %m", filepath))); } /* note that this calculation will ignore any partial block at EOF */ @@ -235,13 +217,20 @@ void exrto_close(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) if (state == NULL) { return; } - reln->fileState = NULL; /* prevent dangling pointer after error */ - + /* if not closed already */ if (state->file[forknum] >= 0) { FileClose(state->file[forknum]); + state->file[forknum] = -1; } + for (int forkno = 0; forkno < EXRTO_FORK_NUM; forkno++) { + if (state->file[forkno] != -1) { + return; + } + } + pfree(state); + reln->fileState = NULL; } bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) @@ -260,60 +249,97 @@ bool exrto_exists(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) return isExist; } -bool exrto_unlink_single_file(const RelFileNodeBackend &rnode, ForkNumber forknum, uint64 segno) +void exrto_unlink_file_with_prefix(char* target_prefix, ExRTOFileType type, uint64 segno) { - struct stat stat_buf; - char segpath[EXRTO_FILE_PATH_LEN]; - - exrto_get_file_path(rnode.node, forknum, segno, segpath); - if (stat(segpath, &stat_buf) < 0) { - if (errno != ENOENT) { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not stat file \"%s\" before removing: %m", segpath))); - } - return false; + char pathbuf[EXRTO_FILE_PATH_LEN]; + char **filenames; + char **filename; + struct stat statbuf; + /* get file directory */ + char exrto_block_info_dir[EXRTO_FILE_PATH_LEN] = {0}; + int rc = snprintf_s(exrto_block_info_dir, EXRTO_FILE_PATH_LEN, 
EXRTO_FILE_PATH_LEN - 1, "%s/%s", EXRTO_FILE_DIR, + EXRTO_FILE_SUB_DIR[type]); + securec_check_ss(rc, "", ""); + /* get all files' name from block meta file directory */ + filenames = pgfnames(exrto_block_info_dir); + if (filenames == NULL) { + return; } - if (unlink(segpath) < 0) { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", segpath))); + + /* use the prefix name to match up files we want to delete */ + size_t prefix_len = strlen(target_prefix); + for (filename = filenames; *filename != NULL; filename++) { + char *fname = *filename; + size_t fname_len = strlen(fname); + /* + * the length of prefix is less than the length of file name and must be the same under the same prefix_len + */ + if (prefix_len >= fname_len || strncmp(target_prefix, fname, prefix_len) != 0) { + continue; + } + if (segno > 0) { + uint32 batch_id, worker_id; + uint64 f_segno; + const int para_num = 3; + if (sscanf_s(fname, "%02X%02X%016lX", &batch_id, &worker_id, &f_segno) != para_num) { + continue; + } + if (f_segno >= segno) { + continue; + } + } + + rc = + snprintf_s(pathbuf, EXRTO_FILE_PATH_LEN, EXRTO_FILE_PATH_LEN - 1, "%s/%s", exrto_block_info_dir, *filename); + securec_check_ss(rc, "", ""); + /* may be can be some error */ + if (lstat(pathbuf, &statbuf) != 0) { + if (errno != ENOENT) { + ereport(WARNING, (errmsg("could not stat file or directory \"%s\" \n", pathbuf))); + } + continue; + } + /* if the file is a directory, don't touch it */ + if (S_ISDIR(statbuf.st_mode)) { + /* skip dir */ + continue; + } + /* delete this file we found */ + if (unlink(pathbuf) != 0) { + if (errno != ENOENT) { + ereport(WARNING, (errmsg("could not remove file or directory \"%s\" ", pathbuf))); + } + } } - return true; + pgfnames_cleanup(filenames); + return; } -void exrto_unlink_file(const RelFileNodeBackend &rnode, ForkNumber forknum, BlockNumber blocknum) +void exrto_unlink(const RelFileNodeBackend &rnode, ForkNumber forknum, bool is_redo, BlockNumber 
blocknum) { - uint64 segno; + char target_prefix[EXRTO_FILE_PATH_LEN] = {0}; ExRTOFileType type = exrto_file_type(rnode.node.spcNode); + uint64 segno; + errno_t rc; + if (type == BLOCK_INFO_META) { /* unlink all files */ - extreme_rto_standby_read::remove_block_meta_info_files_of_db(rnode.node.dbNode, rnode.node.relNode); + rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%u_%u_", rnode.node.dbNode, rnode.node.relNode); + securec_check_ss(rc, "", ""); + exrto_unlink_file_with_prefix(target_prefix, type); } else if (type == BASE_PAGE || type == LSN_INFO_META) { /* just unlink the files before the file where blocknum is */ segno = get_seg_num(rnode, blocknum); - while (segno != 0) { - segno -= 1; - if (!exrto_unlink_single_file(rnode, forknum, segno)) { - return; - } + if (segno > 0) { + uint32 batch_id = rnode.node.dbNode >> LOW_WORKERID_BITS; + uint32 worker_id = rnode.node.dbNode & LOW_WORKERID_MASK; + rc = sprintf_s(target_prefix, EXRTO_FILE_PATH_LEN, "%02x%02x", batch_id, worker_id); + securec_check_ss(rc, "", ""); + exrto_unlink_file_with_prefix(target_prefix, type, segno); } } } -void exrto_unlink(const RelFileNodeBackend &rnode, ForkNumber forknum, bool is_redo, BlockNumber blocknum) -{ - ExRTOFileType type = exrto_file_type(rnode.node.spcNode); - if (type == BASE_PAGE || type == LSN_INFO_META) { - forknum = MAIN_FORKNUM; - } - if (forknum == InvalidForkNumber) { - for (int fork_num = 0; fork_num < EXRTO_FORK_NUM; fork_num++) { - exrto_unlink_file(rnode, (ForkNumber)fork_num, blocknum); - } - } else { - exrto_unlink_file(rnode, forknum, blocknum); - } -} - /* extend EXTEND_BLOCKS_NUM pages */ void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skip_fsync) { @@ -336,8 +362,11 @@ void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, c state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); filename = FilePathName(state->file[forknum]); if (stat(filename, &file_stat) < 0) { +
char filepath[EXRTO_FILE_PATH_LEN]; + errno_t rc = strcpy_s(filepath, EXRTO_FILE_PATH_LEN, filename); + securec_check(rc, "\0", "\0"); exrto_close(reln, forknum, InvalidBlockNumber); - ereport(ERROR, (errmsg("could not stat file \"%s\": %m.", filename))); + ereport(ERROR, (errmsg("could not stat file \"%s\": %m.", filepath))); } Assert(file_stat.st_size % BLCKSZ == 0); Assert(file_stat.st_size <= EXRTO_FILE_SIZE[type]); @@ -402,14 +431,15 @@ SMGR_READ_STATUS exrto_read(SMgrRelation reln, ForkNumber forknum, BlockNumber b } if (nbytes != BLCKSZ) { char *filename = FilePathName(state->file[forknum]); + char filepath[EXRTO_FILE_PATH_LEN]; + rc = strcpy_s(filepath, EXRTO_FILE_PATH_LEN, filename); + securec_check(rc, "\0", "\0"); exrto_close(reln, forknum, InvalidBlockNumber); if (nbytes < 0) { - ereport(ERROR, - (errmsg("could not read block %u in file \"%s\": %m.", blocknum, filename))); + ereport(ERROR, (errmsg("could not read block %u in file \"%s\": %m.", blocknum, filepath))); } - ereport(ERROR, - (errmsg("could not read block %u in file \"%s\": read only %d of %d bytes.", blocknum, filename, - nbytes, BLCKSZ))); + ereport(ERROR, (errmsg("could not read block %u in file \"%s\": read only %d of %d bytes.", blocknum, filepath, + nbytes, BLCKSZ))); } if (PageIsVerified((Page)buffer, blocknum)) { @@ -441,14 +471,15 @@ void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co nbytes = FilePWrite(state->file[forknum], buffer, BLCKSZ, seekpos); if (nbytes != BLCKSZ) { char *filename = FilePathName(state->file[forknum]); + char filepath[EXRTO_FILE_PATH_LEN]; + errno_t rc = strcpy_s(filepath, EXRTO_FILE_PATH_LEN, filename); + securec_check(rc, "\0", "\0"); exrto_close(reln, forknum, InvalidBlockNumber); if (nbytes < 0) { - ereport(ERROR, - (errmsg("could not write block %u in file \"%s\": %m.", blocknum, filename))); + ereport(ERROR, (errmsg("could not write block %u in file \"%s\": %m.", blocknum, filepath))); } - ereport(ERROR, - (errmsg("could not 
write block %u in file \"%s\": wrote only %d of %d bytes.", - blocknum, filename, nbytes, BLCKSZ))); + ereport(ERROR, (errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes.", blocknum, + filepath, nbytes, BLCKSZ))); } } diff --git a/src/include/access/extreme_rto/dispatcher.h b/src/include/access/extreme_rto/dispatcher.h index ed5e610581..d6b8c8ea66 100644 --- a/src/include/access/extreme_rto/dispatcher.h +++ b/src/include/access/extreme_rto/dispatcher.h @@ -165,7 +165,6 @@ typedef struct { volatile bool recoveryStop; volatile XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM]; RedoTimeCost *startupTimeCost; - ExrtoSnapshotData exrto_snapshot; } LogDispatcher; typedef struct { @@ -183,6 +182,7 @@ extern THR_LOCAL RecordBufferState *g_recordbuffer; const static uint64 OUTPUT_WAIT_COUNT = 0x7FFFFFF; const static uint64 PRINT_ALL_WAIT_COUNT = 0x7FFFFFFFF; +const static uint64 STOP_WORKERS_WAIT_COUNT = 0x13FFFFFFFF; extern RedoItem g_redoEndMark; extern RedoItem g_terminateMark; extern uint32 g_readManagerTriggerFlag; diff --git a/src/include/access/extreme_rto/standby_read/block_info_meta.h b/src/include/access/extreme_rto/standby_read/block_info_meta.h index b1d9eb18a3..d4cd7ed73a 100644 --- a/src/include/access/extreme_rto/standby_read/block_info_meta.h +++ b/src/include/access/extreme_rto/standby_read/block_info_meta.h @@ -77,8 +77,8 @@ typedef enum { STANDBY_READ_RECLYE_ALL, } StandbyReadRecyleState; -BlockMetaInfo* get_block_meta_info_by_relfilenode( - const BufferTag& buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer* buffer); +BlockMetaInfo *get_block_meta_info_by_relfilenode(const BufferTag &buf_tag, BufferAccessStrategy strategy, + ReadBufferMode mode, Buffer *buffer, bool need_share_lock = false); void insert_lsn_to_block_info( StandbyReadMetaInfo* mete_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn); StandbyReadRecyleState recyle_block_info( @@ -98,7 +98,7 @@ static inline bool 
is_block_meta_info_valid(BlockMetaInfo* meta_info) void remove_one_block_info_file(const RelFileNode rnode); -void remove_block_meta_info_files_of_db(Oid db_oid, Oid rel_oid = InvalidOid); +void remove_block_meta_info_files_of_db(Oid db_oid); } // namespace extreme_rto_standby_read diff --git a/src/include/access/extreme_rto/standby_read/standby_read_base.h b/src/include/access/extreme_rto/standby_read/standby_read_base.h index 714b475068..d61690a932 100644 --- a/src/include/access/extreme_rto/standby_read/standby_read_base.h +++ b/src/include/access/extreme_rto/standby_read/standby_read_base.h @@ -29,6 +29,7 @@ #include "gs_thread.h" #include "postgres.h" #include "storage/buf/bufpage.h" +#include "storage/smgr/smgr.h" #include "postmaster/alarmchecker.h" #define EXRTO_FILE_DIR "standby_read" @@ -67,6 +68,21 @@ typedef struct _StandbyReadMetaInfo { XLogRecPtr recycle_lsn_per_worker; } StandbyReadMetaInfo; +typedef struct WalFilter { + TransactionId by_xid; + bool by_xid_enabled; + bool by_tablepath_enabled; + bool by_block; + bool by_lsn; + RelFileNode by_relfilenode; + BlockNumber block_num; +} WalFilter; + +typedef struct WalPrivate { + const char *data_dir; + TimeLineID tli; +} WalPrivate; + inline void standby_read_meta_page_set_lsn(Page page, XLogRecPtr LSN) { if (XLByteLT(LSN, PageGetLSN(page))) { @@ -75,7 +91,37 @@ inline void standby_read_meta_page_set_lsn(Page page, XLogRecPtr LSN) PageSetLSNInternal(page, LSN); } +inline ExRTOFileType exrto_file_type(uint32 space_oid) +{ + if (space_oid == EXRTO_BASE_PAGE_SPACE_OID) { + return BASE_PAGE; + } else if (space_oid == EXRTO_LSN_INFO_SPACE_OID) { + return LSN_INFO_META; + } else { + return BLOCK_INFO_META; + } +} + +inline uint64 get_total_block_num(ExRTOFileType type, uint32 high, uint32 low) +{ + if (type == BASE_PAGE || type == LSN_INFO_META) { + return ((uint64)high << UINT64_HALF) | low; + } else { + return (uint64)low; + } +} + void exrto_clean_dir(void); void exrto_recycle_old_dir(void); void 
exrto_standby_read_init(); +void buffer_drop_exrto_standby_read_buffers(StandbyReadMetaInfo *meta_info = NULL); +void exrto_unlink_file_with_prefix(char *target_prefix, ExRTOFileType type, uint64 segno = 0); +extern void XLogDumpDisplayRecord(XLogReaderState *record, char *strOutput); +extern XLogRecPtr UpdateNextLSN(XLogRecPtr cur_lsn, XLogRecPtr end_lsn, XLogReaderState *xlogreader_state, bool *found); +namespace extreme_rto_standby_read { +void dump_error_all_info(const RelFileNode &rnode, ForkNumber forknum, BlockNumber blocknum); +} +#ifdef ENABLE_UT +extern Page get_page_from_buffer(Buffer buf); +#endif #endif \ No newline at end of file diff --git a/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h b/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h new file mode 100644 index 0000000000..ed372174c0 --- /dev/null +++ b/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * standby_read_delay_ddl.h + * + * + * + * IDENTIFICATION + * src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef STANDBY_READ_DELAY_DDL_H +#define STANDBY_READ_DELAY_DDL_H + +#include "gs_thread.h" +#include "postgres.h" +#include "access/xlogdefs.h" +#include "storage/smgr/relfilenode.h" + +void do_all_old_delay_ddl(); +void delete_by_lsn(XLogRecPtr lsn); +void init_delay_ddl_file(); +void update_delay_ddl_db(Oid db_id, Oid tablespace_id, XLogRecPtr lsn); +void update_delay_ddl_files(ColFileNode* xnodes, int nrels, XLogRecPtr lsn); +void delete_by_table_space(Oid tablespace_id); +#endif \ No newline at end of file diff --git a/src/include/access/multi_redo_api.h b/src/include/access/multi_redo_api.h index 3dbc82dca9..0c1bd1f770 100644 --- a/src/include/access/multi_redo_api.h +++ b/src/include/access/multi_redo_api.h @@ -66,8 +66,15 @@ static const uint32 PAGE_REDO_WORKER_READY = 2; static const uint32 PAGE_REDO_WORKER_EXIT = 3; static const uint32 BIG_RECORD_LENGTH = XLOG_BLCKSZ * 16; -#define IS_EXRTO_READ (g_instance.attr.attr_storage.EnableHotStandby && IsExtremeRedo() && IsDefaultExtremeRtoMode()) +#define IS_EXRTO_READ (IsExtremeRedo() && g_instance.attr.attr_storage.EnableHotStandby && IsDefaultExtremeRtoMode()) #define IS_EXRTO_STANDBY_READ (IS_EXRTO_READ && pm_state_is_hot_standby()) +#define IS_EXRTO_RECOVERY_IN_PROGRESS (RecoveryInProgress() && IsExtremeRedo()) + +inline bool is_exrto_standby_read_worker() +{ + return (t_thrd.role == WORKER || t_thrd.role == THREADPOOL_WORKER || t_thrd.role == THREADPOOL_STREAM || + t_thrd.role == STREAM_WORKER); +} static inline int get_real_recovery_parallelism() { diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index e2e66ffb31..01ae82d157 100644 --- 
a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -43,7 +43,7 @@ PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, NULL, NULL PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, NULL, NULL, NULL, NULL, NULL, relmap_type_name) PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, StandbyXlogStartup, StandbyXlogCleanup, \ - StandbySafeRestartpoint, NULL, NULL, standby_type_name) + NULL, NULL, NULL, standby_type_name) PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL, NULL, NULL, heap2_type_name) PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, NULL, NULL, NULL, NULL, NULL, heap_type_name) diff --git a/src/include/access/ustore/undo/knl_uundozone.h b/src/include/access/ustore/undo/knl_uundozone.h index 5d87696ae1..be6caf8fe0 100644 --- a/src/include/access/ustore/undo/knl_uundozone.h +++ b/src/include/access/ustore/undo/knl_uundozone.h @@ -309,7 +309,7 @@ public: } bool CheckNeedSwitch(UndoRecordSize size); UndoRecordState CheckUndoRecordValid(UndoLogOffset offset, bool checkForceRecycle, TransactionId *lastXid); - bool CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp); + bool CheckRecycle(UndoRecPtr starturp, UndoRecPtr endurp, bool isexrto = false); UndoRecPtr AllocateSpace(uint64 size); void ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRecycleSize); @@ -378,5 +378,7 @@ void AllocateZonesBeforXid(); void InitZone(UndoZone *uzone, const int zoneId, UndoPersistence upersistence); void InitUndoSpace(UndoZone *uzone, UndoSpaceType type); bool VerifyUndoZone(UndoZone *uzone); +void exrto_recycle_residual_undo_file(char *FuncName); + } // namespace undo #endif // __KNL_UUNDOZONE_H__ diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 7e502fe2ab..09ad0c69a2 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -521,7 +521,9 @@ extern void parseAndRemoveLibrary(char* library, int nlibrary); extern bool IsInLiveSubtransaction(); extern 
void ExtendCsnlogForSubtrans(TransactionId parent_xid, int nsub_xid, TransactionId* sub_xids); extern CommitSeqNo SetXact2CommitInProgress(TransactionId xid, CommitSeqNo csn); -extern void XactGetRelFiles(XLogReaderState* record, ColFileNode** xnodesPtr, int* nrelsPtr, bool* compress); +extern void XactGetRelFiles(XLogReaderState* record, ColFileNode** xnodesPtr, int* nrelsPtr); +extern bool xact_has_invalid_msg_or_delete_file(XLogReaderState *record); +extern void send_delay_invalid_message(); extern bool XactWillRemoveRelFiles(XLogReaderState *record); extern HTAB* relfilenode_hashtbl_create(); extern CommitSeqNo getLocalNextCSN(); @@ -563,4 +565,6 @@ extern void BeginTxnForAutoCommitOff(); extern void SetTxnInfoForSSLibpqsw(TransactionId xid, CommandId cid); extern void ClearTxnInfoForSSLibpqsw(); extern bool IsTransactionInProgressState(); +extern void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels); +void xact_redo_log_drop_segs(_in_ ColFileNode *xnodes, _in_ int nrels, XLogRecPtr lsn); #endif /* XACT_H */ diff --git a/src/include/access/xlogproc.h b/src/include/access/xlogproc.h index ed9d926e7f..bed5a495fa 100755 --- a/src/include/access/xlogproc.h +++ b/src/include/access/xlogproc.h @@ -950,7 +950,7 @@ static inline Buffer AtomicExchangeBuffer(volatile Buffer *ptr, Buffer newval) /* this is an estimated value */ static const uint32 MAX_BUFFER_NUM_PER_WAL_RECORD = XLR_MAX_BLOCK_ID + 1; -static const uint32 LSN_MOVE32 = 10; +static const uint32 LSN_MOVE32 = 32; void HeapXlogCleanOperatorPage( RedoBufferInfo* buffer, void* recorddata, void* blkdata, Size datalen, Size* freespace, bool repairFragmentation); @@ -1277,7 +1277,7 @@ extern void XLogBlockSegDdlDoRealAction(XLogBlockHead* blockhead, void* blockrec extern void GinRedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); extern void GistRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatarec, RedoBufferInfo *bufferinfo); extern 
bool IsCheckPoint(const XLogRecParseState *parseState); - +bool is_backup_end(const XLogRecParseState *parse_state); void redo_atomic_xlog_dispatch(uint8 opCode, RedoBufferInfo *redo_buf, const char *data); void seg_redo_new_page_copy_and_flush(BufferTag *tag, char *data, XLogRecPtr lsn); void redo_target_page(const BufferTag& buf_tag, StandbyReadLsnInfoArray* lsn_info, Buffer base_page_buf); diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h index 1a3fcf0212..a394e64e0a 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -76,5 +76,6 @@ extern bool IsSetTableSpace(AlterDatabaseStmt* stmt); extern int errdetail_busy_db(int notherbackends, int npreparedxacts); extern void PreCleanAndCheckConns(const char* dbname, bool missing_ok); #endif +extern void do_db_drop(Oid dbId, Oid tbSpcId); #endif /* DBCOMMANDS_H */ diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index b9c572b816..89ff5a3af3 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -213,7 +213,7 @@ typedef struct knl_instance_attr_storage { int64 max_standby_base_page_size; int64 max_standby_lsn_info_size; int base_page_saved_interval; - double standby_force_recyle_ratio; + double standby_force_recycle_ratio; int standby_recycle_interval; int standby_max_query_time; #ifndef ENABLE_MULTIPLE_NODES diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index 592d4e0339..41fdcd7f1c 100755 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -113,8 +113,6 @@ enum knl_parallel_redo_state { REDO_DONE, }; -typedef struct ExrtoSnapshotData* ExrtoSnapshot; - /* all process level attribute which expose to user */ typedef struct knl_instance_attr { @@ -728,6 +726,15 @@ typedef struct knl_g_parallel_decode_context { ErrorData *edata; } knl_g_parallel_decode_context; 
+typedef struct _ExrtoSnapshotData* ExrtoSnapshot; + +typedef struct _StandbyReadDelayDdlState { + uint64 next_index_need_unlink; + uint64 next_index_can_insert; + uint32 delete_stat; + uint32 insert_stat; +} StandbyReadDelayDdlState; + typedef struct knl_g_parallel_redo_context { RedoType redoType; volatile knl_parallel_redo_state state; @@ -751,6 +758,9 @@ typedef struct knl_g_parallel_redo_context { RedoCpuBindControl redoCpuBindcontrl; XLogRecPtr global_recycle_lsn; /* extreme-rto standby read */ HTAB **redoItemHash; /* used in ondemand extreme RTO */ + ExrtoSnapshot exrto_snapshot; + StandbyReadDelayDdlState standby_read_delay_ddl_stat; + uint64 max_clog_pageno; } knl_g_parallel_redo_context; typedef struct knl_g_heartbeat_context { @@ -1404,4 +1414,3 @@ extern void add_numa_alloc_info(void* numaAddr, size_t length); #define DEFAULT_CREATE_GLOBAL_INDEX (u_sess->attr.attr_storage.default_index_kind == DEFAULT_INDEX_KIND_GLOBAL) #endif /* SRC_INCLUDE_KNL_KNL_INSTANCE_H_ */ - diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h index aaadd977a2..c0292d6d2c 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -685,9 +685,6 @@ typedef struct knl_u_utils_context { HTAB* set_user_params_htab; DestReceiver* spi_printtupDR; - - /* backend read lsn for read on standby in extreme rto */ - XLogRecPtr exrto_read_lsn; } knl_u_utils_context; typedef struct knl_u_security_context { diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 5934204940..e35361a50f 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -77,7 +77,6 @@ #include "port/pg_crc32c.h" #include "ddes/dms/ss_common_attr.h" #include "ddes/dms/ss_txnstatus.h" -#include "access/extreme_rto/standby_read/standby_read_base.h" #define MAX_PATH_LEN 1024 extern const int g_reserve_param_num; @@ -1941,19 +1940,31 @@ typedef struct knl_t_conn_context { const char* _float_inf; } knl_t_conn_context; +typedef struct 
_DelayInvalidMsg { + SharedInvalidationMessage* inval_msgs; + int nmsgs; + uint32 xinfo; + Oid db_id; + Oid ts_id; + XLogRecPtr lsn; + bool relcache_init_file_inval; + bool valid; +} DelayInvalidMsg; + typedef struct { volatile sig_atomic_t shutdown_requested; volatile sig_atomic_t got_SIGHUP; volatile sig_atomic_t sleep_long; volatile sig_atomic_t check_repair; void *redo_worker_ptr; + DelayInvalidMsg invalid_msg; } knl_t_page_redo_context; typedef struct _StandbyReadLsnInfoArray { XLogRecPtr *lsn_array; uint32 lsn_num; XLogRecPtr base_page_lsn; - BasePagePosition base_page_pos; + uint64 base_page_pos; } StandbyReadLsnInfoArray; typedef struct { diff --git a/src/include/storage/buf/bufmgr.h b/src/include/storage/buf/bufmgr.h index 61aa35e549..e3c7b3140a 100644 --- a/src/include/storage/buf/bufmgr.h +++ b/src/include/storage/buf/bufmgr.h @@ -35,6 +35,9 @@ #define IsNvmBufferID(id) ((id) >= NvmBufferStartID && (id) < SegmentBufferStartID) #define IsNormalBufferID(id) ((id) >= 0 && (id) < NvmBufferStartID) +#define ExrtoReadStartLSNBktId (-5) +#define ExrtoReadEndLSNBktId (-6) + #define USE_CKPT_THREAD_SYNC (!g_instance.attr.attr_storage.enableIncrementalCheckpoint || \ IsBootstrapProcessingMode() || \ pg_atomic_read_u32(&g_instance.ckpt_cxt_ctl->current_page_writer_count) < 1) @@ -320,7 +323,6 @@ extern void DropRelFileNodeAllBuffersUsingScan(RelFileNode* rnode, int rnode_len extern void DropRelFileNodeOneForkAllBuffersUsingHash(HTAB *relfilenode_hashtbl); extern void DropDatabaseBuffers(Oid dbid); -extern void buffer_drop_exrto_standby_read_buffers(); extern BlockNumber PartitionGetNumberOfBlocksInFork(Relation relation, Partition partition, ForkNumber forkNum, bool estimate = false); diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 8f1c8e2369..9de4afff5c 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -66,7 +66,8 @@ extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); extern void 
XactLockTableInsert(TransactionId xid); extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid, bool allow_con_update = false, int waitSec = 0); -extern bool ConditionalXactLockTableWait(TransactionId xid, bool waitparent = true, bool bCareNextxid = false); +extern bool ConditionalXactLockTableWait(TransactionId xid, const Snapshot snapshot = NULL, bool waitparent = true, + bool bCareNextxid = false); /* Lock a SubXID */ extern void SubXactLockTableInsert(SubTransactionId subxid); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 6b25fa8ce3..915f3f791b 100755 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -244,6 +244,9 @@ struct PGPROC { /* commit sequence number send down */ CommitSeqNo commitCSN; + XLogRecPtr exrto_read_lsn; /* calculate recycle lsn for read on standby in extreme rto */ + TimestampTz exrto_gen_snap_time; + /* Support for group transaction status update. */ bool clogGroupMember; /* true, if member of clog group */ pg_atomic_uint32 clogGroupNext; /* next clog group member */ @@ -272,8 +275,9 @@ struct PGPROC { uint64 snap_refcnt_bitmap; #endif - XLogRecPtr exrto_read_lsn; /* calculate recycle lsn for read on standby in extreme rto */ - TimestampTz exrto_gen_snap_time; + XLogRecPtr exrto_min; /* calculate recycle lsn for read on standby in extreme rto */ + + bool exrto_reload_cache; LWLock* subxidsLock; struct XidCache subxids; /* cache for subtransaction XIDs */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 290d88c504..6f0b4aa9d3 100755 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -50,7 +50,7 @@ extern int GetRoleIdCount(Oid roleoid); extern int IncreaseUserCount(Oid roleoid); extern int DecreaseUserCount(Oid roleoid); -extern void SyncLocalXidWait(TransactionId xid); +extern void SyncLocalXidWait(TransactionId xid, const Snapshot snapshot = NULL); extern Size ProcArrayShmemSize(void); 
extern void CreateSharedProcArray(void); @@ -84,6 +84,7 @@ Snapshot GetSnapshotData(Snapshot snapshot, bool force_local_snapshot, bool forH #else extern Snapshot GetSnapshotData(Snapshot snapshot, bool force_local_snapshot); #endif +void exrto_get_snapshot_data(TransactionId &xmin, TransactionId &xmax, CommitSeqNo &snapshot_csn); extern Snapshot GetLocalSnapshotData(Snapshot snapshot); @@ -178,7 +179,7 @@ extern void InitProcSubXidCacheContext(); extern void ProcArrayResetXmin(PGPROC* proc); extern uint64 GetCommitCsn(); extern void setCommitCsn(uint64 commit_csn); -extern void SyncWaitXidEnd(TransactionId xid, Buffer buffer); +extern void SyncWaitXidEnd(TransactionId xid, Buffer buffer, const Snapshot snapshot = NULL); extern CommitSeqNo calculate_local_csn_min(); extern void proc_cancel_invalid_gtm_lite_conn(); extern void forward_recent_global_xmin(void); diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 2e5e404eb4..84fbbb4bdd 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -132,10 +132,16 @@ typedef union SharedInvalidationMessage { SharedInvalFuncMsg fm; } SharedInvalidationMessage; +typedef struct _SharedInvalidationMessageEx { + SharedInvalidationMessage msg; + XLogRecPtr lsn; +} SharedInvalidationMessageEx; + /* Counter of messages processed; don't worry about overflow. 
*/ extern THR_LOCAL volatile sig_atomic_t catchupInterruptPending; extern void SendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n); +void send_shared_invalid_messages(const SharedInvalidationMessage* msgs, int n, XLogRecPtr lsn); extern void ReceiveSharedInvalidMessages( void (*invalFunction)(SharedInvalidationMessage* msg), void (*resetFunction)(void), bool worksession); @@ -152,7 +158,7 @@ extern void ProcessCatchupInterrupt(void); extern int xactGetCommittedInvalidationMessages(SharedInvalidationMessage** msgs, bool* RelcacheInitFileInval); extern void ProcessCommittedInvalidationMessages( - SharedInvalidationMessage* msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid); + SharedInvalidationMessage* msgs, int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid, XLogRecPtr lsn); extern void LocalExecuteThreadAndSessionInvalidationMessage(SharedInvalidationMessage* msg); extern void LocalExecuteThreadInvalidationMessage(SharedInvalidationMessage* msg); extern void LocalExecuteSessionInvalidationMessage(SharedInvalidationMessage* msg); diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index f77f5baccd..48ede75f56 100644 --- a/src/include/storage/sinvaladt.h +++ b/src/include/storage/sinvaladt.h @@ -34,7 +34,7 @@ extern void CleanupWorkSessionInvalidation(void); extern void SharedInvalBackendInit(bool sendOnly, bool worksession); extern PGPROC* BackendIdGetProc(int backendID); -extern void SIInsertDataEntries(const SharedInvalidationMessage* data, int n); +extern void SIInsertDataEntries(const SharedInvalidationMessage* data, int n, XLogRecPtr lsn = 0); extern int SIGetDataEntries(SharedInvalidationMessage* data, int datasize, bool worksession); extern void SICleanupQueue(bool callerHasWriteLock, int minFree); diff --git a/src/include/storage/smgr/smgr.h b/src/include/storage/smgr/smgr.h index 9acbb16433..2883690389 100644 --- a/src/include/storage/smgr/smgr.h +++ b/src/include/storage/smgr/smgr.h @@ 
-128,7 +128,7 @@ enum SMGR_READ_STATUS { #define EXRTO_BASE_PAGE_SPACE_OID (6) #define EXRTO_LSN_INFO_SPACE_OID (7) #define EXRTO_BLOCK_INFO_SPACE_OID (8) -#define EXRTO_FORK_NUM 3 +#define EXRTO_FORK_NUM (MAX_FORKNUM + 1) /* parenthesized so the macro expands safely inside larger expressions */ #define MD_MANAGER (0) #define UNDO_MANAGER (1) diff --git a/src/include/utils/be_module.h b/src/include/utils/be_module.h index d7db7a9c4c..a288a1e640 100755 --- a/src/include/utils/be_module.h +++ b/src/include/utils/be_module.h @@ -141,6 +141,8 @@ enum ModuleId { MOD_LOGICAL_DECODE, /* logical decode */ MOD_GPRC, /* global package runtime cache */ MOD_DISASTER_READ, + MOD_STANDBY_READ, + MODE_REPSYNC, /* debug info for func SyncRepWaitForLSN */ MOD_SQLPATCH, MOD_DMS, /* DMS */ diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 31b855b091..d49113218f 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -77,7 +77,7 @@ extern void CacheInvalidateRelcacheByRelid(Oid relid); extern void CacheInvalidateSmgr(RelFileNodeBackend rnode); -extern void CacheInvalidateRelmap(Oid databaseId); +extern void CacheInvalidateRelmap(Oid databaseId, XLogRecPtr lsn); extern void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple tuple); @@ -100,5 +100,6 @@ extern void CallSessionSyscacheCallbacks(int cacheid, uint32 hashvalue); extern void InvalidateSessionSystemCaches(void); extern void InvalidateThreadSystemCaches(void); extern void CacheInvalidateRelcacheAll(void); +extern void reset_invalidation_cache(); #endif /* INVAL_H */ diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index ce1baa4b9f..2f6dec245c 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -71,6 +71,7 @@ extern void RecheckXidFinish(TransactionId xid, CommitSeqNo csn); extern Snapshot GetTransactionSnapshot(bool force_local_snapshot = false); extern Snapshot GetLatestSnapshot(void); extern Snapshot GetCatalogSnapshot(); +extern Snapshot get_toast_snapshot(); extern void SnapshotSetCommandId(CommandId
curcid); extern void PushActiveSnapshot(Snapshot snapshot); diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 1c38951e6b..1ac8a293d9 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -211,7 +211,7 @@ typedef struct SnapshotData { */ TransactionId xmin; /* all XID < xmin are visible to me */ TransactionId xmax; /* all XID >= xmax are invisible to me */ - + XLogRecPtr read_lsn; /* xact lsn when generate snapshot */ /* subxid is in progress and it's the last one modify tuple */ SubTransactionId subxid; @@ -265,9 +265,7 @@ typedef struct SnapshotData { GTM_SnapshotType gtm_snapshot_type; } SnapshotData; -typedef struct ExrtoSnapshotData* ExrtoSnapshot; - -typedef struct ExrtoSnapshotData { +typedef struct _ExrtoSnapshotData { /* * The remaining fields are used only for MVCC snapshots, and are normally * just zeroes in special snapshots. (But xmin and xmax are used @@ -290,6 +288,7 @@ typedef struct ExrtoSnapshotData { XLogRecPtr read_lsn; /* xact lsn when generate snapshot */ TimestampTz gen_snap_time; } ExrtoSnapshotData; +typedef struct _ExrtoSnapshotData *ExrtoSnapshot; /* * Result codes for AM API tuple_{update,delete,lock}, and for visibility.
diff --git a/src/test/ha/GNUmakefile b/src/test/ha/GNUmakefile index ee03e462f4..69e1ad0a31 100644 --- a/src/test/ha/GNUmakefile +++ b/src/test/ha/GNUmakefile @@ -43,6 +43,9 @@ hacheck_multi_single_mot: all hacheck_multi_single_shared_storage: all export prefix=$(prefix) && sh $(CURDIR)/run_ha_multi_single_shared_storage.sh 2 $(PART) +hacheck_single_standby_read: all + export prefix=$(prefix) && sh $(CURDIR)/run_ha_single_standby_read.sh + ## ## Clean up ## diff --git a/src/test/ha/ha_exrto_standby_read b/src/test/ha/ha_exrto_standby_read new file mode 100644 index 0000000000..9aa5316dde --- /dev/null +++ b/src/test/ha/ha_exrto_standby_read @@ -0,0 +1,2 @@ +exrtostandbyread/start_exrto_standby_read +exrtostandbyread/start_exrto_standby_read_multi_data \ No newline at end of file diff --git a/src/test/ha/ha_schedule_single_standby_read b/src/test/ha/ha_schedule_single_standby_read new file mode 100644 index 0000000000..d971647cff --- /dev/null +++ b/src/test/ha/ha_schedule_single_standby_read @@ -0,0 +1 @@ +exrtostandbyread/single_standby_read_base \ No newline at end of file diff --git a/src/test/ha/results/exrtostandbyread/.gitignore b/src/test/ha/results/exrtostandbyread/.gitignore new file mode 100644 index 0000000000..5e7d2734cf --- /dev/null +++ b/src/test/ha/results/exrtostandbyread/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/src/test/ha/run_ha_exrto_standby_read.sh b/src/test/ha/run_ha_exrto_standby_read.sh new file mode 100644 index 0000000000..504afa1ce1 --- /dev/null +++ b/src/test/ha/run_ha_exrto_standby_read.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2010-2018. All rights reserved. 
+# run all the test case of ha + +#init some variables +loop_num=$1 +if [ -z "$1" ]; then + loop_num=1 +fi +count=0 + +source ./standby_env.sh + +total_starttime=`date +"%Y-%m-%d %H:%M:%S"` +total_startvalue=`date -d "$total_starttime" +%s` + +array=("exrtostandbyread") +for element in ${array[@]} +do + mkdir -vp ./results/$element +done + +#init and start the database +if [ "$#" != '2' ]; then + printf "init and start the database\n" + sh deploy_multi_single.sh > ./results/deploy_standby_multi_single.log 2>&1 +fi + +for((i=1;i<=$loop_num;i++)) +do + printf "run the ha_exrto_standby_read %d time\n" $i + printf "%-50s%-10s%-10s\n" "testcase" "result" "time(s)" + for line in `cat ha_exrto_standby_read | grep -v ^#` + do + printf "%-50s" $line + starttime=`date +"%Y-%m-%d %H:%M:%S"` + sh ./testcase/$line.sh > ./results/$line.log 2>&1 + endtime=`date +"%Y-%m-%d %H:%M:%S"` + starttime1=`date -d "$starttime" +%s` + endtime1=`date -d "$endtime" +%s` + interval=`expr $endtime1 - $starttime1` + if [ $( grep "$failed_keyword" ./results/$line.log | grep -v "the database system is shutting down" | wc -l ) -eq 0 ]; then + printf "%-10s%-10s\n" ".... ok" $interval + count=`expr $count + 1` # count only the test cases that passed + else + printf "%-10s%-10s\n" "....
FAILED" $interval + # exit 0 + fi + done +done + +#stop the database +printf "stop the database\n" +python $scripts_dir/pgxc_multi.py -o > ./results/stop_database_multi.log 2>&1 + +total_endtime=`date +"%Y-%m-%d %H:%M:%S"` +total_endvalue=`date -d "$total_endtime" +%s` +printf "all %d tests passed.\n" $count +printf "total time: %ss\n" $(($total_endvalue - $total_startvalue)) diff --git a/src/test/ha/run_ha_single_standby_read.sh b/src/test/ha/run_ha_single_standby_read.sh new file mode 100644 index 0000000000..8c9035d4fa --- /dev/null +++ b/src/test/ha/run_ha_single_standby_read.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# run the test case of standby-read (needs bash: uses source and for((...)) loops) + +#init some variables +count=0 +loop_num=$1 +if [ -z "$1" ]; then + loop_num=1 +fi + +source ./standby_env.sh + +total_starttime=`date +"%Y-%m-%d %H:%M:%S"` +total_startvalue=`date -d "$total_starttime" +%s` + +#init and start the database +printf "init and start the database\n" +sh deploy_multi_single.sh > ./results/deploy_standby_multi_single.log 2>&1 + +for((i=1;i<=$loop_num;i++)) +do + printf "run the ha_schedule %d time\n" $i + printf "%-50s%-10s%-10s\n" "testcase" "result" "time(s)" + for line in `cat ha_schedule_single_standby_read$2 | grep -v ^#` + do + printf "%-50s" $line + starttime=`date +"%Y-%m-%d %H:%M:%S"` + starttime1=`date -d "$starttime" +%s` + sh ./testcase/$line.sh > ./results/$line.log 2>&1 + endtime=`date +"%Y-%m-%d %H:%M:%S"` + endtime1=`date -d "$endtime" +%s` + interval=`expr $endtime1 - $starttime1` + if [ $( grep "$failed_keyword" ./results/$line.log | grep -v "the database system is shutting down" | wc -l ) -eq 0 ]; then + printf "%-10s%-10s\n" ".... ok" $interval + count=`expr $count + 1` + else + printf "%-10s%-10s\n" "....
FAILED" $interval + echo "hacheck mistakes info in " ./results/$line.log + fi + done +done + +#stop the database +printf "stop the database\n" +python $scripts_dir/pgxc_psd_single.py -o > ./results/stop_database_single.log 2>&1 + +total_endtime=`date +"%Y-%m-%d %H:%M:%S"` +total_endvalue=`date -d "$total_endtime" +%s` +printf "all %d tests passed.\n" $count +printf "total time: %ss\n" $(($total_endvalue - $total_startvalue)) diff --git a/src/test/ha/standby_env.sh b/src/test/ha/standby_env.sh index c9e37b85eb..05743bfe76 100644 --- a/src/test/ha/standby_env.sh +++ b/src/test/ha/standby_env.sh @@ -1,7 +1,7 @@ #!/bin/sh #some enviroment vars -export g_base_port=8888 +export g_base_port=25632 export prefix=${GAUSSHOME} export g_pooler_base_port=`expr $g_base_port \+ 410` export g_base_standby_port=`expr $g_base_port \+ 400` diff --git a/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh b/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh new file mode 100644 index 0000000000..56fcd6bdec --- /dev/null +++ b/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh @@ -0,0 +1,144 @@ +#!/bin/sh + +source ./util.sh + +function check_select_result() +{ + if [ $(echo $result | grep "${1}" | wc -l) -eq 1 ]; then + echo "remote read successful" + else + echo "remote read failed $failed_keyword with [$result]" + exit 1 + fi +} + +function test_base_sql_func() +{ + gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test1; CREATE TABLE test1(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(parallel_workers=8,storage_type=aSTORE);" + gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test2; CREATE TABLE test2(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(storage_type=aSTORE,fillfactor=80) partition by 
hash(contentId);" + gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test3; CREATE TABLE test3(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(storage_type=uSTORE,fillfactor=40) partition by list(contentId) (partition p1 values ('1') ,partition p2 values ('2') ,partition p3 values ('3') ,partition p4 values (default));" + + gsql -d test_standby_read_base -p $dn1_primary_port -c "insert into test1 select generate_series(1,20), generate_series(1,20), generate_series(1,20);" + gsql -d test_standby_read_base -p $dn1_primary_port -c "insert into test2 select generate_series(1,300), generate_series(1,300), generate_series(1,300);" + gsql -d test_standby_read_base -p $dn1_primary_port -c "insert into test3 select generate_series(1,20), generate_series(1,20), generate_series(1,20);" + + gsql -d test_standby_read_base -p $dn1_primary_port -c "delete test1 where contentId = 8;" + gsql -d test_standby_read_base -p $dn1_primary_port -c "delete test2 where contentId = 8;" + gsql -d test_standby_read_base -p $dn1_primary_port -c "delete test3 where contentId = 8;" + + gsql -d test_standby_read_base -p $dn1_primary_port -c "update test1 set appId = 2 where contentId = 1;" + gsql -d test_standby_read_base -p $dn1_primary_port -c "update test2 set appId = 2 where contentId = 1;" + gsql -d test_standby_read_base -p $dn1_primary_port -c "update test3 set appId = 2 where contentId = 1;" # update the ustore list-partitioned table as well + + sleep 5 + + echo "execute light-proxy plan" + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c "select * from test1 where appId = 10;"` + check_select_result "10 | 10 | 10 (1 row)" + + echo "execute remote-query plan" + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c "select count(*) from test1, test2 where test1.appId = test2.appId;"` + check_select_result "21 (1 row)" + + echo "execute pbe fqs plan" + result=`gsql -d test_standby_read_base -p
$dn1_standby_port -c 'prepare a as select * from test3 where appId = $1;execute a (10);'` + check_select_result "10 | 10 | 10 (1 row)" + + + echo "execute pbe remote-query plan" + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c 'prepare b as select count(*) from test3, test1 where test3.appId = $1;execute b (10);'` + check_select_result "19 (1 row)" + + echo "execute cursor" + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c "START TRANSACTION;CURSOR cursor1 FOR SELECT * FROM test1 order by 1;FETCH 1 IN cursor1;CLOSE cursor1;END;"` + check_select_result "1 | 1 | 2 (1 row) CLOSE CURSOR COMMIT" + + echo "execute transaction" + gsql -d test_standby_read_base -p $dn1_primary_port -c 'begin; update test2 set test2.appId = test2.contentId + 10 where test2.contentId = 10; select pg_sleep(2); commit;' & + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c "select appId from test2 where test2.contentId = 10;"` + check_select_result "10 (1 row)" + sleep 5 + result=`gsql -d test_standby_read_base -p $dn1_standby_port -c "select appId from test2 where test2.contentId = 10;"` + check_select_result "20 (1 row)" + gsql -d test_standby_read_base -p $dn1_primary_port -c 'update test2 set test2.appId = 10 where test2.contentId = 10; select pg_sleep(3);' +} + +function test_standby_read_base_func() +{ + set_default + kill_cluster + + echo "set guc" + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 1" + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_redo_workers = 1" + gs_guc set -Z datanode -D $primary_data_dir -c "hot_standby = on" + + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c "hot_standby = on" + + start_cluster + echo "start cluster success" + sleep 2 + + echo "prepare data" + gsql -d $db -p $dn1_primary_port -c "create database 
test_standby_read_base with encoding='UTF8' template=template0;" + + echo "test serial redo" + + test_base_sql_func + + sleep 2 + + echo "test paraller redo" + + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 1" + + gs_guc set -Z datanode -D $primary_data_dir -c " recovery_max_workers = 4" + gs_guc set -Z datanode -D $standby_data_dir -c " recovery_max_workers = 4" + + kill_cluster + start_cluster + + test_base_sql_func + + sleep 2 + + echo "test exrto redo" + + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 2" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 2" + + gs_guc set -Z datanode -D $primary_data_dir -c " recovery_redo_workers = 4" + gs_guc set -Z datanode -D $standby_data_dir -c " recovery_redo_workers = 4" + + kill_cluster + start_cluster + + test_base_sql_func +} + +function tear_down() { + sleep 1 + gsql -d $db -p $cn1_port -c "DROP DATABASE if exists test_standby_read_base;" + + stop_streaming_cluster + + echo "reset guc" + + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers=1" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers=1" + + gs_guc set -Z datanode -D $primary_data_dir -c " recovery_redo_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c " recovery_redo_workers = 1" + + gs_guc set -Z datanode -D $primary_data_dir -c " recovery_max_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c " recovery_max_workers = 1" + + gs_guc set -Z datanode -D $primary_data_dir -c "hot_standby = on" + gs_guc set -Z datanode -D $standby_data_dir -c "hot_standby = on" +} + +test_standby_read_base_func +tear_down \ No newline at end of file diff --git a/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read.sh b/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read.sh new file mode 100644 index 0000000000..e9d97222c5 --- /dev/null +++ 
b/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read.sh @@ -0,0 +1,81 @@ +source ./util.sh + +function test_1() +{ + set_default + kill_cluster + printf "set extreme_rto_standby_read para\n" + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 3" + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_redo_workers = 3" + gs_guc set -Z datanode -D $primary_data_dir -c "hot_standby = on" + + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 3" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 3" + gs_guc set -Z datanode -D $standby_data_dir -c "hot_standby = on" + start_cluster + echo "start cluster success" + sleep 5 + + echo "insert data on primary" + gsql -d postgres -p ${dn1_primary_port} -m -c "drop table aaa;" + gsql -d postgres -p ${dn1_primary_port} -m -c "create table aaa(number int);" + gsql -d postgres -p ${dn1_primary_port} -m -c "insert into aaa(number) values(100);" + + echo "primary guc check" + res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show recovery_parse_workers \x" | grep recovery_parse_workers | awk '{print $NF}'` + if [ "$res" != 3 ]; then + echo "extreme_rto_standby_read is off 1, $failed_keyword" + exit 1 + fi + + res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show recovery_redo_workers \x" | grep recovery_redo_workers | awk '{print $NF}'` + if [ "$res" != 3 ]; then + echo "extreme_rto_standby_read is off 2, $failed_keyword" + exit 1 + fi + + res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show hot_standby \x" | grep hot_standby | awk '{print $NF}'` + if [ "$res" != "on" ]; then + echo "extreme_rto_standby_read is off 3, $failed_keyword" + exit 1 + fi + + echo "standby guc check" + res=`gsql -d postgres -p ${dn1_standby_port} -m -c "show recovery_parse_workers \x" | grep recovery_parse_workers | awk '{print $NF}'` + if [ "$res" != 3 ]; then + echo "extreme_rto_standby_read is off 4, $failed_keyword" + exit 1 + fi + + res=`gsql -d postgres -p 
${dn1_standby_port} -m -c "show recovery_redo_workers \x" | grep recovery_redo_workers | awk '{print $NF}'` + if [ "$res" != 3 ]; then + echo "extreme_rto_standby_read is off 5, $failed_keyword" + exit 1 + fi + + res=`gsql -d postgres -p ${dn1_standby_port} -m -c "show hot_standby \x" | grep hot_standby | awk '{print $NF}'` + if [ "$res" != "on" ]; then + echo "extreme_rto_standby_read is off 6, $failed_keyword" + exit 1 + fi + + echo "query data on standby" + res=`gsql -d postgres -p ${dn1_standby_port} -m -c "select * from aaa;" -x | grep number | awk '{print $NF}'` + if [ "$res" != 100 ]; then + echo "extreme_rto_standby_read is off 7, $failed_keyword" + exit 1 + else + echo "extreme_rto_standby_read is running" + fi +} + +function tear_down() { + stop_streaming_cluster + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 1" + gs_guc set -Z datanode -D $primary_data_dir -c "recovery_redo_workers = 1" + + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 1" + gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 1" +} +test_1 +tear_down
diff --git a/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read_multi_data.sh b/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read_multi_data.sh
new file mode 100644
index 0000000000..7767749940
--- /dev/null
+++ b/src/test/ha/testcase/exrtostandbyread/start_exrto_standby_read_multi_data.sh
@@ -0,0 +1,112 @@
+source ./util.sh
+
+# Start the cluster with extreme RTO standby read enabled, insert rows on the
+# primary, check the related GUCs on both nodes, then verify that the standby
+# returns every inserted row in insertion order.
+function test_1()
+{
+  # Rows inserted on the primary and expected back from the standby.
+  local row_count=100
+  local result_file=./results/exrtostandbyread/start_exrto_standby_read_multi_data.txt
+
+  set_default
+  kill_cluster
+  printf "set extreme_rto_standby_read para\n"
+  gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 3"
+  gs_guc set -Z datanode -D $primary_data_dir -c "recovery_redo_workers = 3"
+  gs_guc set -Z datanode -D $primary_data_dir -c "hot_standby = on"
+
+  gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 3"
+  gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 3"
+  gs_guc set -Z datanode -D $standby_data_dir -c "hot_standby = on"
+  start_cluster
+  echo "start cluster success"
+  sleep 5
+
+  echo "insert data on primary"
+  gsql -d postgres -p ${dn1_primary_port} -m -c "drop table aaa;"
+  gsql -d postgres -p ${dn1_primary_port} -m -c "create table aaa(number int);"
+  for((i=1;i<=row_count;i++))
+  do
+    gsql -d postgres -p ${dn1_primary_port} -m -c "insert into aaa(number) values($i);"
+  done
+
+  echo "primary guc check"
+  res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show recovery_parse_workers \x" | grep recovery_parse_workers | awk '{print $NF}'`
+  if [ "$res" != 3 ]; then
+    echo "extreme_rto_standby_read is off 1, $failed_keyword"
+    exit 1
+  fi
+
+  res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show recovery_redo_workers \x" | grep recovery_redo_workers | awk '{print $NF}'`
+  if [ "$res" != 3 ]; then
+    echo "extreme_rto_standby_read is off 2, $failed_keyword"
+    exit 1
+  fi
+
+  res=`gsql -d postgres -p ${dn1_primary_port} -m -c "show hot_standby \x" | grep hot_standby | awk '{print $NF}'`
+  if [ "$res" != "on" ]; then
+    echo "extreme_rto_standby_read is off 3, $failed_keyword"
+    exit 1
+  fi
+
+  echo "standby guc check"
+  res=`gsql -d postgres -p ${dn1_standby_port} -m -c "show recovery_parse_workers \x" | grep recovery_parse_workers | awk '{print $NF}'`
+  if [ "$res" != 3 ]; then
+    echo "extreme_rto_standby_read is off 4, $failed_keyword"
+    exit 1
+  fi
+
+  res=`gsql -d postgres -p ${dn1_standby_port} -m -c "show recovery_redo_workers \x" | grep recovery_redo_workers | awk '{print $NF}'`
+  if [ "$res" != 3 ]; then
+    echo "extreme_rto_standby_read is off 5, $failed_keyword"
+    exit 1
+  fi
+
+  res=`gsql -d postgres -p ${dn1_standby_port} -m -c "show hot_standby \x" | grep hot_standby | awk '{print $NF}'`
+  if [ "$res" != "on" ]; then
+    echo "extreme_rto_standby_read is off 6, $failed_keyword"
+    exit 1
+  fi
+
+  echo "query data on standby"
+  gsql -d postgres -p ${dn1_standby_port} -m -c "select * from aaa;" -x | grep number | awk '{print $NF}' > "$result_file"
+  # "$?" would only report awk (the last stage); check every stage so a failed
+  # gsql or a grep that matched nothing is not masked.
+  for stage_status in "${PIPESTATUS[@]}"
+  do
+    if [ "$stage_status" -ne 0 ]; then
+      echo "extreme_rto_standby_read is off 7, $failed_keyword"
+      exit 1
+    fi
+  done
+
+  # Row k of the result must hold the value k.
+  number=0
+  while read -r line
+  do
+    number=$((number + 1))
+    if [ "$line" != "$number" ]; then
+      echo "extreme_rto_standby_read query data is wrong, $line, $number, $failed_keyword"
+      exit 1
+    fi
+  done < "$result_file"
+
+  # A short or empty result must fail instead of passing the loop vacuously.
+  if [ "$number" -ne "$row_count" ]; then
+    echo "extreme_rto_standby_read query data is wrong, $number, $row_count, $failed_keyword"
+    exit 1
+  fi
+}
+
+# Stop the cluster and set the recovery worker GUCs back to 1 on both nodes.
+function tear_down() {
+  stop_streaming_cluster
+  gs_guc set -Z datanode -D $primary_data_dir -c "recovery_parse_workers = 1"
+  gs_guc set -Z datanode -D $primary_data_dir -c "recovery_redo_workers = 1"
+
+  gs_guc set -Z datanode -D $standby_data_dir -c "recovery_parse_workers = 1"
+  gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 1"
+}
+test_1
+tear_down
diff --git a/src/test/regress/CMakeLists.txt b/src/test/regress/CMakeLists.txt index 10ddb4731a..1e959290c4 100755 --- a/src/test/regress/CMakeLists.txt +++ b/src/test/regress/CMakeLists.txt @@ -61,6 +61,7 @@ add_func_target_withargs_fastcheck(hacheck_decode hacheck_decode) add_func_target_withargs_fastcheck(hacheck_multi_single hacheck_multi_single) add_func_target_withargs_fastcheck(hacheck_multi_single_mot hacheck_multi_single_mot) add_func_target_withargs_fastcheck(hacheck_single_paxos hacheck_single_paxos) +add_func_target_withargs_fastcheck(hacheck_single_standby_read hacheck_single_standby_read) add_func_target_withargs_fastcheck(decodecheck_single decodecheck_single) add_func_target_withargs_fastcheck(upgradecheck_single upgradecheck_single) add_func_target_withargs_fastcheck(hacheck_ss_all hacheck_ss_all) diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index 3a3de2b2d4..6935231d76 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -12,7 +12,7 @@ test: btree_deduplication # This test case is used to monitor GUC parameter information.
# If the GUC parameter is changed, please modify the Code/src/bin/gs_guc/cluster_guc.conf and Code/src/test/regress/output/recovery_2pc_tools.source files. #test: recovery_2pc_tools recovery_2pc_tools02 -test: recovery_2pc_tools +#test: recovery_2pc_tools test: sqlbypass_partition test: sqlpatch_base diff --git a/src/test/regress/parallel_schedule0A b/src/test/regress/parallel_schedule0A index 64d6d228fb..76da7e9e0e 100644 --- a/src/test/regress/parallel_schedule0A +++ b/src/test/regress/parallel_schedule0A @@ -9,7 +9,7 @@ # This test case is used to monitor GUC parameter information. # If the GUC parameter is changed, please modify the Code/src/bin/gs_guc/cluster_guc.conf and Code/src/test/regress/output/recovery_2pc_tools.source files. #test: recovery_2pc_tools recovery_2pc_tools02 -test: recovery_2pc_tools +#test: recovery_2pc_tools test: sqlbypass_partition test: sqlpatch_base diff --git a/src/test/regress/single_check.sh b/src/test/regress/single_check.sh index 03d19a1a43..203eeda949 100755 --- a/src/test/regress/single_check.sh +++ b/src/test/regress/single_check.sh @@ -265,6 +265,8 @@ function real_hacheck() sh ./run_paxos_single.sh ;; hacheck_ss_all) sh ./run_ha_single_ss.sh ;; + hacheck_single_standby_read) + sh ./run_ha_single_standby_read.sh ;; *) echo "module $module is not valid" ;; esac @@ -520,7 +522,7 @@ case $DO_CMD in --wlmcheck_single|wlmcheck_single) args_val="-d 6 -c 3 -p $p -r ${runtest}" real_wmlcheck parallel_schedule${part}.wlm make_wlmcheck_postgresql.conf "${args_val}" ;; - --hacheck_single_all|hacheck_single_all|--hacheck_single|hacheck_single|--hacheck_multi_single|hacheck_multi_single|--hacheck_multi_single_mot|hacheck_multi_single_mot|--hacheck_decode|hacheck_decode|--hacheck_single_paxos|hacheck_single_paxos|--hacheck_ss_all|hacheck_ss_all) + 
--hacheck_single_all|hacheck_single_all|--hacheck_single|hacheck_single|--hacheck_multi_single|hacheck_multi_single|--hacheck_multi_single_mot|hacheck_multi_single_mot|--hacheck_decode|hacheck_decode|--hacheck_single_paxos|hacheck_single_paxos|--hacheck_ss_all|hacheck_ss_all|--hacheck_single_standby_read|hacheck_single_standby_read) args_val=$(echo $DO_CMD | sed 's\--\\g') real_hacheck "${args_val}";; --fastcheck_ledger_single|fastcheck_ledger_single) -- Gitee