diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index bc9ab6a2adcd7e72b4286314a357b7c2edda2006..aeb8f93e69e0e87579f50849d74badd15170c9ae 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -685,6 +685,7 @@ upgrade_mode|int|0,2147483647|NULL|NULL| advance_xlog_file_num|int|0,1000000|NULL|NULL| numa_distribute_mode|string|0,0|NULL|NULL| defer_csn_cleanup_time|int|0,2147483647|ms|NULL| +exrto_standby_read_opt|bool|0,0|NULL|NULL| force_promote|int|0,1|NULL|NULL| max_keep_log_seg|int|0,2147483647|NULL|NULL| datanode_heartbeat_interval|int|1000,60000|ms|The value is best configured less than half of the wal_receiver_timeout and wal_sender_timeout.| diff --git a/src/common/backend/catalog/storage.cpp b/src/common/backend/catalog/storage.cpp index d27c6aa14a52115bd4eac6f09128074384893aa6..922cbda6b013189f99e008636ca0df09eba41093 100644 --- a/src/common/backend/catalog/storage.cpp +++ b/src/common/backend/catalog/storage.cpp @@ -1245,7 +1245,7 @@ void smgr_redo_create(RelFileNode rnode, ForkNumber forkNum, char *data) } } -void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid) +void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid, XLogRecPtr lsn) { if (IS_EXRTO_READ) { const int max_check_times = 1000; @@ -1256,7 +1256,7 @@ void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid RedoInterruptCallBack(); check_times++; reach_max_check_times = (check_times == max_check_times); - conflict = proc_array_cancel_conflicting_proc(latest_removed_xid, reach_max_check_times); + conflict = proc_array_cancel_conflicting_proc(latest_removed_xid, lsn, reach_max_check_times); } } } @@ -1264,7 +1264,7 @@ void smgr_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid void xlog_block_smgr_redo_truncate(RelFileNode rnode, BlockNumber blkno, XLogRecPtr lsn, TransactionId latest_removed_xid) { - 
smgr_redo_truncate_cancel_conflicting_proc(latest_removed_xid); + smgr_redo_truncate_cancel_conflicting_proc(latest_removed_xid, lsn); SMgrRelation reln = smgropen(rnode, InvalidBackendId); smgrcreate(reln, MAIN_FORKNUM, true); UpdateMinRecoveryPoint(lsn, false); diff --git a/src/common/backend/utils/errcodes.txt b/src/common/backend/utils/errcodes.txt index 8e40cec46d8f0c05e3517f5cbd2e604bc41b61e3..c2749761219acdcb4734e2b6c74d0ba944242047 100644 --- a/src/common/backend/utils/errcodes.txt +++ b/src/common/backend/utils/errcodes.txt @@ -630,3 +630,6 @@ Section: Class SE - Security Error 42714 E ERRCODE_DUPLICATE_POLICY duplicate_policy 42715 E ERRCODE_DUPLICATE_LABEL duplicate_label SE001 E ERRCODE_INVALID_AUDIT_LOG invalid_audit_log + +Section: Class SR - Uncorrected Error & warning +SR001 E ERRCODE_SR_RECOVERY_CONFLICT recovery_conflict \ No newline at end of file diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp index 45c289ada374a1ce52955e8956086e09d5f345c7..d51879227a182fda1ad3c712e80fe4bab63f4ade 100644 --- a/src/common/backend/utils/init/globals.cpp +++ b/src/common/backend/utils/init/globals.cpp @@ -75,12 +75,13 @@ bool will_shutdown = false; * NEXT | 92899 | ? | ? * ********************************************/ -const uint32 GRAND_VERSION_NUM = 92912; +const uint32 GRAND_VERSION_NUM = 92913; /******************************************** * 2.VERSION NUM FOR EACH FEATURE * Please write indescending order. 
********************************************/ +const uint32 PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_VERSION = 92913; const uint32 PAGE_DIST_VERSION_NUM = 92912; const uint32 NODE_REFORM_INFO_VERSION_NUM = 92911; const uint32 GB18030_2022_VERSION_NUM = 92908; diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 8c382b3761e85cc6abb42e62dc7cd1254f62e6c5..add4504fddc18947b0440897f8bfb82c9b2d1e25 100755 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -6668,14 +6668,15 @@ bool parse_int(const char* value, int* result, int flags, const char** hintmsg) /* * Try to parse value as an 64-bit integer. The accepted format is - * decimal number. + * decimal number, optionally followed by a unit name if "flags" indicates + * a unit is allowed. NOTE(review): the strtol() call visible in this function passes base 10, so octal and hexadecimal input do not appear to be accepted -- confirm. * * If the string parses okay, return true, else false. * If okay and result is not NULL, return the value in *result. * If not okay and hintmsg is not NULL, *hintmsg is set to a suitable * HINT message, or NULL if no hint provided. */ -bool parse_int64(const char* value, int64* result, const char** hintmsg) +bool parse_int64(const char *value, int64 *result, int flags, const char **hintmsg) { int64 val; char* endptr = NULL; @@ -6698,7 +6699,7 @@ bool parse_int64(const char* value, int64* result, const char** hintmsg) val = strtol(value, &endptr, 10); #endif - if (endptr == value || *endptr != '\0') { + if (endptr == value) { return false; /* no HINT for integer syntax error */ } @@ -6709,6 +6710,38 @@ bool parse_int64(const char* value, int64* result, const char** hintmsg) return false; } + /* allow whitespace between integer and unit */ + while (isspace((unsigned char)*endptr)) + endptr++; + + /* Handle possible unit conversion before check integer overflow */ + if (*endptr != '\0') { + /* + * The conversion itself is delegated to MemoryUnitConvert() or + * TimeUnitConvert(), chosen by whether GUC_UNIT_MEMORY or GUC_UNIT_TIME is set in "flags". 
+ */ + if (flags & GUC_UNIT_MEMORY) { + val = (int64)MemoryUnitConvert(&endptr, val, flags, hintmsg); + } else if (flags & GUC_UNIT_TIME) { + val = (int64)TimeUnitConvert(&endptr, val, flags, hintmsg); + } + + /* allow whitespace after unit */ + while (isspace((unsigned char)*endptr)) + endptr++; + + if (*endptr != '\0') + return false; /* appropriate hint, if any, already set */ + } + + /* Check for integer overflow. NOTE(review): val is already declared int64, so the (int64)val comparison below cannot detect anything and this branch looks unreachable -- confirm where out-of-range values are meant to be rejected. */ + if (val != (int64)val) { + if (hintmsg != nullptr) + *hintmsg = gettext_noop("Value exceeds integer range."); + + return false; + } + if (result != NULL) { *result = val; } @@ -7266,7 +7299,7 @@ static bool validate_conf_int64(struct config_generic *record, const char *name, int64* newval = (newvalue == NULL ? &tmpnewval : (int64*)newvalue); const char* hintmsg = NULL; - if (!parse_int64(value, newval, &hintmsg)) { + if (!parse_int64(value, newval, conf->gen.flags, &hintmsg)) { ereport(elevel, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("parameter \"%s\" requires a numeric value", name))); diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index 1ee3c7c7fecdd27ebd84044505bac3ed3c4817fc..20d996857301e1883c5c8eb14f900979d7c84b1b 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp +++ b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -1227,7 +1227,6 @@ static void InitStorageConfigureNamesBool() NULL, NULL, NULL}, - {{"enable_time_report", PGC_POSTMASTER, NODE_SINGLENODE, @@ -1251,6 +1250,17 @@ static void InitStorageConfigureNamesBool() NULL, NULL, NULL}, + {{"exrto_standby_read_opt", + PGC_POSTMASTER, + NODE_ALL, + REPLICATION_STANDBY, + gettext_noop("Enable performance optimization of extreme-rto standby read."), + NULL}, + &g_instance.attr.attr_storage.enable_exrto_standby_read_opt, + true, + NULL, + NULL, + NULL}, /* End-of-list marker */ {{NULL, @@ -4101,28 +4111,30 @@ static void InitStorageConfigureNamesInt64() NULL}, #ifndef ENABLE_LITE_MODE 
{{"max_standby_base_page_size", - PGC_POSTMASTER, + PGC_SIGHUP, NODE_ALL, RESOURCES_RECOVERY, gettext_noop("Sets the max size of base page files on standby"), - NULL}, - &g_instance.attr.attr_storage.max_standby_base_page_size, - INT64CONST(0x4000000000), /* 256GB */ - INT64CONST(0x40000000), /* 1GB */ - INT64CONST(0x7FFFFFFFFFFFFFF), + NULL, + GUC_UNIT_KB}, + &u_sess->attr.attr_storage.max_standby_base_page_size, + 268435456, /* 256GB */ + 1048576, /* 1GB */ + 562949953421311, NULL, NULL, NULL}, {{"max_standby_lsn_info_size", - PGC_POSTMASTER, + PGC_SIGHUP, NODE_ALL, RESOURCES_RECOVERY, gettext_noop("Sets the max size of lsn info files on standby"), - NULL}, - &g_instance.attr.attr_storage.max_standby_lsn_info_size, - INT64CONST(0x4000000000), /* 256GB */ - INT64CONST(0x40000000), /* 1GB */ - INT64CONST(0x7FFFFFFFFFFFFFF), + NULL, + GUC_UNIT_KB}, + &u_sess->attr.attr_storage.max_standby_lsn_info_size, + 268435456, /* 256GB */ + 1048576, /* 1GB */ + 562949953421311, NULL, NULL, NULL}, diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp index 68edd053daf80bc751f2b11a67d22d892bfbbe5f..d771423e33b156bba080fb41bcaaa23c4e0847cc 100644 --- a/src/common/backend/utils/time/snapmgr.cpp +++ b/src/common/backend/utils/time/snapmgr.cpp @@ -583,6 +583,7 @@ void StreamTxnContextSetSnapShot(void* snapshotPtr) u_sess->utils_cxt.CurrentSnapshot->xmax = snapshot->xmax; u_sess->utils_cxt.CurrentSnapshot->timeline = snapshot->timeline; u_sess->utils_cxt.CurrentSnapshot->snapshotcsn = snapshot->snapshotcsn; + u_sess->utils_cxt.CurrentSnapshot->read_lsn = snapshot->read_lsn; u_sess->utils_cxt.CurrentSnapshot->curcid = snapshot->curcid; diff --git a/src/gausskernel/optimizer/commands/tablespace.cpp b/src/gausskernel/optimizer/commands/tablespace.cpp index dd36b8c32dda402017364a9c7dafdfc1956a6249..3ba6410f8d065c5569127e87705856ab5f9d0d87 100644 --- a/src/gausskernel/optimizer/commands/tablespace.cpp +++ 
b/src/gausskernel/optimizer/commands/tablespace.cpp @@ -97,7 +97,7 @@ #include "storage/tcap.h" static void create_tablespace_directories(const char* location, const Oid tablespaceoid); -static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo); +static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo, bool is_exrto_read = false); static void createtbspc_abort_callback(bool isCommit, const void* arg); Datum CanonicalizeTablespaceOptions(Datum datum); @@ -1487,7 +1487,7 @@ static void createtbspc_abort_callback(bool isCommit, const void* arg) * * Returns TRUE if successful, FALSE if some subdirectory is not empty */ -static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo) +static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo, bool is_exrto_read) { char* linkloc = NULL; char* linkloc_with_version_dir = NULL; @@ -1607,7 +1607,9 @@ static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo) if (rmdir(subfile) < 0) ereport(redo ? LOG : ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", subfile))); - + if (is_exrto_read) { + rmtree(subfile, true); + } if (spc) { spc_unlock(spc); } @@ -2634,7 +2636,7 @@ void xlog_drop_tblspc(Oid tsId) * that would crash the database and require manual intervention * before we could get past this WAL record on restart). 
*/ - if (!destroy_tablespace_directories(tsId, true)) + if (!destroy_tablespace_directories(tsId, true, true)) ereport(LOG, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("directories for tablespace %u could not be removed", tsId), diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 2a4e68afb36e62f81941a1e4e9dc65f42de036ff..8c517e0a2eab6c6f2c31581ee64a24c5c3ab8a43 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -4329,7 +4329,7 @@ static int ServerLoop(void) if (g_instance.attr.attr_storage.enable_ustore && g_instance.pid_cxt.UndoRecyclerPID == 0 && - pmState == PM_RUN) { + (pmState == PM_RUN || IS_EXRTO_STANDBY_READ)) { g_instance.pid_cxt.UndoRecyclerPID = initialize_util_thread(UNDO_RECYCLER); } @@ -6921,6 +6921,9 @@ static void reaper(SIGNAL_ARGS) g_instance.fatal_error = false; g_instance.demotion = NoDemote; t_thrd.postmaster_cxt.ReachedNormalRunning = true; + if ((IS_EXRTO_STANDBY_READ) && (g_instance.pid_cxt.UndoRecyclerPID!= 0)) { + signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGTERM); + } pmState = PM_RUN; if (t_thrd.postmaster_cxt.HaShmData && (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || diff --git a/src/gausskernel/process/stream/streamMain.cpp b/src/gausskernel/process/stream/streamMain.cpp index 9cc8c2be53c465cc8169c01fd4e0c3847f1af8c6..b4206856fffaae0778edbf6d9d490eeb5f0399fc 100755 --- a/src/gausskernel/process/stream/streamMain.cpp +++ b/src/gausskernel/process/stream/streamMain.cpp @@ -486,23 +486,19 @@ static void execute_stream_plan(StreamProducer* producer) PortalDefineQuery(portal, NULL, "DUMMY", commandTag, lappend(NULL, planstmt), NULL); - /* - * Start the portal. No parameters here. - */ - PortalStart(portal, producer->getParams(), 0, producer->getSnapShot()); - /* The value of snapshot.read_lsn may be assigned to thread A and used on thread B. 
So we should reassigned read_lsn to t_thrd of thread B */ if (unlikely(IS_EXRTO_STANDBY_READ && producer->getSnapShot() != NULL)) { t_thrd.proc->exrto_read_lsn = producer->getSnapShot()->read_lsn; t_thrd.proc->exrto_min = t_thrd.proc->exrto_read_lsn; + reset_invalidation_cache(); } - /* The value of snapshot.read_lsn may be assigned to thread A and used on thread B. - So we should reassigned read_lsn to t_thrd of thread B */ - if (unlikely(IS_EXRTO_STANDBY_READ && producer->getSnapShot() != NULL)) { - t_thrd.proc->exrto_read_lsn = producer->getSnapShot()->read_lsn; - } + /* + * Start the portal. No parameters here. + */ + PortalStart(portal, producer->getParams(), 0, producer->getSnapShot()); + format = 0; PortalSetResultFormat(portal, 1, &format); diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index ced83182d83932141928bce02bdf6cb6b748b66a..74e69a81c30c040c76bea9430aa89a4aff902288 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ -6717,7 +6717,7 @@ void ProcessInterrupts(void) " database and repeat your command."))); else ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + (errcode(ERRCODE_SR_RECOVERY_CONFLICT), errmsg("canceling statement due to conflict with recovery"), errdetail_recovery_conflict())); } diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 518e71f7f19e0a67be9f691607de7b8c0787ca91..9b553aaebec51057944f8e7a109880d6ee297d54 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -329,6 +329,7 @@ static void knl_g_parallel_redo_init(knl_g_parallel_redo_context* predo_cxt) rc = memset_s(&predo_cxt->redoCpuBindcontrl, sizeof(RedoCpuBindControl), 0, sizeof(RedoCpuBindControl)); securec_check(rc, "", ""); predo_cxt->global_recycle_lsn = InvalidXLogRecPtr; + predo_cxt->exrto_recyle_xmin = 0; 
predo_cxt->exrto_snapshot = (ExrtoSnapshot)MemoryContextAllocZero( INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(ExrtoSnapshotData)); predo_cxt->redoItemHash = NULL; @@ -337,6 +338,9 @@ static void knl_g_parallel_redo_init(knl_g_parallel_redo_context* predo_cxt) predo_cxt->standby_read_delay_ddl_stat.next_index_can_insert = 0; predo_cxt->standby_read_delay_ddl_stat.next_index_need_unlink = 0; predo_cxt->max_clog_pageno = 0; + predo_cxt->buffer_pin_wait_buf_ids = NULL; + predo_cxt->buffer_pin_wait_buf_len = 0; + predo_cxt->exrto_send_lsn_forworder_time = 0; } static void knl_g_parallel_decode_init(knl_g_parallel_decode_context* pdecode_cxt) @@ -512,6 +516,7 @@ static void KnlGUndoInit(knl_g_undo_context *undoCxt) undoCxt->undoChainTotalSize = 0; undoCxt->globalFrozenXid = InvalidTransactionId; undoCxt->globalRecycleXid = InvalidTransactionId; + undoCxt->hotStandbyRecycleXid = InvalidTransactionId; undoCxt->is_exrto_residual_undo_file_recycled = false; } diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 0f46ecc62b38551bd3ad00cac95987520cc8b13e..8ef2161e506a2882a05babcd59679456706a302c 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1131,6 +1131,7 @@ static void KnlTUndorecyclerInit(knl_t_undorecycler_context* undorecyclerCxt) { undorecyclerCxt->got_SIGHUP = false; undorecyclerCxt->shutdown_requested = false; + undorecyclerCxt->is_recovery_in_progress = false; } static void KnlTUstoreInit(knl_u_ustore_context *ustoreCxt) diff --git a/src/gausskernel/storage/access/common/reloptions.cpp b/src/gausskernel/storage/access/common/reloptions.cpp index 28163d71e05b785ba6cd9600097d2dede4a3449c..afdcf96903de44cbaefa29b3ffdce23b7548bfd4 100644 --- a/src/gausskernel/storage/access/common/reloptions.cpp +++ b/src/gausskernel/storage/access/common/reloptions.cpp @@ -1232,7 +1232,7 @@ static void 
parse_one_reloption(relopt_value *option, const char *text_str, int case RELOPT_TYPE_INT64: { relopt_int64 *optint = (relopt_int64 *)option->gen; - parsed = parse_int64(value, &option->values.int64_val, NULL); + parsed = parse_int64(value, &option->values.int64_val, 0, NULL); if (validate && !parsed) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/gausskernel/storage/access/heap/heapam.cpp b/src/gausskernel/storage/access/heap/heapam.cpp index 5625c0d6074099b69528c898e502d26608da9450..05a5338b8777c71d27b421cf15b6959c52d558f2 100755 --- a/src/gausskernel/storage/access/heap/heapam.cpp +++ b/src/gausskernel/storage/access/heap/heapam.cpp @@ -8741,7 +8741,7 @@ static void heap_xlog_cleanup_info(XLogReaderState* record) return; } - if (InHotStandby && g_supportHotStandby && !IS_EXRTO_READ) { + if (InHotStandby && g_supportHotStandby) { XLogRecPtr lsn = record->EndRecPtr; ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, tmp_node, lsn); } diff --git a/src/gausskernel/storage/access/nbtree/nbtpage.cpp b/src/gausskernel/storage/access/nbtree/nbtpage.cpp index 8173ccb92d6e1493ac3d050339d2650ae53a61b5..33fae015450a81efd320bd91ca1d83218b415744 100644 --- a/src/gausskernel/storage/access/nbtree/nbtpage.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtpage.cpp @@ -24,6 +24,7 @@ #include "postgres.h" #include "knl/knl_variable.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" #include "access/hio.h" #include "access/multi_redo_api.h" #include "access/nbtree.h" @@ -507,10 +508,21 @@ void _bt_checkbuffer_valid(Relation rel, Buffer buf) } } +void exrto_dump_btree_info(Relation rel, BlockNumber blkno, BlockNumber par_blkno) +{ /* Calls extreme_rto_standby_read::dump_error_all_info() for blkno and then for par_blkno on rel->rd_node. NOTE(review): the two branches of the test below are identical, so the ustore check currently has no effect -- confirm whether one branch was meant to differ. */ + if (RelationIsUstoreIndex(rel)) { + extreme_rto_standby_read::dump_error_all_info(rel->rd_node, 0, blkno); + extreme_rto_standby_read::dump_error_all_info(rel->rd_node, 0, par_blkno); + } else { + extreme_rto_standby_read::dump_error_all_info(rel->rd_node, 0, blkno); + 
extreme_rto_standby_read::dump_error_all_info(rel->rd_node, 0, par_blkno); + } +} + /* * _bt_checkpage() -- Verify that a freshly-read page looks sane. */ -void _bt_checkpage(Relation rel, Buffer buf) +void _bt_checkpage(Relation rel, Buffer buf, BlockNumber par_blkno) { Page page = BufferGetPage(buf); /* @@ -519,11 +531,16 @@ void _bt_checkpage(Relation rel, Buffer buf) * page header or is all-zero. We have to defend against the all-zero * case, however. */ - if (PageIsNew(page)) - ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" contains unexpected zero page at block %u", RelationGetRelationName(rel), - BufferGetBlockNumber(buf)), - errhint("Please REINDEX it."))); + if (PageIsNew(page)) { + PageHeader phdr = (PageHeader)page; + exrto_dump_btree_info(rel, BufferGetBlockNumber(buf), par_blkno); + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" oid: %u contains unexpected zero page at block %u, pd_upper %d pd_lower %d", + RelationGetRelationName(rel), rel->rd_id, BufferGetBlockNumber(buf), phdr->pd_upper, + phdr->pd_lower), + errhint("Please REINDEX it."))); + } /* * Additionally check that the special area looks sane. @@ -714,7 +731,7 @@ loop: * is when the target page is the same one already in the buffer. 
*/ FORCE_INLINE -Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access) +Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access, BlockNumber par_blkno) { Buffer buf; @@ -724,7 +741,7 @@ Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access buf = ReleaseAndReadBuffer(obuf, rel, blkno); _bt_checkbuffer_valid(rel, buf); LockBuffer(buf, access); - _bt_checkpage(rel, buf); + _bt_checkpage(rel, buf, par_blkno); return buf; } diff --git a/src/gausskernel/storage/access/redo/redo_ginxlog.cpp b/src/gausskernel/storage/access/redo/redo_ginxlog.cpp index 34557f42e4990d1621fbcb7574d514965f1f0537..2ac61e357f38107aac1406269e6509c4d8f148d5 100644 --- a/src/gausskernel/storage/access/redo/redo_ginxlog.cpp +++ b/src/gausskernel/storage/access/redo/redo_ginxlog.cpp @@ -126,7 +126,7 @@ static XLogRecParseState *GinXlogSplitParseBlock(XLogReaderState *record, uint32 { XLogRecParseState *recordstatehead = NULL; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - XLogRecSetBlockDataState(record, GIN_SPLIT_LEFT_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, GIN_SPLIT_LEFT_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecParseState *blockstate = NULL; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); @@ -140,7 +140,7 @@ static XLogRecParseState *GinXlogSplitParseBlock(XLogReaderState *record, uint32 if (isRoot) { XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - XLogRecSetBlockDataState(record, GIN_SPLIT_ROOT_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, GIN_SPLIT_ROOT_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); ++(*blocknum); } @@ -177,14 +177,14 @@ static XLogRecParseState *GinXlogDeleteParseBlock(XLogReaderState *record, uint3 { XLogRecParseState *recordstatehead = NULL; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - XLogRecSetBlockDataState(record, GIN_DELETE_D_PAGE_BLOCK_NUM, 
recordstatehead); + XLogRecSetBlockDataState(record, GIN_DELETE_D_PAGE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecParseState *blockstate = NULL; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - XLogRecSetBlockDataState(record, GIN_DELETE_P_PAGE_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, GIN_DELETE_P_PAGE_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - XLogRecSetBlockDataState(record, GIN_DELETE_L_PAGE_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, GIN_DELETE_L_PAGE_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); *blocknum = 3; return recordstatehead; @@ -194,7 +194,7 @@ static XLogRecParseState *GinXlogUpdateMetaPageParseBlock(XLogReaderState *recor { XLogRecParseState *recordstatehead = NULL; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - XLogRecSetBlockDataState(record, GIN_META_PAGE_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, GIN_META_PAGE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); *blocknum = 1; ginxlogUpdateMeta *data = (ginxlogUpdateMeta *)XLogRecGetData(record); @@ -221,7 +221,8 @@ static XLogRecParseState *GinXlogDeleteListPageParseBlock(XLogReaderState *recor { XLogRecParseState *recordstatehead = NULL; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - XLogRecSetBlockDataState(record, GIN_DELETE_LIST_META_PAGE_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, GIN_DELETE_LIST_META_PAGE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, + true); *blocknum = 1; ginxlogDeleteListPages *data = (ginxlogDeleteListPages *)XLogRecGetData(record); @@ -229,7 +230,7 @@ static XLogRecParseState *GinXlogDeleteListPageParseBlock(XLogReaderState *recor for (int32 i = 0; i < data->ndeleted; i++) { XLogRecParseState *blockstate = NULL; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - 
XLogRecSetBlockDataState(record, i + 1, blockstate); + XLogRecSetBlockDataState(record, i + 1, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); ++(*blocknum); } diff --git a/src/gausskernel/storage/access/redo/redo_gistxlog.cpp b/src/gausskernel/storage/access/redo/redo_gistxlog.cpp index c859e8267701aa1f6b1c35c4d261f4e3b31484da..4f0cbf0f2e80cb44ad3539518b952c235552c526 100644 --- a/src/gausskernel/storage/access/redo/redo_gistxlog.cpp +++ b/src/gausskernel/storage/access/redo/redo_gistxlog.cpp @@ -202,11 +202,13 @@ static XLogRecParseState *GistXlogPageSplitParseBlock(XLogReaderState *record, u BlockNumber blkno; XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno); if (blkno == GIST_ROOT_BLKNO) { + XLogRecSetBlockDataState(record, i + 1, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecSetAuxiBlkNumState(&blockstate->blockparse.extra_rec.blockdatarec, InvalidForkNumber, InvalidForkNumber); isrootsplit = true; } if (blkno != GIST_ROOT_BLKNO) { + XLogRecSetBlockDataState(record, i + 1, blockstate); uint32 flag; if ((i < xldata->npage - 1) && !isrootsplit && xldata->markfollowright) flag = 1; diff --git a/src/gausskernel/storage/access/redo/redo_heapam.cpp b/src/gausskernel/storage/access/redo/redo_heapam.cpp index ff01c545f68ff26207701a6198d98d5e9104b323..28ce6f53277ca21de30d408f1805a04a6e60b37d 100755 --- a/src/gausskernel/storage/access/redo/redo_heapam.cpp +++ b/src/gausskernel/storage/access/redo/redo_heapam.cpp @@ -991,7 +991,7 @@ static XLogRecParseState *HeapXlogBaseShiftParseBlock(XLogReaderState *record, u if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_BASESHIFT_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, HEAP_BASESHIFT_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -1018,7 +1018,7 @@ static XLogRecParseState *HeapXlogLockParseBlock(XLogReaderState *record, uint32 if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, 
HEAP_LOCK_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, HEAP_LOCK_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -1080,37 +1080,13 @@ XLogRecParseState *HeapRedoParseToBlock(XLogReaderState *record, uint32 *blocknu static XLogRecParseState *HeapXlogFreezeParseBlock(XLogReaderState *record, uint32 *blocknum) { XLogRecParseState *recordstatehead = NULL; - XLogRecParseState *blockstate = NULL; *blocknum = 1; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_FREEZE_ORIG_BLOCK_NUM, recordstatehead); - - /* - * In Hot Standby mode, ensure that there's no queries running which still - * consider the frozen xids as running. - */ - if (g_supportHotStandby) { - (*blocknum)++; - /* need notify hot standby */ - XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - if (blockstate == NULL) { - return NULL; - } - RelFileNode rnode; - xl_heap_freeze *xlrec = (xl_heap_freeze *)XLogRecGetData(record); - TransactionId cutoff_xid = xlrec->cutoff_xid; - - XLogRecGetBlockTag(record, HEAP_FREEZE_ORIG_BLOCK_NUM, &rnode, NULL, NULL); - - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, blockstate); - XLogRecSetInvalidMsgState(&blockstate->blockparse.extra_rec.blockinvalidmsg, cutoff_xid); - } + XLogRecSetBlockDataState(record, HEAP_FREEZE_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -1118,105 +1094,52 @@ static XLogRecParseState *HeapXlogFreezeParseBlock(XLogReaderState *record, uint static XLogRecParseState *HeapXlogInvalidParseBlock(XLogReaderState *record, uint32 *blocknum) { *blocknum = 1; - XLogRecParseState *blockstate = NULL; XLogRecParseState *recordstatehead = NULL; XLogParseBufferAllocListFunc(record, 
&recordstatehead, NULL); if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_FREEZE_ORIG_BLOCK_NUM, recordstatehead); - - /* - * In Hot Standby mode, ensure that there's no queries running which still consider the - * invalid xids as running. - */ - if (g_supportHotStandby) { - (*blocknum)++; - /* need notify hot standby */ - XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - if (blockstate == NULL) { - return NULL; - } - /* get cutoff xid */ - xl_heap_invalid *xlrecInvalid = (xl_heap_invalid *)XLogRecGetData(record); - TransactionId cutoff_xid = xlrecInvalid->cutoff_xid; - RelFileNode rnode; - - XLogRecGetBlockTag(record, HEAP_FREEZE_ORIG_BLOCK_NUM, &rnode, NULL, NULL); - - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, blockstate); - XLogRecSetInvalidMsgState(&blockstate->blockparse.extra_rec.blockinvalidmsg, cutoff_xid); - } + XLogRecSetBlockDataState(record, HEAP_FREEZE_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } static XLogRecParseState *HeapXlogCleanParseBlock(XLogReaderState *record, uint32 *blocknum) { - xl_heap_clean *xlrec = (xl_heap_clean *)XLogRecGetData(record); XLogRecParseState *recordstatehead = NULL; - XLogRecParseState *blockstate = NULL; *blocknum = 1; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_CLEAN_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, HEAP_CLEAN_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); - /* - * We're about to remove tuples. In Hot Standby mode, ensure that there's - * no queries running for which the removed tuples are still visible. 
- * - * Not all HEAP2_CLEAN records remove tuples with xids, so we only want to - * conflict on the records that cause MVCC failures for user queries. If - * latestRemovedXid is invalid, skip conflict processing. - */ - if (g_supportHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid)) { - (*blocknum)++; - /* need notify hot standby */ - XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - if (blockstate == NULL) { - return NULL; - } - RelFileNode rnode; - XLogRecGetBlockTag(record, HEAP_CLEAN_ORIG_BLOCK_NUM, &rnode, NULL, NULL); - - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, blockstate); - XLogRecSetInvalidMsgState(&blockstate->blockparse.extra_rec.blockinvalidmsg, xlrec->latestRemovedXid); - } return recordstatehead; } static XLogRecParseState *HeapXlogCleanupInfoParseBlock(XLogReaderState *record, uint32 *blocknum) { XLogRecParseState *recordstatehead = NULL; + RelFileNodeOld *rnode = NULL; + ForkNumber forknum = MAIN_FORKNUM; + BlockNumber blkno = InvalidBlockNumber; - /* Backup blocks are not used in cleanup_info records */ - Assert(!XLogRecHasAnyBlockRefs(record)); + (*blocknum)++; /* NOTE(review): this patch drops the earlier reset of *blocknum to 0, so the increment now builds on whatever the caller passed in -- confirm the caller zeroes it. */ + XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); /* NOTE(review): unlike the sibling parse functions in this file, recordstatehead is no longer checked for NULL before it is used below -- confirm the allocation cannot fail here. */ - *blocknum = 0; - if (g_supportHotStandby) { - (*blocknum)++; - XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - if (recordstatehead == NULL) { - return NULL; - } + xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *)XLogRecGetData(record); + rnode = &(xlrec->node); + forknum = MAIN_FORKNUM; + RelFileNode tmp_node; + RelFileNodeCopy(tmp_node, *rnode, (int2)XLogRecGetBucketId(record)); + tmp_node.opt = 0; + RelFileNodeForkNum filenode = RelFileNodeForkNumFill(&tmp_node, InvalidBackendId, forknum, blkno); + XLogRecSetBlockCommonState(record, BLOCK_DATA_CLEANUP_TYPE, filenode, recordstatehead); - xl_heap_cleanup_info *xlrec = 
(xl_heap_cleanup_info *)XLogRecGetData(record); - RelFileNode rnode; - RelFileNodeCopy(rnode, xlrec->node, XLogRecGetBucketId(record)); + wal_rec_set_clean_up_info_state(&(recordstatehead->blockparse.extra_rec.clean_up_info), xlrec->latestRemovedXid); - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, recordstatehead); - XLogRecSetInvalidMsgState(&recordstatehead->blockparse.extra_rec.blockinvalidmsg, xlrec->latestRemovedXid); - } return recordstatehead; } @@ -1230,7 +1153,7 @@ static XLogRecParseState *HeapXlogVisibleParseBlock(XLogReaderState *record, uin if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_VISIBLE_VM_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, HEAP_VISIBLE_VM_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); if (XLogRecHasBlockRef(record, HEAP_VISIBLE_DATA_BLOCK_NUM)) { (*blocknum)++; @@ -1238,26 +1161,9 @@ static XLogRecParseState *HeapXlogVisibleParseBlock(XLogReaderState *record, uin if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, HEAP_VISIBLE_DATA_BLOCK_NUM, blockstate); - } - - if (g_supportHotStandby) { - (*blocknum)++; - XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - if (blockstate == NULL) { - return NULL; - } - RelFileNode rnode; - xl_heap_visible *xlrec = (xl_heap_visible *)XLogRecGetData(record); - - XLogRecGetBlockTag(record, HEAP_VISIBLE_VM_BLOCK_NUM, &rnode, NULL, NULL); - - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, blockstate); - XLogRecSetInvalidMsgState(&blockstate->blockparse.extra_rec.blockinvalidmsg, xlrec->cutoff_xid); + XLogRecSetBlockDataState(record, HEAP_VISIBLE_DATA_BLOCK_NUM, blockstate, 
BLOCK_DATA_MAIN_DATA_TYPE, true); } - + return recordstatehead; } diff --git a/src/gausskernel/storage/access/redo/redo_nbtxlog.cpp b/src/gausskernel/storage/access/redo/redo_nbtxlog.cpp index 9b62eb03eeee028507039c02b1ed261ef9851e1c..5213ab41140c0d0db59e883ac6e02561c08006c8 100644 --- a/src/gausskernel/storage/access/redo/redo_nbtxlog.cpp +++ b/src/gausskernel/storage/access/redo/redo_nbtxlog.cpp @@ -593,9 +593,12 @@ XLogRecParseState *BtreeXlogInsertParseBlock(XLogReaderState *record, uint32 *bl if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead); - if (info != XLOG_BTREE_INSERT_LEAF) { + if (info == XLOG_BTREE_INSERT_LEAF) { + XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead); + } else { + XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, + true); (*blocknum)++; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); if (blockstate == NULL) { @@ -610,7 +613,7 @@ XLogRecParseState *BtreeXlogInsertParseBlock(XLogReaderState *record, uint32 *bl if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_INSERT_META_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_INSERT_META_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } return recordstatehead; @@ -636,7 +639,7 @@ static XLogRecParseState *BtreeXlogSplitParseBlock(XLogReaderState *record, uint if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_SPLIT_LEFT_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_SPLIT_LEFT_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecSetAuxiBlkNumState(&recordstatehead->blockparse.extra_rec.blockdatarec, rightsib, InvalidForkNumber); (*blocknum)++; @@ -644,6 +647,8 @@ static XLogRecParseState *BtreeXlogSplitParseBlock(XLogReaderState *record, uint if (blockstate == NULL) { return NULL; } + + // 
no need restore base page, because this is a new page XLogRecSetBlockDataState(record, BTREE_SPLIT_RIGHT_BLOCK_NUM, blockstate); XLogRecSetAuxiBlkNumState(&blockstate->blockparse.extra_rec.blockdatarec, rnext, leftsib); @@ -653,7 +658,7 @@ static XLogRecParseState *BtreeXlogSplitParseBlock(XLogReaderState *record, uint if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_SPLIT_RIGHTNEXT_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_SPLIT_RIGHTNEXT_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecSetAuxiBlkNumState(&blockstate->blockparse.extra_rec.blockdatarec, rightsib, InvalidForkNumber); } @@ -663,6 +668,8 @@ static XLogRecParseState *BtreeXlogSplitParseBlock(XLogReaderState *record, uint if (blockstate == NULL) { return NULL; } + + // just clear split flag,no need restore base page XLogRecSetBlockDataState(record, BTREE_SPLIT_CHILD_BLOCK_NUM, blockstate); } @@ -679,7 +686,7 @@ static XLogRecParseState *BtreeXlogVacuumParseBlock(XLogReaderState *record, uin return NULL; } - XLogRecSetBlockDataState(record, BTREE_VACUUM_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_VACUUM_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -693,7 +700,7 @@ static XLogRecParseState *BtreeXlogDeleteParseBlock(XLogReaderState *record, uin return NULL; } - XLogRecSetBlockDataState(record, BTREE_DELETE_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_DELETE_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); /* for hot standby, need to reslove the conflict */ { @@ -713,14 +720,14 @@ static XLogRecParseState *BtreeXlogMarkHalfdeadParseBlock(XLogReaderState *recor return NULL; } - XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_PARENT_PAGE_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_PARENT_PAGE_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); (*blocknum)++; 
XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_LEAF_PAGE_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_LEAF_PAGE_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -738,7 +745,7 @@ static XLogRecParseState *BtreeXlogUnlinkPageParseBlock(XLogReaderState *record, return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_RIGHT_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_RIGHT_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); if (xlrec->leftsib != P_NONE) { (*blocknum)++; @@ -746,7 +753,7 @@ static XLogRecParseState *BtreeXlogUnlinkPageParseBlock(XLogReaderState *record, if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_LEFT_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_LEFT_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } (*blocknum)++; @@ -754,7 +761,7 @@ static XLogRecParseState *BtreeXlogUnlinkPageParseBlock(XLogReaderState *record, if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CUR_PAGE_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CUR_PAGE_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); if (XLogRecHasBlockRef(record, BTREE_UNLINK_PAGE_CHILD_NUM)) { (*blocknum)++; @@ -762,7 +769,7 @@ static XLogRecParseState *BtreeXlogUnlinkPageParseBlock(XLogReaderState *record, if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CHILD_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CHILD_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } /* Update metapage if needed */ @@ -772,7 +779,7 @@ static XLogRecParseState *BtreeXlogUnlinkPageParseBlock(XLogReaderState *record, if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, 
BTREE_UNLINK_PAGE_META_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_META_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } return recordstatehead; @@ -789,7 +796,7 @@ static XLogRecParseState *BtreeXlogNewrootParseBlock(XLogReaderState *record, ui if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); if (xlrec->level > 0) { (*blocknum)++; @@ -797,7 +804,7 @@ static XLogRecParseState *BtreeXlogNewrootParseBlock(XLogReaderState *record, ui if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_LEFT_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_LEFT_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } (*blocknum)++; @@ -805,7 +812,7 @@ static XLogRecParseState *BtreeXlogNewrootParseBlock(XLogReaderState *record, ui if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_META_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_META_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -813,24 +820,16 @@ static XLogRecParseState *BtreeXlogNewrootParseBlock(XLogReaderState *record, ui static XLogRecParseState *BtreeXlogReusePageParseBlock(XLogReaderState *record, uint32 *blocknum) { XLogRecParseState *recordstatehead = NULL; - xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *)XLogRecGetData(record); - *blocknum = 0; - if (g_supportHotStandby) { - (*blocknum)++; - XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - if (recordstatehead == NULL) { - return NULL; - } - - RelFileNode rnode; - RelFileNodeCopy(rnode, xlrec->node, XLogRecGetBucketId(record)); + *blocknum = 1; - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); 
- XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, recordstatehead); - XLogRecSetInvalidMsgState(&recordstatehead->blockparse.extra_rec.blockinvalidmsg, xlrec->latestRemovedXid); + XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); + if (recordstatehead == NULL) { + return NULL; } + + XLogRecSetBlockDataState(record, BTREE_REUSE_PAGE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); + return recordstatehead; } @@ -1285,6 +1284,11 @@ void BtreeRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatar case XLOG_BTREE_NEWROOT: BtreeXlogNewrootBlock(blockhead, blockdatarec, bufferinfo); break; + case XLOG_BTREE_REUSE_PAGE: + if (!(IS_EXRTO_STANDBY_READ && g_instance.attr.attr_storage.enable_exrto_standby_read_opt)) { + ereport(PANIC, (errmsg("btree_redo_block: unknown op code %u", info))); + } + break; default: ereport(PANIC, (errmsg("btree_redo_block: unknown op code %u", info))); } diff --git a/src/gausskernel/storage/access/redo/redo_ubtxlog.cpp b/src/gausskernel/storage/access/redo/redo_ubtxlog.cpp index f5fc6446a43d10fe224612b32905c133d682b5e5..0d809094003ef73c55eadca6ff88fd8a32a9fac4 100644 --- a/src/gausskernel/storage/access/redo/redo_ubtxlog.cpp +++ b/src/gausskernel/storage/access/redo/redo_ubtxlog.cpp @@ -591,9 +591,12 @@ XLogRecParseState *UBTreeXlogInsertParseBlock(XLogReaderState *record, uint32 *b if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead); - if (info != XLOG_UBTREE_INSERT_LEAF) { + if (info == XLOG_UBTREE_INSERT_LEAF) { + XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead); + } else { + XLogRecSetBlockDataState(record, BTREE_INSERT_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, + true); (*blocknum)++; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); if (blockstate == NULL) { @@ -608,7 +611,7 @@ XLogRecParseState 
*UBTreeXlogInsertParseBlock(XLogReaderState *record, uint32 *b if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_INSERT_META_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_INSERT_META_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } return recordstatehead; @@ -634,7 +637,7 @@ static XLogRecParseState *UBTreeXlogSplitParseBlock(XLogReaderState *record, uin if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_SPLIT_LEFT_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_SPLIT_LEFT_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecSetAuxiBlkNumState(&recordstatehead->blockparse.extra_rec.blockdatarec, rightsib, InvalidForkNumber); (*blocknum)++; @@ -651,7 +654,7 @@ static XLogRecParseState *UBTreeXlogSplitParseBlock(XLogReaderState *record, uin if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_SPLIT_RIGHTNEXT_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_SPLIT_RIGHTNEXT_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); XLogRecSetAuxiBlkNumState(&blockstate->blockparse.extra_rec.blockdatarec, rightsib, InvalidForkNumber); } @@ -670,7 +673,6 @@ static XLogRecParseState *UBTreeXlogSplitParseBlock(XLogReaderState *record, uin static XLogRecParseState *UBTreeXlogVacuumParseBlock(XLogReaderState *record, uint32 *blocknum) { XLogRecParseState *recordstatehead = NULL; - XLogRecParseState *blockstate = NULL; *blocknum = 1; XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); @@ -678,28 +680,7 @@ static XLogRecParseState *UBTreeXlogVacuumParseBlock(XLogReaderState *record, ui return NULL; } - XLogRecSetBlockDataState(record, BTREE_VACUUM_ORIG_BLOCK_NUM, recordstatehead); - - if (g_supportHotStandby) { - BlockNumber thisblkno = InvalidBlockNumber; - RelFileNode thisrnode = ((RelFileNode) {0, 0, 0, -1}); - - xl_btree_vacuum *xlrec = (xl_btree_vacuum *)XLogRecGetData(record); - 
XLogRecGetBlockTag(record, BTREE_VACUUM_ORIG_BLOCK_NUM, &thisrnode, NULL, &thisblkno); - - if ((xlrec->lastBlockVacuumed + 1) < thisblkno) { - (*blocknum)++; - XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); - if (blockstate == NULL) { - return NULL; - } - - RelFileNodeForkNum filenode = RelFileNodeForkNumFill(&thisrnode, InvalidBackendId, MAIN_FORKNUM, thisblkno); - XLogRecSetBlockCommonState(record, BLOCK_DATA_VACUUM_PIN_TYPE, filenode, blockstate); - XLogRecSetPinVacuumState(&blockstate->blockparse.extra_rec.blockvacuumpin, xlrec->lastBlockVacuumed); - } - } - + XLogRecSetBlockDataState(record, BTREE_VACUUM_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -713,7 +694,7 @@ static XLogRecParseState *UBTreeXlogDeleteParseBlock(XLogReaderState *record, ui return NULL; } - XLogRecSetBlockDataState(record, BTREE_DELETE_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_DELETE_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); /* for hot standby, need to reslove the conflict */ { @@ -733,14 +714,14 @@ static XLogRecParseState *UBTreeXlogMarkHalfdeadParseBlock(XLogReaderState *reco return NULL; } - XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_PARENT_PAGE_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_PARENT_PAGE_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); (*blocknum)++; XLogParseBufferAllocListFunc(record, &blockstate, recordstatehead); if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_LEAF_PAGE_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_HALF_DEAD_LEAF_PAGE_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -758,7 +739,7 @@ static XLogRecParseState *UBTreeXlogUnlinkPageParseBlock(XLogReaderState *record return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_RIGHT_NUM, recordstatehead); + XLogRecSetBlockDataState(record, 
BTREE_UNLINK_PAGE_RIGHT_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); if (xlrec->leftsib != P_NONE) { (*blocknum)++; @@ -766,7 +747,7 @@ static XLogRecParseState *UBTreeXlogUnlinkPageParseBlock(XLogReaderState *record if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_LEFT_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_LEFT_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } (*blocknum)++; @@ -774,7 +755,7 @@ static XLogRecParseState *UBTreeXlogUnlinkPageParseBlock(XLogReaderState *record if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CUR_PAGE_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CUR_PAGE_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); if (XLogRecHasBlockRef(record, BTREE_UNLINK_PAGE_CHILD_NUM)) { (*blocknum)++; @@ -782,7 +763,7 @@ static XLogRecParseState *UBTreeXlogUnlinkPageParseBlock(XLogReaderState *record if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CHILD_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_CHILD_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } /* Update metapage if needed */ @@ -792,7 +773,7 @@ static XLogRecParseState *UBTreeXlogUnlinkPageParseBlock(XLogReaderState *record if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_META_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_UNLINK_PAGE_META_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } return recordstatehead; @@ -809,7 +790,7 @@ static XLogRecParseState *UBTreeXlogNewrootParseBlock(XLogReaderState *record, u if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); if (xlrec->level > 0) { (*blocknum)++; @@ -817,7 
+798,7 @@ static XLogRecParseState *UBTreeXlogNewrootParseBlock(XLogReaderState *record, u if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_LEFT_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_LEFT_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); } (*blocknum)++; @@ -825,7 +806,7 @@ static XLogRecParseState *UBTreeXlogNewrootParseBlock(XLogReaderState *record, u if (blockstate == NULL) { return NULL; } - XLogRecSetBlockDataState(record, BTREE_NEWROOT_META_BLOCK_NUM, blockstate); + XLogRecSetBlockDataState(record, BTREE_NEWROOT_META_BLOCK_NUM, blockstate, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -833,24 +814,14 @@ static XLogRecParseState *UBTreeXlogNewrootParseBlock(XLogReaderState *record, u static XLogRecParseState *UBTreeXlogReusePageParseBlock(XLogReaderState *record, uint32 *blocknum) { XLogRecParseState *recordstatehead = NULL; - xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *)XLogRecGetData(record); - *blocknum = 0; - if (g_supportHotStandby) { - (*blocknum)++; - XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); - if (recordstatehead == NULL) { - return NULL; - } - - RelFileNode rnode; - RelFileNodeCopy(rnode, xlrec->node, XLogRecGetBucketId(record)); - - RelFileNodeForkNum filenode = - RelFileNodeForkNumFill(&rnode, InvalidBackendId, InvalidForkNumber, InvalidBlockNumber); - XLogRecSetBlockCommonState(record, BLOCK_DATA_INVALIDMSG_TYPE, filenode, recordstatehead); - XLogRecSetInvalidMsgState(&recordstatehead->blockparse.extra_rec.blockinvalidmsg, xlrec->latestRemovedXid); + *blocknum = 1; + XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); + if (recordstatehead == NULL) { + return NULL; } + + XLogRecSetBlockDataState(record, BTREE_REUSE_PAGE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -864,7 +835,7 @@ static XLogRecParseState* UBTreeXlogMarkDeleteParseBlock(XLogReaderState* record return 
NULL; } - XLogRecSetBlockDataState(record, UBTREE_MARK_DELETE_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UBTREE_MARK_DELETE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -878,7 +849,7 @@ static XLogRecParseState* UBTreeXlogPrunePageParseBlock(XLogReaderState* record, return NULL; } - XLogRecSetBlockDataState(record, UBTREE_PAGE_PRUNE_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UBTREE_PAGE_PRUNE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -892,7 +863,7 @@ static XLogRecParseState* UBTree2XlogShiftBaseParseBlock(XLogReaderState* record return NULL; } - XLogRecSetBlockDataState(record, UBTREE2_BASE_SHIFT_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UBTREE2_BASE_SHIFT_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -973,7 +944,7 @@ XLogRecParseState *UBTree2XlogFreezeParseBlock(XLogReaderState *record, uint32 * if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UBTREE2_FREEZE_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UBTREE2_FREEZE_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -1603,7 +1574,13 @@ void UBTreeRedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdata case XLOG_UBTREE_PRUNE_PAGE: UBTreeXlogPrunePageBlock(blockhead, blockdatarec, bufferinfo); break; + case XLOG_UBTREE_REUSE_PAGE: + if (!(IS_EXRTO_STANDBY_READ && g_instance.attr.attr_storage.enable_exrto_standby_read_opt)) { + ereport(PANIC, (errmsg("UBTreeRedoDataBlock: unknown op code %u", info))); + } + break; default: ereport(PANIC, (errmsg("UBTreeRedoDataBlock: unknown op code %u", info))); + break; } } diff --git a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp index 
53f86af4db460f7f2bcc8ba958c26acd09352a74..30ee49700a894522979ccd8e020b846d285d35c9 100644 --- a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp +++ b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp @@ -413,7 +413,7 @@ void XLogRecSetBlockDataStateContent(XLogReaderState *record, uint32 blockid, XL } void XLogRecSetBlockDataState(XLogReaderState *record, uint32 blockid, XLogRecParseState *recordblockstate, - XLogBlockParseEnum type) + XLogBlockParseEnum type, bool is_conflict_type) { Assert(XLogRecHasBlockRef(record, blockid)); DecodedBkpBlock *decodebkp = &(record->blocks[blockid]); @@ -431,6 +431,7 @@ void XLogRecSetBlockDataState(XLogReaderState *record, uint32 blockid, XLogRecPa XLogBlockDataParse *blockdatarec = &(recordblockstate->blockparse.extra_rec.blockdatarec); XLogRecSetBlockDataStateContent(record, blockid, blockdatarec); + recordblockstate->blockparse.blockhead.is_conflict_type = is_conflict_type; } void XLogRecSetAuxiBlkNumState(XLogBlockDataParse *blockdatarec, BlockNumber auxilaryblkn1, BlockNumber auxilaryblkn2) @@ -466,7 +467,7 @@ void XLogRecSetVmBlockState(XLogReaderState *record, uint32 blockid, XLogRecPars XLogBlockVmParse *blockvm = &(recordblockstate->blockparse.extra_rec.blockvmrec); blockvm->heapBlk = heapBlk; - + recordblockstate->blockparse.blockhead.is_conflict_type = true; } void GetXlUndoHeaderExtraData(char **currLogPtr, XlUndoHeaderExtra *xlundohdrextra, uint8 flag) @@ -807,6 +808,11 @@ void XLogUpdateCopyedBlockState(XLogRecParseState *recordblockstate, XLogBlockPa recordblockstate->blockparse.blockhead.bucketNode = (int2)bucketNode; } +void wal_rec_set_clean_up_info_state(WalCleanupInfoParse *parse_state, TransactionId removed_xid) +{ + parse_state->removed_xid = removed_xid; +} + void XLogRecSetBlockDdlState(XLogBlockDdlParse *blockddlstate, uint32 blockddltype, char *mainData, int rels, bool compress, uint32 mainDataLen) { @@ -1500,11 +1506,13 @@ void XLogBlockDdlDoSmgrAction(XLogBlockHead *blockhead, void 
*blockrecbody, Redo smgr_redo_create(rnode, blockhead->forknum, blockddlrec->mainData); break; case BLOCK_DDL_TRUNCATE_RELNODE: { - TransactionId latest_removed_xid = InvalidTransactionId; - if (blockddlrec->mainDataLen == TRUNCATE_CONTAIN_XID_SIZE) { - latest_removed_xid = ((xl_smgr_truncate_compress*)blockddlrec->mainData)->latest_removed_xid; - } - xlog_block_smgr_redo_truncate(rnode, blockhead->blkno, blockhead->end_ptr, latest_removed_xid); + RelFileNode rel_node; + rel_node.spcNode = blockhead->spcNode; + rel_node.dbNode = blockhead->dbNode; + rel_node.relNode = blockhead->relNode; + rel_node.bucketNode = blockhead->bucketNode; + rel_node.opt = blockhead->opt; + XLogTruncateRelation(rel_node, blockhead->forknum, blockhead->blkno); break; } case BLOCK_DDL_DROP_RELNODE: { @@ -1729,6 +1737,22 @@ void XLogSynAllBuffer() } } +bool need_restore_new_page_version(XLogRecParseState *redo_block_state) +{ + if (!IsHeap2Clean(&redo_block_state->blockparse.blockhead)) { + return true; + } + + TransactionId recyle_xmin = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.exrto_recyle_xmin); + xl_heap_clean *xl_clean_rec = + (xl_heap_clean *)XLogBlockDataGetMainData(&redo_block_state->blockparse.extra_rec.blockdatarec, NULL); + if (TransactionIdPrecedes(xl_clean_rec->latestRemovedXid, recyle_xmin)) { + return false; + } + + return true; +} + bool XLogBlockRedoForExtremeRTO(XLogRecParseState *redoblocktate, RedoBufferInfo *bufferinfo, bool notfound, RedoTimeCost &readBufCost, RedoTimeCost &redoCost) { @@ -1768,15 +1792,29 @@ bool XLogBlockRedoForExtremeRTO(XLogRecParseState *redoblocktate, RedoBufferInfo } if ((block_valid != BLOCK_DATA_UNDO_TYPE) && g_instance.attr.attr_storage.EnableHotStandby && - IsDefaultExtremeRtoMode() && XLByteLT(PageGetLSN(bufferinfo->pageinfo.page), blockhead->end_ptr)) { - if (bufferinfo->blockinfo.forknum >= EXRTO_FORK_NUM) { + IsDefaultExtremeRtoMode() && XLByteLT(PageGetLSN(bufferinfo->pageinfo.page), blockhead->end_ptr) && + 
!IsSegmentFileNode(bufferinfo->blockinfo.rnode)) { + if (unlikely(bufferinfo->blockinfo.forknum >= EXRTO_FORK_NUM)) { ereport(PANIC, (errmsg("forknum is illegal: %d", bufferinfo->blockinfo.forknum))); } BufferTag buf_tag; - INIT_BUFFERTAG(buf_tag, bufferinfo->blockinfo.rnode, - bufferinfo->blockinfo.forknum, bufferinfo->blockinfo.blkno); - extreme_rto_standby_read::insert_lsn_to_block_info(&extreme_rto::g_redoWorker->standby_read_meta_info, buf_tag, - bufferinfo->pageinfo.page, blockhead->start_ptr); + INIT_BUFFERTAG( + buf_tag, bufferinfo->blockinfo.rnode, bufferinfo->blockinfo.forknum, bufferinfo->blockinfo.blkno); + + if (g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { + if (blockhead->is_conflict_type && need_restore_new_page_version(redoblocktate)) { + extreme_rto_standby_read::insert_lsn_to_block_info_for_opt( + &extreme_rto::g_redoWorker->standby_read_meta_info, + buf_tag, + bufferinfo->pageinfo.page, + blockhead->start_ptr); + } + } else { + extreme_rto_standby_read::insert_lsn_to_block_info(&extreme_rto::g_redoWorker->standby_read_meta_info, + buf_tag, + bufferinfo->pageinfo.page, + blockhead->start_ptr); + } } if (redoaction != BLK_DONE) { diff --git a/src/gausskernel/storage/access/redo/standby_read/Makefile b/src/gausskernel/storage/access/redo/standby_read/Makefile index 9d1fc64883da8015594cef6f8b0db674f8626fa9..e1b7b266082ec9c2e82199b14fa8dcedf0137544 100644 --- a/src/gausskernel/storage/access/redo/standby_read/Makefile +++ b/src/gausskernel/storage/access/redo/standby_read/Makefile @@ -32,6 +32,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o lsn_info_proc.o standby_read_interface.o standby_read_delay_ddl.o +OBJS = base_page_proc.o block_info_proc.o lsn_info_double_list.o lsn_info_proc.o standby_read_interface.o standby_read_delay_ddl.o standby_read_proc.o include $(top_srcdir)/src/gausskernel/common.mk diff --git 
a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp index 08fefe6ceb165e5a1bc8a53b24298578e3effcbb..c5bc9a5c9b79e92c92c36330c6ecf74d0c18110d 100644 --- a/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/block_info_proc.cpp @@ -127,7 +127,7 @@ void init_block_info(BlockMetaInfo* block_info, XLogRecPtr max_lsn) } void insert_lsn_to_block_info( - StandbyReadMetaInfo* meta_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn) + StandbyReadMetaInfo *meta_info, const BufferTag &buf_tag, const Page base_page, XLogRecPtr next_lsn) { Buffer block_info_buf = InvalidBuffer; BlockMetaInfo* block_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_ZERO_ON_ERROR, &block_info_buf); @@ -151,24 +151,76 @@ void insert_lsn_to_block_info( if (block_info->record_num == 0 || (block_info->record_num % (uint32)g_instance.attr.attr_storage.base_page_saved_interval) == 0) { - insert_base_page_to_lsn_info(meta_info, &block_info->lsn_info_list, &block_info->base_page_info_list, buf_tag, - base_page, current_page_lsn, next_lsn); + insert_base_page_to_lsn_info(meta_info, + &block_info->lsn_info_list, + &block_info->base_page_info_list, + buf_tag, + base_page, + current_page_lsn, + next_lsn); } else { insert_lsn_to_lsn_info(meta_info, &block_info->lsn_info_list, next_lsn); } + ++(block_info->record_num); Assert(block_info->max_lsn <= next_lsn); block_info->max_lsn = next_lsn; + standby_read_meta_page_set_lsn(page, next_lsn); + MarkBufferDirty(block_info_buf); + UnlockReleaseBuffer(block_info_buf); +} - ++(block_info->record_num); +void insert_lsn_to_block_info_for_opt( + StandbyReadMetaInfo *meta_info, const BufferTag &buf_tag, const Page base_page, XLogRecPtr next_lsn) +{ + Buffer block_info_buf = InvalidBuffer; + BlockMetaInfo *block_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, 
RBM_ZERO_ON_ERROR, &block_info_buf); + if (unlikely(block_info == NULL || block_info_buf == InvalidBuffer)) { + ereport(PANIC, + (errmsg("insert lsn failed,block invalid %u/%u/%u %d %u", + buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, + buf_tag.rnode.relNode, + buf_tag.forkNum, + buf_tag.blockNum))); + } +#ifdef ENABLE_UT + Page page = get_page_from_buffer(block_info_buf); +#else + Page page = BufferGetPage(block_info_buf); +#endif + XLogRecPtr current_page_lsn = PageGetLSN(base_page); + /* if block is invalid or block is valid but all the lsn object of this block has been recycled(no data in lsn info + * files belongs to this block), we reset this block + */ + if (!is_block_meta_info_valid(block_info) || + block_info->lsn_info_list.prev < meta_info->lsn_table_recyle_position) { + if (!is_block_info_page_valid((BlockInfoPageHeader *)page)) { + block_info_page_init(page); + } + + init_block_info(block_info, current_page_lsn); + } + + insert_base_page_to_lsn_info(meta_info, + &block_info->lsn_info_list, + &block_info->base_page_info_list, + buf_tag, + base_page, + current_page_lsn, + next_lsn); + ++(block_info->record_num); + Assert(block_info->max_lsn <= next_lsn); + block_info->max_lsn = next_lsn; standby_read_meta_page_set_lsn(page, next_lsn); MarkBufferDirty(block_info_buf); UnlockReleaseBuffer(block_info_buf); } -StandbyReadRecyleState recyle_block_info( - const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn) +StandbyReadRecyleState recyle_block_info(const BufferTag &buf_tag, LsnInfoPosition base_page_info_pos, + XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn, + XLogRecPtr *block_info_max_lsn) { Buffer buffer = InvalidBuffer; BlockMetaInfo* block_meta_info = get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_NORMAL, &buffer); @@ -181,6 +233,7 @@ StandbyReadRecyleState recyle_block_info( } StandbyReadRecyleState stat = STANDBY_READ_RECLYE_NONE; Assert(((block_meta_info->flags & 
BLOCK_INFO_NODE_VALID_FLAG) == BLOCK_INFO_NODE_VALID_FLAG)); + *block_info_max_lsn = block_meta_info->max_lsn; if (XLByteLT(block_meta_info->max_lsn, recyle_lsn)) { ereport(DEBUG1, (errmsg(EXRTOFORMAT("block meta recycle all %u/%u/%u %d %u, max lsn %08X/%08X, recycle lsn %08X/%08X"), @@ -263,9 +316,26 @@ bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, */ void remove_one_block_info_file(const RelFileNode rnode) { - DropRelFileNodeShareBuffers(rnode, MAIN_FORKNUM, 0); - DropRelFileNodeShareBuffers(rnode, FSM_FORKNUM, 0); - DropRelFileNodeShareBuffers(rnode, VISIBILITYMAP_FORKNUM, 0); + HTAB *relfilenode_hashtbl = g_instance.bgwriter_cxt.unlink_rel_hashtbl; + DelFileTag *entry = NULL; + bool found = false; + + LWLockAcquire(g_instance.bgwriter_cxt.rel_hashtbl_lock, LW_EXCLUSIVE); + entry = (DelFileTag*)hash_search(relfilenode_hashtbl, &rnode, HASH_ENTER, &found); + if (!found) { + entry->rnode.spcNode = rnode.spcNode; + entry->rnode.dbNode = rnode.dbNode; + entry->rnode.relNode = rnode.relNode; + entry->rnode.bucketNode = rnode.bucketNode; + entry->rnode.opt = rnode.opt; + entry->maxSegNo = 0; /* no need to forget fsyncs of segment */ + entry->fileUnlink = false; + } + LWLockRelease(g_instance.bgwriter_cxt.rel_hashtbl_lock); + + if (!found && g_instance.bgwriter_cxt.invalid_buf_proc_latch != NULL) { + SetLatch(g_instance.bgwriter_cxt.invalid_buf_proc_latch); + } SMgrRelation srel = smgropen(rnode, InvalidBackendId); smgrdounlink(srel, true); diff --git a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp index 62277421540055920fefa0845bd9baa61adb8d07..1eaf0d51e1973cdb33c7b84ebbec90af03b09f9a 100644 --- a/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/lsn_info_proc.cpp @@ -255,7 +255,21 @@ LsnInfoPosition create_base_page_info_node(StandbyReadMetaInfo *meta_info, 
base_page_info->base_page_position = base_page_pos; set_base_page_map_bit(page, offset); - + ereport(DEBUG1, + (errmsg("create_base_page_info_node, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u" + "page lsn %lu, next lsn %lu, base page pos %lu, insert pos %lu", + buf_tag->rnode.spcNode, + buf_tag->rnode.dbNode, + buf_tag->rnode.relNode, + buf_tag->forkNum, + buf_tag->blockNum, + batch_id, + worker_id, + current_page_lsn, + next_lsn, + base_page_pos, + insert_pos))); + standby_read_meta_page_set_lsn(page, next_lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); @@ -497,11 +511,19 @@ void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_in /* retain a page version with page lsn less than recycle lsn */ XLogRecPtr next_base_page_lsn = base_page_info->next_base_page_lsn; - if (XLogRecPtrIsInvalid(next_base_page_lsn) || XLByteLT(recycle_lsn, next_base_page_lsn)) { - UnlockReleaseBuffer(buffer); - break; + if (g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { + XLogRecPtr next_lsn = base_page_info->lsn_info_node.lsn[0]; + if (XLogRecPtrIsValid(next_lsn) && XLByteLE(recycle_lsn, next_lsn)) { + UnlockReleaseBuffer(buffer); + break; + } + } else { + if (XLogRecPtrIsInvalid(next_base_page_lsn) || XLByteLT(recycle_lsn, next_base_page_lsn)) { + UnlockReleaseBuffer(buffer); + break; + } } - + base_page_info->lsn_info_node.flags &= ~LSN_INFO_NODE_VALID_FLAG; page_info_pos = base_page_info->base_page_list.next; MarkBufferDirty(buffer); @@ -545,16 +567,26 @@ void invalid_base_page_list(StandbyReadMetaInfo *meta_info, Buffer buffer, uint3 } } -inline void update_recycle_lsn_per_worker(StandbyReadMetaInfo *meta_info, XLogRecPtr lsn) +inline void update_recycle_lsn_per_worker(StandbyReadMetaInfo *meta_info, XLogRecPtr base_page_lsn, + XLogRecPtr next_base_page_lsn, + XLogRecPtr block_info_max_lsn = InvalidXLogRecPtr) { - Assert(XLogRecPtrIsValid(lsn)); + Assert(XLogRecPtrIsValid(base_page_lsn)); if 
(XLogRecPtrIsInvalid(meta_info->recycle_lsn_per_worker) || - XLByteLT(meta_info->recycle_lsn_per_worker, lsn)) { - meta_info->recycle_lsn_per_worker = lsn; + XLByteLT(meta_info->recycle_lsn_per_worker, base_page_lsn)) { + meta_info->recycle_lsn_per_worker = base_page_lsn; } - ereport(LOG, (errmsg(EXRTOFORMAT( - "[exrto_recycle] update recycle lsn per worker , batch_id: %u, redo_id: %u, recycle lsn: %08X/%08X"), - meta_info->batch_id, meta_info->redo_id, (uint32)(lsn >> UINT64_HALF), (uint32)lsn))); + uint64 cur_base_page_recyle_segno = meta_info->base_page_recyle_position / EXRTO_BASE_PAGE_FILE_MAXSIZE; + uint64 cur_lsn_table_recyle_segno = meta_info->lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE; + ereport(LOG, + (errmsg(EXRTOFORMAT("[exrto_recycle] update recycle lsn per worker , batch_id: %u, redo_id: %u, recycle " + "base_page_lsn: %08X/%08X, next_base_page_lsn: %08X/%08X, block_info_max_lsn: " + "%08X/%08X, base page recycle segno: " + "%lu, lsn info recycle segno: %lu"), + meta_info->batch_id, meta_info->redo_id, (uint32)(base_page_lsn >> UINT64_HALF), + (uint32)base_page_lsn, (uint32)(next_base_page_lsn >> UINT64_HALF), (uint32)next_base_page_lsn, + (uint32)(block_info_max_lsn >> UINT64_HALF), (uint32)block_info_max_lsn, cur_base_page_recyle_segno, + cur_lsn_table_recyle_segno))); } bool recycle_one_lsn_info_page(StandbyReadMetaInfo *meta_info, XLogRecPtr recycle_lsn, @@ -598,25 +630,35 @@ bool recycle_one_lsn_info_page(StandbyReadMetaInfo *meta_info, XLogRecPtr recycl } XLogRecPtr next_base_page_lsn = base_page_info->next_base_page_lsn; *base_page_position = base_page_info->base_page_position; - if (XLogRecPtrIsValid(next_base_page_lsn) && XLByteLT(recycle_lsn, next_base_page_lsn)) { - update_recycle_lsn_per_worker(meta_info, base_page_lsn); - UnlockReleaseBuffer(buffer); - return false; + + if (g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { + XLogRecPtr next_lsn = base_page_info->lsn_info_node.lsn[0]; + if 
(XLogRecPtrIsValid(next_lsn) && XLByteLE(recycle_lsn, next_lsn)) { + update_recycle_lsn_per_worker(meta_info, base_page_lsn, next_base_page_lsn); + UnlockReleaseBuffer(buffer); + return false; + } + } else { + if (XLogRecPtrIsValid(next_base_page_lsn) && XLByteLT(recycle_lsn, next_base_page_lsn)) { + update_recycle_lsn_per_worker(meta_info, base_page_lsn, next_base_page_lsn); + UnlockReleaseBuffer(buffer); + return false; + } } - + BufferTag buf_tag; INIT_BUFFERTAG(buf_tag, base_page_info->relfilenode, base_page_info->fork_num, base_page_info->block_num); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); buffer_is_locked = false; - + + XLogRecPtr block_info_max_lsn = InvalidXLogRecPtr; StandbyReadRecyleState stat = - recyle_block_info(buf_tag, cur_base_page_info_pos, next_base_page_lsn, recycle_lsn); + recyle_block_info(buf_tag, cur_base_page_info_pos, next_base_page_lsn, recycle_lsn, &block_info_max_lsn); if (stat == STANDBY_READ_RECLYE_ALL) { invalid_base_page_list(meta_info, buffer, offset); } else if (stat == STANDBY_READ_RECLYE_NONE) { - Assert(XLogRecPtrIsInvalid(next_base_page_lsn)); - update_recycle_lsn_per_worker(meta_info, base_page_lsn); + update_recycle_lsn_per_worker(meta_info, base_page_lsn, next_base_page_lsn, block_info_max_lsn); ReleaseBuffer(buffer); return false; } @@ -639,15 +681,22 @@ void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr uint64 last_lsn_table_recyle_segno = meta_info->lsn_table_recyle_position / EXRTO_LSN_INFO_FILE_MAXSIZE; uint64 cur_base_page_recyle_segno, cur_lsn_table_recyle_segno; - while (meta_info->lsn_table_recyle_position + BLCKSZ < meta_info->lsn_table_next_position) { + uint64 recyled_page_len = 0; + const uint32 recyle_ratio = 32; // no need recyle so fast + while (meta_info->lsn_table_recyle_position + BLCKSZ * recyle_ratio < meta_info->lsn_table_next_position) { recycle_next_page = recycle_one_lsn_info_page(meta_info, recycle_lsn, &base_page_position); if (!recycle_next_page) { break; } /* update 
recycle position */ meta_info->lsn_table_recyle_position += BLCKSZ; + recyled_page_len += BLCKSZ; Assert(meta_info->lsn_table_recyle_position % BLCKSZ == 0); - RedoInterruptCallBack(); + if (recyled_page_len >= EXRTO_LSN_INFO_FILE_MAXSIZE) { + RedoInterruptCallBack(); + pg_usleep(100); // sleep 0.1ms + recyled_page_len = 0; + } } meta_info->base_page_recyle_position = base_page_position; @@ -668,4 +717,83 @@ void standby_read_recyle_per_workers(StandbyReadMetaInfo *meta_info, XLogRecPtr } } +LsnInfoPosition get_nearest_base_page_pos( + const BufferTag &buf_tag, const LsnInfoDoubleList &lsn_info_list, XLogRecPtr read_lsn) +{ + Buffer buffer; + + XLogRecPtr page_lsn = InvalidXLogRecPtr; + LsnInfoPosition base_page_pos = LSN_INFO_LIST_HEAD; + uint32 batch_id; + uint32 worker_id; + + /* get batch id and page redo worker id */ + extreme_rto::RedoItemTag redo_item_tag; + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + /* batch id and worker id start from 1 when reading a page */ + batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; + worker_id = + extreme_rto::GetWorkerId(&redo_item_tag, (uint32)extreme_rto::get_page_redo_worker_num_per_manager()) + 1; + LsnInfoPosition latest_lsn_base_page_pos = lsn_info_list.prev; + + /* Find the base page with the smallest lsn and greater than read lsn from tail to head */ + do { + /* reach the end of the list */ + if (INFO_POSITION_IS_INVALID(latest_lsn_base_page_pos)) { + ereport(DEBUG1, (errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, batch_id, worker_id))); + break; + } + buffer = InvalidBuffer; + Page page = get_lsn_info_page(batch_id, worker_id, latest_lsn_base_page_pos, RBM_NORMAL, &buffer); + if (page == NULL || buffer == InvalidBuffer) { + ereport(ERROR, 
(errmsg(EXRTOFORMAT("get_nearest_base_page_pos failed, batch_id: %u, redo_id: %u, pos: %lu"), + batch_id, worker_id, latest_lsn_base_page_pos))); + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + uint32 offset = lsn_info_postion_to_offset(latest_lsn_base_page_pos); + BasePageInfo base_page_info = (BasePageInfo)(page + offset); + + Assert(is_base_page_type(base_page_info->lsn_info_node.type)); + if (!is_base_page_type(base_page_info->lsn_info_node.type)) { + UnlockReleaseBuffer(buffer); + ereport( + ERROR, + (errmsg(EXRTOFORMAT("get_nearest_base_page_pos failed, not base page type, block is %u/%u/%u %d %u, " + "batch_id: %u, redo_id: %u, pos: %lu"), + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.forkNum, + buf_tag.blockNum, batch_id, worker_id, latest_lsn_base_page_pos))); + } + + UnlockReleaseBuffer(buffer); + page_lsn = base_page_info->lsn_info_node.lsn[0]; + LsnInfoPosition prev_lsn_base_page_pos = base_page_info->base_page_list.prev; + + if (XLByteLT(page_lsn, read_lsn)) { + break; + } + + /* the base page's lsn >= read_lsn */ + base_page_pos = base_page_info->base_page_position; + + // the last base page info + if (XLByteEQ(lsn_info_list.next, latest_lsn_base_page_pos)) { + break; + } + + latest_lsn_base_page_pos = prev_lsn_base_page_pos; + } while (true); + + if (page_lsn == InvalidXLogRecPtr || base_page_pos == LSN_INFO_LIST_HEAD) { + ereport(DEBUG1, (errmsg(EXRTOFORMAT("get_nearest_base_page_pos failed, block is %u/%u/%u/%hd/%hu %d %u, " + "batch_id: %u, redo_id: %u, pos: %lu, page_lsn: %lu"), + buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, + buf_tag.rnode.bucketNode, buf_tag.rnode.opt, buf_tag.forkNum, buf_tag.blockNum, + batch_id, worker_id, latest_lsn_base_page_pos, page_lsn))); + } + + return base_page_pos; +} } // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp 
b/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp index c9adb75a65601cffa75d1c9706b80c458b1b5e4b..3e1ade64b6be09ea5a3ced8886969ed3bf98a743 100644 --- a/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_delay_ddl.cpp @@ -29,6 +29,9 @@ #include "access/extreme_rto/standby_read/block_info_meta.h" #include "access/multi_redo_api.h" #include "commands/dbcommands.h" +#include "access/slru.h" +#include "access/twophase.h" +#include "storage/procarray.h" #define DELAY_DDL_FILE_DIR "delay_ddl" #define DELAY_DDL_FILE_NAME "delay_ddl/delay_delete_info_file" @@ -36,8 +39,11 @@ typedef enum { DROP_DB_TYPE = 1, DROP_TABLE_TYPE, + TRUNCATE_CLOG, } DropDdlType; +#define ClogCtl(n) (&t_thrd.shemem_ptr_cxt.ClogCtl[CBufHashPartition(n)]) + const static uint32 MAX_NUM_PER_FILE = 0x10000; typedef struct { @@ -45,7 +51,10 @@ typedef struct { uint8 len; uint16 resvd1; uint32 resvd2; - ColFileNode node_info; + union { + ColFileNode file_info; + int64 pageno; + } node_info; XLogRecPtr lsn; pg_crc32 crc; } DelayDdlInfo; @@ -174,8 +183,8 @@ void update_delay_ddl_db(Oid db_id, Oid tablespace_id, XLogRecPtr lsn) .resvd2 = 0, }; - tmp_info.node_info.filenode.dbNode = db_id; - tmp_info.node_info.filenode.spcNode = tablespace_id; + tmp_info.node_info.file_info.filenode.dbNode = db_id; + tmp_info.node_info.file_info.filenode.spcNode = tablespace_id; tmp_info.lsn = lsn; INIT_CRC32C(tmp_info.crc); COMP_CRC32C(tmp_info.crc, (char*)&tmp_info, offsetof(DelayDdlInfo, crc)); @@ -195,7 +204,7 @@ void update_delay_ddl_files(ColFileNode* xnodes, int nrels, XLogRecPtr lsn) info_list[i].len = sizeof(DelayDdlInfo); info_list[i].resvd1 = 0; info_list[i].resvd2 = 0; - info_list[i].node_info = xnodes[i]; + info_list[i].node_info.file_info = xnodes[i]; info_list[i].lsn = lsn; INIT_CRC32C(info_list[i].crc); COMP_CRC32C(info_list[i].crc, (char*)&info_list[i], offsetof(DelayDdlInfo, crc)); @@ 
-236,7 +245,63 @@ void update_delay_ddl_files(ColFileNode* xnodes, int nrels, XLogRecPtr lsn) exit_state(&stat->insert_stat); } -void do_delay_ddl(DelayDdlInfo* info) +void update_delay_ddl_file_truncate_clog(XLogRecPtr lsn, int64 pageno) +{ + StandbyReadDelayDdlState *stat = &g_instance.comm_cxt.predo_cxt.standby_read_delay_ddl_stat; + enter_state(&stat->insert_stat); + uint64 insert_start = pg_atomic_read_u64(&stat->next_index_can_insert); + + char path[MAXPGPATH]; + errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, DELAY_DDL_FILE_NAME "_%08X_%lX", + t_thrd.shemem_ptr_cxt.ControlFile->timeline, insert_start / MAX_NUM_PER_FILE); + securec_check_ss(errorno, "", ""); + + off_t off_set = (off_t)(insert_start % MAX_NUM_PER_FILE * sizeof(DelayDdlInfo)); + + DelayDdlInfo truncate_clog_info = {0}; + truncate_clog_info.type = TRUNCATE_CLOG; + truncate_clog_info.len = sizeof(DelayDdlInfo); + truncate_clog_info.node_info.pageno = pageno; + truncate_clog_info.lsn = lsn; + INIT_CRC32C(truncate_clog_info.crc); + COMP_CRC32C(truncate_clog_info.crc, (char*)&truncate_clog_info, offsetof(DelayDdlInfo, crc)); + FIN_CRC32C(truncate_clog_info.crc); + + if (write_delay_ddl_info(path, &truncate_clog_info, sizeof(DelayDdlInfo), off_set)) { + pg_atomic_write_u64(&stat->next_index_can_insert, insert_start + 1); + } + exit_state(&stat->insert_stat); +} + +void clog_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid, XLogRecPtr lsn) +{ + const int max_check_times = 1000; + int check_times = 0; + bool conflict = true; + bool reach_max_check_times = false; + while (conflict && check_times < max_check_times) { + RedoInterruptCallBack(); + check_times++; + reach_max_check_times = (check_times == max_check_times); + conflict = proc_array_cancel_conflicting_proc(latest_removed_xid, lsn, reach_max_check_times); + } +} + +void do_truncate_clog(int64 pageno) +{ + ClogCtl(pageno)->shared->latest_page_number = pageno; + + TransactionId truncate_xid = 
(TransactionId)PAGE_TO_TRANSACTION_ID(pageno); + clog_truncate_cancel_conflicting_proc(truncate_xid, InvalidXLogRecPtr); + if (TransactionIdPrecedes(g_instance.undo_cxt.hotStandbyRecycleXid, truncate_xid)) { + pg_atomic_write_u64(&g_instance.undo_cxt.hotStandbyRecycleXid, truncate_xid); + } + + SimpleLruTruncate(ClogCtl(0), pageno, NUM_CLOG_PARTITIONS); + DeleteObsoleteTwoPhaseFile(pageno); +} + +void do_delay_ddl(DelayDdlInfo *info, bool is_old_delay_ddl = false) { pg_crc32c crc_check; INIT_CRC32C(crc_check); @@ -246,21 +311,45 @@ void do_delay_ddl(DelayDdlInfo* info) if (!EQ_CRC32C(crc_check, info->crc)) { ereport(WARNING, (errcode_for_file_access(), errmsg("delay ddl ,crc(%u:%u) check error, maybe is type:%u, info %u/%u/%u lsn:%lu", crc_check, info->crc, - (uint32)info->type, info->node_info.filenode.spcNode, info->node_info.filenode.dbNode, - info->node_info.filenode.relNode, info->lsn))); + (uint32)info->type, info->node_info.file_info.filenode.spcNode, info->node_info.file_info.filenode.dbNode, + info->node_info.file_info.filenode.relNode, info->lsn))); return; } if (info->type == DROP_TABLE_TYPE) { - unlink_relfiles(&info->node_info, 1); - xact_redo_log_drop_segs(&info->node_info, 1, info->lsn); + ereport(DEBUG2, + (errmodule(MOD_STANDBY_READ), + errmsg("delay ddl for table, type:%u, info %u/%u/%u lsn:%lu", + (uint32)info->type, + info->node_info.file_info.filenode.spcNode, + info->node_info.file_info.filenode.dbNode, + info->node_info.file_info.filenode.relNode, + info->lsn))); + unlink_relfiles(&info->node_info.file_info, 1, is_old_delay_ddl); + xact_redo_log_drop_segs(&info->node_info.file_info, 1, info->lsn); } else if (info->type == DROP_DB_TYPE) { - do_db_drop(info->node_info.filenode.dbNode, info->node_info.filenode.spcNode); + ereport(DEBUG2, + (errmodule(MOD_STANDBY_READ), + errmsg("delay ddl for database, type:%u, info %u/%u lsn:%lu", + (uint32)info->type, + info->node_info.file_info.filenode.spcNode, + info->node_info.file_info.filenode.dbNode, + 
info->lsn))); + do_db_drop(info->node_info.file_info.filenode.dbNode, info->node_info.file_info.filenode.spcNode); + } else if (info->type == TRUNCATE_CLOG) { + ereport(LOG, + (errmodule(MOD_STANDBY_READ), errmsg("delay ddl for truncate clog, pageno: %ld", info->node_info.pageno))); + UpdateMinRecoveryPoint(info->lsn, false); + do_truncate_clog(info->node_info.pageno); } else { - ereport(WARNING, (errcode_for_file_access(), - errmsg("delay ddl ,type error, maybe is type:%u, info %u/%u/%u lsn:%lu", (uint32)info->type, - info->node_info.filenode.spcNode, info->node_info.filenode.dbNode, info->node_info.filenode.relNode, - info->lsn))); + ereport(WARNING, + (errcode_for_file_access(), + errmsg("delay ddl ,type error, maybe is type:%u, info %u/%u/%u lsn:%lu", + (uint32)info->type, + info->node_info.file_info.filenode.spcNode, + info->node_info.file_info.filenode.dbNode, + info->node_info.file_info.filenode.relNode, + info->lsn))); } } @@ -293,6 +382,7 @@ void delete_by_lsn(XLogRecPtr lsn) int fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { + exit_state(&stat->delete_stat); return; } int count = read_delay_ddl_info(fd, info_list, cur_deleted * sizeof(DelayDdlInfo), (off_t)offset); @@ -326,6 +416,7 @@ void delete_by_lsn(XLogRecPtr lsn) deleted_total += cur_deleted; if (next_delete % MAX_NUM_PER_FILE == 0) { (void)unlink(path); + ereport(LOG, (errmsg("delete delay ddl file end [%s:%d:%s]", __FUNCTION__, __LINE__, path))); } RedoInterruptCallBack(); } @@ -342,6 +433,8 @@ void delete_by_table_space(Oid tablespace_id) uint64 next_delete = pg_atomic_read_u64(&stat->next_index_need_unlink); uint64 next_insert = pg_atomic_read_u64(&stat->next_index_can_insert); + ereport(LOG, (errmsg("delete_by_table_space start"))); + DelayDdlInfo* info_list = (DelayDdlInfo*)palloc0(sizeof(DelayDdlInfo) * MAX_NUM_PER_FILE); while (next_delete < next_insert) { uint32 copys = MAX_NUM_PER_FILE; @@ -363,6 +456,7 @@ void delete_by_table_space(Oid tablespace_id) if 
(fd < 0) { ereport(WARNING, (errmsg("delete_by_table_space: file %s could not open:%m", path))); + exit_state(&stat->delete_stat); return; } @@ -370,6 +464,7 @@ void delete_by_table_space(Oid tablespace_id) if (count <= 0) { ereport(WARNING, (errmsg("delete_by_table_space: file %s nothing deleted", path))); + exit_state(&stat->delete_stat); return; } close(fd); @@ -384,7 +479,7 @@ void delete_by_table_space(Oid tablespace_id) } for (uint32 i = 0; i < copys; ++i) { - if (info_list[i].node_info.filenode.spcNode == tablespace_id) { + if (info_list[i].node_info.file_info.filenode.spcNode == tablespace_id) { do_delay_ddl(&info_list[i]); } RedoInterruptCallBack(); @@ -394,6 +489,7 @@ void delete_by_table_space(Oid tablespace_id) } pfree(info_list); exit_state(&stat->delete_stat); + ereport(LOG, (errmsg("delete_by_table_space end"))); } void do_all_old_delay_ddl() @@ -435,6 +531,7 @@ void do_all_old_delay_ddl() } (void)unlink(path); + ereport(LOG, (errmsg("delete delay ddl file end [%s:%d:%s]", __FUNCTION__, __LINE__, path))); pfree(info_list); RedoInterruptCallBack(); } diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp index 145fa84a113efa9a6e89a42a56cd236832b40fa8..8e307c14ffb87a7ac356e940cdca7945a2ad6f68 100644 --- a/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp +++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_interface.cpp @@ -48,11 +48,11 @@ const char* EXRTO_FILE_SUB_DIR[] = { const uint32 EXRTO_FILE_PATH_LEN = 1024; const uint32 XID_THIRTY_TWO = 32; -void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode &read_node, bool is_start_lsn) +void make_standby_read_node(XLogRecPtr read_lsn, RelFileNode &read_node, bool is_start_lsn, Oid relnode) { read_node.spcNode = (Oid)(read_lsn >> 32); read_node.dbNode = (Oid)(read_lsn); - read_node.relNode = InvalidOid; // make sure it can be InvalidOid or not + 
read_node.relNode = relnode; read_node.opt = 0; if (is_start_lsn) { /* means read_lsn is the start ptr of xlog */ @@ -67,7 +67,7 @@ BufferDesc *alloc_standby_read_buf(const BufferTag &buf_tag, BufferAccessStrateg XLogRecPtr read_lsn, bool is_start_lsn) { RelFileNode read_node; - make_standby_read_node(read_lsn, read_node, is_start_lsn); + make_standby_read_node(read_lsn, read_node, is_start_lsn, buf_tag.rnode.relNode); BufferDesc *buf_desc = BufferAlloc(read_node, 0, buf_tag.forkNum, buf_tag.blockNum, strategy, &found, NULL); return buf_desc; @@ -78,8 +78,8 @@ Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber { bool hit = false; - Buffer newest_buf = ReadBuffer_common( - reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &hit, NULL); + Buffer newest_buf = + ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &hit, NULL); if (BufferIsInvalid(newest_buf)) { return InvalidBuffer; } @@ -97,8 +97,51 @@ Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber .forkNum = fork_num, .blockNum = block_num, }; + + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + BufferDesc *buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, page_lsn, false); + + if (hit) { + UnlockReleaseBuffer(newest_buf); + return BufferDescriptorGetBuffer(buf_desc); + } + Page read_page = (Page)BufHdrGetBlock(buf_desc); + + errno_t rc = memcpy_s(read_page, BLCKSZ, newest_page, BLCKSZ); + securec_check(rc, "\0", "\0"); + + UnlockReleaseBuffer(newest_buf); + buf_desc->extra->lsn_on_disk = PageGetLSN(read_page); +#ifdef USE_ASSERT_CHECKING + buf_desc->lsn_dirty = InvalidXLogRecPtr; +#endif + + TerminateBufferIO(buf_desc, false, (BM_VALID | BM_IS_TMP_BUF)); + return BufferDescriptorGetBuffer(buf_desc); +} + +Buffer get_newest_page_for_read_new( + Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy 
strategy) +{ + bool hit = false; + + Buffer newest_buf = + ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, fork_num, block_num, mode, strategy, &hit, NULL); + if (BufferIsInvalid(newest_buf)) { + return InvalidBuffer; + } + + LockBuffer(newest_buf, BUFFER_LOCK_SHARE); + Page newest_page = BufferGetPage(newest_buf); + + BufferTag buf_tag = { + .rnode = reln->rd_smgr->smgr_rnode.node, + .forkNum = fork_num, + .blockNum = block_num, + }; + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); - BufferDesc* buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, page_lsn, false); + BufferDesc *buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, PageGetLSN(newest_page), false); if (hit) { UnlockReleaseBuffer(newest_buf); @@ -108,10 +151,12 @@ Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber errno_t rc = memcpy_s(read_page, BLCKSZ, newest_page, BLCKSZ); securec_check(rc, "\0", "\0"); + UnlockReleaseBuffer(newest_buf); + buf_desc->extra->lsn_on_disk = PageGetLSN(read_page); #ifdef USE_ASSERT_CHECKING - buf_desc->lsn_dirty = InvalidXLogRecPtr; + buf_desc->lsn_dirty = InvalidXLogRecPtr; #endif TerminateBufferIO(buf_desc, false, (BM_VALID | BM_IS_TMP_BUF)); @@ -121,6 +166,9 @@ Buffer get_newest_page_for_read(Relation reln, ForkNumber fork_num, BlockNumber Buffer standby_read_buf( Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy) { + if (g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { + return extreme_rto_standby_read::standby_read_buf_new(reln, fork_num, block_num, mode, strategy); + } /* Open it at the smgr level */ RelationOpenSmgr(reln); // need or not ????? 
pgstat_count_buffer_read(reln); @@ -664,6 +712,8 @@ void dump_base_page_info_lsn_info(const BufferTag &buf_tag, LsnInfoPosition head uint32 worker_id; BasePageInfo base_page_info = NULL; Buffer buffer; + const int max_dump_item = 10000; + int cnt = 0; extreme_rto::RedoItemTag redo_item_tag; INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); @@ -672,6 +722,9 @@ void dump_base_page_info_lsn_info(const BufferTag &buf_tag, LsnInfoPosition head /* find fisrt base page whose lsn less than read lsn form tail to head */ do { + if (cnt > max_dump_item) { + break; + } /* reach the end of the list */ if (INFO_POSITION_IS_INVALID(head_lsn_base_page_pos)) { ereport(LOG, (errmsg("can not find base page, block is %u/%u/%u %d %u, batch_id: %u, redo_worker_id: %u", @@ -750,9 +803,18 @@ void dump_error_all_info(const RelFileNode &rnode, ForkNumber forknum, BlockNumb char *str_output = (char *)palloc0(MAXOUTPUTLEN * sizeof(char)); char *dump_filename = (char *)palloc0(MAXFILENAME * sizeof(char)); errno_t rc = snprintf_s(dump_filename + (int)strlen(dump_filename), MAXFILENAME, MAXFILENAME - 1, - "%s/%u_%u_%u_%d_%d.lsnblockinfo_dump", t_thrd.proc_cxt.DataDir, rnode.spcNode, rnode.dbNode, rnode.relNode, - forknum, blocknum); + "%s/%u_%u_%u_%d_%d.lsnblockinfo_dump", u_sess->attr.attr_common.Log_directory, + rnode.spcNode, rnode.dbNode, rnode.relNode, forknum, blocknum); securec_check_ss(rc, "\0", "\0"); + struct stat file_stat; + if (stat(dump_filename, &file_stat) == 0) { + /* file exists */ + pfree_ext(str_output); + pfree_ext(dump_filename); + buffer_in_progress_push(); + return; + } + FILE *dump_file = AllocateFile(dump_filename, PG_BINARY_W); if (dump_file == NULL) { ereport(LOG, (errmsg("can not alloc file. 
rnode is %u/%u/%u %d %u", buf_tag.rnode.spcNode, @@ -772,13 +834,143 @@ void dump_error_all_info(const RelFileNode &rnode, ForkNumber forknum, BlockNumb UnlockReleaseBuffer(buf); // buf was automatically locked by getting block meta info, so we need release uint result = fwrite(str_output, 1, strlen(str_output), dump_file); - if (result != strlen(str_output)) { - ereport(ERROR, (errcode(ERRCODE_FILE_WRITE_FAILED), errmsg("Cannot write into file %s!", dump_filename))); + if (result == strlen(str_output)) { + (void)fsync(fileno(dump_file)); + exrto_xlog_dump(dump_filename, dump_lsn_info_stru); + } else { + pfree_ext(str_output); + pfree_ext(dump_filename); + (void)FreeFile(dump_file); + buffer_in_progress_push(); + + ereport(ERROR, (errcode(ERRCODE_FILE_WRITE_FAILED), + errmsg("Cannot write into file %s/%u_%u_%u_%d_%u.lsnblockinfo_dump!", + u_sess->attr.attr_common.Log_directory, rnode.spcNode, rnode.dbNode, rnode.relNode, + forknum, blocknum))); } pfree_ext(str_output); - (void)FreeFile(dump_file); - exrto_xlog_dump(dump_filename, dump_lsn_info_stru); pfree_ext(dump_filename); + (void)FreeFile(dump_file); buffer_in_progress_push(); } + +Buffer standby_read_buf_new( + Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy) +{ + /* Open it at the smgr level */ + RelationOpenSmgr(reln); // need or not ????? 
+ pgstat_count_buffer_read(reln); + pgstatCountBlocksFetched4SessionLevel(); + + if (RelationisEncryptEnable(reln)) { + reln->rd_smgr->encrypt = true; + } + + XLogRecPtr read_lsn = MAX_XLOG_REC_PTR; + if (u_sess->utils_cxt.CurrentSnapshot != NULL && XLogRecPtrIsValid(u_sess->utils_cxt.CurrentSnapshot->read_lsn)) { + read_lsn = u_sess->utils_cxt.CurrentSnapshot->read_lsn; + } else if (XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)) { + read_lsn = t_thrd.proc->exrto_read_lsn; + } + + + Buffer read_buf = get_newest_page_for_read_new(reln, fork_num, block_num, mode, strategy); + if (unlikely(read_buf == InvalidBuffer)) { + ereport(DEBUG1, + (errmsg("couldnot get newest page buf %u/%u/%u %d %u read lsn %08X/%08X current_time: %ld " + "gen_snaptime:%ld thread_read_lsn:%08X/%08X", + reln->rd_smgr->smgr_rnode.node.spcNode, + reln->rd_smgr->smgr_rnode.node.dbNode, + reln->rd_smgr->smgr_rnode.node.relNode, + fork_num, + block_num, + (uint32)(read_lsn >> XID_THIRTY_TWO), + (uint32)read_lsn, + GetCurrentTimestamp(), + g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time, + (uint32)(t_thrd.proc->exrto_read_lsn >> XID_THIRTY_TWO), + (uint32)t_thrd.proc->exrto_read_lsn))); + return InvalidBuffer; + } + + if (XLByteLT(PageGetLSN(BufferGetPage(read_buf)), read_lsn)) { + return read_buf; + } + + BufferTag buf_tag = { + .rnode = reln->rd_smgr->smgr_rnode.node, + .forkNum = fork_num, + .blockNum = block_num, + }; + + Buffer block_info_buf; + // just lock this buffer ,so that redo worker could not modify this block info + BlockMetaInfo *block_info = + get_block_meta_info_by_relfilenode(buf_tag, NULL, RBM_ZERO_ON_ERROR, &block_info_buf, true); + if (unlikely(block_info == NULL || block_info_buf == InvalidBuffer)) { + ereport(PANIC, + (errmsg("standby_read_buf_new read block invalid %u/%u/%u/%hd/%hu %d %u", + buf_tag.rnode.spcNode, + buf_tag.rnode.dbNode, + buf_tag.rnode.relNode, + buf_tag.rnode.bucketNode, + buf_tag.rnode.opt, + buf_tag.forkNum, + buf_tag.blockNum))); + } + + if 
(!is_block_meta_info_valid(block_info)) { + UnlockReleaseBuffer(block_info_buf); + return read_buf; + } + + if (block_info->max_lsn < read_lsn) { + UnlockReleaseBuffer(block_info_buf); + return read_buf; + } + + // find nearest base page + LsnInfoPosition base_page_pos = get_nearest_base_page_pos(buf_tag, block_info->base_page_info_list, read_lsn); + if (base_page_pos == LSN_INFO_LIST_HEAD) { + UnlockReleaseBuffer(block_info_buf); + return read_buf; + } + UnlockReleaseBuffer(read_buf); + + extreme_rto::RedoItemTag redo_item_tag; + INIT_REDO_ITEM_TAG(redo_item_tag, buf_tag.rnode, buf_tag.forkNum, buf_tag.blockNum); + const uint32 worker_num_per_mng = (uint32)extreme_rto::get_page_redo_worker_num_per_manager(); + /* batch id and worker id start from 1 when reading a page */ + uint32 batch_id = extreme_rto::GetSlotId(buf_tag.rnode, 0, 0, (uint32)extreme_rto::get_batch_redo_num()) + 1; + uint32 redo_worker_id = extreme_rto::GetWorkerId(&redo_item_tag, worker_num_per_mng) + 1; + + Buffer base_page_buffer = buffer_read_base_page(batch_id, redo_worker_id, base_page_pos, RBM_NORMAL); + bool hit = false; + LockBuffer(base_page_buffer, BUFFER_LOCK_SHARE); + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + + Page base_page = BufferGetPage(base_page_buffer); + XLogRecPtr base_page_lsn = PageGetLSN(base_page); + BufferDesc *buf_desc = alloc_standby_read_buf(buf_tag, strategy, hit, base_page_lsn, false); + + if (hit) { + UnlockReleaseBuffer(block_info_buf); + UnlockReleaseBuffer(base_page_buffer); + return BufferDescriptorGetBuffer(buf_desc); + } + + Page read_page = (Page)BufHdrGetBlock(buf_desc); + errno_t rc = memcpy_s(read_page, BLCKSZ, base_page, BLCKSZ); + securec_check(rc, "\0", "\0"); + + buf_desc->extra->lsn_on_disk = PageGetLSN(read_page); +#ifdef USE_ASSERT_CHECKING + buf_desc->lsn_dirty = InvalidXLogRecPtr; +#endif + + TerminateBufferIO(buf_desc, false, (BM_VALID | BM_IS_TMP_BUF)); + UnlockReleaseBuffer(block_info_buf); + 
UnlockReleaseBuffer(base_page_buffer); + return BufferDescriptorGetBuffer(buf_desc); } +} // namespace extreme_rto_standby_read diff --git a/src/gausskernel/storage/access/redo/standby_read/standby_read_proc.cpp b/src/gausskernel/storage/access/redo/standby_read/standby_read_proc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4f6231ab2b72d690f49b0c376d395c9008efaef --- /dev/null +++ b/src/gausskernel/storage/access/redo/standby_read/standby_read_proc.cpp @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * standby_read_proc.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/redo/standby_read/standby_read_proc.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "access/extreme_rto/page_redo.h" +#include "access/extreme_rto/standby_read/block_info_meta.h" +#include "access/extreme_rto/standby_read/lsn_info_meta.h" +#include "access/extreme_rto/standby_read/standby_read_base.h" +#include "access/multi_redo_api.h" +#include "access/extreme_rto/dispatcher.h" +#include "storage/procarray.h" +#include "replication/walreceiver.h" + +inline void invalid_msg_leak_warning(XLogRecPtr trxn_lsn) +{ + if (t_thrd.page_redo_cxt.invalid_msg.valid) { + ereport(WARNING, + (errmsg(EXRTOFORMAT("[exrto_generate_snapshot] not send invalid msg: %08X/%08X"), + (uint32)(trxn_lsn >> UINT64_HALF), + (uint32)trxn_lsn))); + } +} + +void exrto_generate_snapshot(XLogRecPtr trxn_lsn) +{ + if (!g_instance.attr.attr_storage.EnableHotStandby) { + return; + } + + ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; + /* + * do not generate the same snapshot repeatedly. 
+ */ + if (XLByteLE(trxn_lsn, exrto_snapshot->read_lsn)) { + invalid_msg_leak_warning(trxn_lsn); + return; + } + + TransactionId xmin; + TransactionId xmax; + CommitSeqNo snapshot_csn; + + exrto_get_snapshot_data(xmin, xmax, snapshot_csn); + (void)LWLockAcquire(ExrtoSnapshotLock, LW_EXCLUSIVE); + exrto_snapshot->snapshot_csn = snapshot_csn; + exrto_snapshot->xmin = xmin; + exrto_snapshot->xmax = xmax; + exrto_snapshot->read_lsn = trxn_lsn; + send_delay_invalid_message(); + LWLockRelease(ExrtoSnapshotLock); +} + +void exrto_read_snapshot(Snapshot snapshot) +{ + if ((!is_exrto_standby_read_worker()) || u_sess->proc_cxt.clientIsCMAgent || dummyStandbyMode) { + return; + } + + ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; + bool retry_get = false; + const static uint64 WAIT_COUNT = 0x7FFFF; + uint64 retry_count = 0; + t_thrd.pgxact->xmin = InvalidTransactionId; + t_thrd.proc->exrto_min = InvalidXLogRecPtr; +RETRY_GET: + if (retry_get) { + CHECK_FOR_INTERRUPTS(); + pg_usleep(100L); + } + retry_count++; + if ((retry_count & WAIT_COUNT) == WAIT_COUNT) { + ereport(LOG, + (errmsg("retry to get exrto-standby-read snapshot, standby_redo_cleanup_xmin = %lu, " + "standby_redo_cleanup_xmin_lsn = %08X/%08X, " + "exrto_snapshot->xmin = %lu, read_lsn = %08X/%08X", + t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin, + (uint32)(t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn >> UINT64_HALF), + (uint32)t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn, exrto_snapshot->xmin, + (uint32)(exrto_snapshot->read_lsn >> UINT64_HALF), (uint32)exrto_snapshot->read_lsn))); + } + (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); + if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { + LWLockRelease(ExrtoSnapshotLock); + ereport(ERROR, (errmsg("could not get a valid snapshot with extreme rto"))); + } + + /* In exrto_standby_read_opt mode, getting a snapshot needs to wait for the cleanup-info xlog to be processed. 
*/ + if (g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { + LWLockAcquire(ProcArrayLock, LW_SHARED); + bool condition = + (exrto_snapshot->xmin <= + t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin) && + (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn > exrto_snapshot->read_lsn); + LWLockRelease(ProcArrayLock); + if (condition) { + retry_get = true; + LWLockRelease(ExrtoSnapshotLock); + goto RETRY_GET; + } + } + + snapshot->snapshotcsn = exrto_snapshot->snapshot_csn; + snapshot->xmin = exrto_snapshot->xmin; + snapshot->xmax = exrto_snapshot->xmax; + snapshot->read_lsn = exrto_snapshot->read_lsn; + + t_thrd.pgxact->xmin = snapshot->xmin; + u_sess->utils_cxt.TransactionXmin = snapshot->xmin; + + t_thrd.proc->exrto_read_lsn = snapshot->read_lsn; + t_thrd.proc->exrto_min = snapshot->read_lsn; + LWLockRelease(ExrtoSnapshotLock); + + if (t_thrd.proc->exrto_gen_snap_time == 0) { + t_thrd.proc->exrto_gen_snap_time = GetCurrentTimestamp(); + } + Assert(XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)); +} + +static inline uint64 get_force_recycle_pos(uint64 recycle_pos, uint64 insert_pos) +{ + const double force_recyle_ratio = 0.3; /* to be adjusted */ + Assert(recycle_pos <= insert_pos); + return recycle_pos + (uint64)((insert_pos - recycle_pos) * force_recyle_ratio); +} + +XLogRecPtr calculate_force_recycle_lsn_per_worker(StandbyReadMetaInfo *meta_info) +{ + uint64 base_page_recycle_pos; + uint64 lsn_info_recycle_pos; + XLogRecPtr base_page_recycle_lsn = InvalidXLogRecPtr; + XLogRecPtr lsn_info_recycle_lsn = InvalidXLogRecPtr; + Buffer buffer; + Page page; + + /* for base page */ + if (meta_info->base_page_recyle_position < meta_info->base_page_next_position) { + base_page_recycle_pos = + get_force_recycle_pos(meta_info->base_page_recyle_position, meta_info->base_page_next_position); + buffer = extreme_rto_standby_read::buffer_read_base_page( + meta_info->batch_id, meta_info->redo_id, base_page_recycle_pos, RBM_NORMAL); + 
LockBuffer(buffer, BUFFER_LOCK_SHARE); + base_page_recycle_lsn = PageGetLSN(BufferGetPage(buffer)); + UnlockReleaseBuffer(buffer); + } + + /* for lsn info */ + if (meta_info->lsn_table_recyle_position < meta_info->lsn_table_next_position) { + lsn_info_recycle_pos = + get_force_recycle_pos(meta_info->lsn_table_recyle_position, meta_info->lsn_table_next_position); + page = extreme_rto_standby_read::get_lsn_info_page( + meta_info->batch_id, meta_info->redo_id, lsn_info_recycle_pos, RBM_NORMAL, &buffer); + if (unlikely(page == NULL || buffer == InvalidBuffer)) { + ereport(PANIC, + (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), + meta_info->batch_id, + meta_info->redo_id, + lsn_info_recycle_pos))); + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); + extreme_rto_standby_read::LsnInfo lsn_info = + (extreme_rto_standby_read::LsnInfo)(page + extreme_rto_standby_read::LSN_INFO_HEAD_SIZE); + lsn_info_recycle_lsn = lsn_info->lsn[0]; + UnlockReleaseBuffer(buffer); + } + + return rtl::max(base_page_recycle_lsn, lsn_info_recycle_lsn); +} + +void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn) +{ + XLogRecPtr recycle_lsn_per_worker; + uint32 worker_nums = extreme_rto::g_dispatcher->allWorkersCnt; + extreme_rto::PageRedoWorker **workers = extreme_rto::g_dispatcher->allWorkers; + + for (uint32 i = 0; i < worker_nums; ++i) { + extreme_rto::PageRedoWorker *page_redo_worker = workers[i]; + if (page_redo_worker->role != extreme_rto::REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { + continue; + } + recycle_lsn_per_worker = calculate_force_recycle_lsn_per_worker(&page_redo_worker->standby_read_meta_info); + if (XLByteLT(recycle_lsn, recycle_lsn_per_worker)) { + recycle_lsn = recycle_lsn_per_worker; + } + } + ereport(LOG, + (errmsg(EXRTOFORMAT("[exrto_recycle] try force recycle, recycle lsn: %08X/%08X"), + (uint32)(recycle_lsn >> UINT64_HALF), + (uint32)recycle_lsn))); +} + +static inline bool exceed_standby_max_query_time(TimestampTz 
start_time) +{ + if (start_time == 0) { + return false; + } + return TimestampDifferenceExceeds( + start_time, GetCurrentTimestamp(), g_instance.attr.attr_storage.standby_max_query_time * MSECS_PER_SEC); +} + +/* 1. resolve recycle conflict with backends + * 2. get oldest xmin and oldest readlsn of backends. */ +void proc_array_get_oldeset_readlsn( + XLogRecPtr recycle_lsn, XLogRecPtr &oldest_lsn, TransactionId &oldest_xmin, bool &conflict) +{ + ProcArrayStruct *proc_array = g_instance.proc_array_idx; + conflict = false; + + (void)LWLockAcquire(ProcArrayLock, LW_SHARED); + for (int index = 0; index < proc_array->numProcs; index++) { + int pg_proc_no = proc_array->pgprocnos[index]; + PGPROC *pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; + PGXACT *pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; + TransactionId pxmin = pg_xact->xmin; + XLogRecPtr read_lsn = pg_proc->exrto_min; + ereport(DEBUG1, + (errmsg(EXRTOFORMAT("proc_array_get_oldeset_readlsn info, read_lsn: %08X/%08X ,xmin: %lu ,vacuum_flags: " + "%hhu ,pid: %lu"), + (uint32)(read_lsn >> UINT64_HALF), + (uint32)read_lsn, + pxmin, + pg_xact->vacuumFlags, + pg_proc->pid))); + + if (pg_proc->pid == 0 || XLogRecPtrIsInvalid(read_lsn)) { + continue; + } + + Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); + /* + * Backend is doing logical decoding which manages xmin + * separately, check below. + */ + if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) { + continue; + } + + /* cancel query when its read_lsn < recycle_lsn or its runtime > standby_max_query_time */ + if (XLByteLT(read_lsn, recycle_lsn) || exceed_standby_max_query_time(pg_proc->exrto_gen_snap_time)) { + pg_proc->recoveryConflictPending = true; + conflict = true; + if (pg_proc->pid != 0) { + /* + * Kill the pid if it's still here. If not, that's what we + * wanted so ignore any errors. 
+ */ + (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId); + ereport(LOG, + (errmsg( + EXRTOFORMAT("read_lsn is less than recycle_lsn or query time exceed max_query_time while " + "get_oldeset_readlsn, read_lsn %lu, " + "recycle_lsn: %lu, exrto_gen_snap_time: %ld, current_time: %ld, thread id = %lu\n"), + read_lsn, + recycle_lsn, + pg_proc->exrto_gen_snap_time, + GetCurrentTimestamp(), + pg_proc->pid))); + /* + * Wait a little bit for it to die so that we avoid flooding + * an unresponsive backend when system is heavily loaded. + */ + pg_usleep(5000L); + } + continue; + } + + if (XLogRecPtrIsInvalid(oldest_lsn) || (XLogRecPtrIsValid(read_lsn) && XLByteLT(read_lsn, oldest_lsn))) { + oldest_lsn = read_lsn; + } + + if (!TransactionIdIsValid(oldest_xmin) || + (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) { + oldest_xmin = pxmin; + } + } + LWLockRelease(ProcArrayLock); +} + +void proc_array_get_oldeset_xmin_for_undo(TransactionId &oldest_xmin) +{ + ProcArrayStruct *proc_array = g_instance.proc_array_idx; + + (void)LWLockAcquire(ProcArrayLock, LW_SHARED); + for (int index = 0; index < proc_array->numProcs; index++) { + int pg_proc_no = proc_array->pgprocnos[index]; + PGPROC *pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; + PGXACT *pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; + TransactionId pxmin = pg_xact->xmin; + + if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin)) { + continue; + } + + Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); + /* + * Backend is doing logical decoding which manages xmin + * separately, check below. 
+ */ + if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) { + continue; + } + if (!TransactionIdIsValid(oldest_xmin) || + (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) { + oldest_xmin = pxmin; + } + } + LWLockRelease(ProcArrayLock); +} + +XLogRecPtr exrto_calculate_recycle_position(bool force_recyle) +{ + Assert(t_thrd.role != PAGEREDO); + Assert(IS_EXRTO_READ); + + XLogRecPtr recycle_lsn = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn); + XLogRecPtr oldest_lsn = InvalidXLogRecPtr; + TransactionId oldest_xmin = InvalidTransactionId; + bool conflict = false; + const int max_check_times = 1000; + int check_times = 0; + + if (force_recyle) { + calculate_force_recycle_lsn(recycle_lsn); + } + ereport(DEBUG1, + (errmsg(EXRTOFORMAT("time information of calculate recycle position, current_time: %ld, snapshot " + "read_lsn: %08X/%08X, gen_snaptime:%ld"), + GetCurrentTimestamp(), + (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), + (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, + g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time))); + + /* + * If there is no backend read threads, set read oldest lsn to snapshot lsn. + */ + ExrtoSnapshot exrto_snapshot = NULL; + exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; + (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); + if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { + ereport(WARNING, (errmsg("could not get a valid snapshot with extreme rto"))); + } else { + oldest_lsn = exrto_snapshot->read_lsn; + oldest_xmin = exrto_snapshot->xmin; + } + LWLockRelease(ExrtoSnapshotLock); + /* Loop checks to avoid conflicting queries that were not successfully canceled. 
*/ + do { + RedoInterruptCallBack(); + proc_array_get_oldeset_readlsn(recycle_lsn, oldest_lsn, oldest_xmin, conflict); + check_times++; + } while (conflict && check_times < max_check_times); + + recycle_lsn = rtl::max(recycle_lsn, oldest_lsn); + + ereport(LOG, + (errmsg( + EXRTOFORMAT( + "[exrto_recycle] calculate recycle position, oldestlsn: %08X/%08X, snapshot read_lsn: %08X/%08X, try " + "recycle lsn: %08X/%08X, xmin: %lu"), + (uint32)(oldest_lsn >> UINT64_HALF), + (uint32)oldest_lsn, + (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), + (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, + (uint32)(recycle_lsn >> UINT64_HALF), + (uint32)recycle_lsn, oldest_xmin))); + pg_atomic_write_u64(&g_instance.comm_cxt.predo_cxt.exrto_recyle_xmin, oldest_xmin); + return recycle_lsn; +} + +TransactionId exrto_calculate_recycle_xmin_for_undo() +{ + Assert(t_thrd.role != PAGEREDO); + Assert(IS_EXRTO_READ); + TransactionId oldest_xmin = InvalidTransactionId; + TransactionId snapshot_xmin = InvalidTransactionId; + proc_array_get_oldeset_xmin_for_undo(oldest_xmin); + + /* + * If there is no backend read threads, set read oldest lsn to snapshot lsn. 
+ */ + if ((oldest_xmin == InvalidTransactionId) && (extreme_rto::g_dispatcher != NULL)) { + ExrtoSnapshot exrto_snapshot = NULL; + exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; + (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); + if (XLByteEQ(exrto_snapshot->xmin, InvalidTransactionId)) { + ereport(WARNING, + (errmsg("exrto_calculate_recycle_xmin_for_undo: could not get a valid snapshot in exrto_snapshot"))); + } else { + snapshot_xmin = exrto_snapshot->xmin; + } + + LWLockRelease(ExrtoSnapshotLock); + } + ereport(DEBUG1, + (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_calculate_recycle_xmin_for_undo: oldest_xmin: %lu, snapshot_xmin: %lu."), + oldest_xmin, + snapshot_xmin))); + + if (oldest_xmin == InvalidTransactionId) { + return snapshot_xmin; + } + return oldest_xmin; +} diff --git a/src/gausskernel/storage/access/rmgrdesc/standbydesc.cpp b/src/gausskernel/storage/access/rmgrdesc/standbydesc.cpp index 50197b56f5fd4ac79c5e9c1cdc8135a6ee9d40d3..76adbc238ffbb4509a5387bf3acefd63f86d7349 100644 --- a/src/gausskernel/storage/access/rmgrdesc/standbydesc.cpp +++ b/src/gausskernel/storage/access/rmgrdesc/standbydesc.cpp @@ -45,36 +45,47 @@ void standby_desc(StringInfo buf, XLogReaderState *record) char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; if (info == XLOG_STANDBY_LOCK) { - xl_standby_locks *xlrec = (xl_standby_locks *)rec; - int i; - - appendStringInfo(buf, "AccessExclusive locks: nlocks %d ", xlrec->nlocks); - - for (i = 0; i < xlrec->nlocks; i++) - appendStringInfo(buf, " xid " XID_FMT " db %u rel %u", xlrec->locks[i].xid, xlrec->locks[i].dbOid, - xlrec->locks[i].relOid); + if ((XLogRecGetInfo(record) & PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_FLAG) == 0) { + xl_standby_locks *xlrec = (xl_standby_locks *)rec; + appendStringInfo(buf, "AccessExclusive locks: nlocks %d ", xlrec->nlocks); + for (int i = 0; i < xlrec->nlocks; i++) { + appendStringInfo(buf, " xid " XID_FMT " db %u rel %u seq %u", 
xlrec->locks[i].xid, + xlrec->locks[i].dbOid, xlrec->locks[i].relOid, InvalidOid); + } + } else { + XLogStandbyLocksNew *xlrec = (XLogStandbyLocksNew *)rec; + appendStringInfo(buf, "AccessExclusive locks: nlocks %d ", xlrec->nlocks); + for (int i = 0; i < xlrec->nlocks; i++) { + appendStringInfo(buf, " xid " XID_FMT " db %u rel %u seq %u", xlrec->locks[i].xid, + xlrec->locks[i].dbOid, xlrec->locks[i].relOid, xlrec->locks[i].seq); + } + } } else if (info == XLOG_RUNNING_XACTS) { appendStringInfo(buf, " XLOG_RUNNING_XACTS"); } else if (info == XLOG_STANDBY_CSN) { appendStringInfo(buf, " XLOG_STANDBY_CSN"); } else if (info == XLOG_STANDBY_UNLOCK) { - xl_standby_locks *xlrec = (xl_standby_locks *)rec; - int i; - - appendStringInfo(buf, "release AccessExclusive locks: nlocks %d ", xlrec->nlocks); - - for (i = 0; i < xlrec->nlocks; i++) { - appendStringInfo(buf, " xid " XID_FMT " db %u rel %u", xlrec->locks[i].xid, xlrec->locks[i].dbOid, - xlrec->locks[i].relOid); + if ((XLogRecGetInfo(record) & PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_FLAG) == 0) { + xl_standby_locks *xlrec = (xl_standby_locks *)rec; + appendStringInfo(buf, "AccessExclusive locks: nlocks %d ", xlrec->nlocks); + for (int i = 0; i < xlrec->nlocks; i++) { + appendStringInfo(buf, " xid " XID_FMT " db %u rel %u seq %u", xlrec->locks[i].xid, + xlrec->locks[i].dbOid, xlrec->locks[i].relOid, InvalidOid); + } + } else { + XLogStandbyLocksNew *xlrec = (XLogStandbyLocksNew *)rec; + appendStringInfo(buf, "AccessExclusive locks: nlocks %d ", xlrec->nlocks); + for (int i = 0; i < xlrec->nlocks; i++) { + appendStringInfo(buf, " xid " XID_FMT " db %u rel %u seq %u", xlrec->locks[i].xid, + xlrec->locks[i].dbOid, xlrec->locks[i].relOid, xlrec->locks[i].seq); + } } - } else if (info == XLOG_STANDBY_CSN_COMMITTING) { - uint64* id = ((uint64 *)XLogRecGetData(record)); + uint64 *id = ((uint64 *)XLogRecGetData(record)); appendStringInfo(buf, " XLOG_STANDBY_CSN_COMMITTING, xid %lu, csn %lu", id[0], id[1]); } else if (info == 
XLOG_STANDBY_CSN_ABORTED) { - uint64* id = ((uint64 *)XLogRecGetData(record)); + uint64 *id = ((uint64 *)XLogRecGetData(record)); appendStringInfo(buf, " XLOG_STANDBY_CSN_ABORTED, xid %lu", id[0]); - } else appendStringInfo(buf, "UNKNOWN"); } diff --git a/src/gausskernel/storage/access/transam/clog.cpp b/src/gausskernel/storage/access/transam/clog.cpp index 0decff72fc690f2f0a1cf39979d2ac7ed6ee3c5c..4109ef78534de02f28cb52f3e7311162665eea63 100644 --- a/src/gausskernel/storage/access/transam/clog.cpp +++ b/src/gausskernel/storage/access/transam/clog.cpp @@ -42,12 +42,15 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "access/xlogutils.h" +#include "access/extreme_rto/standby_read/standby_read_delay_ddl.h" +#include "access/multi_redo_api.h" #include "miscadmin.h" #include "pgstat.h" #include "pg_trace.h" #include "storage/smgr/fd.h" #include "storage/proc.h" #include "storage/file/fio_device.h" +#include "storage/procarray.h" #ifdef USE_ASSERT_CHECKING #include "utils/builtins.h" #endif /* USE_ASSERT_CHECKING */ @@ -1105,6 +1108,20 @@ static void WriteTruncateXlogRec(int64 pageno) XLogWaitFlush(recptr); } +void clog_redo_truncate_cancel_conflicting_proc(TransactionId latest_removed_xid, XLogRecPtr lsn) +{ + const int max_check_times = 1000; + int check_times = 0; + bool conflict = true; + bool reach_max_check_times = false; + while (conflict && check_times < max_check_times) { + RedoInterruptCallBack(); + check_times++; + reach_max_check_times = (check_times == max_check_times); + conflict = proc_array_cancel_conflicting_proc(latest_removed_xid, lsn, reach_max_check_times); + } +} + /* * CLOG resource manager's routines */ @@ -1137,12 +1154,22 @@ void clog_redo(XLogReaderState *record) rc = memcpy_s(&pageno, sizeof(int64), XLogRecGetData(record), sizeof(int64)); securec_check(rc, "", ""); + if (IS_EXRTO_READ) { + update_delay_ddl_file_truncate_clog(record->ReadRecPtr, pageno); + return; + } /* * During XLOG replay, latest_page_number isn't set up 
yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ ClogCtl(pageno)->shared->latest_page_number = pageno; + TransactionId truncate_xid = (TransactionId)PAGE_TO_TRANSACTION_ID(pageno); + clog_redo_truncate_cancel_conflicting_proc(truncate_xid, InvalidXLogRecPtr); + if (TransactionIdPrecedes(g_instance.undo_cxt.hotStandbyRecycleXid, truncate_xid)) { + pg_atomic_write_u64(&g_instance.undo_cxt.hotStandbyRecycleXid, truncate_xid); + } + SimpleLruTruncate(ClogCtl(0), pageno, NUM_CLOG_PARTITIONS); DeleteObsoleteTwoPhaseFile(pageno); } else @@ -1302,3 +1329,4 @@ void SSCLOGShmemClear(void) CBufMappingPartitionLockByIndex(i), CLOGDIR); } } + diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp index d24d2fdbddb2702aa31988dd9200f52419033813..eb77095e0abe2cb0f6bca901e0554ce499c6eedb 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp @@ -221,7 +221,7 @@ static const RmgrDispatchData g_dispatchTable[RM_MAX_ID + 1] = { }; const int REDO_WAIT_SLEEP_TIME = 5000; /* 5ms */ -const int MAX_REDO_WAIT_LOOP = 24000; /* 5ms*24000 = 2min */ +const int MAX_REDO_WAIT_LOOP = 48000; /* 5ms*48000 = 4min */ uint32 GetReadyWorker() { @@ -1320,7 +1320,8 @@ static bool DispatchCompresseShrinkRecord(XLogReaderState *record, List *expecte static bool DispatchBtreeRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { uint8 info = (XLogRecGetInfo(record) & (~XLR_INFO_MASK)); - if (info == XLOG_BTREE_REUSE_PAGE) { + if (info == XLOG_BTREE_REUSE_PAGE && + !(IS_EXRTO_STANDBY_READ && g_instance.attr.attr_storage.enable_exrto_standby_read_opt)) { DispatchTxnRecord(record, expectedTLIs); } else { DispatchRecordWithPages(record, expectedTLIs); @@ -1332,7 +1333,8 @@ static bool
DispatchUBTreeRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { uint8 info = (XLogRecGetInfo(record) & (~XLR_INFO_MASK)); - if (info == XLOG_UBTREE_REUSE_PAGE) { + if (info == XLOG_UBTREE_REUSE_PAGE && + !(IS_EXRTO_STANDBY_READ && g_instance.attr.attr_storage.enable_exrto_standby_read_opt)) { DispatchTxnRecord(record, expectedTLIs); } else { DispatchRecordWithPages(record, expectedTLIs); @@ -1435,7 +1437,11 @@ static bool DispatchHeap2VacuumRecord(XLogReaderState *record, List *expectedTLI uint8 info = ((XLogRecGetInfo(record) & (~XLR_INFO_MASK)) & XLOG_HEAP_OPMASK); if (info == XLOG_HEAP2_CLEANUP_INFO) { - DispatchTxnRecord(record, expectedTLIs); + xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *)XLogRecGetData(record); + RelFileNode tmp_node; + RelFileNodeCopy(tmp_node, xlrec->node, (int2)XLogRecGetBucketId(record)); + + DispatchToOnePageWorker(record, tmp_node, expectedTLIs); } else { DispatchRecordWithPages(record, expectedTLIs); } diff --git a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp index 18c47d565cf3e43a35082a3dd613bfd768f367a9..a7d1389eb2f0ac2f01d86360f33ad53d767b66c5 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/exrto_recycle.cpp @@ -122,8 +122,11 @@ bool check_if_need_force_recycle() total_lsn_info_size += (meta_info.lsn_table_next_position - meta_info.lsn_table_recyle_position); } - if (total_base_page_size > g_instance.attr.attr_storage.max_standby_base_page_size * ratio || - total_lsn_info_size > g_instance.attr.attr_storage.max_standby_lsn_info_size * ratio) { + /* the unit of max_standby_base_page_size and max_standby_lsn_info_size is KB */ + uint64 max_standby_base_page_size = ((uint64)u_sess->attr.attr_storage.max_standby_base_page_size << 10); + uint64 max_standby_lsn_info_size = 
((uint64)u_sess->attr.attr_storage.max_standby_lsn_info_size << 10); + if (total_base_page_size > max_standby_base_page_size * ratio || + total_lsn_info_size > max_standby_lsn_info_size * ratio) { return true; } @@ -145,6 +148,7 @@ void do_standby_read_recyle(XLogRecPtr recycle_lsn) XLByteLT(page_redo_worker->standby_read_meta_info.recycle_lsn_per_worker, min_recycle_lsn)) { min_recycle_lsn = page_redo_worker->standby_read_meta_info.recycle_lsn_per_worker; } + pg_usleep(1000); // sleep 1ms } if (XLByteLT(g_instance.comm_cxt.predo_cxt.global_recycle_lsn, min_recycle_lsn)) { pg_atomic_write_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn, min_recycle_lsn); diff --git a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp index 7d5a19da63c6991a88b6688b370b96372479419b..1b2db5d13d8981d563f4a35812195f958f462c21 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp @@ -100,7 +100,7 @@ namespace extreme_rto { static const int MAX_PARSE_BUFF_NUM = PAGE_WORK_QUEUE_SIZE * 10 * 3; static const int MAX_LOCAL_BUFF_NUM = PAGE_WORK_QUEUE_SIZE * 10 * 3; -static const int MAX_CLEAR_SMGR_NUM = 100000; +static const int MAX_CLEAR_SMGR_NUM = 50000; static const char *const PROCESS_TYPE_CMD_ARG = "--forkpageredo"; static char g_AUXILIARY_TYPE_CMD_ARG[16] = {0}; @@ -117,6 +117,7 @@ RedoItem g_cleanInvalidPageMark; static const int PAGE_REDO_WORKER_ARG = 3; static const int REDO_SLEEP_50US = 50; static const int REDO_SLEEP_100US = 100; +static const int EXRTO_STANDBY_READ_TIME_INTERVAL = 1 * 1000; static void ApplySinglePageRecord(RedoItem *); static void InitGlobals(); @@ -342,6 +343,23 @@ uint32 GetMyPageRedoWorkerIdWithLock() return g_redoWorker->id; } +void redo_worker_release_all_locks() +{ + Assert(t_thrd.proc != NULL); + + /* If waiting, get off wait queue (should only be needed after error) */ + 
LockErrorCleanup(); + + /* Release standard locks, including session-level if aborting */ + LockReleaseAll(DEFAULT_LOCKMETHOD, true); + + /* + * User locks are not released by transaction end, so be sure to release + * them explicitly. + */ + LockReleaseAll(USER_LOCKMETHOD, true); +} + /* Run from any worker thread. */ PGPROC *GetPageRedoWorkerProc(PageRedoWorker *worker) { @@ -393,6 +411,24 @@ void HandlePageRedoInterrupts() HandlePageRedoInterruptsImpl(); } +void clean_smgr(uint64 &clear_redo_fd_count) +{ + const uint64 clear_redo_fd_count_mask = 0x3FFFFF; + clear_redo_fd_count += 1; + if (clear_redo_fd_count > clear_redo_fd_count_mask && GetSMgrRelationHash() != NULL) { + clear_redo_fd_count = 0; + long hash_num = hash_get_num_entries(GetSMgrRelationHash()); + if (hash_num >= MAX_CLEAR_SMGR_NUM) { + ereport(LOG, + (errmsg("smgr close all: clear_redo_fd_count:%lu, hash_num:%ld,clear_redo_fd_count_mask :%lu", + clear_redo_fd_count, + hash_num, + clear_redo_fd_count_mask))); + smgrcloseall(); + } + } +} + void ReferenceRedoItem(void *item) { RedoItem *redoItem = (RedoItem *)item; @@ -444,7 +480,6 @@ void AddRecordReadBlocks(void *rec, uint32 readblocks) void AddRefRecord(void *rec) { - pg_memory_barrier(); #ifndef EXTREME_RTO_DEBUG (void)pg_atomic_fetch_add_u32(&((XLogReaderState *)rec)->refcount, 1); #else @@ -478,7 +513,6 @@ void AddRefRecord(void *rec) void SubRefRecord(void *rec) { - pg_memory_barrier(); Assert(((XLogReaderState *)rec)->refcount != 0); uint32 relCount = pg_atomic_sub_fetch_u32(&((XLogReaderState *)rec)->refcount, 1); #ifdef EXTREME_RTO_DEBUG @@ -961,6 +995,27 @@ static void WaitNextBarrier(XLogRecParseState *parseState) } } +void redo_page_manager_do_cleanup_action(XLogRecParseState *parse_state) +{ + if (!IS_EXRTO_READ_OPT || !pm_state_is_hot_standby()) { + return; + } + + RelFileNode tmp_node; + tmp_node.spcNode = parse_state->blockparse.blockhead.spcNode; + tmp_node.dbNode = parse_state->blockparse.blockhead.dbNode; + tmp_node.relNode = 
parse_state->blockparse.blockhead.relNode; + tmp_node.bucketNode = parse_state->blockparse.blockhead.bucketNode; + tmp_node.opt = parse_state->blockparse.blockhead.opt; + XLogRecPtr lsn = parse_state->blockparse.blockhead.end_ptr; + TransactionId removed_xid = parse_state->blockparse.extra_rec.clean_up_info.removed_xid; + + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + UpdateCleanUpInfo(removed_xid, lsn); + LWLockRelease(ProcArrayLock); + ResolveRecoveryConflictWithSnapshot(removed_xid, tmp_node, lsn); +} + void PageManagerRedoParseState(XLogRecParseState *preState) { switch (preState->blockparse.blockhead.block_valid) { @@ -1033,6 +1088,10 @@ void PageManagerRedoParseState(XLogRecParseState *preState) RedoPageManagerDistributeToAllOneBlock(preState); XLogBlockParseStateRelease(preState); break; + case BLOCK_DATA_CLEANUP_TYPE: + redo_page_manager_do_cleanup_action(preState); + XLogBlockParseStateRelease(preState); + break; default: XLogBlockParseStateRelease(preState); break; @@ -1077,6 +1136,7 @@ bool PageManagerRedoDistributeItems(XLogRecParseState *record_block_state) void RedoPageManagerMain() { XLogRecParseState *record_block_state = NULL; + uint64 clear_redo_fd_count = 0; (void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); XLogParseBufferInitFunc(&(g_redoWorker->parseManager), MAX_PARSE_BUFF_NUM, &recordRefOperate, @@ -1097,6 +1157,7 @@ void RedoPageManagerMain() SPSCBlockingQueuePop(g_redoWorker->queue); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); RedoInterruptCallBack(); + clean_smgr(clear_redo_fd_count); ADD_ABNORMAL_POSITION(5); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); @@ -1467,6 +1528,7 @@ void RedoPageWorkerMain() RedoInterruptCallBack); } + uint64 clear_redo_fd_count = 0; XLogRecParseState *redoblockstateHead = NULL; GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); while ((redoblockstateHead = (XLogRecParseState *)SPSCBlockingQueueTop(g_redoWorker->queue)) != @@ -1594,6 +1656,7 @@ void 
RedoPageWorkerMain() } CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_8]); RedoInterruptCallBack(); + clean_smgr(clear_redo_fd_count); CountAndGetRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_2], g_redoWorker->timeCostList[TIME_COST_STEP_1]); ADD_ABNORMAL_POSITION(4); } @@ -1660,8 +1723,14 @@ void PushToWorkerLsn(bool force) cur_recor_count = 0; SendLsnFowarder(); } else { - if (cur_recor_count < max_record_count) { - return; + if (g_instance.attr.attr_storage.EnableHotStandby && pm_state_is_hot_standby()) { + if (!exceed_send_lsn_forworder_interval()) { + return; + } + } else { + if (cur_recor_count < max_record_count) { + return; + } } if (pg_atomic_read_u32(&g_GlobalLsnForwarder.record.refcount) != 0) { @@ -2592,6 +2661,9 @@ void ParallelRedoThreadMain() int retCode = RedoMainLoop(); StandbyReleaseAllLocks(); + if (g_redoWorker->role == REDO_TRXN_WORKER) { + redo_worker_release_all_locks(); + } ResourceManagerStop(); ereport(LOG, (errmsg("Page-redo-worker thread %u terminated, role:%u, slotId:%u, retcode %u.", g_redoWorker->id, g_redoWorker->role, g_redoWorker->slotId, retCode))); @@ -3100,349 +3172,18 @@ void SeqCheckRemoteReadAndRepairPage() } } -inline void invalid_msg_leak_warning(XLogRecPtr trxn_lsn) +bool exceed_send_lsn_forworder_interval() { - if (t_thrd.page_redo_cxt.invalid_msg.valid) { - ereport(WARNING, (errmsg(EXRTOFORMAT("[exrto_generate_snapshot] not send invalid msg: %08X/%08X"), - (uint32)(trxn_lsn >> UINT64_HALF), (uint32)trxn_lsn))); - } -} - -void exrto_generate_snapshot(XLogRecPtr trxn_lsn) -{ - if (!g_instance.attr.attr_storage.EnableHotStandby) { - return; - } - - ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; - /* - * do not generate the same snapshot repeatedly. 
- */ - if (XLByteLE(trxn_lsn, exrto_snapshot->read_lsn)) { - invalid_msg_leak_warning(trxn_lsn); - return; - } + TimestampTz last_time; + TimestampTz now_time; - TransactionId xmin; - TransactionId xmax; - CommitSeqNo snapshot_csn; - - exrto_get_snapshot_data(xmin, xmax, snapshot_csn); - (void)LWLockAcquire(ExrtoSnapshotLock, LW_EXCLUSIVE); - exrto_snapshot->snapshot_csn = snapshot_csn; - exrto_snapshot->xmin = xmin; - exrto_snapshot->xmax = xmax; - exrto_snapshot->read_lsn = trxn_lsn; - send_delay_invalid_message(); - LWLockRelease(ExrtoSnapshotLock); -} - -void exrto_read_snapshot(Snapshot snapshot) -{ - if ((!is_exrto_standby_read_worker()) || u_sess->proc_cxt.clientIsCMAgent || dummyStandbyMode) { - return; - } - - ExrtoSnapshot exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; - (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); - if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { - LWLockRelease(ExrtoSnapshotLock); - ereport(ERROR, (errmsg("could not get a valid snapshot with extreme rto"))); - } - snapshot->snapshotcsn = exrto_snapshot->snapshot_csn; - snapshot->xmin = exrto_snapshot->xmin; - snapshot->xmax = exrto_snapshot->xmax; - snapshot->read_lsn = exrto_snapshot->read_lsn; - LWLockRelease(ExrtoSnapshotLock); - if (!TransactionIdIsValid(t_thrd.pgxact->xmin) || TransactionIdPrecedes(snapshot->xmin, t_thrd.pgxact->xmin)) { - t_thrd.pgxact->xmin = snapshot->xmin; - u_sess->utils_cxt.TransactionXmin = snapshot->xmin; - } - t_thrd.proc->exrto_read_lsn = exrto_snapshot->read_lsn; - if (t_thrd.proc->exrto_min == 0 || - XLByteLT(t_thrd.proc->exrto_min, t_thrd.proc->exrto_read_lsn)) { - t_thrd.proc->exrto_min = t_thrd.proc->exrto_read_lsn; - } - - if (t_thrd.proc->exrto_gen_snap_time == 0) { - t_thrd.proc->exrto_gen_snap_time = GetCurrentTimestamp(); - } - Assert(XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)); -} - -static inline uint64 get_force_recycle_pos(uint64 recycle_pos, uint64 insert_pos) -{ - const double force_recyle_ratio = 0.3; /* to be 
adjusted */ - Assert(recycle_pos <= insert_pos); - return recycle_pos + (uint64)((insert_pos - recycle_pos) * force_recyle_ratio); -} - -XLogRecPtr calculate_force_recycle_lsn_per_worker(StandbyReadMetaInfo* meta_info) -{ - uint64 base_page_recycle_pos; - uint64 lsn_info_recycle_pos; - XLogRecPtr base_page_recycle_lsn = InvalidXLogRecPtr; - XLogRecPtr lsn_info_recycle_lsn = InvalidXLogRecPtr; - Buffer buffer; - Page page; - - /* for base page */ - if (meta_info->base_page_recyle_position < meta_info->base_page_next_position) { - base_page_recycle_pos = get_force_recycle_pos(meta_info->base_page_recyle_position, - meta_info->base_page_next_position); - buffer = extreme_rto_standby_read::buffer_read_base_page(meta_info->batch_id, meta_info->redo_id, - base_page_recycle_pos, RBM_NORMAL); - LockBuffer(buffer, BUFFER_LOCK_SHARE); - base_page_recycle_lsn = PageGetLSN(BufferGetPage(buffer)); - UnlockReleaseBuffer(buffer); - } - - /* for lsn info */ - if (meta_info->lsn_table_recyle_position < meta_info->lsn_table_next_position) { - lsn_info_recycle_pos = get_force_recycle_pos(meta_info->lsn_table_recyle_position, - meta_info->lsn_table_next_position); - page = extreme_rto_standby_read::get_lsn_info_page(meta_info->batch_id, meta_info->redo_id, - lsn_info_recycle_pos, RBM_NORMAL, &buffer); - if (unlikely(page == NULL || buffer == InvalidBuffer)) { - ereport(PANIC, - (errmsg(EXRTOFORMAT("get_lsn_info_page failed, batch_id: %u, redo_id: %u, pos: %lu"), - meta_info->batch_id, meta_info->redo_id, lsn_info_recycle_pos))); - } - LockBuffer(buffer, BUFFER_LOCK_SHARE); - extreme_rto_standby_read::LsnInfo lsn_info = - (extreme_rto_standby_read::LsnInfo)(page + extreme_rto_standby_read::LSN_INFO_HEAD_SIZE); - lsn_info_recycle_lsn = lsn_info->lsn[0]; - UnlockReleaseBuffer(buffer); - } - - return rtl::max(base_page_recycle_lsn, lsn_info_recycle_lsn); -} - -void calculate_force_recycle_lsn(XLogRecPtr &recycle_lsn) -{ - XLogRecPtr recycle_lsn_per_worker; - uint32 worker_nums = 
g_dispatcher->allWorkersCnt; - PageRedoWorker** workers = g_dispatcher->allWorkers; - - for (uint32 i = 0; i < worker_nums; ++i) { - PageRedoWorker* page_redo_worker = workers[i]; - if (page_redo_worker->role != REDO_PAGE_WORKER || (page_redo_worker->isUndoSpaceWorker)) { - continue; - } - recycle_lsn_per_worker = calculate_force_recycle_lsn_per_worker(&page_redo_worker->standby_read_meta_info); - if (XLByteLT(recycle_lsn, recycle_lsn_per_worker)) { - recycle_lsn = recycle_lsn_per_worker; - } - } - ereport(LOG, - (errmsg(EXRTOFORMAT("[exrto_recycle] try force recycle, recycle lsn: %08X/%08X"), - (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn))); -} - -static inline bool exceed_standby_max_query_time(TimestampTz start_time) -{ - if (start_time == 0) { + last_time = g_instance.comm_cxt.predo_cxt.exrto_send_lsn_forworder_time; + now_time = GetCurrentTimestamp(); + if (!TimestampDifferenceExceeds(last_time, now_time, EXRTO_STANDBY_READ_TIME_INTERVAL)) { return false; } - return TimestampDifferenceExceeds(start_time, GetCurrentTimestamp(), - g_instance.attr.attr_storage.standby_max_query_time * MSECS_PER_SEC); -} - -/* 1. resolve recycle conflict with backends - * 2. get oldest xmin and oldest readlsn of backends. 
*/ -void proc_array_get_oldeset_readlsn(XLogRecPtr recycle_lsn, XLogRecPtr &oldest_lsn, TransactionId &oldest_xmin, - bool &conflict) -{ - ProcArrayStruct* proc_array = g_instance.proc_array_idx; - conflict = false; - - LWLockAcquire(ProcArrayLock, LW_SHARED); - for (int index = 0; index < proc_array->numProcs; index++) { - int pg_proc_no = proc_array->pgprocnos[index]; - PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; - PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; - TransactionId pxmin = pg_xact->xmin; - XLogRecPtr read_lsn = pg_proc->exrto_min; - ereport( - DEBUG1, - (errmsg(EXRTOFORMAT("proc_array_get_oldeset_readlsn info, read_lsn: %08X/%08X ,xmin: %lu ,vacuum_flags: " - "%hhu ,pid: %lu"), - (uint32)(read_lsn >> UINT64_HALF), (uint32)read_lsn, pxmin, pg_xact->vacuumFlags, pg_proc->pid))); - - if (pg_proc->pid == 0 || XLogRecPtrIsInvalid(read_lsn)) { - continue; - } - - Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); - /* - * Backend is doing logical decoding which manages xmin - * separately, check below. - */ - if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) { - continue; - } - - /* cancel query when its read_lsn < recycle_lsn or its runtime > standby_max_query_time */ - if (XLByteLT(read_lsn, recycle_lsn) || exceed_standby_max_query_time(pg_proc->exrto_gen_snap_time)) { - pg_proc->recoveryConflictPending = true; - conflict = true; - if (pg_proc->pid != 0) { - /* - * Kill the pid if it's still here. If not, that's what we - * wanted so ignore any errors. 
- */ - (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId); - ereport( - LOG, - (errmsg( - EXRTOFORMAT("read_lsn is less than recycle_lsn or query time exceed max_query_time while " - "get_oldeset_readlsn, read_lsn %lu, " - "recycle_lsn: %lu, exrto_gen_snap_time: %ld, current_time: %ld, thread id = %lu\n"), - read_lsn, recycle_lsn, pg_proc->exrto_gen_snap_time, GetCurrentTimestamp(), pg_proc->pid))); - /* - * Wait a little bit for it to die so that we avoid flooding - * an unresponsive backend when system is heavily loaded. - */ - pg_usleep(5000L); - } - continue; - } - - if (XLogRecPtrIsInvalid(oldest_lsn) || - (XLogRecPtrIsValid(read_lsn) && XLByteLT(read_lsn, oldest_lsn))) { - oldest_lsn = read_lsn; - } - - if (!TransactionIdIsValid(oldest_xmin) || - (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) { - oldest_xmin = pxmin; - } - } - LWLockRelease(ProcArrayLock); -} - -void proc_array_get_oldeset_xmin_for_undo(TransactionId &oldest_xmin) -{ - ProcArrayStruct* proc_array = g_instance.proc_array_idx; - - LWLockAcquire(ProcArrayLock, LW_SHARED); - for (int index = 0; index < proc_array->numProcs; index++) { - int pg_proc_no = proc_array->pgprocnos[index]; - PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no]; - PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no]; - TransactionId pxmin = pg_xact->xmin; - - if (pg_proc->pid == 0 || !TransactionIdIsValid(pxmin)) { - continue; - } - - Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM)); - /* - * Backend is doing logical decoding which manages xmin - * separately, check below. 
- */ - if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) { - continue; - } - if (!TransactionIdIsValid(oldest_xmin) || - (TransactionIdIsValid(pxmin) && TransactionIdFollows(oldest_xmin, pxmin))) { - oldest_xmin = pxmin; - } - } - LWLockRelease(ProcArrayLock); -} - -XLogRecPtr exrto_calculate_recycle_position(bool force_recyle) -{ - Assert(t_thrd.role != PAGEREDO); - Assert(IS_EXRTO_READ); - - XLogRecPtr recycle_lsn = g_instance.comm_cxt.predo_cxt.global_recycle_lsn; - XLogRecPtr oldest_lsn = InvalidXLogRecPtr; - TransactionId oldest_xmin = InvalidTransactionId; - bool conflict = false; - const int max_check_times = 1000; - int check_times = 0; - - if (force_recyle) { - calculate_force_recycle_lsn(recycle_lsn); - } - ereport(DEBUG1, (errmsg(EXRTOFORMAT("time information of calculate recycle position, current_time: %ld, snapshot " - "read_lsn: %08X/%08X, gen_snaptime:%ld"), - GetCurrentTimestamp(), - (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), - (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, - g_instance.comm_cxt.predo_cxt.exrto_snapshot->gen_snap_time))); - - /* - * If there is no backend read threads, set read oldest lsn to snapshot lsn. - */ - ExrtoSnapshot exrto_snapshot = NULL; - exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; - (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); - if (XLByteEQ(exrto_snapshot->read_lsn, 0)) { - ereport(WARNING, (errmsg("could not get a valid snapshot with extreme rto"))); - } else { - oldest_lsn = exrto_snapshot->read_lsn; - oldest_xmin = exrto_snapshot->xmin; - } - LWLockRelease(ExrtoSnapshotLock); - /* Loop checks to avoid conflicting queries that were not successfully canceled. 
*/ - do { - RedoInterruptCallBack(); - proc_array_get_oldeset_readlsn(recycle_lsn, oldest_lsn, oldest_xmin, conflict); - check_times++; - } while (conflict && check_times < max_check_times); - - recycle_lsn = rtl::max(recycle_lsn, oldest_lsn); - - ereport( - LOG, - (errmsg( - EXRTOFORMAT( - "[exrto_recycle] calculate recycle position, oldestlsn: %08X/%08X, snapshot read_lsn: %08X/%08X, try " - "recycle lsn: %08X/%08X"), - (uint32)(oldest_lsn >> UINT64_HALF), (uint32)oldest_lsn, - (uint32)(g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn >> UINT64_HALF), - (uint32)g_instance.comm_cxt.predo_cxt.exrto_snapshot->read_lsn, - (uint32)(recycle_lsn >> UINT64_HALF), (uint32)recycle_lsn))); - - return recycle_lsn; -} - -TransactionId exrto_calculate_recycle_xmin_for_undo() -{ - Assert(t_thrd.role != PAGEREDO); - Assert(IS_EXRTO_READ); - TransactionId oldest_xmin = InvalidTransactionId; - TransactionId snapshot_xmin = InvalidTransactionId; - proc_array_get_oldeset_xmin_for_undo(oldest_xmin); - - /* - * If there is no backend read threads, set read oldest lsn to snapshot lsn. 
- */ - if ((oldest_xmin == InvalidTransactionId) && (g_dispatcher != NULL)) { - ExrtoSnapshot exrto_snapshot = NULL; - exrto_snapshot = g_instance.comm_cxt.predo_cxt.exrto_snapshot; - (void)LWLockAcquire(ExrtoSnapshotLock, LW_SHARED); - if (XLByteEQ(exrto_snapshot->xmin, InvalidTransactionId)) { - ereport( - WARNING, - (errmsg("exrto_calculate_recycle_xmin_for_undo: could not get a valid snapshot in exrto_snapshot"))); - } else { - snapshot_xmin = exrto_snapshot->xmin; - } - - LWLockRelease(ExrtoSnapshotLock); - } - ereport(DEBUG1, (errmodule(MOD_UNDO), - errmsg(UNDOFORMAT("exrto_calculate_recycle_xmin_for_undo: oldest_xmin: %lu, snapshot_xmin: %lu."), - oldest_xmin, snapshot_xmin))); - - if (oldest_xmin == InvalidTransactionId) { - return snapshot_xmin; - } - return oldest_xmin; + g_instance.comm_cxt.predo_cxt.exrto_send_lsn_forworder_time = now_time; + return true; } } // namespace extreme_rto \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp index b8b574ace2b2cf8a89f18da110940cd7603c821a..8c1ae454856cbc2c60e8c2d84ec34a6eb483c166 100755 --- a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp @@ -75,6 +75,7 @@ #include "gssignal/gs_signal.h" #include "utils/atomic.h" #include "pgstat.h" +#include "access/xlogreader.h" #ifdef PGXC #include "pgxc/pgxc.h" @@ -327,9 +328,16 @@ void StartRecoveryWorkers(XLogRecPtr startLsn) g_dispatcher = CreateDispatcher(); g_dispatcher->oldCtx = MemoryContextSwitchTo(g_instance.comm_cxt.predo_cxt.parallelRedoCtx); g_dispatcher->txnWorker = StartTxnRedoWorker(); - if (g_dispatcher->txnWorker != NULL) + if (g_dispatcher->txnWorker != NULL) { + Assert(g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_len == 0 || + g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_len == get_real_recovery_parallelism()); 
+ if (g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_ids == NULL) { + g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_ids = (int *)MemoryContextAllocZero( + INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), get_real_recovery_parallelism() * sizeof(int)); + g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_len = get_real_recovery_parallelism(); + } StartPageRedoWorkers(get_real_recovery_parallelism()); - + } ereport(LOG, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), errmsg("[PR]: max=%d, thrd=%d, workers=%u", g_instance.attr.attr_storage.max_recovery_parallelism, get_real_recovery_parallelism(), g_dispatcher->pageWorkerCount))); @@ -922,10 +930,13 @@ static void DispatchRecordWithPages(XLogReaderState *record, List *expectedTLIs, static bool DispatchHeapRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { - if (record->max_block_id >= 0) + if (unlikely((XLogRecGetInfo(record) & XLOG_HEAP_OPMASK) == XLOG_HEAP_INPLACE)) { + DispatchRecordWithoutPage(record, expectedTLIs); + } else if (record->max_block_id >= 0) { DispatchRecordWithPages(record, expectedTLIs, SUPPORT_FPAGE_DISPATCH); - else + } else { DispatchRecordWithoutPage(record, expectedTLIs); + } return false; } @@ -1872,7 +1883,7 @@ void SetStartupBufferPinWaitBufId(int bufid) for (uint32 i = 0; i < g_dispatcher->pageWorkerCount; i++) { PGPROC *proc = GetPageRedoWorkerProc(g_dispatcher->pageWorkers[i]); if (t_thrd.proc->pid == proc->pid) { - g_dispatcher->pageWorkers[i]->bufferPinWaitBufId = bufid; + g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_ids[i] = bufid; break; } } @@ -1883,7 +1894,7 @@ uint32 GetStartupBufferPinWaitBufLen() { uint32 buf_len = 1; if ((get_real_recovery_parallelism() > 1) && (GetPageWorkerCount() > 0)) { - buf_len += g_dispatcher->pageWorkerCount; + buf_len += g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_len; } return buf_len; } @@ -1896,7 +1907,7 @@ void GetStartupBufferPinWaitBufId(int *bufids, uint32 len) { if (g_dispatcher != NULL) { for (uint32 
i = 0; i < len - 1; i++) { - bufids[i] = g_dispatcher->pageWorkers[i]->bufferPinWaitBufId; + bufids[i] = g_instance.comm_cxt.predo_cxt.buffer_pin_wait_buf_ids[i]; } bufids[len - 1] = g_instance.proc_base->startupBufferPinWaitBufId; } diff --git a/src/gausskernel/storage/access/transam/parallel_recovery/page_redo.cpp b/src/gausskernel/storage/access/transam/parallel_recovery/page_redo.cpp index f2b17460aa7eaff1f9455c991b66cf9172264ab8..a7981319c7437de38f2c541785b8caa9e889e0cf 100755 --- a/src/gausskernel/storage/access/transam/parallel_recovery/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/parallel_recovery/page_redo.cpp @@ -190,7 +190,6 @@ static PageRedoWorker *CreateWorker(uint32 id) worker->statWaitReach = 0; worker->statWaitReplay = 0; worker->oldCtx = NULL; - worker->bufferPinWaitBufId = -1; worker->remoteReadPageNum = 0; worker->badPageHashTbl = BadBlockHashTblCreate(); diff --git a/src/gausskernel/storage/access/transam/parallel_recovery/spsc_blocking_queue.cpp b/src/gausskernel/storage/access/transam/parallel_recovery/spsc_blocking_queue.cpp index b63bf9efd2a6528eeb861b2fcd0a323f72ec04ab..9ccfa760ac49af5fe704e534b083a867e068a10c 100644 --- a/src/gausskernel/storage/access/transam/parallel_recovery/spsc_blocking_queue.cpp +++ b/src/gausskernel/storage/access/transam/parallel_recovery/spsc_blocking_queue.cpp @@ -183,10 +183,9 @@ void *SPSCBlockingQueueTop(SPSCBlockingQueue *queue) else sleeptime = MAX_REDO_QUE_TAKE_DELAY; pg_usleep(sleeptime); - } - - if (queue->callBackFunc != NULL) { - queue->callBackFunc(); + if (queue->callBackFunc != NULL) { + queue->callBackFunc(); + } } } while (COUNT(head, tail, queue->mask) == 0); t_thrd.page_redo_cxt.sleep_long = false; diff --git a/src/gausskernel/storage/access/transam/slru.cpp b/src/gausskernel/storage/access/transam/slru.cpp index 6d433ec07a89caccef0a45740922090c1b72b3f2..63e5a94b98b9198092004f432e3243138fd63f54 100644 --- a/src/gausskernel/storage/access/transam/slru.cpp +++ 
b/src/gausskernel/storage/access/transam/slru.cpp @@ -941,8 +941,9 @@ static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid) switch (t_thrd.xact_cxt.slru_errcause) { case SLRU_OPEN_FAILED: ereport(ERROR, (errmodule(MOD_SLRU), errcode_for_file_access(), - errmsg("could not access status of transaction %lu , nextXid is %lu ", xid, - t_thrd.xact_cxt.ShmemVariableCache->nextXid), + errmsg("could not access status of transaction %lu , nextXid is %lu, pageno %ld, " + "t_thrd.pgxact->xmin %lu", + xid, t_thrd.xact_cxt.ShmemVariableCache->nextXid, pageno, t_thrd.pgxact->xmin), errdetail("Could not open file \"%s\": %m.", path))); break; case SLRU_SEEK_FAILED: diff --git a/src/gausskernel/storage/access/transam/transam.cpp b/src/gausskernel/storage/access/transam/transam.cpp index 2a1167abec9fdefd390fe6fbccfdd0388c3fd811..103888be6d3208411fea25ac616aaf56247f855f 100644 --- a/src/gausskernel/storage/access/transam/transam.cpp +++ b/src/gausskernel/storage/access/transam/transam.cpp @@ -464,7 +464,8 @@ bool UHeapTransactionIdDidCommit(TransactionId transactionId) return true; } if (TransactionIdIsNormal(transactionId) && - TransactionIdPrecedes(transactionId, pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid))) { + TransactionIdPrecedes(transactionId, pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid)) && + !RecoveryInProgress()) { Assert(TransactionIdDidCommit(transactionId)); return true; } diff --git a/src/gausskernel/storage/access/transam/twophase_rmgr.cpp b/src/gausskernel/storage/access/transam/twophase_rmgr.cpp index e0e72b13019f8058185bef42a22e21ee9aecc0d4..9e8702a485cef7945f05ac79ea337b19aa38ac84 100644 --- a/src/gausskernel/storage/access/transam/twophase_rmgr.cpp +++ b/src/gausskernel/storage/access/transam/twophase_rmgr.cpp @@ -45,11 +45,3 @@ const TwoPhaseCallback g_twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = multixact_twophase_postabort, /* MultiXact */ NULL /* PredicateLock */ }; - -const TwoPhaseCallback 
g_twophase_standby_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = { - NULL, /* END ID */ - lock_twophase_standby_recover, /* Lock */ - NULL, /* pgstat */ - NULL, /* MultiXact */ - NULL /* PredicateLock */ -}; diff --git a/src/gausskernel/storage/access/transam/xact.cpp b/src/gausskernel/storage/access/transam/xact.cpp index a6ff43cd4007986b1b8994a61f7613809caa57ee..3e21f1bd2bc868d74e466c515262d76e1d7a3d95 100755 --- a/src/gausskernel/storage/access/transam/xact.cpp +++ b/src/gausskernel/storage/access/transam/xact.cpp @@ -7175,7 +7175,7 @@ void push_unlink_rel_to_hashtbl(ColFileNode *xnodes, int nrels) /* * XLOG support routines */ -void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) +void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels, bool is_old_delay_ddl) { ColMainFileNodesCreate(); @@ -7212,7 +7212,7 @@ void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels) /* * recycle exrto files when dropping table occurs. */ - if (RecoveryInProgress() && IS_EXRTO_READ) { + if (!is_old_delay_ddl && RecoveryInProgress() && IS_EXRTO_READ) { RelFileNode block_meta_file = relFileNode; block_meta_file.spcNode = EXRTO_BLOCK_INFO_SPACE_OID; extreme_rto_standby_read::remove_one_block_info_file(block_meta_file); diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 30d497a054f535b1034abdd16391f993cf8d6dc6..17efd733ea910251eb9dfce8af029080b2475246 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -134,6 +134,7 @@ #include #include #include +#include "access/slru.h" #ifdef ENABLE_MOT #include "storage/mot/mot_fdw.h" @@ -8834,6 +8835,69 @@ void init_extreme_rto_standby_read_first_snapshot(const XLogRecPtr checkpoint_lo } } +static bool hex_string_to_int(char* hex_string, uint32* result) +{ + uint32 num = 0; + char* temp = hex_string; + uint32 c = 0; + uint32 index = 0; + + if (NULL == hex_string) { + return false; + } + + while 
(*temp++ != '\0') { + num++; + } + + while (num--) { + if (hex_string[num] >= 'A' && hex_string[num] <= 'F') { + c = (uint32)((hex_string[num] - 'A') + 10); + } else if (hex_string[num] >= '0' && hex_string[num] <= '9') { + c = (uint32)(hex_string[num] - '0'); + } else { + return false; + } + + *result += c << (index * 4); + index++; + } + + return true; +} + +static inline void set_hot_standby_recycle_xid() +{ + DIR *dir = NULL; + struct dirent *ptr = NULL; + uint32 segnum = 0; + char *dir_name = "pg_clog"; + + if ((dir = opendir(dir_name)) == NULL) { + return; + } + // find the first clog file + while ((ptr = readdir(dir)) != NULL) { + if (ptr->d_type != DT_REG) { + continue; + } + if (!hex_string_to_int(ptr->d_name, &segnum)) { + closedir(dir); + return; + } + /* one segment file has 8k*8bit/2*32 xids */ + uint32 segnum_xid = BLCKSZ * CLOG_XACTS_PER_BYTE * SLRU_PAGES_PER_SEGMENT; + /* the first xid number of current segment file */ + TransactionId xid = (uint64)segnum * segnum_xid; + pg_atomic_write_u64(&g_instance.undo_cxt.hotStandbyRecycleXid, xid); + ereport(LOG, (errmsg("Startup: write hotStandbyRecycleXid %lu", xid))); + + closedir(dir); + return; + } + closedir(dir); +} + /* * This must be called ONCE during postmaster or standalone-backend startup */ @@ -9520,6 +9584,7 @@ void StartupXLOG(void) } else { pg_atomic_write_u64(&g_instance.undo_cxt.globalRecycleXid, InvalidTransactionId); } + set_hot_standby_recycle_xid(); /* * Initialize replication slots, before there's a chance to remove @@ -9732,6 +9797,9 @@ void StartupXLOG(void) "have to use another backup for recovery."))); } t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint = t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint; + ereport(LOG, (errmsg("backup_from_standby: set backup end point to %X/%X", + (uint32)(t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint >> 32), + (uint32)t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint))); } else if (backupFromRoach) { 
t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint = t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint; ereport(LOG, (errmsg("perform roach backup restore and set backup end point to %X/%X", @@ -9911,13 +9979,14 @@ void StartupXLOG(void) * redo LSN and future consistent point. */ ereport(LOG, (errmsg("redo minRecoveryPoint at %X/%X; backupStartPoint at %X/%X; " - "backupEndRequired %s", + "backupEndPoint at %X/%X;backupEndRequired %s", (uint32)(t_thrd.xlog_cxt.minRecoveryPoint >> 32), (uint32)t_thrd.xlog_cxt.minRecoveryPoint, (uint32)(t_thrd.shemem_ptr_cxt.ControlFile->backupStartPoint >> 32), (uint32)t_thrd.shemem_ptr_cxt.ControlFile->backupStartPoint, + (uint32)(t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint >> 32), + (uint32)t_thrd.shemem_ptr_cxt.ControlFile->backupEndPoint, t_thrd.shemem_ptr_cxt.ControlFile->backupEndRequired ? "TRUE" : "FALSE"))); - init_extreme_rto_standby_read_first_snapshot(checkPoint.redo); pg_atomic_write_u32(&t_thrd.walreceiverfuncs_cxt.WalRcv->rcvDoneFromShareStorage, false); // Allow read-only connections immediately if we're consistent already. 
@@ -10904,7 +10973,7 @@ void ArchiveXlogForForceFinishRedo(XLogReaderState *xlogreader, TermFileData *te void backup_cut_xlog_file(XLogRecPtr lastReplayedEndRecPtr) { errno_t errorno = EOK; - ereport(DEBUG1, (errmsg("end of backup reached"))); + ereport(LOG, (errmsg("end of backup reached"))); LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); @@ -13210,7 +13279,8 @@ static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr curIns } } - if (RecoveryInProgress() && IS_EXRTO_READ) { + if (RecoveryInProgress() && IS_EXRTO_READ && !dummyStandbyMode && + !g_instance.attr.attr_storage.enable_exrto_standby_read_opt) { XLogRecPtr recycle_recptr = pg_atomic_read_u64(&g_instance.comm_cxt.predo_cxt.global_recycle_lsn); XLogSegNo recyle_segno; XLByteToSeg(recycle_recptr, recyle_segno); @@ -13723,7 +13793,8 @@ void xlog_redo(XLogReaderState *record) rc = memcpy_s(&startpoint, sizeof(startpoint), XLogRecGetData(record), sizeof(startpoint)); securec_check(rc, "", ""); - if (XLByteEQ(t_thrd.shemem_ptr_cxt.ControlFile->backupStartPoint, startpoint)) { + if (XLByteEQ(t_thrd.shemem_ptr_cxt.ControlFile->backupStartPoint, startpoint) && + t_thrd.shemem_ptr_cxt.ControlFile->backupEndRequired) { /* * We have reached the end of base backup, the point where * pg_stop_backup() was done. The data on disk is now consistent. @@ -13731,7 +13802,7 @@ void xlog_redo(XLogReaderState *record) * sure we don't allow starting up at an earlier point even if * recovery is stopped and restarted soon after this. 
*/ - ereport(DEBUG1, (errmsg("end of backup reached"))); + ereport(LOG, (errmsg("end of backup reached"))); LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); diff --git a/src/gausskernel/storage/access/ubtree/ubtpage.cpp b/src/gausskernel/storage/access/ubtree/ubtpage.cpp index f28c2b850fa48e53ed353a31bb8bc9ea434106f0..55b6a8a38d6e8442cbf3e3c7f1b019fb49668698 100644 --- a/src/gausskernel/storage/access/ubtree/ubtpage.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtpage.cpp @@ -25,6 +25,7 @@ #include "knl/knl_variable.h" #include "access/hio.h" +#include "access/multi_redo_api.h" #include "access/nbtree.h" #include "access/ubtree.h" #include "access/transam.h" @@ -165,7 +166,8 @@ Buffer UBTreeGetRoot(Relation rel, int access) valid = PinBuffer(buf, NULL); if (valid) { LockBuffer(rootbuf, BT_READ); - isRootCacheValid = RelFileNodeEquals(buf->tag.rnode, rel->rd_node) && (buf->tag.blockNum == rootblkno); + isRootCacheValid = (!IS_EXRTO_STANDBY_READ) && RelFileNodeEquals(buf->tag.rnode, rel->rd_node) && + (buf->tag.blockNum == rootblkno); if (!isRootCacheValid) UnlockReleaseBuffer(rootbuf); } else { diff --git a/src/gausskernel/storage/access/ubtree/ubtsearch.cpp b/src/gausskernel/storage/access/ubtree/ubtsearch.cpp index 78d901461ad25fb509ffcfcc14a35fc999803ac8..22893d32f8255b5f9522c5eaab787bf1e5c8a385 100644 --- a/src/gausskernel/storage/access/ubtree/ubtsearch.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtsearch.cpp @@ -148,7 +148,7 @@ BTStack UBTreeSearch(Relation rel, BTScanInsert key, Buffer *bufP, int access, b ExecuteUstoreVerify(USTORE_VERIFY_MOD_UBTREE, (char *) &verifyParams); } /* drop the read lock on the parent page, acquire one on the child */ - *bufP = _bt_relandgetbuf(rel, *bufP, blkno, pageAccess); + *bufP = _bt_relandgetbuf(rel, *bufP, blkno, pageAccess, par_blkno); /* okay, all set to move down a level */ stack_in = new_stack; diff --git a/src/gausskernel/storage/access/ubtree/ubtutils.cpp b/src/gausskernel/storage/access/ubtree/ubtutils.cpp 
index 8fbf0c786e1808d24ce060ff40eb4b35afbb9416..505401eb42a961ce39634f9d267f60e1ca7a467e 100644 --- a/src/gausskernel/storage/access/ubtree/ubtutils.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtutils.cpp @@ -481,6 +481,10 @@ bool UBTreeItupGetXminXmax(Page page, OffsetNumber offnum, TransactionId oldest_ if (!TransactionIdIsValid(oldest_xmin)) { oldest_xmin = u_sess->utils_cxt.RecentGlobalDataXmin; } + /* we can't do bypass in hotstandby read mode, or there will be different between index scan and seq scan */ + if (RecoveryInProgress()) { + oldest_xmin = InvalidTransactionId; + } if (!TransactionIdIsValid(*xmin)) { isDead = true; diff --git a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp index 5797e42a4ee17dfd0d9f1798edab31cae3a17ac3..e402ed226c5228aeb6b68e3b933e6490d6efddee 100644 --- a/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uextremeredo.cpp @@ -201,7 +201,7 @@ static XLogRecParseState *UHeapXlogFreezeTDParseBlock(XLogReaderState *record, u if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_FREEZE_TD_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP_FREEZE_TD_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -213,7 +213,7 @@ static XLogRecParseState *UHeapXlogInvalidTDParseBlock(XLogReaderState *record, if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_INVALID_TD_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP_INVALID_TD_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -225,7 +225,7 @@ static XLogRecParseState *UHeapXlogCleanParseBlock(XLogReaderState *record, uint if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_CLEAN_ORIG_BLOCK_NUM, recordstatehead); + 
XLogRecSetBlockDataState(record, UHEAP_CLEAN_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -276,7 +276,7 @@ static XLogRecParseState *UHeap2XlogBaseShiftParseBlock(XLogReaderState *record, if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP2_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP2_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -288,7 +288,7 @@ static XLogRecParseState *UHeap2XlogFreezeParseBlock(XLogReaderState *record, ui if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP2_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP2_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -404,7 +404,7 @@ static XLogRecParseState *UHeapXlogUheapUndoResetSlotParseBlock(XLogReaderState if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -416,7 +416,7 @@ static XLogRecParseState *UHeapXlogUheapUndoPageParseBlock(XLogReaderState *reco if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } @@ -428,7 +428,7 @@ static XLogRecParseState *UHeapXlogUheapUndoAbortSpecInsertParseBlock(XLogReader if (recordstatehead == NULL) { return NULL; } - XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead); + XLogRecSetBlockDataState(record, UHEAP_UNDOACTION_ORIG_BLOCK_NUM, recordstatehead, BLOCK_DATA_MAIN_DATA_TYPE, true); return recordstatehead; } diff --git 
a/src/gausskernel/storage/access/ustore/knl_uundorecord.cpp b/src/gausskernel/storage/access/ustore/knl_uundorecord.cpp index ad1ad7ddeb3af96753823a12effb8080d3069ef8..db419459cdaa07d8eb00277eac27904a583b5efa 100644 --- a/src/gausskernel/storage/access/ustore/knl_uundorecord.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uundorecord.cpp @@ -546,7 +546,8 @@ UndoTraversalState FetchUndoRecord(__inout UndoRecord *urec, _in_ SatisfyUndoRec return UNDO_TRAVERSAL_ABORT; } - if (isNeedBypass && TransactionIdPrecedes(urec->Xid(), g_instance.undo_cxt.globalFrozenXid)) { + if (isNeedBypass && TransactionIdPrecedes(urec->Xid(), g_instance.undo_cxt.globalFrozenXid) && + !RecoveryInProgress()) { ereport(DEBUG1, (errmsg(UNDOFORMAT("Check visibility by globalFrozenXid")))); return UNDO_TRAVERSAL_STOP; } diff --git a/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp b/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp index a12abb556305f0e0218be80f70230d2e14901d83..0f01a383471164da10786d21a4a69dffbfc5a585 100644 --- a/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uvisibility.cpp @@ -249,9 +249,9 @@ bool UHeapTupleSatisfiesVisibility(UHeapTuple uhtup, Snapshot snapshot, Buffer b } uint64 globalFrozenXid = isFlashBack ? pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid) : pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); - if (pm_state_is_hot_standby()) { + if (RecoveryInProgress()) { /* in hot standby mode, if globalRecycleXid advance during query, it may cause data inconsistency */ - globalFrozenXid = 0; + globalFrozenXid = pg_atomic_read_u64(&g_instance.undo_cxt.hotStandbyRecycleXid); } if (TransactionIdIsValid(tdinfo.xid) && TransactionIdPrecedes(tdinfo.xid, globalFrozenXid)) { /* The slot is old enough that we can treat it as frozen. 
*/ @@ -817,9 +817,9 @@ bool UHeapTupleFetch(Relation rel, Buffer buffer, OffsetNumber offnum, Snapshot uint64 oldestRecycleXidHavingUndo = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid); uint64 oldestXidHavingUndo = (isFlashBack || isLogical) ? oldestRecycleXidHavingUndo : pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); - if (pm_state_is_hot_standby()) { + if (RecoveryInProgress()) { /* in hot standby mode, if globalRecycleXid advance during query, it may cause data inconsistency */ - oldestXidHavingUndo = 0; + oldestXidHavingUndo = pg_atomic_read_u64(&g_instance.undo_cxt.hotStandbyRecycleXid); } if (TransactionIdIsValid(tdinfo.xid) && TransactionIdPrecedes(tdinfo.xid, oldestXidHavingUndo)) { if (TransactionIdOlderThanAllUndo(tdinfo.xid)) { diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp index b6201db3eae21792e4d34e2d92c1f5c497fd55e8..e2d5e33d9d72fc00005af921cd62998599a10238 100755 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundorecycle.cpp @@ -44,6 +44,7 @@ #include "utils/postinit.h" #include "utils/gs_bitmap.h" #include "pgstat.h" +#include "access/ustore/knl_uvisibility.h" #define TRANS_PARTITION_LINEAR_SPARE_TIME(degree) \ (degree > 3000 ? 
3000 : degree) @@ -561,7 +562,20 @@ void exrto_standby_release_space(UndoZone *zone, TransactionId recycle_xid, Undo zone->ReleaseSpace(start_undo_ptr, end_undo_ptr, &g_forceRecycleSize); zone->ReleaseSlotSpace(0, recycle_exrto, &g_forceRecycleSize); } - + +bool is_undo_slot_exist(UndoSlotPtr slot_ptr) +{ + bool ret = false; + RelFileNode rnode; + UNDO_PTR_ASSIGN_REL_FILE_NODE(rnode, slot_ptr, UNDO_SLOT_DB_OID); + SMgrRelation reln = smgropen(rnode, InvalidBackendId); + if (smgrexists(reln, UNDO_FORKNUM, (BlockNumber)UNDO_PTR_GET_BLOCK_NUM(slot_ptr))) { + ret = true; + } + smgrclose(reln); + return ret; +} + bool exrto_standby_recycle_space(UndoZone *zone, TransactionId recycle_xmin) { UndoSlotPtr recycle_exrto = zone->get_recycle_tslot_ptr_exrto(); @@ -578,6 +592,23 @@ bool exrto_standby_recycle_space(UndoZone *zone, TransactionId recycle_xmin) zone->GetZoneId(), recycle_xmin, recycle_exrto, recycle_primary))); while (recycle_exrto < recycle_primary) { + uint64 start_segno = (uint)((UNDO_PTR_GET_OFFSET(recycle_exrto)) / UNDO_META_SEGMENT_SIZE); + uint64 end_segno = (uint)((UNDO_PTR_GET_OFFSET(recycle_exrto)) / UNDO_META_SEGMENT_SIZE + 1); + if (!is_undo_slot_exist(recycle_exrto)) { + zone->ForgetUndoBuffer( + start_segno * UNDO_META_SEGMENT_SIZE, end_segno * UNDO_META_SEGMENT_SIZE, UNDO_DB_OID); + ereport(WARNING, + (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("exrto_standby_recycle_space zone_id:%d, recycle_xmin:%lu, recycle_exrto:%lu, " + "recycle_primary:%lu, undo slot not exist."), + zone->GetZoneId(), + recycle_xmin, + recycle_exrto, + recycle_primary))); + recycle_exrto = GetNextSlotPtr(recycle_exrto); + continue; + } + UndoSlotBuffer& slot_buf = g_slotBufferCache->FetchTransactionBuffer(recycle_exrto); UndoRecPtr start_undo_ptr = INVALID_UNDO_REC_PTR; start = recycle_exrto; @@ -631,7 +662,7 @@ bool exrto_standby_recycle_undo_zone() if (g_instance.undo_cxt.uZoneCount == 0 || g_instance.undo_cxt.uZones == NULL) { return recycled; } - TransactionId recycle_xmin = 
extreme_rto::exrto_calculate_recycle_xmin_for_undo(); + TransactionId recycle_xmin = exrto_calculate_recycle_xmin_for_undo(); for (idx = 0; idx < PERSIST_ZONE_COUNT && !t_thrd.undorecycler_cxt.shutdown_requested; idx++) { UndoZone *zone = (UndoZone *)g_instance.undo_cxt.uZones[idx]; if (zone == NULL) { @@ -809,6 +840,7 @@ void UndoRecycleMain() ereport(LOG, (errmodule(MOD_UNDO), errmsg(UNDOFORMAT("sleep 10s, ensure the snapcapturer can give the undorecyclemain a valid recycleXmin.")))); exrto_recycle_residual_undo_file("recycle_main"); + t_thrd.undorecycler_cxt.is_recovery_in_progress = RecoveryInProgress(); while (true) { if (t_thrd.undorecycler_cxt.got_SIGHUP) { t_thrd.undorecycler_cxt.got_SIGHUP = false; @@ -817,7 +849,14 @@ void UndoRecycleMain() if (t_thrd.undorecycler_cxt.shutdown_requested) { ShutDownRecycle(recycleMaxXIDs); } - if (!RecoveryInProgress()) { + bool is_in_progress = RecoveryInProgress(); + if (is_in_progress != t_thrd.undorecycler_cxt.is_recovery_in_progress) { + ereport(LOG, (errmodule(MOD_UNDO), + errmsg(UNDOFORMAT("recycle_main: stop undo recycler because recovery_in_progress change " + "from %u to %u."), t_thrd.undorecycler_cxt.is_recovery_in_progress, is_in_progress))); + ShutDownRecycle(recycleMaxXIDs); + } + if (!t_thrd.undorecycler_cxt.is_recovery_in_progress) { TransactionId recycleXmin = InvalidTransactionId; TransactionId oldestXmin = GetOldestXminForUndo(&recycleXmin); if (!TransactionIdIsValid(recycleXmin) || diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp index 3d60a8a4d75a315459e3563215173df61eb59e74..49f796f75bb74f8a5903d91008d0496ab4c7d567 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundospace.cpp @@ -114,7 +114,7 @@ void UndoSpace::UnlinkUndoLog(int zid, UndoLogOffset offset, uint32 dbId) RelFileNode rnode; UndoLogOffset head; UndoLogOffset old_head; - 
if (IS_EXRTO_STANDBY_READ) { + if (t_thrd.undorecycler_cxt.is_recovery_in_progress) { head = head_exrto; old_head = head_exrto; set_head_exrto(offset); diff --git a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp index 6ae2a8700547f2c94f6163c98935d472ee828982..2408ff1a1fd839cdd3cc79432aaa7728906b6873 100644 --- a/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp +++ b/src/gausskernel/storage/access/ustore/undo/knl_uundozone.cpp @@ -280,7 +280,7 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe UndoLogOffset end = UNDO_PTR_GET_OFFSET(endurp); int startSegno; UndoLogOffset head; - if (IS_EXRTO_STANDBY_READ) { + if (t_thrd.undorecycler_cxt.is_recovery_in_progress) { head = undoSpace_.Head_exrto(); } else { head = undoSpace_.Head(); @@ -297,7 +297,7 @@ void UndoZone::ReleaseSpace(UndoRecPtr starturp, UndoRecPtr endurp, int *forceRe UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head); undoSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_LOG_SEGMENT_SIZE, UNDO_DB_OID); Assert(undoSpace_.Head() <= insertURecPtr_); - if (pLevel_ == UNDO_PERMANENT && (!IS_EXRTO_STANDBY_READ)) { + if (pLevel_ == UNDO_PERMANENT && (!t_thrd.undorecycler_cxt.is_recovery_in_progress)) { START_CRIT_SECTION(); undoSpace_.MarkDirty(); XlogUndoUnlink undoUnlink; @@ -340,7 +340,7 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, { UndoLogOffset end = UNDO_PTR_GET_OFFSET(endSlotPtr); UndoLogOffset head; - if (IS_EXRTO_STANDBY_READ) { + if (t_thrd.undorecycler_cxt.is_recovery_in_progress) { head = slotSpace_.Head_exrto(); } else { head = slotSpace_.Head(); @@ -357,7 +357,7 @@ void UndoZone::ReleaseSlotSpace(UndoRecPtr startSlotPtr, UndoRecPtr endSlotPtr, UndoRecPtr prevHead = MAKE_UNDO_PTR(zid_, head); slotSpace_.UnlinkUndoLog(zid_, endSegno * UNDO_META_SEGMENT_SIZE, UNDO_SLOT_DB_OID); Assert(slotSpace_.Head() <= allocateTSlotPtr_); - if (pLevel_ == 
UNDO_PERMANENT && !(IS_EXRTO_STANDBY_READ)) { + if (pLevel_ == UNDO_PERMANENT && !(t_thrd.undorecycler_cxt.is_recovery_in_progress)) { START_CRIT_SECTION(); slotSpace_.MarkDirty(); XlogUndoUnlink undoUnlink; diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index 65601d55a184356d42804f3705b18987655115d9..27375bc529d924865c481cd999c29490b29d6e8b 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -6197,13 +6197,11 @@ bool HoldingBufferPinThatDelaysRecovery(void) if (IS_EXRTO_READ) { return false; } - SpinLockAcquire(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); int bufids[MAX_RECOVERY_THREAD_NUM + 1]; errno_t rc = memset_s(bufids, sizeof(bufids), -1, sizeof(bufids)); securec_check(rc, "\0", "\0"); uint32 bufLen = parallel_recovery::GetStartupBufferPinWaitBufLen(); parallel_recovery::GetStartupBufferPinWaitBufId(bufids, bufLen); - SpinLockRelease(&(g_instance.comm_cxt.predo_cxt.destroy_lock)); for (uint32 i = 0; i < bufLen; i++) { /* diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp index ddcbd8f7029a3ccf3b97a93e6e95a9271a5d8406..5095d235f02ae6171b47dab9945685850222b8fb 100755 --- a/src/gausskernel/storage/ipc/procarray.cpp +++ b/src/gausskernel/storage/ipc/procarray.cpp @@ -1341,8 +1341,12 @@ bool TransactionIdIsInProgress(TransactionId xid, uint32* needSync, bool shortcu assigned value * local must sync with gtm. 
*/ - if (shortcutByRecentXmin && - TransactionIdPrecedes(xid, pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid))) { + uint64 recycle_xid = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid); + /* in hot standby mode the proc may still be running, so clear recycle_xid before the shortcut check below */ + if (RecoveryInProgress()) { + recycle_xid = InvalidTransactionId; + } + if (shortcutByRecentXmin && TransactionIdPrecedes(xid, recycle_xid)) { xc_by_recent_xmin_inc(); /* @@ -2114,6 +2118,8 @@ RETRY: bool retry_get = false; uint64 retry_count = 0; const static uint64 WAIT_COUNT = 0x7FFFF; + /* reset xmin before acquiring lwlock, in case blocking redo */ + t_thrd.pgxact->xmin = InvalidTransactionId; RETRY_GET: if (snapshot->takenDuringRecovery && !StreamThreadAmI() && !IS_EXRTO_READ && !u_sess->proc_cxt.clientIsCMAgent) { @@ -2433,15 +2439,15 @@ GROUP_GET_SNAPSHOT: snapshot->copied = false; if (snapshot->takenDuringRecovery) { - (void)pgstat_report_waitstatus(oldStatus); if (IsDefaultExtremeRtoMode() && IS_EXRTO_STANDBY_READ) { - extreme_rto::exrto_read_snapshot(snapshot); + exrto_read_snapshot(snapshot); if (t_thrd.proc->exrto_reload_cache) { t_thrd.proc->exrto_reload_cache = false; reset_invalidation_cache(); } AcceptInvalidationMessages(); } + (void)pgstat_report_waitstatus(oldStatus); } return snapshot; @@ -2724,6 +2730,9 @@ TransactionId GetOldestActiveTransactionId(TransactionId *globalXmin) xmin = oldestRunningXid; } *globalXmin = xmin; + if (IS_EXRTO_STANDBY_READ) { + ereport(LOG, (errmsg("proc_array_get_oldest_active_transaction_id: global_xmin = %lu", *globalXmin))); + } return oldestRunningXid; } @@ -3236,7 +3245,8 @@ ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalRe return pid; } -bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool reach_max_check_times) +bool proc_array_cancel_conflicting_proc( + TransactionId latest_removed_xid, XLogRecPtr truncate_redo_lsn, bool reach_max_check_times) { ProcArrayStruct* proc_array = 
g_instance.proc_array_idx; bool conflict = false; @@ -3262,8 +3272,9 @@ bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool r continue; } - /* cancel query when its xmin < latest_removed_xid */ - if (TransactionIdPrecedesOrEquals(pxmin, latest_removed_xid)) { + /* cancel query when its xmin precedes or equals latest_removed_xid, or when a valid truncate_redo_lsn is newer than its read_lsn */ + if (TransactionIdPrecedesOrEquals(pxmin, latest_removed_xid) || + (truncate_redo_lsn != InvalidXLogRecPtr && XLByteLT(read_lsn, truncate_redo_lsn))) { conflict = true; pg_proc->recoveryConflictPending = true; if (pg_proc->pid != 0) { @@ -3277,9 +3288,16 @@ bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, bool r * an unresponsive backend when system is heavily loaded. */ ereport(LOG, - (errmsg(EXRTOFORMAT("exrto_gen_snap_time: %ld, current_timestamp: %ld, cancel thread while " - "redo truncate, thread id = %lu\n"), - pg_proc->exrto_gen_snap_time, GetCurrentTimestamp(), pg_proc->pid))); + (errmsg(EXRTOFORMAT("cancel thread while " + "redo truncate (lsn: %08X/%08X, latest_removed_xid: %lu), thread id = %lu, " + "read_lsn: %08X/%08X, xmin: %lu"), + (uint32)(truncate_redo_lsn >> UINT64_HALF), + (uint32)truncate_redo_lsn, + latest_removed_xid, + pg_proc->pid, + (uint32)(read_lsn >> UINT64_HALF), + (uint32)read_lsn, + pxmin))); pg_usleep(5000L); } } diff --git a/src/gausskernel/storage/ipc/sinvaladt.cpp b/src/gausskernel/storage/ipc/sinvaladt.cpp index 490f582c46339f6e6cd3bf105196e370623693b6..f70fbfcec35fdbe7fc94ddf667bda68ac6a50672 100644 --- a/src/gausskernel/storage/ipc/sinvaladt.cpp +++ b/src/gausskernel/storage/ipc/sinvaladt.cpp @@ -20,6 +20,7 @@ #include #include "miscadmin.h" +#include "access/multi_redo_api.h" #include "storage/backendid.h" #include "storage/ipc.h" #include "storage/proc.h" @@ -739,16 +740,18 @@ int SIGetDataEntries(SharedInvalidationMessage* data, int datasize, bool workses n = 0; XLogRecPtr read_lsn = InvalidXLogRecPtr; - if (u_sess->utils_cxt.CurrentSnapshot != NULL && - 
XLogRecPtrIsValid(u_sess->utils_cxt.CurrentSnapshot->read_lsn)) { - read_lsn = u_sess->utils_cxt.CurrentSnapshot->read_lsn; - } else if (XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)) { - read_lsn = t_thrd.proc->exrto_read_lsn; + if (IS_EXRTO_STANDBY_READ) { + if (u_sess->utils_cxt.CurrentSnapshot != NULL && + XLogRecPtrIsValid(u_sess->utils_cxt.CurrentSnapshot->read_lsn)) { + read_lsn = u_sess->utils_cxt.CurrentSnapshot->read_lsn; + } else if (XLogRecPtrIsValid(t_thrd.proc->exrto_read_lsn)) { + read_lsn = t_thrd.proc->exrto_read_lsn; + } } while (n < datasize && stateP->nextMsgNum < max) { int index = stateP->nextMsgNum % MAXNUMMESSAGES; - if (read_lsn != InvalidXLogRecPtr && segP->buffer[index].lsn != InvalidXLogRecPtr) { + if (XLogRecPtrIsValid(read_lsn) && XLogRecPtrIsValid(segP->buffer[index].lsn)) { if (XLByteLT(read_lsn, segP->buffer[index].lsn)) { break; } diff --git a/src/gausskernel/storage/ipc/standby.cpp b/src/gausskernel/storage/ipc/standby.cpp index d023429204bf6d8176d2648cd721e212829e14cc..cbab224f79a4b89c6e22d3c36aec0ad3a7310259 100755 --- a/src/gausskernel/storage/ipc/standby.cpp +++ b/src/gausskernel/storage/ipc/standby.cpp @@ -35,13 +35,14 @@ #include "utils/timestamp.h" #include "utils/snapmgr.h" #include "pgxc/poolutils.h" +#include "catalog/pg_partition_fn.h" #include "replication/walreceiver.h" static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId* waitlist, TransactionId* xminArray, ProcSignalReason reason, TimestampTz waitStart, TransactionId limitXmin = InvalidTransactionId); static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock* locks); -static void LogReleaseAccessExclusiveLocks(int nlocks, xl_standby_lock* locks); +static void log_access_exclusive_locks_new(int nlocks, XlStandbyLockNew* locks); static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static void RecordCommittingCsnInfo(TransactionId xid); @@ -198,6 
+199,9 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId* waitlis /* wait until the virtual xid is gone */ while (!VirtualXactLock(*waitlist, false)) { PGPROC* proc = BackendIdGetProc((*waitlist).backendId); + if (proc == NULL) { + break; + } PGXACT* pgxact = &g_instance.proc_base_all_xacts[proc->pgprocno]; if (xminArray != NULL && pgxact->xmin != *xminArray && (!TransactionIdIsValid(pgxact->xmin) || TransactionIdFollows(pgxact->xmin, limitXmin))) { @@ -608,7 +612,7 @@ void CheckRecoveryConflictDeadlock(void) * We use session locks rather than normal locks so we don't need * ResourceOwners. */ -void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) +void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid, uint32 seq) { LOCKTAG locktag; @@ -651,13 +655,22 @@ void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) entry->locks = NIL; } - xl_standby_lock* newlock = (xl_standby_lock*)palloc(sizeof(xl_standby_lock)); + XlStandbyLockNew* newlock = (XlStandbyLockNew*)palloc(sizeof(XlStandbyLockNew)); newlock->xid = xid; newlock->dbOid = dbOid; newlock->relOid = relOid; + newlock->seq = seq; entry->locks = lappend(entry->locks, newlock); - SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid); + if (seq != InvalidOid) { + if (seq == PARTITION_OBJECT_LOCK_SDEQUENCE || seq == INTERVAL_PARTITION_LOCK_SDEQUENCE) { + SET_LOCKTAG_OBJECT(locktag, newlock->dbOid, newlock->relOid, newlock->seq, 0); + } else { + SET_LOCKTAG_PARTITION(locktag, newlock->dbOid, newlock->relOid, newlock->seq); + } + } else { + SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid); + } if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false) == LOCKACQUIRE_NOT_AVAIL) ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid); @@ -666,11 +679,20 @@ void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) static void StandbyReleaseLockList(List 
*locks) { while (locks) { - xl_standby_lock *lock = (xl_standby_lock *) linitial(locks); + XlStandbyLockNew *lock = (XlStandbyLockNew *) linitial(locks); LOCKTAG locktag; - ereport(trace_recovery(DEBUG4), (errmsg("releasing recovery lock: xid %lu db %u rel %u", lock->xid, - lock->dbOid, lock->relOid))); - SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); + ereport(trace_recovery(DEBUG4), (errmsg("releasing recovery lock: xid %lu db %u rel %u seq %u", lock->xid, + lock->dbOid, lock->relOid, lock->seq))); + if (lock->seq != InvalidOid) { + if (lock->seq == PARTITION_OBJECT_LOCK_SDEQUENCE || lock->seq == INTERVAL_PARTITION_LOCK_SDEQUENCE) { + SET_LOCKTAG_OBJECT(locktag, lock->dbOid, lock->relOid, lock->seq, 0); + } else { + SET_LOCKTAG_PARTITION(locktag, lock->dbOid, lock->relOid, lock->seq); + } + } else { + SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); + } + if (!LockRelease(&locktag, AccessExclusiveLock, true)) { ereport(LOG, (errmsg("RecoveryLockLists contains entry for lock no longer recorded by lock manager: " "xid %lu database %u relation %u", lock->xid, lock->dbOid, lock->relOid))); @@ -833,11 +855,19 @@ void standby_redo(XLogReaderState* record) return; if (info == XLOG_STANDBY_LOCK) { - xl_standby_locks* xlrec = (xl_standby_locks*)XLogRecGetData(record); - int i; - - for (i = 0; i < xlrec->nlocks; i++) - StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid, xlrec->locks[i].dbOid, xlrec->locks[i].relOid); + if ((XLogRecGetInfo(record) & PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_FLAG) == 0) { + xl_standby_locks *xlrec = (xl_standby_locks *)XLogRecGetData(record); + for (int i = 0; i < xlrec->nlocks; i++) { + StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid, xlrec->locks[i].dbOid, xlrec->locks[i].relOid, + InvalidOid); + } + } else { + XLogStandbyLocksNew *xlrec = (XLogStandbyLocksNew *)XLogRecGetData(record); + for (int i = 0; i < xlrec->nlocks; i++) { + StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid, xlrec->locks[i].dbOid, 
xlrec->locks[i].relOid, + xlrec->locks[i].seq); + } + } } else if (info == XLOG_RUNNING_XACTS) { RunningTransactionsData running; @@ -1063,28 +1093,50 @@ static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock* locks) (void)XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); } +static void log_access_exclusive_locks_new(int nlocks, XlStandbyLockNew *locks) +{ + XLogStandbyLocksNew xlrec; + + xlrec.nlocks = nlocks; + + XLogBeginInsert(); + XLogRegisterData((char *)&xlrec, MIN_SIZE_OF_XACT_STANDBY_LOCKS_NEW); + XLogRegisterData((char *)locks, nlocks * sizeof(XlStandbyLockNew)); + + (void)XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK | PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_FLAG); +} + /* * Individual logging of AccessExclusiveLocks for use during LockAcquire() */ -void LogAccessExclusiveLock(Oid dbOid, Oid relOid) +void LogAccessExclusiveLock(Oid dbOid, Oid relOid, uint32 seq) { - if (ENABLE_DMS) { - return; - } - - xl_standby_lock xlrec; - - xlrec.xid = GetTopTransactionId(); + if (t_thrd.proc->workingVersionNum < PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_VERSION) { + xl_standby_lock xlrec; + xlrec.xid = GetTopTransactionId(); + /* + * Decode the lock_tag back to the original values, to avoid sending lots + * of empty bytes with every message. See lock.h to check how a lock_tag + * is defined for LOCKTAG_RELATION + */ + xlrec.dbOid = dbOid; + xlrec.relOid = relOid; - /* - * Decode the locktag back to the original values, to avoid sending lots - * of empty bytes with every message. See lock.h to check how a locktag - * is defined for LOCKTAG_RELATION - */ - xlrec.dbOid = dbOid; - xlrec.relOid = relOid; + LogAccessExclusiveLocks(1, &xlrec); + } else { + XlStandbyLockNew xlrec; + xlrec.xid = GetTopTransactionId(); + /* + * Decode the lock_tag back to the original values, to avoid sending lots + * of empty bytes with every message. 
See lock.h to check how a lock_tag + * is defined for LOCKTAG_RELATION + */ + xlrec.dbOid = dbOid; + xlrec.relOid = relOid; + xlrec.seq = seq; - LogAccessExclusiveLocks(1, &xlrec); + log_access_exclusive_locks_new(1, &xlrec); + } } /* @@ -1111,41 +1163,6 @@ void LogAccessExclusiveLockPrepare(void) (void)GetTopTransactionId(); } -static void LogReleaseAccessExclusiveLocks(int nlocks, xl_standby_lock* locks) -{ - xl_standby_locks xlrec; - - xlrec.nlocks = nlocks; - - XLogBeginInsert(); - XLogRegisterData((char*)&xlrec, MinSizeOfXactStandbyLocks); - XLogRegisterData((char*)locks, nlocks * sizeof(xl_standby_lock)); - - (void)XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_UNLOCK); -} - -void LogReleaseAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) -{ - if (ENABLE_DMS) { - return; - } - - xl_standby_lock xlrec; - - xlrec.xid = xid; - - /* - * Decode the locktag back to the original values, to avoid sending lots - * of empty bytes with every message. See lock.h to check how a locktag - * is defined for LOCKTAG_RELATION - */ - xlrec.dbOid = dbOid; - xlrec.relOid = relOid; - - LogReleaseAccessExclusiveLocks(1, &xlrec); -} - - void StandbyXlogStartup(void) { t_thrd.xlog_cxt.committing_csn_list = NIL; diff --git a/src/gausskernel/storage/lmgr/lock.cpp b/src/gausskernel/storage/lmgr/lock.cpp index c0294edef230a272a52b0ca7e8e054f9ea841c23..8c5f7282df2f77064fc584186b1175925541af73 100644 --- a/src/gausskernel/storage/lmgr/lock.cpp +++ b/src/gausskernel/storage/lmgr/lock.cpp @@ -773,8 +773,9 @@ static LockAcquireResult LockAcquireExtendedXC(const LOCKTAG *locktag, LOCKMODE * * First we prepare to log, then after lock acquired we issue log record. 
*/ - if (lockmode >= AccessExclusiveLock && (locktag->locktag_type == LOCKTAG_RELATION || - locktag->locktag_type == LOCKTAG_PARTITION || locktag->locktag_type == LOCKTAG_PARTITION_SEQUENCE) && + if (lockmode >= AccessExclusiveLock && + (locktag->locktag_type == LOCKTAG_RELATION || locktag->locktag_type == LOCKTAG_PARTITION || + locktag->locktag_type == LOCKTAG_PARTITION_SEQUENCE || locktag->locktag_type == LOCKTAG_OBJECT) && !RecoveryInProgress() && XLogStandbyInfoActive()) { LogAccessExclusiveLockPrepare(); log_lock = true; @@ -1058,7 +1059,12 @@ static LockAcquireResult LockAcquireExtendedXC(const LOCKTAG *locktag, LOCKMODE * lots of empty bytes with every message. See lock.h to check how a * locktag is defined for LOCKTAG_RELATION */ - LogAccessExclusiveLock(locktag->locktag_field1, locktag->locktag_field2); + uint32 seq = InvalidOid; + if (locktag->locktag_type == LOCKTAG_PARTITION || locktag->locktag_type == LOCKTAG_PARTITION_SEQUENCE || + locktag->locktag_type == LOCKTAG_OBJECT) { + seq = locktag->locktag_field3; + } + LogAccessExclusiveLock(locktag->locktag_field1, locktag->locktag_field2, seq); } instr_stmt_report_lock(LOCK_END, lockmode); @@ -2057,8 +2063,9 @@ void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) * released: it is always and only released when a toplevel transaction * ends. */ - if (lockmethodid == DEFAULT_LOCKMETHOD) + if ((lockmethodid == DEFAULT_LOCKMETHOD) && (t_thrd.role != PAGEREDO)) { VirtualXactLockTableCleanup(); + } numLockModes = lockMethodTable->numLockModes; @@ -3799,30 +3806,6 @@ void lock_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 LWLockRelease(partitionLock); } -/* - * Re-acquire a lock belonging to a transaction that was prepared, when - * when starting up into hot standby mode. 
- */ -void lock_twophase_standby_recover(TransactionId xid, uint16 info, void *recdata, uint32 len) -{ - TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *)recdata; - LOCKTAG *locktag = NULL; - LOCKMODE lockmode; - LOCKMETHODID lockmethodid; - - Assert(len == sizeof(TwoPhaseLockRecord)); - locktag = &rec->locktag; - lockmode = rec->lockmode; - lockmethodid = locktag->locktag_lockmethodid; - - CHECK_LOCKMETHODID(lockmethodid); - - if (lockmode == AccessExclusiveLock && locktag->locktag_type == LOCKTAG_RELATION) { - StandbyAcquireAccessExclusiveLock(xid, locktag->locktag_field1 /* dboid */, - locktag->locktag_field2 /* reloid */); - } -} - /* * 2PC processing routine for COMMIT PREPARED case. * diff --git a/src/gausskernel/storage/replication/walreceiver.cpp b/src/gausskernel/storage/replication/walreceiver.cpp index b3a7009bfe9dc402f3ef6e1e60d594186d2e8e91..34865ebebdb65ad385aecbba6e1937e213e3d9df 100755 --- a/src/gausskernel/storage/replication/walreceiver.cpp +++ b/src/gausskernel/storage/replication/walreceiver.cpp @@ -163,6 +163,7 @@ const char *g_reserve_param[] = { #endif "enable_huge_pages", "huge_page_size" + "exrto_standby_read_opt" }; const int g_reserve_param_num = lengthof(g_reserve_param); diff --git a/src/gausskernel/storage/smgr/storage_exrto_file.cpp b/src/gausskernel/storage/smgr/storage_exrto_file.cpp index 46ee9e494e2664c1b54a3cd99c0e6b076181a38d..20f433d8a2b6583bc0de7c6a6812e45d1304a3aa 100644 --- a/src/gausskernel/storage/smgr/storage_exrto_file.cpp +++ b/src/gausskernel/storage/smgr/storage_exrto_file.cpp @@ -180,6 +180,18 @@ static ExRTOFileState *exrto_open_file(SMgrRelation reln, ForkNumber forknum, Bl return state; } + +bool exrto_check_unlink_relfilenode(const RelFileNode rnode) +{ + HTAB *relfilenode_hashtbl = g_instance.bgwriter_cxt.unlink_rel_hashtbl; + bool found = false; + + LWLockAcquire(g_instance.bgwriter_cxt.rel_hashtbl_lock, LW_SHARED); + (void)hash_search(relfilenode_hashtbl, &(rnode), HASH_FIND, &found); + 
LWLockRelease(g_instance.bgwriter_cxt.rel_hashtbl_lock); + + return found; +} BlockNumber get_single_file_nblocks(SMgrRelation reln, ForkNumber forknum, const ExRTOFileState *state) { @@ -350,6 +362,7 @@ void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, c int nbytes; struct stat file_stat; char* filename; + ExtensionBehavior behavior; type = exrto_file_type(reln->smgr_rnode.node.spcNode); total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); @@ -359,7 +372,16 @@ void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, c } seekpos = (off_t)BLCKSZ * (total_block_num % EXRTO_FILE_BLOCKS[type]); - state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + behavior = (type == BLOCK_INFO_META ? EXTENSION_RETURN_NULL : EXTENSION_CREATE); + state = exrto_open_file(reln, forknum, blocknum, behavior); + if (state == NULL) { + Assert(type == BLOCK_INFO_META); + if (exrto_check_unlink_relfilenode(reln->smgr_rnode.node)) { + return; + } else { + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + } + } filename = FilePathName(state->file[forknum]); if (stat(filename, &file_stat) < 0) { char filepath[EXRTO_FILE_PATH_LEN]; @@ -380,12 +402,15 @@ void exrto_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, c (int)(EXRTO_FILE_SIZE[type] - file_stat.st_size)); nbytes = FilePWrite(state->file[forknum], NULL, extend_size, file_stat.st_size); if (nbytes != extend_size) { + char filepath[EXRTO_FILE_PATH_LEN]; + errno_t rc = strcpy_s(filepath, EXRTO_FILE_PATH_LEN, filename); + securec_check(rc, "\0", "\0"); exrto_close(reln, forknum, InvalidBlockNumber); if (nbytes < 0) { - ereport(ERROR, (errmsg("could not extend file \"%s\": %m.", filename))); + ereport(ERROR, (errmsg("could not extend file \"%s\": %m.", filepath))); } ereport(ERROR, - (errmsg("could not extend file \"%s\": wrote only %d of %d bytes.", filename, nbytes, extend_size))); + (errmsg("could not 
extend file \"%s\": wrote only %d of %d bytes.", filepath, nbytes, extend_size))); } Assert(get_single_file_nblocks(reln, forknum, state) <= ((BlockNumber)EXRTO_FILE_BLOCKS[type])); @@ -456,6 +481,7 @@ void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co uint64 total_block_num; off_t seekpos; int nbytes; + ExtensionBehavior behavior; type = exrto_file_type(reln->smgr_rnode.node.spcNode); total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); @@ -467,7 +493,16 @@ void exrto_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co Assert(seekpos < (off_t)EXRTO_FILE_SIZE[type]); - state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + behavior = (type == BLOCK_INFO_META ? EXTENSION_RETURN_NULL : EXTENSION_CREATE); + state = exrto_open_file(reln, forknum, blocknum, behavior); + if (state == NULL) { + Assert(type == BLOCK_INFO_META); + if (exrto_check_unlink_relfilenode(reln->smgr_rnode.node)) { + return; + } else { + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + } + } nbytes = FilePWrite(state->file[forknum], buffer, BLCKSZ, seekpos); if (nbytes != BLCKSZ) { char *filename = FilePathName(state->file[forknum]); @@ -547,13 +582,23 @@ void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum uint64 total_block_num; type = exrto_file_type(reln->smgr_rnode.node.spcNode); total_block_num = get_total_block_num(type, reln->smgr_rnode.node.relNode, blocknum); + ExtensionBehavior behavior = (type == BLOCK_INFO_META ? 
EXTENSION_RETURN_NULL : EXTENSION_CREATE); while (nblocks > 0) { BlockNumber nflush = nblocks; off_t seekpos; ExRTOFileState *state = NULL; uint64 segnum_start, segnum_end; - state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + state = exrto_open_file(reln, forknum, blocknum, behavior); + if (state == NULL) { + Assert(type == BLOCK_INFO_META); + /* only check at first time */ + if (exrto_check_unlink_relfilenode(reln->smgr_rnode.node)) { + return; + } else { + state = exrto_open_file(reln, forknum, blocknum, EXTENSION_CREATE); + } + } segnum_start = total_block_num / EXRTO_FILE_BLOCKS[type]; segnum_end = (total_block_num + nblocks - 1) / EXRTO_FILE_BLOCKS[type]; @@ -572,5 +617,6 @@ void exrto_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum Assert(((total_block_num + nflush) >> UINT64_HALF) == (total_block_num >> UINT64_HALF)); total_block_num += nflush; blocknum = (BlockNumber)total_block_num; + behavior = EXTENSION_CREATE; } } diff --git a/src/include/access/clog.h b/src/include/access/clog.h index c637bf48a6e987f7edc0ca11b51641539569ae83..68ae0c8cd882f590f4728f6388c0e1162bc6bc28 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -37,6 +37,7 @@ #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId)CLOG_XACTS_PER_PAGE) #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE) #define TransactionIdToBIndex(xid) ((xid) % (TransactionId)CLOG_XACTS_PER_BYTE) +#define PAGE_TO_TRANSACTION_ID(pageno) ((pageno) * (TransactionId)CLOG_XACTS_PER_PAGE) #define CLogPageNoToStartXactId(pageno) ((pageno > 0)?((pageno -1) * CLOG_XACTS_PER_PAGE): 0) /* CLog lwlock partition*/ diff --git a/src/include/access/extreme_rto/page_redo.h b/src/include/access/extreme_rto/page_redo.h index 7d789f85871bac495c5314407fa87a345994c225..90674a455f603db36d09a359b387cb20a7d414a6 100644 --- a/src/include/access/extreme_rto/page_redo.h +++ b/src/include/access/extreme_rto/page_redo.h @@ -204,6 +204,7 @@ 
void GetThreadNameIfPageRedoWorker(int argc, char *argv[], char **threadNamePtr) extern bool RedoWorkerIsUndoSpaceWorker(); uint32 GetMyPageRedoWorkerIdWithLock(); +void redo_worker_release_all_locks(); PGPROC *GetPageRedoWorkerProc(PageRedoWorker *worker); /* Worker main function. */ @@ -257,9 +258,6 @@ void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode); void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, XLogRecPtr old_lsn, XLogPhyBlock pblk); void SeqCheckRemoteReadAndRepairPage(); -void exrto_generate_snapshot(XLogRecPtr trxn_lsn); -void exrto_read_snapshot(Snapshot snapshot); -XLogRecPtr exrto_calculate_recycle_position(bool force_recyle); -TransactionId exrto_calculate_recycle_xmin_for_undo(); +bool exceed_send_lsn_forworder_interval(); } // namespace extreme_rto #endif diff --git a/src/include/access/extreme_rto/standby_read/block_info_meta.h b/src/include/access/extreme_rto/standby_read/block_info_meta.h index d4cd7ed73a97b8d2f0bda723a2fce8e0d97139b6..9a104db693a8efdee7e0adfea8d105c5cb2b87b1 100644 --- a/src/include/access/extreme_rto/standby_read/block_info_meta.h +++ b/src/include/access/extreme_rto/standby_read/block_info_meta.h @@ -80,9 +80,13 @@ typedef enum { BlockMetaInfo *get_block_meta_info_by_relfilenode(const BufferTag &buf_tag, BufferAccessStrategy strategy, ReadBufferMode mode, Buffer *buffer, bool need_share_lock = false); void insert_lsn_to_block_info( - StandbyReadMetaInfo* mete_info, const BufferTag& buf_tag, const Page base_page, XLogRecPtr next_lsn); -StandbyReadRecyleState recyle_block_info( - const BufferTag& buf_tag, LsnInfoPosition base_page_info_pos, XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn); + StandbyReadMetaInfo *mete_info, const BufferTag &buf_tag, const Page base_page, XLogRecPtr next_lsn); +void insert_lsn_to_block_info_for_opt( + StandbyReadMetaInfo *mete_info, const BufferTag &buf_tag, const Page base_page, XLogRecPtr next_lsn); + +StandbyReadRecyleState 
recyle_block_info(const BufferTag &buf_tag, LsnInfoPosition base_page_info_pos, + XLogRecPtr next_base_page_lsn, XLogRecPtr recyle_lsn, + XLogRecPtr *block_info_max_lsn); bool get_page_lsn_info(const BufferTag& buf_tag, BufferAccessStrategy strategy, XLogRecPtr read_lsn, StandbyReadLsnInfoArray* lsn_info); static inline bool is_block_info_page_valid(BlockInfoPageHeader* header) diff --git a/src/include/access/extreme_rto/standby_read/lsn_info_meta.h b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h index 7694bb984afa4ff2a9854e03475553dac3f544fe..8e641b5ab3e8c143db47b5a998218db2de455965 100644 --- a/src/include/access/extreme_rto/standby_read/lsn_info_meta.h +++ b/src/include/access/extreme_rto/standby_read/lsn_info_meta.h @@ -146,6 +146,8 @@ bool is_base_page_map_bit_set(Page page, uint32 which_bit); void recycle_one_lsn_info_list(const BufferTag& buf_tag, LsnInfoPosition page_info_pos, XLogRecPtr recycle_lsn, LsnInfoPosition *min_page_info_pos, XLogRecPtr *min_lsn); void standby_read_recyle_per_workers(StandbyReadMetaInfo *standby_read_meta_info, XLogRecPtr recycle_lsn); +LsnInfoPosition get_nearest_base_page_pos( + const BufferTag &buf_tag, const LsnInfoDoubleList &lsn_info_list, XLogRecPtr read_lsn); } // namespace extreme_rto_standby_read #endif \ No newline at end of file diff --git a/src/include/access/extreme_rto/standby_read/standby_read_base.h b/src/include/access/extreme_rto/standby_read/standby_read_base.h index d61690a932438bd71cbf5a426a6672dc985e82c2..efea1de98bcb156c27adcb2245b1c901c4b89c29 100644 --- a/src/include/access/extreme_rto/standby_read/standby_read_base.h +++ b/src/include/access/extreme_rto/standby_read/standby_read_base.h @@ -114,12 +114,18 @@ inline uint64 get_total_block_num(ExRTOFileType type, uint32 high, uint32 low) void exrto_clean_dir(void); void exrto_recycle_old_dir(void); void exrto_standby_read_init(); +void exrto_generate_snapshot(XLogRecPtr trxn_lsn); +void exrto_read_snapshot(Snapshot snapshot); +XLogRecPtr 
exrto_calculate_recycle_position(bool force_recyle); +TransactionId exrto_calculate_recycle_xmin_for_undo(); void buffer_drop_exrto_standby_read_buffers(StandbyReadMetaInfo *meta_info = NULL); void exrto_unlink_file_with_prefix(char *target_prefix, ExRTOFileType type, uint64 segno = 0); extern void XLogDumpDisplayRecord(XLogReaderState *record, char *strOutput); extern XLogRecPtr UpdateNextLSN(XLogRecPtr cur_lsn, XLogRecPtr end_lsn, XLogReaderState *xlogreader_state, bool *found); namespace extreme_rto_standby_read { void dump_error_all_info(const RelFileNode &rnode, ForkNumber forknum, BlockNumber blocknum); +Buffer standby_read_buf_new( + Relation reln, ForkNumber fork_num, BlockNumber block_num, ReadBufferMode mode, BufferAccessStrategy strategy); } #ifdef ENABLE_UT extern Page get_page_from_buffer(Buffer buf); diff --git a/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h b/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h index ed372174c015647fedc79c2a7d035a8921511694..a2623640ca9730f09fbff1dc39af5e0cc870579a 100644 --- a/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h +++ b/src/include/access/extreme_rto/standby_read/standby_read_delay_ddl.h @@ -37,4 +37,5 @@ void init_delay_ddl_file(); void update_delay_ddl_db(Oid db_id, Oid tablespace_id, XLogRecPtr lsn); void update_delay_ddl_files(ColFileNode* xnodes, int nrels, XLogRecPtr lsn); void delete_by_table_space(Oid tablespace_id); +void update_delay_ddl_file_truncate_clog(XLogRecPtr lsn, int64 pageno); #endif \ No newline at end of file diff --git a/src/include/access/multi_redo_api.h b/src/include/access/multi_redo_api.h index 4ec8cee786a3e594f0d382219301c47056fb9a9f..b3bda3fa792f1008bae8715ffb9f26cde521cdfe 100644 --- a/src/include/access/multi_redo_api.h +++ b/src/include/access/multi_redo_api.h @@ -67,8 +67,10 @@ static const uint32 PAGE_REDO_WORKER_EXIT = 3; static const uint32 BIG_RECORD_LENGTH = XLOG_BLCKSZ * 16; #define IS_EXRTO_READ 
(IsExtremeRedo() && g_instance.attr.attr_storage.EnableHotStandby && IsDefaultExtremeRtoMode()) -#define IS_EXRTO_STANDBY_READ (IS_EXRTO_READ && pm_state_is_hot_standby()) #define IS_EXRTO_RECOVERY_IN_PROGRESS (RecoveryInProgress() && IsExtremeRedo() && IsDefaultExtremeRtoMode()) +#define IS_EXRTO_STANDBY_READ (pm_state_is_hot_standby() && IS_EXRTO_READ) +#define IS_EXRTO_READ_OPT \ + (g_instance.attr.attr_storage.EnableHotStandby && g_instance.attr.attr_storage.enable_exrto_standby_read_opt) inline bool is_exrto_standby_read_worker() { diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 55bf5596944c0f0e983a2d533f3c332b762f0ab8..86c611596d3a5741dd23973f842a8c0ff3916fd3 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -369,6 +369,10 @@ enum { BTREE_NEWROOT_META_BLOCK_NUM }; +enum { + BTREE_REUSE_PAGE_BLOCK_NUM = 0, +}; + typedef struct xl_btree_metadata_old { BlockNumber root; uint32 level; @@ -1334,9 +1338,10 @@ extern Buffer _bt_getroot(Relation rel, int access); extern Buffer _bt_gettrueroot(Relation rel); extern int _bt_getrootheight(Relation rel); extern void _bt_checkbuffer_valid(Relation rel, Buffer buf); -extern void _bt_checkpage(Relation rel, Buffer buf); +extern void _bt_checkpage(Relation rel, Buffer buf, BlockNumber par_blkno = InvalidBlockNumber); extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); -extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access); +extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access, + BlockNumber par_blkno = InvalidBlockNumber); extern void _bt_relbuf(Relation rel, Buffer buf); extern void _bt_pageinit(Page page, Size size); extern bool _bt_page_recyclable(Page page); diff --git a/src/include/access/parallel_recovery/page_redo.h b/src/include/access/parallel_recovery/page_redo.h index e3056d9fc24d8327905845436ccd2a79e6b6b38b..22dbfa51ad7e2e36651addc8259b45cf91a34a3f 100644 --- 
a/src/include/access/parallel_recovery/page_redo.h +++ b/src/include/access/parallel_recovery/page_redo.h @@ -183,7 +183,6 @@ struct PageRedoWorker { uint64 statWaitReplay; pg_atomic_uint32 readyStatus; MemoryContext oldCtx; - int bufferPinWaitBufId; RedoTimeCost timeCostList[TIME_COST_NUM]; uint32 remoteReadPageNum; HTAB *badPageHashTbl; diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index 7f461b5f52ea73a2e67e53c595d86fd50c4e02c8..7a59fc6d519e2cd7f6ece4c44805ced85822aa16 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -30,7 +30,6 @@ typedef uint8 TwoPhaseRmgrId; extern const TwoPhaseCallback g_twophase_recover_callbacks[]; extern const TwoPhaseCallback g_twophase_postcommit_callbacks[]; extern const TwoPhaseCallback g_twophase_postabort_callbacks[]; -extern const TwoPhaseCallback g_twophase_standby_recover_callbacks[]; extern void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void* data, uint32 len); diff --git a/src/include/access/ustore/knl_uvisibility.h b/src/include/access/ustore/knl_uvisibility.h index 34d52c63c576b0d857cc4363f457c44d7e4a2e50..eef4711e32a487f73c5fb06720ae06500d631eea 100644 --- a/src/include/access/ustore/knl_uvisibility.h +++ b/src/include/access/ustore/knl_uvisibility.h @@ -16,6 +16,8 @@ #ifndef KNL_UVISIBILITY_H #define KNL_UVISIBILITY_H +#include "postmaster/postmaster.h" + typedef struct UHeapTupleTransInfo { int td_slot; TransactionId xid; @@ -87,4 +89,28 @@ bool UHeapTupleHasSerializableConflictOut(bool visible, Relation relation, ItemP void UHeapTupleCheckVisible(Snapshot snapshot, UHeapTuple tuple, Buffer buffer); void UHeapUpdateTDInfo(int tdSlot, Buffer buffer, OffsetNumber offnum, UHeapTupleTransInfo* uinfo); + +inline bool TransactionIdOlderThanAllUndo(TransactionId xid) +{ + /* to slove standby read consistency problem */ + if (RecoveryInProgress()) { + uint64 standby_recycle_xid = 
pg_atomic_read_u64(&g_instance.undo_cxt.hotStandbyRecycleXid); + return xid < standby_recycle_xid; + } + + uint64 cutoff = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid); + return xid < cutoff; +} + +inline bool TransactionIdOlderThanFrozenXid(TransactionId xid) +{ + /* to slove standby read consistency problem */ + if (RecoveryInProgress()) { + uint64 standby_recycle_xid = pg_atomic_read_u64(&g_instance.undo_cxt.hotStandbyRecycleXid); + return xid < standby_recycle_xid; + } + + uint64 cutoff = pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); + return xid < cutoff; +} #endif diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 09ad0c69a201a4d9225c51e59877dea84eb3cc64..76af9756ff00f3b18cbc5709ae9f0863d8c5c2d6 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -565,6 +565,6 @@ extern void BeginTxnForAutoCommitOff(); extern void SetTxnInfoForSSLibpqsw(TransactionId xid, CommandId cid); extern void ClearTxnInfoForSSLibpqsw(); extern bool IsTransactionInProgressState(); -extern void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels); +extern void unlink_relfiles(_in_ ColFileNode *xnodes, _in_ int nrels, bool is_old_delay_ddl = false); void xact_redo_log_drop_segs(_in_ ColFileNode *xnodes, _in_ int nrels, XLogRecPtr lsn); #endif /* XACT_H */ diff --git a/src/include/access/xlogproc.h b/src/include/access/xlogproc.h index bed5a495fabd98b1d30b980124926dafda0bb851..d9adbdddcd07961323c4aeebd2886f70f2a5faa2 100755 --- a/src/include/access/xlogproc.h +++ b/src/include/access/xlogproc.h @@ -160,6 +160,7 @@ typedef enum { BLOCK_DATA_SEG_SPACE_SHRINK, BLOCK_DATA_SEG_FULL_SYNC_TYPE, BLOCK_DATA_SEG_EXTEND, + BLOCK_DATA_CLEANUP_TYPE, } XLogBlockParseEnum; /* ********BLOCK COMMON HEADER END ***************** */ @@ -498,6 +499,7 @@ typedef struct { Oid dbNode; /* database */ int2 bucketNode; /* bucket */ uint2 opt; + bool is_conflict_type; /* whether wal log type is conflict with standby read if redo */ XLogPhyBlock 
pblk; } XLogBlockHead; @@ -594,6 +596,10 @@ typedef struct { Size dataLen; } XLogBlockSegNewPage; +typedef struct { + TransactionId removed_xid; +} WalCleanupInfoParse; + typedef struct { XLogBlockHead blockhead; XLogBlockRedoHead redohead; @@ -624,6 +630,7 @@ typedef struct { XLogBlockSegDdlParse blocksegddlrec; XLogBlockSegFullSyncParse blocksegfullsyncrec; XLogBlockSegNewPage blocksegnewpageinfo; + WalCleanupInfoParse clean_up_info; } extra_rec; } XLogBlockParse; @@ -1113,7 +1120,7 @@ void XLogRecSetSegNewPageInfo(XLogBlockSegNewPage *state, char *mainData, Size l void XLogRecSetAuxiBlkNumState(XLogBlockDataParse* blockdatarec, BlockNumber auxilaryblkn1, BlockNumber auxilaryblkn2); void XLogRecSetBlockDataStateContent(XLogReaderState *record, uint32 blockid, XLogBlockDataParse *blockdatarec); void XLogRecSetBlockDataState(XLogReaderState* record, uint32 blockid, XLogRecParseState* recordblockstate, - XLogBlockParseEnum type = BLOCK_DATA_MAIN_DATA_TYPE); + XLogBlockParseEnum type = BLOCK_DATA_MAIN_DATA_TYPE, bool is_conflict_type = false); extern char* XLogBlockDataGetBlockData(XLogBlockDataParse* datadecode, Size* len); void Heap2RedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); extern void HeapRedoDataBlock( @@ -1124,6 +1131,7 @@ extern void xlog_redo_data_block( extern void XLogRecSetBlockDdlState(XLogBlockDdlParse* blockddlstate, uint32 blockddltype, char *mainData, int rels = 1, bool compress = false, uint32 main_data_len = 0); XLogRedoAction XLogCheckBlockDataRedoAction(XLogBlockDataParse* datadecode, RedoBufferInfo* bufferinfo); +extern void wal_rec_set_clean_up_info_state(WalCleanupInfoParse *parse_state, TransactionId removed_xid); void BtreeRedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); void Btree2RedoDataBlock(XLogBlockHead* blockhead, XLogBlockDataParse* blockdatarec, RedoBufferInfo* bufferinfo); diff --git 
a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index 89ff5a3af32ae10888971cf7c662bb2757e6c043..f32e80ddeed41852c42802eaf0924b3bc5e730b4 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -210,12 +210,11 @@ typedef struct knl_instance_attr_storage { int max_active_gtt; /* extreme-rto standby read */ - int64 max_standby_base_page_size; - int64 max_standby_lsn_info_size; int base_page_saved_interval; double standby_force_recycle_ratio; int standby_recycle_interval; int standby_max_query_time; + bool enable_exrto_standby_read_opt; #ifndef ENABLE_MULTIPLE_NODES bool enable_save_confirmed_lsn; #endif diff --git a/src/include/knl/knl_guc/knl_session_attr_storage.h b/src/include/knl/knl_guc/knl_session_attr_storage.h index af7a7e43030f258b4df5471e7c896a3aeb1ed5d0..75c0e34edab2048415f770040ae79e74f591a423 100755 --- a/src/include/knl/knl_guc/knl_session_attr_storage.h +++ b/src/include/knl/knl_guc/knl_session_attr_storage.h @@ -267,6 +267,10 @@ typedef struct knl_session_attr_storage { int ignore_standby_lsn_window; int ignore_feedback_xmin_window; int subscription_conflict_resolution; + + /* extreme-rto standby read */ + int64 max_standby_base_page_size; + int64 max_standby_lsn_info_size; } knl_session_attr_storage; #endif /* SRC_INCLUDE_KNL_KNL_SESSION_ATTR_STORAGE */ diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index 41fdcd7f1c1c3f6953ae01bfca874b292221b5bb..2aaa2f461a3061877cdcbd5064a727376255824e 100755 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -756,11 +756,16 @@ typedef struct knl_g_parallel_redo_context { char* ali_buf; XLogRedoNumStatics xlogStatics[RM_NEXT_ID][MAX_XLOG_INFO_NUM]; RedoCpuBindControl redoCpuBindcontrl; - XLogRecPtr global_recycle_lsn; /* extreme-rto standby read */ HTAB **redoItemHash; /* used in ondemand extreme RTO */ + /* extreme-rto standby read 
*/ + TransactionId exrto_recyle_xmin; + XLogRecPtr global_recycle_lsn; ExrtoSnapshot exrto_snapshot; + TimestampTz exrto_send_lsn_forworder_time; StandbyReadDelayDdlState standby_read_delay_ddl_stat; uint64 max_clog_pageno; + int *buffer_pin_wait_buf_ids; + int buffer_pin_wait_buf_len; } knl_g_parallel_redo_context; typedef struct knl_g_heartbeat_context { @@ -931,6 +936,7 @@ typedef struct knl_g_undo_context { pg_atomic_uint64 globalFrozenXid; /* Oldest transaction id which is having undo. */ pg_atomic_uint64 globalRecycleXid; + pg_atomic_uint64 hotStandbyRecycleXid; bool is_exrto_residual_undo_file_recycled; } knl_g_undo_context; diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 6b1c7b11fc2300c37f70e7cde9ba21fa1f2031f7..b3d5cf3a97541819ea67a598b84b97398b9f5b70 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -1556,6 +1556,7 @@ typedef struct knl_t_undorecycler_context { /* Flags set by signal handlers */ volatile sig_atomic_t got_SIGHUP; volatile sig_atomic_t shutdown_requested; + bool is_recovery_in_progress; } knl_t_undorecycler_context; typedef struct knl_t_rollback_requests_context { diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index abe48258f369469f9176744fd0fc3e5c77eec225..a52ef32180363ad2a51e33bbef8bbd95e5a12dc2 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -136,6 +136,7 @@ extern const uint32 SLOW_SQL_VERSION_NUM; extern const uint32 INDEX_HINT_VERSION_NUM; extern const uint32 CREATE_TABLE_AS_VERSION_NUM; extern const uint32 GB18030_2022_VERSION_NUM; +extern const uint32 PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_VERSION; extern void register_backend_version(uint32 backend_version); extern bool contain_backend_version(uint32 version_number); diff --git a/src/include/storage/lock/lock.h b/src/include/storage/lock/lock.h index a67445a4107904411d16ca35c157cd44bbeb0472..8bebfefaa59febb0ccbdfa9226bdb365093014f9 100644 --- a/src/include/storage/lock/lock.h +++ 
b/src/include/storage/lock/lock.h @@ -675,13 +675,19 @@ typedef struct xl_standby_lock { Oid relOid; } xl_standby_lock; +typedef struct XlStandbyLockNew { + TransactionId xid; /* xid of holder of ACCESS_EXCLUSIVE_LOCK */ + Oid dbOid; + Oid relOid; + uint32 seq; +} XlStandbyLockNew; + extern xl_standby_lock* GetRunningTransactionLocks(int* nlocks); extern const char* GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); extern void lock_twophase_recover(TransactionId xid, uint16 info, void* recdata, uint32 len); extern void lock_twophase_postcommit(TransactionId xid, uint16 info, void* recdata, uint32 len); extern void lock_twophase_postabort(TransactionId xid, uint16 info, void* recdata, uint32 len); -extern void lock_twophase_standby_recover(TransactionId xid, uint16 info, void* recdata, uint32 len); extern DeadLockState DeadLockCheck(PGPROC* proc); extern PGPROC* GetBlockingAutoVacuumPgproc(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 915f3f791b16ae566da4fcfe080599162c677ef7..085861b3d4ba3ad72d399d59e20cbb5e5f6539bc 100755 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -566,17 +566,6 @@ void CancelBlockedRedistWorker(LOCK* lock, LOCKMODE lockmode); extern void BecomeLockGroupLeader(void); extern void BecomeLockGroupMember(PGPROC *leader); -static inline bool TransactionIdOlderThanAllUndo(TransactionId xid) -{ - uint64 cutoff = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid); - return xid < cutoff; -} -static inline bool TransactionIdOlderThanFrozenXid(TransactionId xid) -{ - uint64 cutoff = pg_atomic_read_u64(&g_instance.undo_cxt.globalFrozenXid); - return xid < cutoff; -} - extern int GetThreadPoolStreamProcNum(void); #endif /* PROC_H */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 6f0b4aa9d35d991bd9137bf52ccd692adc275720..164018c894ac0470b6e6054a50368d8d07384194 100755 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ 
-116,8 +116,8 @@ extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, CommitSeqNo limitXminCSN = InvalidCommitSeqNo, TransactionId* xminArray = NULL); extern ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalReason sigmode); -extern bool proc_array_cancel_conflicting_proc(TransactionId latest_removed_xid, - bool reach_max_check_times); +extern bool proc_array_cancel_conflicting_proc( + TransactionId latest_removed_xid, XLogRecPtr truncate_redo_lsn, bool reach_max_check_times); extern bool MinimumActiveBackends(int min); extern int CountDBBackends(Oid database_oid); @@ -141,6 +141,7 @@ extern void ProcArrayGetReplicationSlotXmin(TransactionId* xmin, TransactionId* extern TransactionId GetGlobal2pcXmin(); extern void CSNLogRecordAssignedTransactionId(TransactionId newXid); +extern void UpdateCleanUpInfo(TransactionId limitXmin, XLogRecPtr lsn); /* * Fast search of ProcArray mapping (xid => proc array index), diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 840043bafefb942f8edb7bcbf7ed241f8a6e5c03..5de989ba8f1c75e7c0621f2a35ccd94a33fd5267 100755 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -41,7 +41,7 @@ extern void CheckRecoveryConflictDeadlock(void); * to make hot standby work. That includes logging AccessExclusiveLocks taken * by transactions and running-xacts snapshots. 
*/ -extern void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid); +extern void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid, uint32 seq); extern void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId* subxids); extern void StandbyReleaseAllLocks(void); extern void StandbyReleaseOldLocks(TransactionId oldestRunningXid); @@ -60,12 +60,18 @@ extern bool standbyWillTouchStandbyLocks(XLogReaderState* record); #define XLOG_STANDBY_CSN_COMMITTING 0x40 #define XLOG_STANDBY_CSN_ABORTED 0x50 +#define PARTITION_ACCESS_EXCLUSIVE_LOCK_UPGRADE_FLAG 0x01 typedef struct xl_standby_locks { int nlocks; /* number of entries in locks array */ xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]; /* VARIABLE LENGTH ARRAY */ } xl_standby_locks; +typedef struct XLogStandbyLocksNew { + int nlocks; /* number of entries in locks array */ + XlStandbyLockNew locks[FLEXIBLE_ARRAY_MEMBER]; /* VARIABLE LENGTH ARRAY */ +} XLogStandbyLocksNew; + /* * Keep track of all the locks owned by a given transaction. */ @@ -77,6 +83,7 @@ typedef struct RecoveryLockListsEntry #define MinSizeOfXactStandbyLocks offsetof(xl_standby_locks, locks) +#define MIN_SIZE_OF_XACT_STANDBY_LOCKS_NEW offsetof(XLogStandbyLocksNew, locks) /* * When we write running xact data to WAL, we use this structure. 
@@ -140,9 +147,8 @@ typedef struct RunningTransactionsData { typedef RunningTransactionsData* RunningTransactions; -extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid); +extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid, uint32 seq); extern void LogAccessExclusiveLockPrepare(void); -extern void LogReleaseAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid); extern XLogRecPtr LogStandbySnapshot(void); #endif /* STANDBY_H */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 6f1b557067af25a5d09744552528b9f7da326c4f..ff38a9ca2c9f9069ca883731861d7a2deebb787a 100755 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -278,7 +278,7 @@ extern char* GetGucName(const char *command, char *target_guc_name); extern void BeginReportingGUCOptions(void); extern void ParseLongOption(const char* string, char** name, char** value); extern bool parse_int(const char* value, int* result, int flags, const char** hintmsg); -extern bool parse_int64(const char* value, int64* result, const char** hintmsg); +extern bool parse_int64(const char* value, int64* result, int flags, const char** hintmsg); extern bool parse_real(const char* value, double* result, int flags = 0, const char** hintmsg = NULL); double TimeUnitConvert(char** endptr, double value, int flags, const char** hintmsg); double MemoryUnitConvert(char** endptr, double value, int flags, const char** hintmsg); diff --git a/src/test/ha/standby_env.sh b/src/test/ha/standby_env.sh index 05743bfe769aab61d66ba9f3f19c494e2e969c5e..b526dad79f609fec078c48adf97ee4d1e4dcfe36 100644 --- a/src/test/ha/standby_env.sh +++ b/src/test/ha/standby_env.sh @@ -1,7 +1,7 @@ #!/bin/sh #some enviroment vars -export g_base_port=25632 +export g_base_port=8888 export prefix=${GAUSSHOME} export g_pooler_base_port=`expr $g_base_port \+ 410` export g_base_standby_port=`expr $g_base_port \+ 400` @@ -11,28 +11,13 @@ export LD_LIBRARY_PATH=$prefix/lib:$prefix/lib/libobs:$LD_LIBRARY_PATH export 
PATH="$prefix/bin":$PATH export g_data_path="$install_path/hadata" -eth0ip=`/sbin/ifconfig eth0|sed -n 2p|awk '{ print $2 }'` -eth1ip=`/sbin/ifconfig eth1|sed -n 2p|awk '{ print $2 }'` -ethens=`/sbin/ifconfig ens4f0|sed -n 2p |awk '{ print $2 }'` -enp2s0f0=`/sbin/ifconfig enp2s0f0|sed -n 2p |awk '{ print $2 }'` -enp2s0f1=`/sbin/ifconfig enp2s0f1|sed -n 2p |awk '{ print $2 }'` -enp125s0f0=`/sbin/ifconfig enp125s0f0|sed -n 2p |awk '{ print $2 }'` +eth0ip=`/sbin/ifconfig | grep 'inet ' | grep -v 127.0.0.1 | head -1 | awk '{ print $2 }'` if [ -n "$eth0ip" ]; then - export eth_local_ip=$eth0ip -elif [ -n "$eth1ip" ];then - export eth_local_ip=$eth1ip -elif [ -n "$ethens" ];then - export eth_local_ip=$eth1ip -elif [ -n "$enp2s0f0" ];then - export eth_local_ip=$enp2s0f0 -elif [ -n "$enp2s0f1" ];then - export eth_local_ip=$enp2s0f1 -elif [ -n "$enp125s0f0" ];then - export eth_local_ip=$enp125s0f0 + export eth_local_ip=$eth0ip else - echo "error eth0 and eth1 not configured,exit" - exit 1 + echo "error eth0 and eth1 not configured,exit" + exit 1 fi export g_local_ip="127.0.0.1" diff --git a/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh b/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh index 56fcd6bdec6b183d0433e195f4da2ca52e0b52a9..b39d8e6f6e756fd05482f46e6a3469d248e4dc6c 100644 --- a/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh +++ b/src/test/ha/testcase/exrtostandbyread/single_standby_read_base.sh @@ -12,11 +12,29 @@ function check_select_result() fi } +function start_standby_read_cluster() +{ + start_primary_as_primary + start_standby +} + +function stop_standby_read_cluster() +{ + stop_primary + stop_standby +} + +function restart_standby_read_cluster() +{ + stop_standby_read_cluster + start_standby_read_cluster +} + function test_base_sql_func() { gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test1; CREATE TABLE test1(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT 
NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(parallel_workers=8,storage_type=aSTORE);" gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test2; CREATE TABLE test2(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(storage_type=aSTORE,fillfactor=80) partition by hash(contentId);" - gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test3; CREATE TABLE test3(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(storage_type=uSTORE,fillfactor=40) partition by list(contentId) (partition p1 values ('1') ,partition p2 values ('2') ,partition p3 values ('3') ,partition p4 values (default));" + gsql -d test_standby_read_base -p $dn1_primary_port -c "DROP TABLE if exists test3; CREATE TABLE test3(contentId VARCHAR(128) NOT NULL, commentId VARCHAR(128) NOT NULL, appId VARCHAR(128) NOT NULL, PRIMARY KEY (contentId, commentId)) with(storage_type=aSTORE,fillfactor=40) partition by list(contentId) (partition p1 values ('1') ,partition p2 values ('2') ,partition p3 values ('3') ,partition p4 values (default));" gsql -d test_standby_read_base -p $dn1_primary_port -c "insert into test1 select generate_series(1,20), generate_series(1,20), generate_series(1,20);" gsql -d test_standby_read_base -p $dn1_primary_port -c "insert into test2 select generate_series(1,300), generate_series(1,300), generate_series(1,300);" @@ -77,7 +95,7 @@ function test_standby_read_base_func() gs_guc set -Z datanode -D $standby_data_dir -c "recovery_redo_workers = 1" gs_guc set -Z datanode -D $standby_data_dir -c "hot_standby = on" - start_cluster + start_standby_read_cluster echo "start cluster success" sleep 2 @@ -98,8 +116,7 @@ function test_standby_read_base_func() gs_guc set -Z datanode -D $primary_data_dir -c " recovery_max_workers = 4" gs_guc set -Z 
datanode -D $standby_data_dir -c " recovery_max_workers = 4" - kill_cluster - start_cluster + restart_standby_read_cluster test_base_sql_func @@ -113,8 +130,7 @@ function test_standby_read_base_func() gs_guc set -Z datanode -D $primary_data_dir -c " recovery_redo_workers = 4" gs_guc set -Z datanode -D $standby_data_dir -c " recovery_redo_workers = 4" - kill_cluster - start_cluster + restart_standby_read_cluster test_base_sql_func }