/*
 * Review notes for the patch below (unified diff touching four files).
 * This text sits ahead of the first "diff --git" header and is not part of
 * any hunk; every byte of the patch itself is unchanged.
 *
 * page_redo.cpp
 *   - Adds checkBlockRedoDoneFromHashMapAndLock(lock, redoItemTag,
 *     redoItemEntry, holdLock). It picks redoItemHash[GetSlotId(...)],
 *     raises ERROR if that table is NULL, takes the tag's partition lock in
 *     LW_SHARED mode and looks the tag up with HASH_FIND.
 *       return true  : tag not found, or redoItemNum == 0, or redoDone.
 *                      With holdLock == false the lock is released and *lock
 *                      is set to NULL; with holdLock == true the lock is kept
 *                      (shared if the first check hit, exclusive if only the
 *                      re-check hit).
 *       return false : the lock was released and re-taken in LW_EXCLUSIVE
 *                      mode and is still held; *redoItemEntry receives the
 *                      entry when redoItemEntry is not NULL.
 *   - Adds three static inline helpers: XLogRecGetRedoItemTag() copies
 *     dbNode/relNode/spcNode/bucketNode/opt/forknum/blkno from the block head
 *     into a RedoItemTag; IsXLogRecSameRedoBlock() returns false if either
 *     state is NULL, otherwise memcmp()s the two tags; IsProcInHashMap()
 *     returns true only for the MAIN_DATA, UNDO, VM and FSM block types.
 *   - RedoPageWorkerMain(): when no partition lock is currently held,
 *     SS_ONDEMAND_BUILD_DONE is true and the record type passes
 *     IsProcInHashMap(), the helper is called with holdLock == true. If it
 *     reports redoDone the record is not replayed: needRelease is cleared,
 *     the state is dereferenced, the completed read pointer is advanced and
 *     control jumps to redo_done. At redo_done the lock is released (and
 *     redoDone reset) once the next record, procState, is not the same block.
 *
 * redo_utils.cpp
 *   - RedoForOndemandExtremeRTOQuery() drops its inline lookup/lock sequence
 *     and calls the new helper with holdLock == false; page_redo.h is now
 *     included for the declaration.
 *
 * xlog.cpp
 *   - StartupXLOG() gains a LOG line printing the recovery instance id, this
 *     instance id, the control file checkpoint location and the
 *     checkPointCopy.redo location.
 *   - "in_ondemand_recovery = false; SetExtremeRtoMode();" moves out of the
 *     else branch and now runs before the on-demand "if" unconditionally.
 *
 * page_redo.h
 *   - Includes batch_redo.h and declares checkBlockRedoDoneFromHashMapAndLock
 *     inside namespace ondemand_extreme_rto.
 *
 * NOTE(review): in the helper, "entry" is fetched under the shared lock and
 *   read again after LWLockRelease + LWLockAcquire(LW_EXCLUSIVE) without a
 *   second hash_search. The removed redo_utils.cpp code had the same shape.
 *   Confirm entries cannot be removed or recycled in that window; otherwise
 *   the re-check should look the tag up again.
 * NOTE(review): IsXLogRecSameRedoBlock() memcmp()s two RedoItemTag locals
 *   that are filled field by field and never zeroed. This is only reliable
 *   if RedoItemTag has no padding bytes - TODO confirm, or zero the tags.
 * NOTE(review): on the redoDone path redoblockstate is still read by
 *   SetCompletedReadEndPtr() and by IsXLogRecSameRedoBlock() after
 *   DereferenceRecParseState(redoblockstate). Presumably the dereference
 *   does not free the parse state itself - verify.
 * NOTE(review): when the helper returns false in RedoPageWorkerMain() the
 *   partition lock stays held in exclusive mode while the record is
 *   replayed, until redo_done sees a different block. Confirm the replay
 *   path never tries to take the same partition lock again.
 */
diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp index 4456afbd5d235197713aee29dc4990f3b2891cdf..56ea53b33825ac65fae25b621f87f754a1c56429 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp @@ -1582,6 +1582,100 @@ void RedoPageWorkerRedoBcmBlock(XLogRecParseState *procState) } } +// if holdLock is false, only lock if need redo; otherwise, lock anyway +bool checkBlockRedoDoneFromHashMapAndLock(LWLock **lock, RedoItemTag redoItemTag, RedoItemHashEntry **redoItemEntry, + bool holdLock) +{ + bool hashFound = false; + uint32 id = GetSlotId(redoItemTag.rNode, 0, 0, GetBatchCount()); + HTAB *hashMap = g_instance.comm_cxt.predo_cxt.redoItemHash[id]; + if (hashMap == NULL) { + ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("redo item hash table corrupted, there has invalid hashtable."))); + } + + unsigned int new_hash = XlogTrackTableHashCode(&redoItemTag); + *lock = XlogTrackMappingPartitionLock(new_hash); + (void)LWLockAcquire(*lock, LW_SHARED); + RedoItemHashEntry *entry = (RedoItemHashEntry *)hash_search(hashMap, (void *)&redoItemTag, HASH_FIND, &hashFound); + + /* Page is already up-to-date, no need to replay. 
*/ + if (!hashFound || entry->redoItemNum == 0 || entry->redoDone) { + if (!holdLock) { + LWLockRelease(*lock); + *lock = NULL; + } + return true; + } + + // switch to exclusive lock in replay + LWLockRelease(*lock); + (void)LWLockAcquire(*lock, LW_EXCLUSIVE); + + // check again + if (entry->redoItemNum == 0 || entry->redoDone) { + if (!holdLock) { + LWLockRelease(*lock); + *lock = NULL; + } + return true; + } + + if (redoItemEntry != NULL) { + *redoItemEntry = entry; + } + return false; +} + +static inline void XLogRecGetRedoItemTag(XLogRecParseState *redoblockstate, RedoItemTag *redoItemTag) +{ + XLogBlockParse *blockparse = &(redoblockstate->blockparse); + + redoItemTag->rNode.dbNode = blockparse->blockhead.dbNode; + redoItemTag->rNode.relNode = blockparse->blockhead.relNode; + redoItemTag->rNode.spcNode = blockparse->blockhead.spcNode; + redoItemTag->rNode.bucketNode = blockparse->blockhead.bucketNode; + redoItemTag->rNode.opt = blockparse->blockhead.opt; + + redoItemTag->forkNum = blockparse->blockhead.forknum; + redoItemTag->blockNum = blockparse->blockhead.blkno; +} + +static inline bool IsXLogRecSameRedoBlock(XLogRecParseState *redoblockstate1, XLogRecParseState *redoblockstate2) +{ + RedoItemTag redoItemTag1; + RedoItemTag redoItemTag2; + + if (redoblockstate1 == NULL || redoblockstate2 == NULL) { + return false; + } + + XLogRecGetRedoItemTag(redoblockstate1, &redoItemTag1); + XLogRecGetRedoItemTag(redoblockstate2, &redoItemTag2); + + if (memcmp(&redoItemTag1, &redoItemTag2, sizeof(RedoItemTag)) != 0) { + return false; + } + return true; +} + +static inline bool IsProcInHashMap(XLogRecParseState *procState) +{ + bool result = false; + switch (XLogBlockHeadGetValidInfo(&procState->blockparse.blockhead)) { + case BLOCK_DATA_MAIN_DATA_TYPE: + case BLOCK_DATA_UNDO_TYPE: + case BLOCK_DATA_VM_TYPE: + case BLOCK_DATA_FSM_TYPE: + result = true; + break; + default: + break; + } + + return result; +} + void RedoPageWorkerMain() { 
(void)RegisterRedoInterruptCallBack(HandlePageRedoInterrupts); @@ -1592,6 +1686,7 @@ void RedoPageWorkerMain() } XLogRecParseState *redoblockstateHead = NULL; + LWLock *xlog_partition_lock = NULL; GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_1]); while ((redoblockstateHead = (XLogRecParseState *)SPSCBlockingQueueTop(g_redoWorker->queue)) != (XLogRecParseState *)&g_redoEndMark) { @@ -1623,6 +1718,7 @@ void RedoPageWorkerMain() bool notfound = false; bool updateFsm = false; bool needRelease = true; + bool redoDone = false; XLogRecParseState *procState = redoblockstateHead; XLogRecParseState *reloadBlockState = NULL; @@ -1634,6 +1730,21 @@ void RedoPageWorkerMain() // nextrecord will be redo in backwards position procState = (procState->distributeStatus == XLOG_TAIL_DISTRIBUTE) ? NULL : (XLogRecParseState *)procState->nextrecord; + if (xlog_partition_lock == NULL && SS_ONDEMAND_BUILD_DONE && IsProcInHashMap(redoblockstate)) { + RedoItemTag redoItemTag; + XLogRecGetRedoItemTag(redoblockstate, &redoItemTag); + redoDone = checkBlockRedoDoneFromHashMapAndLock(&xlog_partition_lock, redoItemTag, NULL, true); + } + + if (redoDone) { + Assert(xlog_partition_lock != NULL); + needRelease = false; + DereferenceRecParseState(redoblockstate); + SetCompletedReadEndPtr(g_redoWorker, redoblockstate->blockparse.blockhead.start_ptr, + redoblockstate->blockparse.blockhead.end_ptr); + goto redo_done; + } + switch (XLogBlockHeadGetValidInfo(&redoblockstate->blockparse.blockhead)) { case BLOCK_DATA_MAIN_DATA_TYPE: case BLOCK_DATA_UNDO_TYPE: @@ -1699,6 +1810,13 @@ void RedoPageWorkerMain() default: break; } + +redo_done: + if (xlog_partition_lock != NULL && !IsXLogRecSameRedoBlock(redoblockstate, procState)) { + LWLockRelease(xlog_partition_lock); + xlog_partition_lock = NULL; + redoDone = false; + } } (void)MemoryContextSwitchTo(oldCtx); GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_7]); diff --git 
a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/redo_utils.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/redo_utils.cpp index c93e275f06911188189eddef4d37392a8314c437..c4b9bb5cd2dfd8324cf7579be63e892bf87ec069 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/redo_utils.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/redo_utils.cpp @@ -23,6 +23,7 @@ #include "access/xlogproc.h" #include "access/ondemand_extreme_rto/batch_redo.h" +#include "access/ondemand_extreme_rto/page_redo.h" #include "access/ondemand_extreme_rto/dispatcher.h" #include "access/ondemand_extreme_rto/redo_utils.h" #include "access/ondemand_extreme_rto/xlog_read.h" @@ -243,10 +244,8 @@ void OndemandXLogParseBufferRelease(XLogRecParseState *recordstate) BufferDesc *RedoForOndemandExtremeRTOQuery(BufferDesc *bufHdr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode) { - bool hashFound = false; bool needMarkDirty = false; - unsigned int new_hash; - LWLock *xlog_partition_lock; + LWLock *xlog_partition_lock = NULL; Buffer buf = BufferDescriptorGetBuffer(bufHdr); ondemand_extreme_rto::RedoItemHashEntry *redoItemEntry = NULL; ondemand_extreme_rto::RedoItemTag redoItemTag; @@ -259,27 +258,11 @@ BufferDesc *RedoForOndemandExtremeRTOQuery(BufferDesc *bufHdr, char relpersisten INIT_REDO_ITEM_TAG(redoItemTag, bufHdr->tag.rnode, forkNum, blockNum); - uint32 id = ondemand_extreme_rto::GetSlotId(bufHdr->tag.rnode, 0, 0, ondemand_extreme_rto::GetBatchCount()); - HTAB *hashMap = g_instance.comm_cxt.predo_cxt.redoItemHash[id]; - if (hashMap == NULL) { - ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("redo item hash table corrupted, there has invalid hashtable."))); - } - - new_hash = ondemand_extreme_rto::XlogTrackTableHashCode(&redoItemTag); - xlog_partition_lock = XlogTrackMappingPartitionLock(new_hash); - (void)LWLockAcquire(xlog_partition_lock, LW_SHARED); - redoItemEntry = 
(ondemand_extreme_rto::RedoItemHashEntry *)hash_search(hashMap, (void *)&redoItemTag, HASH_FIND, &hashFound); - - /* Page is already up-to-date, no need to replay. */ - if (!hashFound || redoItemEntry->redoItemNum == 0 || redoItemEntry->redoDone) { - LWLockRelease(xlog_partition_lock); + if (checkBlockRedoDoneFromHashMapAndLock(&xlog_partition_lock, redoItemTag, &redoItemEntry, false)) { return bufHdr; } - // switch to exclusive lock in replay - LWLockRelease(xlog_partition_lock); - (void)LWLockAcquire(xlog_partition_lock, LW_EXCLUSIVE); + Assert(xlog_partition_lock != NULL); rc = memset_s(&bufferInfo, sizeof(bufferInfo), 0, sizeof(bufferInfo)); securec_check(rc, "\0", "\0"); diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 3a85240b58eb8502e359921f9d7f93b95124b018..b5720651cc12c3c6b41ce0b34f410f47e6f899de 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -9160,6 +9160,13 @@ void StartupXLOG(void) SSCLOGShmemClear(); SSMultiXactShmemClear(); } + ereport(LOG, (errmsg("[SS] Recovery instance %d, my instance %d, checkpoint record loc %X/%X, redo loc %X/%X", + g_instance.dms_cxt.SSRecoveryInfo.recovery_inst_id, + g_instance.attr.attr_storage.dms_attr.instance_id, + (uint32)(t_thrd.shemem_ptr_cxt.ControlFile->checkPoint >> 32), + (uint32)t_thrd.shemem_ptr_cxt.ControlFile->checkPoint, + (uint32)(t_thrd.shemem_ptr_cxt.ControlFile->checkPointCopy.redo >> 32), + (uint32)t_thrd.shemem_ptr_cxt.ControlFile->checkPointCopy.redo))); } else { xlogreader = XLogReaderAllocate(&XLogPageRead, &readprivate); } @@ -9611,6 +9618,8 @@ void StartupXLOG(void) t_thrd.xlog_cxt.InRecovery = false; } + g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = false; + SetExtremeRtoMode(); if (SS_PRIMARY_MODE && ENABLE_ONDEMAND_RECOVERY && (SS_STANDBY_FAILOVER || SS_PRIMARY_NORMAL_REFORM) && t_thrd.xlog_cxt.InRecovery == true) { if (SSOndemandRecoveryExitNormal) 
{ @@ -9622,8 +9631,6 @@ void StartupXLOG(void) SetOndemandExtremeRtoMode(); ereport(LOG, (errmsg("[On-demand] replayed in extreme rto ondemand recovery mode"))); } else { - g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = false; - SetExtremeRtoMode(); ereport(LOG, (errmsg("[On-demand] do not allow replay in ondemand recovery if last ondemand recovery " "crash, replayed in extreme rto recovery mode"))); } diff --git a/src/include/access/ondemand_extreme_rto/page_redo.h b/src/include/access/ondemand_extreme_rto/page_redo.h index 285d66c2aecee8941ba9b4124fe582f366ae83ad..1615a36834de8c0ad6a078413fbebea48f748440 100644 --- a/src/include/access/ondemand_extreme_rto/page_redo.h +++ b/src/include/access/ondemand_extreme_rto/page_redo.h @@ -30,6 +30,7 @@ #include "knl/knl_variable.h" #include "access/ondemand_extreme_rto/redo_item.h" +#include "access/ondemand_extreme_rto/batch_redo.h" #include "nodes/pg_list.h" #include "storage/proc.h" @@ -247,6 +248,8 @@ void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode); void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, XLogRecPtr old_lsn, XLogPhyBlock pblk); const char *RedoWokerRole2Str(RedoRole role); +bool checkBlockRedoDoneFromHashMapAndLock(LWLock **lock, RedoItemTag redoItemTag, RedoItemHashEntry **redoItemEntry, + bool holdLock); } // namespace ondemand_extreme_rto #endif