From 88d8d0734678846c98b79149324ce9ad0c9be6ad Mon Sep 17 00:00:00 2001 From: chendong76 <1209756284@qq.com> Date: Tue, 4 Jul 2023 15:13:40 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=A4=9A=E6=AC=A1=E8=BF=9B?= =?UTF-8?q?=E5=85=A5=E6=8C=89=E9=9C=80=E5=9B=9E=E6=94=BE=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E5=9B=9E=E6=94=BE=E6=97=A5=E5=BF=97=E9=80=89=E6=8B=A9=E5=87=BA?= =?UTF-8?q?=E9=94=99=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ddes/adapter/ss_reform_common.cpp | 6 +++--- .../process/threadpool/knl_instance.cpp | 1 + .../access/transam/extreme_rto_redo_api.cpp | 5 +++++ .../access/transam/multi_redo_settings.cpp | 2 -- .../storage/access/transam/xlog.cpp | 18 +++++++++++------- .../xlog_share_storage/xlog_share_storage.cpp | 2 +- src/include/access/extreme_rto_redo_api.h | 1 + src/include/access/xlog_basic.h | 2 +- src/include/ddes/dms/ss_common_attr.h | 3 +++ src/include/ddes/dms/ss_dms_recovery.h | 3 ++- 10 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/gausskernel/ddes/adapter/ss_reform_common.cpp b/src/gausskernel/ddes/adapter/ss_reform_common.cpp index d298f3627f..1d64bb6618 100644 --- a/src/gausskernel/ddes/adapter/ss_reform_common.cpp +++ b/src/gausskernel/ddes/adapter/ss_reform_common.cpp @@ -187,8 +187,8 @@ void SSGetRecoveryXlogPath() errno_t rc = EOK; char *dssdir = g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name; - rc = snprintf_s(g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog%d", - dssdir, g_instance.dms_cxt.SSReformerControl.recoveryInstId); + rc = snprintf_s(g_instance.dms_cxt.SSRecoveryInfo.recovery_xlog_dir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog%d", + dssdir, g_instance.dms_cxt.SSRecoveryInfo.recovery_inst_id); securec_check_ss(rc, "", ""); } @@ -282,7 +282,7 @@ loop: } // new params set to initial value - new_ctrl->version = REFORM_CTRL_VERSION;; + new_ctrl->version = REFORM_CTRL_VERSION; 
new_ctrl->recoveryInstId = INVALID_INSTANCEID; new_ctrl->clusterStatus = CLUSTER_NORMAL; diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index a51f9e3c8b..a3b02e86be 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -180,6 +180,7 @@ static void knl_g_dms_init(knl_g_dms_context *dms_cxt) dms_cxt->SSReformInfo.in_reform = false; dms_cxt->SSReformInfo.dms_role = DMS_ROLE_UNKNOW; dms_cxt->SSClusterState = NODESTATE_NORMAL; + dms_cxt->SSRecoveryInfo.recovery_inst_id = INVALID_INSTANCEID; dms_cxt->SSRecoveryInfo.recovery_pause_flag = true; dms_cxt->SSRecoveryInfo.failover_ckpt_status = NOT_ACTIVE; dms_cxt->SSRecoveryInfo.new_primary_reset_walbuf_flag = false; diff --git a/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp b/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp index 12deca658d..65e449757d 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp @@ -141,6 +141,11 @@ static const f_extreme_rto_redo extreme_rto_redosw[] = { }, }; +void SetOndemandExtremeRtoMode() +{ + g_extreme_rto_type = ONDEMAND_EXTREME_RTO; +} + void ExtremeWaitAllReplayWorkerIdle() { (*(extreme_rto_redosw[g_extreme_rto_type].wait_all_replay_worker_idle))(); diff --git a/src/gausskernel/storage/access/transam/multi_redo_settings.cpp b/src/gausskernel/storage/access/transam/multi_redo_settings.cpp index 8cb1971054..6ed92595e3 100644 --- a/src/gausskernel/storage/access/transam/multi_redo_settings.cpp +++ b/src/gausskernel/storage/access/transam/multi_redo_settings.cpp @@ -44,8 +44,6 @@ void ConfigRecoveryParallelism() if (g_instance.attr.attr_storage.recovery_parse_workers > 1) { g_instance.comm_cxt.predo_cxt.redoType = EXTREME_REDO; - g_extreme_rto_type = g_instance.attr.attr_storage.dms_attr.enable_ondemand_recovery ? 
- ONDEMAND_EXTREME_RTO : DEFAULT_EXTREME_RTO; g_instance.attr.attr_storage.batch_redo_num = g_instance.attr.attr_storage.recovery_parse_workers; uint32 total_recovery_parallelism = g_instance.attr.attr_storage.batch_redo_num * 2 + g_instance.attr.attr_storage.recovery_redo_workers_per_paser_worker * diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 1d34417abe..15073cc7a8 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -8804,7 +8804,8 @@ void StartupXLOG(void) */ if (ENABLE_DMS && ENABLE_DSS) { int src_id = INVALID_INSTANCEID; - if (SS_CLUSTER_ONDEMAND_RECOVERY && SS_PRIMARY_MODE) { + SSReadControlFile(REFORM_CTRL_PAGE); + if ((SS_CLUSTER_ONDEMAND_BUILD || SS_CLUSTER_ONDEMAND_RECOVERY) && SS_PRIMARY_MODE) { if (SS_STANDBY_PROMOTING) { ereport(FATAL, (errmsg("Do not allow switchover if on-demand recovery is not finish"))); } @@ -8821,9 +8822,8 @@ void StartupXLOG(void) } else { src_id = g_instance.attr.attr_storage.dms_attr.instance_id; } - g_instance.dms_cxt.SSReformerControl.recoveryInstId = src_id; - SSSaveReformerCtrl(); } + g_instance.dms_cxt.SSRecoveryInfo.recovery_inst_id = src_id; SSReadControlFile(src_id); } else { ReadControlFile(); @@ -9480,10 +9480,14 @@ void StartupXLOG(void) } if (SS_PRIMARY_MODE) { - if (ENABLE_ONDEMAND_RECOVERY && t_thrd.xlog_cxt.InRecovery == true) { + if (ENABLE_ONDEMAND_RECOVERY && (SS_STANDBY_FAILOVER || SS_PRIMARY_NORMAL_REFORM) && + t_thrd.xlog_cxt.InRecovery == true) { g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = true; - /* for other nodes in cluster */ + /* for other nodes in cluster and ondemand recovery failed */ g_instance.dms_cxt.SSReformerControl.clusterStatus = CLUSTER_IN_ONDEMAND_BUILD; + g_instance.dms_cxt.SSReformerControl.recoveryInstId = g_instance.dms_cxt.SSRecoveryInfo.recovery_inst_id; + SetOndemandExtremeRtoMode(); + ereport(LOG, (errmsg("[On-demand] replayed in
extreme rto ondemand recovery mode"))); } else { g_instance.dms_cxt.SSReformerControl.clusterStatus = CLUSTER_NORMAL; } @@ -11891,7 +11895,7 @@ void CreateCheckPoint(int flags) pg_time_t now = (pg_time_t)time(NULL); int elapsed_secs = now - t_thrd.checkpoint_cxt.last_truncate_log_time; - if (!RecoveryInProgress() && + if (!RecoveryInProgress() && !SS_IN_ONDEMAND_RECOVERY && (GTM_FREE_MODE || TransactionIdIsNormal(t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin))) { /* * Reduce the frequency of trucate CSN log to avoid the probability of lock contention @@ -19489,6 +19493,6 @@ int SSXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int re XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI, char* xlog_path) { int read_len = SSReadXLog(xlogreader, targetPagePtr, reqLen, targetRecPtr, - readBuf, readTLI, g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir); + readBuf, readTLI, g_instance.dms_cxt.SSRecoveryInfo.recovery_xlog_dir); return read_len; } diff --git a/src/gausskernel/storage/xlog_share_storage/xlog_share_storage.cpp b/src/gausskernel/storage/xlog_share_storage/xlog_share_storage.cpp index 7072acf143..99fc88d4ab 100644 --- a/src/gausskernel/storage/xlog_share_storage/xlog_share_storage.cpp +++ b/src/gausskernel/storage/xlog_share_storage/xlog_share_storage.cpp @@ -142,7 +142,7 @@ void LocalXLogRead(char *buf, XLogRecPtr startptr, Size count) if (SS_STANDBY_FAILOVER && SS_PRIMARY_CLUSTER_STANDBY) { int nRet; nRet = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", - g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir, t_thrd.xlog_cxt.ThisTimeLineID, + g_instance.dms_cxt.SSRecoveryInfo.recovery_xlog_dir, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((t_thrd.sharestoragexlogcopyer_cxt.readSegNo) / XLogSegmentsPerXLogId), (uint32)((t_thrd.sharestoragexlogcopyer_cxt.readSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(nRet, "\0", "\0"); diff --git a/src/include/access/extreme_rto_redo_api.h 
b/src/include/access/extreme_rto_redo_api.h index 09f994d512..652d13c15c 100644 --- a/src/include/access/extreme_rto_redo_api.h +++ b/src/include/access/extreme_rto_redo_api.h @@ -36,6 +36,7 @@ typedef enum { extern ExtremeRtoRedoType g_extreme_rto_type; +void SetOndemandExtremeRtoMode(); void ExtremeWaitAllReplayWorkerIdle(); void ExtremeDispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key); void ExtremeDispatchClosefdMarkToAllRedoWorker(); diff --git a/src/include/access/xlog_basic.h b/src/include/access/xlog_basic.h index e03b585bdc..18b5b9ed00 100644 --- a/src/include/access/xlog_basic.h +++ b/src/include/access/xlog_basic.h @@ -99,7 +99,7 @@ * The XLog directory and control file (relative to $PGDATA) */ #define SS_XLOGDIR (g_instance.datadir_cxt.xlogDir) -#define SS_XLOGRECOVERYDIR (g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir) +#define SS_XLOGRECOVERYDIR (g_instance.dms_cxt.SSRecoveryInfo.recovery_xlog_dir) #define XLOGDIR "pg_xlog" #define ARCHIVEDIR "pg_xlog/archive_status" #define XLOG_CONTROL_FILE (g_instance.datadir_cxt.controlPath) diff --git a/src/include/ddes/dms/ss_common_attr.h b/src/include/ddes/dms/ss_common_attr.h index a081a3a92c..9cd0bc02df 100644 --- a/src/include/ddes/dms/ss_common_attr.h +++ b/src/include/ddes/dms/ss_common_attr.h @@ -71,6 +71,9 @@ #define SS_STANDBY_FAILOVER (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING) +#define SS_PRIMARY_NORMAL_REFORM \ + (SS_REFORM_REFORMER && (g_instance.dms_cxt.SSReformInfo.reform_type == DMS_REFORM_TYPE_FOR_NORMAL_OPENGAUSS)) + #define SS_PERFORMING_SWITCHOVER \ (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState > NODESTATE_NORMAL && \ g_instance.dms_cxt.SSClusterState != NODESTATE_STANDBY_FAILOVER_PROMOTING)) diff --git a/src/include/ddes/dms/ss_dms_recovery.h b/src/include/ddes/dms/ss_dms_recovery.h index 2d29673908..98d210b17d 100644 --- a/src/include/ddes/dms/ss_dms_recovery.h +++ b/src/include/ddes/dms/ss_dms_recovery.h @@ -73,7 +73,8 @@ typedef enum 
st_failover_ckpt_status { typedef struct ss_recovery_info { bool recovery_pause_flag; volatile failover_ckpt_status_t failover_ckpt_status; - char recovery_xlogDir[MAXPGPATH]; + char recovery_xlog_dir[MAXPGPATH]; + int recovery_inst_id; LWLock* update_seg_lock; bool new_primary_reset_walbuf_flag; bool ready_to_startup; // when DB start (except failover), the flag will set true -- Gitee