diff --git a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp index d704adbd47ec5c080b2352e93a717dd5131973e0..8e54e5a807f36fc3d3762344f5afd98ec32871a7 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp @@ -1477,39 +1477,24 @@ static int CBFlushCopy(void *db_handle, char *pageid) return GS_SUCCESS; } -static int CBFailoverPromote(void *db_handle) +static void SSFailoverPromoteNotify() { - g_instance.dms_cxt.SSRecoveryInfo.no_backend_left = false; - SSTriggerFailover(); - /** - * for alive failover: wait for backend threads to exit, at most 30s - * why wait code write this - * step 1, sned PMSIGNAL_DMS_TRIGGERFAILOVER to tell thread to exit - * step 2, PM detected backend exit - * step 3, reform proc wait - */ - if (!g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { - long max_wait_time = 30000000L; - long wait_time = 0; - while (true) { - if (g_instance.dms_cxt.SSRecoveryInfo.no_backend_left) { - ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] backends exit successfully"))); - break; - } - if (wait_time > max_wait_time) { - ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS failover] failover failed, backends can not exit"))); - _exit(0); - } - pg_usleep(REFORM_WAIT_TIME); - wait_time += REFORM_WAIT_TIME; - } - - SSClearSegCache(); + if (g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag = true; + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do failover when DB restart."))); + } else { SendPostmasterSignal(PMSIGNAL_DMS_FAILOVER_STARTUP); + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do failover when DB alive"))); } +} + +static int CBFailoverPromote(void *db_handle) +{ + SSClearSegCache(); + SSFailoverPromoteNotify(); while (true) { - if (SSFAILOVER_TRIGGER && g_instance.pid_cxt.StartupPID != 0) { + if (SS_STANDBY_FAILOVER && g_instance.pid_cxt.StartupPID != 0) 
{ ereport(LOG, (errmodule(MOD_DMS), errmsg("startup thread success."))); return GS_SUCCESS; } @@ -1551,21 +1536,102 @@ static void CBReformSetDmsRole(void *db_handle, unsigned int reformer_id) SS_MY_INST_ID, reform_info->dms_role))); } +static void ReformCleanBackends() +{ + /* cluster has no transactions during startup reform */ + if (!g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + SendPostmasterSignal(PMSIGNAL_DMS_REFORM); + } + + while (true) { + if (dms_reform_failed()) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS reform]reform failed during caneling backends"))); + return; + } + if (g_instance.dms_cxt.SSRecoveryInfo.reform_ready || g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS reform]reform ready, backends have been terminated"))); + return; + } + pg_usleep(REFORM_WAIT_TIME); + } +} + +static void AliveFailoverCleanBackends() +{ + if (g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + return; + } + + /** + * for alive failover: wait for backend threads to exit, at most 30s + * why the wait code is written this way: + * step 1, send signal to tell thread to exit + * step 2, PM detected backend exit + * step 3, reform proc wait + */ + g_instance.dms_cxt.SSRecoveryInfo.no_backend_left = false; + SendPostmasterSignal(PMSIGNAL_DMS_FAILOVER_TERM_BACKENDS); + long max_wait_time = 30000000L; + long wait_time = 0; + while (true) { + if (g_instance.dms_cxt.SSRecoveryInfo.no_backend_left) { + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] backends exit successfully"))); + break; + } + if (wait_time > max_wait_time) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS failover] failover failed, backends can not exit"))); + _exit(0); + } + + if (dms_reform_failed()) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS failover] reform failed during clean backends"))); + return; + } + + pg_usleep(REFORM_WAIT_TIME); + wait_time += REFORM_WAIT_TIME; + } +} + +static int reform_type_str_len = 30; +static 
void ReformTypeToString(SSReformType reform_type, char* ret_str) +{ + switch (reform_type) + { + case DMS_REFORM_TYPE_FOR_NORMAL_OPENGAUSS: + strcpy_s(ret_str, reform_type_str_len, "normal reform"); + break; + case DMS_REFORM_TYPE_FOR_FAILOVER_OPENGAUSS: + strcpy_s(ret_str, reform_type_str_len, "failover reform"); + break; + case DMS_REFORM_TYPE_FOR_SWITCHOVER_OPENGAUSS: + strcpy_s(ret_str, reform_type_str_len, "switchover reform"); + break; + case DMS_REFORM_TYPE_FOR_FULL_CLEAN: + strcpy_s(ret_str, reform_type_str_len, "full clean reform"); + break; + default: + strcpy_s(ret_str, reform_type_str_len, "unknown"); + break; + } + return; +} + static void CBReformStartNotify(void *db_handle, dms_role_t role, unsigned char reform_type) { - SSReformType ss_reform_type = (SSReformType)reform_type; ss_reform_info_t *reform_info = &g_instance.dms_cxt.SSReformInfo; + reform_info->reform_type = (SSReformType)reform_type; g_instance.dms_cxt.SSClusterState = NODESTATE_NORMAL; g_instance.dms_cxt.SSRecoveryInfo.reform_ready = false; g_instance.dms_cxt.SSRecoveryInfo.in_flushcopy = false; g_instance.dms_cxt.SSRecoveryInfo.startup_need_exit_normally = false; g_instance.dms_cxt.resetSyscache = true; - if (ss_reform_type == DMS_REFORM_TYPE_FOR_FAILOVER_OPENGAUSS) { + if (reform_info->reform_type == DMS_REFORM_TYPE_FOR_FAILOVER_OPENGAUSS) { g_instance.dms_cxt.SSRecoveryInfo.in_failover = true; g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag = true; if (role == DMS_ROLE_REFORMER) { g_instance.dms_cxt.dw_init = false; - // variable set order: SharedRecoveryInProgress -> failover_triggered -> dms_role + // variable set order: SharedRecoveryInProgress -> failover_ckpt_status -> dms_role volatile XLogCtlData *xlogctl = t_thrd.shemem_ptr_cxt.XLogCtl; SpinLockAcquire(&xlogctl->info_lck); xlogctl->IsRecoveryDone = false; @@ -1573,7 +1639,7 @@ static void CBReformStartNotify(void *db_handle, dms_role_t role, unsigned char SpinLockRelease(&xlogctl->info_lck); 
t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_CRASH_RECOVERY; pg_memory_barrier(); - g_instance.dms_cxt.SSRecoveryInfo.failover_triggered = true; + g_instance.dms_cxt.SSRecoveryInfo.failover_ckpt_status = NOT_ALLOW_CKPT; g_instance.dms_cxt.SSClusterState = NODESTATE_STANDBY_FAILOVER_PROMOTING; ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] failover trigger."))); } @@ -1581,8 +1647,10 @@ static void CBReformStartNotify(void *db_handle, dms_role_t role, unsigned char reform_info->dms_role = role; reform_info->in_reform = true; + char reform_type_str[reform_type_str_len] = {0}; + ReformTypeToString(reform_info->reform_type, reform_type_str); ereport(LOG, (errmodule(MOD_DMS), - errmsg("[SS reform] dms reform start, role:%d, reform type:%d", role, (int)ss_reform_type))); + errmsg("[SS reform] dms reform start, role:%d, reform type:%s", role, reform_type_str))); if (reform_info->dms_role == DMS_ROLE_REFORMER) { while (dss_set_server_status_wrapper() != GS_SUCCESS) { pg_usleep(REFORM_WAIT_LONG); @@ -1597,22 +1665,12 @@ static void CBReformStartNotify(void *db_handle, dms_role_t role, unsigned char int old_primary = SSGetPrimaryInstId(); SSReadControlFile(old_primary, true); - /* cluster has no transactions during startup reform */ - if (!g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { - SendPostmasterSignal(PMSIGNAL_DMS_REFORM); + if (SS_STANDBY_FAILOVER) { + AliveFailoverCleanBackends(); + } else { + ReformCleanBackends(); } - while (true) { - if (dms_reform_failed()) { - ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS reform]reform failed during caneling backends"))); - return; - } - if (g_instance.dms_cxt.SSRecoveryInfo.reform_ready || g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { - ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS reform]reform ready, backends have been terminated"))); - return; - } - pg_usleep(REFORM_WAIT_TIME); - } } static int CBReformDoneNotify(void *db_handle) @@ -1625,10 +1683,9 @@ static int CBReformDoneNotify(void 
*db_handle) } } /* SSClusterState and in_reform must be set atomically */ - g_instance.dms_cxt.SSClusterState = NODESTATE_NORMAL; - g_instance.dms_cxt.SSReformInfo.in_reform = false; g_instance.dms_cxt.SSRecoveryInfo.startup_reform = false; g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag = false; + g_instance.dms_cxt.SSRecoveryInfo.failover_ckpt_status = NOT_ACTIVE; Assert(g_instance.dms_cxt.SSRecoveryInfo.in_flushcopy == false); ereport(LOG, (errmodule(MOD_DMS), @@ -1637,6 +1694,8 @@ static int CBReformDoneNotify(void *db_handle) /* reform success indicates that reform of primary and standby all complete, then update gaussdb.state */ SendPostmasterSignal(PMSIGNAL_DMS_REFORM_DONE); + g_instance.dms_cxt.SSClusterState = NODESTATE_NORMAL; + g_instance.dms_cxt.SSReformInfo.in_reform = false; return GS_SUCCESS; } diff --git a/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp index a37515f4e358ec92a275529a9cec89e75f599cb6..a42310c10de422138d551bd2a445031056842111 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp @@ -281,17 +281,6 @@ void SSWriteReformerControlPages(void) } } -void SSTriggerFailover() -{ - if (g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { - g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag = true; - ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do failover when DB restart."))); - } else { - SendPostmasterSignal(PMSIGNAL_DMS_TRIGGERFAILOVER); - ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do failover when DB alive"))); - } -} - void SShandle_promote_signal() { if (pmState == PM_WAIT_BACKENDS) { diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index bb578682ee882603e6e8aed1fa3dd622791e3d71..0d3cda30048c61d519d06ae2d6c7ad1f51d56f98 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ 
b/src/gausskernel/process/postmaster/postmaster.cpp @@ -10154,7 +10154,7 @@ static void sigusr1_handler(SIGNAL_ARGS) g_instance.dms_cxt.SSRecoveryInfo.reform_ready = true; } - if (ENABLE_DMS && CheckPostmasterSignal(PMSIGNAL_DMS_TRIGGERFAILOVER)) { + if (ENABLE_DMS && CheckPostmasterSignal(PMSIGNAL_DMS_FAILOVER_TERM_BACKENDS)) { PMUpdateDBState(PROMOTING_STATE, get_cur_mode(), get_cur_repl_num()); t_thrd.dms_cxt.CloseAllSessionsFailed = false; ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] kill backends begin."))); diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 6c00699141f140f6646f93742d7065874d51e344..0e2373b3cd53aaa416d12d50b79d4f00583ba00b 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -181,7 +181,7 @@ static void knl_g_dms_init(knl_g_dms_context *dms_cxt) dms_cxt->SSReformInfo.dms_role = DMS_ROLE_UNKNOW; dms_cxt->SSClusterState = NODESTATE_NORMAL; dms_cxt->SSRecoveryInfo.recovery_pause_flag = true; - dms_cxt->SSRecoveryInfo.failover_triggered = false; + dms_cxt->SSRecoveryInfo.failover_ckpt_status = NOT_ACTIVE; dms_cxt->SSRecoveryInfo.new_primary_reset_walbuf_flag = false; dms_cxt->SSRecoveryInfo.ready_to_startup = false; dms_cxt->SSRecoveryInfo.startup_reform = true; diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index b05dcfc7c2d4fc8bdcd34e2976f43b1b644a69e6..0d2caf64d9b635db26eff0f72055e421dd70b97f 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -6752,7 +6752,7 @@ void XLOGShmemInit(void) /* Reset walbuffer only before startup thread init, in which StartupXLOG pushes LSN */ if (ENABLE_DMS && t_thrd.role == STARTUP && (((SS_STANDBY_PROMOTING || SS_PRIMARY_DEMOTED) && g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag == true) || - SSFAILOVER_TRIGGER)) 
{ + SS_STANDBY_FAILOVER)) { g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag = false; errorno = memset_s(t_thrd.shemem_ptr_cxt.XLogCtl->xlblocks, sizeof(XLogRecPtr) * g_instance.attr.attr_storage.XLOGbuffers, 0, @@ -9260,7 +9260,7 @@ void StartupXLOG(void) */ if (ENABLE_DMS) { int src_id = g_instance.attr.attr_storage.dms_attr.instance_id; - if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { src_id = SSGetPrimaryInstId(); ereport(LOG, (errmsg("[SS Reform]: Standby:%d promoting, reading control file of original primary:%d", g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); @@ -9462,7 +9462,7 @@ void StartupXLOG(void) securec_check(errorno, "", ""); if (ENABLE_DMS && ENABLE_DSS) { - if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { SSGetXlogPath(); xlogreader = SSXLogReaderAllocate(&SSXLogPageRead, &readprivate, ALIGNOF_BUFFER); close_readFile_if_open(); @@ -9690,7 +9690,7 @@ void StartupXLOG(void) * in SS Switchover, skip dw init since we didn't do ShutdownXLOG */ - if ((ENABLE_REFORM && SS_REFORM_REFORMER && !SSFAILOVER_TRIGGER && !SS_PERFORMING_SWITCHOVER) || + if ((ENABLE_REFORM && SS_REFORM_REFORMER && !SS_STANDBY_FAILOVER && !SS_PERFORMING_SWITCHOVER) || !ENABLE_DMS || !ENABLE_REFORM) { /* process assist file of chunk recycling */ dw_ext_init(); @@ -9891,7 +9891,7 @@ void StartupXLOG(void) * have been a clean shutdown and we did not have a recovery.conf file, * then assume no recovery needed. 
*/ - if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { t_thrd.xlog_cxt.InRecovery = true; if (SS_STANDBY_PROMOTING) { ereport(LOG, (errmsg("[SS switchover] Standby promote: redo shutdown checkpoint now"))); @@ -10015,7 +10015,7 @@ void StartupXLOG(void) } t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); /* No need to hold ControlFileLock yet, we aren't up far enough */ - if (!SSFAILOVER_TRIGGER) { + if (!SS_STANDBY_FAILOVER) { UpdateControlFile(); } @@ -10063,7 +10063,7 @@ void StartupXLOG(void) * connections, so that read-only backends don't try to read whatever * garbage is left over from before. */ - if (!RecoveryByPending && (!SSFAILOVER_TRIGGER && SSModifySharedLunAllowed())) { + if (!RecoveryByPending && (!SS_STANDBY_FAILOVER && SSModifySharedLunAllowed())) { ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP); } @@ -10542,7 +10542,7 @@ void StartupXLOG(void) EndOfLog = t_thrd.xlog_cxt.EndRecPtr; XLByteToPrevSeg(EndOfLog, endLogSegNo); - if ((ENABLE_DMS && SSFAILOVER_TRIGGER) || SS_STANDBY_PROMOTING) { + if ((ENABLE_DMS && SS_STANDBY_FAILOVER) || SS_STANDBY_PROMOTING) { bool use_existent = true; (void)XLogFileInit(endLogSegNo, &use_existent, true); } @@ -10744,7 +10744,7 @@ void StartupXLOG(void) g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag = true; } - if (!SSFAILOVER_TRIGGER && !SS_STANDBY_PROMOTING) { + if (!SS_STANDBY_FAILOVER && !SS_STANDBY_PROMOTING) { LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_PRODUCTION; t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); @@ -10847,9 +10847,9 @@ void StartupXLOG(void) } } - if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { - if (SSFAILOVER_TRIGGER) { - g_instance.dms_cxt.SSRecoveryInfo.failover_triggered = false; + if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { + if (SS_STANDBY_FAILOVER) { + g_instance.dms_cxt.SSRecoveryInfo.failover_ckpt_status = ALLOW_CKPT; 
pg_memory_barrier(); } ereport(LOG, (errmodule(MOD_DMS), @@ -11256,7 +11256,8 @@ bool RecoveryInProgress(void) * shared variable has once been seen false. */ if (!t_thrd.xlog_cxt.LocalRecoveryInProgress) { - if (!ENABLE_DMS || (ENABLE_DMS && !SSFAILOVER_TRIGGER && !SS_STANDBY_PROMOTING)) { + if (!ENABLE_DMS || (ENABLE_DMS && !SS_STANDBY_PROMOTING && + g_instance.dms_cxt.SSRecoveryInfo.failover_ckpt_status == NOT_ACTIVE)) { return false; } } @@ -11894,7 +11895,7 @@ void CreateCheckPoint(int flags) END_CRIT_SECTION(); } return; - } else if (SSFAILOVER_TRIGGER) { + } else if (g_instance.dms_cxt.SSRecoveryInfo.failover_ckpt_status == NOT_ALLOW_CKPT) { ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do not do CreateCheckpoint during failover"))); return; } @@ -17171,7 +17172,7 @@ int ParallelXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, if (readSource & XLOG_FROM_STREAM) { readLen = ParallelXLogReadWorkBufRead(xlogreader, targetPagePtr, reqLen, targetRecPtr, readTLI); } else { - if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { readLen = SSXLogPageRead(xlogreader, targetPagePtr, reqLen, targetRecPtr, xlogreader->readBuf, readTLI, NULL); } else { @@ -19763,7 +19764,7 @@ bool SSModifySharedLunAllowed() g_instance.dms_cxt.SSClusterState == NODESTATE_PRIMARY_DEMOTING || g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTING || g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTED || - SSFAILOVER_TRIGGER) { + SS_STANDBY_FAILOVER) { return true; } return false; diff --git a/src/include/ddes/dms/ss_common_attr.h b/src/include/ddes/dms/ss_common_attr.h index 206deef701c3f782d7c7b90f8ec9af554212826c..3eca48e81cda539a019c38208c7c121751e64759 100644 --- a/src/include/ddes/dms/ss_common_attr.h +++ b/src/include/ddes/dms/ss_common_attr.h @@ -67,9 +67,7 @@ #define SS_IN_FLUSHCOPY (ENABLE_DMS && g_instance.dms_cxt.SSRecoveryInfo.in_flushcopy == true) -#define SS_STANDBY_FAILOVER 
((g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING) \ - && (g_instance.dms_cxt.SSReformerControl.primaryInstId != SS_MY_INST_ID) \ - && SS_REFORM_REFORMER) +#define SS_STANDBY_FAILOVER (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING) #define SS_PERFORMING_SWITCHOVER \ (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState > NODESTATE_NORMAL && \ diff --git a/src/include/ddes/dms/ss_dms_recovery.h b/src/include/ddes/dms/ss_dms_recovery.h index 91dd619bb57873e159f0cc8ca88f57a495870147..961cccb11b7a170974a8d6e75636af80ed7a2e21 100644 --- a/src/include/ddes/dms/ss_dms_recovery.h +++ b/src/include/ddes/dms/ss_dms_recovery.h @@ -29,7 +29,6 @@ #define REFORM_CTRL_PAGE DMS_MAX_INSTANCE #define RECOVERY_WAIT_TIME 10000 -#define SSFAILOVER_TRIGGER (ENABLE_DMS && g_instance.dms_cxt.SSRecoveryInfo.failover_triggered == true) #define SS_BEFORE_RECOVERY (ENABLE_DMS && g_instance.dms_cxt.SSReformInfo.in_reform == true \ && g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag == true) #define SS_IN_FAILOVER (ENABLE_DMS && g_instance.dms_cxt.SSRecoveryInfo.in_failover == true) @@ -43,11 +42,18 @@ typedef struct st_reformer_ctrl { typedef struct st_reform_info { bool in_reform; dms_role_t dms_role; + SSReformType reform_type; } ss_reform_info_t; +typedef enum st_failover_ckpt_status { + NOT_ACTIVE = 0, + NOT_ALLOW_CKPT, + ALLOW_CKPT +} failover_ckpt_status_t; + typedef struct ss_recovery_info { bool recovery_pause_flag; - volatile bool failover_triggered; + volatile failover_ckpt_status_t failover_ckpt_status; char recovery_xlogDir[MAXPGPATH]; LWLock* update_seg_lock; bool new_primary_reset_walbuf_flag; @@ -69,7 +75,6 @@ extern void SSReadControlFile(int id, bool updateDmsCtx = false); extern void SSWriteReformerControlPages(void); extern bool SSRecoveryApplyDelay(); extern void SShandle_promote_signal(); -extern void SSTriggerFailover(); extern void ss_failover_dw_init(); extern void ss_switchover_promoting_dw_init(); diff --git 
a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 6b83ce0adf6e1d10f19e38b5b72fe30273ac7fd8..c87a34a28d9b294f568e3ba4d77b9f934436e449 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -50,7 +50,7 @@ typedef enum { PMSIGNAL_START_LOGICAL_READ_WORKER,/* start logical read worker */ PMSIGNAL_START_PARALLEL_DECODE_WORKER,/* start parallel decoding worker */ PMSIGNAL_START_APPLY_WORKER, /* start a apply worker */ - PMSIGNAL_DMS_TRIGGERFAILOVER, /* failover for reform */ + PMSIGNAL_DMS_FAILOVER_TERM_BACKENDS, /* term backends in alive failover */ PMSIGNAL_DMS_FAILOVER_STARTUP, /* start startup thread in alive failover */ PMSIGNAL_DMS_SWITCHOVER_PROMOTE, /* dms standby switchover promote */ PMSIGNAL_DMS_REFORM, /* dms reform start during PM_RUN */