From b472eb0cd37721e7855e56986d45fa80545a447b Mon Sep 17 00:00:00 2001 From: bowenliu Date: Tue, 19 Sep 2023 16:41:24 +0800 Subject: [PATCH] fix reform timeout due to twophasecleaner and backend term deadlock --- src/gausskernel/ddes/adapter/ss_dms_callback.cpp | 10 +++------- src/gausskernel/process/postmaster/twophasecleaner.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp index 66dbc94fbc..5929d493b5 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp @@ -389,9 +389,6 @@ static int CBSwitchoverPromote(void *db_handle, unsigned char origPrimaryId) if (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTED) { /* flush control file primary id in advance to save new standby's waiting time */ SSSavePrimaryInstId(SS_MY_INST_ID); - - SSReadControlFile(REFORM_CTRL_PAGE); - Assert(SSGetPrimaryInstId() == SS_MY_INST_ID); ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS switchover] Standby promote: success, set new primary:%d.", SS_MY_INST_ID))); return DMS_SUCCESS; @@ -429,16 +426,15 @@ static void CBSwitchoverResult(void *db_handle, int result) static int SetPrimaryIdOnStandby(int primary_id) { - g_instance.dms_cxt.SSReformerControl.primaryInstId = primary_id; - for (int ntries = 0;; ntries++) { SSReadControlFile(REFORM_CTRL_PAGE); /* need to double check */ if (g_instance.dms_cxt.SSReformerControl.primaryInstId == primary_id) { ereport(LOG, (errmodule(MOD_DMS), - errmsg("[SS %s] Reform success, this is a standby:%d confirming new primary:%d.", - SS_PERFORMING_SWITCHOVER ? "switchover" : "reform", SS_MY_INST_ID, primary_id))); + errmsg("[SS %s] Reform success, this is a standby:%d confirming new primary:%d, confirm ntries=%d.", + SS_PERFORMING_SWITCHOVER ? "switchover" : "reform", SS_MY_INST_ID, primary_id, ntries))); return DMS_SUCCESS; } else { + SSSavePrimaryInstId(primary_id); if (ntries >= WAIT_REFORM_CTRL_REFRESH_TRIES) { ereport(ERROR, (errmodule(MOD_DMS), errmsg("[SS %s] Failed to confirm new primary: %d," diff --git a/src/gausskernel/process/postmaster/twophasecleaner.cpp b/src/gausskernel/process/postmaster/twophasecleaner.cpp index 52698e049e..5051e3710e 100644 --- a/src/gausskernel/process/postmaster/twophasecleaner.cpp +++ b/src/gausskernel/process/postmaster/twophasecleaner.cpp @@ -205,6 +205,12 @@ NON_EXEC_STATIC void TwoPhaseCleanerMain() if (t_thrd.tpcleaner_cxt.shutdown_requested) { /* Normal exit from the twophasecleaner is here */ + ereport(LOG, (errmsg("TwoPhaseCleaner exits via SIGTERM"))); + proc_exit(0); + } + + if (SS_PRIMARY_DEMOTING) { + ereport(LOG, (errmsg("TwoPhaseCleaner exits via SS global var"))); proc_exit(0); } @@ -326,6 +332,7 @@ static void TwoPCShutdownHandler(SIGNAL_ARGS) { int save_errno = errno; + ereport(LOG, (errmsg("TwoPhaseCleaner received SIGTERM"))); t_thrd.tpcleaner_cxt.shutdown_requested = true; if (t_thrd.proc) -- Gitee