From 93c14c3ab462b996dfd930b3ec6b5518e1de85c0 Mon Sep 17 00:00:00 2001 From: dongning12 Date: Fri, 2 Jun 2023 11:14:14 +0800 Subject: [PATCH] =?UTF-8?q?[=E8=B5=84=E6=BA=90=E6=B1=A0=E5=8C=96]flush=5Fc?= =?UTF-8?q?opy=E9=98=B6=E6=AE=B5=E5=BC=82=E5=B8=B8=E5=9C=BA=E6=99=AF?= =?UTF-8?q?=E4=B8=8B=E7=9A=84=E9=80=80=E5=87=BA=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ddes/adapter/ss_dms_callback.cpp | 6 +++ .../ddes/adapter/ss_dms_recovery.cpp | 47 +++++++++++-------- .../process/postmaster/postmaster.cpp | 5 ++ .../process/threadpool/knl_thread.cpp | 1 + src/gausskernel/storage/buffer/bufmgr.cpp | 5 ++ .../storage/smgr/segment/segbuffer.cpp | 7 +-- src/include/ddes/dms/ss_dms_recovery.h | 1 + src/include/knl/knl_thread.h | 1 + 8 files changed, 49 insertions(+), 24 deletions(-) diff --git a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp index 8e65359fd6..846c6c0ca4 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp @@ -1451,6 +1451,7 @@ static int CBFlushCopy(void *db_handle, char *pageid) */ if (BufferIsInvalid(buffer)) { if (dms_reform_failed()) { + SSWaitStartupExit(); return GS_ERROR; } else { Assert(0); @@ -1459,6 +1460,11 @@ static int CBFlushCopy(void *db_handle, char *pageid) Assert(XLogRecPtrIsValid(g_instance.dms_cxt.ckptRedo)); LockBuffer(buffer, BUFFER_LOCK_SHARE); + if (t_thrd.dms_cxt.flush_copy_get_page_failed) { + t_thrd.dms_cxt.flush_copy_get_page_failed = false; + SSWaitStartupExit(); + return GS_ERROR; + } BufferDesc* buf_desc = GetBufferDescriptor(buffer - 1); XLogRecPtr pagelsn = BufferGetLSN(buf_desc); if (XLByteLT(g_instance.dms_cxt.ckptRedo, pagelsn)) { diff --git a/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp index a42310c10d..fdc44c3a28 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp @@ -54,6 +54,33 @@ void SSSavePrimaryInstId(int id) SSSaveReformerCtrl(); } +void SSWaitStartupExit() +{ + if (g_instance.pid_cxt.StartupPID == 0) { + return; + } + + if (SS_STANDBY_FAILOVER && !g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag) { + g_instance.dms_cxt.SSRecoveryInfo.startup_need_exit_normally = true; + } + SendPostmasterSignal(PMSIGNAL_DMS_TERM_STARTUP); + int err_level = g_instance.dms_cxt.SSRecoveryInfo.startup_need_exit_normally ? LOG : WARNING; + ereport(err_level, (errmodule(MOD_DMS), errmsg("[SS reform] reform failed, startup thread need exit"))); + + while (true) { + if (g_instance.pid_cxt.StartupPID == 0) { + break; + } + + if (g_instance.dms_cxt.SSRecoveryInfo.recovery_trapped_in_page_request) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS reform] pageredo or startup thread are trapped " + "in page request during recovery phase, need exit"))); + _exit(0); + } + pg_usleep(5000L); + } +} + /** * find reform failed in recovery phase, maybe other node restart * pageredo or startup thread may trapped in LockBuffer for page request @@ -74,25 +101,7 @@ bool SSRecoveryNodes() bool result = false; while (true) { if (dms_reform_failed()) { - if (SS_STANDBY_FAILOVER && !g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag) { - g_instance.dms_cxt.SSRecoveryInfo.startup_need_exit_normally = true; - } - SendPostmasterSignal(PMSIGNAL_DMS_TERM_STARTUP); - - while (true) { - if (g_instance.pid_cxt.StartupPID == 0) { - ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS reform] reform failed, startup thread exit noramlly " - "during recovery"))); - break; - } - - if (g_instance.dms_cxt.SSRecoveryInfo.recovery_trapped_in_page_request) { - ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS reform] pageredo or startup thread are trapped " - "in page request during recovery phase, need exit"))); - _exit(0); - } - pg_usleep(5000L); - } + SSWaitStartupExit(); result = false; break; } diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 0d3cda3004..1d92de0e36 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -10175,6 +10175,11 @@ static void sigusr1_handler(SIGNAL_ARGS) /* shut down all backends and autovac workers */ (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + //active check once + if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0) { + g_instance.dms_cxt.SSRecoveryInfo.no_backend_left = true; + } + /* and the autovac launcher too */ if (g_instance.pid_cxt.AutoVacPID != 0) signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index cc02ffde04..13d33ffe96 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1705,6 +1705,7 @@ static void knl_t_dms_context_init(knl_t_dms_context *dms_cxt) dms_cxt->file_size = 0; errno_t rc = memset_s(dms_cxt->msg_backup, sizeof(dms_cxt->msg_backup), 0, sizeof(dms_cxt->msg_backup)); securec_check(rc, "\0", "\0"); + dms_cxt->flush_copy_get_page_failed = false; } static void knl_t_rc_init(knl_t_rc_context* rc_cxt) { diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index cb88688821..14c7a6011b 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -5913,6 +5913,11 @@ retry: LWLockRelease(buf->content_lock); + if (AmDmsReformProcProcess() && dms_reform_failed()) { + t_thrd.dms_cxt.flush_copy_get_page_failed = true; + return; + } + if ((AmPageRedoProcess() || AmStartupProcess()) && dms_reform_failed()) { g_instance.dms_cxt.SSRecoveryInfo.recovery_trapped_in_page_request = true; } diff --git a/src/gausskernel/storage/smgr/segment/segbuffer.cpp b/src/gausskernel/storage/smgr/segment/segbuffer.cpp index d51b9c2fab..cfae8e036a 100644 --- a/src/gausskernel/storage/smgr/segment/segbuffer.cpp +++ b/src/gausskernel/storage/smgr/segment/segbuffer.cpp @@ -483,11 +483,8 @@ void ReportInvalidPage(RepairBlockKey key) void ReadSegBufferForCheck(BufferDesc* bufHdr, ReadBufferMode mode, SegSpace *spc, Block bufBlock) { if (spc == NULL) { - bool found; - SegSpcTag tag = {.spcNode = bufHdr->tag.rnode.spcNode, .dbNode = bufHdr->tag.rnode.dbNode}; - SegmentCheck(t_thrd.storage_cxt.SegSpcCache != NULL); - spc = (SegSpace *)hash_search(t_thrd.storage_cxt.SegSpcCache, (void *)&tag, HASH_FIND, &found); - SegmentCheck(found); + spc = spc_open(bufHdr->tag.rnode.spcNode, bufHdr->tag.rnode.dbNode, false, false); + SegmentCheck(spc != NULL); } seg_physical_read(spc, bufHdr->tag.rnode, bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *)bufBlock); diff --git a/src/include/ddes/dms/ss_dms_recovery.h b/src/include/ddes/dms/ss_dms_recovery.h index 961cccb11b..6affe3c2c1 100644 --- a/src/include/ddes/dms/ss_dms_recovery.h +++ b/src/include/ddes/dms/ss_dms_recovery.h @@ -69,6 +69,7 @@ typedef struct ss_recovery_info { } ss_recovery_info_t; extern bool SSRecoveryNodes(); +extern void SSWaitStartupExit(); extern int SSGetPrimaryInstId(); extern void SSSavePrimaryInstId(int id); extern void SSReadControlFile(int id, bool updateDmsCtx = false); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index d4aea62d94..979580bfbf 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -3352,6 +3352,7 @@ typedef struct knl_t_dms_context { int offset; /* current read/write position in aligned_buf */ int file_size; /* initialized as pg_internal.init file size, will decrease after read */ char msg_backup[24]; // 24 is sizeof mes_message_head_t + bool flush_copy_get_page_failed; //used in flush copy } knl_t_dms_context; /* thread context. */ -- Gitee