From 8577a8ca033145e8713aa6cc49a9ea0a93cb8512 Mon Sep 17 00:00:00 2001 From: lilongfei Date: Fri, 13 Oct 2023 14:46:30 +0800 Subject: [PATCH 1/6] =?UTF-8?q?fixed=200b8e80a=20from=20https://gitee.com/?= =?UTF-8?q?lilongfei15/CM/pulls/158=20=E4=BF=AE=E5=A4=8D=E7=BD=91=E7=BB=9C?= =?UTF-8?q?=E9=9A=94=E7=A6=BB=E5=9C=BA=E6=99=AFVIP=E8=A1=8C=E4=B8=BA?= =?UTF-8?q?=E4=B8=8D=E7=AC=A6=E5=90=88=E9=A2=84=E6=9C=9F:=E6=96=B0?= =?UTF-8?q?=E4=B8=BB=E6=8B=89=E8=B5=B7=E4=BD=86=E5=8E=9F=E5=A7=8B=E4=B8=BB?= =?UTF-8?q?=E4=B8=8A=E7=9A=84VIP=E6=9C=AA=E5=8D=B8=E8=BD=BD=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cm_agent/cma_create_conn_cms.cpp | 78 ++++++++++++------- src/cm_agent/cma_instance_management.cpp | 5 ++ src/include/cm/cm_agent/cma_create_conn_cms.h | 2 + 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index 0d68470..a9f3545 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -27,6 +27,7 @@ #include "cma_create_conn_cms.h" #define DISABLE_TIMEOUT 0 +#define FASTSHOTDOWN_OLD_DN_PRIMARY 1 #define NOT_UPGRADED 0 #define GRAYSCALE_UPGRADED 1 @@ -383,9 +384,11 @@ void* ConnCmsPMain(void* arg) (void)clock_gettime(CLOCK_MONOTONIC, &g_serverHeartbeatTime); bool have_killed_nodes = false; bool isToStopInstances = false; + bool isVipAvailable = false; struct timeval checkBeginFunction = {0, 0}; struct timeval checkEndFunction = {0, 0}; const int twoSec = 2; + uint32 timeout = 0; for (;;) { (void)gettimeofday(&checkBeginFunction, NULL); @@ -416,6 +419,13 @@ void* ConnCmsPMain(void* arg) } else if (CmaDisconnectWithAllRemoteCmsInOtherAz()) { isToStopInstances = true; } + // check VIP status + for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { + if (CheckFloatIpStateInDnExternal(i) == PROCESS_RUNNING) { + isVipAvailable = true; + break; + } + } /* agentStopInstanceDelayTime: The delay time of stopping instances. * If isToStopInstances is true, and g_enableFenceDn is true, @@ -427,40 +437,39 @@ void* ConnCmsPMain(void* arg) * and the operation of stopping instances will not be executed. */ #ifndef ENABLE_MULTIPLE_NODES + if (!isVipAvailable) { + timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT; + } else { + timeout = FASTSHOTDOWN_OLD_DN_PRIMARY; + } uint32 timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT; uint32 agentStopInstanceDelayTime = isToStopInstances ? timeout : agent_kill_instance_timeout; #else uint32 agentStopInstanceDelayTime = isToStopInstances ? DISABLE_TIMEOUT : agent_kill_instance_timeout; #endif - if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) { - if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) { - have_killed_nodes = false; - write_runlog(LOG, "%d Maintaining cluster: cm agent cannot stop self instances.\n", __LINE__); - } else if (!g_firstConnectFlag) { - have_killed_nodes = false; - write_runlog(LOG, "Agent has never successfully connected to the server," - " so can not stop instances of current node.\n"); - } else { - write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " - "sync_dropped_coordinator change to false.\n", agentStopInstanceDelayTime); - - if (g_isPauseArbitration) { - continue; - } - - g_syncDroppedCoordinator = false; - have_killed_nodes = true; - - #ifndef ENABLE_MULTIPLE_NODES - /* - * Kill datanode proccess, so that it can be restarted with pending mode. - */ - uint32 i; - for (i = 0; i < g_currentNode->datanodeCount; i++) { - immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); + if (!isVipAvailable) { + if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) { + if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) { + have_killed_nodes = false; + write_runlog(LOG, "%d Maintaining cluster: cm agent cannot stop self instances.\n", __LINE__); + } else if (!g_firstConnectFlag) { + have_killed_nodes = false; + write_runlog(LOG, "Agent has never successfully connected to the server," + " so can not stop instances of current node.\n"); + } else { + if (g_isPauseArbitration) { + continue; + } + + ImmediateStopDnOfCurNode(agentStopInstanceDelayTime, &have_killed_nodes); } - #endif } + } else { + if (g_isPauseArbitration) { + continue; + } + + ImmediateStopDnOfCurNode(agentStopInstanceDelayTime, &have_killed_nodes); } } } @@ -488,3 +497,18 @@ void* CheckUpgradeMode(void* arg) } return NULL; } +void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes) +{ + write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " + "sync_dropped_coordinator change to false.\n", stopInstDelayTime); + + g_syncDroppedCoordinator = false; + *haveKilledNodes = true; + + #ifndef ENABLE_MULTIPLE_NODES + // Kill datanode proccess, so that it can be restarted with pending mode. + for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { + immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); + } + #endif +} \ No newline at end of file diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index 002e4fd..d7d9ce8 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -767,6 +767,11 @@ static int32 CheckFloatIpStateInDn(uint32 index) return PROCESS_NOT_EXIST; } +int32 CheckFloatIpStateInDnExternal(uint32 index) +{ + return CheckFloatIpStateInDn(index); +} + static int datanode_stopped_check(void) { int ret; diff --git a/src/include/cm/cm_agent/cma_create_conn_cms.h b/src/include/cm/cm_agent/cma_create_conn_cms.h index 939eb5b..17e37a8 100644 --- a/src/include/cm/cm_agent/cma_create_conn_cms.h +++ b/src/include/cm/cm_agent/cma_create_conn_cms.h @@ -36,4 +36,6 @@ bool isMaintenanceModeDisableOperation(const cma_operation op); void* ConnCmsPMain(void* arg); extern bool isUpgradeCluster(); void* CheckUpgradeMode(void* arg); +void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes); +extern int32 CheckFloatIpStateInDnExternal(uint32 index); #endif -- Gitee From bd8121a2b8353762a9a559918c2e97a14c5276c8 Mon Sep 17 00:00:00 2001 From: lilongfei Date: Tue, 17 Oct 2023 07:21:35 +0000 Subject: [PATCH 2/6] =?UTF-8?q?update=20src/cm=5Fagent/cma=5Fcreate=5Fconn?= =?UTF-8?q?=5Fcms.cpp.=20timeout=E5=8F=98=E9=87=8F=E6=89=8B=E8=AF=AF?= =?UTF-8?q?=E6=9C=AA=E5=88=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: lilongfei --- src/cm_agent/cma_create_conn_cms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index a9f3545..c7f1991 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -442,7 +442,6 @@ void* ConnCmsPMain(void* arg) } else { timeout = FASTSHOTDOWN_OLD_DN_PRIMARY; } - uint32 timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT; uint32 agentStopInstanceDelayTime = isToStopInstances ? timeout : agent_kill_instance_timeout; #else uint32 agentStopInstanceDelayTime = isToStopInstances ? DISABLE_TIMEOUT : agent_kill_instance_timeout; -- Gitee From 5b0a91f1761505f472adda6d62cd2555773346f8 Mon Sep 17 00:00:00 2001 From: lilongfei Date: Tue, 17 Oct 2023 07:37:14 +0000 Subject: [PATCH 3/6] update src/cm_agent/cma_create_conn_cms.cpp. fix code check Signed-off-by: lilongfei --- src/cm_agent/cma_create_conn_cms.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index c7f1991..2a09d49 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -27,12 +27,12 @@ #include "cma_create_conn_cms.h" #define DISABLE_TIMEOUT 0 -#define FASTSHOTDOWN_OLD_DN_PRIMARY 1 +#define #define NOT_UPGRADED 0 #define GRAYSCALE_UPGRADED 1 #define INPLACE_UPGRADED 2 - +const int FASTSHOTDOWN_OLD_DN_PRIMARY = 1; /** * This flag is activated after the cm agent is connected to the cm Server for the first time. * If this flag is not activated, we will disable some cm agent features. @@ -425,7 +425,7 @@ void* ConnCmsPMain(void* arg) isVipAvailable = true; break; } - } + } /* agentStopInstanceDelayTime: The delay time of stopping instances. * If isToStopInstances is true, and g_enableFenceDn is true, @@ -450,7 +450,9 @@ void* ConnCmsPMain(void* arg) if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) { if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) { have_killed_nodes = false; - write_runlog(LOG, "%d Maintaining cluster: cm agent cannot stop self instances.\n", __LINE__); + write_runlog(LOG, + "%d Maintaining cluster: cm agent cannot stop self instances.\n", + __LINE__); } else if (!g_firstConnectFlag) { have_killed_nodes = false; write_runlog(LOG, "Agent has never successfully connected to the server," -- Gitee From 67d26837f182be56bfc92d9c2bea0d647a17fd9b Mon Sep 17 00:00:00 2001 From: lilongfei Date: Tue, 17 Oct 2023 07:39:30 +0000 Subject: [PATCH 4/6] update src/cm_agent/cma_create_conn_cms.cpp. Signed-off-by: lilongfei --- src/cm_agent/cma_create_conn_cms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index 2a09d49..e2d639b 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -27,7 +27,6 @@ #include "cma_create_conn_cms.h" #define DISABLE_TIMEOUT 0 -#define #define NOT_UPGRADED 0 #define GRAYSCALE_UPGRADED 1 -- Gitee From cbea0915988770ae51802da46b85ec8e0da8fef8 Mon Sep 17 00:00:00 2001 From: lilongfei Date: Tue, 17 Oct 2023 07:50:42 +0000 Subject: [PATCH 5/6] update src/cm_agent/cma_create_conn_cms.cpp. fix code check Signed-off-by: lilongfei --- src/cm_agent/cma_create_conn_cms.cpp | 32 +++++++++++++++------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index e2d639b..1a3cb1b 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -450,8 +450,8 @@ void* ConnCmsPMain(void* arg) if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) { have_killed_nodes = false; write_runlog(LOG, - "%d Maintaining cluster: cm agent cannot stop self instances.\n", - __LINE__); + "%d Maintaining cluster: cm agent cannot stop self instances.\n", + __LINE__); } else if (!g_firstConnectFlag) { have_killed_nodes = false; write_runlog(LOG, "Agent has never successfully connected to the server," @@ -497,18 +497,20 @@ void* CheckUpgradeMode(void* arg) } return NULL; } -void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes) -{ - write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " - "sync_dropped_coordinator change to false.\n", stopInstDelayTime); - - g_syncDroppedCoordinator = false; - *haveKilledNodes = true; - - #ifndef ENABLE_MULTIPLE_NODES - // Kill datanode proccess, so that it can be restarted with pending mode. - for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { - immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); +namespace { + void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes) + { + write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " + "sync_dropped_coordinator change to false.\n", stopInstDelayTime); + + g_syncDroppedCoordinator = false; + *haveKilledNodes = true; + + #ifndef ENABLE_MULTIPLE_NODES + // Kill datanode proccess, so that it can be restarted with pending mode. + for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { + immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); + } + #endif } - #endif } \ No newline at end of file -- Gitee From c11019be906eecdf95f86671244b8373cd7bf962 Mon Sep 17 00:00:00 2001 From: lilongfei Date: Tue, 17 Oct 2023 07:53:43 +0000 Subject: [PATCH 6/6] =?UTF-8?q?update=20src/cm=5Fagent/cma=5Fcreate=5Fconn?= =?UTF-8?q?=5Fcms.cpp.=20=E4=BD=BF=E7=94=A8=E5=8C=BF=E5=90=8Dnamespace?= =?UTF-8?q?=E5=B0=81=E8=A3=85ImmediateStopDnOfCurNode=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E4=BC=9A=E5=AF=BC=E8=87=B4=E6=97=A0=E6=B3=95=E7=BC=96=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: lilongfei --- src/cm_agent/cma_create_conn_cms.cpp | 31 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index 1a3cb1b..d76267c 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -497,20 +497,19 @@ void* CheckUpgradeMode(void* arg) } return NULL; } -namespace { - void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes) - { - write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " - "sync_dropped_coordinator change to false.\n", stopInstDelayTime); - - g_syncDroppedCoordinator = false; - *haveKilledNodes = true; - - #ifndef ENABLE_MULTIPLE_NODES - // Kill datanode proccess, so that it can be restarted with pending mode. - for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { - immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); - } - #endif + +void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes) +{ + write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. " + "sync_dropped_coordinator change to false.\n", stopInstDelayTime); + + g_syncDroppedCoordinator = false; + *haveKilledNodes = true; + + #ifndef ENABLE_MULTIPLE_NODES + // Kill datanode proccess, so that it can be restarted with pending mode. + for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) { + immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); } -} \ No newline at end of file + #endif +} -- Gitee