diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp
index 0d6847055aa899f6220cd23e46d8d67adc79225e..d76267c9c180bb1549f5958d18a38228e5389546 100644
--- a/src/cm_agent/cma_create_conn_cms.cpp
+++ b/src/cm_agent/cma_create_conn_cms.cpp
@@ -31,7 +31,7 @@
 #define NOT_UPGRADED 0
 #define GRAYSCALE_UPGRADED 1
 #define INPLACE_UPGRADED 2
-
+const int FASTSHOTDOWN_OLD_DN_PRIMARY = 1; // stop delay when VIP is available; NOTE(review): SHOTDOWN -> SHUTDOWN?
 /**
  * This flag is activated after the cm agent is connected to the cm Server for the first time.
  * If this flag is not activated, we will disable some cm agent features.
@@ -383,9 +383,11 @@ void* ConnCmsPMain(void* arg)
     (void)clock_gettime(CLOCK_MONOTONIC, &g_serverHeartbeatTime);
     bool have_killed_nodes = false;
     bool isToStopInstances = false;
+    bool isVipAvailable = false;
     struct timeval checkBeginFunction = {0, 0};
     struct timeval checkEndFunction = {0, 0};
     const int twoSec = 2;
+    uint32 timeout = 0;
 
     for (;;) {
         (void)gettimeofday(&checkBeginFunction, NULL);
@@ -416,6 +418,13 @@ void* ConnCmsPMain(void* arg)
                 } else if (CmaDisconnectWithAllRemoteCmsInOtherAz()) {
                     isToStopInstances = true;
                 }
+                // Check VIP status: set when any local DN reports PROCESS_RUNNING. NOTE(review): never reset here.
+                for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) {
+                    if (CheckFloatIpStateInDnExternal(i) == PROCESS_RUNNING) {
+                        isVipAvailable = true;
+                        break;
+                    }
+                }
 
                 /* agentStopInstanceDelayTime: The delay time of stopping instances.
                  * If isToStopInstances is true, and g_enableFenceDn is true,
@@ -427,40 +436,40 @@ void* ConnCmsPMain(void* arg)
                  * and the operation of stopping instances will not be executed.
                  */
 #ifndef ENABLE_MULTIPLE_NODES
-                uint32 timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT;
+                if (!isVipAvailable) {
+                    timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT;
+                } else {
+                    timeout = FASTSHOTDOWN_OLD_DN_PRIMARY;
+                }
                 uint32 agentStopInstanceDelayTime = isToStopInstances ? timeout : agent_kill_instance_timeout;
 #else
                 uint32 agentStopInstanceDelayTime = isToStopInstances ? DISABLE_TIMEOUT : agent_kill_instance_timeout;
 #endif
-                if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) {
-                    if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) {
-                        have_killed_nodes = false;
-                        write_runlog(LOG, "%d Maintaining cluster: cm agent cannot stop self instances.\n", __LINE__);
-                    } else if (!g_firstConnectFlag) {
-                        have_killed_nodes = false;
-                        write_runlog(LOG, "Agent has never successfully connected to the server,"
-                            " so can not stop instances of current node.\n");
-                    } else {
-                        write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. "
-                            "sync_dropped_coordinator change to false.\n", agentStopInstanceDelayTime);
-
-                        if (g_isPauseArbitration) {
-                            continue;
+                if (!isVipAvailable) {
+                    if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) {
+                        if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) {
+                            have_killed_nodes = false;
+                            write_runlog(LOG,
+                                "%d Maintaining cluster: cm agent cannot stop self instances.\n",
+                                __LINE__);
+                        } else if (!g_firstConnectFlag) {
+                            have_killed_nodes = false;
+                            write_runlog(LOG, "Agent has never successfully connected to the server,"
+                                " so can not stop instances of current node.\n");
+                        } else {
+                            if (g_isPauseArbitration) {
+                                continue;
+                            }
+
+                            ImmediateStopDnOfCurNode(agentStopInstanceDelayTime, &have_killed_nodes);
                         }
-
-                        g_syncDroppedCoordinator = false;
-                        have_killed_nodes = true;
-
-                        #ifndef ENABLE_MULTIPLE_NODES
-                        /*
-                         * Kill datanode proccess, so that it can be restarted with pending mode.
-                         */
-                        uint32 i;
-                        for (i = 0; i < g_currentNode->datanodeCount; i++) {
-                            immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN);
-                        }
-                        #endif
                     }
+                } else {
+                    if (g_isPauseArbitration) {
+                        continue;
+                    }
+                    // NOTE(review): unlike the branch above, no isDisconnectTimeout/have_killed_nodes check here.
+                    ImmediateStopDnOfCurNode(agentStopInstanceDelayTime, &have_killed_nodes);
                 }
             }
         }
@@ -488,3 +497,19 @@ void* CheckUpgradeMode(void* arg)
     }
     return NULL;
 }
+
+void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes)
+{
+    write_runlog(LOG, "agent disconnect from cm_server %u seconds, stop instances in this node. "
+        "sync_dropped_coordinator change to false.\n", stopInstDelayTime);
+
+    g_syncDroppedCoordinator = false;
+    *haveKilledNodes = true;
+
+    #ifndef ENABLE_MULTIPLE_NODES
+    // Kill datanode process, so that it can be restarted with pending mode.
+    for (uint32 i = 0; i < g_currentNode->datanodeCount; i++) {
+        immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN);
+    }
+    #endif
+}
diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp
index 002e4fd377ccbdf6729cf99ec5c996cfc197f1bc..d7d9ce8f36e52cec09c12146024c768224dd753d 100644
--- a/src/cm_agent/cma_instance_management.cpp
+++ b/src/cm_agent/cma_instance_management.cpp
@@ -767,6 +767,11 @@ static int32 CheckFloatIpStateInDn(uint32 index)
     return PROCESS_NOT_EXIST;
 }
 
+int32 CheckFloatIpStateInDnExternal(uint32 index)
+{
+    return CheckFloatIpStateInDn(index);
+}
+
 static int datanode_stopped_check(void)
 {
     int ret;
diff --git a/src/include/cm/cm_agent/cma_create_conn_cms.h b/src/include/cm/cm_agent/cma_create_conn_cms.h
index 939eb5b360851f59820e6ed32eeb1b5ad309fd1d..17e37a8c99331d126056bf56688240df76cc9299 100644
--- a/src/include/cm/cm_agent/cma_create_conn_cms.h
+++ b/src/include/cm/cm_agent/cma_create_conn_cms.h
@@ -36,4 +36,6 @@ bool isMaintenanceModeDisableOperation(const cma_operation op);
 void* ConnCmsPMain(void* arg);
 extern bool isUpgradeCluster();
 void* CheckUpgradeMode(void* arg);
+void ImmediateStopDnOfCurNode(uint32 stopInstDelayTime, bool *haveKilledNodes);
+extern int32 CheckFloatIpStateInDnExternal(uint32 index);
 #endif