diff --git a/src/cm_agent/client_adpts/libpq/cma_datanode.cpp b/src/cm_agent/client_adpts/libpq/cma_datanode.cpp index 334010dea36df1edbb1a928b01c71362ebe5b1d6..e5df828dee658215a23a879bc8483c4b1de6666b 100644 --- a/src/cm_agent/client_adpts/libpq/cma_datanode.cpp +++ b/src/cm_agent/client_adpts/libpq/cma_datanode.cpp @@ -1296,6 +1296,12 @@ void StartDatanodeCheck(void) !g_ltranDown[ii]); #endif if (cdt) { + if (g_dnStartCounts[ii] >= g_maxStartTimes) { + write_runlog(LOG, "dn[%d] start times(%u) exceeds the max start times(%u), do not start it!" + " Please check!\n", ii, g_dnStartCounts[ii], g_maxStartTimes); + continue; + } + if (stat(gaussdbStatePath, &instanceStatBuf) == 0) { if (unlink(gaussdbStatePath) != 0) { write_runlog(ERROR, "unlink DN state file(%s) failed.\n", gaussdbStatePath); diff --git a/src/cm_agent/cm_agent.centralized.conf.sample b/src/cm_agent/cm_agent.centralized.conf.sample index d690ab102a5c86a8fea26fa5a3ab20192f09daad..2faccde4b346f77d06601630d6b5b2d4b81c6e64 100644 --- a/src/cm_agent/cm_agent.centralized.conf.sample +++ b/src/cm_agent/cm_agent.centralized.conf.sample @@ -51,4 +51,5 @@ enable_fence_dn = off #enable fence the d #default off event_triggers = '' ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.centralized_new.conf.sample b/src/cm_agent/cm_agent.centralized_new.conf.sample index 26032fb5ce757779482398e0c80d54d26d41cee2..d646448f5c11cd272ac53fbde3773f38377cb2ec 100644 --- a/src/cm_agent/cm_agent.centralized_new.conf.sample +++ b/src/cm_agent/cm_agent.centralized_new.conf.sample @@ -48,4 +48,5 @@ enable_fence_dn = off #enable fence the d #if set to on, restart datenode after 30 seconds. otherwise, don't restart datanode. 
#default off ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.conf.sample b/src/cm_agent/cm_agent.conf.sample index fe8e72d135ba94f9e6a747875923344f281b8561..9f87f1309208bbfb2ab95c16ec9ad599ca3d3536 100644 --- a/src/cm_agent/cm_agent.conf.sample +++ b/src/cm_agent/cm_agent.conf.sample @@ -51,4 +51,5 @@ enable_fence_dn = off #enable fence the d #if set to on, restart datenode after 30 seconds. otherwise, don't restart datanode. #default off ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cma_common.cpp b/src/cm_agent/cma_common.cpp index 294ee8798dc4b447ab21e6a76630600bec642a5b..98c6c239984b54d4e920fa90251a3fcce129056a 100644 --- a/src/cm_agent/cma_common.cpp +++ b/src/cm_agent/cma_common.cpp @@ -402,6 +402,11 @@ void ReloadParametersFromConfigfile() if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0) write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); #endif + g_maxStartTimes = get_int_value_from_config(configDir, "max_start_times", 5); + if (g_maxStartTimes < 5) { + g_maxStartTimes = 5; + } + write_runlog(LOG, "reload cm_agent parameters:\n" diff --git a/src/cm_agent/cma_global_params.cpp b/src/cm_agent/cma_global_params.cpp index 3c9ac30c6b421b682c3f06e2a0929b1642727921..c0ab0b3ff3db490739f4e7fb8ad270982617481b 100644 --- a/src/cm_agent/cma_global_params.cpp +++ b/src/cm_agent/cma_global_params.cpp @@ -234,6 +234,7 @@ bool g_isPauseArbitration = false; char g_cmManualPausePath[MAX_PATH_LEN] = {0}; bool g_isStarting = false; char g_cmManualStartingPath[MAX_PATH_LEN] = {0}; +int g_maxStartTimes = 5; bool 
&GetIsSharedStorageMode() { diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index 3bb1afa3c5bcfce6a63e318c9b6b6c9b03170efa..39f63ecc1d3be3189b626596e09e51d6c8a5c55b 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -184,17 +184,7 @@ void start_cmserver_check(void) } break; case PROCESS_NOT_EXIST: - if (g_startCmsCount < STARTUP_CMS_CHECK_TIMES) { - /* - * the value is -1, it meas the - * cluster is starting now ,and cmserver don't start any one - */ - if (g_startCmsCount == -1) { - g_startCmsCount = 1; - } else { - ++g_startCmsCount; - } - } else { + if (g_startCmsCount >= STARTUP_CMS_CHECK_TIMES) { if (g_startupAlarmList != NULL) { /* fill the alarm message */ WriteAlarmAdditionalInfo(&tempAdditionalParam, @@ -210,6 +200,24 @@ void start_cmserver_check(void) } } + /* + * the value is -1, it means the + * cluster is starting now, and cmserver has not been started yet + */ + if (g_startCmsCount == -1) { + g_startCmsCount = 1; + } else if (g_startCmsCount < g_maxStartTimes) { + ++g_startCmsCount; + } + + write_runlog(LOG, "the node(%u) cms start times(%u), max start times(%u)\n", + g_currentNode->node, g_startCmsCount, g_maxStartTimes); + if (g_startCmsCount >= g_maxStartTimes) { + write_runlog(LOG, "the node(%u) cms start times(%u) exceeds the max start times(%u), do not start it!" 
+ " Please check!\n", g_currentNode->node, g_startCmsCount, g_maxStartTimes); + return; + } + if (g_cmsDiskDamage || g_cmsNicDown) { write_runlog(LOG, "g_cmsDiskDamage is %d, and g_cmsNicDown is %d, cannot start cms.\n", g_cmsDiskDamage, g_cmsNicDown); diff --git a/src/cm_agent/cma_main.cpp b/src/cm_agent/cma_main.cpp index 2371eb6fe1c6402f2d09d3b65642132394c55678..2cff2eb727f10ec6a77c9217121e20247097b2a0 100644 --- a/src/cm_agent/cma_main.cpp +++ b/src/cm_agent/cma_main.cpp @@ -1477,6 +1477,10 @@ int get_agent_global_params_from_configfile() write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); #endif GetEventTrigger(); + g_maxStartTimes = get_int_value_from_config(configDir, "max_start_times", 5); + if (g_maxStartTimes < 5) { + g_maxStartTimes = 5; + } #ifdef __aarch64__ agent_process_cpu_affinity = get_uint32_value_from_config(configDir, "process_cpu_affinity", 0); diff --git a/src/cm_ctl/ctl_param_check.cpp b/src/cm_ctl/ctl_param_check.cpp index d9fa453e719f856cc9acb262b3aecab1fc788e09..27f3ed91f7526551e0c9861752dc6db1293d0a00 100644 --- a/src/cm_ctl/ctl_param_check.cpp +++ b/src/cm_ctl/ctl_param_check.cpp @@ -64,6 +64,7 @@ const char *g_cmaParamInfo[] = { "db_service_vip|string|0,0|NULL|NULL|", "event_triggers|string|0,0|NULL|NULL|", "ss_double_cluster_mode|int|0,2|NULL|NULL|", + "max_start_times|int|5,65536|NULL|max start times for DN and cm_server|", }; const char *g_cmsParamInfo[] = { diff --git a/src/include/cm/cm_agent/cma_global_params.h b/src/include/cm/cm_agent/cma_global_params.h index fbcf08ca6f89c1f83bb726c851f3a1f7bdc0acf7..88103e3939e21dff7ad2db11242ba51684567d71 100644 --- a/src/include/cm/cm_agent/cma_global_params.h +++ b/src/include/cm/cm_agent/cma_global_params.h @@ -314,6 +314,7 @@ extern bool g_isPauseArbitration; extern char g_cmManualPausePath[MAX_PATH_LEN]; extern bool g_isStarting; extern char g_cmManualStartingPath[MAX_PATH_LEN]; +extern int g_maxStartTimes; #endif