From 6dd60831bfc61d4012c6f3bbe6d9ec5429226e80 Mon Sep 17 00:00:00 2001 From: xue_meng_en <1836611252@qq.com> Date: Sat, 30 Mar 2024 15:45:42 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E8=BF=9E=E7=BB=AD=E5=90=AF=E5=8A=A8?= =?UTF-8?q?=E6=AC=A1=E6=95=B0=E8=B6=85=E8=BF=87max=5Fstart=5Ftimes?= =?UTF-8?q?=E6=9A=82=E5=81=9C=E5=90=AF=E5=8A=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cm_agent/client_adpts/libpq/cma_datanode.cpp | 6 ++++++ src/cm_agent/cm_agent.centralized.conf.sample | 1 + src/cm_agent/cm_agent.centralized_new.conf.sample | 1 + src/cm_agent/cm_agent.conf.sample | 1 + src/cm_agent/cma_common.cpp | 5 +++++ src/cm_agent/cma_global_params.cpp | 1 + src/cm_agent/cma_instance_management.cpp | 6 ++++++ src/cm_agent/cma_main.cpp | 4 ++++ src/cm_ctl/ctl_param_check.cpp | 1 + src/include/cm/cm_agent/cma_global_params.h | 1 + 10 files changed, 27 insertions(+) diff --git a/src/cm_agent/client_adpts/libpq/cma_datanode.cpp b/src/cm_agent/client_adpts/libpq/cma_datanode.cpp index 334010d..e5df828 100644 --- a/src/cm_agent/client_adpts/libpq/cma_datanode.cpp +++ b/src/cm_agent/client_adpts/libpq/cma_datanode.cpp @@ -1296,6 +1296,12 @@ void StartDatanodeCheck(void) !g_ltranDown[ii]); #endif if (cdt) { + if (g_dnStartCounts[ii] >= g_maxStartTimes) { + write_runlog(LOG, "dn[%d] start times(%u) exceeds the max start times(%u), do not start it!" 
+ " Please check!\n", ii, g_dnStartCounts[ii], g_maxStartTimes); + continue; + } + if (stat(gaussdbStatePath, &instanceStatBuf) == 0) { if (unlink(gaussdbStatePath) != 0) { write_runlog(ERROR, "unlink DN state file(%s) failed.\n", gaussdbStatePath); diff --git a/src/cm_agent/cm_agent.centralized.conf.sample b/src/cm_agent/cm_agent.centralized.conf.sample index d690ab1..2faccde 100644 --- a/src/cm_agent/cm_agent.centralized.conf.sample +++ b/src/cm_agent/cm_agent.centralized.conf.sample @@ -51,4 +51,5 @@ enable_fence_dn = off #enable fence the d #default off event_triggers = '' ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.centralized_new.conf.sample b/src/cm_agent/cm_agent.centralized_new.conf.sample index 26032fb..d646448 100644 --- a/src/cm_agent/cm_agent.centralized_new.conf.sample +++ b/src/cm_agent/cm_agent.centralized_new.conf.sample @@ -48,4 +48,5 @@ enable_fence_dn = off #enable fence the d #if set to on, restart datenode after 30 seconds. otherwise, don't restart datanode. #default off ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.conf.sample b/src/cm_agent/cm_agent.conf.sample index fe8e72d..9f87f13 100644 --- a/src/cm_agent/cm_agent.conf.sample +++ b/src/cm_agent/cm_agent.conf.sample @@ -51,4 +51,5 @@ enable_fence_dn = off #enable fence the d #if set to on, restart datenode after 30 seconds. otherwise, don't restart datanode. 
#default off ss_double_cluster_mode = 0 #cluster run mode for ss double cluster scene, Valid value: 0-2 +max_start_times = 5 #max start times for DN and cm_server ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cma_common.cpp b/src/cm_agent/cma_common.cpp index 294ee87..98c6c23 100644 --- a/src/cm_agent/cma_common.cpp +++ b/src/cm_agent/cma_common.cpp @@ -402,6 +402,11 @@ void ReloadParametersFromConfigfile() if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0) write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); #endif + g_maxStartTimes = get_int_value_from_config(configDir, "max_start_times", 5); + if (g_maxStartTimes < 5) { + g_maxStartTimes = 5; + } + write_runlog(LOG, "reload cm_agent parameters:\n" diff --git a/src/cm_agent/cma_global_params.cpp b/src/cm_agent/cma_global_params.cpp index 3c9ac30..c0ab0b3 100644 --- a/src/cm_agent/cma_global_params.cpp +++ b/src/cm_agent/cma_global_params.cpp @@ -234,6 +234,7 @@ bool g_isPauseArbitration = false; char g_cmManualPausePath[MAX_PATH_LEN] = {0}; bool g_isStarting = false; char g_cmManualStartingPath[MAX_PATH_LEN] = {0}; +int g_maxStartTimes = 5; bool &GetIsSharedStorageMode() { diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index 3bb1afa..b26dcef 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -210,6 +210,12 @@ void start_cmserver_check(void) } } + if (g_startCmsCount >= g_maxStartTimes) { + write_runlog(LOG, "the node(%u) cms start times(%u) exceeds the max start times(%u), do not start it!" 
+ " Please check!\n", g_currentNode->node, g_startCmsCount, g_maxStartTimes); + return; + } + if (g_cmsDiskDamage || g_cmsNicDown) { write_runlog(LOG, "g_cmsDiskDamage is %d, and g_cmsNicDown is %d, cannot start cms.\n", g_cmsDiskDamage, g_cmsNicDown); diff --git a/src/cm_agent/cma_main.cpp b/src/cm_agent/cma_main.cpp index 2371eb6..2cff2eb 100644 --- a/src/cm_agent/cma_main.cpp +++ b/src/cm_agent/cma_main.cpp @@ -1477,6 +1477,10 @@ int get_agent_global_params_from_configfile() write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); #endif GetEventTrigger(); + g_maxStartTimes = get_int_value_from_config(configDir, "max_start_times", 5); + if (g_maxStartTimes < 5) { + g_maxStartTimes = 5; + } #ifdef __aarch64__ agent_process_cpu_affinity = get_uint32_value_from_config(configDir, "process_cpu_affinity", 0); diff --git a/src/cm_ctl/ctl_param_check.cpp b/src/cm_ctl/ctl_param_check.cpp index d9fa453..27f3ed9 100644 --- a/src/cm_ctl/ctl_param_check.cpp +++ b/src/cm_ctl/ctl_param_check.cpp @@ -64,6 +64,7 @@ const char *g_cmaParamInfo[] = { "db_service_vip|string|0,0|NULL|NULL|", "event_triggers|string|0,0|NULL|NULL|", "ss_double_cluster_mode|int|0,2|NULL|NULL|", + "max_start_times|int|5,65536|NULL|max start times for DN and cm_server|", }; const char *g_cmsParamInfo[] = { diff --git a/src/include/cm/cm_agent/cma_global_params.h b/src/include/cm/cm_agent/cma_global_params.h index fbcf08c..88103e3 100644 --- a/src/include/cm/cm_agent/cma_global_params.h +++ b/src/include/cm/cm_agent/cma_global_params.h @@ -314,6 +314,7 @@ extern bool g_isPauseArbitration; extern char g_cmManualPausePath[MAX_PATH_LEN]; extern bool g_isStarting; extern char g_cmManualStartingPath[MAX_PATH_LEN]; +extern int g_maxStartTimes; #endif -- Gitee From 42824c87b9252692e3e58cfe9719478595f481cd Mon Sep 17 00:00:00 2001 From: xue_meng_en <1836611252@qq.com> Date: Sat, 29 Jun 2024 18:09:02 +0800 Subject: [PATCH 2/4] debug --- src/cm_agent/cma_instance_management.cpp | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index b26dcef..63a3b4b 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -210,6 +210,8 @@ void start_cmserver_check(void) } } + write_runlog(LOG, "the node(%u) cms start times(%u), max start times(%u)\n", + g_currentNode->node, g_startCmsCount, g_maxStartTimes); if (g_startCmsCount >= g_maxStartTimes) { write_runlog(LOG, "the node(%u) cms start times(%u) exceeds the max start times(%u), do not start it!" " Please check!\n", g_currentNode->node, g_startCmsCount, g_maxStartTimes); -- Gitee From ba22eb263e924051d7a26f7a2a9e3de1e086e79b Mon Sep 17 00:00:00 2001 From: xue_meng_en <1836611252@qq.com> Date: Thu, 4 Jul 2024 10:30:45 +0800 Subject: [PATCH 3/4] start cms times ++ --- src/cm_agent/cma_instance_management.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index 63a3b4b..8bec124 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -184,17 +184,7 @@ void start_cmserver_check(void) } break; case PROCESS_NOT_EXIST: - if (g_startCmsCount < STARTUP_CMS_CHECK_TIMES) { - /* - * the value is -1, it meas the - * cluster is starting now ,and cmserver don't start any one - */ - if (g_startCmsCount == -1) { - g_startCmsCount = 1; - } else { - ++g_startCmsCount; - } - } else { + if (g_startCmsCount >= STARTUP_CMS_CHECK_TIMES) { if (g_startupAlarmList != NULL) { /* fill the alarm message */ WriteAlarmAdditionalInfo(&tempAdditionalParam, @@ -210,6 +200,16 @@ void start_cmserver_check(void) } } + /* + * a value of -1 means the + * cluster is starting now, and no cmserver has been started yet + */ + if (g_startCmsCount == -1) { + g_startCmsCount = 1; + } else { + ++g_startCmsCount; + } + write_runlog(LOG, "the node(%u) cms start
times(%u), max start times(%u)\n", g_currentNode->node, g_startCmsCount, g_maxStartTimes); if (g_startCmsCount >= g_maxStartTimes) { -- Gitee From 7a7811ba3c0e35153ae9a6ddac3fa6f9b1f91790 Mon Sep 17 00:00:00 2001 From: xue_meng_en <1836611252@qq.com> Date: Fri, 5 Jul 2024 10:26:50 +0800 Subject: [PATCH 4/4] fix --- src/cm_agent/cma_instance_management.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cm_agent/cma_instance_management.cpp b/src/cm_agent/cma_instance_management.cpp index 8bec124..39f63ec 100644 --- a/src/cm_agent/cma_instance_management.cpp +++ b/src/cm_agent/cma_instance_management.cpp @@ -206,7 +206,7 @@ void start_cmserver_check(void) */ if (g_startCmsCount == -1) { g_startCmsCount = 1; - } else { + } else if (g_startCmsCount < g_maxStartTimes) { ++g_startCmsCount; } -- Gitee