From 80acba2bbb283637c1d6755382e218ddc93d43ec Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 17 May 2025 10:18:47 +0800 Subject: [PATCH 001/144] cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM --- script/base_utils/template/resource_en.json | 13 +- script/base_utils/template/resource_zh.json | 13 +- script/gspylib/common/ClusterParams.py | 4 +- script/gspylib/common/Common.py | 9 + script/gspylib/common/OMCommand.py | 26 ++ .../component/Kernel/DN_OLAP/DN_OLAP.py | 415 +++++++++++++----- script/gspylib/component/Kernel/Kernel.py | 240 ++++------ script/impl/install/InstallImpl.py | 82 +--- script/impl/install/OLAP/InstallImplOLAP.py | 3 +- script/impl/preinstall/PreinstallImpl.py | 39 +- script/local/CheckConfig.py | 3 +- script/local/ConfigHba.py | 16 - script/local/Install.py | 28 +- script/local/LocalCheckOS.py | 45 +- script/local/PreInstallUtility.py | 19 +- 15 files changed, 558 insertions(+), 397 deletions(-) diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index c56584d7..301e3681 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -24,13 +24,18 @@ "input_cm": "Please enter 1/2 for selection, the default option is 1) Deploy CM", "cm_port": "Please enter the cmserver port(default:15400):", - "choose_pri_standby": "Please choose whether to deploy as primary standby or single?", - "input_pri_standby": "Please enter 1/2 for selection, the default option is 1) primary and standby deployment", + "choose_pri_standby": "Please choose whether to deploy in single-node or multi-node mode?", + "input_pri_standby": "Please enter 1/2 for selection, the default option is 1) multi-node deployment", "input_ip_hostname": "Please enter the host node IP and node name(for example:192.168.0.1 hostname1;192.168.0.2 hostname2)", "max_nodes": "Please enter the number of nodes, supporting a maximum of one primary and eight backup, which is 9 node (default is one primary and two backup, with 3 nodes)", + + "choose_dbstor" : "Enable DBStor shared-storage mode?", + "input_dbstor" : "Input 1 / 2 (default 1) 1) Disable 2) Enable", + "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: 
log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", + "intput_share_fs" : "Enter the shared-filesystem mount info, e.g. 127.0.0.1:share_fs", - "deploy_pri_standby": "Primary and standby deployment", - "deploy_single": "single deployment", + "deploy_pri_standby": "multi-node deployment", + "deploy_single": "single-node deployment", "not_permission": "The user does not have permission for", "invalid_character": "The input character is invalid!", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 77ba1fb0..8b4b0b04 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -24,13 +24,18 @@ "input_cm": "请输入 1/2 进行选择,默认选项是 1)部署CM", "cm_port": "请输入cmserver端口(默认:15400):", - "choose_pri_standby": "请选择是否主备部署?", - "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)主备部署", + "choose_pri_standby": "请选择是否多节点部署?", + "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)多节点部署", "input_ip_hostname": "请输入主机节点IP和节点名称(如:192.168.0.1 hostname1;192.168.0.2 hostname2)", "max_nodes": "请输入节点数量,最多支持一主八备,即9个节点(默认是一主两备,3个节点)", - "deploy_pri_standby": "主备部署", - "deploy_single": "单机部署", + "choose_dbstor": "请选择是否启用 DBStor 共享存储?", + "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)不部署", + "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", + "intput_share_fs": "请输入共享文件系统挂载信息(形如 127.0.0.1:share_fs)", + + "deploy_pri_standby": "多节点部署", + "deploy_single": "单节点部署", "not_permission": "当前用户没有权限操作", "invalid_character": "存在不合法的字符", diff --git a/script/gspylib/common/ClusterParams.py b/script/gspylib/common/ClusterParams.py index 0c5f5e2c..e78f2680 100644 --- a/script/gspylib/common/ClusterParams.py +++ b/script/gspylib/common/ClusterParams.py @@ -57,6 +57,7 @@ class ClusterParams: UWAL_DEVICES_PATH = 'uwal_devices_path' PASSWORD = 'password' CLUSTER_TYPE = 'clusterType' + SHARE_FS = 'share_fs' @staticmethod def get_all_param_names(): @@ -89,7 +90,8 @@ class ClusterParams: ClusterParams.UWAL_ASYNC_APPEND_SWITCH, ClusterParams.UWAL_DEVICES_PATH, ClusterParams.PASSWORD, - ClusterParams.CLUSTER_TYPE + ClusterParams.CLUSTER_TYPE, + ClusterParams.SHARE_FS ] FLOAT_IP_PATTERN = re.compile(r'\bfloatIp[0-9]+') diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 6ff59b7e..4e993dd9 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1180,6 +1180,15 @@ class DefaultValue(): "export PGHOST=%s" % tmpPath, \ "export GAUSSLOG=%s" % logPath, "umask 077"] + # ---------- Cantian‑specific ---------- + envList.extend([ + f"export CM_CONFIG_PATH=$PGDATA/cm_config.ini", + f"export CTDB_HOME=$PGDATA", + f"export GSDB_HOME=$PGDATA", + f"export DSS_HOME=$PGDATA/dss", + f"export DSS_DATA_DISK=$DSS_HOME/data", + f"export share_fs=127.0.0.1:share_fs" + ]) if agentPath != '': envList.append("export AGENTPATH=%s" % agentPath) if agentLogPath != '': diff --git a/script/gspylib/common/OMCommand.py b/script/gspylib/common/OMCommand.py index fd98dc9e..ba674dea 100644 --- a/script/gspylib/common/OMCommand.py +++ b/script/gspylib/common/OMCommand.py @@ -262,3 +262,29 @@ class OMCommand(): finally: if os.path.exists(status_file): os.remove(status_file) + + @staticmethod + def wait_for_cantiand(logger, datadir, timeout: int = 300, delta: int = 5): + """ + 等待单节点 cantiand 进入 OPEN 状态 + """ + logger.debug("Waiting for cantiand to reach OPEN state…") + + pgrep_cmd = r"pgrep -f 'cantiand.*open'" + + retry_times = 1 if timeout <= 0 else max(1, timeout // delta) + + status, _ = 
CmdUtil.retryGetstatusoutput( + pgrep_cmd, + retry_time=retry_times, + sleep_time=delta + ) + + if status == 0: + logger.log("cantiand started and is in OPEN state.") + return + + raise Exception( + ErrorCode.GAUSS_528["GAUSS_52800"] % + ("Timeout", "cantiand not started") + ) \ No newline at end of file diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 4e065d41..4f6328ac 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -30,6 +30,7 @@ from domain_utils.cluster_file.cluster_dir import ClusterDir from base_utils.os.compress_util import CompressUtil from base_utils.os.env_util import EnvUtil from base_utils.os.file_util import FileUtil +from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.grep_util import GrepUtil from base_utils.os.user_util import UserUtil @@ -54,6 +55,7 @@ CASCADE_STANDBY_INSTANCE = 3 BASE_ID_GTM = 4001 BASE_ID_DATANODE = 6001 + class DN_OLAP(Kernel): ''' The class is used to define base component. @@ -78,6 +80,28 @@ class DN_OLAP(Kernel): tmpDict["ssl_ca_file"] = "'cacert.pem'" return tmpDict + def calc_ss_instance_id(self) -> str: + """ + 返回当前 DN 在全集群 DN 列表中的 0-based 顺序号(字符串)。 + """ + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + + all_ids = [ + dn.instanceId + for node in cluster.dbNodes + for dn in node.datanodes + ] + all_ids.sort() + + try: + idx = all_ids.index(self.instInfo.instanceId) + except ValueError: + idx = 0 + + return str(idx) + def copyAndModCertFiles(self): """ function : copy and chage permission cert files @@ -110,21 +134,247 @@ class DN_OLAP(Kernel): FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.rand" % self.instInfo.datadir) + @staticmethod + def copy_and_render_file(src: str, dst: str, mode: str = DefaultValue.KEY_FILE_MODE): + """ + Copy *src* to *dst* and replace ${VAR}/$VAR using os.environ + """ + FileUtil.cpFile(src, dst, True) + text = ''.join(FileUtil.readFile(dst)) + + def _replace(match): + var = match.group(1) or match.group(2) + return os.environ.get(var, "") + + text = re.sub(r"\$\{(\w+)\}|\$(\w+)", _replace, text) + + FileUtil.writeFile(dst, [text], "w") + FileUtil.changeMode(mode, dst) + + def is_primary_node(self) -> bool: + """ + 判断当前实例是否位于集群 static_config 中的第一个 dbNode(primary-init 节点)。 + """ + user = UserUtil.getUserInfo()["name"] + cluster_info = dbClusterInfo() + cluster_info.initFromStaticConfig(user) + + if not cluster_info.dbNodes: + return False + + first_node = cluster_info.dbNodes[0] + cur_id = self.instInfo.instanceId + return any(dn.instanceId == cur_id for dn in first_node.datanodes) + + def create_database(self, gauss_home: str): + """ + 1. 复制 GAUSSHOME/admin → CTDB_HOME/admin + 2. 确保 CTDB_HOME/data 目录存在且为空:如有内容先清空 + 3. 渲染 create_database.sql.template → CTDB_HOME/data/create_database.sql + 4. 
执行建库脚本 + """ + ctdb_home = os.getenv("CTDB_HOME") + if not ctdb_home: + raise Exception("CTDB_HOME is not defined.") + + # 准备创库sql + src_admin = os.path.join(gauss_home, "admin") + dst_admin = os.path.join(ctdb_home, "admin") + FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") + FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, dst_admin, recursive=True) + + tpl_sql = os.path.join(gauss_home, "share", "cantian", + "create_database.sql.template") + work_sql = os.path.join(ctdb_home, "create_database.sql") + self.copy_and_render_file(tpl_sql, work_sql) + + cmd = (f"ctsql '/' as sysdba -q " + f"-D {self.instInfo.datadir} -f {work_sql}") + self.logger.debug(cmd) + st, out = CmdUtil.retryGetstatusoutput(cmd) + if st != 0: + raise Exception("create database failed:\n" + out) + + self.logger.debug("Create database success.") + + def prepare_data_dir(self) -> None: + """ + 1. 从环境变量 share_fs / SHARE_FS 获取 NFS,如 10.1.1.1:/share_dir + 2. 主节点:sudo 挂 root → 创建/清空 cantianData → 卸 root + 3. 所有节点:sudo 挂 cantianData → chown 给业务用户 + """ + ctdb_home = EnvUtil.getEnv("CTDB_HOME") + if not ctdb_home: + raise EnvironmentError("CTDB_HOME is not set.") + data_dir = os.path.join(ctdb_home, "data") + FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) + self.logger.debug(f"[prepare_data_dir] data_dir = {data_dir}") + + # ---------- 1. 读取 share_fs ---------- + share_fs_val = os.getenv("share_fs") or os.getenv("SHARE_FS") or "" + self.logger.debug(f"[prepare_data_dir] env share_fs = {share_fs_val or ''}") + if not share_fs_val: + self.logger.debug("[prepare_data_dir] share_fs not set; skip.") + return + if ":" not in share_fs_val: + raise ValueError(f"share_fs invalid: {share_fs_val!r} (expect :)") + + host_ip, fs_dir = share_fs_val.split(":", 1) + if host_ip in ("127.0.0.1", "localhost"): + self.logger.debug("[prepare_data_dir] share_fs points to localhost; skip.") + return + + nfs_root = f"{host_ip}:/{fs_dir}" + nfs_subdir = f"{host_ip}:/{fs_dir}/cantianData" + self.logger.debug(f"[prepare_data_dir] nfs_root = {nfs_root}") + self.logger.debug(f"[prepare_data_dir] nfs_subdir = {nfs_subdir}") + + uid, gid = os.getuid(), os.getgid() + mount_opt = "-o rw,soft,timeo=600,retrans=2" + + if self.is_primary_node(): + # 3. 挂 root + cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_root} {data_dir}" + self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") + st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) + if st != 0 and "already mounted" not in out.lower(): + raise RuntimeError(f"mount root failed:\n{out}") + + CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") + + # 4. 创建 / 清空 cantianData + cantian_dir = os.path.join(data_dir, "cantianData") + if os.path.exists(cantian_dir): + FileUtil.cleanDirectoryContent(cantian_dir) + else: + cmd = f"sudo -n mkdir -p {cantian_dir}" + self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") + st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) + if st != 0: + CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") + raise RuntimeError(f"create cantianData failed:\n{out}") + + # 5. 卸 root + CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") + + # ---------- 6. 
所有节点挂 cantianData ---------- + cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_subdir} {data_dir}" + self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") + st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) + if st != 0 and "already mounted" not in out.lower(): + raise RuntimeError(f"mount cantianData failed:\n{out}") + + CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") + + self.logger.debug(f"[prepare_data_dir] complete: {nfs_subdir} mounted on {data_dir}") + + def prepare_config(self) -> None: + """ + 统一完成 **Cantian / DSS / CM** 的目录创建、模板拷贝、参数写入。 + """ + gauss_home = EnvUtil.getEnv("GAUSSHOME") + pgdata = self.instInfo.datadir + + # -------- Cantian 配置 ----------------------------------------- + cantian_ini_tpl = os.path.join(gauss_home, "share", "cantian", + "cantiand.ini.sample") + cantian_cfg_dir = os.path.join(pgdata, "cfg") + FileUtil.createDirectory(cantian_cfg_dir, DefaultValue.KEY_DIRECTORY_MODE) + self.copy_and_render_file(cantian_ini_tpl, + os.path.join(cantian_cfg_dir, "cantiand.ini")) + FileUtil.createFile(os.path.join(cantian_cfg_dir, "cantiand.ini.tmp"), + True, DefaultValue.KEY_FILE_MODE) + # 更新cantian配置文件 + self.updateConfig(os.path.join(cantian_cfg_dir, "cantiand.ini"), + self.getCantianDict()) + + # -------- DSS 目录 & 配置 -------------------------------------- + dss_home = os.getenv("DSS_HOME", + os.path.realpath(os.path.join(pgdata, "../dss"))) + dss_data = os.getenv("DSS_DATA_DISK", + os.path.realpath(os.path.join(pgdata, "../dss/data"))) + FileUtil.createDirectory(dss_home, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.createDirectory(dss_data, DefaultValue.KEY_DIRECTORY_MODE) + + dss_cfg_dir = os.path.join(dss_data, "cfg") + FileUtil.createDirectory(dss_cfg_dir, DefaultValue.KEY_DIRECTORY_MODE) + + dss_ini_tpl = os.path.join(gauss_home, "share", "dss", "cantiand.ini.sample") + dss_ini_dst = os.path.join(dss_cfg_dir, "cantiand.ini") + self.copy_and_render_file(dss_ini_tpl, dss_ini_dst) + + # 更新配置文件 + self.updateConfig(dss_ini_dst, + {"SS_INTERCONNECT_URL": self.get_ss_inter_url()}) + + # -------- CM 配置 --------------------------------------------- + cm_dest = os.getenv("CM_CONFIG_PATH", + os.path.join(pgdata, "cm_config.ini")) + cm_tpl = os.path.join(gauss_home, "share", "cm", "cm_config.ini.sample") + self.copy_and_render_file(cm_tpl, cm_dest) + + self.logger.debug("set_config done.") + + def init_cantian(self) -> None: + self.logger.debug("Start Cantian init …") + + self.prepare_data_dir() + self.prepare_config() + + if self.is_primary_node(): + gauss_home = EnvUtil.getEnv("GAUSSHOME") + self.init_database(gauss_home) + + self.copyAndModCertFiles() + + self.logger.log("Cantian instance init success.") + + def init_database(self, gauss_home: str): + """ + 仅1节点调用: + 1. 后台 nomount 启动 Cantian + 2. 执行建库 SQL + 3. 
优雅关闭 Cantian + """ + pgdata = self.instInfo.datadir + + nomount_cmd = f"nohup cantiand nomount -D {pgdata} >/dev/null 2>&1 &" + self.logger.debug(nomount_cmd) + st, out = CmdUtil.retryGetstatusoutput(nomount_cmd) + if st != 0: + raise Exception(f"Cantian nomount failed:\n{out}") + + self.create_database(gauss_home) + + pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" + status, _ = CmdUtil.retryGetstatusoutput(pgrep_cmd) + + if status != 0: + self.logger.debug("cantiand is not running, skip shutdown.") + return + + shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) + + if st != 0: + raise Exception(f"Cantiand shutdown failed:\n{out}") + + self.logger.debug("Cantiand shutdown successfully") + @Dss.catch_err(exist_so=True) def initInstance(self): """ - function: - init DB instance - input:string:NA - output: + 初始化实例的顺序: + 1. DWS 镜像 → 解压镜像并写 pgxc_node_name + 2. DSS 模式 → (Cantian 暂不支持) + 3. Cantian → cantiand + ctsql 建库 """ - if (not os.path.exists(self.instInfo.datadir)): + if not os.path.exists(self.instInfo.datadir): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % - ("data directory [%s]" % self.instInfo.datadir)) + (f"data directory [{self.instInfo.datadir}]")) + + nodename = self.getInstanceNodeName()[:22] - nodename = self.getInstanceNodeName() - # if nodename too long, obtains the first 22 digits - nodename = nodename[:22] if (self.dwsMode): image_path = DefaultValue.DWS_IMAGE_PATH # decompress package to files @@ -134,49 +384,15 @@ class DN_OLAP(Kernel): tmpDict = {} tmpDict["pgxc_node_name"] = "'%s'" % nodename self.setGucConfig(tmpDict) - else: - # If xlogdir is set in xmlfile, an independent xlog - # path will be created. - if (self.instInfo.xlogdir != ''): - cmd = "%s/gs_initdb --locale=C -D %s -X %s " \ - "--nodename=%s %s -C %s" % ( - self.binPath, self.instInfo.datadir, - self.instInfo.xlogdir, nodename, - " ".join(self.initParas), self.binPath) - else: - cmd = "%s/gs_initdb --locale=C -D %s --nodename=%s %s -C %s" \ - % \ - (self.binPath, self.instInfo.datadir, nodename, - " ".join(self.initParas), self.binPath) - self.logger.debug('check DCF mode:%s' % self.paxos_mode) - if self.paxos_mode: - cmd += " -c" - elif self.dss_mode: - vgname = EnvUtil.getEnv('VGNAME') - dss_home = EnvUtil.getEnv('DSS_HOME') - inst_id = DssInst.get_dss_id_from_key(dss_home) - dss_nodes_list = DssConfig.get_value_b64_handler( - 'dss_nodes_list', self.dss_config, action='decode') - cfg_context = DssInst.get_dms_url(dss_nodes_list) - - # when use one private vg for xlog, vgname should get from inst_id=0 - pri_vgname = DssInst.get_private_vgname_by_ini(dss_home, inst_id) - cmd += " -n --vgname=\"{}\" --enable-dss --dms_url=\"{}\" -I {}" \ - " --socketpath=\"{}\"".format( - "+{},+{}".format(vgname, pri_vgname), cfg_context, inst_id, - "UDS:{}/.dss_unix_d_socket".format(dss_home)) - if (self.dorado_cluster_mode != ""): - cmd += " --enable-ss-dorado" - self.logger.debug("Command for initializing database " - "node instance: %s" % cmd) - status, output = CmdUtil.retryGetstatusoutput( - cmd, retry_time=0 if self.dss_mode else 3) - if (status != 0): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51615"] + - " Command:%s. Error:\n%s" % (cmd, output)) - # set ssl to DB nodes. 
+ + if self.dss_mode: + raise Exception("Cantian 初始化暂不支持 DSS 共享存储模式。") + + self.init_cantian() + dnGucParas = self.getDnGUCDict() self.setGucConfig(dnGucParas) + self.copyAndModCertFiles() def getInstanceNodeName(self): @@ -330,6 +546,41 @@ class DN_OLAP(Kernel): self.logger.debug("DN parameter value is : {0}".format(tmp_dn_dict)) return tmp_dn_dict + def getCantianDict(self) -> dict: + """ + 写入 cantiand.ini 的 3 个字段 + """ + return { + "LSNR_ADDR": self.instInfo.listenIps[0], + "LSNR_PORT": str(self.instInfo.port), + "SS_INSTANCE_ID": self.calc_ss_instance_id(), + } + + def get_ss_inter_url(self) -> str: + """ + 例:0|10.1.1.1|1611,1|10.1.1.2|1611,2|10.1.1.3|1611 + """ + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + + all_dn = sorted( + (dn for node in cluster.dbNodes for dn in node.datanodes), + key=lambda x: x.instanceId, + ) + + if not all_dn: + raise Exception("No datanodes found in cluster configuration.") + + default_port = 1611 + + parts = [] + for idx, dn in enumerate(all_dn): + ip = dn.listenIps[0] + parts.append(f"{idx}|{ip}|{default_port}") + + return ",".join(parts) + def getPrivateGucParamList(self): """ function : Get the private guc parameter list. @@ -491,70 +742,6 @@ class DN_OLAP(Kernel): else: guc_dict["max_connections"] = 1000 - def setPghbaConfig(self, clusterAllIpList, try_reload=False, float_ips=None): - """ - """ - principal = None - if DefaultValue.checkKerberos(EnvUtil.getMpprcFile()): - - (status, output) = \ - GrepUtil.getGrepValue("-Er", "^default_realm", - os.path.join(os.path.dirname( - EnvUtil.getMpprcFile()), - DefaultValue.FI_KRB_CONF)) - if status != 0: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % - "krb5.conf" + "Error:\n%s" % output) - principal = output.split("=")[1].strip() - - # build ip string list - # Every 1000 records merged into one - i = 0 - guc_paras_str = "" - GUCParasStrList = [] - pg_user = ClusterUser.get_pg_user() - for ip_address in clusterAllIpList: - i += 1 - subnet_length = NetUtil.get_submask_len(ip_address) - # Set the initial user and initial database access permissions - if principal is None: - if ip_address.startswith("floatIp"): - guc_paras_str += "-h \"host all all %s/%s %s\" " % \ - (float_ips[ip_address], subnet_length, METHOD_SHA) - else: - guc_paras_str += "-h \"host all %s %s/%s %s\" " % \ - (pg_user, ip_address, subnet_length, METHOD_TRUST) - guc_paras_str += "-h \"host all all %s/%s %s\" " % \ - (ip_address, subnet_length, METHOD_SHA) - else: - if ip_address.startswith("floatIp"): - guc_paras_str += "-h \"host all all %s/%s %s\" " % \ - (float_ips[ip_address], subnet_length, METHOD_SHA) - else: - guc_paras_str += "-h \"host all %s %s/%s gss include_realm=1 " \ - " krb_realm=%s\" " % (pg_user, ip_address, subnet_length, principal) - guc_paras_str += "-h \"host all all %s/%s %s\" " % \ - (ip_address, subnet_length, METHOD_SHA) - if (i % MAX_PARA_NUMBER == 0): - GUCParasStrList.append(guc_paras_str) - i = 0 - guc_paras_str = "" - streaming_dn_ips = self.get_streaming_relate_dn_ips(self.instInfo) - if streaming_dn_ips: - for dn_ip in streaming_dn_ips: - subnet_length = NetUtil.get_submask_len(ip_address) - guc_paras_str += "-h \"host all %s %s/%s %s\" " \ - % (pg_user, dn_ip, subnet_length, METHOD_TRUST) - guc_paras_str += "-h \"host all all %s/%s %s\" " \ - % (dn_ip, subnet_length, METHOD_SHA) - ip_segment = NetUtil.get_ip_cidr_segment(dn_ip) - guc_paras_str += "-h \"host replication all %s sha256\" " % ip_segment - - if (guc_paras_str != ""): - 
GUCParasStrList.append(guc_paras_str) - - for parasStr in GUCParasStrList: - self.doGUCConfig("set", parasStr, True, try_reload=try_reload) """ Desc: diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 8b6b3c16..86535df8 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -20,6 +20,7 @@ import os import subprocess import re import pwd +import time import json sys.path.append(sys.path[0] + "/../../../") @@ -35,6 +36,7 @@ from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.net_util import NetUtil MAX_PARA_NUMBER = 1000 +TIMR_OUT = 60 class Kernel(BaseComponent): @@ -67,79 +69,61 @@ class Kernel(BaseComponent): start/stop/query single instance """ - def start(self, time_out=DefaultValue.TIMEOUT_CLUSTER_START, - security_mode="off", cluster_number=None, is_dss_mode=False): + def start(self, is_dss_mode: bool = False): """ + 启动 Cantian 实例 + • 普通模式:后台执行 `cantiand open -D ` + • DSS 模式目前跳过 """ - if cluster_number: - cmd = "%s/gs_ctl start -o '-u %s' -D %s " % ( - self.binPath, int(float(cluster_number) * 1000), - self.instInfo.datadir) - else: - cmd = "%s/gs_ctl start -D %s " % ( - self.binPath, self.instInfo.datadir) - if not is_dss_mode and self.instInfo.instanceType == DefaultValue.MASTER_INSTANCE: - if len(self.instInfo.peerInstanceInfos) > 0: - cmd += "-M primary" - elif not is_dss_mode and self.instInfo.instanceType == DefaultValue.CASCADE_STANDBY: - cmd += "-M cascade_standby" - elif not is_dss_mode and self.instInfo.instanceType == DefaultValue.STANDBY_INSTANCE: - cmd += "-M standby" - if time_out is not None: - cmd += " -t %s" % time_out - if security_mode == "on": - cmd += " -o \'--securitymode\'" - configFile = "%s/postgresql.conf" % self.instInfo.datadir - output = FileUtil.readFile(configFile, "logging_collector") - value = None - for line in output: - line = line.split('#')[0].strip() - if line.find('logging_collector') >= 0 and line.find('=') > 0: - value = line.split('=')[1].strip() - break - if value == "off": - cmd += " >/dev/null 2>&1" - self.logger.debug("start cmd = %s" % cmd) - (status, output) = subprocess.getstatusoutput(cmd) - if status != 0 or re.search("start failed", output): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "instance" - + " Error: Please check the gs_ctl log for " - "failure details." + "\n" + output) - if value == "off": - output = "[BACKEND] WARNING: The parameter logging_collector is " \ - "set to off. The log will not be recorded to file. " \ - "Please check any error manually." - self.logger.log(output) - - def stop(self, stopMode="", time_out=300): - """ - """ - cmd = "%s/gs_ctl stop -D %s " % ( - self.binPath, self.instInfo.datadir) - if not self.isPidFileExist(): - cmd += " -m immediate" - else: - # check stop mode - if stopMode != "": - cmd += " -m %s" % stopMode - cmd += " -t %s" % time_out - self.logger.debug("stop cmd = %s" % cmd) - (status, output) = subprocess.getstatusoutput(cmd) + if is_dss_mode: + self.logger.debug("DSS mode start skipped (not yet supported).") + return + + data_dir = self.instInfo.datadir + + cmd = f"nohup cantiand open -D {data_dir} > /dev/null 2>&1 &" + self.logger.debug(f"start cmd = {cmd}") + + status, out = subprocess.getstatusoutput(cmd) if status != 0: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % - "instance" + " Error: \n%s." 
% output) - if output.find("No such process") > 0: - cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \ - "awk '{if($2 == curuid && $1!=\"-n\") " \ - "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \ - " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \ - "awk -F/ '{print $3 }'" % (self.instInfo.datadir) - (status, rightpid) = subprocess.getstatusoutput(cmd) - if rightpid and rightpid.find("Permission denied") > -1: - self.logger.debug("stop success with query process %s" % output) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "Cantian instance" + + "\nError: " + out) + + for _ in range(TIMR_OUT): + st, pid, _ = subprocess.getstatusoutput( + f"pgrep -f \"cantiand[^ ]* open -D {data_dir}\"" + ) + if st == 0 and pid.strip(): + self.logger.debug(f"Cantian running, pid={pid.strip()}") + self.logger.log("Cantian instance start success.") return - if rightpid or status != 0: - GaussLog.exitWithError(output) + time.sleep(1) + + # 若超时 + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "Cantian instance" + + " Error: process did not come up within 60 seconds.") + + def stop(self): + """ + 停止 Cantian 实例: + 通过 `pidof cantiand` + `grep memdir` 找出进程后执行 kill -9 + """ + memdir = f"/dev/shm/gaussdb/{self.instInfo.instanceId}" + find_cmd = f"ps -ef | grep cantiand | grep '{memdir}' | grep -v grep | awk '{{print $2}}'" + status, pids = subprocess.getstatusoutput(find_cmd) + + if status != 0 or not pids.strip(): + self.logger.log("No Cantian process found for this instance; skip stop.") + return + + # 执行 kill -9 stop cantian 进程 + for pid in pids.strip().splitlines(): + kill_cmd = f"kill -9 {pid}" + self.logger.debug(f"Stopping Cantian instance: {kill_cmd}") + status, output = subprocess.getstatusoutput(kill_cmd) + if status != 0: + raise Exception(f"Failed to kill Cantian process {pid}:\n{output}") + self.logger.log(f"Cantian process {pid} killed.") def isPidFileExist(self): pidFile = "%s/postmaster.pid" % self.instInfo.datadir @@ -428,68 +412,50 @@ class Kernel(BaseComponent): return tempCommonDict - def doGUCConfig(self, action, GUCParasStr, isHab=False, try_reload=False): + def setGucConfig(self, paraDict=None, setMode="set"): """ + 将安装 XML / 默认模板参数写入/cfg/cantiand.ini.tmp """ - # check instance data directory - if (self.instInfo.datadir == "" or not os.path.exists( - self.instInfo.datadir)): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % - ("data directory of the instance[%s]" % - str(self.instInfo))) + ini_path = os.path.join( + self.instInfo.datadir, "cfg", "cantiand.ini.tmp") + self.updateConfig(ini_path, paraDict or {}) - if (GUCParasStr == ""): - return + def updateConfig(self, ini_path: str, kv: dict) -> None: + """ + 把 kv 写入 ini_path + 1. 读取原文件;遇到相同 key --> 覆盖整行 + 2. 
新 key 追加到末尾 + """ + if not os.path.isfile(ini_path): + raise Exception(f"{ini_path} not found") - # check conf file - if (isHab == True): - configFile = "%s/pg_hba.conf" % self.instInfo.datadir - else: - configFile = "%s/postgresql.conf" % self.instInfo.datadir - if (not os.path.exists(configFile)): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % configFile) - - if try_reload: - cmd_reload = "%s/gs_guc %s -D %s %s " % (self.binPath, 'reload', - self.instInfo.datadir, GUCParasStr) - status, output = CmdUtil.retryGetstatusoutput(cmd_reload, 3, 3) - if status != 0: - self.logger.log("Failed to reload guc params with commander:[%s]" % cmd_reload) + lines = [ln.rstrip("\n") for ln in FileUtil.readFile(ini_path)] + touched = set() + new_ln = [] + + for line in lines: + raw = line.lstrip() + if raw.startswith("#") or "=" not in raw: + new_ln.append(line) + continue + + key, _, _ = raw.partition("=") + key = key.strip() + if key in kv: + indent = line[:line.find(key)] + new_ln.append(f"{indent}{key} = {kv[key]}") + touched.add(key) else: - self.logger.log("Successfully to reload guc params with commander:[%s]" - % cmd_reload) - return - cmd = "%s/gs_guc %s -D %s %s " % (self.binPath, action, - self.instInfo.datadir, GUCParasStr) - self.logger.debug("gs_guc command is: {0}".format(cmd)) - (status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 3) - if (status != 0): - raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + - " Command: %s. Error:\n%s" % (cmd, output)) - - def setGucConfig(self, paraDict=None, setMode='set'): - """ - """ - i = 0 - guc_paras_str = "" - guc_paras_str_list = [] - if paraDict is None: - paraDict = {} - for paras in paraDict: - i += 1 - value = str(paraDict[paras]) - if (paras.startswith('dcf') and paras.endswith(('path', 'config'))): - value = "'%s'" % value - guc_paras_str += " -c \"%s=%s\" " % (paras, value) - if (i % MAX_PARA_NUMBER == 0): - guc_paras_str_list.append(guc_paras_str) - i = 0 - guc_paras_str = "" - if guc_paras_str != "": - guc_paras_str_list.append(guc_paras_str) - - for parasStr in guc_paras_str_list: - self.doGUCConfig(setMode, parasStr, False) + new_ln.append(line) + + # 追加新增 + for k, v in kv.items(): + if k not in touched: + new_ln.append(f"{k} = {v}") + + FileUtil.writeFile(ini_path, new_ln, "w") + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, ini_path) + self.logger.debug(f"Rewrite {ini_path} with {len(kv)} keys.") def get_streaming_relate_dn_ips(self, instance): """ @@ -526,25 +492,3 @@ class Kernel(BaseComponent): dn_ip_list.append(node_ip) self.logger.debug("Got streaming cluster pg_hba ips %s." 
% dn_ip_list) return dn_ip_list - - def removeIpInfoOnPghbaConfig(self, ipAddressList): - """ - """ - i = 0 - GUCParasStr = "" - GUCParasStrList = [] - pg_user = ClusterUser.get_pg_user() - for ipAddress in ipAddressList: - i += 1 - submask_length = NetUtil.get_submask_len(ipAddress) - GUCParasStr += " -h \"host all all %s/%s\"" % (ipAddress, submask_length) - GUCParasStr += " -h \"host all %s %s/%s\"" % (pg_user, ipAddress, submask_length) - if i * 2 % MAX_PARA_NUMBER == 0: - GUCParasStrList.append(GUCParasStr) - i = 0 - GUCParasStr = "" - if (GUCParasStr != ""): - GUCParasStrList.append(GUCParasStr) - - for parasStr in GUCParasStrList: - self.doGUCConfig("set", parasStr, True) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 4ca7a21d..e5750e2b 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -20,6 +20,7 @@ import os import sys import re +import subprocess import socket sys.path.append(sys.path[0] + "/../../") @@ -616,71 +617,34 @@ class InstallImpl: def startCluster(self): """ - function: start cluster - input : NA - output: NA + Cantian 单节点启动 """ - # Start cluster applications - if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo) == 0: - self.context.logger.debug("No CM configuration, start cluster with openGauss om.") - else: - self.context.logger.debug("Start Cluster with cm_ctl tool.") - self.cm_start_cluster() - return - cmd = "source %s;" % self.context.mpprcFile - cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( - OMCommand.getLocalScript("Local_Install"), - ACTION_START_CLUSTER, - self.context.user + ":" + self.context.group, - self.context.xmlFile, - self.context.clusterInfo.appPath, - self.context.clusterInfo.name, self.context.localLog, - self.getCommandOptions()) - self.context.logger.debug("Command for start cluster: %s" % cmd) - CmdExecutor.execCommandWithMode( - cmd, - self.context.sshTool, - self.context.isSingle or self.context.localMode, - self.context.mpprcFile) - cmd = "source %s;" % self.context.mpprcFile - cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( - OMCommand.getLocalScript("Local_Install"), - ACTION_BUILD_STANDBY, - self.context.user + ":" + self.context.group, - self.context.xmlFile, - self.context.clusterInfo.appPath, - self.context.clusterInfo.name, self.context.localLog, - self.getCommandOptions()) - self.context.logger.debug("Command for build standby: %s" % cmd) + logger = self.context.logger + logger.debug("==> Enter startCluster (Cantian single-node)") + + ct_data = EnvUtil.getEnv("PGDATA") + if not ct_data: + logger.debug("PGDATA environment variable is not defined.") + ct_data = "/opt/openGauss/install/data/dn1/" + + logger.debug("PGDATA = %s", ct_data) + + cmd = ( + "source %s; " + "bash -c 'nohup cantiand open -D $PGDATA > /dev/null 2>&1 &'" + % self.context.mpprcFile + ) + logger.debug("start cmd = %s", cmd) CmdExecutor.execCommandWithMode( cmd, self.context.sshTool, self.context.isSingle or self.context.localMode, - self.context.mpprcFile) + self.context.mpprcFile + ) - # build casecade stand by - cmd = "source %s;" % self.context.mpprcFile - cmd += "%s -t %s -U %s -X %s -R %s -c %s -l %s %s" % ( - OMCommand.getLocalScript("Local_Install"), - ACTION_BUILD_CASCADESTANDBY, - self.context.user + ":" + self.context.group, - self.context.xmlFile, - self.context.clusterInfo.appPath, - self.context.clusterInfo.name, self.context.localLog, - self.getCommandOptions()) - self.context.logger.debug("Command for build cascade standby: %s" % 
cmd) - for hostname in self.context.sshTool.hostNames: - CmdExecutor.execCommandWithMode( - cmd, - self.context.sshTool, - self.context.isSingle or self.context.localMode, - self.context.mpprcFile, [hostname]) - if DefaultValue.get_cm_server_num_from_static( - self.context.clusterInfo) == 0: - # no cm - OMCommand.wait_for_normal(self.context.logger, self.context.user) - - self.context.logger.log("Successfully started cluster.") + OMCommand.wait_for_cantiand(self.context.logger, self.context.user) + + logger.log("Successfully started Cantian cluster.") def doStart(self): """ diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py index 7d8a544b..559d5cc8 100644 --- a/script/impl/install/OLAP/InstallImplOLAP.py +++ b/script/impl/install/OLAP/InstallImplOLAP.py @@ -321,7 +321,8 @@ class InstallImplOLAP(InstallImpl): if self.context.clusterInfo.float_ips: self.config_cm_res_json() self.updateInstanceConfig() - self.updateHbaConfig() + # cantian 不需要,后面删除 + # self.updateHbaConfig() def checkMemAndCores(self): """ diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py index 7630a7d0..b366ba8b 100644 --- a/script/impl/preinstall/PreinstallImpl.py +++ b/script/impl/preinstall/PreinstallImpl.py @@ -1693,6 +1693,40 @@ class PreinstallImpl: cmd += (' --env ' + self.context.mpprcFile) os.system(cmd) + def prepareMountSudoers(self): + """ + Add NOPASSWD sudo for mount / umount / mkdir on every node. + """ + if not self.context.current_user_root: + self.context.logger.debug("prepareMountSudoers skipped (not root).") + return + + sudo_line = ( + f"{self.context.user} ALL=(root) NOPASSWD:/bin/mount,/bin/umount," + f"/bin/mkdir,/bin/chown" + ) + sudo_file = f"/etc/sudoers.d/{self.context.user}_mount" + + shell_cmd = ( + f"echo '{sudo_line}' > {sudo_file} && " + f"chmod 440 {sudo_file} && " + f"visudo -cf {sudo_file}" + ) + + self.context.logger.debug(f"prepareMountSudoers CMD = {shell_cmd}") + + self.context.sshTool.executeCommand( + shell_cmd, + DefaultValue.SUCCESS, + [], + self.context.mpprcFile + ) + + self.context.logger.log( + f"NOPASSWD sudo for mount/umount written to {sudo_file} on all nodes.", + "constant" + ) + def doPreInstall(self): """ function: the main process of preinstall @@ -1719,6 +1753,8 @@ class PreinstallImpl: self.distributePackages() # create user and exchange keys for database user self.createOSUser() + # allow mount/umount without password + self.prepareMountSudoers() # set tool env on all host self.set_tool_env() # prepare sshd service for user. 
@@ -1765,7 +1801,8 @@ class PreinstallImpl: # set arm optimization self.setArmOptimization() # fix server package mode - self.fixServerPackageOwner() + # self.fixServerPackageOwner() + # cantian 暂时忽略 # unreg the disk of the dss and about self.dss_init() # set user cron diff --git a/script/local/CheckConfig.py b/script/local/CheckConfig.py index 0f4357fc..c6b24e5c 100644 --- a/script/local/CheckConfig.py +++ b/script/local/CheckConfig.py @@ -180,7 +180,8 @@ class CheckNodeEnv(LocalBaseOM): FileUtil.getchangeFileModeCmd(user_dir) self.logger.debug("Command to change the obs log setting.") # change the obs log setting file distribute package - self.changeObsLogSetting() + # 容灾需要 + # self.changeObsLogSetting() except Exception as e: self.logger.logExit(str(e)) diff --git a/script/local/ConfigHba.py b/script/local/ConfigHba.py index bae1932d..2fa37722 100644 --- a/script/local/ConfigHba.py +++ b/script/local/ConfigHba.py @@ -226,15 +226,6 @@ class ConfigHba(LocalBaseOM): except Exception as e: raise Exception(str(e)) - def remove_streaming_config(self, component): - """ - remove dn & cn pg_hba for streaming stop - """ - ip_segment_list = list(set([NetUtil.get_ip_cidr_segment(remove_ip) for remove_ip in self.removeIps])) - for ip_segment in ip_segment_list: - ip_remove_str = "-h \"host replication all %s\" " % ip_segment - component.doGUCConfig("set", ip_remove_str, True) - def __configAnInstance(self, component): """ function: set hba config for single component @@ -260,13 +251,6 @@ class ConfigHba(LocalBaseOM): self.logger.debug("The %s does not exist." % hbaFile) return - component.setPghbaConfig(self.allIps, try_reload=self.try_reload, - float_ips=self.clusterInfo.float_ips) - if len(self.removeIps) != 0: - component.removeIpInfoOnPghbaConfig(self.removeIps) - self.remove_streaming_config(component) - - if __name__ == '__main__': """ function: config database node instance hba.conf diff --git a/script/local/Install.py b/script/local/Install.py index 9da21958..5f1ed249 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -496,16 +496,16 @@ class Install(LocalBaseOM): FileUtil.cpFile(dirName + "/../../script", self.installPath + "/bin/") - # cp $GAUSSHOME/bin/script/gspylib/etc/sql/pmk to /share/postgresql - destPath = self.installPath + "/share/postgresql/" - pmkPath = self.installPath + "/bin/script/gspylib/etc/sql/" - pmkFile = pmkPath + "pmk_schema.sql" - if os.path.exists(pmkFile): - FileUtil.cpFile(pmkFile, destPath) - - pmk_singe_inst_file = pmkPath + "pmk_schema_single_inst.sql" - if os.path.exists(pmk_singe_inst_file): - FileUtil.cpFile(pmk_singe_inst_file, destPath) + # # cp $GAUSSHOME/bin/script/gspylib/etc/sql/pmk to /share/postgresql + # destPath = self.installPath + "/share/postgresql/" + # pmkPath = self.installPath + "/bin/script/gspylib/etc/sql/" + # pmkFile = pmkPath + "pmk_schema.sql" + # if os.path.exists(pmkFile): + # FileUtil.cpFile(pmkFile, destPath) + # + # pmk_singe_inst_file = pmkPath + "pmk_schema_single_inst.sql" + # if os.path.exists(pmk_singe_inst_file): + # FileUtil.cpFile(pmk_singe_inst_file, destPath) # decompress CM package self.decompress_cm_package() @@ -698,10 +698,6 @@ class Install(LocalBaseOM): FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, libPath, True) sharePath = "'%s'/share" % self.installPath FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, sharePath, True) - etcPath = "'%s'/etc" % self.installPath - FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, etcPath, True) - includePath = "'%s'/include" % self.installPath - 
FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, includePath, True) tarFile = "'%s'/bin/'%s'" % (self.installPath, PackageInfo.get_package_back_name()) @@ -721,12 +717,8 @@ class Install(LocalBaseOM): # bin config file FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "'%s'/bin/alarmItem.conf" % self.installPath) - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, - "'%s'/bin/cluster_guc.conf" % self.installPath) FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "'%s'/bin/upgrade_version" % self.installPath) - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, - "'%s'/bin/retry_errcodes.conf" % self.installPath) FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "'%s'/bin/cluster_static_config" % self.installPath) diff --git a/script/local/LocalCheckOS.py b/script/local/LocalCheckOS.py index 8c89d709..2e189b69 100644 --- a/script/local/LocalCheckOS.py +++ b/script/local/LocalCheckOS.py @@ -31,27 +31,30 @@ from datetime import datetime localDirPath = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(sys.path[0] + "/../") -from gspylib.common.ParameterParsecheck import Parameter -from os_platform.UserPlatform import g_Platform -from gspylib.common.GaussLog import GaussLog -from gspylib.common.DbClusterInfo import dbClusterInfo -from gspylib.common.Common import DefaultValue -from gspylib.common.ErrorCode import ErrorCode -from base_utils.os.cmd_util import CmdUtil -from domain_utils.cluster_file.config_param import ConfigParam -from base_utils.os.disk_util import DiskUtil -from domain_utils.cluster_file.version_info import VersionInfo -from base_utils.os.net_util import NetUtil -from domain_utils.domain_common.cluster_constants import ClusterConstants -from os_platform.linux_distro import LinuxDistro -from os_platform.common import SUPPORT_RHEL6X_VERSION_LIST, \ - SUPPORT_RHEL7X_VERSION_LIST, SUPPORT_SUSE12X_VERSION_LIST, \ - SUPPORT_SUSE11X_VERSION_LIST, SUPPORT_RHEL8X_VERSION_LIST, \ - SUPPORT_RHEL_LEAST_VERSION - -sys.path.insert(0, localDirPath + "/../../lib") -import psutil +try: + sys.path.append(sys.path[0] + "/../") + from gspylib.common.ParameterParsecheck import Parameter + from os_platform.UserPlatform import g_Platform + from gspylib.common.GaussLog import GaussLog + from gspylib.common.DbClusterInfo import dbClusterInfo + from gspylib.common.Common import DefaultValue + from gspylib.common.ErrorCode import ErrorCode + from base_utils.os.cmd_util import CmdUtil + from domain_utils.cluster_file.config_param import ConfigParam + from base_utils.os.disk_util import DiskUtil + from domain_utils.cluster_file.version_info import VersionInfo + from base_utils.os.net_util import NetUtil + from domain_utils.domain_common.cluster_constants import ClusterConstants + from os_platform.linux_distro import LinuxDistro + from os_platform.common import SUPPORT_RHEL6X_VERSION_LIST, \ + SUPPORT_RHEL7X_VERSION_LIST, SUPPORT_SUSE12X_VERSION_LIST, \ + SUPPORT_SUSE11X_VERSION_LIST, SUPPORT_RHEL8X_VERSION_LIST, \ + SUPPORT_RHEL_LEAST_VERSION + + sys.path.insert(0, localDirPath + "/../../lib") + import psutil +except ImportError as err: + sys.exit("[GAUSS-52200]: Cannot import module: %s."% str(err)) ACTION_CHECK_OS_VERSION = "Check_OS_Version" ACTION_CHECK_KERNEL_VERSION = "Check_Kernel_Version" diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index c6d3a8b5..82834e96 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1825,15 +1825,16 @@ Common options: output: True #need to set cgroup False #not need to set cgroup """ - 
#Determine whether action is expansion. - hostName = NetUtil.GetHostIpOrName() - if len(self.clusterInfo.newNodes) == 0: - return True - #Determine whether the current node is a new node - for node in self.clusterInfo.newNodes: - if hostName == node.name: - return True - self.logger.debug("The current node is the old node for expansion, no need to set cgroup.") + # cantian 暂时不支持cgroup + # #Determine whether action is expansion. + # hostName = NetUtil.GetHostIpOrName() + # if len(self.clusterInfo.newNodes) == 0: + # return True + # #Determine whether the current node is a new node + # for node in self.clusterInfo.newNodes: + # if hostName == node.name: + # return True + # self.logger.debug("The current node is the old node for expansion, no need to set cgroup.") return False def decompressPkg2Cgroup(self): -- Gitee From 08516b67be6d0299831b693ea2997bae9921be3f Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 24 May 2025 10:30:41 +0800 Subject: [PATCH 002/144] cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM --- script/base_utils/template/cluster_tmp.xml | 6 +- script/base_utils/template/resource_en.json | 6 +- script/base_utils/template/resource_zh.json | 6 +- script/base_utils/template/xml_constant.py | 6 +- script/gspylib/common/Common.py | 31 ++- script/gspylib/common/DbClusterInfo.py | 74 ++++-- script/gspylib/common/OMCommand.py | 52 ++-- script/gspylib/component/Kernel/Kernel.py | 275 ++++++++++--------- script/impl/install/InstallImpl.py | 31 +-- script/impl/om/OLAP/OmImplOLAP.py | 276 ++++++++++++-------- script/local/PreInstallUtility.py | 3 + 11 files changed, 443 insertions(+), 323 deletions(-) diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 90872ca5..5a78220e 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -12,9 +12,9 @@ - - - + + + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 301e3681..774a585c 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -14,9 +14,9 @@ "input_ddes": "Please enter 1/2 for selection, the default option is 1) Do not deploy", "intput_dss_home": "Please enter dss_home dir(default:/opt/openGauss/install/dss_home)", "intput_ss_dss_vg_name": "Please enter 'ss_dss_vg_name'(default:data)", - "input_voting_disk_path": "Please enter the path information for 'votingDiskPath'(default:/dev/sde)", - "input_dss_vg_info": "Please enter the path information for 'dss_vg_info'(default:data:/dev/sdb,p0:/dev/sdc,p1:/dev/sdd)", - "input_share_disk_dir": "Please enter the path information for 'shareDiskDir'(default:/dev/sdf)", + "input_voting_disk_path": "Please enter the path information for 'votingDiskPath'(default:/dev/sdc)", + "input_dss_vg_info": "Please enter the path information for 'dss_vg_info'(default:data:/dev/sda,p0:/dev/sdb)", + "input_share_disk_dir": "Please enter the path information for 'shareDiskDir'(default:/dev/sdd)", "deploy": "Deploy", "not_deploy": "Do not deploy", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 8b4b0b04..82c12835 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ 
-14,9 +14,9 @@ "input_ddes": "请输入 1/2 进行选择,默认选项是 1)不部署", "intput_dss_home": "请输入'dss_home'的路径信息(默认:/opt/openGauss/install/dss_home)", "intput_ss_dss_vg_name": "请输入'ss_dss_vg_name'的路径信息(默认:data)", - "input_voting_disk_path": "请输入'votingDiskPath'的路径信息(默认是:/dev/sde)", - "input_dss_vg_info": "请输入'dss_vg_info'的路径信息(默认是:data:/dev/sdb,p0:/dev/sdc,p1:/dev/sdd)", - "input_share_disk_dir": "请输入'shareDiskDir'的路径信息(默认是:/dev/sdf)", + "input_voting_disk_path": "请输入'votingDiskPath'的路径信息(默认是:/dev/sdc)", + "input_dss_vg_info": "请输入'dss_vg_info'的路径信息(默认是:data:/dev/sda,p0:/dev/sdb)", + "input_share_disk_dir": "请输入'shareDiskDir'的路径信息(默认是:/dev/sdd)", "deploy": "部署", "not_deploy": "不部署", diff --git a/script/base_utils/template/xml_constant.py b/script/base_utils/template/xml_constant.py index 211fff37..756f2fbe 100644 --- a/script/base_utils/template/xml_constant.py +++ b/script/base_utils/template/xml_constant.py @@ -45,11 +45,11 @@ class XmlConstant: DSS_VG_NAME_DIR = "data" - DSS_VG_INFO_DIR = "data:/dev/sdb,p0:/dev/sdc,p1:/dev/sdd" + DSS_VG_INFO_DIR = "data:/dev/sda,p0:/dev/sdb" - VOTING_DIR = "/dev/sde" + VOTING_DIR = "/dev/sdc" - SHAREDISK_DIR = "/dev/sdf" + SHAREDISK_DIR = "/dev/sdd" @staticmethod def get_current_dir(): diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 4e993dd9..f94e0b6d 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1182,17 +1182,28 @@ class DefaultValue(): "umask 077"] # ---------- Cantian‑specific ---------- envList.extend([ - f"export CM_CONFIG_PATH=$PGDATA/cm_config.ini", - f"export CTDB_HOME=$PGDATA", - f"export GSDB_HOME=$PGDATA", - f"export DSS_HOME=$PGDATA/dss", - f"export DSS_DATA_DISK=$DSS_HOME/data", - f"export share_fs=127.0.0.1:share_fs" + "export CM_CONFIG_PATH=$PGDATA/cm_config.ini", + "export CTDB_HOME=$PGDATA", + "export GSDB_HOME=$PGDATA", + "export share_fs=127.0.0.1:share_fs" ]) - if agentPath != '': - envList.append("export AGENTPATH=%s" % agentPath) - if agentLogPath != '': - envList.append("export AGENTLOGPATH=%s" % agentLogPath) + + if "DSS_HOME" in os.environ and os.environ["DSS_HOME"].strip(): + envList.append("export DSS_DATA_DISK=$DSS_HOME/data") + else: + default_dss_home = os.path.abspath( + os.path.join(installPath, os.pardir, "dss_home") + ) + envList.extend([ + f"export DSS_HOME={default_dss_home}", + "export DSS_DATA_DISK=$DSS_HOME/data" + ]) + + if agentPath: + envList.append(f"export AGENTPATH={agentPath}") + if agentLogPath: + envList.append(f"export AGENTLOGPATH={agentLogPath}") + DefaultValue.setComponentEnvVariable(userProfile, envList) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 77de17b7..0a166170 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1320,51 +1320,67 @@ class dbClusterInfo(): def queryClsInfoParallel(self, hostName, sshtools, mpprcFile, querytype): """ - function : queryClsInfoParallel - Query cluster information in parallel. - input : String - output : Map + Query cluster information in parallel. 
""" + remote_nodes = [n for n in self.dbNodes if n.name != hostName] + node_tool_map = {node.name: sshtools[i] for i, node in enumerate(remote_nodes)} + dbInfoList = [] - index = 0 for dbNode in self.dbNodes: + sshtool = node_tool_map.get(dbNode.name) for dnInst in dbNode.datanodes: - sshtool = None - if (dbNode.name != hostName): - sshtool = sshtools[index] - index += 1 - querycmd = "" + if querytype == "status": - querycmd = "gs_ctl query -D %s" % dnInst.datadir + data_dir = dnInst.datadir + is_primary = (dnInst.instanceType == MASTER_INSTANCE) + role = "primary" if is_primary else "standby" + role_cap = "Primary" if is_primary else "Standby" + + # ctsql 视图查询 + grep 判断 OPEN + sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" + ctsql_cmd = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql_cmd}\"" + probe = f"{ctsql_cmd} | grep -Eq '[[:space:]]OPEN[[:space:]]'" + + echo_ok = ( + "echo -e 'instance_state : Normal\\n" + "ha_state : Normal\\n" + f"instance_role : {role}\\n" + f"local_role : {role_cap}\\n" + "db_state : Normal'" + ) + echo_bad = ( + "echo -e 'instance_state : Down\\n" + "ha_state : Unknown\\n" + "instance_role : unknown\\n" + "local_role : Unknown\\n" + "db_state : Down'" + ) + querycmd = f"{probe} && {echo_ok} || {echo_bad}" + elif querytype == "port": - querycmd = "gs_guc check -D %s -c port" % dnInst.datadir - dbName = dbNode.name + querycmd = "echo port : 5432" + dbInfoList.append({ - "name": dbName, + "name": dbNode.name, + "dataDir": dnInst.datadir, "command": querycmd, "sshtool": sshtool }) - def queryInstance(dbInfo): - dnName = dbInfo["name"] - command = dbInfo["command"] - sshtool = dbInfo["sshtool"] - status = 0 - output = "" - - if dnName != hostName: - (statusMap, output) = sshtool.getSshStatusOutput( - command, [dnName], mpprcFile) - if statusMap[dnName] != 'Success': - status = -1 + def queryInstance(job): + node, ddir = job["name"], job["dataDir"] + cmd, tool = job["command"], job["sshtool"] + + if tool: + statusMap, output = tool.getSshStatusOutput(cmd, [node], mpprcFile) + status = 0 if statusMap.get(node) == "Success" else 1 else: - (status, output) = subprocess.getstatusoutput(command) + status, output = subprocess.getstatusoutput(cmd) - global_cls_query_rst[dnName+command.split()[-1]] = [status, output] + global_cls_query_rst[node + ddir] = [status, output] global global_cls_query_rst parallelTool.parallelExecute(queryInstance, dbInfoList) - return global_cls_query_rst def queryClsInfo(self, hostName, sshtools, mpprcFile, cmd, logger=None): diff --git a/script/gspylib/common/OMCommand.py b/script/gspylib/common/OMCommand.py index ba674dea..db1ca927 100644 --- a/script/gspylib/common/OMCommand.py +++ b/script/gspylib/common/OMCommand.py @@ -21,6 +21,7 @@ import os import sys import time import subprocess +import re from datetime import datetime, timedelta from multiprocessing.dummy import Pool as ThreadPool @@ -264,27 +265,48 @@ class OMCommand(): os.remove(status_file) @staticmethod - def wait_for_cantiand(logger, datadir, timeout: int = 300, delta: int = 5): + def isDBOpen(datadir: str) -> bool: """ - 等待单节点 cantiand 进入 OPEN 状态 + 通过查询 DV_DATABASE 视图,单次判断数据库是否处于 OPEN 状态。 """ - logger.debug("Waiting for cantiand to reach OPEN state…") + import re - pgrep_cmd = r"pgrep -f 'cantiand.*open'" + sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" + ctsql_cmd = f"ctsql '/' as sysdba -q -D {datadir} -c \"{sql_cmd}\"" - retry_times = 1 if timeout <= 0 else max(1, timeout // delta) + status, output = CmdUtil.retryGetstatusoutput(ctsql_cmd, 
retry_time=0) + if status != 0 or "1 rows fetched" not in output: + return False - status, _ = CmdUtil.retryGetstatusoutput( - pgrep_cmd, - retry_time=retry_times, - sleep_time=delta - ) + try: + data_line = re.split(r"\n+", output.strip())[-2].strip() + db_status = re.split(r"\s+", data_line)[1].strip().upper() + except Exception: + return False + + return db_status == "OPEN" + + @staticmethod + def wait_for_cantiand(logger, datadir: str, timeout: int = 300, delta: int = 5): + """ + 周期性调用 isDBOpen,直到 cantiand 进入 OPEN 状态或超时。 + - timeout:最大等待秒数(<=0 表示无限等待) + - delta :两次检查间隔秒数 + """ + logger.debug("Waiting for cantiand to reach OPEN state…") + deadline = (time.time() + timeout) if timeout > 0 else float("inf") - if status == 0: - logger.log("cantiand started and is in OPEN state.") - return + while True: + if OMCommand.isDBOpen(datadir): + logger.debug("cantiand started and database is OPEN.") + return + + if time.time() >= deadline: + break + + time.sleep(delta) raise Exception( ErrorCode.GAUSS_528["GAUSS_52800"] % - ("Timeout", "cantiand not started") - ) \ No newline at end of file + ("Timeout", "cantiand not in OPEN state") + ) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 86535df8..dba3f765 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -69,65 +69,128 @@ class Kernel(BaseComponent): start/stop/query single instance """ - def start(self, is_dss_mode: bool = False): + def start(self, is_dss_mode: bool = False, time_out=DefaultValue.TIMEOUT_CLUSTER_START): """ 启动 Cantian 实例 • 普通模式:后台执行 `cantiand open -D ` • DSS 模式目前跳过 """ - if is_dss_mode: - self.logger.debug("DSS mode start skipped (not yet supported).") - return + # dss 需要修改 + # if is_dss_mode: + # self.logger.debug("DSS mode start skipped (not yet supported).") + # return data_dir = self.instInfo.datadir cmd = f"nohup cantiand open -D {data_dir} > /dev/null 2>&1 &" self.logger.debug(f"start cmd = {cmd}") - status, out = subprocess.getstatusoutput(cmd) if status != 0: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "Cantian instance" - + "\nError: " + out) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] + % "Cantian instance" + "\nError: " + out) - for _ in range(TIMR_OUT): - st, pid, _ = subprocess.getstatusoutput( - f"pgrep -f \"cantiand[^ ]* open -D {data_dir}\"" - ) - if st == 0 and pid.strip(): - self.logger.debug(f"Cantian running, pid={pid.strip()}") + for _ in range(time_out): + st, _ = subprocess.getstatusoutput( + f"pgrep -f \"cantiand[^ ]* open -D {data_dir}\"") + if st == 0: self.logger.log("Cantian instance start success.") return time.sleep(1) - # 若超时 - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "Cantian instance" - + " Error: process did not come up within 60 seconds.") + raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] + % "Cantian instance" + + " Error: timeout waiting for process ready.") - def stop(self): + def stop(self, stopMode: str = "", time_out: int = 300): """ - 停止 Cantian 实例: - 通过 `pidof cantiand` + `grep memdir` 找出进程后执行 kill -9 + 停止 Cantian 实例 """ memdir = f"/dev/shm/gaussdb/{self.instInfo.instanceId}" - find_cmd = f"ps -ef | grep cantiand | grep '{memdir}' | grep -v grep | awk '{{print $2}}'" - status, pids = subprocess.getstatusoutput(find_cmd) - - if status != 0 or not pids.strip(): - self.logger.log("No Cantian process found for this instance; skip stop.") + find_cmd = ( + "ps -eo pid,cmd | grep cantiand | " + f"grep '{memdir}' | grep -v grep | awk '{{print $1}}'" + 
) + status, out = CmdUtil.retryGetstatusoutput(find_cmd, retry_time=0) + pids = out.strip().split() if status == 0 and out.strip() else [] + + if not pids: + self.logger.log("No Cantian process found; skip stop.") return - # 执行 kill -9 stop cantian 进程 - for pid in pids.strip().splitlines(): + mode = stopMode or "fast" + if mode == "immediate": + self.logger.debug("Immediate mode; force kill -9.") + else: + pgdata = self.instInfo.datadir + graceful_cmd = ( + f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + ) + self.logger.debug(f"Graceful shutdown cmd: {graceful_cmd}") + CmdUtil.retryGetstatusoutput(graceful_cmd, retry_time=0) + + end_time = time.time() + time_out + while time.time() < end_time: + status, out = CmdUtil.retryGetstatusoutput(find_cmd, retry_time=0) + if not out.strip(): + self.logger.log("Cantian instance stopped gracefully.") + return + time.sleep(2) + + self.logger.debug("Graceful shutdown timeout; escalate to kill -9.") + + status, out = CmdUtil.retryGetstatusoutput(find_cmd, retry_time=0) + pids = out.strip().split() if status == 0 and out.strip() else [] + for pid in pids: kill_cmd = f"kill -9 {pid}" - self.logger.debug(f"Stopping Cantian instance: {kill_cmd}") - status, output = subprocess.getstatusoutput(kill_cmd) - if status != 0: - raise Exception(f"Failed to kill Cantian process {pid}:\n{output}") - self.logger.log(f"Cantian process {pid} killed.") + self.logger.debug(f"Force killing Cantian pid {pid}") + st, msg = CmdUtil.retryGetstatusoutput(kill_cmd, retry_time=0) + if st != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % + "Cantian instance" + f"\nError:\n{msg}") + + status, out = CmdUtil.retryGetstatusoutput(find_cmd, retry_time=0) + if out.strip(): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % + "Cantian instance" + "\nError: process still alive.") + self.logger.log("Cantian instance stopped (force).") + + def isDbOpen(self, timeout: int = 30, interval: int = 5) -> bool: + """ + Cantian 健康探针(视图版) + 轮询 DV_DATABASE,若 STATUS 列为 OPEN 即返回 True。 + """ + pgdata = self.instInfo.datadir + sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" + ctsql = (f"ctsql '/' as sysdba -q -D {pgdata} " + f"-c \"{sql_cmd}\"") + + expire_ts = time.time() + timeout + while time.time() < expire_ts: + stat, out = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) + if stat != 0: + self.logger.debug(f"ctsql failed: {out.strip()}") + time.sleep(interval) + continue + + if "1 rows fetched" not in out: + time.sleep(interval) + continue + + try: + row_line = re.split(r"\n+", out.strip())[-2].strip() + db_status = re.split(r"\s+", row_line)[1].strip().upper() + except Exception as err: + self.logger.debug(f"parse DV_DATABASE failed: {err}") + time.sleep(interval) + continue - def isPidFileExist(self): - pidFile = "%s/postmaster.pid" % self.instInfo.datadir - return os.path.isfile(pidFile) + if db_status == "OPEN": + return True + + time.sleep(interval) + + self.logger.debug("Cantiand start timeout") + return False def build(self, buidMode="full", standByBuildTimeout=300): """ @@ -241,102 +304,66 @@ class Kernel(BaseComponent): def cleanDir(self, instDir): """ - function: Clean the dirs - input : instDir - output: NA + 删除实例目录(Cantian + openGauss + DSS) """ - if (not os.path.exists(instDir)): + if not os.path.exists(instDir): return - dataDir = [] dataDir = os.listdir(instDir) - if (os.getuid() == 0): - pglDir = '%s/pg_location' % instDir - isPglDirEmpty = False - if (os.path.exists(pglDir) and len(os.listdir(pglDir)) == 0): - isPglDirEmpty = True - if 
(len(dataDir) == 0 or isPglDirEmpty): + if os.getuid() == 0: + pglDir = f"{instDir}/pg_location" + isPglDirEmpty = os.path.isdir(pglDir) and len(os.listdir(pglDir)) == 0 + if not dataDir or isPglDirEmpty: FileUtil.cleanDirectoryContent(instDir) - else: - for info in dataDir: - if (str(info) == "pg_location"): - resultMount = [] - resultDir = [] - pglDir = '%s/pg_location' % instDir - - # delete all files in the mount point - cmd = "%s | %s '%s' | %s '{printf $3}'" % \ - (CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), - pglDir, CmdUtil.getAwkCmd()) - (status, outputMount) = subprocess.getstatusoutput(cmd) - if (status != 0): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " Error:\n%s." % - str(outputMount) + - "The cmd is %s" % cmd) - else: - if (len(outputMount) > 0): - resultMount = str(outputMount).split() - for infoMount in resultMount: - FileUtil.cleanDirectoryContent(infoMount) - else: - FileUtil.cleanDirectoryContent(instDir) - continue - - # delete file in the pg_location directory - if (not os.path.exists(pglDir)): - continue - cmd = "cd '%s'" % pglDir - (status, output) = subprocess.getstatusoutput(cmd) - if (status != 0): - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % - cmd + " Error: \n%s " % output) - - outputFile = FileUtil.findFile(".", "f", "type") - if (len(outputFile) > 0): - for infoFile in outputFile: - tmpinfoFile = pglDir + infoFile[1:] - for infoMount in resultMount: - if (tmpinfoFile.find(infoMount) < 0 and - infoMount.find(tmpinfoFile) < 0): - realFile = "'%s/%s'" % (pglDir, infoFile) - FileUtil.removeFile(realFile, "shell") - - # delete directory in the pg_location directory - cmd = "if [ -d '%s' ]; then cd '%s' && find -type d; fi" \ - % \ - (pglDir, pglDir) - (status, outputDir) = subprocess.getstatusoutput(cmd) - if (status != 0): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " Error:\n%s." % - str(outputDir) + "The cmd is %s" % cmd) - else: - resultDir = FileUtil.findFile(".", "d", "type") - resultDir.remove(".") - if (len(resultDir) > 0): - for infoDir in resultDir: - tmpinfoDir = pglDir + infoDir[1:] - for infoMount in resultMount: - if (tmpinfoDir.find(infoMount) < 0 and - infoMount.find(tmpinfoDir) < 0): - realPath = "'%s/%s'" % ( - pglDir, infoDir) - FileUtil.removeDirectory(realPath) - - ignores = [ - 'pg_location', 'cfg', 'log', 'dss_inst.ini', 'dss_vg_conf.ini', - 'nodedata.cfg', '.', '..' - ] - extra_cmd = '! -name'.join([' \'{}\' '.format(ig) for ig in ignores]) - - cmd = "if [ -d '%s' ];then cd '%s' && find . ! 
-name %s -print0" \ - " |xargs -r -0 -n100 rm -rf; fi " % (instDir, instDir, extra_cmd) - (status, output) = subprocess.getstatusoutput(cmd) - if (status != 0): + else: + FileUtil.cleanDirectoryContent(instDir) + return + + # ============ omm :处理 pg_location & DSS ============ + for info in dataDir: + if info == "pg_location": + pglDir = f"{instDir}/pg_location" + cmd = "{} | {} '{}' | {} '{{printf $3}}'".format( + CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), + pglDir, CmdUtil.getAwkCmd()) + status, mp_out = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + f" Error:\n{mp_out}.\nThe cmd is {cmd}") + + for mp in mp_out.split(): + FileUtil.cleanDirectoryContent(mp) + subprocess.call(f"umount -l {mp}", shell=True) + + FileUtil.cleanDirectoryContent(pglDir) + + for ini in ("dss_inst.ini", "dss_vg_conf.ini"): + ini_path = os.path.join(instDir, ini) + if os.path.isfile(ini_path): + with open(ini_path) as fp: + for line in fp: + if "=" in line: + mount_dir = line.split("=", 1)[1].strip() + if mount_dir and os.path.isdir(mount_dir): + FileUtil.cleanDirectoryContent(mount_dir) + subprocess.call(f"umount -l {mount_dir}", shell=True) + + # ============ 删除除 "."、".." 外所有条目 ============= + cmd = ("if [ -d '{0}' ]; then cd '{0}' && " + "find . ! -name '.' ! -name '..' -print0 | " + "xargs -r -0 -n100 rm -rf; fi").format(instDir) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + f" Error:\n{output}.\nThe cmd is {cmd}") + + # ============ 最终校验 ============= + if os.listdir(instDir): + FileUtil.cleanDirectoryContent(instDir) + if os.listdir(instDir): raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " Error:\n%s." 
% str(output) + - "The cmd is %s" % cmd) + instDir + " still not empty after cleanDir().") + self.logger.debug(f"[Cantian] cleanDir finished: {instDir} is empty.") def uninstall(self, instNodeName): """ diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index e5750e2b..f3888c71 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -616,25 +616,19 @@ class InstallImpl: isSingle=self.context.isSingle) def startCluster(self): - """ - Cantian 单节点启动 - """ - logger = self.context.logger - logger.debug("==> Enter startCluster (Cantian single-node)") - - ct_data = EnvUtil.getEnv("PGDATA") - if not ct_data: - logger.debug("PGDATA environment variable is not defined.") - ct_data = "/opt/openGauss/install/data/dn1/" - - logger.debug("PGDATA = %s", ct_data) + if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo): + self.context.logger.debug("Start cluster with cm_ctl tool.") + self.cm_start_cluster() + return cmd = ( - "source %s; " - "bash -c 'nohup cantiand open -D $PGDATA > /dev/null 2>&1 &'" - % self.context.mpprcFile + f"source {self.context.mpprcFile};" + f"{OMCommand.getLocalScript('Local_Install')} -t {ACTION_START_CLUSTER} " + f"-U {self.context.user}:{self.context.group} -X {self.context.xmlFile} " + f"-R {self.context.clusterInfo.appPath} -c {self.context.clusterInfo.name} " + f"-l {self.context.localLog} {self.getCommandOptions()}" ) - logger.debug("start cmd = %s", cmd) + self.context.logger.debug(f"[Cantian] start cmd = {cmd}") CmdExecutor.execCommandWithMode( cmd, self.context.sshTool, @@ -642,9 +636,10 @@ class InstallImpl: self.context.mpprcFile ) - OMCommand.wait_for_cantiand(self.context.logger, self.context.user) + if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo) == 0: + OMCommand.wait_for_cantiand(self.context.logger, self.context.user) - logger.log("Successfully started Cantian cluster.") + self.context.logger.log("Successfully started Cantian cluster.") def doStart(self): """ diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index f57a68c8..94a0cc38 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -39,6 +39,7 @@ from base_utils.os.env_util import EnvUtil from gspylib.component.DSS.dss_checker import DssConfig from gspylib.common.Common import DefaultValue, ClusterCommand from gspylib.common.generate_xml import GenerateXml +from gspylib.common.OMCommand import OMCommand from base_utils.os.cmd_util import CmdUtil @@ -197,102 +198,110 @@ class OmImplOLAP(OmImpl): def doStartCluster(self): """ - function: do start cluster - input: NA - output: NA + function: do start cluster (Cantian adapted) + input : NA + output : NA """ self.logger.debug("Operating: Starting.") - - # only stop cm components(cm_server cm_agent om_monitor) + + # CM branch print(self.context.g_opts.component) if self.context.g_opts.component == "CM": if self.context.clusterInfo.hasNoCm(): self.logger.log("No CM components to start.") else: - self.do_opt_cm_components('start') + self.do_opt_cm_components("start") return - - # if has cm and not in separately upgrade, will start cluster by cm_ctl command - if (not self.context.clusterInfo.hasNoCm() and self.context.g_opts.component != "DN"): - self.context.logger.debug("Have CM configuration, upgrade all" - " nodes together.") + + # start via CM if CM exists and component is not DN + if (not self.context.clusterInfo.hasNoCm() + and self.context.g_opts.component != "DN"): + 
self.context.logger.debug("Have CM configuration, upgrade all " + "nodes together.") self.doStartClusterByCm() return - - # Specifies the stop node - # Gets the specified node id - startType = "node" if self.context.g_opts.nodeName != "" else "cluster" - # Perform a start operation - self.logger.log("Starting %s." % startType) + + # ---------------- basic variables ---------------- + startType = "node" if self.context.g_opts.nodeName else "cluster" + self.logger.log(f"Starting {startType}.") self.logger.log("=========================================") + hostName = NetUtil.GetHostIpOrName() - # get the newest dynaminc config and send to other node self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName) - if self.context.g_opts.nodeName == "": - hostList = self.clusterInfo.getClusterNodeNames() - else: - hostList = [] - hostList.append(self.context.g_opts.nodeName) + + hostList = (self.clusterInfo.getClusterNodeNames() + if not self.context.g_opts.nodeName + else [self.context.g_opts.nodeName]) + self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) - if self.time_out is None: - time_out = DefaultValue.TIMEOUT_CLUSTER_START - else: - time_out = self.time_out - if self.context.g_opts.cluster_number: - cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s --cluster_number=%s" % ( - self.context.g_opts.mpprcFile, - OMCommand.getLocalScript("Local_StartInstance"), - self.context.user, self.context.clusterInfo.appPath, time_out, - self.context.g_opts.security_mode, self.context.g_opts.cluster_number) - else: - cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % ( - self.context.g_opts.mpprcFile, - OMCommand.getLocalScript("Local_StartInstance"), - self.context.user, self.context.clusterInfo.appPath, time_out, - self.context.g_opts.security_mode) - if self.dataDir != "": - cmd += " -D %s" % self.dataDir - failedOutput = '' - for nodeName in hostList: - (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, [nodeName]) - if statusMap[nodeName] != 'Success': + + # ---------------- start cantiand per instance ---------------- + failedOutput = "" + for dbNode in self.clusterInfo.dbNodes: + if dbNode.name not in hostList: + continue + + cmd_lines = [ + f"nohup cantiand open -D {inst.datadir} > /dev/null 2>&1 &" + for inst in dbNode.datanodes + ] + full_cmd = " && ".join(cmd_lines) + + statusMap, output = self.sshTool.getSshStatusOutput( + full_cmd, [dbNode.name], self.context.g_opts.mpprcFile + ) + + if statusMap.get(dbNode.name) != "Success": failedOutput += output - elif re.search("another server might be running", output): + elif re.search(r"\bERROR\b", output, re.IGNORECASE): self.logger.log(output) - elif re.search("] WARNING:", output): - tmp = '\n'.join(re.findall(".*] WARNING:.*", output)) - self.logger.log(output[0:output.find(":")] + '\n' + tmp) - if len(failedOutput): + + if failedOutput: self.logger.log("=========================================") - raise Exception( - ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, failedOutput)) + raise Exception(ErrorCode.GAUSS_536["GAUSS_53600"] % + (full_cmd, failedOutput)) + + # ---------------- instance-level health check ---------------- + for dbNode in self.clusterInfo.dbNodes: + if dbNode.name not in hostList: + continue + for inst in dbNode.datanodes: + OMCommand.wait_for_cantiand(self.logger, + inst.datadir, + DefaultValue.TIMEOUT_CLUSTER_START, + 5) + + # === cluster-level aggregate check if startType == "cluster": + # simple aggregate: every instance already OPEN 
above, + # but keep loop structure for compatibility + self.logger.log("Verifying cluster state ...") starttime = time.time() - cluster_state = "" - cmd = "source %s; gs_om -t status|grep cluster_state" \ - % self.context.g_opts.mpprcFile - if self.context.g_opts.component == "DN": - cmd = "source %s; gs_om -t query|grep cluster_state" \ - % self.context.g_opts.mpprcFile - while time.time() <= 30 + starttime: - status, output = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception( - ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" + - " After startup, check cluster_state failed") - else: - cluster_state = output.split()[-1] - if cluster_state != "Normal": - self.logger.log("Waiting for check cluster state...") - time.sleep(5) - else: - break - if cluster_state != "Normal": + deadline = starttime + 30 # 30 seconds window + while time.time() <= deadline: + not_open = [] + for dbNode in self.clusterInfo.dbNodes: + for inst in dbNode.datanodes: + sql = ("SELECT COUNT(*) FROM DV_DATABASE " + "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") + ctsql = (f"ctsql '/' as sysdba -q -D {inst.datadir} " + f"-c \"{sql}\" | grep -q '^ *1' ") + status = subprocess.call(ctsql, shell=True) + if status != 0: + not_open.append(f"{dbNode.name}:{inst.datadir}") + if not not_open: + break + self.logger.log("Waiting for cluster to reach OPEN: " + f"{', '.join(not_open)}") + time.sleep(5) + + if not_open: raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" - + " After startup, the last check results were" - " %s. Please check manually." - % cluster_state) + + " After startup, some instances are not OPEN: " + + ", ".join(not_open)) + + # ---------------- success ---------------- self.logger.log("=========================================") self.logger.log("Successfully started.") self.logger.debug("Operation succeeded: Start.") @@ -340,62 +349,99 @@ class OmImplOLAP(OmImpl): self.context.user, self.context.clusterInfo.appPath, action) (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, host_list) - + def doStopCluster(self): """ - function: do stop cluster - input: NA - output: NA + Cantian 集群 / 节点停止 """ self.logger.debug("Operating: Stopping.") - - # only stop cm components(cm_server cm_agent om_monitor) - print(self.context.g_opts.component) + if self.context.g_opts.component == "CM": if self.context.clusterInfo.hasNoCm(): self.logger.log("No CM components to stop.") else: - self.do_opt_cm_components('stop') + self.do_opt_cm_components("stop") return - - # if has cm and not in separately upgrade, will stop cluster by cm_ctl command - if not self.context.clusterInfo.hasNoCm() and self.context.g_opts.component != "DN": + + if (not self.context.clusterInfo.hasNoCm() + and self.context.g_opts.component != "DN"): self.doStopClusterByCm() return - # Specifies the stop node - # Gets the specified node id - stop_type = "node" if self.context.g_opts.nodeName != "" else "cluster" - # Perform a stop operation - self.logger.log("Stopping %s." 
% stop_type) + + stop_type = "node" if self.context.g_opts.nodeName else "cluster" + self.logger.log(f"Stopping {stop_type}.") self.logger.log("=========================================") - if self.context.g_opts.nodeName == "": - host_list = self.clusterInfo.getClusterNodeNames() - else: - host_list = [] - host_list.append(self.context.g_opts.nodeName) + + host_list = ([self.context.g_opts.nodeName] + if self.context.g_opts.nodeName else + self.clusterInfo.getClusterNodeNames()) + self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) - if self.time_out is None: - time_out = DefaultValue.TIMEOUT_CLUSTER_STOP - else: - time_out = self.time_out - cmd = "source %s; %s -U %s -R %s -t %s" % ( - self.context.g_opts.mpprcFile, - OMCommand.getLocalScript("Local_StopInstance"), - self.context.user, self.context.clusterInfo.appPath, time_out) - if self.dataDir != "": - cmd += " -D %s" % self.dataDir - if self.mode != "": - cmd += " -m %s" % self.mode - (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, host_list) - for nodeName in host_list: - if statusMap[nodeName] != 'Success': - raise Exception( - ErrorCode.GAUSS_536["GAUSS_53606"] % (cmd, output)) - self.logger.log("Successfully stopped %s." % stop_type) + timeout = self.time_out or DefaultValue.TIMEOUT_CLUSTER_STOP + + def cantian_stop_cmd(data_dir: str) -> str: + """ + 构造单实例停库 shell 命令。 + - 进程存在且 DB 为 OPEN → 优雅 shutdown;超时后 kill -9 + - 进程存在但 DB 非 OPEN → 直接 kill -9 + """ + probe_proc = rf"pgrep -f 'cantiand.*-D {re.escape(data_dir)}'" + sql = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" + ctsql = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql}\"" + probe_open = f"{ctsql} | grep -Eq '[[:space:]]OPEN[[:space:]]'" + + graceful_shutdown = f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown'" + deadline = f"$((SECONDS+{timeout}))" + wait_loop = ( + f"while {probe_proc}; do " + f"[ $SECONDS -ge {deadline} ] && break; " + f"sleep 2; done" + ) + force_kill = ( + f"pids=$({probe_proc}); " + f"echo '[Cantian] force kill '$pids; " + f"[ -n \"$pids\" ] && kill -9 $pids" + ) + + stop_cmd = ( + "(" + f"if {probe_proc}; then " + f"if {probe_open}; then " + f"{graceful_shutdown}; {wait_loop}; " + f"{probe_proc} && {force_kill}; " + "else " + f"{force_kill}; " + "fi; " + "fi)" + ) + + return stop_cmd + + failed_nodes, failed_outputs = [], "" + for dbNode in self.clusterInfo.dbNodes: + if dbNode.name not in host_list: + continue + + cmd_lines = [cantian_stop_cmd(inst.datadir) for inst in dbNode.datanodes] + full_cmd = " && ".join(cmd_lines) + + statusMap, output = self.sshTool.getSshStatusOutput( + full_cmd, [dbNode.name], self.context.g_opts.mpprcFile + ) + + if statusMap.get(dbNode.name) != "Success": + failed_nodes.append(dbNode.name) + failed_outputs += f"\n[{dbNode.name}]\n{output.strip()}" + + if failed_nodes: + raise Exception(ErrorCode.GAUSS_536["GAUSS_53606"] % + (",".join(failed_nodes), failed_outputs)) + + self.logger.log(f"Successfully stopped {stop_type}.") self.logger.log("=========================================") - self.logger.log("End stop %s." 
% stop_type) + self.logger.log(f"End stop {stop_type}.") self.logger.debug("Operation succeeded: Stop.") def doView(self): diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 82834e96..53d337be 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -987,6 +987,9 @@ Common options: self.prepare_dss_vg_ini(dss_home) self.prepare_cm_vg_ini(dss_home) + FileUtil.changeOwner(self.user, dss_cfg, recursive=True, link=True) + FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, dss_cfg, recursive=True) + def prepare_dss_soft_link(self): ''' -- Gitee From 95c02c91533277df7ebe09179da2f70aaf6dc125 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 28 May 2025 09:31:34 +0800 Subject: [PATCH 003/144] cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM cantian2.0-OM --- script/gspylib/common/DbClusterInfo.py | 4 +- script/gspylib/common/OMCommand.py | 44 ++++++-- script/gspylib/component/DSS/dss_checker.py | 27 +++-- script/gspylib/component/DSS/dss_comp.py | 2 +- script/gspylib/component/Kernel/Kernel.py | 116 +++++++++++--------- script/gspylib/pssh/bin/TaskPool.py | 6 +- script/impl/om/OLAP/OmImplOLAP.py | 67 +++++------ script/local/PreInstallUtility.py | 19 +++- script/local/StartInstance.py | 1 - 9 files changed, 171 insertions(+), 115 deletions(-) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 0a166170..54ca2e2f 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1339,7 +1339,7 @@ class dbClusterInfo(): # ctsql 视图查询 + grep 判断 OPEN sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" ctsql_cmd = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql_cmd}\"" - probe = f"{ctsql_cmd} | grep -Eq '[[:space:]]OPEN[[:space:]]'" + probe = f"{ctsql_cmd} | grep -q 'OPEN'" echo_ok = ( "echo -e 'instance_state : Normal\\n" @@ -1358,7 +1358,7 @@ class dbClusterInfo(): querycmd = f"{probe} && {echo_ok} || {echo_bad}" elif querytype == "port": - querycmd = "echo port : 5432" + querycmd = f"gs_guc check -D {dnInst.datadir} -c port" dbInfoList.append({ "name": dbNode.name, diff --git a/script/gspylib/common/OMCommand.py b/script/gspylib/common/OMCommand.py index db1ca927..e8356eef 100644 --- a/script/gspylib/common/OMCommand.py +++ b/script/gspylib/common/OMCommand.py @@ -267,24 +267,50 @@ class OMCommand(): @staticmethod def isDBOpen(datadir: str) -> bool: """ - 通过查询 DV_DATABASE 视图,单次判断数据库是否处于 OPEN 状态。 - """ - import re + Query DV_DATABASE once and return True if STATUS is OPEN.sql返回exp: + + Please enter password: + ******** + connected. + + SQL> + NAME STATUS OPEN_STATUS + -------------------------------- -------------------- -------------------- + dbstor OPEN READ WRITE + + 1 rows fetched. 
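        Given output in this shape, the parser below skips the banner lines,
        waits for the dashed separator, and reads the STATUS column from the
        first data row. A standalone sketch of that idea, operating on a plain
        string (function and variable names here are illustrative, not from
        the patch):

            import re

            def status_from_ctsql(text):
                got_sep = False
                for line in text.splitlines():
                    line = line.strip()
                    if re.match(r"^-{5,}", line):
                        got_sep = True
                        continue
                    if got_sep and line:
                        cols = re.split(r"\s+", line)
                        return cols[1].upper() if len(cols) >= 2 else ""
                return ""

            # status_from_ctsql(sample_output_above) == "OPEN"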
+ """ sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" ctsql_cmd = f"ctsql '/' as sysdba -q -D {datadir} -c \"{sql_cmd}\"" status, output = CmdUtil.retryGetstatusoutput(ctsql_cmd, retry_time=0) - if status != 0 or "1 rows fetched" not in output: + if status != 0: return False - try: - data_line = re.split(r"\n+", output.strip())[-2].strip() - db_status = re.split(r"\s+", data_line)[1].strip().upper() - except Exception: + lines = output.splitlines() + data_line = None + got_separator = False + + for line in lines: + line = line.strip() + + if not line or line.startswith(("Please enter", "connected.", "SQL>")): + continue + if re.match(r"^NAME\s+", line, re.I): + continue + if re.match(r"^-{5,}", line): + got_separator = True + continue + if got_separator: + data_line = line + break + + if not data_line: return False - return db_status == "OPEN" + cols = re.split(r"\s+", data_line) + return len(cols) >= 2 and cols[1].upper() == "OPEN" @staticmethod def wait_for_cantiand(logger, datadir: str, timeout: int = 300, delta: int = 5): diff --git a/script/gspylib/component/DSS/dss_checker.py b/script/gspylib/component/DSS/dss_checker.py index 94b6fd2c..ac29bd38 100644 --- a/script/gspylib/component/DSS/dss_checker.py +++ b/script/gspylib/component/DSS/dss_checker.py @@ -174,21 +174,24 @@ class DssConfig(): The dssserver process is not available when running. Then check if it is available. ''' - logger.debug(f'Start to check {process} available.' ) - if process.find('dssserver') > -1: - cmd = 'dsscmd lsvg' + logger.debug(f'Start to check {process} available.') + + if 'dssserver' in process: + cmd = 'timeout 6 dsscmd lsvg' + for cur in range(retry): + logger.debug(f'[{cur + 1}/{retry}] Execute: {cmd}') + sts, out = CmdUtil.exec_by_popen(cmd) - if not sts: - # There is '.' in the out. - logger.debug(f'The dssserver is not available. Message: {out}') - if cur >= retry - 1: - return False - else: - time.sleep(interval) - else: - logger.debug(f'The dssserver is available. The result of the lsvg: {out}') + if sts: + logger.debug(f'The dssserver is available. lsvg result: {out}') break + + logger.debug(f'dssserver not available. Message: {out}') + if cur >= retry - 1: + return False + time.sleep(interval) + return True @staticmethod diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index d26e67ce..a11ee2ea 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -218,7 +218,7 @@ class Dss(BaseComponent): bin_path='', kill_server=True, unrej=False, - exist_so=False): + exist_so=True): ''' The OM manually starts the DSS server to obtain the socket file. 
''' diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index dba3f765..1878d42f 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -161,10 +161,10 @@ class Kernel(BaseComponent): """ pgdata = self.instInfo.datadir sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" - ctsql = (f"ctsql '/' as sysdba -q -D {pgdata} " - f"-c \"{sql_cmd}\"") + ctsql = f"ctsql '/' as sysdba -q -D {pgdata} -c \"{sql_cmd}\"" expire_ts = time.time() + timeout + while time.time() < expire_ts: stat, out = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) if stat != 0: @@ -172,17 +172,28 @@ class Kernel(BaseComponent): time.sleep(interval) continue - if "1 rows fetched" not in out: + data_line = None + got_sep = False + for ln in out.splitlines(): + ln = ln.strip() + if not ln or ln.startswith(("Please enter", "connected.", "SQL>")): + continue + if re.match(r"^NAME\s+", ln, re.I): + continue + if re.match(r"^-{5,}", ln): + got_sep = True + continue + if got_sep: + data_line = ln + break + + if not data_line: time.sleep(interval) continue - try: - row_line = re.split(r"\n+", out.strip())[-2].strip() - db_status = re.split(r"\s+", row_line)[1].strip().upper() - except Exception as err: - self.logger.debug(f"parse DV_DATABASE failed: {err}") - time.sleep(interval) - continue + cols = re.split(r"\s+", data_line) + db_status = cols[1].upper() if len(cols) >= 2 else "" + self.logger.debug(f"ctsql db status: {db_status}") if db_status == "OPEN": return True @@ -304,65 +315,70 @@ class Kernel(BaseComponent): def cleanDir(self, instDir): """ - 删除实例目录(Cantian + openGauss + DSS) + 删除实例目录(Cantian + openGauss + DSS),(若为挂载点先 umount 再清空) + * DSS *.ini 的挂载点由上层 DssConfig 统一解析处理 """ if not os.path.exists(instDir): return dataDir = os.listdir(instDir) + if os.getuid() == 0: - pglDir = f"{instDir}/pg_location" + pglDir = os.path.join(instDir, "pg_location") isPglDirEmpty = os.path.isdir(pglDir) and len(os.listdir(pglDir)) == 0 if not dataDir or isPglDirEmpty: FileUtil.cleanDirectoryContent(instDir) - else: - FileUtil.cleanDirectoryContent(instDir) return - # ============ omm :处理 pg_location & DSS ============ - for info in dataDir: - if info == "pg_location": - pglDir = f"{instDir}/pg_location" - cmd = "{} | {} '{}' | {} '{{printf $3}}'".format( - CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), - pglDir, CmdUtil.getAwkCmd()) - status, mp_out = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + f" Error:\n{mp_out}.\nThe cmd is {cmd}") - - for mp in mp_out.split(): - FileUtil.cleanDirectoryContent(mp) + if "pg_location" in dataDir: + pglDir = os.path.join(instDir, "pg_location") + cmd = "{} | {} '{}' | {} '{{printf $3}}'".format( + CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), + pglDir, CmdUtil.getAwkCmd() + ) + status, mp_out = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + f" Error:\n{mp_out}.\nThe cmd is {cmd}" + ) + + # 先卸载挂载点,再清空内容 + for mp in mp_out.split(): + if mp: subprocess.call(f"umount -l {mp}", shell=True) + FileUtil.cleanDirectoryContent(mp) + + FileUtil.cleanDirectoryContent(pglDir) - FileUtil.cleanDirectoryContent(pglDir) - - for ini in ("dss_inst.ini", "dss_vg_conf.ini"): - ini_path = os.path.join(instDir, ini) - if os.path.isfile(ini_path): - with open(ini_path) as fp: - for line in fp: - if "=" in line: - mount_dir = line.split("=", 1)[1].strip() - if mount_dir and 
os.path.isdir(mount_dir): - FileUtil.cleanDirectoryContent(mount_dir) - subprocess.call(f"umount -l {mount_dir}", shell=True) - - # ============ 删除除 "."、".." 外所有条目 ============= - cmd = ("if [ -d '{0}' ]; then cd '{0}' && " - "find . ! -name '.' ! -name '..' -print0 | " - "xargs -r -0 -n100 rm -rf; fi").format(instDir) + # ---------- 删除非白名单条目 ---------- + ignores = [ + "pg_location", "cfg", "log", + "dss_inst.ini", "dss_vg_conf.ini", + "nodedata.cfg", ".", ".." + ] + extra_cmd = "! -name".join([f" '{ig}' " for ig in ignores]) + + cmd = ( + f"if [ -d '{instDir}' ]; then cd '{instDir}' && " + f"find . ! -name {extra_cmd} -print0 | " + "xargs -r -0 -n100 rm -rf; fi" + ) status, output = subprocess.getstatusoutput(cmd) if status != 0: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + f" Error:\n{output}.\nThe cmd is {cmd}") + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + f" Error:\n{output}.\nThe cmd is {cmd}" + ) - # ============ 最终校验 ============= if os.listdir(instDir): FileUtil.cleanDirectoryContent(instDir) if os.listdir(instDir): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " still not empty after cleanDir().") + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % + instDir + " still not empty after cleanDir()." + ) + self.logger.debug(f"[Cantian] cleanDir finished: {instDir} is empty.") def uninstall(self, instNodeName): diff --git a/script/gspylib/pssh/bin/TaskPool.py b/script/gspylib/pssh/bin/TaskPool.py index 5412cb64..99653558 100644 --- a/script/gspylib/pssh/bin/TaskPool.py +++ b/script/gspylib/pssh/bin/TaskPool.py @@ -152,7 +152,8 @@ class WriterThread(threading.Thread): except IOError as e: raise Exception("[GAUSS-50206] : Failed to create file" " or directory. Error:\n%s." % str(e)) - with open(self.out_file, 'wb', buffering=1) as fp_out: + + with open(self.out_file, 'wb') as fp_out: fp_out.write(self.stdout.encode('utf-8')) if self.err_file: @@ -162,7 +163,8 @@ class WriterThread(threading.Thread): except IOError as e: raise Exception("[GAUSS-50206] : Failed to create file" " or directory. Error:\n%s." 
% str(e)) - with open(self.err_file, 'wb', buffering=1) as fp_err: + + with open(self.err_file, 'wb') as fp_err: fp_err.write(self.stderr.encode('utf-8')) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 94a0cc38..980853d2 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -278,7 +278,7 @@ class OmImplOLAP(OmImpl): # but keep loop structure for compatibility self.logger.log("Verifying cluster state ...") starttime = time.time() - deadline = starttime + 30 # 30 seconds window + deadline = starttime + DefaultValue.TIMEOUT_CLUSTER_START while time.time() <= deadline: not_open = [] for dbNode in self.clusterInfo.dbNodes: @@ -352,8 +352,10 @@ class OmImplOLAP(OmImpl): def doStopCluster(self): """ - Cantian 集群 / 节点停止 + function: do stop cluster (Cantian version, final) """ + import re + self.logger.debug("Operating: Stopping.") if self.context.g_opts.component == "CM": @@ -372,51 +374,42 @@ class OmImplOLAP(OmImpl): self.logger.log(f"Stopping {stop_type}.") self.logger.log("=========================================") - host_list = ([self.context.g_opts.nodeName] - if self.context.g_opts.nodeName else - self.clusterInfo.getClusterNodeNames()) + host_list = (self.clusterInfo.getClusterNodeNames() + if not self.context.g_opts.nodeName + else [self.context.g_opts.nodeName]) self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) - timeout = self.time_out or DefaultValue.TIMEOUT_CLUSTER_STOP - - def cantian_stop_cmd(data_dir: str) -> str: + def silent_stop_cmd(data_dir: str) -> str: """ - 构造单实例停库 shell 命令。 - - 进程存在且 DB 为 OPEN → 优雅 shutdown;超时后 kill -9 - - 进程存在但 DB 非 OPEN → 直接 kill -9 + Quietly stop one Cantian instance. """ - probe_proc = rf"pgrep -f 'cantiand.*-D {re.escape(data_dir)}'" - sql = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" - ctsql = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql}\"" - probe_open = f"{ctsql} | grep -Eq '[[:space:]]OPEN[[:space:]]'" - - graceful_shutdown = f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown'" - deadline = f"$((SECONDS+{timeout}))" - wait_loop = ( - f"while {probe_proc}; do " - f"[ $SECONDS -ge {deadline} ] && break; " - f"sleep 2; done" + pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" + probe = f"pgrep -f '{pat}' | tr '\\n' ' '" + graceful = ( + f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " + ">/dev/null 2>&1 || true" ) force_kill = ( - f"pids=$({probe_proc}); " - f"echo '[Cantian] force kill '$pids; " - f"[ -n \"$pids\" ] && kill -9 $pids" + f"$({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" ) + pre_check = f"pids=$({probe}); [ -z \"$pids\" ] && exit 0" + try_grace = f"{graceful}" + second_ck = f"pids=$({probe})" + kill_rest = f"[ -n \"$pids\" ] && {force_kill}" + finish = "exit 0" + stop_cmd = ( - "(" - f"if {probe_proc}; then " - f"if {probe_open}; then " - f"{graceful_shutdown}; {wait_loop}; " - f"{probe_proc} && {force_kill}; " - "else " - f"{force_kill}; " - "fi; " - "fi)" + "{ " + f"{pre_check}; " + f"{try_grace}; " + f"{second_ck}; " + f"{kill_rest}; " + f"{finish}; " + "}" ) - return stop_cmd failed_nodes, failed_outputs = [], "" @@ -424,8 +417,8 @@ class OmImplOLAP(OmImpl): if dbNode.name not in host_list: continue - cmd_lines = [cantian_stop_cmd(inst.datadir) for inst in dbNode.datanodes] - full_cmd = " && ".join(cmd_lines) + inst_cmds = [silent_stop_cmd(inst.datadir) for inst in dbNode.datanodes] + full_cmd = " && ".join(inst_cmds) statusMap, output = self.sshTool.getSshStatusOutput( 
full_cmd, [dbNode.name], self.context.g_opts.mpprcFile diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 53d337be..34211956 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1198,6 +1198,7 @@ Common options: needCheckEmpty = True self.logger.debug("Install path %s." % installPath) + self.prepareGivenPath(installPath, needCheckEmpty) self.checkUpperPath(needCheckEmpty, installPath) @@ -1235,11 +1236,26 @@ Common options: output)) if output: output = output.splitlines()[-1] + + if output != "1": + self.logger.debug("Upper dir [%s] not writable, try chown to %s." % (upperDir, self.user)) + FileUtil.changeOwner(self.user, upperDir, False, "shell", link=True) + + cmd = "if [ -w %s ];then echo 1; else echo 0;fi" % upperDir + cmd = CmdUtil.get_user_exec_cmd(self.current_user_root, + self.user, cmd) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0 or output.splitlines()[-1] != "1": + self.logger.logExit("User [%s] still has no write permission " + "to directory %s." % (self.user, upperDir)) + if output == "1": return + fileList = os.listdir(upperDir) if installPath in fileList: fileList.remove(installPath) + if len(fileList) != 0: self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50202"] % upperDir + " Or user [%s] has write" @@ -2861,8 +2877,9 @@ Common options: FileUtil.changeOwner(self.user, toolPath, recursive=True, link=True) FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, toolPath, recursive=True) + script_dir = f"{toolPath}/script" FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, - "%s/script/gs_*" % toolPath) + script_dir, recursive=True) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.sha256" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.tar.gz" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.tar.bz2" % diff --git a/script/local/StartInstance.py b/script/local/StartInstance.py index c80ec7e5..ebbae1ca 100644 --- a/script/local/StartInstance.py +++ b/script/local/StartInstance.py @@ -30,7 +30,6 @@ from domain_utils.domain_common.cluster_constants import ClusterConstants from base_utils.os.env_util import EnvUtil from gspylib.component.DSS.dss_checker import DssConfig - class Start(LocalBaseOM): """ The class is used to do perform start -- Gitee From 04f7d5c2990ca87159eed21bfaec695023e8831c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 24 Jun 2025 09:17:31 +0000 Subject: [PATCH 004/144] !7 cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * 
cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql * cantian2.0-mysql --- script/base_diff/comm_constants.py | 1 + script/base_utils/os/file_util.py | 38 ++- script/base_utils/template/cluster_tmp.xml | 3 + script/base_utils/template/resource_en.json | 15 +- script/base_utils/template/resource_zh.json | 13 +- script/base_utils/template/xml_constant.py | 3 + script/base_utils/template/xml_status.py | 111 ++++++++- script/base_utils/template/xml_template.py | 62 +++++ .../domain_utils/cluster_file/package_info.py | 32 +++ script/gspylib/common/ClusterParams.py | 6 +- script/gspylib/common/Common.py | 4 +- script/gspylib/common/DbClusterInfo.py | 145 ++++++++---- script/gspylib/common/LocalBaseOM.py | 15 ++ .../component/Kernel/DN_OLAP/DN_OLAP.py | 105 ++++++--- script/gspylib/component/Kernel/Kernel.py | 202 ++++++++++------ script/gspylib/component/MySql/mysql_comp.py | 217 ++++++++++++++++++ script/impl/install/InstallImpl.py | 6 +- script/impl/preinstall/PreinstallImpl.py | 2 +- script/local/InitInstance.py | 13 +- script/local/Install.py | 158 ++++++++++++- script/local/PreInstallUtility.py | 45 +++- script/local/StartInstance.py | 3 + 22 files changed, 1017 insertions(+), 182 deletions(-) create mode 100644 script/gspylib/component/MySql/mysql_comp.py diff --git a/script/base_diff/comm_constants.py b/script/base_diff/comm_constants.py index cb249d2f..660300be 100644 --- a/script/base_diff/comm_constants.py +++ b/script/base_diff/comm_constants.py @@ -33,6 +33,7 @@ class CommConstants: PKG_SERVER = "Server" PKG_OM = "OM" PKG_CM = "CM" + PKG_CONNECTOR = "Connector" PKG_SHA256 = "sha256" # upgrade sql sha file and sql file diff --git a/script/base_utils/os/file_util.py b/script/base_utils/os/file_util.py index fb404212..bbc80b68 100644 --- a/script/base_utils/os/file_util.py +++ b/script/base_utils/os/file_util.py @@ -449,19 +449,20 @@ class FileUtil(object): try: # do with shell command. if cmd_type == "shell": - if "*" in path: - path = FileUtil.withAsteriskPath(path) - else: - path = "'" + path + "'" - cmd = CmdUtil.getChownCmd(user, group, path, recursive) + quoted = FileUtil.withAsteriskPath(path) if "*" in path else f"'{path}'" + cmd = CmdUtil.getChownCmd(user, group, quoted, recursive) if link: - cmd = cmd + " -h" + parts = cmd.split() + if "-h" not in parts: + parts.insert(2, "-h") + cmd = " ".join(parts) + if retry_flag: CmdUtil.retryGetstatusoutput(cmd, retry_time, waite_time) else: - (status, output) = subprocess.getstatusoutput(cmd) + status, output = subprocess.getstatusoutput(cmd) if status != 0: - raise Exception(output + " The cmd is %s" % cmd) + raise Exception(output + f" The cmd is {cmd}") # do with python API. If the name has special characters. 
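# The shell branch above splices "-h" into the generated chown command so a
# symbolic link itself, not its target, gets re-owned. A small illustration of
# that insertion, assuming getChownCmd() produced a typical invocation:
parts = "chown -R omm:dbgrp '/opt/software/openGauss'".split()
if "-h" not in parts:
    parts.insert(2, "-h")
print(" ".join(parts))  # chown -R -h omm:dbgrp '/opt/software/openGauss'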
else: os.chown(path, userInfo.pw_uid, userInfo.pw_gid) @@ -559,6 +560,27 @@ class FileUtil(object): fp_file.truncate() fp_file.flush() + @staticmethod + def umount_if_mounted(path: str, logger=None) -> None: + """ + Lazy-umount if it is a mount point. + Raise RuntimeError on failure; noop if path not mounted. + """ + if not path: + return + # mountpoint -q path → 0 if mounted + if subprocess.call(["mountpoint", "-q", path]) == 0: + if logger: + logger.debug(f"[file_util] umount -l {path}") + ret = subprocess.run( + ["sudo", "-n", "umount", "-l", path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True + ) + if ret.returncode != 0: + raise RuntimeError(f"umount {path} failed: {ret.stdout.strip()}") + @staticmethod def removeDirectory(path): """ diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 5a78220e..d8b6269b 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -16,6 +16,9 @@ + + + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 774a585c..4f53b27b 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -23,6 +23,16 @@ "choose_cm": "Please choose whether to deploy CM?", "input_cm": "Please enter 1/2 for selection, the default option is 1) Deploy CM", "cm_port": "Please enter the cmserver port(default:15400):", + + "choose_mysql": "Please choose whether to deploy MySql?", + "input_mysql": "Please enter 1/2 for selection, the default option is 1) Deploy MySql", + "deploy_mysql": "Deploy MySql", + "not_deploy_mysql": "Do not deploy MySql", + + "choose_mysql_meta" : "Should MySql metadata be unified into Cantian?", + "input_mysql_meta" : "Please enter 1/2, default 1) Yes", + "deploy_mysql_meta" : "Yes", + "not_deploy_mysql_meta" : "No", "choose_pri_standby": "Please choose whether to deploy in single-node or multi-node mode?", "input_pri_standby": "Please enter 1/2 for selection, the default option is 1) multi-node deployment", @@ -32,8 +42,9 @@ "choose_dbstor" : "Enable DBStor shared-storage mode?", "input_dbstor" : "Input 1 / 2 (default 1) 1) Disable 2) Enable", "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", - "intput_share_fs" : "Enter the shared-filesystem mount info, e.g. 127.0.0.1:share_fs", - + + "intput_share_fs": "Enter the shared-filesystem mount info, e.g. 
127.0.0.1:share_fs", + "deploy_pri_standby": "multi-node deployment", "deploy_single": "single-node deployment", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 82c12835..b0df5025 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -24,6 +24,16 @@ "input_cm": "请输入 1/2 进行选择,默认选项是 1)部署CM", "cm_port": "请输入cmserver端口(默认:15400):", + "choose_mysql": "请选择是否部署MySql?", + "input_mysql": "请输入 1/2 进行选择,默认选项是 1)部署MySql", + "deploy_mysql": "部署MySql", + "not_deploy_mysql": "不部署MySql", + + "choose_mysql_meta" : "MySql 元数据是否归一(写入 Cantian)?", + "input_mysql_meta" : "请输入 1/2 进行选择, 默认 1) 归一", + "deploy_mysql_meta" : "归一", + "not_deploy_mysql_meta" : "非归一", + "choose_pri_standby": "请选择是否多节点部署?", "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)多节点部署", "input_ip_hostname": "请输入主机节点IP和节点名称(如:192.168.0.1 hostname1;192.168.0.2 hostname2)", @@ -32,7 +42,8 @@ "choose_dbstor": "请选择是否启用 DBStor 共享存储?", "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)不部署", "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", - "intput_share_fs": "请输入共享文件系统挂载信息(形如 127.0.0.1:share_fs)", + + "intput_share_fs": "请输入共享文件系统挂载信息(如 127.0.0.1:share_fs)", "deploy_pri_standby": "多节点部署", "deploy_single": "单节点部署", diff --git a/script/base_utils/template/xml_constant.py b/script/base_utils/template/xml_constant.py index 756f2fbe..bd670cd8 100644 --- a/script/base_utils/template/xml_constant.py +++ b/script/base_utils/template/xml_constant.py @@ -7,6 +7,7 @@ class XmlConstant: IS_CHINESE = False IS_CM = False IS_DDES = False + IS_MYSQL = False IS_PRI_STANDBY = False PRI_STANDBY_COUNT = 3 PRI_STANDBY_IP = {} @@ -20,6 +21,7 @@ class XmlConstant: DSS_PARA_INFO = ['enable_dss', 'dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name', 'dss_ssl_enable'] + MYSQL_PARA = ['enable_mysql', 'mysql_metadata_in_cantian'] UPDATE_DSS_PARA_INFO = ['dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name'] CM_PARA_INFO = ['cmDir', 'cmsNum', 'cmServerPortBase', 'cmServerPortStandby', 'cmServerListenIp1', 'cmServerHaIp1', 'cmServerlevel', 'cmServerRelation'] @@ -29,6 +31,7 @@ class XmlConstant: DATABASE_PORT = "" CM_SERVER_PORT = "" + SHARE_FS = "127.0.0.1:share_fs" SSH_PORTS = [] DEFAULT_DATABASE_PORT = "15000" diff --git a/script/base_utils/template/xml_status.py b/script/base_utils/template/xml_status.py index 9c18b596..591d6b17 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -173,6 +173,43 @@ def check_input_xml_info(xml_dir): return False return True +def check_share_fs(val: str) -> bool: + """ + Validate the shared-filesystem string. + + Rules + ----- + 1. Format must be : with no blanks + 2. must be syntactically valid **and** reachable (ping) + 3. must contain no illegal characters + """ + if " " in val or ":" not in val: + GaussLog.printMessage("Invalid format: use : with no spaces.") + return False + + ip, directory = val.split(":", 1) + + if not NetUtil.isIpValid(ip): + GaussLog.printMessage("Invalid IP address.") + return False + + if not check_illegal_character(directory): + return False + + try: + ret = subprocess.run( + ["ping", "-c", "1", "-W", "2", ip], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + if ret.returncode != 0: + GaussLog.printMessage(f"Cannot ping {ip}. 
Please check network connectivity.") + return False + except FileNotFoundError: + GaussLog.printMessage("Ping command not found; skipping reachability check.") + + return True + class XmlStatus(TemplateStatus): @@ -229,11 +266,29 @@ class DataPortStatus(TemplateStatus): return DatabaseInstallStatus() if not user_input: XmlConstant.DATABASE_PORT = XmlConstant.DEFAULT_DATABASE_PORT - return PriStandbyStatus() + return MysqlStatus() if not check_port(user_input): continue XmlConstant.DATABASE_PORT = user_input - return PriStandbyStatus() + return MysqlStatus() + + +class ShareFsStatus(TemplateStatus): + + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('intput_share_fs')).strip() + if user_input.lower() in ("back", "b"): + return DdesStatus() + + if not user_input: + return CmStatus() + + if not check_share_fs(user_input): + continue + + XmlConstant.SHARE_FS = user_input + return CmStatus() class PriStandbyStatus(TemplateStatus): @@ -283,13 +338,13 @@ class DdesStatus(TemplateStatus): return PriStandbyStatus() if not user_input: XmlConstant.IS_DDES = False - return CmStatus() + return ShareFsStatus() if not user_input.isdigit(): GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) continue - if user_input == "1": + if not user_input or user_input == "1": XmlConstant.IS_DDES = False - return CmStatus() + return ShareFsStatus() elif user_input == "2": XmlConstant.IS_DDES = True XmlConstant.IS_CM = True @@ -469,6 +524,52 @@ class PriStandbyCountStatus(TemplateStatus): continue +class MysqlStatus(TemplateStatus): + + def work(self): + XmlConstant.IS_MYSQL = True + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('choose_mysql')) + XmlConstant.select_option( + XmlConstant.RESOURCE_DATA.get('deploy_mysql'), + XmlConstant.RESOURCE_DATA.get('not_deploy_mysql') + ) + + for _ in range(XmlConstant.TRIES): + opt = input(XmlConstant.RESOURCE_DATA.get('input_mysql')).strip() + + if opt.lower() in ("back", "b"): + return DataPortStatus() + if not opt or opt == "1": + XmlConstant.IS_MYSQL = True + return MysqlMetaStatus() + + if opt == "2": + XmlConstant.IS_MYSQL = False + return PriStandbyStatus() + + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) + + +class MysqlMetaStatus(TemplateStatus): + def work(self): + # 默认归一 + XmlConstant.MYSQL_META_ON = True + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('choose_mysql_meta')) + XmlConstant.select_option(XmlConstant.RESOURCE_DATA.get('deploy_mysql_meta'), + XmlConstant.RESOURCE_DATA.get('not_deploy_mysql_meta')) + for _ in range(XmlConstant.TRIES): + opt = input(XmlConstant.RESOURCE_DATA.get('input_mysql_meta')).strip() + if opt.lower() in ('back', 'b'): + return MysqlStatus() + if not opt or opt == "1": + XmlConstant.MYSQL_META_ON = True + return PriStandbyStatus() + if opt == "2": + XmlConstant.MYSQL_META_ON = False + return PriStandbyStatus() + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) + + def get_localhost_name(): return socket.gethostname() diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 2c2f23aa..8d25c54f 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -93,6 +93,7 @@ def with_chinese(): XmlConstant.select_option(XmlConstant.RESOURCE_DATA.get('chinese'), XmlConstant.RESOURCE_DATA.get('english')) check_common(check_input_chinese) + def confirm_xml(): for i in range(XmlConstant.TRIES): if i == 3: @@ -142,6 +143,33 @@ 
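# The check_share_fs() validator added in the xml_status.py hunk above accepts
# "<ip>:<directory>" strings and additionally pings the IP. A condensed,
# dependency-free sketch of the same checks (ipaddress in place of NetUtil,
# Linux-style ping flags assumed):
import ipaddress
import subprocess

def looks_like_share_fs(value):
    if " " in value or ":" not in value:
        return False
    ip, directory = value.split(":", 1)
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        return False
    if not directory:
        return False
    ping = subprocess.run(["ping", "-c", "1", "-W", "2", ip],
                          stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return ping.returncode == 0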
class GenerateTemplate: if child.attrib['name'] in XmlConstant.DSS_PARA_INFO: self.root[0].remove(child) + def delete_xml_mysql(self): + """若未启用 MySQL,则移除 参数.""" + if XmlConstant.IS_MYSQL: + return + for child in self.root[0].findall('PARAM'): + if child.attrib['name'] in XmlConstant.MYSQL_PARA: + self.root[0].remove(child) + + + def delete_xml_share_fs(self): + """ + 删除占位默认值的 + """ + default_val = "127.0.0.1:share_fs" + + share_fs_param = next( + (p for p in self.root[0].findall("PARAM") + if p.attrib.get("name") == "share_fs"), + None + ) + if share_fs_param is None: + return + + if share_fs_param.attrib.get("value", "").strip() == default_val: + self.root[0].remove(share_fs_param) + + def delete_xml_cm(self): if XmlConstant.IS_CM: return @@ -249,6 +277,10 @@ class GenerateTemplate: self.update_device_label_info() # 3.update ddes info self.update_ddes_info() + # 3.1 update share_fs info + self.update_share_fs_info() + # 3.5 update mysql info + self.update_mysql_info() # 4.update cm info self.update_cm_info() @@ -269,6 +301,32 @@ class GenerateTemplate: if status == 0: GaussLog.printMessage(output) + def update_mysql_info(self): + """ + 设置为 on / off(默认 cluster_tmp.xml 中为 on)。 + """ + for child in self.root[0].findall('PARAM'): + if child.attrib['name'] == 'enable_mysql': + child.attrib['value'] = 'on' if XmlConstant.IS_MYSQL else 'off' + if child.attrib['name'] == 'mysql_metadata_in_cantian': + if XmlConstant.IS_MYSQL: + child.attrib['value'] = 'on' if XmlConstant.MYSQL_META_ON else 'off' + + def update_share_fs_info(self): + if XmlConstant.IS_DDES: + for ele in list(self.root[0].findall("PARAM")): + if ele.attrib.get("name") == "share_fs": + self.root[0].remove(ele) + return + + for ele in self.root[0].findall("PARAM"): + if ele.attrib.get("name") == "share_fs": + ele.attrib["value"] = XmlConstant.SHARE_FS + break + else: + ET.SubElement(self.root[0], "PARAM", + {"name": "share_fs", "value": XmlConstant.SHARE_FS}) + def run(self): # get locale get_locale() @@ -286,10 +344,14 @@ class GenerateTemplate: self.delete_xml_node() # delete xml ddes info self.delete_xml_ddes() + # delete xml mysql info + self.delete_xml_mysql() # delete xml cm info self.delete_xml_cm() # update xml all info self.update_xml_all_info() + # delete xml share_fs info + self.delete_xml_share_fs() # generate a new xml file self.generate_new_xml_file() # display xml info diff --git a/script/domain_utils/cluster_file/package_info.py b/script/domain_utils/cluster_file/package_info.py index 90ef7ba1..1b952ab2 100644 --- a/script/domain_utils/cluster_file/package_info.py +++ b/script/domain_utils/cluster_file/package_info.py @@ -65,6 +65,15 @@ class PackageInfo(object): """ return PackageInfo.getPackageFile(CommConstants.PKG_SERVER) + @staticmethod + def get_connector_package_file_path(): + """ + function : Get the path of bin file version. 
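        The update_share_fs_info() helper in the xml_template.py hunk above
        follows a common ElementTree pattern: rewrite the PARAM in place when
        it already exists, otherwise append a new element. A self-contained
        sketch of that pattern with made-up values (not taken from the patch):

            import xml.etree.ElementTree as ET

            xml_text = ("<ROOT><CLUSTER>"
                        "<PARAM name='clusterName' value='cantian'/>"
                        "</CLUSTER></ROOT>")
            cluster = ET.fromstring(xml_text)[0]

            for ele in cluster.findall("PARAM"):
                if ele.attrib.get("name") == "share_fs":
                    ele.attrib["value"] = "192.168.0.10:share_fs"
                    break
            else:
                ET.SubElement(cluster, "PARAM",
                              {"name": "share_fs",
                               "value": "192.168.0.10:share_fs"})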
+ input : NA + output : str + """ + return PackageInfo.getPackageFile(CommConstants.PKG_CONNECTOR) + @staticmethod def getFileSHA256Info(): """ @@ -158,6 +167,25 @@ class PackageInfo(object): integrity_file_name = PackageInfo.getSHA256FilePath() cm_package = server_file_name.replace("Server", "CM").replace("tar.bz2", 'tar.gz') om_package = server_file_name.replace("Server", "OM").replace("tar.bz2", 'tar.gz') + connector_package = server_file_name.replace("Server", "Connector").replace("tar.bz2", "tar.gz") + + if "x86_64" in server_file_name: + cpu_arch = "x86_64" + elif "aarch64" in server_file_name: + cpu_arch = "aarch64" + else: + raise Exception(f"Unsupported CPU arch in server package: {server_file_name}") + + # 查找 Mysql_server*.tgz, 后续需要优化,enable_mysql再拷贝分发 + mysql_server_pkg = "" + for fname in os.listdir(package_path): + if fname.startswith("Mysql_server") and fname.endswith(".tgz"): + if cpu_arch in fname: + mysql_server_pkg = fname + break + else: + raise Exception(f"Mysql_server package CPU arch mismatch: {fname} " + f"(expected {cpu_arch})") tar_lists = SingleInstDiff.get_package_tar_lists(is_single_inst, os.path.normpath(package_path)) @@ -177,6 +205,10 @@ class PackageInfo(object): # add CM package to bak package if os.path.isfile(os.path.realpath(os.path.join(package_path, cm_package))): cmd += "%s " % os.path.basename(cm_package) + if os.path.isfile(os.path.realpath(os.path.join(package_path, connector_package))): + cmd += "%s " % os.path.basename(connector_package) + if mysql_server_pkg and os.path.isfile(os.path.join(package_path, mysql_server_pkg)): + cmd += f"{mysql_server_pkg} " cmd += "&& %s " % CmdUtil.getChmodCmd( str(ConstantsBase.KEY_FILE_MODE), PackageInfo.get_package_back_name()) diff --git a/script/gspylib/common/ClusterParams.py b/script/gspylib/common/ClusterParams.py index e78f2680..1f31dc9a 100644 --- a/script/gspylib/common/ClusterParams.py +++ b/script/gspylib/common/ClusterParams.py @@ -58,6 +58,8 @@ class ClusterParams: PASSWORD = 'password' CLUSTER_TYPE = 'clusterType' SHARE_FS = 'share_fs' + ENABLE_MYSQL = 'enable_mysql' + MYSQL_METADATA_IN_CANTIAN = 'mysql_metadata_in_cantian' @staticmethod def get_all_param_names(): @@ -91,7 +93,9 @@ class ClusterParams: ClusterParams.UWAL_DEVICES_PATH, ClusterParams.PASSWORD, ClusterParams.CLUSTER_TYPE, - ClusterParams.SHARE_FS + ClusterParams.SHARE_FS, + ClusterParams.ENABLE_MYSQL, + ClusterParams.MYSQL_METADATA_IN_CANTIAN ] FLOAT_IP_PATTERN = re.compile(r'\bfloatIp[0-9]+') diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index f94e0b6d..c28de63a 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -177,6 +177,7 @@ class DefaultValue(): ########################### # start timeout value TIMEOUT_CLUSTER_START = 300 + TIMEOUT_INSTANCE_START = 120 # stop timeout value TIMEOUT_CLUSTER_STOP = 300 # query timeout value @@ -1184,8 +1185,7 @@ class DefaultValue(): envList.extend([ "export CM_CONFIG_PATH=$PGDATA/cm_config.ini", "export CTDB_HOME=$PGDATA", - "export GSDB_HOME=$PGDATA", - "export share_fs=127.0.0.1:share_fs" + "export GSDB_HOME=$PGDATA" ]) if "DSS_HOME" in os.environ and os.environ["DSS_HOME"].strip(): diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 54ca2e2f..9d310a5f 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -38,6 +38,9 @@ from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile from base_utils.os.file_util import 
FileUtil from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.domain_common.cluster_constants import ClusterConstants +from domain_utils.cluster_file.profile_file import ProfileFile +from domain_utils.cluster_file.cluster_dir import ClusterDir +from domain_utils.cluster_os.cluster_user import UserUtil from base_utils.common.constantsbase import ConstantsBase from base_utils.os.env_util import EnvUtil from base_utils.os.net_util import NetUtil @@ -1016,6 +1019,9 @@ class dbClusterInfo(): self.uwal_rpc_flowcontrol_switch = "" self.uwal_rpc_flowcontrol_value = "" self.uwal_async_append_switch = "" + # add for mysql + self.enable_mysql = "" + self.mysql_metadata_in_cantian = "" def __str__(self): """ @@ -3077,82 +3083,131 @@ class dbClusterInfo(): global g_networkType self.clusterType = CLUSTER_TYPE_SINGLE_INST - # Read cluster name - self.name = self.__read_and_check_config_item(xmlRootNode, "clusterName", "cluster") - # Read application install path - self.appPath = self.__read_and_check_config_item(xmlRootNode, "gaussdbAppPath", "cluster") - # Read application log path - self.logPath = self.__read_and_check_config_item(xmlRootNode, "gaussdbLogPath", - "cluster", error_ignore=True) - if not self.logPath: - self.logPath = "/var/log/gaussdb" + # 基本路径 + self.name = self.__read_and_check_config_item(xmlRootNode, + "clusterName", "cluster") + self.appPath = self.__read_and_check_config_item(xmlRootNode, + "gaussdbAppPath", + "cluster") + + self.logPath = self.__read_and_check_config_item(xmlRootNode, + "gaussdbLogPath", + "cluster", + error_ignore=True) or "/var/log/gaussdb" if not os.path.isabs(self.logPath): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % \ - ("%s log path(%s)" % ( - VersionInfo.PRODUCT_NAME, self.logPath))) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % + ("%s log path(%s)" % + (VersionInfo.PRODUCT_NAME, self.logPath))) + + self._read_enable_dss(xmlRootNode) + self._read_enable_dcf(xmlRootNode) + g_networkType = self._read_network_type(xmlRootNode) + self._read_enable_uwal(xmlRootNode) + + if "HOST_IP" in os.environ: + self.corePath = self.__read_and_check_config_item(xmlRootNode, + "corePath", + "cluster", + True) + + self._read_enable_mysql_settings(xmlRootNode) + + def _read_enable_dss(self, xmlRootNode): + _, self.enable_dss = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_dss", "cluster") - _, self.enable_dss = ClusterConfigFile.readOneClusterConfigItem(xmlRootNode, - "enable_dss", - "cluster") if self.enable_dss.strip() == "on": self.enable_dss = self.enable_dss.strip() self.init_dss_config(xml_entiy=xmlRootNode) elif self.enable_dss.strip() not in ['off', '']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_dss', self.enable_dss)) + ('enable_dss', self.enable_dss)) + + def _read_enable_dcf(self, xmlRootNode): + _, self.enable_dcf = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_dcf", "cluster") - # Read enable_dcf - ret_status, self.enable_dcf = ClusterConfigFile.readOneClusterConfigItem(xmlRootNode, - "enable_dcf", - "cluster") if self.enable_dcf not in ['', 'on', 'off']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_dcf', self.enable_dcf)) + ('enable_dcf', self.enable_dcf)) if self.enable_dcf == 'on' and self.enable_dss == 'on': raise Exception('Only one DSS or DCF can be enabled.') if self.enable_dcf == 'on': - (ret_status, ret_value) = ClusterConfigFile.readOneClusterConfigItem( + status, cfg = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, 
"dcf_config", "CLUSTER") - if ret_status == 0: - self.dcf_config = ret_value.strip() - if self.dcf_config.count('role') - self.dcf_config.count('PASSIVE') < 3: + if status == 0: + self.dcf_config = cfg.strip() + if self.dcf_config.count('role') - \ + self.dcf_config.count('PASSIVE') < 3: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % ('dcf_config', self.dcf_config)) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % - 'dcf_config' + " Error: \n%s" % ret_value) + 'dcf_config' + " Error: \n%s" % cfg) - # Read network type - (retStatus, retValue) = ClusterConfigFile.readOneClusterConfigItem( + def _read_network_type(self, xmlRootNode) -> int: + status, val = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, "networkType", "cluster") - if retStatus == 0: - if retValue.isdigit() and int(retValue) in [0, 1]: - g_networkType = int(retValue) - else: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - "cluster network type" + " Error: \nThe parameter value must be 0 or 1.") - elif retStatus == 2: - g_networkType = 0 - else: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - "cluster network type" + " Error: \n%s" % retValue) - - # Read enable_uwal + if status == 0: + if val.isdigit() and int(val) in (0, 1): + return int(val) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "cluster network type" + + " Error: \nThe parameter value must be 0 or 1.") + if status == 2: + return 0 + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "cluster network type" + " Error: \n%s" % val) + + def _read_enable_uwal(self, xmlRootNode): _, self.enable_uwal = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, "enable_uwal", "cluster") if self.enable_uwal not in ['', 'on', 'off']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_uwal', self.enable_uwal)) + ('enable_uwal', self.enable_uwal)) if self.enable_uwal == 'on': self.init_uwal_config(xml_entiy=xmlRootNode) - if "HOST_IP" in os.environ.keys(): - self.corePath = self.__read_and_check_config_item(xmlRootNode, "corePath", - "cluster", True) + def _read_enable_mysql_settings(self, xmlRootNode): + # enable_mysql + status, val = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_mysql", "cluster") + if status == 0: + val = val.strip().lower() + if val in ('on', 'off', ''): + self.enable_mysql = val + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("enable_mysql", val)) + elif status != 2: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "enable_mysql" + " Error: \n%s" % val) + + # mysql_metadata_in_cantian + if self.enable_mysql == 'on': + status, meta = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "mysql_metadata_in_cantian", "cluster") + if status == 0: + meta = meta.strip().lower() + if meta in ('on', 'off', ''): + self.mysql_metadata_in_cantian = meta or 'on' + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("mysql_metadata_in_cantian", meta)) + elif status == 2: + # 未配置时默认启用 + self.mysql_metadata_in_cantian = 'on' + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "mysql_metadata_in_cantian" + + " Error: \n%s" % meta) + else: + # 未开启 MySQL 时该参数无意义 + self.mysql_metadata_in_cantian = "" def get_cluster_back_ip1s(self): # Read cluster backIp1s diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py index 71f1a546..1ec3c907 100644 --- a/script/gspylib/common/LocalBaseOM.py +++ b/script/gspylib/common/LocalBaseOM.py @@ -26,6 +26,7 @@ from gspylib.common.ErrorCode import ErrorCode from 
gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP from gspylib.component.DSS.dss_comp import Dss from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP +from gspylib.component.MySql.mysql_comp import Mysql from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from base_utils.os.user_util import UserUtil @@ -77,6 +78,7 @@ class LocalBaseOM(object): self.dnCons = [] self.gtsCons = [] self.dss_cons = [] + self.mysqlCons = [] self.paxos_mode = paxos_mode self.dss_mode = dss_mode self.dss_config = dss_config @@ -91,6 +93,7 @@ class LocalBaseOM(object): self.initCmComponent() self.initKernelComponent(paxos_mode) self.init_dss_component(self.dss_mode) + self.initMysqlComponent() def init_dss_component(self, dss_mode=False): @@ -158,6 +161,18 @@ class LocalBaseOM(object): component.dorado_cluster_mode = self.dorado_cluster_mode self.dnCons.append(component) + def initMysqlComponent(self): + enable_flag = os.getenv("ENABLE_MYSQL") + if str(enable_flag).strip().lower() not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL is not set to on, skip mysql initialization") + return + + comp = Mysql() + comp.logger = self.logger + comp.user = self.user + comp.mysqlRoot = os.path.join(os.path.dirname(self.clusterInfo.appPath), "mysql") + self.mysqlCons.append(comp) + def readConfigInfo(self): """ function: Read config from static config file diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 4f6328ac..249675d0 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -18,6 +18,7 @@ import sys import os import re +import time sys.path.append(sys.path[0] + "/../../../../") from gspylib.common.ErrorCode import ErrorCode @@ -31,6 +32,7 @@ from base_utils.os.compress_util import CompressUtil from base_utils.os.env_util import EnvUtil from base_utils.os.file_util import FileUtil from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile +from domain_utils.cluster_file.profile_file import ProfileFile from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.grep_util import GrepUtil from base_utils.os.user_util import UserUtil @@ -168,30 +170,52 @@ class DN_OLAP(Kernel): def create_database(self, gauss_home: str): """ - 1. 复制 GAUSSHOME/admin → CTDB_HOME/admin + 创建 Cantian 数据库 + + 步骤 + ---- + 0. 使用 `ctsql -c 'exit'` 先验证连接 + 1. 复制 GAUSSHOME/admin → CTDB_HOME/admin 2. 确保 CTDB_HOME/data 目录存在且为空:如有内容先清空 - 3. 渲染 create_database.sql.template → CTDB_HOME/data/create_database.sql + 3. 渲染 create_database.sql.template → CTDB_HOME/create_database.sql 4. 
执行建库脚本 """ ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: raise Exception("CTDB_HOME is not defined.") - # 准备创库sql + test_cmd = ( + f"ctsql '/' as sysdba -q " + f"-D {self.instInfo.datadir} " + f"-c 'exit'" + ) + self.logger.debug(f"[CreateDB] test connection: {test_cmd}") + rc, out = CmdUtil.retryGetstatusoutput(test_cmd) + if rc != 0: + raise Exception("Cannot connect to Cantian, abort create_database():\n" + out) + src_admin = os.path.join(gauss_home, "admin") dst_admin = os.path.join(ctdb_home, "admin") FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, dst_admin, recursive=True) + data_dir = os.path.join(ctdb_home, "data") + if os.path.isdir(data_dir): + FileUtil.cleanDirectoryContent(data_dir) + else: + FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) + tpl_sql = os.path.join(gauss_home, "share", "cantian", - "create_database.sql.template") + "create_database.sql.template") work_sql = os.path.join(ctdb_home, "create_database.sql") self.copy_and_render_file(tpl_sql, work_sql) - cmd = (f"ctsql '/' as sysdba -q " - f"-D {self.instInfo.datadir} -f {work_sql}") - self.logger.debug(cmd) - st, out = CmdUtil.retryGetstatusoutput(cmd) + run_sql = ( + f"ctsql '/' as sysdba -q " + f"-D {self.instInfo.datadir} -f {work_sql}" + ) + self.logger.debug(f"[CreateDB] create cmd: {run_sql}") + st, out = CmdUtil.retryGetstatusoutput(run_sql, retry_time=0) if st != 0: raise Exception("create database failed:\n" + out) @@ -199,7 +223,7 @@ class DN_OLAP(Kernel): def prepare_data_dir(self) -> None: """ - 1. 从环境变量 share_fs / SHARE_FS 获取 NFS,如 10.1.1.1:/share_dir + 1. 从环境变量 SHARE_FS 获取 NFS,格式形如 10.1.1.1:share_dir 2. 主节点:sudo 挂 root → 创建/清空 cantianData → 卸 root 3. 所有节点:sudo 挂 cantianData → chown 给业务用户 """ @@ -210,21 +234,20 @@ class DN_OLAP(Kernel): FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) self.logger.debug(f"[prepare_data_dir] data_dir = {data_dir}") - # ---------- 1. 读取 share_fs ---------- - share_fs_val = os.getenv("share_fs") or os.getenv("SHARE_FS") or "" - self.logger.debug(f"[prepare_data_dir] env share_fs = {share_fs_val or ''}") + share_fs_val = os.getenv("SHARE_FS", "") + self.logger.debug(f"[prepare_data_dir] env SHARE_FS = {share_fs_val or ''}") if not share_fs_val: - self.logger.debug("[prepare_data_dir] share_fs not set; skip.") + self.logger.debug("[prepare_data_dir] SHARE_FS not set; skip.") return if ":" not in share_fs_val: - raise ValueError(f"share_fs invalid: {share_fs_val!r} (expect :)") + raise ValueError(f"SHARE_FS invalid: {share_fs_val!r} (expect :)") host_ip, fs_dir = share_fs_val.split(":", 1) if host_ip in ("127.0.0.1", "localhost"): - self.logger.debug("[prepare_data_dir] share_fs points to localhost; skip.") + self.logger.debug("[prepare_data_dir] SHARE_FS points to localhost; skip.") return - nfs_root = f"{host_ip}:/{fs_dir}" + nfs_root = f"{host_ip}:/{fs_dir}" nfs_subdir = f"{host_ip}:/{fs_dir}/cantianData" self.logger.debug(f"[prepare_data_dir] nfs_root = {nfs_root}") self.logger.debug(f"[prepare_data_dir] nfs_subdir = {nfs_subdir}") @@ -233,7 +256,7 @@ class DN_OLAP(Kernel): mount_opt = "-o rw,soft,timeo=600,retrans=2" if self.is_primary_node(): - # 3. 挂 root + # 2. 挂 root cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_root} {data_dir}" self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) @@ -242,7 +265,7 @@ class DN_OLAP(Kernel): CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") - # 4. 
创建 / 清空 cantianData + # 3. 创建 / 清空 cantianData cantian_dir = os.path.join(data_dir, "cantianData") if os.path.exists(cantian_dir): FileUtil.cleanDirectoryContent(cantian_dir) @@ -254,10 +277,10 @@ class DN_OLAP(Kernel): CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") raise RuntimeError(f"create cantianData failed:\n{out}") - # 5. 卸 root + # 4. 卸 root CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") - # ---------- 6. 所有节点挂 cantianData ---------- + # ---------- 所有节点挂 cantianData ---------- cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_subdir} {data_dir}" self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) @@ -275,6 +298,13 @@ class DN_OLAP(Kernel): gauss_home = EnvUtil.getEnv("GAUSSHOME") pgdata = self.instInfo.datadir + # -------- Admin 目录 -------------------------------------- + src_admin = os.path.join(gauss_home, "admin") + dst_admin = os.path.join(pgdata, "admin") + FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") + FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + dst_admin, recursive=True) + # -------- Cantian 配置 ----------------------------------------- cantian_ini_tpl = os.path.join(gauss_home, "share", "cantian", "cantiand.ini.sample") @@ -347,19 +377,14 @@ class DN_OLAP(Kernel): self.create_database(gauss_home) pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" - status, _ = CmdUtil.retryGetstatusoutput(pgrep_cmd) - - if status != 0: + if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: self.logger.debug("cantiand is not running, skip shutdown.") - return - - shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" - st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) - - if st != 0: - raise Exception(f"Cantiand shutdown failed:\n{out}") - - self.logger.debug("Cantiand shutdown successfully") + else: + shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) + if st != 0: + raise Exception(f"Cantiand shutdown failed:\n{out}") + self.logger.debug("Cantiand shutdown successfully.") @Dss.catch_err(exist_so=True) def initInstance(self): @@ -548,13 +573,25 @@ class DN_OLAP(Kernel): def getCantianDict(self) -> dict: """ - 写入 cantiand.ini 的 3 个字段 + 写入 cantiand.ini 的参数 """ - return { + meta_flag = "TRUE" + gid = os.getgid() + + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if getattr(cluster, "mysql_metadata_in_cantian", "on").lower() == "off": + meta_flag = "FALSE" + + cantian_dict = { "LSNR_ADDR": self.instInfo.listenIps[0], "LSNR_PORT": str(self.instInfo.port), "SS_INSTANCE_ID": self.calc_ss_instance_id(), + "MYSQL_METADATA_IN_CANTIAN": meta_flag, + "MYSQL_DEPLOY_GROUP_ID": str(gid), } + return cantian_dict def get_ss_inter_url(self) -> str: """ diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 1878d42f..88443a5d 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -34,6 +34,8 @@ from base_utils.os.file_util import FileUtil from base_utils.security.security_checker import SecurityChecker from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.net_util import NetUtil +from base_utils.os.user_util import UserUtil +from gspylib.common.DbClusterInfo import dbClusterInfo MAX_PARA_NUMBER = 1000 TIMR_OUT = 60 @@ -69,37 +71,47 @@ class Kernel(BaseComponent): start/stop/query single instance """ - def start(self, is_dss_mode: bool 
= False, time_out=DefaultValue.TIMEOUT_CLUSTER_START): + def start(self, is_dss_mode: bool = False, + time_out: int = DefaultValue.TIMEOUT_INSTANCE_START): """ 启动 Cantian 实例 - • 普通模式:后台执行 `cantiand open -D ` - • DSS 模式目前跳过 + • 普通模式:后台执行 setsid cantiand open -D + • DSS 模式暂未实现 """ - # dss 需要修改 + # 临时适配,后面需要调整 + self.adjust_cm_bitmap() + # if is_dss_mode: - # self.logger.debug("DSS mode start skipped (not yet supported).") + # self.logger.debug("DSS mode start skipped.") # return data_dir = self.instInfo.datadir + ct_exec_dir = os.path.join(data_dir, "data") + + if not os.path.isdir(ct_exec_dir): + FileUtil.createDirectory(ct_exec_dir, DefaultValue.KEY_DIRECTORY_MODE) + + start_cmd = ( + f"cd {ct_exec_dir} && " + f"nohup sh -c 'cantiand open -D {data_dir} >/dev/null 2>&1 &' " + f">/dev/null 2>&1" + ) + + self.logger.debug(f"[Kernel] start cmd = {start_cmd}") + CmdUtil.retryGetstatusoutput(start_cmd, retry_time=0) + + if not Kernel.isDbOpen( + pgdata=data_dir, + logger=self.logger, + timeout=time_out, + interval=5): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51607"] + % "Cantian instance" + + " Error: timeout waiting for DB OPEN." + ) - cmd = f"nohup cantiand open -D {data_dir} > /dev/null 2>&1 &" - self.logger.debug(f"start cmd = {cmd}") - status, out = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] - % "Cantian instance" + "\nError: " + out) - - for _ in range(time_out): - st, _ = subprocess.getstatusoutput( - f"pgrep -f \"cantiand[^ ]* open -D {data_dir}\"") - if st == 0: - self.logger.log("Cantian instance start success.") - return - time.sleep(1) - - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] - % "Cantian instance" - + " Error: timeout waiting for process ready.") + self.logger.log("Cantian instance start success.") def stop(self, stopMode: str = "", time_out: int = 300): """ @@ -154,53 +166,84 @@ class Kernel(BaseComponent): "Cantian instance" + "\nError: process still alive.") self.logger.log("Cantian instance stopped (force).") - def isDbOpen(self, timeout: int = 30, interval: int = 5) -> bool: + + def adjust_cm_bitmap(self) -> None: + """ + 双节点:把本节点 cm_config.ini 的 BITMAP_ONLINE 设为 3。 + """ + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + + if len(cluster.dbNodes) != 2: + self.logger.debug("[adjust_bitmap_online] node_num!=2, skip.") + return + + cm_ini = os.getenv("CM_CONFIG_PATH", "").strip() + + if not cm_ini: + cydb_home = os.getenv("CTDB_HOME", "").strip() + if cydb_home: + cm_ini = os.path.join(cydb_home, "cm_config.ini") + + if not cm_ini: + local_ips = set(NetUtil.getAllIps()) + for dbNode in cluster.dbNodes: + if any(ip in local_ips for ip in dbNode.backIps): + cm_ini = os.path.join(dbNode.datanodes[0].datadir, + "cm_config.ini") + break + + if not cm_ini: + raise Exception("Cannot resolve local cm_config.ini path.") + + self.logger.debug(f"[adjust_bitmap_online] target cm_ini: {cm_ini}") + + self.updateConfig(cm_ini, {"BITMAP_ONLINE": "3"}) + self.logger.debug("BITMAP_ONLINE set to 3.") + + @staticmethod + def isDbOpen(pgdata: str, + logger, + timeout: int = DefaultValue.TIMEOUT_INSTANCE_START, + interval: int = 5) -> bool: """ Cantian 健康探针(视图版) 轮询 DV_DATABASE,若 STATUS 列为 OPEN 即返回 True。 """ - pgdata = self.instInfo.datadir sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" ctsql = f"ctsql '/' as sysdba -q -D {pgdata} -c \"{sql_cmd}\"" + deadline = time.time() + timeout - expire_ts = time.time() + timeout - - while time.time() < expire_ts: 
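A minimal usage sketch of the new start/probe hand-off, mirroring what the patched Kernel.start() does: launch cantiand detached, then block on the static Kernel.isDbOpen() probe until DV_DATABASE reports OPEN. PGDATA as the instance data directory and logging.getLogger as a stand-in for the OM logger are assumptions for illustration only:

    # sketch: launch cantiand detached, then wait until DV_DATABASE reports OPEN
    import logging
    import os
    from base_utils.os.cmd_util import CmdUtil
    from gspylib.component.Kernel.Kernel import Kernel

    logger = logging.getLogger("om")            # stand-in for the OM logger
    data_dir = os.environ["PGDATA"]             # assumed: instance data directory
    CmdUtil.retryGetstatusoutput(
        f"nohup sh -c 'cantiand open -D {data_dir} >/dev/null 2>&1 &' >/dev/null 2>&1",
        retry_time=0)
    if not Kernel.isDbOpen(pgdata=data_dir, logger=logger, timeout=120, interval=5):
        raise RuntimeError("cantiand did not reach OPEN state within 120 seconds")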
+ while time.time() < deadline: stat, out = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) if stat != 0: - self.logger.debug(f"ctsql failed: {out.strip()}") + logger.debug(f"[isDbOpen] ctsql failed: {out.strip()}") time.sleep(interval) continue - data_line = None - got_sep = False + data_line, sep_seen = None, False for ln in out.splitlines(): ln = ln.strip() if not ln or ln.startswith(("Please enter", "connected.", "SQL>")): continue - if re.match(r"^NAME\s+", ln, re.I): - continue if re.match(r"^-{5,}", ln): - got_sep = True + sep_seen = True continue - if got_sep: + if sep_seen: data_line = ln break - if not data_line: - time.sleep(interval) - continue - - cols = re.split(r"\s+", data_line) - db_status = cols[1].upper() if len(cols) >= 2 else "" - self.logger.debug(f"ctsql db status: {db_status}") - - if db_status == "OPEN": - return True + if data_line: + cols = re.split(r"\s+", data_line) + status = cols[1].upper() if len(cols) >= 2 else "" + logger.debug(f"[isDbOpen] STATUS = {status}") + if status == "OPEN": + return True time.sleep(interval) - self.logger.debug("Cantiand start timeout") + logger.debug("[isDbOpen] timeout waiting Cantian OPEN") return False def build(self, buidMode="full", standByBuildTimeout=300): @@ -315,68 +358,75 @@ class Kernel(BaseComponent): def cleanDir(self, instDir): """ - 删除实例目录(Cantian + openGauss + DSS),(若为挂载点先 umount 再清空) - * DSS *.ini 的挂载点由上层 DssConfig 统一解析处理 + 删除实例目录(umount + remove) """ if not os.path.exists(instDir): return - dataDir = os.listdir(instDir) + try: + mounts = [] + with open("/proc/self/mounts") as fp: + for line in fp: + mp = line.split()[1] + if mp == instDir or mp.startswith(instDir + "/"): + mounts.append(mp) + + mounts.sort(key=len, reverse=True) + + for mp in mounts: + FileUtil.umount_if_mounted(mp, self.logger) + except Exception as exc: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + f" Error during umount: {exc}" + ) if os.getuid() == 0: pglDir = os.path.join(instDir, "pg_location") - isPglDirEmpty = os.path.isdir(pglDir) and len(os.listdir(pglDir)) == 0 - if not dataDir or isPglDirEmpty: + isPglDirEmpty = os.path.isdir(pglDir) and not os.listdir(pglDir) + if not os.listdir(instDir) or isPglDirEmpty: FileUtil.cleanDirectoryContent(instDir) return - if "pg_location" in dataDir: + if "pg_location" in os.listdir(instDir): pglDir = os.path.join(instDir, "pg_location") - cmd = "{} | {} '{}' | {} '{{printf $3}}'".format( - CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), - pglDir, CmdUtil.getAwkCmd() - ) - status, mp_out = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception( - ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + - f" Error:\n{mp_out}.\nThe cmd is {cmd}" - ) - - # 先卸载挂载点,再清空内容 - for mp in mp_out.split(): - if mp: - subprocess.call(f"umount -l {mp}", shell=True) - FileUtil.cleanDirectoryContent(mp) - FileUtil.cleanDirectoryContent(pglDir) - # ---------- 删除非白名单条目 ---------- ignores = [ "pg_location", "cfg", "log", "dss_inst.ini", "dss_vg_conf.ini", "nodedata.cfg", ".", ".." ] extra_cmd = "! -name".join([f" '{ig}' " for ig in ignores]) - cmd = ( f"if [ -d '{instDir}' ]; then cd '{instDir}' && " f"find . ! 
-name {extra_cmd} -print0 | " "xargs -r -0 -n100 rm -rf; fi" ) - status, output = subprocess.getstatusoutput(cmd) - if status != 0: + st, out = subprocess.getstatusoutput(cmd) + if st != 0: raise Exception( ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + - f" Error:\n{output}.\nThe cmd is {cmd}" + f" Error:\n{out}.\nThe cmd is {cmd}" ) if os.listdir(instDir): FileUtil.cleanDirectoryContent(instDir) if os.listdir(instDir): raise Exception( - ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " still not empty after cleanDir()." + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + " still not empty after cleanDir()." + ) + + mysql_dir = os.path.abspath(os.path.join(instDir, "..", "mysql")) + self.logger.debug(f"[Cantian] cleanDir: {mysql_dir} is not empty.") + if os.path.isdir(mysql_dir): + try: + FileUtil.cleanDirectoryContent(mysql_dir) + except Exception as exc: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % mysql_dir + + f" Error while cleaning mysql dir: {exc}" ) self.logger.debug(f"[Cantian] cleanDir finished: {instDir} is empty.") diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py new file mode 100644 index 00000000..d490f215 --- /dev/null +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -0,0 +1,217 @@ +# -*- coding:utf-8 -*- +import os +import time +import re +from pathlib import Path + +from base_utils.os.env_util import EnvUtil +from base_utils.os.cmd_util import CmdUtil +from base_utils.os.file_util import FileUtil +from gspylib.component.BaseComponent import BaseComponent +from gspylib.component.Kernel.Kernel import Kernel +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.DbClusterInfo import dbClusterInfo +from domain_utils.cluster_os.cluster_user import UserUtil +from gspylib.common.Common import DefaultValue + + +class Mysql(BaseComponent): + @staticmethod + def _gauss_home() -> Path: + gh = EnvUtil.getEnv("GAUSSHOME") + if not gh: + raise EnvironmentError("GAUSSHOME is not set.") + return Path(gh).resolve() + + @staticmethod + def _mysql_home() -> Path: + mh = EnvUtil.getEnv("MYSQL_HOME") + if not mh or not Path(mh).is_dir(): + raise EnvironmentError("MYSQL_HOME is invalid.") + return Path(mh).resolve() + + @staticmethod + def _is_primary_node() -> bool: + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if not cluster.dbNodes: + return True + first_node = cluster.dbNodes[0] + local_host = os.uname().nodename + return local_host in first_node.backIps or local_host == first_node.name + + @staticmethod + def _cantian_datadir() -> str: + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if not cluster.dbNodes or not cluster.dbNodes[0].datanodes: + raise Exception("cluster_static.conf missing dbNode or datanode.") + return cluster.dbNodes[0].datanodes[0].datadir + + def initInstance(self): + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + return + + if not self._is_primary_node(): + self.logger.log("[MySQL] Not primary node, skip initInstance.") + return + + try: + gauss_home = self._gauss_home() + mysql_home = self._mysql_home() + data_dir = mysql_home / "data" + plugin_dir = gauss_home / "lib" + cnf_file = gauss_home / "share" / "mysql" / "my.cnf" + bin_mysqld = mysql_home / "bin" / "mysqld" + + ct_data_dir = self._cantian_datadir() + ct_exec_dir = 
Path(ct_data_dir) / "data" + if not ct_exec_dir.exists(): + FileUtil.createDirectory(str(ct_exec_dir), DefaultValue.KEY_DIRECTORY_MODE) + + ct_cmd = ( + f"cd {ct_exec_dir} && " + f"nohup sh -c 'cantiand open -D {ct_data_dir} >/dev/null 2>&1 &' " + f">/dev/null 2>&1" + ) + self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") + CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) + + if not Kernel.isDbOpen( + pgdata=ct_data_dir, + logger=self.logger, + timeout=DefaultValue.TIMEOUT_INSTANCE_START, + interval=5): + raise RuntimeError("Cantian open timeout (DV_DATABASE not OPEN)") + + if data_dir.exists(): + FileUtil.removeDirectory(str(data_dir)) + FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) + + # 3. 初始化 MySQL + init_cmd = ( + f"{bin_mysqld} " + f"--defaults-file={cnf_file} --initialize-insecure " + f"--datadir={data_dir} " + f"--plugin-dir={plugin_dir} " + f'--early-plugin-load="ha_ctc.so"' + ) + self.logger.debug(f"[MySQL] init_cmd: {init_cmd}") + ret, _ = CmdUtil.retryGetstatusoutput(init_cmd, retry_time=0) + if ret != 0: + raise RuntimeError(f"mysqld initialize failed, exit code {ret}") + + # 4. 关闭 Cantian + self._exec(f"ctsql '/' as sysdba -q -D {ct_data_dir} -c 'shutdown'", + ignore_error=True) + + except Exception as e: + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51610"] % + f"MySQL.initInstance failed: {e}" + ) + + def _wait_mysql_ready(self, log_file: Path, timeout_sec: int) -> bool: + """ + 轮询日志直到出现 “ready for connections” + """ + check_cmd = f"grep -q 'ready for connections' {log_file}" + retry = max(1, int(timeout_sec) // 3) + rc, _ = CmdUtil.retryGetstatusoutput( + check_cmd, + retry_time=retry, + sleep_time=3 + ) + return rc == 0 + + def start(self, timeout: int = DefaultValue.TIMEOUT_INSTANCE_START): + """ + 启动 MySQL + """ + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + return + + mysql_home = self._mysql_home() + gauss_home = self._gauss_home() + + plugin_dir = gauss_home / "lib" + cnf_file = gauss_home / "share" / "mysql" / "my.cnf" + bin_safe = mysql_home / "bin" / "mysqld" + data_dir = mysql_home / "data" + + if not data_dir.exists(): + FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) + self.logger.debug(f"[MySQL] auto-create data_dir: {data_dir}") + + gausslog_dir = os.getenv("GAUSSLOG", str(mysql_home / "log")) + Path(gausslog_dir).mkdir(parents=True, exist_ok=True) + log_file = Path(gausslog_dir) / "mysql.log" + + start_cmd = ( + f"nohup {bin_safe} " + f"--defaults-file={cnf_file} " + f"--plugin-dir={plugin_dir} " + f"--datadir={data_dir} " + f'--early-plugin-load="ha_ctc.so" ' + f"> {log_file} 2>&1 ha_ctc_share.so + off -> ha_ctc_noshare.so + """ + connector_dir = os.path.join(tmp_dir, "connector") + + dest_lib_dir = os.path.join(self.installPath, "lib") + FileUtil.createDirectory(dest_lib_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + + want_share = getattr(self.clusterInfo, + "mysql_metadata_in_cantian", + "on").lower() == "on" + + so_share, so_noshare = "ha_ctc_share.so", "ha_ctc_noshare.so" + proxy_so = "libctc_proxy.so" + selected_so = so_share if want_share else so_noshare + + required_libs = [proxy_so, selected_so] + missing = [lib for lib in required_libs + if not os.path.isfile(os.path.join(connector_dir, lib))] + if missing: + raise Exception("Missing library file(s) in connector package: " + ", ".join(missing)) + + for lib in required_libs: + src = os.path.join(connector_dir, lib) 
+ dst = os.path.join(dest_lib_dir, lib) + CmdUtil.execCmd(f"cp -pf '{src}' '{dst}'") + + link_path = os.path.join(dest_lib_dir, "ha_ctc.so") + CmdUtil.execCmd(f"ln -sfn '{selected_so}' '{link_path}'") + + src_mysql_share = os.path.join(tmp_dir, "share", "mysql") + if os.path.isdir(src_mysql_share): + dest_share_root = os.path.join(self.installPath, "share") + dest_mysql_dir = os.path.join(dest_share_root, "mysql") + + FileUtil.createDirectory(dest_share_root, True, + DefaultValue.KEY_DIRECTORY_MODE) + + if os.path.isdir(dest_mysql_dir): + FileUtil.removeDirectory(dest_mysql_dir) + + CmdUtil.execCmd(f"cp -r '{src_mysql_share}' '{dest_share_root}'") + + FileUtil.changeOwner(self.user, dest_mysql_dir, True) + self.logger.log(f"Copied connector share/mysql to {dest_mysql_dir}") + else: + self.logger.debug("No share/mysql directory found in connector package; skip.") + + FileUtil.changeOwner(self.user, dest_lib_dir, True) + + self.logger.log(f"MySQL connector libs deployed to {dest_lib_dir} " + f"(copied: {', '.join(required_libs)}, symlink ha_ctc.so -> {selected_so})") + + def decompress_mysql_server(self): + """ + Decompress Mysql_server*x86_64*.tgz / Mysql_server*aarch64*.tgz + """ + if getattr(self.clusterInfo, "enable_mysql", "off").lower() != "on": + self.logger.debug("enable_mysql != on, skip Mysql_server package.") + return + + server_pkg = os.path.basename(PackageInfo.getPackageFile(CommConstants.PKG_SERVER)) + if "x86_64" in server_pkg: + arch_tag = "x86_64" + elif "aarch64" in server_pkg: + arch_tag = "aarch64" + else: + raise Exception(f"Unsupported CPU arch in server package: {server_pkg}") + + gphome = EnvUtil.getEnvironmentParameterValue("GPHOME", self.user) + mysql_pkg = "" + for fname in os.listdir(gphome): + if fname.startswith("Mysql_server") and fname.endswith(".tgz") and arch_tag in fname: + mysql_pkg = os.path.join(gphome, fname) + break + if not mysql_pkg: + raise Exception(f"Mysql_server package ({arch_tag}) not found in {gphome}") + self.logger.debug(f"[Mysql_server] path = {mysql_pkg}") + + install_root = os.path.dirname(self.installPath) + tmp_dir = os.path.join(install_root, "tmp", "mysql_server") + FileUtil.createDirectory(tmp_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + + tar_cmd = f"tar -zxf '{mysql_pkg}' -C '{tmp_dir}'" + self.logger.log(f"Decompressing Mysql_server: {tar_cmd}") + status, output = subprocess.getstatusoutput(tar_cmd) + if status != 0: + raise Exception(f"Failed to decompress Mysql_server.\n{output}") + + src_mysql_dir = os.path.join(tmp_dir, "Mysql_server", "mysql") + if not os.path.isdir(src_mysql_dir): + raise Exception(f"Mysql_server package format error: missing {src_mysql_dir}") + + dest_mysql_dir = os.path.join(install_root, "mysql") + if os.path.isdir(dest_mysql_dir): + FileUtil.removeDirectory(dest_mysql_dir) + + FileUtil.cpFile(src_mysql_dir, dest_mysql_dir) + FileUtil.changeOwner(self.user, dest_mysql_dir, True) + + FileUtil.removeDirectory(tmp_dir) + self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") + def generate_dss_path(self): """ Generate dss path @@ -448,7 +598,6 @@ class Install(LocalBaseOM): FileUtil.createDirectory(dss_app_path, True, DefaultValue.KEY_DIRECTORY_MODE) self.logger.debug("Create dss app path successfully.") - def generate_install_path(self): """ Generate install path @@ -509,6 +658,10 @@ class Install(LocalBaseOM): # decompress CM package self.decompress_cm_package() + # decompress connector package + self.decompress_mysql_connector() + # decompress Mysql_server package + self.decompress_mysql_server() 
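The connector deployment above reduces to a copy-then-symlink rule: when mysql_metadata_in_cantian is on the shared-metadata plugin is selected, otherwise the non-shared one, and either flavour is exposed to mysqld as ha_ctc.so. A minimal sketch of just that selection, assuming the connector payload is already unpacked into connector_dir and dest_lib_dir is $GAUSSHOME/lib (both paths illustrative, not taken from the patch):

    # sketch: pick the CTC plugin flavour and expose it as ha_ctc.so
    import os
    import shutil

    def deploy_ctc_plugin(connector_dir: str, dest_lib_dir: str,
                          metadata_in_cantian: str = "on") -> str:
        selected = "ha_ctc_share.so" if metadata_in_cantian == "on" else "ha_ctc_noshare.so"
        for lib in ("libctc_proxy.so", selected):
            shutil.copy2(os.path.join(connector_dir, lib),
                         os.path.join(dest_lib_dir, lib))
        link = os.path.join(dest_lib_dir, "ha_ctc.so")
        if os.path.lexists(link):
            os.remove(link)
        os.symlink(selected, link)   # relative target, equivalent to ln -sfn
        return link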
# change owner for tar file. FileUtil.changeOwner(self.user, self.installPath, True) @@ -764,6 +917,9 @@ class Install(LocalBaseOM): for dn in self.dnCons: dn.start(self.time_out) + for my in self.mysqlCons: + my.start(self.time_out) + def buildStandby(self): """ function: build standby diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 34211956..14f18799 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1730,6 +1730,13 @@ Common options: "^\\s*export\\s*PYTHONPATH=\\$GPHOME\\/lib") self.logger.debug( "Deleting crash PYTHONPATH in user environment variables.") + # 清理 MySQL & SHARE_FS + FileUtil.deleteLine(userProfile, r"^\s*export\s*ENABLE_MYSQL=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*SHARE_FS=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*MYSQL_HOME=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + def setToolEnv(self): """ @@ -1771,7 +1778,7 @@ Common options: datadir = node_info.datanodes[0].datadir FileUtil.writeFile(userProfile, ["export PGDATA=%s" % datadir]) - + # set COREPATH FileUtil.writeFile(userProfile, ["export COREPATH=%s" % self.cluster_core_path]) # set PGDATABASE @@ -1803,7 +1810,41 @@ Common options: # set PYTHONPATH FileUtil.writeFile(userProfile, ["export PYTHONPATH=$GPHOME/lib"]) - + share_fs_env = ClusterConfigFile.getOneClusterConfigItem( + "share_fs", self.clusterConfig).strip() + enable_mysql_flag = ClusterConfigFile.getOneClusterConfigItem( + "enable_mysql", self.clusterConfig).strip().lower() or "off" + mysql_meta_flag = ClusterConfigFile.getOneClusterConfigItem( + "mysql_metadata_in_cantian", self.clusterConfig).strip().lower() or "on" + + if share_fs_env: + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*SHARE_FS=.*$") + FileUtil.writeFile(prof, [f"export SHARE_FS={share_fs_env}"]) + self.logger.debug(f"Successfully flush 'export SHARE_FS={share_fs_env}'") + else: + self.logger.debug("share_fs is default or empty; skip 'export SHARE_FS'") + + if enable_mysql_flag == "on": + mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + + FileUtil.writeFile(prof, [ + "export ENABLE_MYSQL=on", + f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", + f"export MYSQL_HOME={mysql_home}", + r"export PATH=$MYSQL_HOME/bin:$PATH" + ]) + self.logger.debug( + f"Flush ENABLE_MYSQL=on MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag} " + f"MYSQL_HOME={mysql_home}" + ) + else: + self.logger.debug("[MySQL] enable_mysql=off; skip MySQL env export") except Exception as e: self.logger.logExit(str(e)) self.logger.debug("Successfully set tool ENV.") diff --git a/script/local/StartInstance.py b/script/local/StartInstance.py index ebbae1ca..50474d5b 100644 --- a/script/local/StartInstance.py +++ b/script/local/StartInstance.py @@ -172,6 +172,9 @@ General options: if not isDataDirCorrect: raise Exception(ErrorCode.GAUSS_536["GAUSS_53610"] % self.dataDir) + for my in self.mysqlCons: + my.start(self.time_out) + def main(): """ -- Gitee From b2b4445b980e5e96ebff1c4cde48de42275ea10c Mon Sep 17 
00:00:00 2001 From: Duanshaohua Date: Thu, 5 Jun 2025 10:33:36 +0800 Subject: [PATCH 005/144] cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql cantian2.0-mysql --- script/base_diff/comm_constants.py | 1 + script/base_utils/os/file_util.py | 38 ++- script/base_utils/template/cluster_tmp.xml | 3 + script/base_utils/template/resource_en.json | 15 +- script/base_utils/template/resource_zh.json | 13 +- script/base_utils/template/xml_constant.py | 3 + script/base_utils/template/xml_status.py | 111 ++++++++- script/base_utils/template/xml_template.py | 62 +++++ .../domain_utils/cluster_file/package_info.py | 32 +++ script/gspylib/common/ClusterParams.py | 6 +- script/gspylib/common/Common.py | 4 +- script/gspylib/common/DbClusterInfo.py | 145 ++++++++---- script/gspylib/common/LocalBaseOM.py | 15 ++ .../component/Kernel/DN_OLAP/DN_OLAP.py | 105 ++++++--- script/gspylib/component/Kernel/Kernel.py | 202 ++++++++++------ script/gspylib/component/MySql/mysql_comp.py | 217 ++++++++++++++++++ script/impl/install/InstallImpl.py | 6 +- script/impl/preinstall/PreinstallImpl.py | 2 +- script/local/InitInstance.py | 13 +- script/local/Install.py | 158 ++++++++++++- script/local/PreInstallUtility.py | 45 +++- script/local/StartInstance.py | 3 + 22 files changed, 1017 insertions(+), 182 deletions(-) create mode 100644 script/gspylib/component/MySql/mysql_comp.py diff --git a/script/base_diff/comm_constants.py b/script/base_diff/comm_constants.py index cb249d2f..660300be 100644 --- a/script/base_diff/comm_constants.py +++ b/script/base_diff/comm_constants.py @@ -33,6 +33,7 @@ class CommConstants: PKG_SERVER = "Server" PKG_OM = "OM" PKG_CM = "CM" + PKG_CONNECTOR = "Connector" PKG_SHA256 = "sha256" # upgrade sql sha file and sql file diff --git a/script/base_utils/os/file_util.py b/script/base_utils/os/file_util.py index fb404212..bbc80b68 100644 --- a/script/base_utils/os/file_util.py +++ 
b/script/base_utils/os/file_util.py @@ -449,19 +449,20 @@ class FileUtil(object): try: # do with shell command. if cmd_type == "shell": - if "*" in path: - path = FileUtil.withAsteriskPath(path) - else: - path = "'" + path + "'" - cmd = CmdUtil.getChownCmd(user, group, path, recursive) + quoted = FileUtil.withAsteriskPath(path) if "*" in path else f"'{path}'" + cmd = CmdUtil.getChownCmd(user, group, quoted, recursive) if link: - cmd = cmd + " -h" + parts = cmd.split() + if "-h" not in parts: + parts.insert(2, "-h") + cmd = " ".join(parts) + if retry_flag: CmdUtil.retryGetstatusoutput(cmd, retry_time, waite_time) else: - (status, output) = subprocess.getstatusoutput(cmd) + status, output = subprocess.getstatusoutput(cmd) if status != 0: - raise Exception(output + " The cmd is %s" % cmd) + raise Exception(output + f" The cmd is {cmd}") # do with python API. If the name has special characters. else: os.chown(path, userInfo.pw_uid, userInfo.pw_gid) @@ -559,6 +560,27 @@ class FileUtil(object): fp_file.truncate() fp_file.flush() + @staticmethod + def umount_if_mounted(path: str, logger=None) -> None: + """ + Lazy-umount if it is a mount point. + Raise RuntimeError on failure; noop if path not mounted. + """ + if not path: + return + # mountpoint -q path → 0 if mounted + if subprocess.call(["mountpoint", "-q", path]) == 0: + if logger: + logger.debug(f"[file_util] umount -l {path}") + ret = subprocess.run( + ["sudo", "-n", "umount", "-l", path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True + ) + if ret.returncode != 0: + raise RuntimeError(f"umount {path} failed: {ret.stdout.strip()}") + @staticmethod def removeDirectory(path): """ diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 5a78220e..d8b6269b 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -16,6 +16,9 @@ + + + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 774a585c..4f53b27b 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -23,6 +23,16 @@ "choose_cm": "Please choose whether to deploy CM?", "input_cm": "Please enter 1/2 for selection, the default option is 1) Deploy CM", "cm_port": "Please enter the cmserver port(default:15400):", + + "choose_mysql": "Please choose whether to deploy MySql?", + "input_mysql": "Please enter 1/2 for selection, the default option is 1) Deploy MySql", + "deploy_mysql": "Deploy MySql", + "not_deploy_mysql": "Do not deploy MySql", + + "choose_mysql_meta" : "Should MySql metadata be unified into Cantian?", + "input_mysql_meta" : "Please enter 1/2, default 1) Yes", + "deploy_mysql_meta" : "Yes", + "not_deploy_mysql_meta" : "No", "choose_pri_standby": "Please choose whether to deploy in single-node or multi-node mode?", "input_pri_standby": "Please enter 1/2 for selection, the default option is 1) multi-node deployment", @@ -32,8 +42,9 @@ "choose_dbstor" : "Enable DBStor shared-storage mode?", "input_dbstor" : "Input 1 / 2 (default 1) 1) Disable 2) Enable", "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", - "intput_share_fs" : "Enter the shared-filesystem mount info, e.g. 127.0.0.1:share_fs", - + + "intput_share_fs": "Enter the shared-filesystem mount info, e.g. 
127.0.0.1:share_fs", + "deploy_pri_standby": "multi-node deployment", "deploy_single": "single-node deployment", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 82c12835..b0df5025 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -24,6 +24,16 @@ "input_cm": "请输入 1/2 进行选择,默认选项是 1)部署CM", "cm_port": "请输入cmserver端口(默认:15400):", + "choose_mysql": "请选择是否部署MySql?", + "input_mysql": "请输入 1/2 进行选择,默认选项是 1)部署MySql", + "deploy_mysql": "部署MySql", + "not_deploy_mysql": "不部署MySql", + + "choose_mysql_meta" : "MySql 元数据是否归一(写入 Cantian)?", + "input_mysql_meta" : "请输入 1/2 进行选择, 默认 1) 归一", + "deploy_mysql_meta" : "归一", + "not_deploy_mysql_meta" : "非归一", + "choose_pri_standby": "请选择是否多节点部署?", "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)多节点部署", "input_ip_hostname": "请输入主机节点IP和节点名称(如:192.168.0.1 hostname1;192.168.0.2 hostname2)", @@ -32,7 +42,8 @@ "choose_dbstor": "请选择是否启用 DBStor 共享存储?", "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)不部署", "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", - "intput_share_fs": "请输入共享文件系统挂载信息(形如 127.0.0.1:share_fs)", + + "intput_share_fs": "请输入共享文件系统挂载信息(如 127.0.0.1:share_fs)", "deploy_pri_standby": "多节点部署", "deploy_single": "单节点部署", diff --git a/script/base_utils/template/xml_constant.py b/script/base_utils/template/xml_constant.py index 756f2fbe..bd670cd8 100644 --- a/script/base_utils/template/xml_constant.py +++ b/script/base_utils/template/xml_constant.py @@ -7,6 +7,7 @@ class XmlConstant: IS_CHINESE = False IS_CM = False IS_DDES = False + IS_MYSQL = False IS_PRI_STANDBY = False PRI_STANDBY_COUNT = 3 PRI_STANDBY_IP = {} @@ -20,6 +21,7 @@ class XmlConstant: DSS_PARA_INFO = ['enable_dss', 'dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name', 'dss_ssl_enable'] + MYSQL_PARA = ['enable_mysql', 'mysql_metadata_in_cantian'] UPDATE_DSS_PARA_INFO = ['dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name'] CM_PARA_INFO = ['cmDir', 'cmsNum', 'cmServerPortBase', 'cmServerPortStandby', 'cmServerListenIp1', 'cmServerHaIp1', 'cmServerlevel', 'cmServerRelation'] @@ -29,6 +31,7 @@ class XmlConstant: DATABASE_PORT = "" CM_SERVER_PORT = "" + SHARE_FS = "127.0.0.1:share_fs" SSH_PORTS = [] DEFAULT_DATABASE_PORT = "15000" diff --git a/script/base_utils/template/xml_status.py b/script/base_utils/template/xml_status.py index 9c18b596..591d6b17 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -173,6 +173,43 @@ def check_input_xml_info(xml_dir): return False return True +def check_share_fs(val: str) -> bool: + """ + Validate the shared-filesystem string. + + Rules + ----- + 1. Format must be : with no blanks + 2. must be syntactically valid **and** reachable (ping) + 3. must contain no illegal characters + """ + if " " in val or ":" not in val: + GaussLog.printMessage("Invalid format: use : with no spaces.") + return False + + ip, directory = val.split(":", 1) + + if not NetUtil.isIpValid(ip): + GaussLog.printMessage("Invalid IP address.") + return False + + if not check_illegal_character(directory): + return False + + try: + ret = subprocess.run( + ["ping", "-c", "1", "-W", "2", ip], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + if ret.returncode != 0: + GaussLog.printMessage(f"Cannot ping {ip}. 
Please check network connectivity.") + return False + except FileNotFoundError: + GaussLog.printMessage("Ping command not found; skipping reachability check.") + + return True + class XmlStatus(TemplateStatus): @@ -229,11 +266,29 @@ class DataPortStatus(TemplateStatus): return DatabaseInstallStatus() if not user_input: XmlConstant.DATABASE_PORT = XmlConstant.DEFAULT_DATABASE_PORT - return PriStandbyStatus() + return MysqlStatus() if not check_port(user_input): continue XmlConstant.DATABASE_PORT = user_input - return PriStandbyStatus() + return MysqlStatus() + + +class ShareFsStatus(TemplateStatus): + + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('intput_share_fs')).strip() + if user_input.lower() in ("back", "b"): + return DdesStatus() + + if not user_input: + return CmStatus() + + if not check_share_fs(user_input): + continue + + XmlConstant.SHARE_FS = user_input + return CmStatus() class PriStandbyStatus(TemplateStatus): @@ -283,13 +338,13 @@ class DdesStatus(TemplateStatus): return PriStandbyStatus() if not user_input: XmlConstant.IS_DDES = False - return CmStatus() + return ShareFsStatus() if not user_input.isdigit(): GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) continue - if user_input == "1": + if not user_input or user_input == "1": XmlConstant.IS_DDES = False - return CmStatus() + return ShareFsStatus() elif user_input == "2": XmlConstant.IS_DDES = True XmlConstant.IS_CM = True @@ -469,6 +524,52 @@ class PriStandbyCountStatus(TemplateStatus): continue +class MysqlStatus(TemplateStatus): + + def work(self): + XmlConstant.IS_MYSQL = True + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('choose_mysql')) + XmlConstant.select_option( + XmlConstant.RESOURCE_DATA.get('deploy_mysql'), + XmlConstant.RESOURCE_DATA.get('not_deploy_mysql') + ) + + for _ in range(XmlConstant.TRIES): + opt = input(XmlConstant.RESOURCE_DATA.get('input_mysql')).strip() + + if opt.lower() in ("back", "b"): + return DataPortStatus() + if not opt or opt == "1": + XmlConstant.IS_MYSQL = True + return MysqlMetaStatus() + + if opt == "2": + XmlConstant.IS_MYSQL = False + return PriStandbyStatus() + + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) + + +class MysqlMetaStatus(TemplateStatus): + def work(self): + # 默认归一 + XmlConstant.MYSQL_META_ON = True + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('choose_mysql_meta')) + XmlConstant.select_option(XmlConstant.RESOURCE_DATA.get('deploy_mysql_meta'), + XmlConstant.RESOURCE_DATA.get('not_deploy_mysql_meta')) + for _ in range(XmlConstant.TRIES): + opt = input(XmlConstant.RESOURCE_DATA.get('input_mysql_meta')).strip() + if opt.lower() in ('back', 'b'): + return MysqlStatus() + if not opt or opt == "1": + XmlConstant.MYSQL_META_ON = True + return PriStandbyStatus() + if opt == "2": + XmlConstant.MYSQL_META_ON = False + return PriStandbyStatus() + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) + + def get_localhost_name(): return socket.gethostname() diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 2c2f23aa..8d25c54f 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -93,6 +93,7 @@ def with_chinese(): XmlConstant.select_option(XmlConstant.RESOURCE_DATA.get('chinese'), XmlConstant.RESOURCE_DATA.get('english')) check_common(check_input_chinese) + def confirm_xml(): for i in range(XmlConstant.TRIES): if i == 3: @@ -142,6 +143,33 @@ 
class GenerateTemplate: if child.attrib['name'] in XmlConstant.DSS_PARA_INFO: self.root[0].remove(child) + def delete_xml_mysql(self): + """若未启用 MySQL,则移除 参数.""" + if XmlConstant.IS_MYSQL: + return + for child in self.root[0].findall('PARAM'): + if child.attrib['name'] in XmlConstant.MYSQL_PARA: + self.root[0].remove(child) + + + def delete_xml_share_fs(self): + """ + 删除占位默认值的 + """ + default_val = "127.0.0.1:share_fs" + + share_fs_param = next( + (p for p in self.root[0].findall("PARAM") + if p.attrib.get("name") == "share_fs"), + None + ) + if share_fs_param is None: + return + + if share_fs_param.attrib.get("value", "").strip() == default_val: + self.root[0].remove(share_fs_param) + + def delete_xml_cm(self): if XmlConstant.IS_CM: return @@ -249,6 +277,10 @@ class GenerateTemplate: self.update_device_label_info() # 3.update ddes info self.update_ddes_info() + # 3.1 update share_fs info + self.update_share_fs_info() + # 3.5 update mysql info + self.update_mysql_info() # 4.update cm info self.update_cm_info() @@ -269,6 +301,32 @@ class GenerateTemplate: if status == 0: GaussLog.printMessage(output) + def update_mysql_info(self): + """ + 设置为 on / off(默认 cluster_tmp.xml 中为 on)。 + """ + for child in self.root[0].findall('PARAM'): + if child.attrib['name'] == 'enable_mysql': + child.attrib['value'] = 'on' if XmlConstant.IS_MYSQL else 'off' + if child.attrib['name'] == 'mysql_metadata_in_cantian': + if XmlConstant.IS_MYSQL: + child.attrib['value'] = 'on' if XmlConstant.MYSQL_META_ON else 'off' + + def update_share_fs_info(self): + if XmlConstant.IS_DDES: + for ele in list(self.root[0].findall("PARAM")): + if ele.attrib.get("name") == "share_fs": + self.root[0].remove(ele) + return + + for ele in self.root[0].findall("PARAM"): + if ele.attrib.get("name") == "share_fs": + ele.attrib["value"] = XmlConstant.SHARE_FS + break + else: + ET.SubElement(self.root[0], "PARAM", + {"name": "share_fs", "value": XmlConstant.SHARE_FS}) + def run(self): # get locale get_locale() @@ -286,10 +344,14 @@ class GenerateTemplate: self.delete_xml_node() # delete xml ddes info self.delete_xml_ddes() + # delete xml mysql info + self.delete_xml_mysql() # delete xml cm info self.delete_xml_cm() # update xml all info self.update_xml_all_info() + # delete xml share_fs info + self.delete_xml_share_fs() # generate a new xml file self.generate_new_xml_file() # display xml info diff --git a/script/domain_utils/cluster_file/package_info.py b/script/domain_utils/cluster_file/package_info.py index 90ef7ba1..1b952ab2 100644 --- a/script/domain_utils/cluster_file/package_info.py +++ b/script/domain_utils/cluster_file/package_info.py @@ -65,6 +65,15 @@ class PackageInfo(object): """ return PackageInfo.getPackageFile(CommConstants.PKG_SERVER) + @staticmethod + def get_connector_package_file_path(): + """ + function : Get the path of bin file version. 
+ input : NA + output : str + """ + return PackageInfo.getPackageFile(CommConstants.PKG_CONNECTOR) + @staticmethod def getFileSHA256Info(): """ @@ -158,6 +167,25 @@ class PackageInfo(object): integrity_file_name = PackageInfo.getSHA256FilePath() cm_package = server_file_name.replace("Server", "CM").replace("tar.bz2", 'tar.gz') om_package = server_file_name.replace("Server", "OM").replace("tar.bz2", 'tar.gz') + connector_package = server_file_name.replace("Server", "Connector").replace("tar.bz2", "tar.gz") + + if "x86_64" in server_file_name: + cpu_arch = "x86_64" + elif "aarch64" in server_file_name: + cpu_arch = "aarch64" + else: + raise Exception(f"Unsupported CPU arch in server package: {server_file_name}") + + # 查找 Mysql_server*.tgz, 后续需要优化,enable_mysql再拷贝分发 + mysql_server_pkg = "" + for fname in os.listdir(package_path): + if fname.startswith("Mysql_server") and fname.endswith(".tgz"): + if cpu_arch in fname: + mysql_server_pkg = fname + break + else: + raise Exception(f"Mysql_server package CPU arch mismatch: {fname} " + f"(expected {cpu_arch})") tar_lists = SingleInstDiff.get_package_tar_lists(is_single_inst, os.path.normpath(package_path)) @@ -177,6 +205,10 @@ class PackageInfo(object): # add CM package to bak package if os.path.isfile(os.path.realpath(os.path.join(package_path, cm_package))): cmd += "%s " % os.path.basename(cm_package) + if os.path.isfile(os.path.realpath(os.path.join(package_path, connector_package))): + cmd += "%s " % os.path.basename(connector_package) + if mysql_server_pkg and os.path.isfile(os.path.join(package_path, mysql_server_pkg)): + cmd += f"{mysql_server_pkg} " cmd += "&& %s " % CmdUtil.getChmodCmd( str(ConstantsBase.KEY_FILE_MODE), PackageInfo.get_package_back_name()) diff --git a/script/gspylib/common/ClusterParams.py b/script/gspylib/common/ClusterParams.py index e78f2680..1f31dc9a 100644 --- a/script/gspylib/common/ClusterParams.py +++ b/script/gspylib/common/ClusterParams.py @@ -58,6 +58,8 @@ class ClusterParams: PASSWORD = 'password' CLUSTER_TYPE = 'clusterType' SHARE_FS = 'share_fs' + ENABLE_MYSQL = 'enable_mysql' + MYSQL_METADATA_IN_CANTIAN = 'mysql_metadata_in_cantian' @staticmethod def get_all_param_names(): @@ -91,7 +93,9 @@ class ClusterParams: ClusterParams.UWAL_DEVICES_PATH, ClusterParams.PASSWORD, ClusterParams.CLUSTER_TYPE, - ClusterParams.SHARE_FS + ClusterParams.SHARE_FS, + ClusterParams.ENABLE_MYSQL, + ClusterParams.MYSQL_METADATA_IN_CANTIAN ] FLOAT_IP_PATTERN = re.compile(r'\bfloatIp[0-9]+') diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index f94e0b6d..c28de63a 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -177,6 +177,7 @@ class DefaultValue(): ########################### # start timeout value TIMEOUT_CLUSTER_START = 300 + TIMEOUT_INSTANCE_START = 120 # stop timeout value TIMEOUT_CLUSTER_STOP = 300 # query timeout value @@ -1184,8 +1185,7 @@ class DefaultValue(): envList.extend([ "export CM_CONFIG_PATH=$PGDATA/cm_config.ini", "export CTDB_HOME=$PGDATA", - "export GSDB_HOME=$PGDATA", - "export share_fs=127.0.0.1:share_fs" + "export GSDB_HOME=$PGDATA" ]) if "DSS_HOME" in os.environ and os.environ["DSS_HOME"].strip(): diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 54ca2e2f..9d310a5f 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -38,6 +38,9 @@ from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile from base_utils.os.file_util import 
FileUtil from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.domain_common.cluster_constants import ClusterConstants +from domain_utils.cluster_file.profile_file import ProfileFile +from domain_utils.cluster_file.cluster_dir import ClusterDir +from domain_utils.cluster_os.cluster_user import UserUtil from base_utils.common.constantsbase import ConstantsBase from base_utils.os.env_util import EnvUtil from base_utils.os.net_util import NetUtil @@ -1016,6 +1019,9 @@ class dbClusterInfo(): self.uwal_rpc_flowcontrol_switch = "" self.uwal_rpc_flowcontrol_value = "" self.uwal_async_append_switch = "" + # add for mysql + self.enable_mysql = "" + self.mysql_metadata_in_cantian = "" def __str__(self): """ @@ -3077,82 +3083,131 @@ class dbClusterInfo(): global g_networkType self.clusterType = CLUSTER_TYPE_SINGLE_INST - # Read cluster name - self.name = self.__read_and_check_config_item(xmlRootNode, "clusterName", "cluster") - # Read application install path - self.appPath = self.__read_and_check_config_item(xmlRootNode, "gaussdbAppPath", "cluster") - # Read application log path - self.logPath = self.__read_and_check_config_item(xmlRootNode, "gaussdbLogPath", - "cluster", error_ignore=True) - if not self.logPath: - self.logPath = "/var/log/gaussdb" + # 基本路径 + self.name = self.__read_and_check_config_item(xmlRootNode, + "clusterName", "cluster") + self.appPath = self.__read_and_check_config_item(xmlRootNode, + "gaussdbAppPath", + "cluster") + + self.logPath = self.__read_and_check_config_item(xmlRootNode, + "gaussdbLogPath", + "cluster", + error_ignore=True) or "/var/log/gaussdb" if not os.path.isabs(self.logPath): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % \ - ("%s log path(%s)" % ( - VersionInfo.PRODUCT_NAME, self.logPath))) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % + ("%s log path(%s)" % + (VersionInfo.PRODUCT_NAME, self.logPath))) + + self._read_enable_dss(xmlRootNode) + self._read_enable_dcf(xmlRootNode) + g_networkType = self._read_network_type(xmlRootNode) + self._read_enable_uwal(xmlRootNode) + + if "HOST_IP" in os.environ: + self.corePath = self.__read_and_check_config_item(xmlRootNode, + "corePath", + "cluster", + True) + + self._read_enable_mysql_settings(xmlRootNode) + + def _read_enable_dss(self, xmlRootNode): + _, self.enable_dss = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_dss", "cluster") - _, self.enable_dss = ClusterConfigFile.readOneClusterConfigItem(xmlRootNode, - "enable_dss", - "cluster") if self.enable_dss.strip() == "on": self.enable_dss = self.enable_dss.strip() self.init_dss_config(xml_entiy=xmlRootNode) elif self.enable_dss.strip() not in ['off', '']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_dss', self.enable_dss)) + ('enable_dss', self.enable_dss)) + + def _read_enable_dcf(self, xmlRootNode): + _, self.enable_dcf = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_dcf", "cluster") - # Read enable_dcf - ret_status, self.enable_dcf = ClusterConfigFile.readOneClusterConfigItem(xmlRootNode, - "enable_dcf", - "cluster") if self.enable_dcf not in ['', 'on', 'off']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_dcf', self.enable_dcf)) + ('enable_dcf', self.enable_dcf)) if self.enable_dcf == 'on' and self.enable_dss == 'on': raise Exception('Only one DSS or DCF can be enabled.') if self.enable_dcf == 'on': - (ret_status, ret_value) = ClusterConfigFile.readOneClusterConfigItem( + status, cfg = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, 
"dcf_config", "CLUSTER") - if ret_status == 0: - self.dcf_config = ret_value.strip() - if self.dcf_config.count('role') - self.dcf_config.count('PASSIVE') < 3: + if status == 0: + self.dcf_config = cfg.strip() + if self.dcf_config.count('role') - \ + self.dcf_config.count('PASSIVE') < 3: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % ('dcf_config', self.dcf_config)) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % - 'dcf_config' + " Error: \n%s" % ret_value) + 'dcf_config' + " Error: \n%s" % cfg) - # Read network type - (retStatus, retValue) = ClusterConfigFile.readOneClusterConfigItem( + def _read_network_type(self, xmlRootNode) -> int: + status, val = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, "networkType", "cluster") - if retStatus == 0: - if retValue.isdigit() and int(retValue) in [0, 1]: - g_networkType = int(retValue) - else: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - "cluster network type" + " Error: \nThe parameter value must be 0 or 1.") - elif retStatus == 2: - g_networkType = 0 - else: - raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % \ - "cluster network type" + " Error: \n%s" % retValue) - - # Read enable_uwal + if status == 0: + if val.isdigit() and int(val) in (0, 1): + return int(val) + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "cluster network type" + + " Error: \nThe parameter value must be 0 or 1.") + if status == 2: + return 0 + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "cluster network type" + " Error: \n%s" % val) + + def _read_enable_uwal(self, xmlRootNode): _, self.enable_uwal = ClusterConfigFile.readOneClusterConfigItem( xmlRootNode, "enable_uwal", "cluster") if self.enable_uwal not in ['', 'on', 'off']: raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % - ('enable_uwal', self.enable_uwal)) + ('enable_uwal', self.enable_uwal)) if self.enable_uwal == 'on': self.init_uwal_config(xml_entiy=xmlRootNode) - if "HOST_IP" in os.environ.keys(): - self.corePath = self.__read_and_check_config_item(xmlRootNode, "corePath", - "cluster", True) + def _read_enable_mysql_settings(self, xmlRootNode): + # enable_mysql + status, val = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_mysql", "cluster") + if status == 0: + val = val.strip().lower() + if val in ('on', 'off', ''): + self.enable_mysql = val + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("enable_mysql", val)) + elif status != 2: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "enable_mysql" + " Error: \n%s" % val) + + # mysql_metadata_in_cantian + if self.enable_mysql == 'on': + status, meta = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "mysql_metadata_in_cantian", "cluster") + if status == 0: + meta = meta.strip().lower() + if meta in ('on', 'off', ''): + self.mysql_metadata_in_cantian = meta or 'on' + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("mysql_metadata_in_cantian", meta)) + elif status == 2: + # 未配置时默认启用 + self.mysql_metadata_in_cantian = 'on' + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "mysql_metadata_in_cantian" + + " Error: \n%s" % meta) + else: + # 未开启 MySQL 时该参数无意义 + self.mysql_metadata_in_cantian = "" def get_cluster_back_ip1s(self): # Read cluster backIp1s diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py index 71f1a546..1ec3c907 100644 --- a/script/gspylib/common/LocalBaseOM.py +++ b/script/gspylib/common/LocalBaseOM.py @@ -26,6 +26,7 @@ from gspylib.common.ErrorCode import ErrorCode from 
gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP from gspylib.component.DSS.dss_comp import Dss from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP +from gspylib.component.MySql.mysql_comp import Mysql from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from base_utils.os.user_util import UserUtil @@ -77,6 +78,7 @@ class LocalBaseOM(object): self.dnCons = [] self.gtsCons = [] self.dss_cons = [] + self.mysqlCons = [] self.paxos_mode = paxos_mode self.dss_mode = dss_mode self.dss_config = dss_config @@ -91,6 +93,7 @@ class LocalBaseOM(object): self.initCmComponent() self.initKernelComponent(paxos_mode) self.init_dss_component(self.dss_mode) + self.initMysqlComponent() def init_dss_component(self, dss_mode=False): @@ -158,6 +161,18 @@ class LocalBaseOM(object): component.dorado_cluster_mode = self.dorado_cluster_mode self.dnCons.append(component) + def initMysqlComponent(self): + enable_flag = os.getenv("ENABLE_MYSQL") + if str(enable_flag).strip().lower() not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL is not set to on, skip mysql initialization") + return + + comp = Mysql() + comp.logger = self.logger + comp.user = self.user + comp.mysqlRoot = os.path.join(os.path.dirname(self.clusterInfo.appPath), "mysql") + self.mysqlCons.append(comp) + def readConfigInfo(self): """ function: Read config from static config file diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 4f6328ac..249675d0 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -18,6 +18,7 @@ import sys import os import re +import time sys.path.append(sys.path[0] + "/../../../../") from gspylib.common.ErrorCode import ErrorCode @@ -31,6 +32,7 @@ from base_utils.os.compress_util import CompressUtil from base_utils.os.env_util import EnvUtil from base_utils.os.file_util import FileUtil from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile +from domain_utils.cluster_file.profile_file import ProfileFile from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.grep_util import GrepUtil from base_utils.os.user_util import UserUtil @@ -168,30 +170,52 @@ class DN_OLAP(Kernel): def create_database(self, gauss_home: str): """ - 1. 复制 GAUSSHOME/admin → CTDB_HOME/admin + 创建 Cantian 数据库 + + 步骤 + ---- + 0. 使用 `ctsql -c 'exit'` 先验证连接 + 1. 复制 GAUSSHOME/admin → CTDB_HOME/admin 2. 确保 CTDB_HOME/data 目录存在且为空:如有内容先清空 - 3. 渲染 create_database.sql.template → CTDB_HOME/data/create_database.sql + 3. 渲染 create_database.sql.template → CTDB_HOME/create_database.sql 4. 
执行建库脚本 """ ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: raise Exception("CTDB_HOME is not defined.") - # 准备创库sql + test_cmd = ( + f"ctsql '/' as sysdba -q " + f"-D {self.instInfo.datadir} " + f"-c 'exit'" + ) + self.logger.debug(f"[CreateDB] test connection: {test_cmd}") + rc, out = CmdUtil.retryGetstatusoutput(test_cmd) + if rc != 0: + raise Exception("Cannot connect to Cantian, abort create_database():\n" + out) + src_admin = os.path.join(gauss_home, "admin") dst_admin = os.path.join(ctdb_home, "admin") FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, dst_admin, recursive=True) + data_dir = os.path.join(ctdb_home, "data") + if os.path.isdir(data_dir): + FileUtil.cleanDirectoryContent(data_dir) + else: + FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) + tpl_sql = os.path.join(gauss_home, "share", "cantian", - "create_database.sql.template") + "create_database.sql.template") work_sql = os.path.join(ctdb_home, "create_database.sql") self.copy_and_render_file(tpl_sql, work_sql) - cmd = (f"ctsql '/' as sysdba -q " - f"-D {self.instInfo.datadir} -f {work_sql}") - self.logger.debug(cmd) - st, out = CmdUtil.retryGetstatusoutput(cmd) + run_sql = ( + f"ctsql '/' as sysdba -q " + f"-D {self.instInfo.datadir} -f {work_sql}" + ) + self.logger.debug(f"[CreateDB] create cmd: {run_sql}") + st, out = CmdUtil.retryGetstatusoutput(run_sql, retry_time=0) if st != 0: raise Exception("create database failed:\n" + out) @@ -199,7 +223,7 @@ class DN_OLAP(Kernel): def prepare_data_dir(self) -> None: """ - 1. 从环境变量 share_fs / SHARE_FS 获取 NFS,如 10.1.1.1:/share_dir + 1. 从环境变量 SHARE_FS 获取 NFS,格式形如 10.1.1.1:share_dir 2. 主节点:sudo 挂 root → 创建/清空 cantianData → 卸 root 3. 所有节点:sudo 挂 cantianData → chown 给业务用户 """ @@ -210,21 +234,20 @@ class DN_OLAP(Kernel): FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) self.logger.debug(f"[prepare_data_dir] data_dir = {data_dir}") - # ---------- 1. 读取 share_fs ---------- - share_fs_val = os.getenv("share_fs") or os.getenv("SHARE_FS") or "" - self.logger.debug(f"[prepare_data_dir] env share_fs = {share_fs_val or ''}") + share_fs_val = os.getenv("SHARE_FS", "") + self.logger.debug(f"[prepare_data_dir] env SHARE_FS = {share_fs_val or ''}") if not share_fs_val: - self.logger.debug("[prepare_data_dir] share_fs not set; skip.") + self.logger.debug("[prepare_data_dir] SHARE_FS not set; skip.") return if ":" not in share_fs_val: - raise ValueError(f"share_fs invalid: {share_fs_val!r} (expect :)") + raise ValueError(f"SHARE_FS invalid: {share_fs_val!r} (expect :)") host_ip, fs_dir = share_fs_val.split(":", 1) if host_ip in ("127.0.0.1", "localhost"): - self.logger.debug("[prepare_data_dir] share_fs points to localhost; skip.") + self.logger.debug("[prepare_data_dir] SHARE_FS points to localhost; skip.") return - nfs_root = f"{host_ip}:/{fs_dir}" + nfs_root = f"{host_ip}:/{fs_dir}" nfs_subdir = f"{host_ip}:/{fs_dir}/cantianData" self.logger.debug(f"[prepare_data_dir] nfs_root = {nfs_root}") self.logger.debug(f"[prepare_data_dir] nfs_subdir = {nfs_subdir}") @@ -233,7 +256,7 @@ class DN_OLAP(Kernel): mount_opt = "-o rw,soft,timeo=600,retrans=2" if self.is_primary_node(): - # 3. 挂 root + # 2. 挂 root cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_root} {data_dir}" self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) @@ -242,7 +265,7 @@ class DN_OLAP(Kernel): CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") - # 4. 
创建 / 清空 cantianData + # 3. 创建 / 清空 cantianData cantian_dir = os.path.join(data_dir, "cantianData") if os.path.exists(cantian_dir): FileUtil.cleanDirectoryContent(cantian_dir) @@ -254,10 +277,10 @@ class DN_OLAP(Kernel): CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") raise RuntimeError(f"create cantianData failed:\n{out}") - # 5. 卸 root + # 4. 卸 root CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n umount -l {data_dir}") - # ---------- 6. 所有节点挂 cantianData ---------- + # ---------- 所有节点挂 cantianData ---------- cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_subdir} {data_dir}" self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) @@ -275,6 +298,13 @@ class DN_OLAP(Kernel): gauss_home = EnvUtil.getEnv("GAUSSHOME") pgdata = self.instInfo.datadir + # -------- Admin 目录 -------------------------------------- + src_admin = os.path.join(gauss_home, "admin") + dst_admin = os.path.join(pgdata, "admin") + FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") + FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, + dst_admin, recursive=True) + # -------- Cantian 配置 ----------------------------------------- cantian_ini_tpl = os.path.join(gauss_home, "share", "cantian", "cantiand.ini.sample") @@ -347,19 +377,14 @@ class DN_OLAP(Kernel): self.create_database(gauss_home) pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" - status, _ = CmdUtil.retryGetstatusoutput(pgrep_cmd) - - if status != 0: + if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: self.logger.debug("cantiand is not running, skip shutdown.") - return - - shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" - st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) - - if st != 0: - raise Exception(f"Cantiand shutdown failed:\n{out}") - - self.logger.debug("Cantiand shutdown successfully") + else: + shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) + if st != 0: + raise Exception(f"Cantiand shutdown failed:\n{out}") + self.logger.debug("Cantiand shutdown successfully.") @Dss.catch_err(exist_so=True) def initInstance(self): @@ -548,13 +573,25 @@ class DN_OLAP(Kernel): def getCantianDict(self) -> dict: """ - 写入 cantiand.ini 的 3 个字段 + 写入 cantiand.ini 的参数 """ - return { + meta_flag = "TRUE" + gid = os.getgid() + + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if getattr(cluster, "mysql_metadata_in_cantian", "on").lower() == "off": + meta_flag = "FALSE" + + cantian_dict = { "LSNR_ADDR": self.instInfo.listenIps[0], "LSNR_PORT": str(self.instInfo.port), "SS_INSTANCE_ID": self.calc_ss_instance_id(), + "MYSQL_METADATA_IN_CANTIAN": meta_flag, + "MYSQL_DEPLOY_GROUP_ID": str(gid), } + return cantian_dict def get_ss_inter_url(self) -> str: """ diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 1878d42f..88443a5d 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -34,6 +34,8 @@ from base_utils.os.file_util import FileUtil from base_utils.security.security_checker import SecurityChecker from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.net_util import NetUtil +from base_utils.os.user_util import UserUtil +from gspylib.common.DbClusterInfo import dbClusterInfo MAX_PARA_NUMBER = 1000 TIMR_OUT = 60 @@ -69,37 +71,47 @@ class Kernel(BaseComponent): start/stop/query single instance """ - def start(self, is_dss_mode: bool 
= False, time_out=DefaultValue.TIMEOUT_CLUSTER_START): + def start(self, is_dss_mode: bool = False, + time_out: int = DefaultValue.TIMEOUT_INSTANCE_START): """ 启动 Cantian 实例 - • 普通模式:后台执行 `cantiand open -D ` - • DSS 模式目前跳过 + • 普通模式:后台执行 setsid cantiand open -D + • DSS 模式暂未实现 """ - # dss 需要修改 + # 临时适配,后面需要调整 + self.adjust_cm_bitmap() + # if is_dss_mode: - # self.logger.debug("DSS mode start skipped (not yet supported).") + # self.logger.debug("DSS mode start skipped.") # return data_dir = self.instInfo.datadir + ct_exec_dir = os.path.join(data_dir, "data") + + if not os.path.isdir(ct_exec_dir): + FileUtil.createDirectory(ct_exec_dir, DefaultValue.KEY_DIRECTORY_MODE) + + start_cmd = ( + f"cd {ct_exec_dir} && " + f"nohup sh -c 'cantiand open -D {data_dir} >/dev/null 2>&1 &' " + f">/dev/null 2>&1" + ) + + self.logger.debug(f"[Kernel] start cmd = {start_cmd}") + CmdUtil.retryGetstatusoutput(start_cmd, retry_time=0) + + if not Kernel.isDbOpen( + pgdata=data_dir, + logger=self.logger, + timeout=time_out, + interval=5): + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51607"] + % "Cantian instance" + + " Error: timeout waiting for DB OPEN." + ) - cmd = f"nohup cantiand open -D {data_dir} > /dev/null 2>&1 &" - self.logger.debug(f"start cmd = {cmd}") - status, out = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] - % "Cantian instance" + "\nError: " + out) - - for _ in range(time_out): - st, _ = subprocess.getstatusoutput( - f"pgrep -f \"cantiand[^ ]* open -D {data_dir}\"") - if st == 0: - self.logger.log("Cantian instance start success.") - return - time.sleep(1) - - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] - % "Cantian instance" - + " Error: timeout waiting for process ready.") + self.logger.log("Cantian instance start success.") def stop(self, stopMode: str = "", time_out: int = 300): """ @@ -154,53 +166,84 @@ class Kernel(BaseComponent): "Cantian instance" + "\nError: process still alive.") self.logger.log("Cantian instance stopped (force).") - def isDbOpen(self, timeout: int = 30, interval: int = 5) -> bool: + + def adjust_cm_bitmap(self) -> None: + """ + 双节点:把本节点 cm_config.ini 的 BITMAP_ONLINE 设为 3。 + """ + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + + if len(cluster.dbNodes) != 2: + self.logger.debug("[adjust_bitmap_online] node_num!=2, skip.") + return + + cm_ini = os.getenv("CM_CONFIG_PATH", "").strip() + + if not cm_ini: + cydb_home = os.getenv("CTDB_HOME", "").strip() + if cydb_home: + cm_ini = os.path.join(cydb_home, "cm_config.ini") + + if not cm_ini: + local_ips = set(NetUtil.getAllIps()) + for dbNode in cluster.dbNodes: + if any(ip in local_ips for ip in dbNode.backIps): + cm_ini = os.path.join(dbNode.datanodes[0].datadir, + "cm_config.ini") + break + + if not cm_ini: + raise Exception("Cannot resolve local cm_config.ini path.") + + self.logger.debug(f"[adjust_bitmap_online] target cm_ini: {cm_ini}") + + self.updateConfig(cm_ini, {"BITMAP_ONLINE": "3"}) + self.logger.debug("BITMAP_ONLINE set to 3.") + + @staticmethod + def isDbOpen(pgdata: str, + logger, + timeout: int = DefaultValue.TIMEOUT_INSTANCE_START, + interval: int = 5) -> bool: """ Cantian 健康探针(视图版) 轮询 DV_DATABASE,若 STATUS 列为 OPEN 即返回 True。 """ - pgdata = self.instInfo.datadir sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" ctsql = f"ctsql '/' as sysdba -q -D {pgdata} -c \"{sql_cmd}\"" + deadline = time.time() + timeout - expire_ts = time.time() + timeout - - while time.time() < expire_ts: 
+ while time.time() < deadline: stat, out = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) if stat != 0: - self.logger.debug(f"ctsql failed: {out.strip()}") + logger.debug(f"[isDbOpen] ctsql failed: {out.strip()}") time.sleep(interval) continue - data_line = None - got_sep = False + data_line, sep_seen = None, False for ln in out.splitlines(): ln = ln.strip() if not ln or ln.startswith(("Please enter", "connected.", "SQL>")): continue - if re.match(r"^NAME\s+", ln, re.I): - continue if re.match(r"^-{5,}", ln): - got_sep = True + sep_seen = True continue - if got_sep: + if sep_seen: data_line = ln break - if not data_line: - time.sleep(interval) - continue - - cols = re.split(r"\s+", data_line) - db_status = cols[1].upper() if len(cols) >= 2 else "" - self.logger.debug(f"ctsql db status: {db_status}") - - if db_status == "OPEN": - return True + if data_line: + cols = re.split(r"\s+", data_line) + status = cols[1].upper() if len(cols) >= 2 else "" + logger.debug(f"[isDbOpen] STATUS = {status}") + if status == "OPEN": + return True time.sleep(interval) - self.logger.debug("Cantiand start timeout") + logger.debug("[isDbOpen] timeout waiting Cantian OPEN") return False def build(self, buidMode="full", standByBuildTimeout=300): @@ -315,68 +358,75 @@ class Kernel(BaseComponent): def cleanDir(self, instDir): """ - 删除实例目录(Cantian + openGauss + DSS),(若为挂载点先 umount 再清空) - * DSS *.ini 的挂载点由上层 DssConfig 统一解析处理 + 删除实例目录(umount + remove) """ if not os.path.exists(instDir): return - dataDir = os.listdir(instDir) + try: + mounts = [] + with open("/proc/self/mounts") as fp: + for line in fp: + mp = line.split()[1] + if mp == instDir or mp.startswith(instDir + "/"): + mounts.append(mp) + + mounts.sort(key=len, reverse=True) + + for mp in mounts: + FileUtil.umount_if_mounted(mp, self.logger) + except Exception as exc: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + f" Error during umount: {exc}" + ) if os.getuid() == 0: pglDir = os.path.join(instDir, "pg_location") - isPglDirEmpty = os.path.isdir(pglDir) and len(os.listdir(pglDir)) == 0 - if not dataDir or isPglDirEmpty: + isPglDirEmpty = os.path.isdir(pglDir) and not os.listdir(pglDir) + if not os.listdir(instDir) or isPglDirEmpty: FileUtil.cleanDirectoryContent(instDir) return - if "pg_location" in dataDir: + if "pg_location" in os.listdir(instDir): pglDir = os.path.join(instDir, "pg_location") - cmd = "{} | {} '{}' | {} '{{printf $3}}'".format( - CmdUtil.getMountCmd(), CmdUtil.getGrepCmd(), - pglDir, CmdUtil.getAwkCmd() - ) - status, mp_out = subprocess.getstatusoutput(cmd) - if status != 0: - raise Exception( - ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + - f" Error:\n{mp_out}.\nThe cmd is {cmd}" - ) - - # 先卸载挂载点,再清空内容 - for mp in mp_out.split(): - if mp: - subprocess.call(f"umount -l {mp}", shell=True) - FileUtil.cleanDirectoryContent(mp) - FileUtil.cleanDirectoryContent(pglDir) - # ---------- 删除非白名单条目 ---------- ignores = [ "pg_location", "cfg", "log", "dss_inst.ini", "dss_vg_conf.ini", "nodedata.cfg", ".", ".." ] extra_cmd = "! -name".join([f" '{ig}' " for ig in ignores]) - cmd = ( f"if [ -d '{instDir}' ]; then cd '{instDir}' && " f"find . ! 
-name {extra_cmd} -print0 | " "xargs -r -0 -n100 rm -rf; fi" ) - status, output = subprocess.getstatusoutput(cmd) - if status != 0: + st, out = subprocess.getstatusoutput(cmd) + if st != 0: raise Exception( ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + - f" Error:\n{output}.\nThe cmd is {cmd}" + f" Error:\n{out}.\nThe cmd is {cmd}" ) if os.listdir(instDir): FileUtil.cleanDirectoryContent(instDir) if os.listdir(instDir): raise Exception( - ErrorCode.GAUSS_502["GAUSS_50207"] % - instDir + " still not empty after cleanDir()." + ErrorCode.GAUSS_502["GAUSS_50207"] % instDir + + " still not empty after cleanDir()." + ) + + mysql_dir = os.path.abspath(os.path.join(instDir, "..", "mysql")) + self.logger.debug(f"[Cantian] cleanDir: {mysql_dir} is not empty.") + if os.path.isdir(mysql_dir): + try: + FileUtil.cleanDirectoryContent(mysql_dir) + except Exception as exc: + raise Exception( + ErrorCode.GAUSS_502["GAUSS_50207"] % mysql_dir + + f" Error while cleaning mysql dir: {exc}" ) self.logger.debug(f"[Cantian] cleanDir finished: {instDir} is empty.") diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py new file mode 100644 index 00000000..d490f215 --- /dev/null +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -0,0 +1,217 @@ +# -*- coding:utf-8 -*- +import os +import time +import re +from pathlib import Path + +from base_utils.os.env_util import EnvUtil +from base_utils.os.cmd_util import CmdUtil +from base_utils.os.file_util import FileUtil +from gspylib.component.BaseComponent import BaseComponent +from gspylib.component.Kernel.Kernel import Kernel +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.DbClusterInfo import dbClusterInfo +from domain_utils.cluster_os.cluster_user import UserUtil +from gspylib.common.Common import DefaultValue + + +class Mysql(BaseComponent): + @staticmethod + def _gauss_home() -> Path: + gh = EnvUtil.getEnv("GAUSSHOME") + if not gh: + raise EnvironmentError("GAUSSHOME is not set.") + return Path(gh).resolve() + + @staticmethod + def _mysql_home() -> Path: + mh = EnvUtil.getEnv("MYSQL_HOME") + if not mh or not Path(mh).is_dir(): + raise EnvironmentError("MYSQL_HOME is invalid.") + return Path(mh).resolve() + + @staticmethod + def _is_primary_node() -> bool: + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if not cluster.dbNodes: + return True + first_node = cluster.dbNodes[0] + local_host = os.uname().nodename + return local_host in first_node.backIps or local_host == first_node.name + + @staticmethod + def _cantian_datadir() -> str: + user = UserUtil.getUserInfo()["name"] + cluster = dbClusterInfo() + cluster.initFromStaticConfig(user) + if not cluster.dbNodes or not cluster.dbNodes[0].datanodes: + raise Exception("cluster_static.conf missing dbNode or datanode.") + return cluster.dbNodes[0].datanodes[0].datadir + + def initInstance(self): + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + return + + if not self._is_primary_node(): + self.logger.log("[MySQL] Not primary node, skip initInstance.") + return + + try: + gauss_home = self._gauss_home() + mysql_home = self._mysql_home() + data_dir = mysql_home / "data" + plugin_dir = gauss_home / "lib" + cnf_file = gauss_home / "share" / "mysql" / "my.cnf" + bin_mysqld = mysql_home / "bin" / "mysqld" + + ct_data_dir = self._cantian_datadir() + ct_exec_dir = 
Path(ct_data_dir) / "data" + if not ct_exec_dir.exists(): + FileUtil.createDirectory(str(ct_exec_dir), DefaultValue.KEY_DIRECTORY_MODE) + + ct_cmd = ( + f"cd {ct_exec_dir} && " + f"nohup sh -c 'cantiand open -D {ct_data_dir} >/dev/null 2>&1 &' " + f">/dev/null 2>&1" + ) + self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") + CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) + + if not Kernel.isDbOpen( + pgdata=ct_data_dir, + logger=self.logger, + timeout=DefaultValue.TIMEOUT_INSTANCE_START, + interval=5): + raise RuntimeError("Cantian open timeout (DV_DATABASE not OPEN)") + + if data_dir.exists(): + FileUtil.removeDirectory(str(data_dir)) + FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) + + # 3. 初始化 MySQL + init_cmd = ( + f"{bin_mysqld} " + f"--defaults-file={cnf_file} --initialize-insecure " + f"--datadir={data_dir} " + f"--plugin-dir={plugin_dir} " + f'--early-plugin-load="ha_ctc.so"' + ) + self.logger.debug(f"[MySQL] init_cmd: {init_cmd}") + ret, _ = CmdUtil.retryGetstatusoutput(init_cmd, retry_time=0) + if ret != 0: + raise RuntimeError(f"mysqld initialize failed, exit code {ret}") + + # 4. 关闭 Cantian + self._exec(f"ctsql '/' as sysdba -q -D {ct_data_dir} -c 'shutdown'", + ignore_error=True) + + except Exception as e: + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51610"] % + f"MySQL.initInstance failed: {e}" + ) + + def _wait_mysql_ready(self, log_file: Path, timeout_sec: int) -> bool: + """ + 轮询日志直到出现 “ready for connections” + """ + check_cmd = f"grep -q 'ready for connections' {log_file}" + retry = max(1, int(timeout_sec) // 3) + rc, _ = CmdUtil.retryGetstatusoutput( + check_cmd, + retry_time=retry, + sleep_time=3 + ) + return rc == 0 + + def start(self, timeout: int = DefaultValue.TIMEOUT_INSTANCE_START): + """ + 启动 MySQL + """ + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + return + + mysql_home = self._mysql_home() + gauss_home = self._gauss_home() + + plugin_dir = gauss_home / "lib" + cnf_file = gauss_home / "share" / "mysql" / "my.cnf" + bin_safe = mysql_home / "bin" / "mysqld" + data_dir = mysql_home / "data" + + if not data_dir.exists(): + FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) + self.logger.debug(f"[MySQL] auto-create data_dir: {data_dir}") + + gausslog_dir = os.getenv("GAUSSLOG", str(mysql_home / "log")) + Path(gausslog_dir).mkdir(parents=True, exist_ok=True) + log_file = Path(gausslog_dir) / "mysql.log" + + start_cmd = ( + f"nohup {bin_safe} " + f"--defaults-file={cnf_file} " + f"--plugin-dir={plugin_dir} " + f"--datadir={data_dir} " + f'--early-plugin-load="ha_ctc.so" ' + f"> {log_file} 2>&1 ha_ctc_share.so + off -> ha_ctc_noshare.so + """ + connector_dir = os.path.join(tmp_dir, "connector") + + dest_lib_dir = os.path.join(self.installPath, "lib") + FileUtil.createDirectory(dest_lib_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + + want_share = getattr(self.clusterInfo, + "mysql_metadata_in_cantian", + "on").lower() == "on" + + so_share, so_noshare = "ha_ctc_share.so", "ha_ctc_noshare.so" + proxy_so = "libctc_proxy.so" + selected_so = so_share if want_share else so_noshare + + required_libs = [proxy_so, selected_so] + missing = [lib for lib in required_libs + if not os.path.isfile(os.path.join(connector_dir, lib))] + if missing: + raise Exception("Missing library file(s) in connector package: " + ", ".join(missing)) + + for lib in required_libs: + src = os.path.join(connector_dir, lib) 
+ dst = os.path.join(dest_lib_dir, lib) + CmdUtil.execCmd(f"cp -pf '{src}' '{dst}'") + + link_path = os.path.join(dest_lib_dir, "ha_ctc.so") + CmdUtil.execCmd(f"ln -sfn '{selected_so}' '{link_path}'") + + src_mysql_share = os.path.join(tmp_dir, "share", "mysql") + if os.path.isdir(src_mysql_share): + dest_share_root = os.path.join(self.installPath, "share") + dest_mysql_dir = os.path.join(dest_share_root, "mysql") + + FileUtil.createDirectory(dest_share_root, True, + DefaultValue.KEY_DIRECTORY_MODE) + + if os.path.isdir(dest_mysql_dir): + FileUtil.removeDirectory(dest_mysql_dir) + + CmdUtil.execCmd(f"cp -r '{src_mysql_share}' '{dest_share_root}'") + + FileUtil.changeOwner(self.user, dest_mysql_dir, True) + self.logger.log(f"Copied connector share/mysql to {dest_mysql_dir}") + else: + self.logger.debug("No share/mysql directory found in connector package; skip.") + + FileUtil.changeOwner(self.user, dest_lib_dir, True) + + self.logger.log(f"MySQL connector libs deployed to {dest_lib_dir} " + f"(copied: {', '.join(required_libs)}, symlink ha_ctc.so -> {selected_so})") + + def decompress_mysql_server(self): + """ + Decompress Mysql_server*x86_64*.tgz / Mysql_server*aarch64*.tgz + """ + if getattr(self.clusterInfo, "enable_mysql", "off").lower() != "on": + self.logger.debug("enable_mysql != on, skip Mysql_server package.") + return + + server_pkg = os.path.basename(PackageInfo.getPackageFile(CommConstants.PKG_SERVER)) + if "x86_64" in server_pkg: + arch_tag = "x86_64" + elif "aarch64" in server_pkg: + arch_tag = "aarch64" + else: + raise Exception(f"Unsupported CPU arch in server package: {server_pkg}") + + gphome = EnvUtil.getEnvironmentParameterValue("GPHOME", self.user) + mysql_pkg = "" + for fname in os.listdir(gphome): + if fname.startswith("Mysql_server") and fname.endswith(".tgz") and arch_tag in fname: + mysql_pkg = os.path.join(gphome, fname) + break + if not mysql_pkg: + raise Exception(f"Mysql_server package ({arch_tag}) not found in {gphome}") + self.logger.debug(f"[Mysql_server] path = {mysql_pkg}") + + install_root = os.path.dirname(self.installPath) + tmp_dir = os.path.join(install_root, "tmp", "mysql_server") + FileUtil.createDirectory(tmp_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + + tar_cmd = f"tar -zxf '{mysql_pkg}' -C '{tmp_dir}'" + self.logger.log(f"Decompressing Mysql_server: {tar_cmd}") + status, output = subprocess.getstatusoutput(tar_cmd) + if status != 0: + raise Exception(f"Failed to decompress Mysql_server.\n{output}") + + src_mysql_dir = os.path.join(tmp_dir, "Mysql_server", "mysql") + if not os.path.isdir(src_mysql_dir): + raise Exception(f"Mysql_server package format error: missing {src_mysql_dir}") + + dest_mysql_dir = os.path.join(install_root, "mysql") + if os.path.isdir(dest_mysql_dir): + FileUtil.removeDirectory(dest_mysql_dir) + + FileUtil.cpFile(src_mysql_dir, dest_mysql_dir) + FileUtil.changeOwner(self.user, dest_mysql_dir, True) + + FileUtil.removeDirectory(tmp_dir) + self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") + def generate_dss_path(self): """ Generate dss path @@ -448,7 +598,6 @@ class Install(LocalBaseOM): FileUtil.createDirectory(dss_app_path, True, DefaultValue.KEY_DIRECTORY_MODE) self.logger.debug("Create dss app path successfully.") - def generate_install_path(self): """ Generate install path @@ -509,6 +658,10 @@ class Install(LocalBaseOM): # decompress CM package self.decompress_cm_package() + # decompress connector package + self.decompress_mysql_connector() + # decompress Mysql_server package + self.decompress_mysql_server() 
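The connector hunks above never install a real file named ha_ctc.so: libctc_proxy.so plus exactly one variant (ha_ctc_share.so when mysql_metadata_in_cantian is on, ha_ctc_noshare.so otherwise) are copied into $GAUSSHOME/lib, and ha_ctc.so is published only as a symlink pointing at the chosen variant. A minimal standalone sketch of that selection step follows; the install path in the usage comment is hypothetical and the helper itself is an illustration, not code taken from the patch.

import os

def link_ha_ctc(lib_dir, metadata_in_cantian):
    """Point lib_dir/ha_ctc.so at the share or noshare connector library."""
    target = "ha_ctc_share.so" if metadata_in_cantian else "ha_ctc_noshare.so"
    link_path = os.path.join(lib_dir, "ha_ctc.so")
    tmp_link = link_path + ".new"
    if os.path.lexists(tmp_link):          # remove a stale temp link if present
        os.remove(tmp_link)
    os.symlink(target, tmp_link)           # relative target, like `ln -sfn`
    os.replace(tmp_link, link_path)        # atomically swap the new link in
    return link_path

# hypothetical usage: link_ha_ctc("/opt/gaussdb/app/lib", metadata_in_cantian=True)
# afterwards `ls -l <lib_dir>/ha_ctc.so` shows at a glance which variant is active.

Because the plugin name is only a link, switching metadata modes later needs the other .so copied in and the link repointed, not a re-deploy of the whole connector package.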
# change owner for tar file. FileUtil.changeOwner(self.user, self.installPath, True) @@ -764,6 +917,9 @@ class Install(LocalBaseOM): for dn in self.dnCons: dn.start(self.time_out) + for my in self.mysqlCons: + my.start(self.time_out) + def buildStandby(self): """ function: build standby diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 34211956..14f18799 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1730,6 +1730,13 @@ Common options: "^\\s*export\\s*PYTHONPATH=\\$GPHOME\\/lib") self.logger.debug( "Deleting crash PYTHONPATH in user environment variables.") + # 清理 MySQL & SHARE_FS + FileUtil.deleteLine(userProfile, r"^\s*export\s*ENABLE_MYSQL=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*SHARE_FS=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*MYSQL_HOME=.*$") + FileUtil.deleteLine(userProfile, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + def setToolEnv(self): """ @@ -1771,7 +1778,7 @@ Common options: datadir = node_info.datanodes[0].datadir FileUtil.writeFile(userProfile, ["export PGDATA=%s" % datadir]) - + # set COREPATH FileUtil.writeFile(userProfile, ["export COREPATH=%s" % self.cluster_core_path]) # set PGDATABASE @@ -1803,7 +1810,41 @@ Common options: # set PYTHONPATH FileUtil.writeFile(userProfile, ["export PYTHONPATH=$GPHOME/lib"]) - + share_fs_env = ClusterConfigFile.getOneClusterConfigItem( + "share_fs", self.clusterConfig).strip() + enable_mysql_flag = ClusterConfigFile.getOneClusterConfigItem( + "enable_mysql", self.clusterConfig).strip().lower() or "off" + mysql_meta_flag = ClusterConfigFile.getOneClusterConfigItem( + "mysql_metadata_in_cantian", self.clusterConfig).strip().lower() or "on" + + if share_fs_env: + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*SHARE_FS=.*$") + FileUtil.writeFile(prof, [f"export SHARE_FS={share_fs_env}"]) + self.logger.debug(f"Successfully flush 'export SHARE_FS={share_fs_env}'") + else: + self.logger.debug("share_fs is default or empty; skip 'export SHARE_FS'") + + if enable_mysql_flag == "on": + mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + + FileUtil.writeFile(prof, [ + "export ENABLE_MYSQL=on", + f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", + f"export MYSQL_HOME={mysql_home}", + r"export PATH=$MYSQL_HOME/bin:$PATH" + ]) + self.logger.debug( + f"Flush ENABLE_MYSQL=on MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag} " + f"MYSQL_HOME={mysql_home}" + ) + else: + self.logger.debug("[MySQL] enable_mysql=off; skip MySQL env export") except Exception as e: self.logger.logExit(str(e)) self.logger.debug("Successfully set tool ENV.") diff --git a/script/local/StartInstance.py b/script/local/StartInstance.py index ebbae1ca..50474d5b 100644 --- a/script/local/StartInstance.py +++ b/script/local/StartInstance.py @@ -172,6 +172,9 @@ General options: if not isDataDirCorrect: raise Exception(ErrorCode.GAUSS_536["GAUSS_53610"] % self.dataDir) + for my in self.mysqlCons: + my.start(self.time_out) + def main(): """ -- Gitee From 15cd36edb43798282a4c34cf0a1f48c00ac3c942 Mon Sep 17 
00:00:00 2001 From: Duanshaohua Date: Tue, 24 Jun 2025 17:44:23 +0800 Subject: [PATCH 006/144] cantian2.0-mysql-2 --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 249675d0..9162bf18 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -298,13 +298,6 @@ class DN_OLAP(Kernel): gauss_home = EnvUtil.getEnv("GAUSSHOME") pgdata = self.instInfo.datadir - # -------- Admin 目录 -------------------------------------- - src_admin = os.path.join(gauss_home, "admin") - dst_admin = os.path.join(pgdata, "admin") - FileUtil.cpFile(src_admin, dst_admin, cmd_type="shell") - FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, - dst_admin, recursive=True) - # -------- Cantian 配置 ----------------------------------------- cantian_ini_tpl = os.path.join(gauss_home, "share", "cantian", "cantiand.ini.sample") -- Gitee From ff171c3e98922d1a0f5023735cf69a85cb2417f5 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 26 Jun 2025 14:51:29 +0800 Subject: [PATCH 007/144] cantian2.0-mysql-2 --- script/gspylib/component/MySql/mysql_comp.py | 27 +++----------------- script/local/InitInstance.py | 6 ----- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index d490f215..4f721a05 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -68,26 +68,6 @@ class Mysql(BaseComponent): cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_mysqld = mysql_home / "bin" / "mysqld" - ct_data_dir = self._cantian_datadir() - ct_exec_dir = Path(ct_data_dir) / "data" - if not ct_exec_dir.exists(): - FileUtil.createDirectory(str(ct_exec_dir), DefaultValue.KEY_DIRECTORY_MODE) - - ct_cmd = ( - f"cd {ct_exec_dir} && " - f"nohup sh -c 'cantiand open -D {ct_data_dir} >/dev/null 2>&1 &' " - f">/dev/null 2>&1" - ) - self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") - CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) - - if not Kernel.isDbOpen( - pgdata=ct_data_dir, - logger=self.logger, - timeout=DefaultValue.TIMEOUT_INSTANCE_START, - interval=5): - raise RuntimeError("Cantian open timeout (DV_DATABASE not OPEN)") - if data_dir.exists(): FileUtil.removeDirectory(str(data_dir)) FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) @@ -105,10 +85,6 @@ class Mysql(BaseComponent): if ret != 0: raise RuntimeError(f"mysqld initialize failed, exit code {ret}") - # 4. 
关闭 Cantian - self._exec(f"ctsql '/' as sysdba -q -D {ct_data_dir} -c 'shutdown'", - ignore_error=True) - except Exception as e: raise Exception( ErrorCode.GAUSS_516["GAUSS_51610"] % @@ -137,6 +113,9 @@ class Mysql(BaseComponent): self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") return + # 拉起mysql前先主节点初始化mysql + self.initInstance() + mysql_home = self._mysql_home() gauss_home = self._gauss_home() diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index f92adbbd..50b2aa73 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -226,10 +226,8 @@ class initDbNode(LocalBaseOM): if not vc_mode: components = (self.etcdCons + self.cmCons + self.gtmCons + self.cnCons + self.dnCons) - mysql_components = self.mysqlCons else: components = self.dnCons - mysql_components = [] try: # config instance in parallel if self.dss_cons: @@ -237,10 +235,6 @@ class initDbNode(LocalBaseOM): parallelTool.parallelExecute(self.initInstance, components) - if mysql_components: - self.logger.log("Initializing MySQL component ...") - parallelTool.parallelExecute(self.initInstance, - mysql_components) except Exception as e: self.logger.logExit(str(e)) -- Gitee From 7e0dba1e8d44b99f8b95bd0077ab1044e9a2931b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 26 Jun 2025 16:35:08 +0800 Subject: [PATCH 008/144] cantian2.0-mysql-2 --- script/gspylib/component/MySql/mysql_comp.py | 5 ++++- script/local/InitInstance.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 4f721a05..8f35c6b6 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -51,6 +51,9 @@ class Mysql(BaseComponent): return cluster.dbNodes[0].datanodes[0].datadir def initInstance(self): + pass + + def start_initInstance(self): enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() if enable_flag not in ("on", "true", "yes", "1"): self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") @@ -114,7 +117,7 @@ class Mysql(BaseComponent): return # 拉起mysql前先主节点初始化mysql - self.initInstance() + self.start_initInstance() mysql_home = self._mysql_home() gauss_home = self._gauss_home() diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index 50b2aa73..f92adbbd 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -226,8 +226,10 @@ class initDbNode(LocalBaseOM): if not vc_mode: components = (self.etcdCons + self.cmCons + self.gtmCons + self.cnCons + self.dnCons) + mysql_components = self.mysqlCons else: components = self.dnCons + mysql_components = [] try: # config instance in parallel if self.dss_cons: @@ -235,6 +237,10 @@ class initDbNode(LocalBaseOM): parallelTool.parallelExecute(self.initInstance, components) + if mysql_components: + self.logger.log("Initializing MySQL component ...") + parallelTool.parallelExecute(self.initInstance, + mysql_components) except Exception as e: self.logger.logExit(str(e)) -- Gitee From adac294d8bb9732f794e672fee53043499fc5e15 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 28 Jun 2025 16:31:53 +0800 Subject: [PATCH 009/144] cantian2.0-mysql-2 --- script/gspylib/component/MySql/mysql_comp.py | 30 ++++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 8f35c6b6..d490f215 100644 --- 
a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -51,9 +51,6 @@ class Mysql(BaseComponent): return cluster.dbNodes[0].datanodes[0].datadir def initInstance(self): - pass - - def start_initInstance(self): enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() if enable_flag not in ("on", "true", "yes", "1"): self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") @@ -71,6 +68,26 @@ class Mysql(BaseComponent): cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_mysqld = mysql_home / "bin" / "mysqld" + ct_data_dir = self._cantian_datadir() + ct_exec_dir = Path(ct_data_dir) / "data" + if not ct_exec_dir.exists(): + FileUtil.createDirectory(str(ct_exec_dir), DefaultValue.KEY_DIRECTORY_MODE) + + ct_cmd = ( + f"cd {ct_exec_dir} && " + f"nohup sh -c 'cantiand open -D {ct_data_dir} >/dev/null 2>&1 &' " + f">/dev/null 2>&1" + ) + self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") + CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) + + if not Kernel.isDbOpen( + pgdata=ct_data_dir, + logger=self.logger, + timeout=DefaultValue.TIMEOUT_INSTANCE_START, + interval=5): + raise RuntimeError("Cantian open timeout (DV_DATABASE not OPEN)") + if data_dir.exists(): FileUtil.removeDirectory(str(data_dir)) FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) @@ -88,6 +105,10 @@ class Mysql(BaseComponent): if ret != 0: raise RuntimeError(f"mysqld initialize failed, exit code {ret}") + # 4. 关闭 Cantian + self._exec(f"ctsql '/' as sysdba -q -D {ct_data_dir} -c 'shutdown'", + ignore_error=True) + except Exception as e: raise Exception( ErrorCode.GAUSS_516["GAUSS_51610"] % @@ -116,9 +137,6 @@ class Mysql(BaseComponent): self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") return - # 拉起mysql前先主节点初始化mysql - self.start_initInstance() - mysql_home = self._mysql_home() gauss_home = self._gauss_home() -- Gitee From 49a243b84c626b77f8230a8760e5ad55c326c5ed Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 28 Jun 2025 16:40:01 +0800 Subject: [PATCH 010/144] cantian2.0-mysql-2 --- script/gspylib/component/MySql/mysql_comp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index d490f215..1b16bd3c 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -67,6 +67,9 @@ class Mysql(BaseComponent): plugin_dir = gauss_home / "lib" cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_mysqld = mysql_home / "bin" / "mysqld" + gausslog_dir = os.getenv("GAUSSLOG", str(mysql_home / "log")) + Path(gausslog_dir).mkdir(parents=True, exist_ok=True) + log_file = Path(gausslog_dir) / "mysql.log" ct_data_dir = self._cantian_datadir() ct_exec_dir = Path(ct_data_dir) / "data" @@ -99,6 +102,8 @@ class Mysql(BaseComponent): f"--datadir={data_dir} " f"--plugin-dir={plugin_dir} " f'--early-plugin-load="ha_ctc.so"' + f"--core-file " + f"--log-error={log_file}" ) self.logger.debug(f"[MySQL] init_cmd: {init_cmd}") ret, _ = CmdUtil.retryGetstatusoutput(init_cmd, retry_time=0) -- Gitee From 49efef662408d589a0a699c15c427a60aa1280de Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 28 Jun 2025 17:14:38 +0800 Subject: [PATCH 011/144] cantian2.0-mysql-2 --- script/gspylib/component/MySql/mysql_comp.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 1b16bd3c..893dc1b1 
100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -67,9 +67,10 @@ class Mysql(BaseComponent): plugin_dir = gauss_home / "lib" cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_mysqld = mysql_home / "bin" / "mysqld" - gausslog_dir = os.getenv("GAUSSLOG", str(mysql_home / "log")) - Path(gausslog_dir).mkdir(parents=True, exist_ok=True) - log_file = Path(gausslog_dir) / "mysql.log" + gausslog_root = Path(os.getenv("GAUSSLOG", mysql_home / "log")) + gausslog_dir = gausslog_root / "my_log" + gausslog_dir.mkdir(parents=True, exist_ok=True) + log_file = gausslog_dir / "mysql.log" ct_data_dir = self._cantian_datadir() ct_exec_dir = Path(ct_data_dir) / "data" @@ -154,9 +155,10 @@ class Mysql(BaseComponent): FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) self.logger.debug(f"[MySQL] auto-create data_dir: {data_dir}") - gausslog_dir = os.getenv("GAUSSLOG", str(mysql_home / "log")) - Path(gausslog_dir).mkdir(parents=True, exist_ok=True) - log_file = Path(gausslog_dir) / "mysql.log" + gausslog_root = Path(os.getenv("GAUSSLOG", mysql_home / "log")) + gausslog_dir = gausslog_root / "my_log" + gausslog_dir.mkdir(parents=True, exist_ok=True) + log_file = gausslog_dir / "mysql.log" start_cmd = ( f"nohup {bin_safe} " -- Gitee From 2611cb76d1c53a7ff7702820ed2252c7445b345e Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 28 Jun 2025 18:15:49 +0800 Subject: [PATCH 012/144] cantian2.0-mysql-2 --- script/gspylib/common/DbClusterInfo.py | 2 +- script/impl/om/OLAP/OmImplOLAP.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 9d310a5f..73f6cbdd 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1345,7 +1345,7 @@ class dbClusterInfo(): # ctsql 视图查询 + grep 判断 OPEN sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" ctsql_cmd = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql_cmd}\"" - probe = f"{ctsql_cmd} | grep -q 'OPEN'" + probe = f"{ctsql_cmd} | grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'" echo_ok = ( "echo -e 'instance_state : Normal\\n" diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 980853d2..ef2f8e84 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -285,8 +285,8 @@ class OmImplOLAP(OmImpl): for inst in dbNode.datanodes: sql = ("SELECT COUNT(*) FROM DV_DATABASE " "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") - ctsql = (f"ctsql '/' as sysdba -q -D {inst.datadir} " - f"-c \"{sql}\" | grep -q '^ *1' ") + ctsql = (f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\" " + "| grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'") status = subprocess.call(ctsql, shell=True) if status != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") -- Gitee From 8f179a390eb659e490cc8bbdb6155ff64904f859 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 15:31:01 +0800 Subject: [PATCH 013/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index ef2f8e84..3065a1db 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -392,7 +392,7 @@ class OmImplOLAP(OmImpl): ">/dev/null 2>&1 || true" ) force_kill = ( - f"$({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" + 
f"({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" ) pre_check = f"pids=$({probe}); [ -z \"$pids\" ] && exit 0" -- Gitee From 72647f4e8618d8c020cad7d40a54472724ec2577 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 16:36:22 +0800 Subject: [PATCH 014/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 3065a1db..3613a7da 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -386,7 +386,7 @@ class OmImplOLAP(OmImpl): Quietly stop one Cantian instance. """ pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" - probe = f"pgrep -f '{pat}' | tr '\\n' ' '" + probe = f"(pgrep -f '{pat}' || true) | tr '\\n' ' '" graceful = ( f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" @@ -395,16 +395,17 @@ class OmImplOLAP(OmImpl): f"({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" ) + init_pids = "pids=''" pre_check = f"pids=$({probe}); [ -z \"$pids\" ] && exit 0" try_grace = f"{graceful}" second_ck = f"pids=$({probe})" - kill_rest = f"[ -n \"$pids\" ] && {force_kill}" + kill_rest = "[ -n \"${pids:-}\" ] && " + force_kill finish = "exit 0" stop_cmd = ( "{ " + f"{init_pids}; " f"{pre_check}; " - f"{try_grace}; " f"{second_ck}; " f"{kill_rest}; " f"{finish}; " -- Gitee From fbc4890f31d9b876bbafe620efe6e6b93154723c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 16:45:16 +0800 Subject: [PATCH 015/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 3613a7da..e99a99cc 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -386,7 +386,7 @@ class OmImplOLAP(OmImpl): Quietly stop one Cantian instance. 
""" pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" - probe = f"(pgrep -f '{pat}' || true) | tr '\\n' ' '" + probe = f"pgrep -f '{pat}' | tr '\\n' ' '" graceful = ( f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" @@ -395,17 +395,17 @@ class OmImplOLAP(OmImpl): f"({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" ) - init_pids = "pids=''" pre_check = f"pids=$({probe}); [ -z \"$pids\" ] && exit 0" try_grace = f"{graceful}" second_ck = f"pids=$({probe})" - kill_rest = "[ -n \"${pids:-}\" ] && " + force_kill + force_kill = "for pid in $pids; do kill -9 \"$pid\" >/dev/null 2>&1 || true; done" + kill_rest = f"[ -n \"${{pids// /}}\" ] && {{ {force_kill}; }}" finish = "exit 0" stop_cmd = ( "{ " - f"{init_pids}; " f"{pre_check}; " + f"{try_grace}; " f"{second_ck}; " f"{kill_rest}; " f"{finish}; " -- Gitee From 7fab02f768c7a187fc57074baa3a26115800c0d3 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 22:00:08 +0800 Subject: [PATCH 016/144] cantian2.0-mysql-2 --- script/gspylib/component/Kernel/Kernel.py | 4 ++-- script/gspylib/component/MySql/mysql_comp.py | 4 ++-- script/impl/om/OLAP/OmImplOLAP.py | 6 +++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 88443a5d..03bd5d4a 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -93,8 +93,8 @@ class Kernel(BaseComponent): start_cmd = ( f"cd {ct_exec_dir} && " - f"nohup sh -c 'cantiand open -D {data_dir} >/dev/null 2>&1 &' " - f">/dev/null 2>&1" + f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open -D {data_dir} " + f">/dev/null 2>&1 &' >/dev/null 2>&1" ) self.logger.debug(f"[Kernel] start cmd = {start_cmd}") diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 893dc1b1..deddc234 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -79,8 +79,8 @@ class Mysql(BaseComponent): ct_cmd = ( f"cd {ct_exec_dir} && " - f"nohup sh -c 'cantiand open -D {ct_data_dir} >/dev/null 2>&1 &' " - f">/dev/null 2>&1" + f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open -D {ct_data_dir} " + f">/dev/null 2>&1 &' >/dev/null 2>&1" ) self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index e99a99cc..ee3bf478 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -243,7 +243,11 @@ class OmImplOLAP(OmImpl): continue cmd_lines = [ - f"nohup cantiand open -D {inst.datadir} > /dev/null 2>&1 &" + ( + f"cd {inst.datadir} && " + f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " + f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1" + ) for inst in dbNode.datanodes ] full_cmd = " && ".join(cmd_lines) -- Gitee From 47b6360762b12bfecdd99c34de848a117267275f Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 22:40:53 +0800 Subject: [PATCH 017/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 34 ++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index ee3bf478..463b6412 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -238,6 +238,8 @@ class OmImplOLAP(OmImpl): # ---------------- start cantiand per instance ---------------- 
failedOutput = "" + + host_cmd_pairs = [] for dbNode in self.clusterInfo.dbNodes: if dbNode.name not in hostList: continue @@ -251,20 +253,38 @@ class OmImplOLAP(OmImpl): for inst in dbNode.datanodes ] full_cmd = " && ".join(cmd_lines) + host_cmd_pairs.append((dbNode.name, full_cmd)) - statusMap, output = self.sshTool.getSshStatusOutput( - full_cmd, [dbNode.name], self.context.g_opts.mpprcFile + from concurrent.futures import ThreadPoolExecutor, as_completed + + def _run_on_host(host: str, cmd: str): + return self.sshTool.getSshStatusOutput( + cmd, [host], self.context.g_opts.mpprcFile ) - if statusMap.get(dbNode.name) != "Success": - failedOutput += output - elif re.search(r"\bERROR\b", output, re.IGNORECASE): - self.logger.log(output) + with ThreadPoolExecutor(max_workers=len(host_cmd_pairs)) as pool: + future_map = { + pool.submit(_run_on_host, host, cmd): host + for host, cmd in host_cmd_pairs + } + + for fut in as_completed(future_map): + host = future_map[fut] + try: + statusMap, output = fut.result() + except Exception as exc: + failedOutput += f"\n[{host}] {exc}" + continue + + if statusMap.get(host) != "Success": + failedOutput += output + elif re.search(r"\bERROR\b", output, re.IGNORECASE): + self.logger.log(output) if failedOutput: self.logger.log("=========================================") raise Exception(ErrorCode.GAUSS_536["GAUSS_53600"] % - (full_cmd, failedOutput)) + ("", failedOutput)) # ---------------- instance-level health check ---------------- for dbNode in self.clusterInfo.dbNodes: -- Gitee From 8941c345ff81cb53c73fdd7c88682392a25d6356 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 22:48:23 +0800 Subject: [PATCH 018/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 51 +++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 463b6412..09ad2ad6 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -239,52 +239,35 @@ class OmImplOLAP(OmImpl): # ---------------- start cantiand per instance ---------------- failedOutput = "" - host_cmd_pairs = [] + cmd_lines = [] for dbNode in self.clusterInfo.dbNodes: if dbNode.name not in hostList: continue - - cmd_lines = [ - ( - f"cd {inst.datadir} && " + for inst in dbNode.datanodes: + cmd_lines.append( + f"if [ -d '{inst.datadir}' ]; then " + f"cd '{inst.datadir}' && " f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " - f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1" + f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1; " + f"fi" ) - for inst in dbNode.datanodes - ] - full_cmd = " && ".join(cmd_lines) - host_cmd_pairs.append((dbNode.name, full_cmd)) - from concurrent.futures import ThreadPoolExecutor, as_completed + full_cmd = " ; ".join(cmd_lines) - def _run_on_host(host: str, cmd: str): - return self.sshTool.getSshStatusOutput( - cmd, [host], self.context.g_opts.mpprcFile - ) + statusMap, output = self.sshTool.getSshStatusOutput( + full_cmd, hostList, self.context.g_opts.mpprcFile + ) - with ThreadPoolExecutor(max_workers=len(host_cmd_pairs)) as pool: - future_map = { - pool.submit(_run_on_host, host, cmd): host - for host, cmd in host_cmd_pairs - } - - for fut in as_completed(future_map): - host = future_map[fut] - try: - statusMap, output = fut.result() - except Exception as exc: - failedOutput += f"\n[{host}] {exc}" - continue - - if statusMap.get(host) != "Success": - failedOutput += output - elif re.search(r"\bERROR\b", output, 
re.IGNORECASE): - self.logger.log(output) + for host in hostList: + if statusMap.get(host) != "Success": + failedOutput += f"\n[{host}]\n{output.strip()}" + elif re.search(r"\bERROR\b", output, re.IGNORECASE): + self.logger.log(output) if failedOutput: self.logger.log("=========================================") raise Exception(ErrorCode.GAUSS_536["GAUSS_53600"] % - ("", failedOutput)) + (full_cmd, failedOutput)) # ---------------- instance-level health check ---------------- for dbNode in self.clusterInfo.dbNodes: -- Gitee From e97dc9475c1ff701331adc323c6976769198e75a Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 23:11:00 +0800 Subject: [PATCH 019/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 09ad2ad6..87fb6195 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -244,9 +244,9 @@ class OmImplOLAP(OmImpl): if dbNode.name not in hostList: continue for inst in dbNode.datanodes: + ct_exec_dir = os.path.join(inst.datadir, "data") cmd_lines.append( - f"if [ -d '{inst.datadir}' ]; then " - f"cd '{inst.datadir}' && " + f"cd '{ct_exec_dir}' && " f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1; " f"fi" -- Gitee From 8d689bbc27c5e3fc1dd3f0aa22c2f910f5c9244c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 23:14:22 +0800 Subject: [PATCH 020/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 1 - 1 file changed, 1 deletion(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 87fb6195..c4f8bd45 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -249,7 +249,6 @@ class OmImplOLAP(OmImpl): f"cd '{ct_exec_dir}' && " f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1; " - f"fi" ) full_cmd = " ; ".join(cmd_lines) -- Gitee From e0a8d56c78a13016280c5548d95aec6fff67e179 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 30 Jun 2025 23:14:53 +0800 Subject: [PATCH 021/144] cantian2.0-mysql-2 --- script/impl/om/OLAP/OmImplOLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index c4f8bd45..35630172 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -248,7 +248,7 @@ class OmImplOLAP(OmImpl): cmd_lines.append( f"cd '{ct_exec_dir}' && " f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " - f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1; " + f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1 " ) full_cmd = " ; ".join(cmd_lines) -- Gitee From a53271da48ac632479bf3a2323942cff4c11a0fe Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 3 Jul 2025 09:36:07 +0800 Subject: [PATCH 022/144] cantian2.0-dss --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 249675d0..8333a482 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -263,7 +263,14 @@ class DN_OLAP(Kernel): if st != 0 and "already mounted" not in out.lower(): raise RuntimeError(f"mount root failed:\n{out}") - 
CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") + st, out = CmdUtil.getstatusoutput_by_fast_popen(f"sudo -n chown {uid}:{gid} {data_dir}") + if st != 0: + err = out.lower() + if "operation not permitted" in err or "eperm" in err: + raise RuntimeError( + f"chown failed on {data_dir} (possible root_squash; set 'no_root_squash')\n{out}" + ) + raise RuntimeError(f"chown failed:\n{out}") # 3. 创建 / 清空 cantianData cantian_dir = os.path.join(data_dir, "cantianData") -- Gitee From 4195063345a9b26ca3c898a3e8f3654983f1c631 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 3 Jul 2025 17:18:29 +0800 Subject: [PATCH 023/144] cantian2.0-dss --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 8333a482..073b515f 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -290,7 +290,7 @@ class DN_OLAP(Kernel): # ---------- 所有节点挂 cantianData ---------- cmd = f"sudo -n mount -t nfs {mount_opt} {nfs_subdir} {data_dir}" self.logger.debug(f"[prepare_data_dir] CMD = {cmd}") - st, out = CmdUtil.getstatusoutput_by_fast_popen(cmd) + st, out = CmdUtil.retry_util_timeout(cmd, timeout=60, sleep_time=10) if st != 0 and "already mounted" not in out.lower(): raise RuntimeError(f"mount cantianData failed:\n{out}") -- Gitee From eb4bc92fde90a2021fb9bd123ced37f05f8c5022 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 3 Jul 2025 20:28:02 +0800 Subject: [PATCH 024/144] cantian2.0-dss --- script/gspylib/component/DSS/dss_comp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index a11ee2ea..d26e67ce 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -218,7 +218,7 @@ class Dss(BaseComponent): bin_path='', kill_server=True, unrej=False, - exist_so=True): + exist_so=False): ''' The OM manually starts the DSS server to obtain the socket file. 
''' -- Gitee From 051ad3a7f2ea291a627d1e8a4a4803af8dfcb537 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 7 Jul 2025 11:11:00 +0800 Subject: [PATCH 025/144] =?UTF-8?q?CM=E9=80=82=E9=85=8D=E5=8F=82=E5=A4=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/CM/CM.py | 10 +++++----- script/gspylib/component/CM/CM_OLAP/CM_OLAP.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/script/gspylib/component/CM/CM.py b/script/gspylib/component/CM/CM.py index 3f90b654..60224c57 100644 --- a/script/gspylib/component/CM/CM.py +++ b/script/gspylib/component/CM/CM.py @@ -40,14 +40,14 @@ class CM(BaseComponent): class CmResAttr(): - def __init__(self, script, res_type='DN'): + def __init__(self, script, res_type='APP'): self.resources_type = res_type self.script = script self.check_interval = 1 - self.time_out = 120 - self.restart_times = 5 - self.restart_delay = 1 - self.restart_period = 1 + self.time_out = 1 + self.restart_times = 3 + self.restart_delay = 45 + self.restart_period = 45 def __str__(self): return str(vars(self)).replace(":", '=').replace('\'', '').replace( diff --git a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py index a6a50dd5..c07674c9 100644 --- a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py +++ b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py @@ -847,11 +847,11 @@ class CM_OLAP(CM): gauss_home = EnvUtil.getEnvironmentParameterValue('GAUSSHOME', user) dss_home = EnvUtil.getEnvironmentParameterValue('DSS_HOME', user) # not use realpath - dms_contrl = os.path.join(gauss_home, 'bin/dms_contrl.sh') + dms_contrl = os.path.join(gauss_home, 'bin/cantian_contrl.sh') dss_contrl = os.path.join(gauss_home, 'bin/dss_contrl.sh') cmd = [ - str(CmResCtrlCmd(name='dms_res', attr=CmResAttr(dms_contrl))), + str(CmResCtrlCmd(name='cantian', attr=CmResAttr(dms_contrl))), str( CmResCtrlCmd(name='dss', attr=CmResAttr(dss_contrl, res_type='APP'))) -- Gitee From 626663f7ed2c432c30d29eb76a685fdbe451eee5 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 7 Jul 2025 16:36:10 +0800 Subject: [PATCH 026/144] mysql corefile --- script/gspylib/component/MySql/mysql_comp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index deddc234..227dc5b3 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -70,6 +70,7 @@ class Mysql(BaseComponent): gausslog_root = Path(os.getenv("GAUSSLOG", mysql_home / "log")) gausslog_dir = gausslog_root / "my_log" gausslog_dir.mkdir(parents=True, exist_ok=True) + core_dir = mysql_home.parent / "corefile" log_file = gausslog_dir / "mysql.log" ct_data_dir = self._cantian_datadir() @@ -103,7 +104,7 @@ class Mysql(BaseComponent): f"--datadir={data_dir} " f"--plugin-dir={plugin_dir} " f'--early-plugin-load="ha_ctc.so"' - f"--core-file " + f"--core-file={core_dir} " f"--log-error={log_file}" ) self.logger.debug(f"[MySQL] init_cmd: {init_cmd}") @@ -150,6 +151,7 @@ class Mysql(BaseComponent): cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_safe = mysql_home / "bin" / "mysqld" data_dir = mysql_home / "data" + core_dir = mysql_home.parent / "corefile" if not data_dir.exists(): FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) @@ -165,6 +167,7 @@ class Mysql(BaseComponent): f"--defaults-file={cnf_file} " f"--plugin-dir={plugin_dir} " 
f"--datadir={data_dir} " + f"--core-file={core_dir} " f'--early-plugin-load="ha_ctc.so" ' f"> {log_file} 2>&1 Date: Wed, 9 Jul 2025 14:29:40 +0800 Subject: [PATCH 027/144] mysql corefile --- script/gspylib/component/MySql/mysql_comp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 227dc5b3..3acb56ba 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -103,7 +103,7 @@ class Mysql(BaseComponent): f"--defaults-file={cnf_file} --initialize-insecure " f"--datadir={data_dir} " f"--plugin-dir={plugin_dir} " - f'--early-plugin-load="ha_ctc.so"' + f'--early-plugin-load="ha_ctc.so" ' f"--core-file={core_dir} " f"--log-error={log_file}" ) -- Gitee From e0c432a248ee43fb2c1b17958917527ba30106d9 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 9 Jul 2025 15:10:51 +0800 Subject: [PATCH 028/144] mysql corefile --- script/gspylib/component/MySql/mysql_comp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 3acb56ba..7ee0b40f 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -104,7 +104,7 @@ class Mysql(BaseComponent): f"--datadir={data_dir} " f"--plugin-dir={plugin_dir} " f'--early-plugin-load="ha_ctc.so" ' - f"--core-file={core_dir} " + f"--core-file " f"--log-error={log_file}" ) self.logger.debug(f"[MySQL] init_cmd: {init_cmd}") @@ -167,7 +167,7 @@ class Mysql(BaseComponent): f"--defaults-file={cnf_file} " f"--plugin-dir={plugin_dir} " f"--datadir={data_dir} " - f"--core-file={core_dir} " + f"--core-file " f'--early-plugin-load="ha_ctc.so" ' f"> {log_file} 2>&1 Date: Wed, 9 Jul 2025 15:11:09 +0800 Subject: [PATCH 029/144] mysql corefile --- script/gspylib/component/MySql/mysql_comp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 7ee0b40f..d59c02cf 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -70,7 +70,6 @@ class Mysql(BaseComponent): gausslog_root = Path(os.getenv("GAUSSLOG", mysql_home / "log")) gausslog_dir = gausslog_root / "my_log" gausslog_dir.mkdir(parents=True, exist_ok=True) - core_dir = mysql_home.parent / "corefile" log_file = gausslog_dir / "mysql.log" ct_data_dir = self._cantian_datadir() @@ -151,7 +150,6 @@ class Mysql(BaseComponent): cnf_file = gauss_home / "share" / "mysql" / "my.cnf" bin_safe = mysql_home / "bin" / "mysqld" data_dir = mysql_home / "data" - core_dir = mysql_home.parent / "corefile" if not data_dir.exists(): FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) -- Gitee From c1d2412e5a474dc13aba57a66acfc78bb1eeaa4b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 9 Jul 2025 16:04:26 +0800 Subject: [PATCH 030/144] mysql corefile --- script/gspylib/common/Common.py | 2 ++ script/gspylib/component/MySql/mysql_comp.py | 28 -------------------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index c28de63a..a43a5894 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1185,6 +1185,8 @@ class DefaultValue(): envList.extend([ "export CM_CONFIG_PATH=$PGDATA/cm_config.ini", "export CTDB_HOME=$PGDATA", + "export 
CANTIAN_MODE=open", + "export CANTIAN_HOME_DIR=$PGDATA", "export GSDB_HOME=$PGDATA" ]) diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index d59c02cf..dae890ab 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -72,30 +72,6 @@ class Mysql(BaseComponent): gausslog_dir.mkdir(parents=True, exist_ok=True) log_file = gausslog_dir / "mysql.log" - ct_data_dir = self._cantian_datadir() - ct_exec_dir = Path(ct_data_dir) / "data" - if not ct_exec_dir.exists(): - FileUtil.createDirectory(str(ct_exec_dir), DefaultValue.KEY_DIRECTORY_MODE) - - ct_cmd = ( - f"cd {ct_exec_dir} && " - f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open -D {ct_data_dir} " - f">/dev/null 2>&1 &' >/dev/null 2>&1" - ) - self.logger.debug(f"[MySQL] ct_cmd: {ct_cmd}") - CmdUtil.retryGetstatusoutput(ct_cmd, retry_time=0) - - if not Kernel.isDbOpen( - pgdata=ct_data_dir, - logger=self.logger, - timeout=DefaultValue.TIMEOUT_INSTANCE_START, - interval=5): - raise RuntimeError("Cantian open timeout (DV_DATABASE not OPEN)") - - if data_dir.exists(): - FileUtil.removeDirectory(str(data_dir)) - FileUtil.createDirectory(str(data_dir), DefaultValue.KEY_DIRECTORY_MODE) - # 3. 初始化 MySQL init_cmd = ( f"{bin_mysqld} " @@ -111,10 +87,6 @@ class Mysql(BaseComponent): if ret != 0: raise RuntimeError(f"mysqld initialize failed, exit code {ret}") - # 4. 关闭 Cantian - self._exec(f"ctsql '/' as sysdba -q -D {ct_data_dir} -c 'shutdown'", - ignore_error=True) - except Exception as e: raise Exception( ErrorCode.GAUSS_516["GAUSS_51610"] % -- Gitee From c167961eba36218e47c23b439bd2bc635ac689b7 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 9 Jul 2025 16:15:52 +0800 Subject: [PATCH 031/144] mysql corefile --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 073b515f..c808d408 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -591,12 +591,18 @@ class DN_OLAP(Kernel): if getattr(cluster, "mysql_metadata_in_cantian", "on").lower() == "off": meta_flag = "FALSE" + shared_path = "" + ctdb_home = os.getenv("CTDB_HOME") + if ctdb_home: + shared_path = os.path.join(ctdb_home, "data") + cantian_dict = { "LSNR_ADDR": self.instInfo.listenIps[0], "LSNR_PORT": str(self.instInfo.port), "SS_INSTANCE_ID": self.calc_ss_instance_id(), "MYSQL_METADATA_IN_CANTIAN": meta_flag, "MYSQL_DEPLOY_GROUP_ID": str(gid), + "SHARED_PATH": shared_path, } return cantian_dict -- Gitee From b2ddcb65081f6f0ed67f3ebfbeb8ea871c376d3b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 9 Jul 2025 19:01:40 +0800 Subject: [PATCH 032/144] mysql corefile --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index c808d408..8a539799 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -253,7 +253,10 @@ class DN_OLAP(Kernel): self.logger.debug(f"[prepare_data_dir] nfs_subdir = {nfs_subdir}") uid, gid = os.getuid(), os.getgid() - mount_opt = "-o rw,soft,timeo=600,retrans=2" + mount_opt = ( + "-o vers=4.1,rw,hard,sync," + "noac,actimeo=0,lookupcache=none,delegation=off,local_lock=none,timeo=60,retrans=3" 
+ ) if self.is_primary_node(): # 2. 挂 root -- Gitee From 0dfd5dfa4249a8aaaee8ade675ee114626b3e96a Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 9 Jul 2025 19:11:12 +0800 Subject: [PATCH 033/144] mysql corefile --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 8a539799..f387b49b 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -255,7 +255,7 @@ class DN_OLAP(Kernel): uid, gid = os.getuid(), os.getgid() mount_opt = ( "-o vers=4.1,rw,hard,sync," - "noac,actimeo=0,lookupcache=none,delegation=off,local_lock=none,timeo=60,retrans=3" + "noac,actimeo=0,lookupcache=none,timeo=60,retrans=3" ) if self.is_primary_node(): -- Gitee From 20e995efe647848ea2f942c6108c0f21ec8c3421 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 12:00:39 +0800 Subject: [PATCH 034/144] mysql corefile --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index f387b49b..8e169717 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -254,8 +254,7 @@ class DN_OLAP(Kernel): uid, gid = os.getuid(), os.getgid() mount_opt = ( - "-o vers=4.1,rw,hard,sync," - "noac,actimeo=0,lookupcache=none,timeo=60,retrans=3" + "-o vers=4.1,nosuid,nodev,noexec,timeo=60" ) if self.is_primary_node(): @@ -607,6 +606,12 @@ class DN_OLAP(Kernel): "MYSQL_DEPLOY_GROUP_ID": str(gid), "SHARED_PATH": shared_path, } + + # 使用 NFS 需要加上FULLDIRECTIO参数,否则可能出现数据不一致的问题 + share_fs = os.getenv("SHARE_FS", "").strip() + if share_fs and not share_fs.startswith("127.0.0.1"): + cantian_dict["FILE_OPTIONS"] = "FULLDIRECTIO" + return cantian_dict def get_ss_inter_url(self) -> str: -- Gitee From 41b0748ee6be29654f240eb74fdc6b1d94cce6d4 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 16:21:00 +0800 Subject: [PATCH 035/144] mysqld --- script/gspylib/component/Kernel/Kernel.py | 7 +++ script/gspylib/component/MySql/mysql_comp.py | 18 +++++-- script/impl/om/OLAP/OmImplOLAP.py | 56 ++++++++++++-------- script/local/PreInstallUtility.py | 15 ++++++ 4 files changed, 71 insertions(+), 25 deletions(-) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 03bd5d4a..c8ebc40a 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -85,6 +85,13 @@ class Kernel(BaseComponent): # self.logger.debug("DSS mode start skipped.") # return + # 单进程模式:直接跳过,后面通过mysql拉起 + single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() + if single_flag in ("on", "true", "yes", "1"): + self.logger.debug("[Kernel] CANTIAN_MYSQL_SINGLE=on → " + "single-process mode, skip cantiand start().") + return + data_dir = self.instInfo.datadir ct_exec_dir = os.path.join(data_dir, "data") diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index dae890ab..1308d0ed 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -52,8 +52,14 @@ class Mysql(BaseComponent): def initInstance(self): enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + # 
单进程才需要自动拉起mysql,双进程需要手动拉起mysql + single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): - self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip initInstance().") + return + if single_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] CANTIAN_MYSQL_SINGLE != on, skip initInstance().") return if not self._is_primary_node(): @@ -110,11 +116,15 @@ class Mysql(BaseComponent): """ 启动 MySQL """ - enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() + single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() + if enable_flag not in ("on", "true", "yes", "1"): - self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start()") + self.logger.debug("[MySQL] ENABLE_MYSQL disabled, skip start().") + return + if single_flag not in ("on", "true", "yes", "1"): + self.logger.debug("[MySQL] CANTIAN_MYSQL_SINGLE != on, skip start().") return - mysql_home = self._mysql_home() gauss_home = self._gauss_home() diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 35630172..8c52849f 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -236,20 +236,40 @@ class OmImplOLAP(OmImpl): self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) - # ---------------- start cantiand per instance ---------------- + # ---------------- start cantiand/mysqld per instance ---------------- + single_mode = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() in ( + "on", "true", "yes", "1" + ) failedOutput = "" cmd_lines = [] for dbNode in self.clusterInfo.dbNodes: if dbNode.name not in hostList: continue - for inst in dbNode.datanodes: - ct_exec_dir = os.path.join(inst.datadir, "data") + + if single_mode: + # 单进程模式:只启动 mysqld cmd_lines.append( - f"cd '{ct_exec_dir}' && " - f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " - f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1 " + 'LOG_FILE="${LOG_FILE:-${GAUSSLOG:-$MYSQL_HOME/log}/my_log/mysql.log}" && ' + 'mkdir -p "$(dirname "$LOG_FILE")" && ' + 'nohup $MYSQL_HOME/bin/mysqld ' + '--defaults-file=$GAUSSHOME/share/mysql/my.cnf ' + '--plugin-dir=$GAUSSHOME/lib ' + '--datadir=$MYSQL_HOME/data ' + '--core-file ' + '--early-plugin-load="ha_ctc.so" ' + '--log-error="$LOG_FILE" ' + '> "$LOG_FILE" 2>&1 /dev/null 2>&1 &' >/dev/null 2>&1 " + ) full_cmd = " ; ".join(cmd_lines) @@ -389,32 +409,26 @@ class OmImplOLAP(OmImpl): def silent_stop_cmd(data_dir: str) -> str: """ - Quietly stop one Cantian instance. + Quietly stop Cantian instance(s) associated with *data_dir*, + then make sure neither mysqld-Cantian nor cantiand is left. 
""" pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" - probe = f"pgrep -f '{pat}' | tr '\\n' ' '" + graceful = ( f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) + force_kill = ( - f"({probe}) | xargs -r kill -9 >/dev/null 2>&1 || true" + "pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 || true; " + f"pkill -9 -f '{pat}' >/dev/null 2>&1 || true" ) - pre_check = f"pids=$({probe}); [ -z \"$pids\" ] && exit 0" - try_grace = f"{graceful}" - second_ck = f"pids=$({probe})" - force_kill = "for pid in $pids; do kill -9 \"$pid\" >/dev/null 2>&1 || true; done" - kill_rest = f"[ -n \"${{pids// /}}\" ] && {{ {force_kill}; }}" - finish = "exit 0" - stop_cmd = ( "{ " - f"{pre_check}; " - f"{try_grace}; " - f"{second_ck}; " - f"{kill_rest}; " - f"{finish}; " + f"{graceful}; " + f"{force_kill}; " + "exit 0; " "}" ) return stop_cmd diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 14f18799..322083e7 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1827,16 +1827,31 @@ Common options: if enable_mysql_flag == "on": mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") + + # 1. 依据 GAUSSHOME/lib/ha_ctc.so 判断单进程/多进程模式 + gauss_home = EnvUtil.getEnvironmentParameterValue("GAUSSHOME", self.user) + ha_ctc_so = os.path.join(gauss_home, "lib", "ha_ctc.so") + if not os.path.isfile(ha_ctc_so): + raise Exception(f"ENABLE_MYSQL is on, but {ha_ctc_so} does not exist") + + status, ldd_out = subprocess.getstatusoutput(f"ldd {ha_ctc_so} 2>/dev/null") + if status != 0: + raise Exception(f"Failed to run ldd on {ha_ctc_so}: {ldd_out}") + + cantian_single_flag = "on" if "libcantian.so" in ldd_out else "false" + for prof in (userProfile, self.user_env_file): FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_MYSQL_SINGLE=.*$") FileUtil.writeFile(prof, [ "export ENABLE_MYSQL=on", f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", f"export MYSQL_HOME={mysql_home}", + f"export CANTIAN_MYSQL_SINGLE={cantian_single_flag}", r"export PATH=$MYSQL_HOME/bin:$PATH" ]) self.logger.debug( -- Gitee From f66c843792b45cf58faf1889c19839ca2264972a Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 19:21:57 +0800 Subject: [PATCH 036/144] mysqld --- script/local/Install.py | 35 +++++++++++++++++++++++++++++++ script/local/PreInstallUtility.py | 14 ------------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index 4eeddde2..eb972428 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -535,6 +535,20 @@ class Install(LocalBaseOM): self.logger.log(f"MySQL connector libs deployed to {dest_lib_dir} " f"(copied: {', '.join(required_libs)}, symlink ha_ctc.so -> {selected_so})") + # 单进程模式检测:ha_ctc.so + ha_ctc_path = link_path + status, ldd_out = subprocess.getstatusoutput( + f"ldd '{ha_ctc_path}' 2>/dev/null" + ) + + if status == 0 and "libcantian.so" in ldd_out: + ProfileFile.updateUserEnvVariable(self.mpprcFile, + "CANTIAN_MYSQL_SINGLE", "on") + self.logger.log( + "[SingleProcess] Detected libcantian.so in ha_ctc.so; " + "exported CANTIAN_MYSQL_SINGLE=on" + ) + def decompress_mysql_server(self): """ Decompress Mysql_server*x86_64*.tgz / 
Mysql_server*aarch64*.tgz @@ -582,6 +596,27 @@ class Install(LocalBaseOM): FileUtil.cpFile(src_mysql_dir, dest_mysql_dir) FileUtil.changeOwner(self.user, dest_mysql_dir, True) + # ---------- 迁移 lib/plugin/* → /lib ---------- + plugin_dir = os.path.join(dest_mysql_dir, "lib", "plugin") + dest_lib_dir = os.path.join(self.installPath, "lib") # ha_ctc.so 所在目录 + + if os.path.isdir(plugin_dir): + if not os.listdir(plugin_dir): + self.logger.debug(f"{plugin_dir} is empty, skip moving plugin libs.") + else: + FileUtil.createDirectory(dest_lib_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + + mv_cmd = f"mv {plugin_dir}/* '{dest_lib_dir}/'" + status, output = subprocess.getstatusoutput(mv_cmd) + + if status != 0: + raise Exception(f"Failed to move MySQL plugin libs: {output}") + + FileUtil.changeOwner(self.user, dest_lib_dir, True) + self.logger.log(f"Plugin libs moved from {plugin_dir} → {dest_lib_dir}") + else: + self.logger.debug(f"No plugin dir found at {plugin_dir}; skip.") + FileUtil.removeDirectory(tmp_dir) self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 322083e7..96f65f9e 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1828,30 +1828,16 @@ Common options: if enable_mysql_flag == "on": mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") - # 1. 依据 GAUSSHOME/lib/ha_ctc.so 判断单进程/多进程模式 - gauss_home = EnvUtil.getEnvironmentParameterValue("GAUSSHOME", self.user) - ha_ctc_so = os.path.join(gauss_home, "lib", "ha_ctc.so") - if not os.path.isfile(ha_ctc_so): - raise Exception(f"ENABLE_MYSQL is on, but {ha_ctc_so} does not exist") - - status, ldd_out = subprocess.getstatusoutput(f"ldd {ha_ctc_so} 2>/dev/null") - if status != 0: - raise Exception(f"Failed to run ldd on {ha_ctc_so}: {ldd_out}") - - cantian_single_flag = "on" if "libcantian.so" in ldd_out else "false" - for prof in (userProfile, self.user_env_file): FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_MYSQL_SINGLE=.*$") FileUtil.writeFile(prof, [ "export ENABLE_MYSQL=on", f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", f"export MYSQL_HOME={mysql_home}", - f"export CANTIAN_MYSQL_SINGLE={cantian_single_flag}", r"export PATH=$MYSQL_HOME/bin:$PATH" ]) self.logger.debug( -- Gitee From a201699e246daf6b684af1c001146cb79ead0b63 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 19:50:52 +0800 Subject: [PATCH 037/144] mysqld --- script/local/Install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/Install.py b/script/local/Install.py index eb972428..2be44e90 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -606,7 +606,7 @@ class Install(LocalBaseOM): else: FileUtil.createDirectory(dest_lib_dir, True, DefaultValue.KEY_DIRECTORY_MODE) - mv_cmd = f"mv {plugin_dir}/* '{dest_lib_dir}/'" + mv_cmd = f"mv -n {plugin_dir}/* '{dest_lib_dir}/'" status, output = subprocess.getstatusoutput(mv_cmd) if status != 0: -- Gitee From f60b6b202139dfefbe4559d09c1287f6d24d3265 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 21:06:59 +0800 Subject: [PATCH 038/144] mysqld --- script/gspylib/common/Common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index a43a5894..767d7ae6 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1185,8 +1185,8 @@ class DefaultValue(): envList.extend([ "export CM_CONFIG_PATH=$PGDATA/cm_config.ini", "export CTDB_HOME=$PGDATA", - "export CANTIAN_MODE=open", - "export CANTIAN_HOME_DIR=$PGDATA", + "export CANTIAND_MODE=open", + "export CANTIAND_HOME_DIR=$PGDATA", "export GSDB_HOME=$PGDATA" ]) -- Gitee From 5550bae6afd0830b1eaa4d144b8747ae45a9b2e8 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:07:14 +0800 Subject: [PATCH 039/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 8c52849f..daa5e78b 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -240,44 +240,44 @@ class OmImplOLAP(OmImpl): single_mode = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() in ( "on", "true", "yes", "1" ) - failedOutput = "" - cmd_lines = [] + host_cmd_map = {h: [] for h in hostList} + for dbNode in self.clusterInfo.dbNodes: if dbNode.name not in hostList: continue if single_mode: - # 单进程模式:只启动 mysqld - cmd_lines.append( - 'LOG_FILE="${LOG_FILE:-${GAUSSLOG:-$MYSQL_HOME/log}/my_log/mysql.log}" && ' - 'mkdir -p "$(dirname "$LOG_FILE")" && ' + host_cmd_map[dbNode.name].append( 'nohup $MYSQL_HOME/bin/mysqld ' '--defaults-file=$GAUSSHOME/share/mysql/my.cnf ' '--plugin-dir=$GAUSSHOME/lib ' '--datadir=$MYSQL_HOME/data ' '--core-file ' '--early-plugin-load="ha_ctc.so" ' - '--log-error="$LOG_FILE" ' - '> "$LOG_FILE" 2>&1 /dev/null 2>&1 &' ) else: # 其他模式:为节点上每个 datanode 启动一条 cantiand for inst in dbNode.datanodes: ct_exec_dir = os.path.join(inst.datadir, "data") - cmd_lines.append( + host_cmd_map[dbNode.name].append( f"cd '{ct_exec_dir}' && " - f"nohup sh -c '${{GAUSSHOME}}/bin/cantiand open " - f"-D {inst.datadir} >/dev/null 2>&1 &' >/dev/null 2>&1 " + f"nohup $GAUSSHOME/bin/cantiand open " + f"-D {inst.datadir} >/dev/null 2>&1 &" ) - full_cmd = " ; ".join(cmd_lines) + failed_nodes, failedOutput = [], "" + for host, cmds in host_cmd_map.items(): + if not cmds: + continue + full_cmd = " ; ".join(cmds) - statusMap, output = self.sshTool.getSshStatusOutput( - full_cmd, hostList, self.context.g_opts.mpprcFile - ) + statusMap, output = self.sshTool.getSshStatusOutput( + full_cmd, [host], self.context.g_opts.mpprcFile + ) - for host in hostList: if statusMap.get(host) != "Success": failedOutput += f"\n[{host}]\n{output.strip()}" elif re.search(r"\bERROR\b", output, re.IGNORECASE): -- Gitee From 870f74ab2c1e79bc596515bb948a502dab228a3b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:24:53 +0800 Subject: [PATCH 040/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index daa5e78b..08dbcd66 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -300,32 +300,39 @@ class OmImplOLAP(OmImpl): # === cluster-level aggregate check if startType == "cluster": - # simple aggregate: every instance already OPEN above, - # but keep loop structure for compatibility self.logger.log("Verifying cluster state ...") - starttime = time.time() - deadline = starttime + 
DefaultValue.TIMEOUT_CLUSTER_START + start_time = time.time() + deadline = start_time + DefaultValue.TIMEOUT_CLUSTER_START + while time.time() <= deadline: not_open = [] + for dbNode in self.clusterInfo.dbNodes: for inst in dbNode.datanodes: sql = ("SELECT COUNT(*) FROM DV_DATABASE " "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") - ctsql = (f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\" " - "| grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'") - status = subprocess.call(ctsql, shell=True) - if status != 0: + ctsql = ( + f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\" " + "| grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'" + ) + + ret, _ = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) + if ret != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") + if not not_open: break + self.logger.log("Waiting for cluster to reach OPEN: " f"{', '.join(not_open)}") time.sleep(5) if not_open: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" - + " After startup, some instances are not OPEN: " - + ", ".join(not_open)) + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" + + " After startup, some instances are not OPEN: " + + ", ".join(not_open) + ) # ---------------- success ---------------- self.logger.log("=========================================") -- Gitee From 8c6f311a8ab60d1c2028fce4a98ad6d8c0a5f147 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:34:58 +0800 Subject: [PATCH 041/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 08dbcd66..f923db86 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -304,21 +304,36 @@ class OmImplOLAP(OmImpl): start_time = time.time() deadline = start_time + DefaultValue.TIMEOUT_CLUSTER_START + sql = ("SELECT COUNT(*) FROM DV_DATABASE " + "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") + while time.time() <= deadline: not_open = [] for dbNode in self.clusterInfo.dbNodes: for inst in dbNode.datanodes: - sql = ("SELECT COUNT(*) FROM DV_DATABASE " - "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") - ctsql = ( - f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\" " - "| grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'" + base_cmd = ( + f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\"" ) + ret, out = CmdUtil.retryGetstatusoutput(base_cmd, retry_time=0) - ret, _ = CmdUtil.retryGetstatusoutput(ctsql, retry_time=0) if ret != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") + continue + + lines = [ + ln.strip() for ln in out.splitlines() + if ln.strip() + and not ln.startswith("SQL>") + and not ln.upper().startswith("COUNT") + ] + + count_open = 0 + if lines and lines[0].isdigit(): + count_open = int(lines[0]) + + if count_open == 0: + not_open.append(f"{dbNode.name}:{inst.datadir}") if not not_open: break -- Gitee From 488204c5fc4a2468dfa818881c12b22704107b35 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:41:44 +0800 Subject: [PATCH 042/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index f923db86..9b94c14d 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -304,35 +304,38 @@ class OmImplOLAP(OmImpl): start_time = 
time.time() deadline = start_time + DefaultValue.TIMEOUT_CLUSTER_START - sql = ("SELECT COUNT(*) FROM DV_DATABASE " - "WHERE STATUS='OPEN' AND OPEN_STATUS='READ WRITE'") + sql = ("SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE") while time.time() <= deadline: not_open = [] for dbNode in self.clusterInfo.dbNodes: for inst in dbNode.datanodes: + # ① 执行 ctsql(不带 grep 管道) base_cmd = ( f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\"" ) ret, out = CmdUtil.retryGetstatusoutput(base_cmd, retry_time=0) - + self.logger.log(f"Successfully started.{out}") + # ② ctsql 出错 ⇒ 直接视为未 OPEN if ret != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") continue - lines = [ - ln.strip() for ln in out.splitlines() - if ln.strip() - and not ln.startswith("SQL>") - and not ln.upper().startswith("COUNT") + # ③ 等价于:grep -v 'OPEN_STATUS' | grep -v '^SQL>' + filtered_lines = [ + ln.strip() + for ln in out.splitlines() + if ln.strip() # 去掉空行 + and 'OPEN_STATUS' not in ln # 去掉列标题行 + and not ln.startswith("SQL>") # 去掉 SQL 提示 ] - count_open = 0 - if lines and lines[0].isdigit(): - count_open = int(lines[0]) + # ④ 等价于:grep -qw 'OPEN' + opened = any(re.search(r'\bOPEN\b', ln, re.IGNORECASE) + for ln in filtered_lines) - if count_open == 0: + if not opened: not_open.append(f"{dbNode.name}:{inst.datadir}") if not not_open: @@ -349,6 +352,7 @@ class OmImplOLAP(OmImpl): + ", ".join(not_open) ) + # ---------------- success ---------------- self.logger.log("=========================================") self.logger.log("Successfully started.") -- Gitee From e28ac995ac07ac0a225b85a277bccfdc9b611728 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:43:22 +0800 Subject: [PATCH 043/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 9b94c14d..bdb55903 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -311,27 +311,23 @@ class OmImplOLAP(OmImpl): for dbNode in self.clusterInfo.dbNodes: for inst in dbNode.datanodes: - # ① 执行 ctsql(不带 grep 管道) base_cmd = ( f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\"" ) ret, out = CmdUtil.retryGetstatusoutput(base_cmd, retry_time=0) self.logger.log(f"Successfully started.{out}") - # ② ctsql 出错 ⇒ 直接视为未 OPEN if ret != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") continue - - # ③ 等价于:grep -v 'OPEN_STATUS' | grep -v '^SQL>' filtered_lines = [ ln.strip() for ln in out.splitlines() - if ln.strip() # 去掉空行 - and 'OPEN_STATUS' not in ln # 去掉列标题行 - and not ln.startswith("SQL>") # 去掉 SQL 提示 + if ln.strip() + and 'OPEN_STATUS' not in ln + and not ln.startswith("SQL>") ] - # ④ 等价于:grep -qw 'OPEN' + # 检查是否为open状态 opened = any(re.search(r'\bOPEN\b', ln, re.IGNORECASE) for ln in filtered_lines) -- Gitee From 3d591537d36fc62d9ea4b4f1ab8884db29c5494d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:47:46 +0800 Subject: [PATCH 044/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index bdb55903..da7e6f1f 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -242,6 +242,7 @@ class OmImplOLAP(OmImpl): ) host_cmd_map = {h: [] for h in hostList} + mpprc = self.context.g_opts.mpprcFile for dbNode in self.clusterInfo.dbNodes: if dbNode.name not in hostList: @@ -249,6 +250,7 @@ class OmImplOLAP(OmImpl): 
if single_mode: host_cmd_map[dbNode.name].append( + f"source {mpprc}; " 'nohup $MYSQL_HOME/bin/mysqld ' '--defaults-file=$GAUSSHOME/share/mysql/my.cnf ' '--plugin-dir=$GAUSSHOME/lib ' @@ -259,10 +261,10 @@ class OmImplOLAP(OmImpl): '> /dev/null 2>&1 &' ) else: - # 其他模式:为节点上每个 datanode 启动一条 cantiand for inst in dbNode.datanodes: ct_exec_dir = os.path.join(inst.datadir, "data") host_cmd_map[dbNode.name].append( + f"source {mpprc}; " f"cd '{ct_exec_dir}' && " f"nohup $GAUSSHOME/bin/cantiand open " f"-D {inst.datadir} >/dev/null 2>&1 &" @@ -315,7 +317,6 @@ class OmImplOLAP(OmImpl): f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\"" ) ret, out = CmdUtil.retryGetstatusoutput(base_cmd, retry_time=0) - self.logger.log(f"Successfully started.{out}") if ret != 0: not_open.append(f"{dbNode.name}:{inst.datadir}") continue -- Gitee From e3a241782fe7c81ddf2e9f992372bdaf3e0ee61d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:53:00 +0800 Subject: [PATCH 045/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index da7e6f1f..10eb810d 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -429,32 +429,21 @@ class OmImplOLAP(OmImpl): self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) + mpprc = self.context.g_opts.mpprcFile def silent_stop_cmd(data_dir: str) -> str: - """ - Quietly stop Cantian instance(s) associated with *data_dir*, - then make sure neither mysqld-Cantian nor cantiand is left. - """ pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" - graceful = ( + f"source {mpprc}; " f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) - force_kill = ( + f"source {mpprc}; " "pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 || true; " f"pkill -9 -f '{pat}' >/dev/null 2>&1 || true" ) - - stop_cmd = ( - "{ " - f"{graceful}; " - f"{force_kill}; " - "exit 0; " - "}" - ) - return stop_cmd + return f"{{ {graceful}; {force_kill}; exit 0; }}" failed_nodes, failed_outputs = [], "" for dbNode in self.clusterInfo.dbNodes: @@ -462,10 +451,10 @@ class OmImplOLAP(OmImpl): continue inst_cmds = [silent_stop_cmd(inst.datadir) for inst in dbNode.datanodes] - full_cmd = " && ".join(inst_cmds) + full_cmd = " ; ".join(inst_cmds) statusMap, output = self.sshTool.getSshStatusOutput( - full_cmd, [dbNode.name], self.context.g_opts.mpprcFile + full_cmd, [dbNode.name], mpprc ) if statusMap.get(dbNode.name) != "Success": -- Gitee From edf7076350cd30c8778c814c76d2f897fba57ace Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 22:56:21 +0800 Subject: [PATCH 046/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 10eb810d..bf9172d1 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -432,18 +432,32 @@ class OmImplOLAP(OmImpl): mpprc = self.context.g_opts.mpprcFile def silent_stop_cmd(data_dir: str) -> str: - pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" + """ + 安静关闭实例:先优雅 shutdown,再有则强杀。 + • 任何情况下整条脚本最终都 return 0,保证 SshTool 判定 Success。 + """ + pat_cantiand = rf"cantiand.*open.*-D {re.escape(data_dir)}" + + # 1. 
优雅关闭 graceful = ( f"source {mpprc}; " f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) - force_kill = ( + + # 2. 强杀(先判断进程是否存在,再 pkill;否则 pkill 返回 1) + force = ( + # mysqld-Cantian f"source {mpprc}; " - "pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 || true; " - f"pkill -9 -f '{pat}' >/dev/null 2>&1 || true" + "pgrep -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 " + "&& pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" || true; " + # cantiand + f"pgrep -f '{pat_cantiand}' >/dev/null 2>&1 " + f"&& pkill -9 -f '{pat_cantiand}' || true" ) - return f"{{ {graceful}; {force_kill}; exit 0; }}" + + # 3. 整合成单条脚本,并最终 exit 0 + return f"{{ {graceful}; {force}; exit 0; }}" failed_nodes, failed_outputs = [], "" for dbNode in self.clusterInfo.dbNodes: -- Gitee From 22a432eb5209e0a198a196df6a3d7f540d9ec64a Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 23:00:46 +0800 Subject: [PATCH 047/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 32 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index bf9172d1..bb61abb3 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -430,33 +430,22 @@ class OmImplOLAP(OmImpl): self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) mpprc = self.context.g_opts.mpprcFile + success_flags = ("success", "normal", "ok") # 大小写不敏感 def silent_stop_cmd(data_dir: str) -> str: - """ - 安静关闭实例:先优雅 shutdown,再有则强杀。 - • 任何情况下整条脚本最终都 return 0,保证 SshTool 判定 Success。 - """ - pat_cantiand = rf"cantiand.*open.*-D {re.escape(data_dir)}" - - # 1. 优雅关闭 + pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" graceful = ( f"source {mpprc}; " f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) - - # 2. 强杀(先判断进程是否存在,再 pkill;否则 pkill 返回 1) force = ( - # mysqld-Cantian f"source {mpprc}; " "pgrep -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 " "&& pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" || true; " - # cantiand - f"pgrep -f '{pat_cantiand}' >/dev/null 2>&1 " - f"&& pkill -9 -f '{pat_cantiand}' || true" + f"pgrep -f '{pat}' >/dev/null 2>&1 " + f"&& pkill -9 -f '{pat}' || true" ) - - # 3. 
整合成单条脚本,并最终 exit 0 return f"{{ {graceful}; {force}; exit 0; }}" failed_nodes, failed_outputs = [], "" @@ -465,19 +454,26 @@ class OmImplOLAP(OmImpl): continue inst_cmds = [silent_stop_cmd(inst.datadir) for inst in dbNode.datanodes] + if not inst_cmds: # 该节点无 datanode,跳过 + continue + full_cmd = " ; ".join(inst_cmds) statusMap, output = self.sshTool.getSshStatusOutput( full_cmd, [dbNode.name], mpprc ) - if statusMap.get(dbNode.name) != "Success": + status = statusMap.get(dbNode.name, "unknown").lower() + if status not in success_flags: failed_nodes.append(dbNode.name) failed_outputs += f"\n[{dbNode.name}]\n{output.strip()}" + # 仅当确有异常节点才抛错 if failed_nodes: - raise Exception(ErrorCode.GAUSS_536["GAUSS_53606"] % - (",".join(failed_nodes), failed_outputs)) + raise Exception( + ErrorCode.GAUSS_536["GAUSS_53606"] + % (",".join(failed_nodes), failed_outputs) + ) self.logger.log(f"Successfully stopped {stop_type}.") self.logger.log("=========================================") -- Gitee From f7f31ea23bfabf1ddd8766481b4be1e4f995990b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 23:05:16 +0800 Subject: [PATCH 048/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index bb61abb3..667e8d55 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -432,20 +432,28 @@ class OmImplOLAP(OmImpl): mpprc = self.context.g_opts.mpprcFile success_flags = ("success", "normal", "ok") # 大小写不敏感 + mpprc = self.context.g_opts.mpprcFile + def silent_stop_cmd(data_dir: str) -> str: pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" + graceful = ( f"source {mpprc}; " f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) + force = ( f"source {mpprc}; " + # ---- mysqld ---- "pgrep -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 " "&& pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" || true; " + # ---- cantiand ---- f"pgrep -f '{pat}' >/dev/null 2>&1 " f"&& pkill -9 -f '{pat}' || true" ) + + # 返回 0,保证 SshTool 判定 Success return f"{{ {graceful}; {force}; exit 0; }}" failed_nodes, failed_outputs = [], "" -- Gitee From f5a672d7a3c19a1c1f81271d3eaa0760d5278c93 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 23:11:10 +0800 Subject: [PATCH 049/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 667e8d55..1861acad 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -429,32 +429,33 @@ class OmImplOLAP(OmImpl): self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None, DefaultValue.TIMEOUT_CLUSTER_START) - mpprc = self.context.g_opts.mpprcFile - success_flags = ("success", "normal", "ok") # 大小写不敏感 + success_flags = ("success", "normal", "ok") mpprc = self.context.g_opts.mpprcFile def silent_stop_cmd(data_dir: str) -> str: + """ + 安静关闭实例:先优雅 shutdown,再有则强杀。 + 远端已由 SshTool 自动 source mpprc,这里不再重复。 + """ pat = rf"cantiand.*open.*-D {re.escape(data_dir)}" graceful = ( - f"source {mpprc}; " f"ctsql '/' as sysdba -q -D {data_dir} -c 'shutdown' " ">/dev/null 2>&1 || true" ) force = ( - f"source {mpprc}; " - # ---- mysqld ---- + # mysqld (Cantian 单进程) "pgrep -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 " "&& pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" || true; " - # ---- cantiand ---- + # cantiand f"pgrep -f '{pat}' >/dev/null 
2>&1 " f"&& pkill -9 -f '{pat}' || true" ) - # 返回 0,保证 SshTool 判定 Success - return f"{{ {graceful}; {force}; exit 0; }}" + # 整段脚本以 { … ; true; } 包裹,确保返回 0 + return f"{{ {graceful}; {force}; true; }}" failed_nodes, failed_outputs = [], "" for dbNode in self.clusterInfo.dbNodes: -- Gitee From b6ab045a273b73902bbed75e182b3111c159a036 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 10 Jul 2025 23:22:56 +0800 Subject: [PATCH 050/144] mysqld --- script/impl/om/OLAP/OmImplOLAP.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 1861acad..b764f11f 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -446,15 +446,12 @@ class OmImplOLAP(OmImpl): ) force = ( - # mysqld (Cantian 单进程) "pgrep -f \"${MYSQL_HOME}/bin/mysqld\" >/dev/null 2>&1 " "&& pkill -9 -f \"${MYSQL_HOME}/bin/mysqld\" || true; " - # cantiand f"pgrep -f '{pat}' >/dev/null 2>&1 " f"&& pkill -9 -f '{pat}' || true" ) - # 整段脚本以 { … ; true; } 包裹,确保返回 0 return f"{{ {graceful}; {force}; true; }}" failed_nodes, failed_outputs = [], "" @@ -463,7 +460,7 @@ class OmImplOLAP(OmImpl): continue inst_cmds = [silent_stop_cmd(inst.datadir) for inst in dbNode.datanodes] - if not inst_cmds: # 该节点无 datanode,跳过 + if not inst_cmds: continue full_cmd = " ; ".join(inst_cmds) @@ -472,10 +469,10 @@ class OmImplOLAP(OmImpl): full_cmd, [dbNode.name], mpprc ) - status = statusMap.get(dbNode.name, "unknown").lower() - if status not in success_flags: - failed_nodes.append(dbNode.name) - failed_outputs += f"\n[{dbNode.name}]\n{output.strip()}" + # status = statusMap.get(dbNode.name, "unknown").lower() + # if status not in success_flags: + # failed_nodes.append(dbNode.name) + # failed_outputs += f"\n[{dbNode.name}]\n{output.strip()}" # 仅当确有异常节点才抛错 if failed_nodes: -- Gitee From 5b48055d69f6e4b891119a9ee9beaef0ad011460 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 16 Jul 2025 14:17:50 +0800 Subject: [PATCH 051/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/os_platform/linux_platform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/os_platform/linux_platform.py b/script/os_platform/linux_platform.py index eed94e1e..fe2eb283 100644 --- a/script/os_platform/linux_platform.py +++ b/script/os_platform/linux_platform.py @@ -219,7 +219,8 @@ class LinuxPlatform(object): ] elif version[0] == "8": file_name_list = [ - self.package_file_path(prefix_str, packageVersion, PAK_OPENEULER, postfix_str), + self.package_file_path(prefix_str, packageVersion, PAK_CENTOS, postfix_str), + self.package_file_path(prefix_str, packageVersion, PAK_OPENEULER, postfix_str) ] else: file_name_list = [ -- Gitee From 0f07b4af8a084fd688be0ecdc90c587be084ab3e Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 18 Jul 2025 16:06:41 +0800 Subject: [PATCH 052/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/DbClusterInfo.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 73f6cbdd..95118e1c 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ 
b/script/gspylib/common/DbClusterInfo.py @@ -768,9 +768,10 @@ class dbNodeInfo(): return retStr def setDnDetailNum(self): - self.dataNum = self.getDnNum(MASTER_INSTANCE) - self.standbyDnNum = self.getDnNum(STANDBY_INSTANCE) - self.dummyStandbyDnNum = self.getDnNum(DUMMY_STANDBY_INSTANCE) + # cantian适配DSS CM临时修改 + self.dataNum = 0 + self.standbyDnNum = 0 + self.dummyStandbyDnNum = 0 def getDnNum(self, dntype): """ @@ -1279,8 +1280,9 @@ class dbClusterInfo(): outText = outText + ("sshChannel %u:%s\n" % ( j + 1, dbNode.sshIps[j])) j = j + 1 - outText = outText + ( - "datanodeCount :%u\n" % len(dbNode.datanodes)) + outText = outText + ("datanodeCount :0\n") + # outText = outText + ( + # "datanodeCount :%u\n" % len(dbNode.datanodes)) j = 0 for dnInst in dbNode.datanodes: j = j + 1 -- Gitee From 20ef22f186053d5f5a8e358faf154ae4beac57d6 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 10:46:36 +0800 Subject: [PATCH 053/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/Common.py | 2 +- script/gspylib/common/DbClusterInfo.py | 11 ++- .../gspylib/component/CM/CM_OLAP/CM_OLAP.py | 66 ++++++++++------ .../component/Kernel/DN_OLAP/DN_OLAP.py | 79 +++++++++++++++++-- 4 files changed, 121 insertions(+), 37 deletions(-) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 767d7ae6..1785348a 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1198,7 +1198,7 @@ class DefaultValue(): ) envList.extend([ f"export DSS_HOME={default_dss_home}", - "export DSS_DATA_DISK=$DSS_HOME/data" + "export DSS_DATA_DISK=$DSS_HOME" ]) if agentPath: diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 95118e1c..1c5e5db2 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -769,9 +769,9 @@ class dbNodeInfo(): def setDnDetailNum(self): # cantian适配DSS CM临时修改 - self.dataNum = 0 - self.standbyDnNum = 0 - self.dummyStandbyDnNum = 0 + self.dataNum = self.getDnNum(MASTER_INSTANCE) + self.standbyDnNum = self.getDnNum(STANDBY_INSTANCE) + self.dummyStandbyDnNum = self.getDnNum(DUMMY_STANDBY_INSTANCE) def getDnNum(self, dntype): """ @@ -1280,9 +1280,8 @@ class dbClusterInfo(): outText = outText + ("sshChannel %u:%s\n" % ( j + 1, dbNode.sshIps[j])) j = j + 1 - outText = outText + ("datanodeCount :0\n") - # outText = outText + ( - # "datanodeCount :%u\n" % len(dbNode.datanodes)) + outText = outText + ( + "datanodeCount :%u\n" % len(dbNode.datanodes)) j = 0 for dnInst in dbNode.datanodes: j = j + 1 diff --git a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py index c07674c9..4ad42f26 100644 --- a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py +++ b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py @@ -845,35 +845,51 @@ class CM_OLAP(CM): def get_init_cm_cmd(self): user = pwd.getpwuid(os.getuid()).pw_name gauss_home = EnvUtil.getEnvironmentParameterValue('GAUSSHOME', user) - dss_home = EnvUtil.getEnvironmentParameterValue('DSS_HOME', user) + dss_home = EnvUtil.getEnvironmentParameterValue('DSS_HOME', user) + + # Cantian 数据目录:${CTDB_HOME}/data + ctdb_home = EnvUtil.getEnvironmentParameterValue('CTDB_HOME', user) + if not ctdb_home: + raise EnvironmentError("CTDB_HOME is not set – cannot locate Cantian data directory.") + cantian_data = 
os.path.join(ctdb_home, "data") + # not use realpath - dms_contrl = os.path.join(gauss_home, 'bin/cantian_contrl.sh') - dss_contrl = os.path.join(gauss_home, 'bin/dss_contrl.sh') - - cmd = [ - str(CmResCtrlCmd(name='cantian', attr=CmResAttr(dms_contrl))), - str( - CmResCtrlCmd(name='dss', - attr=CmResAttr(dss_contrl, res_type='APP'))) + dms_contrl = os.path.join(gauss_home, "bin/cantian_contrl.sh") + dss_contrl = os.path.join(gauss_home, "bin/dss_contrl.sh") + + cmd_list = [ + str(CmResCtrlCmd(name="cantian", attr=CmResAttr(dms_contrl))), + str(CmResCtrlCmd(name="dss", attr=CmResAttr(dss_contrl, res_type="APP"))) ] + # 提前解码 dss_nodes_list,避免循环里重复解码 + dss_nodes_list = DssConfig.get_value_b64_handler( + "dss_nodes_list", self.dss_config, action="decode" + ) + for db_inst in self.cluster_info.dbNodes: - cmd.append( - str( - CmResCtrlCmd(action='edit', - name='dss', - attr=DssInstAttr( - node_id=db_inst.id, - dss_id=DssInst.get_current_dss_id( - dss_home, db_inst, - DssConfig.get_value_b64_handler( - 'dss_nodes_list', - self.dss_config, - action='decode')), - dss_home="{};{}".format( - dss_home, - db_inst.datanodes[0].datadir))))) - return "source {}; {}".format(EnvUtil.getMpprcFile(), ' ;'.join(cmd)) + dss_id = DssInst.get_current_dss_id(dss_home, db_inst, dss_nodes_list) + + cmd_list.append(str(CmResCtrlCmd( + action="edit", name="dss", + attr=DssInstAttr( + node_id=db_inst.id, + dss_id=dss_id, + dss_home=f"{dss_home};{db_inst.datanodes[0].datadir}" + ) + ))) + + cmd_list.append(str(CmResCtrlCmd( + action="edit", name="cantian", + attr=DssInstAttr( + node_id=db_inst.id, + dss_id=dss_id, + dss_home=f"{dss_home};{cantian_data}" + ) + ))) + + joined_cmd = " ;".join(cmd_list) + return f"source {EnvUtil.getMpprcFile()}; {joined_cmd}" def init_cm_res_json(self, rm_cm_json=True): cm_resource = os.path.realpath( diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 8e169717..4f4c6304 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -136,6 +136,42 @@ class DN_OLAP(Kernel): FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.rand" % self.instInfo.datadir) + def _patch_create_sql_paths(self, sql_file: str) -> None: + """ + 将 create_database.sql 中出现的 'dbfiles1' 统一替换: + • DSS 模式 → '+' + . 
dbstor 模式 待定 + • nfs 模式 → '/data' + """ + if not os.path.exists(sql_file): + raise FileNotFoundError(sql_file) + + perm = int(FileUtil.get_permission_value(sql_file), 8) + + if self.dss_mode: + vg = self._resolve_dss_vg() + target = f"+{vg}" + else: + ctdb_home = os.getenv("CTDB_HOME") + if not ctdb_home: + raise EnvironmentError("CTDB_HOME must be set when not in DSS mode.") + target = os.path.join(ctdb_home, "data") + + orig_lines = FileUtil.readFile(sql_file) + new_content = re.sub(r"\bdbfiles1\b", target, "".join(orig_lines)) + new_lines = new_content.splitlines() + + FileUtil.write_custom_context( + sql_file, + new_lines, + perm, + p_mode="w" + ) + + self.logger.debug( + f"[CreateDB] replaced 'dbfiles1' → '{target}' in {sql_file}" + ) + @staticmethod def copy_and_render_file(src: str, dst: str, mode: str = DefaultValue.KEY_FILE_MODE): """ @@ -209,6 +245,7 @@ class DN_OLAP(Kernel): "create_database.sql.template") work_sql = os.path.join(ctdb_home, "create_database.sql") self.copy_and_render_file(tpl_sql, work_sql) + self._patch_create_sql_paths(work_sql) run_sql = ( f"ctsql '/' as sysdba -q " @@ -329,9 +366,9 @@ class DN_OLAP(Kernel): # -------- DSS 目录 & 配置 -------------------------------------- dss_home = os.getenv("DSS_HOME", - os.path.realpath(os.path.join(pgdata, "../dss"))) + os.path.realpath(os.path.join(pgdata, "../dss_home"))) dss_data = os.getenv("DSS_DATA_DISK", - os.path.realpath(os.path.join(pgdata, "../dss/data"))) + os.path.realpath(os.path.join(pgdata, "../dss_home"))) FileUtil.createDirectory(dss_home, DefaultValue.KEY_DIRECTORY_MODE) FileUtil.createDirectory(dss_data, DefaultValue.KEY_DIRECTORY_MODE) @@ -342,9 +379,9 @@ class DN_OLAP(Kernel): dss_ini_dst = os.path.join(dss_cfg_dir, "cantiand.ini") self.copy_and_render_file(dss_ini_tpl, dss_ini_dst) - # 更新配置文件 - self.updateConfig(dss_ini_dst, - {"SS_INTERCONNECT_URL": self.get_ss_inter_url()}) + # 更新dss对应配置文件 + self.updateConfig(os.path.join(dss_cfg_dir, "cantiand.ini"), + self.getDssCantianDict()) # -------- CM 配置 --------------------------------------------- cm_dest = os.getenv("CM_CONFIG_PATH", @@ -614,6 +651,38 @@ class DN_OLAP(Kernel): return cantian_dict + def getDssCantianDict(self) -> dict: + params = {"SS_INTERCONNECT_URL": self.get_ss_inter_url()} + + if self.dss_mode: + vg = self._resolve_dss_vg() + self.logger.debug(f"[DSS] resolved private VG = {vg}") + params["CONTROL_FILES"] = ( + f'"(+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3)"' + ) + else: + ctdb_home = os.getenv("CTDB_HOME") + if not ctdb_home: + raise EnvironmentError("CTDB_HOME must be set when not in DSS mode.") + params["CONTROL_FILES"] = ( + "(${CTDB_HOME}/data/ctrl1, " + "${CTDB_HOME}/data/ctrl2, " + "${CTDB_HOME}/data/ctrl3)" + ) + + return params + + def _resolve_dss_vg(self) -> str: + dss_home = os.getenv("DSS_HOME") + + try: + inst_id = DssInst.get_dss_id_from_key(dss_home) + return DssInst.get_private_vgname_by_ini(dss_home, inst_id) + except Exception as exc: + raise EnvironmentError( + f"Cannot determine DSS private VG name: {exc}" + ) from exc + def get_ss_inter_url(self) -> str: """ 例:0|10.1.1.1|1611,1|10.1.1.2|1611,2|10.1.1.3|1611 -- Gitee From de1ca57d309589f7b0ac54b88587580273d82053 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 11:26:20 +0800 Subject: [PATCH 054/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../component/Kernel/DN_OLAP/DN_OLAP.py | 32 
+++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 4f4c6304..70ef522f 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -407,10 +407,12 @@ class DN_OLAP(Kernel): def init_database(self, gauss_home: str): """ - 仅1节点调用: - 1. 后台 nomount 启动 Cantian - 2. 执行建库 SQL - 3. 优雅关闭 Cantian + 仅 主 节点调用: + 1) 后台 nomount 启动 Cantian + 2) 执行建库 SQL + 3) 关闭 Cantian: + • 优雅 shutdown + • 若失败 → 强制 kill -9 """ pgdata = self.instInfo.datadir @@ -423,14 +425,26 @@ class DN_OLAP(Kernel): self.create_database(gauss_home) pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" + shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: self.logger.debug("cantiand is not running, skip shutdown.") - else: - shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" - st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) - if st != 0: - raise Exception(f"Cantiand shutdown failed:\n{out}") + return + + st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) + if st == 0: self.logger.debug("Cantiand shutdown successfully.") + return + + self.logger.warning(f"Cantiand graceful shutdown failed, try force kill:\n{out}") + + kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" + CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) + time.sleep(2) + + if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] == 0: + raise Exception("Cantiand force kill failed: process still alive.") + self.logger.debug("Cantiand force-killed successfully.") @Dss.catch_err(exist_so=True) def initInstance(self): -- Gitee From f25f0518b2cd801f06257b8a91f85b00060b90dd Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 11:29:44 +0800 Subject: [PATCH 055/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 70ef522f..ca7516f8 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -436,7 +436,7 @@ class DN_OLAP(Kernel): self.logger.debug("Cantiand shutdown successfully.") return - self.logger.warning(f"Cantiand graceful shutdown failed, try force kill:\n{out}") + self.logger.debug(f"Cantiand graceful shutdown failed, try force kill:\n{out}") kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) -- Gitee From 012690a33734fbea574bad939b9fa72bb3ff04e6 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 11:31:48 +0800 Subject: [PATCH 056/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 1 - 1 file changed, 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index ca7516f8..1a9545da 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ 
b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -440,7 +440,6 @@ class DN_OLAP(Kernel): kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) - time.sleep(2) if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] == 0: raise Exception("Cantiand force kill failed: process still alive.") -- Gitee From 1b34ad4c412a8a019c72d5b5449059ec9d6e943d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 11:41:21 +0800 Subject: [PATCH 057/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 1a9545da..7de778a2 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -678,9 +678,9 @@ class DN_OLAP(Kernel): if not ctdb_home: raise EnvironmentError("CTDB_HOME must be set when not in DSS mode.") params["CONTROL_FILES"] = ( - "(${CTDB_HOME}/data/ctrl1, " - "${CTDB_HOME}/data/ctrl2, " - "${CTDB_HOME}/data/ctrl3)" + f"({ctdb_home}/data/ctrl1, " + f"{ctdb_home}/data/ctrl2, " + f"{ctdb_home}/data/ctrl3)" ) return params -- Gitee From dc31f9dfc841b81b3efe959d9b4f4fb7f339b979 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 14:07:47 +0800 Subject: [PATCH 058/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/om/OLAP/OmImplOLAP.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index b764f11f..758f7824 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -237,9 +237,11 @@ class OmImplOLAP(OmImpl): DefaultValue.TIMEOUT_CLUSTER_START) # ---------------- start cantiand/mysqld per instance ---------------- - single_mode = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() in ( - "on", "true", "yes", "1" - ) + single_mode_raw = EnvUtil.getEnvironmentParameterValue( + "CANTIAN_MYSQL_SINGLE", + self.context.user, + default="").strip().lower() + single_mode = single_mode_raw in ("on", "true", "yes", "1") host_cmd_map = {h: [] for h in hostList} mpprc = self.context.g_opts.mpprcFile -- Gitee From 3fd3b1c32ad8cf41427c55b85b75e51e76151272 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 14:11:32 +0800 Subject: [PATCH 059/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/om/OLAP/OmImplOLAP.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 758f7824..370a2d0c 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -237,10 +237,12 @@ class OmImplOLAP(OmImpl): DefaultValue.TIMEOUT_CLUSTER_START) # ---------------- start cantiand/mysqld per instance ---------------- + mpprc = self.context.g_opts.mpprcFile single_mode_raw = EnvUtil.getEnvironmentParameterValue( "CANTIAN_MYSQL_SINGLE", self.context.user, 
- default="").strip().lower() + mpprc + ).strip().lower() single_mode = single_mode_raw in ("on", "true", "yes", "1") host_cmd_map = {h: [] for h in hostList} -- Gitee From acbf9f542b642292ca9419373e0d3400fd0b4fba Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 14:50:55 +0800 Subject: [PATCH 060/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/xml_status.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/script/base_utils/template/xml_status.py b/script/base_utils/template/xml_status.py index 591d6b17..9c4c6bcb 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -179,16 +179,26 @@ def check_share_fs(val: str) -> bool: Rules ----- - 1. Format must be : with no blanks - 2. must be syntactically valid **and** reachable (ping) - 3. must contain no illegal characters + 1. Must not be empty. + 2. Format must be : with no blanks. + 3. must be syntactically valid and reachable (ping). + 4. must not be the loopback address 127.0.0.1. + 5. must contain no illegal characters. """ + if not val or not val.strip(): + GaussLog.printMessage("Shared filesystem address cannot be empty.") + return False + if " " in val or ":" not in val: GaussLog.printMessage("Invalid format: use : with no spaces.") return False ip, directory = val.split(":", 1) + if ip == "127.0.0.1": + GaussLog.printMessage("Loopback address 127.0.0.1 is not allowed.") + return False + if not NetUtil.isIpValid(ip): GaussLog.printMessage("Invalid IP address.") return False -- Gitee From c1b7d6b3337b0055f1b84112333db9f415c6dbb9 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 15:02:30 +0800 Subject: [PATCH 061/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/resource_en.json | 1 + script/base_utils/template/resource_zh.json | 1 + script/base_utils/template/xml_status.py | 19 +++++-------------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 4f53b27b..a3121c30 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -67,6 +67,7 @@ "not_back": "Unable to return to the previous step", "finish": "Finished", "invalid_local_ip": "Unable to obtain local IP address", + "share_fs_empty": "The shared-filesystem mount info cannot be empty", "mkdir_dir_failed": "mkdir dir failed", "mkdir_file_failed": "touch file failed", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index b0df5025..aeb973fc 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -67,6 +67,7 @@ "not_back": "无法返回到上一步", "finish": "输入完成", "invalid_local_ip": "无法获取到本地ip", + "share_fs_empty": "输入的文件系统信息不能为空", "mkdir_dir_failed": "创建目录失败", "mkdir_file_failed": "创建文件失败", diff --git a/script/base_utils/template/xml_status.py b/script/base_utils/template/xml_status.py index 9c4c6bcb..a75b4647 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -179,26 +179,16 @@ def check_share_fs(val: str) -> 
bool: Rules ----- - 1. Must not be empty. - 2. Format must be : with no blanks. - 3. must be syntactically valid and reachable (ping). - 4. must not be the loopback address 127.0.0.1. - 5. must contain no illegal characters. + 1. Format must be : with no blanks + 2. must be syntactically valid **and** reachable (ping) + 3. must contain no illegal characters """ - if not val or not val.strip(): - GaussLog.printMessage("Shared filesystem address cannot be empty.") - return False - if " " in val or ":" not in val: GaussLog.printMessage("Invalid format: use : with no spaces.") return False ip, directory = val.split(":", 1) - if ip == "127.0.0.1": - GaussLog.printMessage("Loopback address 127.0.0.1 is not allowed.") - return False - if not NetUtil.isIpValid(ip): GaussLog.printMessage("Invalid IP address.") return False @@ -292,7 +282,8 @@ class ShareFsStatus(TemplateStatus): return DdesStatus() if not user_input: - return CmStatus() + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('share_fs_empty')) + continue if not check_share_fs(user_input): continue -- Gitee From d0a12bd4c6dc82d5eeaf0cb4f21f3c33e9f066c2 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 17:23:08 +0800 Subject: [PATCH 062/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/DbClusterInfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 1c5e5db2..d5d73564 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1345,7 +1345,7 @@ class dbClusterInfo(): # ctsql 视图查询 + grep 判断 OPEN sql_cmd = "SELECT NAME, STATUS, OPEN_STATUS FROM DV_DATABASE" - ctsql_cmd = f"ctsql '/' as sysdba -q -D {data_dir} -c \"{sql_cmd}\"" + ctsql_cmd = f"ctsql '/' as sysdba -q -D {data_dir} -c '{sql_cmd}'" probe = f"{ctsql_cmd} | grep -v 'OPEN_STATUS' | grep -v '^SQL>' | grep -qw 'OPEN'" echo_ok = ( -- Gitee From b4a07b0e04bed27a9ad02042fcd3c4c162d9a8b6 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 19 Jul 2025 17:32:11 +0800 Subject: [PATCH 063/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/om/OLAP/OmImplOLAP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 370a2d0c..86643a3a 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -318,7 +318,7 @@ class OmImplOLAP(OmImpl): for dbNode in self.clusterInfo.dbNodes: for inst in dbNode.datanodes: base_cmd = ( - f"ctsql '/' as sysdba -q -D {inst.datadir} -c \"{sql}\"" + f"ctsql '/' as sysdba -q -D {inst.datadir} -c '{sql}'" ) ret, out = CmdUtil.retryGetstatusoutput(base_cmd, retry_time=0) if ret != 0: -- Gitee From 457390436da3343144807e0b8c7fedb993d31114 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 21 Jul 2025 20:42:41 +0800 Subject: [PATCH 064/144] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcentos8=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=85=BC=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/resource_en.json | 2 +- script/base_utils/template/resource_zh.json | 2 +- 
script/gspylib/component/DSS/dss_comp.py | 2 ++ script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 6 +++++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index a3121c30..6fa46b4d 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -41,7 +41,7 @@ "choose_dbstor" : "Enable DBStor shared-storage mode?", "input_dbstor" : "Input 1 / 2 (default 1) 1) Disable 2) Enable", - "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", + "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: 127.0.0.1:log_fs,127.0.0.1:page_fs,127.0.0.1:share_fs,127.0.0.1:archive_fs)", "intput_share_fs": "Enter the shared-filesystem mount info, e.g. 127.0.0.1:share_fs", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index aeb973fc..471077af 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -41,7 +41,7 @@ "choose_dbstor": "请选择是否启用 DBStor 共享存储?", "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)不部署", - "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:log_fs:log_fs,page_fs:page_fs,share_fs:share_fs)", + "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:127.0.0.1:log_fs,127.0.0.1:page_fs,127.0.0.1:share_fs,127.0.0.1:archive_fs)", "intput_share_fs": "请输入共享文件系统挂载信息(如 127.0.0.1:share_fs)", diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index d26e67ce..66e06e60 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -346,6 +346,8 @@ class DssInitCfg(): setattr(self, key, value) if dss_nodes_list: self.DSS_NODES_LIST = dss_nodes_list + else: + self.DSS_NODES_LIST = '0:127.0.0.1:1611' self.STORAGE_MODE = "SHARE_DISK" # _SHM_KEY value range: 1–64 self._SHM_KEY = os.getuid() % 64 + 1 diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 7de778a2..63f6e8a2 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -657,6 +657,10 @@ class DN_OLAP(Kernel): "SHARED_PATH": shared_path, } + # Enable DSS explicitly when we're in DSS mode + if self.dss_mode: + cantian_dict["ENABLE_DSS"] = "TRUE" + # 使用 NFS 需要加上FULLDIRECTIO参数,否则可能出现数据不一致的问题 share_fs = os.getenv("SHARE_FS", "").strip() if share_fs and not share_fs.startswith("127.0.0.1"): @@ -671,7 +675,7 @@ class DN_OLAP(Kernel): vg = self._resolve_dss_vg() self.logger.debug(f"[DSS] resolved private VG = {vg}") params["CONTROL_FILES"] = ( - f'"(+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3)"' + f'"+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3"' ) else: ctdb_home = os.getenv("CTDB_HOME") -- Gitee From 7b104d539e0b62242ed4d8e6cd75ed91c8d36bb0 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 22 Jul 2025 10:05:49 +0800 Subject: [PATCH 065/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OM对接Dbstor OM对接Dbstor OM对接Dbstor OM对接Dbstor OM对接Dbstor OM对接Dbstor OM对接Dbstor OM对接Dbstor --- script/base_utils/template/cluster_tmp.xml | 14 ++ script/base_utils/template/resource_en.json | 15 +- script/base_utils/template/resource_zh.json | 19 +- script/base_utils/template/xml_constant.py | 12 ++ script/base_utils/template/xml_status.py | 164 ++++++++++++++++-- 
script/base_utils/template/xml_template.py | 71 +++++++- .../domain_utils/cluster_file/package_info.py | 13 ++ script/gspylib/common/ClusterParams.py | 15 +- script/gspylib/common/DbClusterInfo.py | 77 ++++++++ script/gspylib/component/DSS/dss_comp.py | 2 - .../gspylib/component/Dbstor/dbstor_comp.py | 131 ++++++++++++++ .../component/Kernel/DN_OLAP/DN_OLAP.py | 27 +-- script/gspylib/component/MySql/mysql_comp.py | 11 +- script/local/Install.py | 72 ++++++++ 14 files changed, 604 insertions(+), 39 deletions(-) create mode 100644 script/gspylib/component/Dbstor/dbstor_comp.py diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index d8b6269b..29775d8a 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -16,6 +16,11 @@ + + + + + @@ -28,6 +33,7 @@ + @@ -48,6 +54,7 @@ + @@ -59,6 +66,7 @@ + @@ -70,6 +78,7 @@ + @@ -81,6 +90,7 @@ + @@ -92,6 +102,7 @@ + @@ -103,6 +114,7 @@ + @@ -114,6 +126,7 @@ + @@ -125,6 +138,7 @@ + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 6fa46b4d..e4f3ae5d 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -36,12 +36,21 @@ "choose_pri_standby": "Please choose whether to deploy in single-node or multi-node mode?", "input_pri_standby": "Please enter 1/2 for selection, the default option is 1) multi-node deployment", - "input_ip_hostname": "Please enter the host node IP and node name(for example:192.168.0.1 hostname1;192.168.0.2 hostname2)", + "input_ip_hostname": "Please enter cluster communication IP and host name (for example:192.168.0.1 hostname1;192.168.0.2 hostname2)", "max_nodes": "Please enter the number of nodes, supporting a maximum of one primary and eight backup, which is 9 node (default is one primary and two backup, with 3 nodes)", "choose_dbstor" : "Enable DBStor shared-storage mode?", - "input_dbstor" : "Input 1 / 2 (default 1) 1) Disable 2) Enable", - "intput_dbstor_fs" : "Enter the three DBStor file-systems (default: 127.0.0.1:log_fs,127.0.0.1:page_fs,127.0.0.1:share_fs,127.0.0.1:archive_fs)", + "input_dbstor" : "Input 1 / 2 (default 1) 1) Enable 2) Disable", + "intput_dbstor_fs" : "Enter the 4 DBStor file-systems (default: log_fs;page_fs;share_fs;archive_fs)", + "intput_dbstor_home": "Please enter dbstor_home dir (default:/opt/openGauss/install/dbstor_home)", + "input_cantian_vlan_ip": "Enter cantian_vlan_ip (nodes separated by ';', multi-IP with '|', e.g. 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", + "cantian_vlan_ip_empty": "cantian_vlan_ip cannot be empty", + "input_storage_vlan_ip": "Enter storage_vlan_ip (multi-VLAN '|', multi-IP ',', e.g. 10.10.10.1,10.10.10.2|11.11.11.1,11.11.11.2):", + "storage_vlan_ip_empty": "storage_vlan_ip cannot be empty", + "input_link_type": "LINK_TYPE (0 = TCP, 1 = RDMA) [default 0]:", + "link_type_invalid": "Only 0 or 1 is allowed!", + "dbstor_fs_empty": "dbstor_fs cannot be empty", + "dbstor_fs_invalid": "dbstor_fs must contain 4 names (log/page/share/archive) separated by ';'", "intput_share_fs": "Enter the shared-filesystem mount info, e.g. 
127.0.0.1:share_fs", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 471077af..d500dbe6 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -29,19 +29,28 @@ "deploy_mysql": "部署MySql", "not_deploy_mysql": "不部署MySql", - "choose_mysql_meta" : "MySql 元数据是否归一(写入 Cantian)?", + "choose_mysql_meta" : "MySql 元数据是否归一(写入 Cantian)?", "input_mysql_meta" : "请输入 1/2 进行选择, 默认 1) 归一", "deploy_mysql_meta" : "归一", "not_deploy_mysql_meta" : "非归一", "choose_pri_standby": "请选择是否多节点部署?", - "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)多节点部署", - "input_ip_hostname": "请输入主机节点IP和节点名称(如:192.168.0.1 hostname1;192.168.0.2 hostname2)", + "input_pri_standby": "请输入 1/2 进行选择,默认选项是 1)多节点部署", + "input_ip_hostname": "请输入“集群通信 IP”和节点名称(如:192.168.0.1 hostname1;192.168.0.2 hostname2)", "max_nodes": "请输入节点数量,最多支持一主八备,即9个节点(默认是一主两备,3个节点)", "choose_dbstor": "请选择是否启用 DBStor 共享存储?", - "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)不部署", - "intput_dbstor_fs": "请输入'dbstor的3个文件系统'的路径信息(默认是:127.0.0.1:log_fs,127.0.0.1:page_fs,127.0.0.1:share_fs,127.0.0.1:archive_fs)", + "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)部署", + "intput_dbstor_fs": "请输入'dbstor的4个文件系统'的路径信息(默认是:log_fs;page_fs;share_fs;archive_fs)", + "intput_dbstor_home": "请输入 'dbstor_home' 的路径信息(默认:/opt/openGauss/install/dbstor_home)", + "input_cantian_vlan_ip": "请输入 cantian_vlan_ip(节点用 ; 分隔,同节点多IP用 |,示例: 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", + "cantian_vlan_ip_empty": "cantian_vlan_ip 不能为空", + "input_storage_vlan_ip": "请输入 storage_vlan_ip(多VLAN用 |,同VLAN多IP用,示例: 10.10.10.1,10.10.10.2|11.11.11.1,11.11.11.2):", + "storage_vlan_ip_empty": "storage_vlan_ip 不能为空", + "input_link_type": "请输入 LINK_TYPE(0=TCP,1=RDMA,默认0):", + "link_type_invalid": "仅允许输入 0 或 1!", + "dbstor_fs_empty": "dbstor_fs 不能为空", + "dbstor_fs_invalid": "dbstor_fs 必须包含 4 个文件系统名(log/page/share/archive),用 ; 分隔", "intput_share_fs": "请输入共享文件系统挂载信息(如 127.0.0.1:share_fs)", diff --git a/script/base_utils/template/xml_constant.py b/script/base_utils/template/xml_constant.py index bd670cd8..c1a9ab69 100644 --- a/script/base_utils/template/xml_constant.py +++ b/script/base_utils/template/xml_constant.py @@ -8,6 +8,7 @@ class XmlConstant: IS_CM = False IS_DDES = False IS_MYSQL = False + IS_DBSTOR = False IS_PRI_STANDBY = False PRI_STANDBY_COUNT = 3 PRI_STANDBY_IP = {} @@ -22,6 +23,9 @@ class XmlConstant: DSS_PARA_INFO = ['enable_dss', 'dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name', 'dss_ssl_enable'] MYSQL_PARA = ['enable_mysql', 'mysql_metadata_in_cantian'] + DBSTOR_PARA_INFO = ["enable_dbstor", "dbstor_home", "dbstor_fs", "storage_vlan_ip", "link_type"] + DBSTOR_DEVICE_PARA_INFO = ["cantian_vlan_ip"] + UPDATE_DBSTOR_PARA_INFO = ["dbstor_home", "dbstor_fs", "cantian_vlan_ip", "storage_vlan_ip", "link_type"] UPDATE_DSS_PARA_INFO = ['dss_home', 'dss_vg_info', 'votingDiskPath', 'shareDiskDir', 'ss_dss_vg_name'] CM_PARA_INFO = ['cmDir', 'cmsNum', 'cmServerPortBase', 'cmServerPortStandby', 'cmServerListenIp1', 'cmServerHaIp1', 'cmServerlevel', 'cmServerRelation'] @@ -32,6 +36,14 @@ class XmlConstant: DATABASE_PORT = "" CM_SERVER_PORT = "" SHARE_FS = "127.0.0.1:share_fs" + + DBSTOR_HOME_DIR = "/opt/openGauss/install/dbstor_home" + DBSTOR_FS = "log_fs;page_fs;share_fs;archive_fs" + CANTIAN_VLAN_IP = "" + STORAGE_VLAN_IP = "" + # 0=TCP 1=RDMA + LINK_TYPE = "0" + SSH_PORTS = [] DEFAULT_DATABASE_PORT = "15000" diff --git a/script/base_utils/template/xml_status.py 
b/script/base_utils/template/xml_status.py index a75b4647..f1ed137f 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -1,5 +1,6 @@ import os import sys +import re import socket import subprocess @@ -8,6 +9,8 @@ from gspylib.common.Common import DefaultValue from base_utils.os.net_util import NetUtil from base_utils.template.xml_constant import XmlConstant +ILLEGAL_PATTERN = re.compile(r"[;`&|]") + def check_illegal_character(user_put): for rac in DefaultValue.PATH_CHECK_LIST: @@ -173,6 +176,34 @@ def check_input_xml_info(xml_dir): return False return True + +def check_ip(ip: str) -> bool: + if not NetUtil.isIpValid(ip): + GaussLog.printMessage("%s %s" % + (ip, XmlConstant.RESOURCE_DATA.get('invalid_ip'))) + return False + return True + + +def check_ip_groups(expr: str, outer_sep: str, inner_sep: str) -> bool: + """ + 校验 cantian_vlan_ip / storage_vlan_ip: + outer_sep 负责节点或 VLAN 间分隔 + inner_sep 负责同节点多 IP 分隔 + """ + groups = [g.strip() for g in expr.split(outer_sep) if g.strip()] + if not groups: + return False + for g in groups: + ips = [ip.strip() for ip in g.split(inner_sep) if ip.strip()] + if not ips: + return False + for ip in ips: + if not check_ip(ip): + return False + return True + + def check_share_fs(val: str) -> bool: """ Validate the shared-filesystem string. @@ -188,26 +219,20 @@ def check_share_fs(val: str) -> bool: return False ip, directory = val.split(":", 1) - if not NetUtil.isIpValid(ip): GaussLog.printMessage("Invalid IP address.") return False - if not check_illegal_character(directory): return False try: - ret = subprocess.run( - ["ping", "-c", "1", "-W", "2", ip], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + ret = subprocess.run(["ping", "-c", "1", "-W", "2", ip], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) if ret.returncode != 0: - GaussLog.printMessage(f"Cannot ping {ip}. 
Please check network connectivity.") + GaussLog.printMessage(f"Cannot ping {ip}.") return False except FileNotFoundError: - GaussLog.printMessage("Ping command not found; skipping reachability check.") - + pass return True @@ -308,13 +333,13 @@ class PriStandbyStatus(TemplateStatus): return DataPortStatus() if not user_input: XmlConstant.IS_PRI_STANDBY = True - return DdesStatus() + return DbstorStatus() if not user_input.isdigit(): GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) continue if user_input == "1": XmlConstant.IS_PRI_STANDBY = True - return DdesStatus() + return DbstorStatus() elif user_input == "2": XmlConstant.IS_PRI_STANDBY = False return PriStandbyCountStatus() @@ -494,6 +519,121 @@ class CmServerPortStatus(TemplateStatus): return PriStandbyCountStatus() +class DbstorStatus(TemplateStatus): + """ + ① 询问是否部署 DBStor + 若选“部署” → 跳过 DSS,填写 dbstor_fs → CmStatus + 若选“不开启” → 是否部署 DSS + """ + + def work(self): + XmlConstant.IS_DBSTOR = False + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('choose_dbstor')) + XmlConstant.select_option(XmlConstant.RESOURCE_DATA.get('deploy'), + XmlConstant.RESOURCE_DATA.get('not_deploy')) + + for _ in range(XmlConstant.TRIES): + opt = input(XmlConstant.RESOURCE_DATA.get('input_dbstor')).strip() + if opt.lower() in ('back', 'b'): + return PriStandbyStatus() + + if not opt or opt == "1": + XmlConstant.IS_DBSTOR = True + XmlConstant.IS_DDES = False + return DbstorHomeStatus() + + if opt == "2": + XmlConstant.IS_DBSTOR = False + return DdesStatus() + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('invalid_num')) + + +class DbstorHomeStatus(TemplateStatus): + """② dbstor_home""" + def work(self): + default_path = XmlConstant.DBSTOR_HOME_DIR + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('intput_dbstor_home')).strip() + if user_input.lower() in ('back', 'b'): + return DbstorStatus() + if not user_input: + user_input = default_path + if not check_database_dir(user_input): + continue + XmlConstant.DBSTOR_HOME_DIR = os.path.normpath(user_input) + return CantianVlanIpStatus() + + +class CantianVlanIpStatus(TemplateStatus): + """③ cantian_vlan_ip""" + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('input_cantian_vlan_ip')).strip() + if user_input.lower() in ('back', 'b'): + return DbstorHomeStatus() + if not user_input: + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('cantian_vlan_ip_empty')) + continue + if not check_ip_groups(user_input, ';', '|'): + continue + XmlConstant.CANTIAN_VLAN_IP = user_input + return StorageVlanIpStatus() + + +class StorageVlanIpStatus(TemplateStatus): + """④ storage_vlan_ip""" + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('input_storage_vlan_ip')).strip() + if user_input.lower() in ('back', 'b'): + return CantianVlanIpStatus() + if not user_input: + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('storage_vlan_ip_empty')) + continue + if not check_ip_groups(user_input, '|', ','): + continue + XmlConstant.STORAGE_VLAN_IP = user_input + return LinkTypeStatus() + + +class LinkTypeStatus(TemplateStatus): + """⑤ LINK_TYPE (0 / 1)""" + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('input_link_type')).strip() + if user_input.lower() in ('back', 'b'): + return StorageVlanIpStatus() + if not user_input: + user_input = "0" + if user_input not in ("0", "1"): + 
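                # LINK_TYPE accepts only "0" (TCP) or "1" (RDMA); any other value prints link_type_invalid and re-prompts.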
GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('link_type_invalid')) + continue + XmlConstant.LINK_TYPE = user_input + return DbstorFsStatus() + + +class DbstorFsStatus(TemplateStatus): + """⑥ dbstor_fs(log_fs;page_fs;share_fs;archive_fs)""" + def work(self): + for _ in range(XmlConstant.TRIES): + user_input = input(XmlConstant.RESOURCE_DATA.get('intput_dbstor_fs')).strip() + if user_input.lower() in ('back', 'b'): + return LinkTypeStatus() + if not user_input: + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('dbstor_fs_empty')) + continue + parts = [p.strip() for p in user_input.split(';') if p.strip()] + if len(parts) != 4: + GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('dbstor_fs_invalid')) + continue + for name in parts: + if not check_illegal_character(name): + break + else: + XmlConstant.DBSTOR_FS = ';'.join(parts) + return CmStatus() + + class PriStandbyCountStatus(TemplateStatus): def work(self): diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 8d25c54f..97624819 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -143,6 +143,66 @@ class GenerateTemplate: if child.attrib['name'] in XmlConstant.DSS_PARA_INFO: self.root[0].remove(child) + def delete_xml_dbstor(self): + """ + - 开启 DBStor → 删除 share_fs,占位 DBStor 字段保留 + - 未开启 → 删除 / + """ + if XmlConstant.IS_DBSTOR: + # 去掉 share_fs(NFS) + for ele in list(self.root[0].findall("PARAM")): + if ele.attrib.get("name") == "share_fs": + self.root[0].remove(ele) + return + # 未启用 DBStor,删除占位字段 + for ele in list(self.root[0].findall("PARAM")): + if ele.attrib.get("name") in XmlConstant.DBSTOR_PARA_INFO: + self.root[0].remove(ele) + + for dev in self.root[1].findall("DEVICE"): + for p in list(dev.findall("PARAM")): + if p.attrib.get("name") in XmlConstant.DBSTOR_DEVICE_PARA_INFO: + dev.remove(p) + + def update_dbstor_info(self): + if not XmlConstant.IS_DBSTOR: + return + + if not XmlConstant.IS_DBSTOR: + return + + exist = {p.attrib["name"]: p for p in self.root[0].findall("PARAM")} + + def upsert(name: str, value: str): + if name in exist: + exist[name].attrib["value"] = value + else: + ET.SubElement(self.root[0], "PARAM", {"name": name, "value": value}) + + upsert("enable_dbstor", "on") + upsert("dbstor_home", XmlConstant.DBSTOR_HOME_DIR) + upsert("dbstor_fs", XmlConstant.DBSTOR_FS) + upsert("storage_vlan_ip", XmlConstant.STORAGE_VLAN_IP) + upsert("link_type", XmlConstant.LINK_TYPE) + + def update_cantian_vlan_ip(self): + """ + 写入每个 下的 。 + """ + if not XmlConstant.IS_DBSTOR: + return + + ip_groups = [g.strip() for g in XmlConstant.CANTIAN_VLAN_IP.split(";") if g.strip()] + for idx, device in enumerate(self.root[1].findall("DEVICE")): + value = ip_groups[idx] if idx < len(ip_groups) else "" + param = next((p for p in device.findall("PARAM") + if p.attrib.get("name") == "cantian_vlan_ip"), None) + if param: + param.attrib["value"] = value + else: + ET.SubElement(device, "PARAM", + {"name": "cantian_vlan_ip", "value": value}) + def delete_xml_mysql(self): """若未启用 MySQL,则移除 参数.""" if XmlConstant.IS_MYSQL: @@ -240,6 +300,7 @@ class GenerateTemplate: self.update_database_port() # update node ip self.update_node_ip_hostname_info() + self.update_cantian_vlan_ip() def update_ddes_info(self): if not XmlConstant.IS_DDES: @@ -275,11 +336,13 @@ class GenerateTemplate: self.update_cluster_label_common_info() # 2.update device label common info self.update_device_label_info() - # 3.update ddes info + # 3.update dbstor info + 
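        # DBStor and DSS are mutually exclusive: DbstorStatus sets IS_DDES=False when DBStor is enabled, so update_ddes_info() below is a no-op in that case.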
self.update_dbstor_info() + # 3.1.update ddes info self.update_ddes_info() - # 3.1 update share_fs info + # 3.2 update share_fs info self.update_share_fs_info() - # 3.5 update mysql info + # 3.3 update mysql info self.update_mysql_info() # 4.update cm info self.update_cm_info() @@ -342,6 +405,8 @@ class GenerateTemplate: self.load_xml() # delete xml excess node count self.delete_xml_node() + # delete xml dbstor info + self.delete_xml_dbstor() # delete xml ddes info self.delete_xml_ddes() # delete xml mysql info diff --git a/script/domain_utils/cluster_file/package_info.py b/script/domain_utils/cluster_file/package_info.py index 1b952ab2..c48689db 100644 --- a/script/domain_utils/cluster_file/package_info.py +++ b/script/domain_utils/cluster_file/package_info.py @@ -187,6 +187,17 @@ class PackageInfo(object): raise Exception(f"Mysql_server package CPU arch mismatch: {fname} " f"(expected {cpu_arch})") + # 查找 DBStor_Client*.tgz + dbstor_client_pkg = "" + for fname in os.listdir(package_path): + if fname.startswith("DBStor_Client") and fname.endswith(".tgz"): + if cpu_arch in fname: + dbstor_client_pkg = fname + break + else: + raise Exception(f"DBStor_Client package CPU arch mismatch: {fname} " + f"(expected {cpu_arch})") + tar_lists = SingleInstDiff.get_package_tar_lists(is_single_inst, os.path.normpath(package_path)) upgrade_sql_file_path = os.path.join(package_path, @@ -209,6 +220,8 @@ class PackageInfo(object): cmd += "%s " % os.path.basename(connector_package) if mysql_server_pkg and os.path.isfile(os.path.join(package_path, mysql_server_pkg)): cmd += f"{mysql_server_pkg} " + if dbstor_client_pkg and os.path.isfile(os.path.join(package_path, dbstor_client_pkg)): + cmd += f"{dbstor_client_pkg} " cmd += "&& %s " % CmdUtil.getChmodCmd( str(ConstantsBase.KEY_FILE_MODE), PackageInfo.get_package_back_name()) diff --git a/script/gspylib/common/ClusterParams.py b/script/gspylib/common/ClusterParams.py index 1f31dc9a..7e27ba59 100644 --- a/script/gspylib/common/ClusterParams.py +++ b/script/gspylib/common/ClusterParams.py @@ -57,9 +57,17 @@ class ClusterParams: UWAL_DEVICES_PATH = 'uwal_devices_path' PASSWORD = 'password' CLUSTER_TYPE = 'clusterType' + # Share-FS SHARE_FS = 'share_fs' + # mysql ENABLE_MYSQL = 'enable_mysql' MYSQL_METADATA_IN_CANTIAN = 'mysql_metadata_in_cantian' + # DBStor + ENABLE_DBSTOR = 'enable_dbstor' + DBSTOR_HOME = 'dbstor_home' + DBSTOR_FS = 'dbstor_fs' + STORAGE_VLAN_IP = 'storage_vlan_ip' + LINK_TYPE = 'link_type' @staticmethod def get_all_param_names(): @@ -95,7 +103,12 @@ class ClusterParams: ClusterParams.CLUSTER_TYPE, ClusterParams.SHARE_FS, ClusterParams.ENABLE_MYSQL, - ClusterParams.MYSQL_METADATA_IN_CANTIAN + ClusterParams.MYSQL_METADATA_IN_CANTIAN, + ClusterParams.ENABLE_DBSTOR, + ClusterParams.DBSTOR_HOME, + ClusterParams.DBSTOR_FS, + ClusterParams.STORAGE_VLAN_IP, + ClusterParams.LINK_TYPE ] FLOAT_IP_PATTERN = re.compile(r'\bfloatIp[0-9]+') diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index d5d73564..24e301be 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1023,6 +1023,12 @@ class dbClusterInfo(): # add for mysql self.enable_mysql = "" self.mysql_metadata_in_cantian = "" + # add for dbstor + self.enable_dbstor = "" + self.dbstor_home = "" + self.dbstor_fs = "" + self.storage_vlan_ip = "" + self.link_type = "" def __str__(self): """ @@ -3112,6 +3118,7 @@ class dbClusterInfo(): True) self._read_enable_mysql_settings(xmlRootNode) + 
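        # Reads the DBStor settings (enable_dbstor, dbstor_home, dbstor_fs, storage_vlan_ip, link_type) from the <CLUSTER> section, e.g.
        #   <PARAM name="enable_dbstor" value="on"/>
        #   <PARAM name="dbstor_fs" value="log_fs;page_fs;share_fs;archive_fs"/>
        # enable_dbstor defaults to "off" and link_type to "0" when the fields are absent.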
self._read_enable_dbstor_settings(xmlRootNode) def _read_enable_dss(self, xmlRootNode): _, self.enable_dss = ClusterConfigFile.readOneClusterConfigItem( @@ -3210,6 +3217,76 @@ class dbClusterInfo(): # 未开启 MySQL 时该参数无意义 self.mysql_metadata_in_cantian = "" + def _read_enable_dbstor_settings(self, xmlRootNode): + """ + 解析 段中的 DBStor 参数。 + """ + # enable_dbstor + status, val = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "enable_dbstor", "cluster") + if status == 0: + val = val.strip().lower() + if val in ('on', 'off', ''): + self.enable_dbstor = val + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("enable_dbstor", val)) + elif status == 2: + self.enable_dbstor = 'off' + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "enable_dbstor" + " Error: \n%s" % val) + + if self.enable_dbstor != 'on': + return + + if self.enable_dss == 'on': + raise Exception('Only one of DBStor or DSS can be enabled.') + + status, home = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "dbstor_home", "cluster") + if status == 0 and home.strip(): + self.dbstor_home = os.path.normpath(home.strip()) + checkPathVaild(self.dbstor_home) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "dbstor_home" + " Error: \n%s" % home) + + status, fs = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "dbstor_fs", "cluster") + if status == 0 and fs.strip(): + fs_items = fs.strip().split(';') + if len(fs_items) != 4: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("dbstor_fs", fs)) + self.dbstor_fs = fs.strip() + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "dbstor_fs" + " Error: \n%s" % fs) + + status, sip = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "storage_vlan_ip", "cluster") + if status == 0 and sip.strip(): + self.storage_vlan_ip = sip.strip() + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "storage_vlan_ip" + " Error: \n%s" % sip) + + status, ltp = ClusterConfigFile.readOneClusterConfigItem( + xmlRootNode, "link_type", "cluster") + if status == 0: + ltp = ltp.strip() + if ltp in ('0', '1', ''): + self.link_type = ltp or '0' + else: + raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % + ("link_type", ltp)) + elif status == 2: + self.link_type = '0' + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % + "link_type" + " Error: \n%s" % ltp) + def get_cluster_back_ip1s(self): # Read cluster backIp1s status, output = ClusterConfigFile.readOneClusterConfigItem( diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index 66e06e60..d26e67ce 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -346,8 +346,6 @@ class DssInitCfg(): setattr(self, key, value) if dss_nodes_list: self.DSS_NODES_LIST = dss_nodes_list - else: - self.DSS_NODES_LIST = '0:127.0.0.1:1611' self.STORAGE_MODE = "SHARE_DISK" # _SHM_KEY value range: 1–64 self._SHM_KEY = os.getuid() % 64 + 1 diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/Dbstor/dbstor_comp.py new file mode 100644 index 00000000..ca12659c --- /dev/null +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -0,0 +1,131 @@ +# gspylib/component/DBSTOR/dbstor_comp.py +# -*- coding:utf-8 -*- +import os +import socket +from typing import Dict + +from gspylib.component.BaseComponent import BaseComponent +from gspylib.common.Common import DefaultValue +from base_utils.os.file_util import FileUtil +from base_utils.os.user_util import 
UserUtil + + +def update_ini_file(ini_path: str, kv: Dict[str, str], + file_mode: str = DefaultValue.KEY_FILE_MODE, + logger=None) -> None: + if not os.path.isfile(ini_path): + raise FileNotFoundError(f"{ini_path} not found") + + lines = [ln.rstrip("\n") for ln in FileUtil.readFile(ini_path)] + touched, result = set(), [] + + for line in lines: + raw = line.lstrip() + if raw.startswith("#") or "=" not in raw: + result.append(line) + continue + key, _, _ = raw.partition("=") + kstrip = key.strip() + if kstrip in kv: + indent = line[:line.find(kstrip)] + result.append(f"{indent}{kstrip} = {kv[kstrip]}") + touched.add(kstrip) + else: + result.append(line) + + for k, v in kv.items(): + if k not in touched: + result.append(f"{k} = {v}") + + FileUtil.writeFile(ini_path, result, "w") + FileUtil.changeMode(file_mode, ini_path) + if logger: + logger.debug(f"[update_ini_file] rewrite {ini_path} with {len(kv)} keys.") + +class Dbstor(BaseComponent): + def __init__(self): + super().__init__() + + def initInstance(self): + if getattr(self.clusterInfo, "enable_dbstor", "off").lower() != "on": + if self.logger: + self.logger.debug("enable_dbstor != on, skip dbstor component init.") + return + + dbstor_home = self._get_dbstor_home() + cfg_dir = os.path.join(dbstor_home, "conf", "dbs") + cfg_file = os.path.join(cfg_dir, "dbstor_config.ini") + + FileUtil.createDirectory(cfg_dir, DefaultValue.KEY_DIRECTORY_MODE) + + if not os.path.exists(cfg_file): + FileUtil.createFile(cfg_file) + FileUtil.writeFile(cfg_file, ["[CLIENT]", ""], "w") + + node_id, local_ip = self._get_node_id_and_local_ip() + self.update_dbstor_config(self.clusterInfo, cfg_file, node_id, local_ip, self.logger) + + user = UserUtil.getUserInfo()["name"] + FileUtil.changeOwner(user, cfg_file, False) + + if self.logger: + self.logger.log(f"DBStor config prepared at {cfg_file}") + + def update_dbstor_config(self, clusterInfo, cfg_file: str, + node_id: int, local_ip: str, logger=None) -> None: + """ + 根据 clusterInfo 组装 13 个键并写入 cfg_file + """ + fs_names = clusterInfo.dbstor_fs.split(";") + if len(fs_names) != 4: + raise Exception(f"dbstor_fs must have 4 names. 
Got: {clusterInfo.dbstor_fs}") + + kv = { + "NAMESPACE_FSNAME": fs_names[0], + "NAMESPACE_PAGE_FSNAME": fs_names[1], + "NAMESPACE_SHARE_FSNAME": fs_names[2], + "NAMESPACE_ARCHIVE_FSNAME": fs_names[3], + "LOG_VSTOR": "0", + "PAGE_VSTOR": "0", + "ARCHIVE_VSTOR": "0", + "IS_CONTAINER": "0", + "LINKE_TYPE": clusterInfo.link_type or "0", + "LOCAL_IP": local_ip, + "REMOTE_IP": clusterInfo.storage_vlan_ip, + "NODE_ID": str(node_id), + "DBS_LOG_PATH": "${GAUSSLOG}/dbstor" + } + + update_ini_file(cfg_file, kv, DefaultValue.KEY_FILE_MODE, logger) + + def _get_dbstor_home(self) -> str: + home = getattr(self.clusterInfo, "dbstor_home", "").strip() + if home: + return home + env_home = os.getenv("DBSTOR_HOME") + if env_home: + return env_home + raise Exception("dbstor_home not set and DBSTOR_HOME env not found.") + + def _get_node_id_and_local_ip(self): + hostname = socket.gethostname() + for idx, dbNode in enumerate(self.clusterInfo.dbNodes): + if dbNode.name == hostname: + local_ip = dbNode.extra.paramMap.get("cantian_vlan_ip", "") + return idx, local_ip + return 0, "" + + def install(self, nodeName: str = "", dbInitParams: str = ""): + pass + + def start(self): + pass + + def stop(self): + pass + + def uninstall(self): + pass + + def upgrade(self): + pass diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 63f6e8a2..915fb886 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -426,20 +426,26 @@ class DN_OLAP(Kernel): pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" + kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: self.logger.debug("cantiand is not running, skip shutdown.") return - st, out = CmdUtil.retryGetstatusoutput(shutdown_cmd) - if st == 0: - self.logger.debug("Cantiand shutdown successfully.") - return - - self.logger.debug(f"Cantiand graceful shutdown failed, try force kill:\n{out}") + CmdUtil.retryGetstatusoutput(shutdown_cmd) - kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" - CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) + # 轮询等待 (≤ TIMEOUT_CLUSTER_QUERY) + waited = 0 + while waited < DefaultValue.TIMEOUT_CLUSTER_QUERY: + if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: + self.logger.debug("Cantiand shutdown gracefully.") + break + time.sleep(1) + waited += 1 + else: + # 超时后强制 kill + self.logger.debug("Graceful shutdown timeout, force killing Cantian.") + CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] == 0: raise Exception("Cantiand force kill failed: process still alive.") @@ -469,9 +475,6 @@ class DN_OLAP(Kernel): tmpDict["pgxc_node_name"] = "'%s'" % nodename self.setGucConfig(tmpDict) - if self.dss_mode: - raise Exception("Cantian 初始化暂不支持 DSS 共享存储模式。") - self.init_cantian() dnGucParas = self.getDnGUCDict() @@ -675,7 +678,7 @@ class DN_OLAP(Kernel): vg = self._resolve_dss_vg() self.logger.debug(f"[DSS] resolved private VG = {vg}") params["CONTROL_FILES"] = ( - f'"+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3"' + f'(+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3)' ) else: ctdb_home = os.getenv("CTDB_HOME") diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index 1308d0ed..df47da77 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -8,7 +8,7 @@ from 
base_utils.os.env_util import EnvUtil from base_utils.os.cmd_util import CmdUtil from base_utils.os.file_util import FileUtil from gspylib.component.BaseComponent import BaseComponent -from gspylib.component.Kernel.Kernel import Kernel +from gspylib.component.DSS.dss_comp import Dss, DssInst from gspylib.common.ErrorCode import ErrorCode from gspylib.common.DbClusterInfo import dbClusterInfo from domain_utils.cluster_os.cluster_user import UserUtil @@ -51,6 +51,15 @@ class Mysql(BaseComponent): return cluster.dbNodes[0].datanodes[0].datadir def initInstance(self): + # CM 场景通过CM初始化Mysql + cm_enabled = (getattr(self.clusterInfo, "cmsNum", 0) > 0 + and not self.clusterInfo.hasNoCm()) + if cm_enabled: + if self.logger: + self.logger.debug("CM detected (cmsNum=%d), skip MySQL init." % + self.clusterInfo.cmsNum) + return + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() # 单进程才需要自动拉起mysql,双进程需要手动拉起mysql single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() diff --git a/script/local/Install.py b/script/local/Install.py index 2be44e90..0340e623 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -620,6 +620,76 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(tmp_dir) self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") + def decompress_dbstor_client(self): + """ + 前提: cluster.xml 中 enable_dbstor=on + 步骤: + 1. DBStor_Client_{arch}.tgz → 解到 $DBSTOR_HOME + 2. 里面的 dbstor_client-*.tgz → 再解到 $DBSTOR_HOME + 3. 里面的 Dbstor_Client_Test-*.tgz→ 解到 $DBSTOR_HOME/tools + 4. 删除所有 *.tgz + """ + if getattr(self.clusterInfo, "enable_dbstor", "off").lower() != "on": + self.logger.debug("enable_dbstor != on, skip DBStor_Client package.") + return + + server_pkg = os.path.basename(PackageInfo.getPackageFile(CommConstants.PKG_SERVER)) + if "x86_64" in server_pkg: + arch_tag = "x86_64" + elif "aarch64" in server_pkg: + arch_tag = "aarch64" + else: + raise Exception(f"Unsupported CPU arch in server package: {server_pkg}") + + gphome = EnvUtil.getEnvironmentParameterValue("GPHOME", self.user) + dbstor_pkg = "" + for fname in os.listdir(gphome): + if fname.startswith("DBStor_Client") and fname.endswith(".tgz") and arch_tag in fname: + dbstor_pkg = os.path.join(gphome, fname) + break + if not dbstor_pkg: + raise Exception(f"DBStor_Client package ({arch_tag}) not found in {gphome}") + self.logger.debug(f"[DBStor_Client] path = {dbstor_pkg}") + + dbstor_home = getattr(self.clusterInfo, "dbstor_home", "").strip() + if not dbstor_home: + raise Exception("dbstor_home is empty in cluster config.") + FileUtil.createDirectory(dbstor_home, True, DefaultValue.KEY_DIRECTORY_MODE) + + tar_cmd = f"tar -zxf '{dbstor_pkg}' -C '{dbstor_home}'" + self.logger.log(f"Decompressing DBStor_Client: {tar_cmd}") + status, output = subprocess.getstatusoutput(tar_cmd) + if status != 0: + raise Exception(f"Failed to decompress DBStor_Client.\n{output}") + FileUtil.removeFile(dbstor_pkg) + + tools_dir = os.path.join(dbstor_home, "tools") + FileUtil.createDirectory(tools_dir, True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.createDirectory(os.path.join(dbstor_home, "ftds"), True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.createDirectory(os.path.join(dbstor_home, "dbs"), True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.createDirectory(os.path.join(dbstor_home, "infra"), True, DefaultValue.KEY_DIRECTORY_MODE) + + inner_pkgs = [f for f in os.listdir(dbstor_home) + if f.endswith("-dbstore.tgz")] + + if not inner_pkgs: + raise Exception("DBStor_Client package format error: no *-dbstore.tgz 
found.") + + for inner in inner_pkgs: + abs_path = os.path.join(dbstor_home, inner) + if inner.startswith("dbstor_client"): + cmd = f"tar -zxf '{abs_path}' -C '{dbstor_home}'" + else: + cmd = f"tar -zxf '{abs_path}' -C '{tools_dir}'" + self.logger.log(f"[DBStor] extracting {inner}") + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(f"Failed to extract {inner}: {output}") + FileUtil.removeFile(abs_path) + + FileUtil.changeOwner(self.user, dbstor_home, True) + self.logger.log(f"DBStor client deployed under {dbstor_home} (tools dir created, temp tgz removed)") + def generate_dss_path(self): """ Generate dss path @@ -697,6 +767,8 @@ class Install(LocalBaseOM): self.decompress_mysql_connector() # decompress Mysql_server package self.decompress_mysql_server() + # decompress DBStor client package + self.decompress_dbstor_client() # change owner for tar file. FileUtil.changeOwner(self.user, self.installPath, True) -- Gitee From 7d23b0f8e062f9a24c7303562c64a6194a5b6b00 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 23 Jul 2025 22:59:02 +0800 Subject: [PATCH 066/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Dbstor/dbstor_comp.py | 32 +++++++++- .../component/Kernel/DN_OLAP/DN_OLAP.py | 4 +- script/gspylib/component/MySql/mysql_comp.py | 8 +-- script/obtains_lsid | 58 +++++++++++++++++++ 4 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 script/obtains_lsid diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/Dbstor/dbstor_comp.py index ca12659c..f09bb23b 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -2,12 +2,14 @@ # -*- coding:utf-8 -*- import os import socket +import re from typing import Dict from gspylib.component.BaseComponent import BaseComponent from gspylib.common.Common import DefaultValue from base_utils.os.file_util import FileUtil from base_utils.os.user_util import UserUtil +from base_utils.os.cmd_util import CmdUtil def update_ini_file(ini_path: str, kv: Dict[str, str], @@ -42,6 +44,25 @@ def update_ini_file(ini_path: str, kv: Dict[str, str], if logger: logger.debug(f"[update_ini_file] rewrite {ini_path} with {len(kv)} keys.") + +def calc_cluster_id(cluster_name: str) -> int: + """ + 根据 clusterName 计算 0-255 的 CLUSTER_ID。 + + 规则: + 1. 若名称结尾带数字 → 取该数字并对 256 取模 + 2. 若名称不带数字 → 把所有字符的 ASCII 码求和再对 256 取模 + + :param cluster_name: XML / static_config 中的 clusterName + :return: 0‒255 之间的整数 + """ + m = re.search(r"(\d+)$", cluster_name or "") + if m: + return int(m.group(1)) % 256 + + return sum(ord(c) for c in (cluster_name or "")) % 256 + + class Dbstor(BaseComponent): def __init__(self): super().__init__() @@ -74,12 +95,19 @@ class Dbstor(BaseComponent): def update_dbstor_config(self, clusterInfo, cfg_file: str, node_id: int, local_ip: str, logger=None) -> None: """ - 根据 clusterInfo 组装 13 个键并写入 cfg_file + 写入 dbstor_config.ini """ fs_names = clusterInfo.dbstor_fs.split(";") if len(fs_names) != 4: raise Exception(f"dbstor_fs must have 4 names. 
Got: {clusterInfo.dbstor_fs}") + cluster_id = calc_cluster_id(clusterInfo.clusterName) + + st, out = CmdUtil.retryGetstatusoutput("uuidgen") + if st != 0: + raise Exception(f"generate DPU_UUID failed:\n{out}") + dpu_uuid = out.strip() + kv = { "NAMESPACE_FSNAME": fs_names[0], "NAMESPACE_PAGE_FSNAME": fs_names[1], @@ -93,6 +121,8 @@ class Dbstor(BaseComponent): "LOCAL_IP": local_ip, "REMOTE_IP": clusterInfo.storage_vlan_ip, "NODE_ID": str(node_id), + "CLUSTER_ID": str(cluster_id), + "DPU_UUID": dpu_uuid, "DBS_LOG_PATH": "${GAUSSLOG}/dbstor" } diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 915fb886..623a47f8 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -654,7 +654,7 @@ class DN_OLAP(Kernel): cantian_dict = { "LSNR_ADDR": self.instInfo.listenIps[0], "LSNR_PORT": str(self.instInfo.port), - "SS_INSTANCE_ID": self.calc_ss_instance_id(), + "DTC_INSTANCE_ID": self.calc_ss_instance_id(), "MYSQL_METADATA_IN_CANTIAN": meta_flag, "MYSQL_DEPLOY_GROUP_ID": str(gid), "SHARED_PATH": shared_path, @@ -672,7 +672,7 @@ class DN_OLAP(Kernel): return cantian_dict def getDssCantianDict(self) -> dict: - params = {"SS_INTERCONNECT_URL": self.get_ss_inter_url()} + params = {"DTC_INTERCONNECT_URL": self.get_ss_inter_url()} if self.dss_mode: vg = self._resolve_dss_vg() diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index df47da77..b05be15a 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -52,13 +52,7 @@ class Mysql(BaseComponent): def initInstance(self): # CM 场景通过CM初始化Mysql - cm_enabled = (getattr(self.clusterInfo, "cmsNum", 0) > 0 - and not self.clusterInfo.hasNoCm()) - if cm_enabled: - if self.logger: - self.logger.debug("CM detected (cmsNum=%d), skip MySQL init." 
% - self.clusterInfo.cmsNum) - return + return enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() # 单进程才需要自动拉起mysql,双进程需要手动拉起mysql diff --git a/script/obtains_lsid b/script/obtains_lsid new file mode 100644 index 00000000..4ef00962 --- /dev/null +++ b/script/obtains_lsid @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +import hashlib +import sys +import uuid +import random + + + +class LSIDGenerate(object): + def __init__(self, + n_type: int, + cluster_id: int, + cluster_name: str, + process_id: int, + node_id: int): + self.n_type = int(n_type) + self.cluster_id = int(cluster_id) + self.cluster_name = str(cluster_name) + self.process_id = int(process_id) + self.node_id = int(node_id) + self.random_seed = -1 + + @staticmethod + def generate_uuid(n_type, c_id, c_random, p_id, n_id): + _id = str(n_type) + str(c_id) + str(c_random) + str(n_id) + str(p_id) + return str(uuid.uuid3(uuid.NAMESPACE_DNS, _id)) + + @staticmethod + def generate_random_seed(name: str) -> int: + digest = int(hashlib.sha256(name.encode("utf-8")).hexdigest(), 16) + random.seed(digest) + return random.randint(0, 255) + + def generate_lsid(self): + # 返回lsid十六进制 + return int(str(bin(self.n_type))[2:].rjust(2, "0") + + str(bin(3))[2:].rjust(2, "0") + + str(bin(self.cluster_id))[2:].rjust(8, "0") + + str(bin(self.random_seed))[2:].rjust(8, "0") + + str(bin(self.process_id))[2:].rjust(4, "0") + + str(bin(self.node_id))[2:].rjust(8, "0"), 2) + + def execute(self): + self.random_seed = self.generate_random_seed(self.cluster_name) + process_uuid = self.generate_uuid(self.n_type, self.cluster_id, self.random_seed, self.process_id, self.node_id) + ls_id = self.generate_lsid() + return ls_id, process_uuid + + +if __name__ == "__main__": + node_type = int(sys.argv[1]) + cluster_id = int(sys.argv[2]) + cluster_name = sys.argv[3] + process_id = int(sys.argv[4]) + node_id = int(sys.argv[5]) + id_generate = LSIDGenerate(node_type, cluster_id, cluster_name, process_id, node_id) + print("%s\n%s" % id_generate.execute()) -- Gitee From 20ea98df602acbe3151bbfa4865bd8fa84d602e1 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 24 Jul 2025 14:32:29 +0800 Subject: [PATCH 067/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Dbstor/dbstor_comp.py | 47 ++++++++++++++++--- script/local/Install.py | 1 + script/obtains_lsid | 32 ++++++------- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/Dbstor/dbstor_comp.py index f09bb23b..b2dc25ec 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -3,7 +3,8 @@ import os import socket import re -from typing import Dict +import shutil +from typing import Dict, Tuple from gspylib.component.BaseComponent import BaseComponent from gspylib.common.Common import DefaultValue @@ -41,8 +42,18 @@ def update_ini_file(ini_path: str, kv: Dict[str, str], FileUtil.writeFile(ini_path, result, "w") FileUtil.changeMode(file_mode, ini_path) - if logger: - logger.debug(f"[update_ini_file] rewrite {ini_path} with {len(kv)} keys.") + logger and logger.debug(f"[update_ini_file] rewrite {ini_path} with {len(kv)} keys.") + + +def run_obtains_lsid(cluster_id: int, node_id: int, node_type: int, proc_id: int) -> Tuple[str, str]: + cmd = f"obtains_lsid {node_type} {cluster_id} {proc_id} {node_id}" + st, out = CmdUtil.retryGetstatusoutput(cmd) + if st != 
0: + raise Exception(f"obtains_lsid failed: {out}") + lines = out.strip().splitlines() + if len(lines) != 2: + raise Exception("obtains_lsid output format error") + return lines[0].strip(), lines[1].strip() def calc_cluster_id(cluster_name: str) -> int: @@ -86,11 +97,33 @@ class Dbstor(BaseComponent): node_id, local_ip = self._get_node_id_and_local_ip() self.update_dbstor_config(self.clusterInfo, cfg_file, node_id, local_ip, self.logger) - user = UserUtil.getUserInfo()["name"] - FileUtil.changeOwner(user, cfg_file, False) + # 生成 4 份 tool_x.ini 配置 + self._generate_tool_configs(cfg_dir, cfg_file, node_id) + + FileUtil.changeOwner(UserUtil.getUserInfo()["name"], cfg_dir, True) + self.logger and self.logger.log(f"DBStor config prepared at {cfg_dir}") + + def _generate_tool_configs(self, cfg_dir: str, base_cfg: str, node_id: int): + cluster_name = os.getenv("GS_CLUSTER_NAME", "") + cluster_id = calc_cluster_id(cluster_name) + + _, cluster_uuid = run_obtains_lsid(cluster_id, node_id, 0, 0) + + for idx, tool_proc in enumerate(range(7, 11), start=1): + inst_id, tool_uuid = run_obtains_lsid(cluster_id, node_id, 2, tool_proc) + + dst = os.path.join(cfg_dir, f"dbstor_config_tool_{idx}.ini") + shutil.copy2(base_cfg, dst) - if self.logger: - self.logger.log(f"DBStor config prepared at {cfg_file}") + extra_kv = { + "DBSTOR_OWNER_NAME": "dbstor", + "CLUSTER_NAME": cluster_name, + "CLUSTER_UUID": cluster_uuid, + "INST_ID": inst_id, + "TOOL_UUID": tool_uuid + } + update_ini_file(dst, extra_kv, DefaultValue.KEY_FILE_MODE, self.logger) + self.logger and self.logger.debug(f"Generate tool config {dst}") def update_dbstor_config(self, clusterInfo, cfg_file: str, node_id: int, local_ip: str, logger=None) -> None: diff --git a/script/local/Install.py b/script/local/Install.py index 0340e623..fe3ac447 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -668,6 +668,7 @@ class Install(LocalBaseOM): FileUtil.createDirectory(os.path.join(dbstor_home, "ftds"), True, DefaultValue.KEY_DIRECTORY_MODE) FileUtil.createDirectory(os.path.join(dbstor_home, "dbs"), True, DefaultValue.KEY_DIRECTORY_MODE) FileUtil.createDirectory(os.path.join(dbstor_home, "infra"), True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.createDirectory(os.path.join(dbstor_home, "kmc"), True, DefaultValue.KEY_DIRECTORY_MODE) inner_pkgs = [f for f in os.listdir(dbstor_home) if f.endswith("-dbstore.tgz")] diff --git a/script/obtains_lsid b/script/obtains_lsid index 4ef00962..9a596f4e 100644 --- a/script/obtains_lsid +++ b/script/obtains_lsid @@ -2,24 +2,20 @@ # -*- coding:utf-8 -*- import hashlib import sys +import os import uuid import random class LSIDGenerate(object): - def __init__(self, - n_type: int, - cluster_id: int, - cluster_name: str, - process_id: int, - node_id: int): + def __init__(self, n_type, c_id, p_id, n_id): self.n_type = int(n_type) - self.cluster_id = int(cluster_id) - self.cluster_name = str(cluster_name) - self.process_id = int(process_id) - self.node_id = int(node_id) + self.process_id = int(p_id) + self.cluster_id = int(c_id) + self.node_id = int(n_id) self.random_seed = -1 + self.info = {} @staticmethod def generate_uuid(n_type, c_id, c_random, p_id, n_id): @@ -27,9 +23,10 @@ class LSIDGenerate(object): return str(uuid.uuid3(uuid.NAMESPACE_DNS, _id)) @staticmethod - def generate_random_seed(name: str) -> int: - digest = int(hashlib.sha256(name.encode("utf-8")).hexdigest(), 16) - random.seed(digest) + def generate_random_seed(): + cluster_name = os.getenv("GS_CLUSTER_NAME", "") + hash_object = 
int(hashlib.sha256(cluster_name.encode('utf-8')).hexdigest(), 16) + random.seed(hash_object) return random.randint(0, 255) def generate_lsid(self): @@ -42,7 +39,7 @@ class LSIDGenerate(object): + str(bin(self.node_id))[2:].rjust(8, "0"), 2) def execute(self): - self.random_seed = self.generate_random_seed(self.cluster_name) + self.random_seed = self.generate_random_seed() process_uuid = self.generate_uuid(self.n_type, self.cluster_id, self.random_seed, self.process_id, self.node_id) ls_id = self.generate_lsid() return ls_id, process_uuid @@ -51,8 +48,7 @@ class LSIDGenerate(object): if __name__ == "__main__": node_type = int(sys.argv[1]) cluster_id = int(sys.argv[2]) - cluster_name = sys.argv[3] - process_id = int(sys.argv[4]) - node_id = int(sys.argv[5]) - id_generate = LSIDGenerate(node_type, cluster_id, cluster_name, process_id, node_id) + process_id = int(sys.argv[3]) + node_id = int(sys.argv[4]) + id_generate = LSIDGenerate(node_type, cluster_id, process_id, node_id) print("%s\n%s" % id_generate.execute()) -- Gitee From 5c93402862b0d22e6a645a4873ba553f80958b74 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 24 Jul 2025 19:58:47 +0800 Subject: [PATCH 068/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Dbstor/dbstor_comp.py | 77 ++++++++++- .../component/Kernel/DN_OLAP/DN_OLAP.py | 11 ++ script/local/PreInstallUtility.py | 121 +++++++++++++----- 3 files changed, 171 insertions(+), 38 deletions(-) diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/Dbstor/dbstor_comp.py index b2dc25ec..b1091a60 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -11,6 +11,7 @@ from gspylib.common.Common import DefaultValue from base_utils.os.file_util import FileUtil from base_utils.os.user_util import UserUtil from base_utils.os.cmd_util import CmdUtil +from base_utils.os.env_util import EnvUtil def update_ini_file(ini_path: str, kv: Dict[str, str], @@ -85,7 +86,7 @@ class Dbstor(BaseComponent): return dbstor_home = self._get_dbstor_home() - cfg_dir = os.path.join(dbstor_home, "conf", "dbs") + cfg_dir = os.path.join(dbstor_home, "conf", "dbs") cfg_file = os.path.join(cfg_dir, "dbstor_config.ini") FileUtil.createDirectory(cfg_dir, DefaultValue.KEY_DIRECTORY_MODE) @@ -100,6 +101,9 @@ class Dbstor(BaseComponent): # 生成 4 份 tool_x.ini 配置 self._generate_tool_configs(cfg_dir, cfg_file, node_id) + self._sync_link_type_resources(dbstor_home) + self._sync_cantian_dbstor_conf(dbstor_home) + FileUtil.changeOwner(UserUtil.getUserInfo()["name"], cfg_dir, True) self.logger and self.logger.log(f"DBStor config prepared at {cfg_dir}") @@ -111,9 +115,8 @@ class Dbstor(BaseComponent): for idx, tool_proc in enumerate(range(7, 11), start=1): inst_id, tool_uuid = run_obtains_lsid(cluster_id, node_id, 2, tool_proc) - dst = os.path.join(cfg_dir, f"dbstor_config_tool_{idx}.ini") - shutil.copy2(base_cfg, dst) + FileUtil.copyFile(base_cfg, dst) extra_kv = { "DBSTOR_OWNER_NAME": "dbstor", @@ -125,6 +128,74 @@ class Dbstor(BaseComponent): update_ini_file(dst, extra_kv, DefaultValue.KEY_FILE_MODE, self.logger) self.logger and self.logger.debug(f"Generate tool config {dst}") + def _sync_link_type_resources(self, dbstor_home: str): + link_type = os.getenv("LINK_TYPE", "0").strip() + if link_type not in ("0", "1"): + link_type = "0" + + lib_subdir = "nomlnx" if link_type == "0" else "mlnx" + src_lib = 
os.path.join(dbstor_home, "lib", lib_subdir, "libxnetlite.so") + dst_lib = os.path.join(dbstor_home, "lib", "libxnetlite.so") + if not os.path.isfile(src_lib): + raise Exception(f"Expected lib not found: {src_lib}") + FileUtil.copyFile(src_lib, dst_lib) + + gauss_home = EnvUtil.getEnv("GAUSSHOME") + if not gauss_home: + raise Exception("Env GAUSSHOME not set") + cfg_name = "node_config_tcp.xml" if link_type == "0" else "node_config_rdma.xml" + src_xml = os.path.join(gauss_home, "share", "dbstor", cfg_name) + infra_dir = os.path.join(dbstor_home, "conf", "infra") + FileUtil.createDirectory(infra_dir, DefaultValue.KEY_DIRECTORY_MODE) + dst_xml = os.path.join(infra_dir, "node_config.xml") + if not os.path.isfile(src_xml): + raise Exception(f"Expected config not found: {src_xml}") + FileUtil.copyFile(src_xml, dst_xml) + + self.logger and self.logger.debug( + f"LINK_TYPE={link_type}: copied {src_lib} → {dst_lib}, {src_xml} → {dst_xml}" + ) + + def _sync_cantian_dbstor_conf(self, dbstor_home: str): + """ + 将 DBSTOR_HOME/conf/{dbs,infra} 复制到CTDB_HOME/dbstor/conf/ + """ + ctdb_home = os.getenv("CTDB_HOME") + if not ctdb_home: + self.logger and self.logger.debug("CTDB_HOME not set, skip cantian dbstor conf sync.") + return + + src_conf = os.path.join(dbstor_home, "conf") + dst_base = os.path.join(ctdb_home, "dbstor") + dst_conf = os.path.join(dst_base, "conf") + dst_dbs = os.path.join(dst_conf, "dbs") + dst_infra = os.path.join(dst_conf, "infra") + + for path in (dst_base, dst_conf, dst_dbs, dst_infra): + FileUtil.createDirectory(path, DefaultValue.KEY_DIRECTORY_MODE) + + # 复制 dbstor_config.ini + src_ini = os.path.join(src_conf, "dbs", "dbstor_config.ini") + dst_ini = os.path.join(dst_dbs, "dbstor_config.ini") + FileUtil.copyFile(src_ini, dst_ini) + + update_ini_file(dst_ini, + {"DBSTOR_OWNER_NAME": "cantian"}, + DefaultValue.KEY_FILE_MODE, + self.logger) + + # 递归复制 infra 目录 + src_infra = os.path.join(src_conf, "infra") + for root, _, files in os.walk(src_infra): + rel = os.path.relpath(root, src_infra) + tgt_dir = os.path.join(dst_infra, rel) if rel != "." 
else dst_infra + FileUtil.createDirectory(tgt_dir, DefaultValue.KEY_DIRECTORY_MODE) + for f in files: + FileUtil.copyFile(os.path.join(root, f), + os.path.join(tgt_dir, f)) + + self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_conf}") + def update_dbstor_config(self, clusterInfo, cfg_file: str, node_id: int, local_ip: str, logger=None) -> None: """ diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 623a47f8..da552c6b 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -664,6 +664,17 @@ class DN_OLAP(Kernel): if self.dss_mode: cantian_dict["ENABLE_DSS"] = "TRUE" + # Enable dbstor + if os.getenv("ENABLE_DBSTOR", "off").lower() == "on": + cantian_dict.update({ + "DBSTOR_DEPLOY_MODE": "1", + "ENABLE_DBSTOR": "TRUE", + "DBSTOR_NAMESPACE": os.getenv("GS_CLUSTER_NAME", ""), + "SHARED_PATH": "-", + "_DOUBLEWITE": "FALSE", + "ENABLE_DBSTOR_BATCH_FLUSH": "TRUE", + }) + # 使用 NFS 需要加上FULLDIRECTIO参数,否则可能出现数据不一致的问题 share_fs = os.getenv("SHARE_FS", "").strip() if share_fs and not share_fs.startswith("127.0.0.1"): diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 96f65f9e..3a3a4624 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1810,46 +1810,97 @@ Common options: # set PYTHONPATH FileUtil.writeFile(userProfile, ["export PYTHONPATH=$GPHOME/lib"]) - share_fs_env = ClusterConfigFile.getOneClusterConfigItem( - "share_fs", self.clusterConfig).strip() - enable_mysql_flag = ClusterConfigFile.getOneClusterConfigItem( - "enable_mysql", self.clusterConfig).strip().lower() or "off" - mysql_meta_flag = ClusterConfigFile.getOneClusterConfigItem( - "mysql_metadata_in_cantian", self.clusterConfig).strip().lower() or "on" - - if share_fs_env: - for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine(prof, r"^\s*export\s*SHARE_FS=.*$") - FileUtil.writeFile(prof, [f"export SHARE_FS={share_fs_env}"]) - self.logger.debug(f"Successfully flush 'export SHARE_FS={share_fs_env}'") - else: - self.logger.debug("share_fs is default or empty; skip 'export SHARE_FS'") - - if enable_mysql_flag == "on": - mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") + self._set_share_fs_env(userProfile) + self._set_enable_mysql_env(userProfile) + self._set_enable_dbstor_env(userProfile) - for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") - - FileUtil.writeFile(prof, [ - "export ENABLE_MYSQL=on", - f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", - f"export MYSQL_HOME={mysql_home}", - r"export PATH=$MYSQL_HOME/bin:$PATH" - ]) - self.logger.debug( - f"Flush ENABLE_MYSQL=on MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag} " - f"MYSQL_HOME={mysql_home}" - ) - else: - self.logger.debug("[MySQL] enable_mysql=off; skip MySQL env export") except Exception as e: self.logger.logExit(str(e)) self.logger.debug("Successfully set tool ENV.") + def _set_share_fs_env(self, userProfile: str) -> None: + share_fs_env = ClusterConfigFile.getOneClusterConfigItem( + "share_fs", self.clusterConfig).strip() + + if not share_fs_env: + self.logger.debug("share_fs is default or empty; skip 'export SHARE_FS'") + return + + for prof 
in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*SHARE_FS=.*$") + FileUtil.writeFile(prof, [f"export SHARE_FS={share_fs_env}"]) + self.logger.debug(f"Flush SHARE_FS={share_fs_env}") + + def _set_enable_mysql_env(self, userProfile: str) -> None: + enable_mysql_flag = ClusterConfigFile.getOneClusterConfigItem( + "enable_mysql", self.clusterConfig).strip().lower() or "off" + if enable_mysql_flag != "on": + self.logger.debug("[MySQL] enable_mysql=off; skip MySQL env export") + return + + mysql_meta_flag = ClusterConfigFile.getOneClusterConfigItem( + "mysql_metadata_in_cantian", self.clusterConfig).strip().lower() or "on" + mysql_home = os.path.join(os.path.dirname(self.clusterToolPath), "mysql") + + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_MYSQL=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_METADATA_IN_CANTIAN=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*MYSQL_HOME=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*PATH=.*\$MYSQL_HOME[^ ]*bin.*$") + + FileUtil.writeFile(prof, [ + "export ENABLE_MYSQL=on", + f"export MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag}", + f"export MYSQL_HOME={mysql_home}", + r"export PATH=$MYSQL_HOME/bin:$PATH" + ]) + self.logger.debug( + f"Flush ENABLE_MYSQL=on MYSQL_METADATA_IN_CANTIAN={mysql_meta_flag} " + f"MYSQL_HOME={mysql_home}" + ) + + def _set_dbstor_env(self, userProfile: str) -> None: + + enable_dbstor_flag = ClusterConfigFile.getOneClusterConfigItem( + "enable_dbstor", self.clusterConfig).strip().lower() or "off" + + if enable_dbstor_flag == "on": + link_type = ClusterConfigFile.getOneClusterConfigItem( + "link_type", self.clusterConfig).strip() or "0" + storage_vlan_ip = ClusterConfigFile.getOneClusterConfigItem( + "storage_vlan_ip", self.clusterConfig).strip() + + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DBSTOR=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*LINK_TYPE=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*STORAGE_VLAN_IP=.*$") + + FileUtil.writeFile(prof, [ + "export ENABLE_DBSTOR=on", + f"export LINK_TYPE={link_type}", + f"export STORAGE_VLAN_IP={storage_vlan_ip}" + ]) + self.logger.debug( + f"Flush ENABLE_DBSTOR=on LINK_TYPE={link_type} STORAGE_VLAN_IP={storage_vlan_ip}" + ) + else: + self.logger.debug("[DBStor] enable_dbstor=off; skip DBStor env export") + + # Cantian VLAN IP + host = NetUtil.GetHostIpOrName() + dbNode = self.clusterInfo.getDbNodeByName(host) + if dbNode: + vlan_ip = dbNode.extra.paramMap.get("cantian_vlan_ip", "").strip() + if vlan_ip: + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_VLAN_IP=.*$") + FileUtil.writeFile(prof, [f"export CANTIAN_VLAN_IP={vlan_ip}"]) + self.logger.debug(f"Flush CANTIAN_VLAN_IP={vlan_ip}") + else: + self.logger.debug("cantian_vlan_ip empty; skip env export.") + else: + self.logger.debug("Current host not found in DEVICELIST; skip cantian_vlan_ip env.") + def setLibrary(self): """ function: Setting Library -- Gitee From 510936ede40520e9f275a6461b655a9ea126116b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 24 Jul 2025 20:21:53 +0800 Subject: [PATCH 069/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Dbstor/dbstor_comp.py | 156 +++++++----------- 1 file changed, 62 insertions(+), 94 deletions(-) diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py 
b/script/gspylib/component/Dbstor/dbstor_comp.py index b1091a60..1cf18556 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -1,9 +1,7 @@ # gspylib/component/DBSTOR/dbstor_comp.py # -*- coding:utf-8 -*- import os -import socket import re -import shutil from typing import Dict, Tuple from gspylib.component.BaseComponent import BaseComponent @@ -14,6 +12,7 @@ from base_utils.os.cmd_util import CmdUtil from base_utils.os.env_util import EnvUtil + def update_ini_file(ini_path: str, kv: Dict[str, str], file_mode: str = DefaultValue.KEY_FILE_MODE, logger=None) -> None: @@ -46,7 +45,8 @@ def update_ini_file(ini_path: str, kv: Dict[str, str], logger and logger.debug(f"[update_ini_file] rewrite {ini_path} with {len(kv)} keys.") -def run_obtains_lsid(cluster_id: int, node_id: int, node_type: int, proc_id: int) -> Tuple[str, str]: +def run_obtains_lsid(cluster_id: int, node_id: int, + node_type: int, proc_id: int) -> Tuple[str, str]: cmd = f"obtains_lsid {node_type} {cluster_id} {proc_id} {node_id}" st, out = CmdUtil.retryGetstatusoutput(cmd) if st != 0: @@ -76,16 +76,12 @@ def calc_cluster_id(cluster_name: str) -> int: class Dbstor(BaseComponent): - def __init__(self): - super().__init__() - def initInstance(self): - if getattr(self.clusterInfo, "enable_dbstor", "off").lower() != "on": - if self.logger: - self.logger.debug("enable_dbstor != on, skip dbstor component init.") + if os.getenv("ENABLE_DBSTOR", "off").lower() != "on": + self.logger and self.logger.debug("ENABLE_DBSTOR!=on, skip dbstor init.") return - dbstor_home = self._get_dbstor_home() + dbstor_home = os.getenv("DBSTOR_HOME", "").strip() cfg_dir = os.path.join(dbstor_home, "conf", "dbs") cfg_file = os.path.join(cfg_dir, "dbstor_config.ini") @@ -95,11 +91,8 @@ class Dbstor(BaseComponent): FileUtil.createFile(cfg_file) FileUtil.writeFile(cfg_file, ["[CLIENT]", ""], "w") - node_id, local_ip = self._get_node_id_and_local_ip() - self.update_dbstor_config(self.clusterInfo, cfg_file, node_id, local_ip, self.logger) - - # 生成 4 份 tool_x.ini 配置 - self._generate_tool_configs(cfg_dir, cfg_file, node_id) + self._update_dbstor_config(cfg_file) + self._generate_tool_configs(cfg_dir, cfg_file) self._sync_link_type_resources(dbstor_home) self._sync_cantian_dbstor_conf(dbstor_home) @@ -107,9 +100,10 @@ class Dbstor(BaseComponent): FileUtil.changeOwner(UserUtil.getUserInfo()["name"], cfg_dir, True) self.logger and self.logger.log(f"DBStor config prepared at {cfg_dir}") - def _generate_tool_configs(self, cfg_dir: str, base_cfg: str, node_id: int): + def _generate_tool_configs(self, cfg_dir: str, base_cfg: str): cluster_name = os.getenv("GS_CLUSTER_NAME", "") cluster_id = calc_cluster_id(cluster_name) + node_id = int(os.getenv("NODE_ID", "0")) _, cluster_uuid = run_obtains_lsid(cluster_id, node_id, 0, 0) @@ -120,10 +114,10 @@ class Dbstor(BaseComponent): extra_kv = { "DBSTOR_OWNER_NAME": "dbstor", - "CLUSTER_NAME": cluster_name, - "CLUSTER_UUID": cluster_uuid, - "INST_ID": inst_id, - "TOOL_UUID": tool_uuid + "CLUSTER_NAME": cluster_name, + "CLUSTER_UUID": cluster_uuid, + "INST_ID": inst_id, + "TOOL_UUID": tool_uuid } update_ini_file(dst, extra_kv, DefaultValue.KEY_FILE_MODE, self.logger) self.logger and self.logger.debug(f"Generate tool config {dst}") @@ -133,8 +127,11 @@ class Dbstor(BaseComponent): if link_type not in ("0", "1"): link_type = "0" - lib_subdir = "nomlnx" if link_type == "0" else "mlnx" - src_lib = os.path.join(dbstor_home, "lib", lib_subdir, "libxnetlite.so") + src_lib = 
os.path.join( + dbstor_home, "lib", + "nomlnx" if link_type == "0" else "mlnx", + "libxnetlite.so" + ) dst_lib = os.path.join(dbstor_home, "lib", "libxnetlite.so") if not os.path.isfile(src_lib): raise Exception(f"Expected lib not found: {src_lib}") @@ -142,7 +139,7 @@ class Dbstor(BaseComponent): gauss_home = EnvUtil.getEnv("GAUSSHOME") if not gauss_home: - raise Exception("Env GAUSSHOME not set") + raise Exception("GAUSSHOME env not set") cfg_name = "node_config_tcp.xml" if link_type == "0" else "node_config_rdma.xml" src_xml = os.path.join(gauss_home, "share", "dbstor", cfg_name) infra_dir = os.path.join(dbstor_home, "conf", "infra") @@ -157,34 +154,26 @@ class Dbstor(BaseComponent): ) def _sync_cantian_dbstor_conf(self, dbstor_home: str): - """ - 将 DBSTOR_HOME/conf/{dbs,infra} 复制到CTDB_HOME/dbstor/conf/ - """ ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: - self.logger and self.logger.debug("CTDB_HOME not set, skip cantian dbstor conf sync.") + self.logger and self.logger.debug("CTDB_HOME not set; skip cantian sync.") return src_conf = os.path.join(dbstor_home, "conf") dst_base = os.path.join(ctdb_home, "dbstor") dst_conf = os.path.join(dst_base, "conf") - dst_dbs = os.path.join(dst_conf, "dbs") + dst_dbs = os.path.join(dst_conf, "dbs") dst_infra = os.path.join(dst_conf, "infra") - for path in (dst_base, dst_conf, dst_dbs, dst_infra): - FileUtil.createDirectory(path, DefaultValue.KEY_DIRECTORY_MODE) - - # 复制 dbstor_config.ini - src_ini = os.path.join(src_conf, "dbs", "dbstor_config.ini") - dst_ini = os.path.join(dst_dbs, "dbstor_config.ini") - FileUtil.copyFile(src_ini, dst_ini) + for p in (dst_base, dst_conf, dst_dbs, dst_infra): + FileUtil.createDirectory(p, DefaultValue.KEY_DIRECTORY_MODE) - update_ini_file(dst_ini, + FileUtil.copyFile(os.path.join(src_conf, "dbs", "dbstor_config.ini"), + os.path.join(dst_dbs, "dbstor_config.ini")) + update_ini_file(os.path.join(dst_dbs, "dbstor_config.ini"), {"DBSTOR_OWNER_NAME": "cantian"}, - DefaultValue.KEY_FILE_MODE, - self.logger) + DefaultValue.KEY_FILE_MODE, self.logger) - # 递归复制 infra 目录 src_infra = os.path.join(src_conf, "infra") for root, _, files in os.walk(src_infra): rel = os.path.relpath(root, src_infra) @@ -196,70 +185,49 @@ class Dbstor(BaseComponent): self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_conf}") - def update_dbstor_config(self, clusterInfo, cfg_file: str, - node_id: int, local_ip: str, logger=None) -> None: - """ - 写入 dbstor_config.ini - """ - fs_names = clusterInfo.dbstor_fs.split(";") + def _update_dbstor_config(self, cfg_file: str) -> None: + + fs_names = os.getenv( + "DBSTOR_FS", + "log_fs;page_fs;share_fs;archive_fs" + ).split(";") if len(fs_names) != 4: - raise Exception(f"dbstor_fs must have 4 names. 
Got: {clusterInfo.dbstor_fs}") + raise Exception("DBSTOR_FS must have 4 names separated by ';'") - cluster_id = calc_cluster_id(clusterInfo.clusterName) + cluster_name = os.getenv("GS_CLUSTER_NAME", "") + cluster_id = calc_cluster_id(cluster_name) + link_type = os.getenv("LINK_TYPE", "0").strip() + storage_ip = os.getenv("STORAGE_VLAN_IP", "") + node_id = int(os.getenv("NODE_ID", "0")) + local_ip = os.getenv("CANTIAN_VLAN_IP", "") - st, out = CmdUtil.retryGetstatusoutput("uuidgen") - if st != 0: - raise Exception(f"generate DPU_UUID failed:\n{out}") - dpu_uuid = out.strip() + dpu_uuid = CmdUtil.retryGetstatusoutput("uuidgen")[1].strip() kv = { - "NAMESPACE_FSNAME": fs_names[0], - "NAMESPACE_PAGE_FSNAME": fs_names[1], - "NAMESPACE_SHARE_FSNAME": fs_names[2], + "NAMESPACE_FSNAME": fs_names[0], + "NAMESPACE_PAGE_FSNAME": fs_names[1], + "NAMESPACE_SHARE_FSNAME": fs_names[2], "NAMESPACE_ARCHIVE_FSNAME": fs_names[3], - "LOG_VSTOR": "0", - "PAGE_VSTOR": "0", - "ARCHIVE_VSTOR": "0", - "IS_CONTAINER": "0", - "LINKE_TYPE": clusterInfo.link_type or "0", - "LOCAL_IP": local_ip, - "REMOTE_IP": clusterInfo.storage_vlan_ip, - "NODE_ID": str(node_id), - "CLUSTER_ID": str(cluster_id), - "DPU_UUID": dpu_uuid, - "DBS_LOG_PATH": "${GAUSSLOG}/dbstor" + "LOG_VSTOR": "0", + "PAGE_VSTOR": "0", + "ARCHIVE_VSTOR": "0", + "IS_CONTAINER": "0", + "LINKE_TYPE": link_type, + "LOCAL_IP": local_ip, + "REMOTE_IP": storage_ip, + "NODE_ID": str(node_id), + "CLUSTER_ID": str(cluster_id), + "DPU_UUID": dpu_uuid, + "DBS_LOG_PATH": "${GAUSSLOG}/dbstor" } + update_ini_file(cfg_file, kv, DefaultValue.KEY_FILE_MODE, self.logger) - update_ini_file(cfg_file, kv, DefaultValue.KEY_FILE_MODE, logger) - - def _get_dbstor_home(self) -> str: - home = getattr(self.clusterInfo, "dbstor_home", "").strip() - if home: - return home - env_home = os.getenv("DBSTOR_HOME") - if env_home: - return env_home - raise Exception("dbstor_home not set and DBSTOR_HOME env not found.") - - def _get_node_id_and_local_ip(self): - hostname = socket.gethostname() - for idx, dbNode in enumerate(self.clusterInfo.dbNodes): - if dbNode.name == hostname: - local_ip = dbNode.extra.paramMap.get("cantian_vlan_ip", "") - return idx, local_ip - return 0, "" - - def install(self, nodeName: str = "", dbInitParams: str = ""): - pass + def install(self, nodeName: str = "", dbInitParams: str = ""): pass - def start(self): - pass + def start(self): pass - def stop(self): - pass + def stop(self): pass - def uninstall(self): - pass + def uninstall(self): pass - def upgrade(self): - pass + def upgrade(self): pass -- Gitee From 6e404db7f8f4b2f21f9a5e087ff4da6e6dcf8580 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 11:55:44 +0800 Subject: [PATCH 070/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Dbstor/dbstor_comp.py | 93 ++++++++++++------- script/local/Install.py | 79 ++++++++++++++-- script/local/PreInstallUtility.py | 60 +++++++----- 3 files changed, 170 insertions(+), 62 deletions(-) diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/Dbstor/dbstor_comp.py index 1cf18556..f79f8310 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/Dbstor/dbstor_comp.py @@ -96,6 +96,7 @@ class Dbstor(BaseComponent): self._sync_link_type_resources(dbstor_home) self._sync_cantian_dbstor_conf(dbstor_home) + self._sync_cm_dbstor_conf(dbstor_home) 
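+        # The three _sync_* helpers above pick the TCP/RDMA flavour of
+        # libxnetlite.so / node_config.xml and copy dbstor_config.ini to
+        # the Cantian (CTDB_HOME) and CM (cmserver) consumers; cfg_dir is
+        # then chowned recursively right below.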
FileUtil.changeOwner(UserUtil.getUserInfo()["name"], cfg_dir, True) self.logger and self.logger.log(f"DBStor config prepared at {cfg_dir}") @@ -130,60 +131,49 @@ class Dbstor(BaseComponent): src_lib = os.path.join( dbstor_home, "lib", "nomlnx" if link_type == "0" else "mlnx", - "libxnetlite.so" - ) + "libxnetlite.so") dst_lib = os.path.join(dbstor_home, "lib", "libxnetlite.so") - if not os.path.isfile(src_lib): - raise Exception(f"Expected lib not found: {src_lib}") FileUtil.copyFile(src_lib, dst_lib) gauss_home = EnvUtil.getEnv("GAUSSHOME") if not gauss_home: raise Exception("GAUSSHOME env not set") - cfg_name = "node_config_tcp.xml" if link_type == "0" else "node_config_rdma.xml" - src_xml = os.path.join(gauss_home, "share", "dbstor", cfg_name) + infra_dir = os.path.join(dbstor_home, "conf", "infra") FileUtil.createDirectory(infra_dir, DefaultValue.KEY_DIRECTORY_MODE) - dst_xml = os.path.join(infra_dir, "node_config.xml") - if not os.path.isfile(src_xml): - raise Exception(f"Expected config not found: {src_xml}") - FileUtil.copyFile(src_xml, dst_xml) + + src_node_cfg_name = "node_config_tcp.xml" if link_type == "0" else "node_config_rdma.xml" + src_node_cfg = os.path.join(gauss_home, "share", "dbstor", src_node_cfg_name) + FileUtil.copyFile(src_node_cfg, os.path.join(infra_dir, "node_config.xml")) + + # osd.cfg + src_osd = os.path.join(gauss_home, "share", "dbstor", "osd.cfg") + FileUtil.copyFile(src_osd, os.path.join(infra_dir, "osd.cfg")) self.logger and self.logger.debug( - f"LINK_TYPE={link_type}: copied {src_lib} → {dst_lib}, {src_xml} → {dst_xml}" - ) + f"[DBStor] LINK_TYPE={link_type}: libxnetlite.so, node_config.xml, osd.cfg copied.") def _sync_cantian_dbstor_conf(self, dbstor_home: str): ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: - self.logger and self.logger.debug("CTDB_HOME not set; skip cantian sync.") + self.logger and self.logger.debug("CTDB_HOME not set; skip Cantian sync.") return - src_conf = os.path.join(dbstor_home, "conf") dst_base = os.path.join(ctdb_home, "dbstor") dst_conf = os.path.join(dst_base, "conf") - dst_dbs = os.path.join(dst_conf, "dbs") - dst_infra = os.path.join(dst_conf, "infra") - - for p in (dst_base, dst_conf, dst_dbs, dst_infra): + dst_dbs = os.path.join(dst_conf, "dbs") + for p in (dst_base, dst_conf, dst_dbs): FileUtil.createDirectory(p, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.copyFile(os.path.join(src_conf, "dbs", "dbstor_config.ini"), - os.path.join(dst_dbs, "dbstor_config.ini")) - update_ini_file(os.path.join(dst_dbs, "dbstor_config.ini"), + src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") + dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") + FileUtil.copyFile(src_cfg, dst_cfg) + update_ini_file(dst_cfg, {"DBSTOR_OWNER_NAME": "cantian"}, - DefaultValue.KEY_FILE_MODE, self.logger) - - src_infra = os.path.join(src_conf, "infra") - for root, _, files in os.walk(src_infra): - rel = os.path.relpath(root, src_infra) - tgt_dir = os.path.join(dst_infra, rel) if rel != "." 
else dst_infra - FileUtil.createDirectory(tgt_dir, DefaultValue.KEY_DIRECTORY_MODE) - for f in files: - FileUtil.copyFile(os.path.join(root, f), - os.path.join(tgt_dir, f)) + DefaultValue.KEY_FILE_MODE, + self.logger) - self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_conf}") + self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_cfg}") def _update_dbstor_config(self, cfg_file: str) -> None: @@ -222,6 +212,45 @@ class Dbstor(BaseComponent): } update_ini_file(cfg_file, kv, DefaultValue.KEY_FILE_MODE, self.logger) + def _sync_cm_dbstor_conf(self, dbstor_home: str): + """ + 同步 DBStor 配置到 CM 目录 ($PGDATA/../cmserver/dbstor): + • dbs/dbstor_config.ini → OWNER 名修改为 cms + • infra/osd.cfg → 原名复制 + • infra/node_config_*.xml (按 LINK_TYPE) → 重命名为 node_config.xml + """ + pgdata = EnvUtil.getEnv("PGDATA") + if not pgdata: + self.logger and self.logger.debug("PGDATA not set; skip CM dbstor sync.") + return + + cm_root = os.path.join(os.path.dirname(pgdata), "cmserver") + dst_base = os.path.join(cm_root, "dbstor") + dst_conf = os.path.join(dst_base, "conf") + dst_dbs = os.path.join(dst_conf, "dbs") + dst_infra = os.path.join(dst_conf, "infra") + for p in (dst_base, dst_conf, dst_dbs, dst_infra): + FileUtil.createDirectory(p, DefaultValue.KEY_DIRECTORY_MODE) + + src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") + dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") + FileUtil.copyFile(src_cfg, dst_cfg) + update_ini_file(dst_cfg, {"DBSTOR_OWNER_NAME": "cm"}, + DefaultValue.KEY_FILE_MODE, self.logger) + + link_type = os.getenv("LINK_TYPE", "0").strip() + gauss_home = EnvUtil.getEnv("GAUSSHOME") + src_cfg_name = "node_config_tcp_cm.xml" if link_type == "0" else "node_config_rdma_cm.xml" + src_cfg_path = os.path.join(gauss_home, "share", "dbstor", src_cfg_name) + dst_cfg_path = os.path.join(dst_infra, "node_config.xml") + FileUtil.copyFile(src_cfg_path, dst_cfg_path) + + src_osd = os.path.join(gauss_home, "share", "dbstor", "osd.cfg") + dst_osd = os.path.join(dst_infra, "osd.cfg") + FileUtil.copyFile(src_osd, dst_osd) + + self.logger and self.logger.debug(f"Synced CM dbstor conf → {dst_conf}") + def install(self, nodeName: str = "", dbInitParams: str = ""): pass def start(self): pass diff --git a/script/local/Install.py b/script/local/Install.py index fe3ac447..d7392c06 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -25,6 +25,8 @@ import sys import re import subprocess import traceback +import getpass +import stat sys.path.append(sys.path[0] + "/../") from gspylib.common.GaussLog import GaussLog @@ -598,7 +600,7 @@ class Install(LocalBaseOM): # ---------- 迁移 lib/plugin/* → /lib ---------- plugin_dir = os.path.join(dest_mysql_dir, "lib", "plugin") - dest_lib_dir = os.path.join(self.installPath, "lib") # ha_ctc.so 所在目录 + dest_lib_dir = os.path.join(self.installPath, "lib") if os.path.isdir(plugin_dir): if not os.listdir(plugin_dir): @@ -620,14 +622,60 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(tmp_dir) self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") + def _prompt_encrypt_dbstor_pass(self, dbstor_home: str) -> None: + """ + 1. 用户输入存储用户名/密码 + 2. 
调用 dbstor_home 下唯一的 kmc_encrypt_only_* 生成密文 + """ + storage_user = input("DBStor storage user : ").strip() + storage_pass = getpass.getpass("DBStor storage pass : ").strip() + + enc_bin = next( + (os.path.join(dbstor_home, f) + for f in os.listdir(dbstor_home) + if re.match(r"kmc_encrypt_only_", f)), + "" + ) + if not enc_bin: + raise Exception("kmc_encrypt_only_* not found in DBStor root dir") + os.chmod(enc_bin, os.stat(enc_bin).st_mode | stat.S_IXUSR) + + st, cipher = subprocess.getstatusoutput(f"echo '{storage_pass}' | {enc_bin}") + if st != 0: + raise Exception(f"Encrypt DBStor password failed:\n{cipher}") + cipher = cipher.strip() + + src_binfile = os.path.join(dbstor_home, "binfile") + dest_conf = os.path.join(dbstor_home, "conf") + dest_kmc = os.path.join(dest_conf, "kmc") + + if not os.path.isdir(src_binfile): + raise Exception("binfile directory not produced by kmc_encrypt_only_*") + + FileUtil.createDirectory(dest_conf, True, DefaultValue.KEY_DIRECTORY_MODE) + if os.path.isdir(dest_kmc): + FileUtil.removeDirectory(dest_kmc) + + FileUtil.cpFile(src_binfile, dest_kmc) + FileUtil.removeDirectory(src_binfile) + + ProfileFile.updateUserEnvVariable(self.mpprcFile, "DBSTOR_USER", storage_user) + ProfileFile.updateUserEnvVariable(self.mpprcFile, "DBSTOR_AUTH_KEY", cipher) + + os.environ["DBSTOR_USER"] = storage_user + os.environ["DBSTOR_AUTH_KEY"] = cipher + + self.logger.debug("DBStor password encrypted; binfile copied to conf/kmc via FileUtil") + def decompress_dbstor_client(self): """ 前提: cluster.xml 中 enable_dbstor=on 步骤: - 1. DBStor_Client_{arch}.tgz → 解到 $DBSTOR_HOME - 2. 里面的 dbstor_client-*.tgz → 再解到 $DBSTOR_HOME - 3. 里面的 Dbstor_Client_Test-*.tgz→ 解到 $DBSTOR_HOME/tools - 4. 删除所有 *.tgz + 1. 解 DBStor_Client*.tgz → $DBSTOR_HOME + 2. 创建目录:data/ftds, conf/{dbs,infra}, tools + 3. 解内部 *-dbstore.tgz + 4. 交互加密密码,生成 conf/kmc + 5. 
将 lib/kmc_shared 拷贝为 add-on """ if getattr(self.clusterInfo, "enable_dbstor", "off").lower() != "on": self.logger.debug("enable_dbstor != on, skip DBStor_Client package.") @@ -663,12 +711,14 @@ class Install(LocalBaseOM): raise Exception(f"Failed to decompress DBStor_Client.\n{output}") FileUtil.removeFile(dbstor_pkg) + # 创建目录结构 + FileUtil.createDirectory(os.path.join(dbstor_home, "data", "ftds"), + True, DefaultValue.KEY_DIRECTORY_MODE) + for sub in ("dbs", "infra"): + FileUtil.createDirectory(os.path.join(dbstor_home, "conf", sub), + True, DefaultValue.KEY_DIRECTORY_MODE) tools_dir = os.path.join(dbstor_home, "tools") FileUtil.createDirectory(tools_dir, True, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.createDirectory(os.path.join(dbstor_home, "ftds"), True, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.createDirectory(os.path.join(dbstor_home, "dbs"), True, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.createDirectory(os.path.join(dbstor_home, "infra"), True, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.createDirectory(os.path.join(dbstor_home, "kmc"), True, DefaultValue.KEY_DIRECTORY_MODE) inner_pkgs = [f for f in os.listdir(dbstor_home) if f.endswith("-dbstore.tgz")] @@ -688,6 +738,17 @@ class Install(LocalBaseOM): raise Exception(f"Failed to extract {inner}: {output}") FileUtil.removeFile(abs_path) + # lib/kmc_shared → add-on + src_kmc_shared = os.path.join(dbstor_home, "lib", "kmc_shared") + dest_addon = os.path.join(dbstor_home, "add-on") + if os.path.isdir(src_kmc_shared): + FileUtil.createDirectory(dest_addon, True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.cpFile(src_kmc_shared, dest_addon) + FileUtil.removeDirectory(src_kmc_shared) + self.logger.debug(f"Moved kmc_shared → {dest_addon}") + + self._prompt_encrypt_dbstor_pass(dbstor_home) + FileUtil.changeOwner(self.user, dbstor_home, True) self.logger.log(f"DBStor client deployed under {dbstor_home} (tools dir created, temp tgz removed)") diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 3a3a4624..84664908 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1860,31 +1860,49 @@ Common options: ) def _set_dbstor_env(self, userProfile: str) -> None: - + """ + 1. 若 clusterConfig 中 enable_dbstor = on + • 交互获取 storage_user / storage_pass + • 写出 ENABLE_DBSTOR / LINK_TYPE / STORAGE_VLAN_IP / + DBSTOR_USER / DBSTOR_PASS + 2. 
追加当前节点的 CANTIAN_VLAN_IP + """ enable_dbstor_flag = ClusterConfigFile.getOneClusterConfigItem( "enable_dbstor", self.clusterConfig).strip().lower() or "off" + if enable_dbstor_flag != "on": + self.logger.debug("[DBStor] enable_dbstor=off; skip DBStor env export") + return - if enable_dbstor_flag == "on": - link_type = ClusterConfigFile.getOneClusterConfigItem( - "link_type", self.clusterConfig).strip() or "0" - storage_vlan_ip = ClusterConfigFile.getOneClusterConfigItem( - "storage_vlan_ip", self.clusterConfig).strip() - - for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DBSTOR=.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*LINK_TYPE=.*$") - FileUtil.deleteLine(prof, r"^\s*export\s*STORAGE_VLAN_IP=.*$") - - FileUtil.writeFile(prof, [ - "export ENABLE_DBSTOR=on", - f"export LINK_TYPE={link_type}", - f"export STORAGE_VLAN_IP={storage_vlan_ip}" - ]) - self.logger.debug( - f"Flush ENABLE_DBSTOR=on LINK_TYPE={link_type} STORAGE_VLAN_IP={storage_vlan_ip}" + link_type = ClusterConfigFile.getOneClusterConfigItem( + "link_type", self.clusterConfig).strip() or "0" + storage_vlan_ip = ClusterConfigFile.getOneClusterConfigItem( + "storage_vlan_ip", self.clusterConfig).strip() + + storage_user = os.getenv("DBSTOR_USER", "").strip() + storage_pass = os.getenv("DBSTOR_PASS", "").strip() + if not storage_user: + storage_user = input("DBStor storage user : ").strip() + if not storage_pass: + import getpass + storage_pass = getpass.getpass("DBStor storage pass : ").strip() + + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine( + prof, + r"^\s*export\s*(ENABLE_DBSTOR|LINK_TYPE|STORAGE_VLAN_IP" + r"|DBSTOR_USER|DBSTOR_PASS)=.*$" ) - else: - self.logger.debug("[DBStor] enable_dbstor=off; skip DBStor env export") + FileUtil.writeFile(prof, [ + "export ENABLE_DBSTOR=on", + f"export LINK_TYPE={link_type}", + f"export STORAGE_VLAN_IP={storage_vlan_ip}", + f"export DBSTOR_USER={storage_user}", + f"export DBSTOR_PASS={storage_pass}" + ]) + self.logger.debug( + f"Flush DBStor env: ENABLE_DBSTOR=on LINK_TYPE={link_type} " + f"STORAGE_VLAN_IP={storage_vlan_ip} DBSTOR_USER={storage_user}" + ) # Cantian VLAN IP host = NetUtil.GetHostIpOrName() -- Gitee From 5ec6e318d46206172b9093cf824b79fced39381b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 12:51:46 +0800 Subject: [PATCH 071/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 40 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 84664908..7eebc765 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1861,11 +1861,11 @@ Common options: def _set_dbstor_env(self, userProfile: str) -> None: """ - 1. 若 clusterConfig 中 enable_dbstor = on - • 交互获取 storage_user / storage_pass - • 写出 ENABLE_DBSTOR / LINK_TYPE / STORAGE_VLAN_IP / - DBSTOR_USER / DBSTOR_PASS - 2. 
追加当前节点的 CANTIAN_VLAN_IP + enable_dbstor = on 时写入: + • ENABLE_DBSTOR / LINK_TYPE / STORAGE_VLAN_IP + • DBSTOR_HOME + • LD_LIBRARY_PATH += $DBSTOR_HOME/add-on:$DBSTOR_HOME/lib + • CANTIAN_VLAN_IP """ enable_dbstor_flag = ClusterConfigFile.getOneClusterConfigItem( "enable_dbstor", self.clusterConfig).strip().lower() or "off" @@ -1874,37 +1874,33 @@ Common options: return link_type = ClusterConfigFile.getOneClusterConfigItem( - "link_type", self.clusterConfig).strip() or "0" + "link_type", self.clusterConfig).strip() or "0" storage_vlan_ip = ClusterConfigFile.getOneClusterConfigItem( "storage_vlan_ip", self.clusterConfig).strip() - - storage_user = os.getenv("DBSTOR_USER", "").strip() - storage_pass = os.getenv("DBSTOR_PASS", "").strip() - if not storage_user: - storage_user = input("DBStor storage user : ").strip() - if not storage_pass: - import getpass - storage_pass = getpass.getpass("DBStor storage pass : ").strip() + dbstor_home_cfg = ClusterConfigFile.getOneClusterConfigItem( + "dbstor_home", self.clusterConfig).strip() for prof in (userProfile, self.user_env_file): FileUtil.deleteLine( prof, - r"^\s*export\s*(ENABLE_DBSTOR|LINK_TYPE|STORAGE_VLAN_IP" - r"|DBSTOR_USER|DBSTOR_PASS)=.*$" - ) + r"^\s*export\s*(ENABLE_DBSTOR|LINK_TYPE|STORAGE_VLAN_IP|DBSTOR_HOME)=.*$") + FileUtil.deleteLine( + prof, + r"^\s*export\s*LD_LIBRARY_PATH=.*\$DBSTOR_HOME[^ ]*.*$") + FileUtil.writeFile(prof, [ "export ENABLE_DBSTOR=on", f"export LINK_TYPE={link_type}", f"export STORAGE_VLAN_IP={storage_vlan_ip}", - f"export DBSTOR_USER={storage_user}", - f"export DBSTOR_PASS={storage_pass}" + f"export DBSTOR_HOME={dbstor_home_cfg}", + r"export LD_LIBRARY_PATH=$DBSTOR_HOME/add-on:$DBSTOR_HOME/lib:$LD_LIBRARY_PATH" ]) + self.logger.debug( - f"Flush DBStor env: ENABLE_DBSTOR=on LINK_TYPE={link_type} " - f"STORAGE_VLAN_IP={storage_vlan_ip} DBSTOR_USER={storage_user}" + f"Flush DBStor ENV: DBSTOR_HOME={dbstor_home_cfg}, LINK_TYPE={link_type}, " + f"STORAGE_VLAN_IP={storage_vlan_ip}" ) - # Cantian VLAN IP host = NetUtil.GetHostIpOrName() dbNode = self.clusterInfo.getDbNodeByName(host) if dbNode: -- Gitee From 30ab1dc1de105f065128699ac497004e1642f82c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 14:09:40 +0800 Subject: [PATCH 072/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index da552c6b..00040ceb 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -412,7 +412,7 @@ class DN_OLAP(Kernel): 2) 执行建库 SQL 3) 关闭 Cantian: • 优雅 shutdown - • 若失败 → 强制 kill -9 + • 若在超时时间内未退出 → 抛出异常 """ pgdata = self.instInfo.datadir @@ -426,7 +426,6 @@ class DN_OLAP(Kernel): pgrep_cmd = r"pgrep -f 'cantiand.*nomount'" shutdown_cmd = f"ctsql '/' as sysdba -q -D {pgdata} -c 'shutdown'" - kill_cmd = r"pgrep -f 'cantiand.*nomount' | xargs -r kill -9" if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] != 0: self.logger.debug("cantiand is not running, skip shutdown.") @@ -443,13 +442,9 @@ class DN_OLAP(Kernel): time.sleep(1) waited += 1 else: - # 超时后强制 kill - self.logger.debug("Graceful shutdown timeout, force killing Cantian.") - CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) + raise Exception("Cantiand shutdown timeout: process still running after 
graceful attempt.") - if CmdUtil.retryGetstatusoutput(pgrep_cmd)[0] == 0: - raise Exception("Cantiand force kill failed: process still alive.") - self.logger.debug("Cantiand force-killed successfully.") + self.logger.debug("Cantiand stopped successfully.") @Dss.catch_err(exist_so=True) def initInstance(self): -- Gitee From 400db7e122858865602b390c244e5dc47a34ca1c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 15:52:03 +0800 Subject: [PATCH 073/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/os/env_util.py | 15 +++++++++++++++ script/gspylib/common/LocalBaseOM.py | 19 +++++++++++++++++++ script/gspylib/component/BaseComponent.py | 2 ++ .../{Dbstor => DBstor}/dbstor_comp.py | 2 +- script/local/InitInstance.py | 17 ++++++++++++++++- script/local/PreInstallUtility.py | 2 +- 6 files changed, 54 insertions(+), 3 deletions(-) rename script/gspylib/component/{Dbstor => DBstor}/dbstor_comp.py (99%) diff --git a/script/base_utils/os/env_util.py b/script/base_utils/os/env_util.py index f5d5a054..2a8529c0 100644 --- a/script/base_utils/os/env_util.py +++ b/script/base_utils/os/env_util.py @@ -265,3 +265,18 @@ class EnvUtil(object): return True else: return False + +@staticmethod +def get_dbstor_home(user=""): + if os.getuid() == 0 and user == "": + return "" + return EnvUtil.getEnvironmentParameterValue("DBSTOR_HOME", user) + +@staticmethod +def is_dbstor_mode(user=""): + dbstor_home = EnvUtil.get_dbstor_home(user) + flag = str(EnvUtil.getEnv("enable_dbstor")).strip().lower() + if os.path.isdir(dbstor_home) and flag in ("on", "true", "yes", "1"): + return True + else: + return False \ No newline at end of file diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py index 1ec3c907..77754b5c 100644 --- a/script/gspylib/common/LocalBaseOM.py +++ b/script/gspylib/common/LocalBaseOM.py @@ -32,6 +32,7 @@ from base_utils.os.net_util import NetUtil from base_utils.os.user_util import UserUtil from base_utils.os.env_util import EnvUtil from gspylib.component.DSS.dss_checker import DssConfig +from gspylib.component.DBStor.dbstor_comp import DBStor import impl.upgrade.UpgradeConst as const @@ -50,7 +51,9 @@ class LocalBaseOM(object): gtmInitParas=None, paxos_mode=False, dss_mode=False, + dbstor_mode=False, dss_config="", + dbstor_config="", dorado_cluster_mode=""): ''' Constructor @@ -78,10 +81,13 @@ class LocalBaseOM(object): self.dnCons = [] self.gtsCons = [] self.dss_cons = [] + self.dbstor_cons = [] self.mysqlCons = [] self.paxos_mode = paxos_mode self.dss_mode = dss_mode + self.dbstor_mode = dbstor_mode self.dss_config = dss_config + self.dbstor_config = dbstor_config self.dorado_cluster_mode = dorado_cluster_mode def initComponent(self, paxos_mode=False): @@ -93,6 +99,7 @@ class LocalBaseOM(object): self.initCmComponent() self.initKernelComponent(paxos_mode) self.init_dss_component(self.dss_mode) + self.init_dbstor_component(self.dbstor_mode) self.initMysqlComponent() @@ -107,6 +114,18 @@ class LocalBaseOM(object): component.dss_mode = dss_mode self.dss_cons.append(component) + def init_dbstor_component(self, dbstor_mode=False): + if not dbstor_mode: + return + + for _ in self.dbNodeInfo.datanodes: + component = DBStor() + component.logger = self.logger + component.binPath = f"{self.clusterInfo.appPath}/bin" + component.dbstor_mode = dbstor_mode + component.dbstor_config = self.dbstor_config + self.dbstor_cons.append(component) + def 
initComponentAttributes(self, component): """ function: Init component attributes on current node diff --git a/script/gspylib/component/BaseComponent.py b/script/gspylib/component/BaseComponent.py index 97167297..8576e2ea 100644 --- a/script/gspylib/component/BaseComponent.py +++ b/script/gspylib/component/BaseComponent.py @@ -55,6 +55,8 @@ class BaseComponent(object): self.paxos_mode = '' self.dss_mode = '' self.dss_config = '' + self.dbstor_mode = '' + self.dbstor_config = '' self.dorado_cluster_mode = '' def install(self): diff --git a/script/gspylib/component/Dbstor/dbstor_comp.py b/script/gspylib/component/DBstor/dbstor_comp.py similarity index 99% rename from script/gspylib/component/Dbstor/dbstor_comp.py rename to script/gspylib/component/DBstor/dbstor_comp.py index f79f8310..a6bc638b 100644 --- a/script/gspylib/component/Dbstor/dbstor_comp.py +++ b/script/gspylib/component/DBstor/dbstor_comp.py @@ -75,7 +75,7 @@ def calc_cluster_id(cluster_name: str) -> int: return sum(ord(c) for c in (cluster_name or "")) % 256 -class Dbstor(BaseComponent): +class DBStor(BaseComponent): def initInstance(self): if os.getenv("ENABLE_DBSTOR", "off").lower() != "on": self.logger and self.logger.debug("ENABLE_DBSTOR!=on, skip dbstor init.") diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index f92adbbd..ff479cf8 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -62,6 +62,8 @@ class CmdOptions(): self.paxos_mode = False self.dss_mode = False self.dss_config = "" + self.dbstor_mode = False + self.dbstor_config = "" self.dorado_cluster_mode = "" @@ -80,7 +82,7 @@ def parseCommandLine(): try: opts, args = getopt.getopt(sys.argv[1:], "U:P:G:l:?", [ "help", "dws_mode", "vc_mode", "paxos_mode", "dss_mode", - "dss_config=", "dorado_cluster_mode=" + "dss_config=", "dbstor_mode", "dbstor_config=", "dorado_cluster_mode=" ]) except Exception as e: usage() @@ -113,6 +115,10 @@ def parseCommandLine(): g_opts.dss_mode = True elif key == "--dss_config": g_opts.dss_config = value.strip() + elif key == "--dbstor_mode": + g_opts.dbstor_mode = True + elif key == "--dbstor_config": + g_opts.dbstor_config = value.strip() elif key == "--dorado_cluster_mode": g_opts.dorado_cluster_mode = value.strip() Parameter.checkParaVaild(key, value) @@ -182,6 +188,8 @@ class initDbNode(LocalBaseOM): paxos_mode=False, dss_mode=False, dss_config="", + dbstor_mode=False, + dbstor_config="", dorado_cluster_mode = ""): """ function: init instance @@ -199,6 +207,8 @@ class initDbNode(LocalBaseOM): paxos_mode, dss_mode=dss_mode, dss_config=dss_config, + dbstor_mode=dbstor_mode, + dbstor_config=dbstor_config, dorado_cluster_mode=dorado_cluster_mode) if self.clusterConfig == "": # Read config from static config file @@ -234,6 +244,9 @@ class initDbNode(LocalBaseOM): # config instance in parallel if self.dss_cons: parallelTool.parallelExecute(self.initInstance, self.dss_cons) + if self.dbstor_cons: + self.logger.log("Initializing DBStor component ...") + parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) parallelTool.parallelExecute(self.initInstance, components) @@ -282,6 +295,8 @@ if __name__ == '__main__': g_opts.paxos_mode, dss_mode=g_opts.dss_mode, dss_config=g_opts.dss_config, + dbstor_mode=g_opts.dbstor_mode, + dbstor_config=g_opts.dbstor_config, dorado_cluster_mode=g_opts.dorado_cluster_mode) dbInit.initNodeInst(g_opts.vc_mode) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 7eebc765..643f10e4 100644 --- 
a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1812,7 +1812,7 @@ Common options: self._set_share_fs_env(userProfile) self._set_enable_mysql_env(userProfile) - self._set_enable_dbstor_env(userProfile) + self._set_dbstor_env(userProfile) except Exception as e: self.logger.logExit(str(e)) -- Gitee From 04f7ccc75e0fceade48964eab827ec9717af364a Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 16:40:22 +0800 Subject: [PATCH 074/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/xml_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 97624819..96dc4a76 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -300,7 +300,6 @@ class GenerateTemplate: self.update_database_port() # update node ip self.update_node_ip_hostname_info() - self.update_cantian_vlan_ip() def update_ddes_info(self): if not XmlConstant.IS_DDES: @@ -338,6 +337,7 @@ class GenerateTemplate: self.update_device_label_info() # 3.update dbstor info self.update_dbstor_info() + self.update_cantian_vlan_ip() # 3.1.update ddes info self.update_ddes_info() # 3.2 update share_fs info -- Gitee From a9e0f4112b6f5606cb3e28f4b2c53cc23a9f6ac0 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 16:52:30 +0800 Subject: [PATCH 075/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/xml_template.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 96dc4a76..8b82f478 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -192,16 +192,24 @@ class GenerateTemplate: if not XmlConstant.IS_DBSTOR: return - ip_groups = [g.strip() for g in XmlConstant.CANTIAN_VLAN_IP.split(";") if g.strip()] + user_input = XmlConstant.CANTIAN_VLAN_IP.strip() + if not user_input: + return + + ip_groups = [g.strip() for g in user_input.split(";") if g.strip()] + for idx, device in enumerate(self.root[1].findall("DEVICE")): - value = ip_groups[idx] if idx < len(ip_groups) else "" + target = ip_groups[idx] if idx < len(ip_groups) else "" + if not target: + continue + param = next((p for p in device.findall("PARAM") if p.attrib.get("name") == "cantian_vlan_ip"), None) if param: - param.attrib["value"] = value + param.set("value", target) else: - ET.SubElement(device, "PARAM", - {"name": "cantian_vlan_ip", "value": value}) + ET.SubElement(device, "PARAM", {"name": "cantian_vlan_ip", + "value": target}) def delete_xml_mysql(self): """若未启用 MySQL,则移除 参数.""" -- Gitee From aba09e76bcb71f97afc8058c1aad6c6c0f08d26e Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 17:25:33 +0800 Subject: [PATCH 076/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/cluster_tmp.xml | 2 +- script/base_utils/template/resource_en.json | 2 +- script/base_utils/template/resource_zh.json | 2 +- script/base_utils/template/xml_template.py | 29 +++++++++++++-------- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git 
a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 29775d8a..86ad16b3 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -19,7 +19,7 @@ - + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index e4f3ae5d..59f36d6f 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -45,7 +45,7 @@ "intput_dbstor_home": "Please enter dbstor_home dir (default:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "Enter cantian_vlan_ip (nodes separated by ';', multi-IP with '|', e.g. 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip cannot be empty", - "input_storage_vlan_ip": "Enter storage_vlan_ip (multi-VLAN '|', multi-IP ',', e.g. 10.10.10.1,10.10.10.2|11.11.11.1,11.11.11.2):", + "input_storage_vlan_ip": "Enter storage_vlan_ip (multi-VLAN '|', multi-IP ',', e.g. 10.10.10.111,10.10.10.112|11.11.11.111,11.11.11.112):", "storage_vlan_ip_empty": "storage_vlan_ip cannot be empty", "input_link_type": "LINK_TYPE (0 = TCP, 1 = RDMA) [default 0]:", "link_type_invalid": "Only 0 or 1 is allowed!", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index d500dbe6..26d1f5a6 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -45,7 +45,7 @@ "intput_dbstor_home": "请输入 'dbstor_home' 的路径信息(默认:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "请输入 cantian_vlan_ip(节点用 ; 分隔,同节点多IP用 |,示例: 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip 不能为空", - "input_storage_vlan_ip": "请输入 storage_vlan_ip(多VLAN用 |,同VLAN多IP用,示例: 10.10.10.1,10.10.10.2|11.11.11.1,11.11.11.2):", + "input_storage_vlan_ip": "请输入 storage_vlan_ip(多VLAN用 |,同VLAN多IP用,示例: 10.10.10.111,10.10.10.112|11.11.11.111,11.11.11.112):", "storage_vlan_ip_empty": "storage_vlan_ip 不能为空", "input_link_type": "请输入 LINK_TYPE(0=TCP,1=RDMA,默认0):", "link_type_invalid": "仅允许输入 0 或 1!", diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 8b82f478..6209580c 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -197,19 +197,26 @@ class GenerateTemplate: return ip_groups = [g.strip() for g in user_input.split(";") if g.strip()] - - for idx, device in enumerate(self.root[1].findall("DEVICE")): - target = ip_groups[idx] if idx < len(ip_groups) else "" - if not target: - continue - - param = next((p for p in device.findall("PARAM") - if p.attrib.get("name") == "cantian_vlan_ip"), None) + devices = list(self.root[1].findall("DEVICE")) + + if len(ip_groups) < len(devices): + raise Exception( + f"cantian_vlan_ip groups are fewer than DEVICE nodes: " + f"expected {len(devices)}, but got {len(ip_groups)}." 
+ ) + + for idx, device in enumerate(devices): + value = ip_groups[idx] + param = next( + (p for p in device.findall("PARAM") + if p.attrib.get("name") == "cantian_vlan_ip"), + None + ) if param: - param.set("value", target) + param.attrib["value"] = value else: - ET.SubElement(device, "PARAM", {"name": "cantian_vlan_ip", - "value": target}) + ET.SubElement(device, "PARAM", + {"name": "cantian_vlan_ip", "value": value}) def delete_xml_mysql(self): """若未启用 MySQL,则移除 参数.""" -- Gitee From 6ccd34c97e4e92b03c243d71cbc0ce880b44f2ac Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 17:30:34 +0800 Subject: [PATCH 077/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/cluster_tmp.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 86ad16b3..a3cd08a4 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -33,7 +33,6 @@ - @@ -54,7 +53,6 @@ - -- Gitee From 970828f238abe618f544ace6221a4e20147389ef Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 17:42:16 +0800 Subject: [PATCH 078/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/cluster_tmp.xml | 2 ++ script/base_utils/template/resource_en.json | 2 +- script/base_utils/template/resource_zh.json | 2 +- script/base_utils/template/xml_template.py | 29 ++++++++------------- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index a3cd08a4..33442dc6 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -33,6 +33,7 @@ + @@ -53,6 +54,7 @@ + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index 59f36d6f..df3d9291 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -41,7 +41,7 @@ "choose_dbstor" : "Enable DBStor shared-storage mode?", "input_dbstor" : "Input 1 / 2 (default 1) 1) Enable 2) Disable", - "intput_dbstor_fs" : "Enter the 4 DBStor file-systems (default: log_fs;page_fs;share_fs;archive_fs)", + "intput_dbstor_fs" : "Enter the 4 DBStor file-systems (e.g. log_fs;page_fs;share_fs;archive_fs)", "intput_dbstor_home": "Please enter dbstor_home dir (default:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "Enter cantian_vlan_ip (nodes separated by ';', multi-IP with '|', e.g. 
10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip cannot be empty", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 26d1f5a6..5071aa80 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -41,7 +41,7 @@ "choose_dbstor": "请选择是否启用 DBStor 共享存储?", "input_dbstor": "请输入 1/2 进行选择,默认选项是 1)部署", - "intput_dbstor_fs": "请输入'dbstor的4个文件系统'的路径信息(默认是:log_fs;page_fs;share_fs;archive_fs)", + "intput_dbstor_fs": "请输入'dbstor的4个文件系统'的路径信息(示例:log_fs;page_fs;share_fs;archive_fs)", "intput_dbstor_home": "请输入 'dbstor_home' 的路径信息(默认:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "请输入 cantian_vlan_ip(节点用 ; 分隔,同节点多IP用 |,示例: 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip 不能为空", diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 6209580c..01ad4955 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -192,31 +192,24 @@ class GenerateTemplate: if not XmlConstant.IS_DBSTOR: return - user_input = XmlConstant.CANTIAN_VLAN_IP.strip() - if not user_input: - return - - ip_groups = [g.strip() for g in user_input.split(";") if g.strip()] + ips = [g.strip() for g in XmlConstant.CANTIAN_VLAN_IP.strip().split(";") if g.strip()] devices = list(self.root[1].findall("DEVICE")) - if len(ip_groups) < len(devices): + if len(ips) < len(devices): raise Exception( f"cantian_vlan_ip groups are fewer than DEVICE nodes: " - f"expected {len(devices)}, but got {len(ip_groups)}." + f"expected {len(devices)}, but got {len(ips)}." ) - for idx, device in enumerate(devices): - value = ip_groups[idx] - param = next( - (p for p in device.findall("PARAM") - if p.attrib.get("name") == "cantian_vlan_ip"), - None + for idx, dev in enumerate(devices): + for p in list(dev.findall("PARAM")): + if p.attrib.get("name") == "cantian_vlan_ip": + dev.remove(p) + + ET.SubElement( + dev, "PARAM", + {"name": "cantian_vlan_ip", "value": ips[idx]} ) - if param: - param.attrib["value"] = value - else: - ET.SubElement(device, "PARAM", - {"name": "cantian_vlan_ip", "value": value}) def delete_xml_mysql(self): """若未启用 MySQL,则移除 参数.""" -- Gitee From d7eba6a1c7ae85c90978008c92babed26f9efe92 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 17:50:29 +0800 Subject: [PATCH 079/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/xml_template.py | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/script/base_utils/template/xml_template.py b/script/base_utils/template/xml_template.py index 01ad4955..ffca630d 100644 --- a/script/base_utils/template/xml_template.py +++ b/script/base_utils/template/xml_template.py @@ -202,14 +202,18 @@ class GenerateTemplate: ) for idx, dev in enumerate(devices): - for p in list(dev.findall("PARAM")): - if p.attrib.get("name") == "cantian_vlan_ip": - dev.remove(p) + params = [p for p in dev.findall("PARAM") + if p.attrib.get("name") == "cantian_vlan_ip"] - ET.SubElement( - dev, "PARAM", - {"name": "cantian_vlan_ip", "value": ips[idx]} - ) + if params: + params[0].attrib["value"] = ips[idx] + for extra in params[1:]: + dev.remove(extra) + else: + ET.SubElement( + dev, "PARAM", + {"name": "cantian_vlan_ip", "value": ips[idx]} + ) def delete_xml_mysql(self): """若未启用 
MySQL,则移除 参数.""" @@ -359,7 +363,12 @@ class GenerateTemplate: self.target_xml = XmlConstant.TARGET_XML if os.path.exists(self.target_xml): os.remove(self.target_xml) - ET.ElementTree(self.root).write(self.target_xml) + try: + ET.indent(self.tree, space=" ", level=0) + except AttributeError: + pass + + self.tree.write(self.target_xml, encoding="utf-8", xml_declaration=False) def display_xml_info(self): if not os.path.exists(self.target_xml): -- Gitee From f1fc72b4e5bf62989cf801ffff7888e14533c753 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 19:47:29 +0800 Subject: [PATCH 080/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 00040ceb..24233531 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -151,6 +151,8 @@ class DN_OLAP(Kernel): if self.dss_mode: vg = self._resolve_dss_vg() target = f"+{vg}" + elif self.dbstor_mode: + return else: ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: @@ -241,8 +243,12 @@ class DN_OLAP(Kernel): else: FileUtil.createDirectory(data_dir, DefaultValue.KEY_DIRECTORY_MODE) - tpl_sql = os.path.join(gauss_home, "share", "cantian", - "create_database.sql.template") + enable_dbstor = os.getenv("ENABLE_DBSTOR", "off").lower() == "on" + if enable_dbstor: + tpl_sql = os.path.join(gauss_home, "share", "cantian", "create_dbstor_database.sql.template") + else: + tpl_sql = os.path.join(gauss_home, "share", "cantian", "create_database.sql.template") + work_sql = os.path.join(ctdb_home, "create_database.sql") self.copy_and_render_file(tpl_sql, work_sql) self._patch_create_sql_paths(work_sql) @@ -686,10 +692,12 @@ class DN_OLAP(Kernel): params["CONTROL_FILES"] = ( f'(+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3)' ) + elif self.dbstor_mode: + params["CONTROL_FILES"] = "(-ctrl1, -ctrl2, -ctrl3)" else: ctdb_home = os.getenv("CTDB_HOME") if not ctdb_home: - raise EnvironmentError("CTDB_HOME must be set when not in DSS mode.") + raise EnvironmentError("CTDB_HOME must be set when not in DSS or DBStor mode.") params["CONTROL_FILES"] = ( f"({ctdb_home}/data/ctrl1, " f"{ctdb_home}/data/ctrl2, " -- Gitee From 04bd65723e51f5d73bca2106ecfb7bb8ec993223 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 20:08:20 +0800 Subject: [PATCH 081/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/DeviceListParams.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/gspylib/common/DeviceListParams.py b/script/gspylib/common/DeviceListParams.py index 5fd57fc3..06f6afc5 100644 --- a/script/gspylib/common/DeviceListParams.py +++ b/script/gspylib/common/DeviceListParams.py @@ -56,6 +56,7 @@ class DeviceListParams: REMOTE_STREAM_IP_MAP1 = 'remoteStreamIpmap1' REMOTE_DATA_PORT_BASE = 'remotedataPortBase' SSH_PORT = "sshPort" + CANTIAN_VLAN_IP = 'cantian_vlan_ip' @staticmethod def get_all_param_names(): @@ -87,7 +88,8 @@ class DeviceListParams: DeviceListParams.LOCAL_STREAM_IP_MAP1, DeviceListParams.REMOTE_STREAM_IP_MAP1, DeviceListParams.REMOTE_DATA_PORT_BASE, - DeviceListParams.SSH_PORT + DeviceListParams.SSH_PORT, + 
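# Minimal illustrative sketch of the DEVICE/PARAM layout that
# update_cantian_vlan_ip() rewrites. Only the DEVICE element, the PARAM
# element and the "cantian_vlan_ip" name come from the patches above; the
# surrounding DEVICELIST shape and the sample values are assumptions.
import xml.etree.ElementTree as ET

def upsert_vlan_param(device, value):
    # Keep exactly one cantian_vlan_ip PARAM per DEVICE: update the first
    # occurrence, drop duplicates, or create the PARAM if it is missing.
    params = [p for p in device.findall("PARAM")
              if p.attrib.get("name") == "cantian_vlan_ip"]
    if params:
        params[0].attrib["value"] = value
        for extra in params[1:]:
            device.remove(extra)
    else:
        ET.SubElement(device, "PARAM",
                      {"name": "cantian_vlan_ip", "value": value})

# One ';'-separated group per node, '|' separating multiple IPs of one node.
root = ET.fromstring(
    "<DEVICELIST>"
    "<DEVICE sn='node1'><PARAM name='name' value='node1'/></DEVICE>"
    "<DEVICE sn='node2'/>"
    "</DEVICELIST>")
groups = "10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2".split(";")
for dev, grp in zip(root.findall("DEVICE"), groups):
    upsert_vlan_param(dev, grp)
print(ET.tostring(root, encoding="unicode"))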
DeviceListParams.CANTIAN_VLAN_IP ] SYNC_NODE_PATTERN = re.compile(r'^syncNode_.*') \ No newline at end of file -- Gitee From 1a79f19a8e3310f90db54e00fcd2fda823c5afa5 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 25 Jul 2025 20:21:24 +0800 Subject: [PATCH 082/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/{DBstor => DBStor}/dbstor_comp.py | 0 script/local/Install.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename script/gspylib/component/{DBstor => DBStor}/dbstor_comp.py (100%) diff --git a/script/gspylib/component/DBstor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py similarity index 100% rename from script/gspylib/component/DBstor/dbstor_comp.py rename to script/gspylib/component/DBStor/dbstor_comp.py diff --git a/script/local/Install.py b/script/local/Install.py index d7392c06..bb24def1 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -627,8 +627,8 @@ class Install(LocalBaseOM): 1. 用户输入存储用户名/密码 2. 调用 dbstor_home 下唯一的 kmc_encrypt_only_* 生成密文 """ - storage_user = input("DBStor storage user : ").strip() - storage_pass = getpass.getpass("DBStor storage pass : ").strip() + storage_user = input("Please enter DBStor storage username: ").strip() + storage_pass = getpass.getpass("Please enter DBStor storage password: ").strip() enc_bin = next( (os.path.join(dbstor_home, f) -- Gitee From f8581355b8cd4bd839b08556133b915ef086d3c1 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 11:02:57 +0800 Subject: [PATCH 083/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/Common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 1785348a..a9332dec 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -1187,6 +1187,7 @@ class DefaultValue(): "export CTDB_HOME=$PGDATA", "export CANTIAND_MODE=open", "export CANTIAND_HOME_DIR=$PGDATA", + "export LSID_GENERATE_SCRIPT_PATH=$GPHOME/script/obtains_lsid", # 临时添加,测试使用 "export GSDB_HOME=$PGDATA" ]) -- Gitee From 9819f8ed53c9bb6c291e0f68ede8ed65d4b09a4f Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 11:44:46 +0800 Subject: [PATCH 084/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 643f10e4..0214380f 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1902,18 +1902,22 @@ Common options: ) host = NetUtil.GetHostIpOrName() - dbNode = self.clusterInfo.getDbNodeByName(host) - if dbNode: - vlan_ip = dbNode.extra.paramMap.get("cantian_vlan_ip", "").strip() - if vlan_ip: - for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_VLAN_IP=.*$") - FileUtil.writeFile(prof, [f"export CANTIAN_VLAN_IP={vlan_ip}"]) - self.logger.debug(f"Flush CANTIAN_VLAN_IP={vlan_ip}") - else: - self.logger.debug("cantian_vlan_ip empty; skip env export.") + root = ClusterConfigFile.initParserXMLFile(self.clusterConfig) + status, vlan_str = ClusterConfigFile.readOneClusterConfigItem( + root, + "cantian_vlan_ip", + 
"node", + host + ) + if status == 0 and vlan_str: + ips = [ip.strip() for ip in vlan_str.split(",") if ip.strip()] + vlan_ip = ",".join(ips) + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_VLAN_IP=.*$") + FileUtil.writeFile(prof, [f"export CANTIAN_VLAN_IP={vlan_ip}"]) + self.logger.debug(f"Flush CANTIAN_VLAN_IP={vlan_ip}") else: - self.logger.debug("Current host not found in DEVICELIST; skip cantian_vlan_ip env.") + self.logger.debug("cantian_vlan_ip empty or not defined for this node; skip env export.") def setLibrary(self): """ -- Gitee From 932d0110fdb96b4a28e23d8c7808bf2ad73928f1 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 14:34:56 +0800 Subject: [PATCH 085/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 1 - script/local/PreInstallUtility.py | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index bb24def1..578b3adf 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -709,7 +709,6 @@ class Install(LocalBaseOM): status, output = subprocess.getstatusoutput(tar_cmd) if status != 0: raise Exception(f"Failed to decompress DBStor_Client.\n{output}") - FileUtil.removeFile(dbstor_pkg) # 创建目录结构 FileUtil.createDirectory(os.path.join(dbstor_home, "data", "ftds"), diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 0214380f..b58b9e06 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1881,12 +1881,11 @@ Common options: "dbstor_home", self.clusterConfig).strip() for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine( - prof, - r"^\s*export\s*(ENABLE_DBSTOR|LINK_TYPE|STORAGE_VLAN_IP|DBSTOR_HOME)=.*$") - FileUtil.deleteLine( - prof, - r"^\s*export\s*LD_LIBRARY_PATH=.*\$DBSTOR_HOME[^ ]*.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DBSTOR=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*LINK_TYPE=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*STORAGE_VLAN_IP=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*DBSTOR_HOME=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*LD_LIBRARY_PATH=.*\$DBSTOR_HOME[^ ]*.*$") FileUtil.writeFile(prof, [ "export ENABLE_DBSTOR=on", -- Gitee From d0ef1504ac351be6c41e4f4a6faa63ff4a9bbadc Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 14:44:45 +0800 Subject: [PATCH 086/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index 578b3adf..ba0b56c7 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -627,8 +627,13 @@ class Install(LocalBaseOM): 1. 用户输入存储用户名/密码 2. 
调用 dbstor_home 下唯一的 kmc_encrypt_only_* 生成密文 """ - storage_user = input("Please enter DBStor storage username: ").strip() - storage_pass = getpass.getpass("Please enter DBStor storage password: ").strip() + sys.stdout.write("Please enter DBStor storage username: ") + sys.stdout.flush() + storage_user = sys.stdin.readline().strip() + + sys.stdout.write("Please enter DBStor storage password: ") + sys.stdout.flush() + storage_pass = getpass.getpass("").strip() enc_bin = next( (os.path.join(dbstor_home, f) -- Gitee From f90fced33fc96babcc65b527c0ad141c46af3b9b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 15:11:31 +0800 Subject: [PATCH 087/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 65 +++++++++++++++++++++++++ script/impl/install/InstallImpl.py | 1 + script/local/Install.py | 52 -------------------- 3 files changed, 66 insertions(+), 52 deletions(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 82ac859b..51412021 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -19,6 +19,7 @@ import socket import sys import getpass import pwd +import re from subprocess import PIPE sys.path.append(sys.path[0] + "/../../") @@ -575,3 +576,67 @@ class ParallelBaseOM(object): scpFile = os.path.join(binPath, "%s" % certFile) self.sshTool.scpFiles(scpFile, binPath, hostList) self.logger.debug("Successfully encrypted cipher and rand files.") + + def genDbstorAuthFile(self) -> str: + """ + 如果环境变量 ENABLE_DBSTOR=on,则: + 1. 从 stdout 提示并读取存储用户名和密码 + 2. 调用 dbstor_home 下的 kmc_encrypt_only_* 生成密文(用 CmdUtil,不用 subprocess) + 3. 
将密文和用户名写入到 ${dbstor_home}/conf/kmc/dbstor_auth.conf + 返回 kmc 目录路径,否则返回空字符串。 + """ + enable_flag = os.environ.get("ENABLE_DBSTOR", "off").strip().lower() + if enable_flag != "on": + self.logger.debug("ENABLE_DBSTOR is off; skipping DBStor auth generation.") + return "" + + sys.stdout.write("Please enter DBStor storage username: ") + sys.stdout.flush() + storage_user = sys.stdin.readline().strip() + + sys.stdout.write("Please enter DBStor storage password: ") + sys.stdout.flush() + storage_pass = getpass.getpass("").strip() + + dbstor_home = os.environ.get("DBSTOR_HOME") + if not dbstor_home: + raise Exception("DBSTOR_HOME environment variable must be set when ENABLE_DBSTOR=on.") + enc_bin = next( + (os.path.join(dbstor_home, f) + for f in os.listdir(dbstor_home) + if re.match(r"kmc_encrypt_only_", f)), + "" + ) + if not enc_bin: + raise Exception("kmc_encrypt_only_* not found under DBSTOR_HOME.") + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, enc_bin) + + from base_utils.os.cmd_util import CmdUtil + cmd = f"echo '{storage_pass}' | {enc_bin}" + status, cipher = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(f"Encrypt DBStor password failed:\n{cipher}") + cipher = cipher.strip() + + src_binfile = os.path.join(os.getcwd(), "binfile") + if not os.path.exists(src_binfile): + raise Exception("binfile not produced by kmc_encrypt_only_* in current dir.") + dest_conf = os.path.join(dbstor_home, "conf") + dest_kmc = os.path.join(dest_conf, "kmc") + + FileUtil.createDirectory(dest_conf, True, DefaultValue.KEY_DIRECTORY_MODE) + if os.path.isdir(dest_kmc): + FileUtil.removeDirectory(dest_kmc) + FileUtil.cpFile(src_binfile, dest_kmc) + FileUtil.removeDirectory(src_binfile) if os.path.isdir(src_binfile) else os.remove(src_binfile) + + auth_file = os.path.join(dest_kmc, "dbstor_auth.conf") + lines = [ + f"DBSTOR_USER={storage_user}", + f"DBSTOR_AUTH_KEY={cipher}" + ] + FileUtil.writeFile(auth_file, lines) + FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, auth_file) + FileUtil.changeOwner(os.environ.get("USER"), auth_file) + + self.logger.debug(f"DBStor auth written to {auth_file}") \ No newline at end of file diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 38c7b976..e28ed2f2 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -460,6 +460,7 @@ class InstallImpl: self.context.logger.log("encrypt cipher and rand files " "for database.") initPasswd = self.getPasswdFromInitParam() + self.context.genDbstorAuthFile() self.context.genCipherAndRandFile(None, initPasswd) self.context.logger.log("begin to create CA cert files") self.context.createServerCa() diff --git a/script/local/Install.py b/script/local/Install.py index ba0b56c7..38b8b757 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -622,56 +622,6 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(tmp_dir) self.logger.log(f"Deployed Mysql_server to {dest_mysql_dir}") - def _prompt_encrypt_dbstor_pass(self, dbstor_home: str) -> None: - """ - 1. 用户输入存储用户名/密码 - 2. 
调用 dbstor_home 下唯一的 kmc_encrypt_only_* 生成密文 - """ - sys.stdout.write("Please enter DBStor storage username: ") - sys.stdout.flush() - storage_user = sys.stdin.readline().strip() - - sys.stdout.write("Please enter DBStor storage password: ") - sys.stdout.flush() - storage_pass = getpass.getpass("").strip() - - enc_bin = next( - (os.path.join(dbstor_home, f) - for f in os.listdir(dbstor_home) - if re.match(r"kmc_encrypt_only_", f)), - "" - ) - if not enc_bin: - raise Exception("kmc_encrypt_only_* not found in DBStor root dir") - os.chmod(enc_bin, os.stat(enc_bin).st_mode | stat.S_IXUSR) - - st, cipher = subprocess.getstatusoutput(f"echo '{storage_pass}' | {enc_bin}") - if st != 0: - raise Exception(f"Encrypt DBStor password failed:\n{cipher}") - cipher = cipher.strip() - - src_binfile = os.path.join(dbstor_home, "binfile") - dest_conf = os.path.join(dbstor_home, "conf") - dest_kmc = os.path.join(dest_conf, "kmc") - - if not os.path.isdir(src_binfile): - raise Exception("binfile directory not produced by kmc_encrypt_only_*") - - FileUtil.createDirectory(dest_conf, True, DefaultValue.KEY_DIRECTORY_MODE) - if os.path.isdir(dest_kmc): - FileUtil.removeDirectory(dest_kmc) - - FileUtil.cpFile(src_binfile, dest_kmc) - FileUtil.removeDirectory(src_binfile) - - ProfileFile.updateUserEnvVariable(self.mpprcFile, "DBSTOR_USER", storage_user) - ProfileFile.updateUserEnvVariable(self.mpprcFile, "DBSTOR_AUTH_KEY", cipher) - - os.environ["DBSTOR_USER"] = storage_user - os.environ["DBSTOR_AUTH_KEY"] = cipher - - self.logger.debug("DBStor password encrypted; binfile copied to conf/kmc via FileUtil") - def decompress_dbstor_client(self): """ 前提: cluster.xml 中 enable_dbstor=on @@ -751,8 +701,6 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(src_kmc_shared) self.logger.debug(f"Moved kmc_shared → {dest_addon}") - self._prompt_encrypt_dbstor_pass(dbstor_home) - FileUtil.changeOwner(self.user, dbstor_home, True) self.logger.log(f"DBStor client deployed under {dbstor_home} (tools dir created, temp tgz removed)") -- Gitee From 4faab3febb7c0c474414fe9dcc5f0e83d5aacee1 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 15:25:58 +0800 Subject: [PATCH 088/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/DBStor/dbstor_comp.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index a6bc638b..4042bb45 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -193,6 +193,23 @@ class DBStor(BaseComponent): dpu_uuid = CmdUtil.retryGetstatusoutput("uuidgen")[1].strip() + dbstor_home = os.getenv("DBSTOR_HOME", "").strip() + auth_file = os.path.join(dbstor_home, "conf", "kmc", "dbstor_auth.conf") + if not os.path.isfile(auth_file): + raise FileNotFoundError(f"{auth_file} not found.") + + auth_lines = FileUtil.readFile(auth_file) + auth_kv = {} + for ln in auth_lines: + ln = ln.strip() + if "=" not in ln or ln.startswith("#"): + continue + k, _, v = ln.partition("=") + auth_kv[k.strip()] = v.strip() + + storage_user = auth_kv.get("DBSTOR_USER", "") + cipher = auth_kv.get("DBSTOR_AUTH_KEY", "") + kv = { "NAMESPACE_FSNAME": fs_names[0], "NAMESPACE_PAGE_FSNAME": fs_names[1], @@ -208,8 +225,11 @@ class DBStor(BaseComponent): "NODE_ID": str(node_id), "CLUSTER_ID": str(cluster_id), "DPU_UUID": dpu_uuid, - 
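# Illustrative sketch of the dbstor_auth.conf contract between
# genDbstorAuthFile() (the writer) and the reader above: a plain key=value
# file, e.g.
#
#   DBSTOR_USER=storage_admin
#   DBSTOR_AUTH_KEY=<cipher text emitted by kmc_encrypt_only_*>
#
# The sample user name is an assumption; the keys and the parsing rules
# (skip blank lines, '#' comments and lines without '=') follow the code above.
def parse_auth_conf(path):
    auth = {}
    with open(path, encoding="utf-8") as fp:
        for raw in fp:
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            auth[key.strip()] = value.strip()
    return auth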
"DBS_LOG_PATH": "${GAUSSLOG}/dbstor" + "DBS_LOG_PATH": "${GAUSSLOG}/dbstor", + "USER_NAME": storage_user, + "PASSWORD": cipher } + update_ini_file(cfg_file, kv, DefaultValue.KEY_FILE_MODE, self.logger) def _sync_cm_dbstor_conf(self, dbstor_home: str): -- Gitee From b55cf29577b6eed16bc4148e05a1b7a652cc7c98 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 15:37:46 +0800 Subject: [PATCH 089/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index 38b8b757..b6ec1dbe 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -631,6 +631,7 @@ class Install(LocalBaseOM): 3. 解内部 *-dbstore.tgz 4. 交互加密密码,生成 conf/kmc 5. 将 lib/kmc_shared 拷贝为 add-on + 6. 对 kmc_encrypt_only_* 加可执行权限,避免后面调用失败 """ if getattr(self.clusterInfo, "enable_dbstor", "off").lower() != "on": self.logger.debug("enable_dbstor != on, skip DBStor_Client package.") @@ -659,13 +660,12 @@ class Install(LocalBaseOM): raise Exception("dbstor_home is empty in cluster config.") FileUtil.createDirectory(dbstor_home, True, DefaultValue.KEY_DIRECTORY_MODE) - tar_cmd = f"tar -zxf '{dbstor_pkg}' -C '{dbstor_home}'" - self.logger.log(f"Decompressing DBStor_Client: {tar_cmd}") - status, output = subprocess.getstatusoutput(tar_cmd) + cmd = f"tar -zxf '{dbstor_pkg}' -C '{dbstor_home}'" + self.logger.log(f"Decompressing DBStor_Client: {cmd}") + status, output = subprocess.getstatusoutput(cmd) if status != 0: raise Exception(f"Failed to decompress DBStor_Client.\n{output}") - # 创建目录结构 FileUtil.createDirectory(os.path.join(dbstor_home, "data", "ftds"), True, DefaultValue.KEY_DIRECTORY_MODE) for sub in ("dbs", "infra"): @@ -674,25 +674,22 @@ class Install(LocalBaseOM): tools_dir = os.path.join(dbstor_home, "tools") FileUtil.createDirectory(tools_dir, True, DefaultValue.KEY_DIRECTORY_MODE) - inner_pkgs = [f for f in os.listdir(dbstor_home) - if f.endswith("-dbstore.tgz")] - + inner_pkgs = [f for f in os.listdir(dbstor_home) if f.endswith("-dbstore.tgz")] if not inner_pkgs: raise Exception("DBStor_Client package format error: no *-dbstore.tgz found.") - for inner in inner_pkgs: abs_path = os.path.join(dbstor_home, inner) if inner.startswith("dbstor_client"): - cmd = f"tar -zxf '{abs_path}' -C '{dbstor_home}'" + extract_to = dbstor_home else: - cmd = f"tar -zxf '{abs_path}' -C '{tools_dir}'" + extract_to = tools_dir + cmd = f"tar -zxf '{abs_path}' -C '{extract_to}'" self.logger.log(f"[DBStor] extracting {inner}") status, output = subprocess.getstatusoutput(cmd) if status != 0: raise Exception(f"Failed to extract {inner}: {output}") FileUtil.removeFile(abs_path) - # lib/kmc_shared → add-on src_kmc_shared = os.path.join(dbstor_home, "lib", "kmc_shared") dest_addon = os.path.join(dbstor_home, "add-on") if os.path.isdir(src_kmc_shared): @@ -701,8 +698,16 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(src_kmc_shared) self.logger.debug(f"Moved kmc_shared → {dest_addon}") + for root, _, files in os.walk(dbstor_home): + for fname in files: + if fname.startswith("kmc_encrypt_only_"): + full = os.path.join(root, fname) + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, full) + self.logger.debug(f"Set executable permission on {full}") + FileUtil.changeOwner(self.user, dbstor_home, True) - self.logger.log(f"DBStor client deployed under {dbstor_home} (tools dir created, temp tgz 
removed)") + self.logger.log(f"DBStor client deployed under {dbstor_home} " + "(tools dir created, temp tgz removed, kmc executables fixed)") def generate_dss_path(self): """ -- Gitee From 39894948eb5584e126ecf2dcd6187ac5bd942914 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 15:44:58 +0800 Subject: [PATCH 090/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/Install.py b/script/local/Install.py index b6ec1dbe..755389b5 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -702,7 +702,7 @@ class Install(LocalBaseOM): for fname in files: if fname.startswith("kmc_encrypt_only_"): full = os.path.join(root, fname) - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, full) + FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, full) self.logger.debug(f"Set executable permission on {full}") FileUtil.changeOwner(self.user, dbstor_home, True) -- Gitee From 2c13b86386a94b1f479968d2c243518485d6d329 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 15:49:42 +0800 Subject: [PATCH 091/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 2 +- script/local/Install.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 51412021..57ef2f5a 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -609,7 +609,7 @@ class ParallelBaseOM(object): ) if not enc_bin: raise Exception("kmc_encrypt_only_* not found under DBSTOR_HOME.") - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, enc_bin) + FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, enc_bin) from base_utils.os.cmd_util import CmdUtil cmd = f"echo '{storage_pass}' | {enc_bin}" diff --git a/script/local/Install.py b/script/local/Install.py index 755389b5..61abf25b 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -698,13 +698,6 @@ class Install(LocalBaseOM): FileUtil.removeDirectory(src_kmc_shared) self.logger.debug(f"Moved kmc_shared → {dest_addon}") - for root, _, files in os.walk(dbstor_home): - for fname in files: - if fname.startswith("kmc_encrypt_only_"): - full = os.path.join(root, fname) - FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, full) - self.logger.debug(f"Set executable permission on {full}") - FileUtil.changeOwner(self.user, dbstor_home, True) self.logger.log(f"DBStor client deployed under {dbstor_home} " "(tools dir created, temp tgz removed, kmc executables fixed)") -- Gitee From 1ca398c2c8ed1210c890c13088e841d5bc28dc01 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 16:45:54 +0800 Subject: [PATCH 092/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 57ef2f5a..46d68781 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -612,7 +612,7 @@ class ParallelBaseOM(object): FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, enc_bin) from base_utils.os.cmd_util import CmdUtil - cmd = 
f"echo '{storage_pass}' | {enc_bin}" + cmd = f"{enc_bin} {storage_pass}" status, cipher = CmdUtil.retryGetstatusoutput(cmd) if status != 0: raise Exception(f"Encrypt DBStor password failed:\n{cipher}") -- Gitee From 6d99c31f53de55c65b2c7cc7c4c9e1e7e7830a3b Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 16:52:42 +0800 Subject: [PATCH 093/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 46d68781..855d6fa7 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -577,7 +577,7 @@ class ParallelBaseOM(object): self.sshTool.scpFiles(scpFile, binPath, hostList) self.logger.debug("Successfully encrypted cipher and rand files.") - def genDbstorAuthFile(self) -> str: + def genDbstorAuthFile(self): """ 如果环境变量 ENABLE_DBSTOR=on,则: 1. 从 stdout 提示并读取存储用户名和密码 @@ -601,6 +601,7 @@ class ParallelBaseOM(object): dbstor_home = os.environ.get("DBSTOR_HOME") if not dbstor_home: raise Exception("DBSTOR_HOME environment variable must be set when ENABLE_DBSTOR=on.") + enc_bin = next( (os.path.join(dbstor_home, f) for f in os.listdir(dbstor_home) @@ -621,14 +622,17 @@ class ParallelBaseOM(object): src_binfile = os.path.join(os.getcwd(), "binfile") if not os.path.exists(src_binfile): raise Exception("binfile not produced by kmc_encrypt_only_* in current dir.") + dest_conf = os.path.join(dbstor_home, "conf") dest_kmc = os.path.join(dest_conf, "kmc") - FileUtil.createDirectory(dest_conf, True, DefaultValue.KEY_DIRECTORY_MODE) - if os.path.isdir(dest_kmc): - FileUtil.removeDirectory(dest_kmc) + FileUtil.createDirectory(dest_kmc, True, DefaultValue.KEY_DIRECTORY_MODE) + FileUtil.cpFile(src_binfile, dest_kmc) - FileUtil.removeDirectory(src_binfile) if os.path.isdir(src_binfile) else os.remove(src_binfile) + if os.path.isdir(src_binfile): + FileUtil.removeDirectory(src_binfile) + else: + os.remove(src_binfile) auth_file = os.path.join(dest_kmc, "dbstor_auth.conf") lines = [ -- Gitee From 7fe89440fc079bcdba51e7166137302003bd4bb5 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 17:09:57 +0800 Subject: [PATCH 094/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 855d6fa7..d8c890d1 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -619,27 +619,26 @@ class ParallelBaseOM(object): raise Exception(f"Encrypt DBStor password failed:\n{cipher}") cipher = cipher.strip() - src_binfile = os.path.join(os.getcwd(), "binfile") - if not os.path.exists(src_binfile): - raise Exception("binfile not produced by kmc_encrypt_only_* in current dir.") - - dest_conf = os.path.join(dbstor_home, "conf") - dest_kmc = os.path.join(dest_conf, "kmc") + src_binfile_dir = os.path.join(os.getcwd(), "binfile") + if not os.path.isdir(src_binfile_dir): + raise Exception("binfile directory not produced by kmc_encrypt_only_* in current dir.") + dest_kmc = os.path.join(dbstor_home, "conf", "kmc") FileUtil.createDirectory(dest_kmc, True, 
DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.cpFile(src_binfile, dest_kmc) - if os.path.isdir(src_binfile): - FileUtil.removeDirectory(src_binfile) - else: - os.remove(src_binfile) + for fname in os.listdir(src_binfile_dir): + src_path = os.path.join(src_binfile_dir, fname) + dest_path = os.path.join(dest_kmc, fname) + FileUtil.cpFile(src_path, dest_path) + + FileUtil.removeDirectory(src_binfile_dir) auth_file = os.path.join(dest_kmc, "dbstor_auth.conf") lines = [ f"DBSTOR_USER={storage_user}", f"DBSTOR_AUTH_KEY={cipher}" ] - FileUtil.writeFile(auth_file, lines) + FileUtil.writeFile(auth_file, lines, mode="w") FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, auth_file) FileUtil.changeOwner(os.environ.get("USER"), auth_file) -- Gitee From ca56cc51043e54d8a1420052b37e3ecc127b9c0f Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 17:15:45 +0800 Subject: [PATCH 095/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index d8c890d1..1b357dbe 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -634,6 +634,7 @@ class ParallelBaseOM(object): FileUtil.removeDirectory(src_binfile_dir) auth_file = os.path.join(dest_kmc, "dbstor_auth.conf") + FileUtil.createFileInSafeMode(auth_file) lines = [ f"DBSTOR_USER={storage_user}", f"DBSTOR_AUTH_KEY={cipher}" -- Gitee From 408d70c4f5768b6dd03ce214cb51022c40415f8c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 17:47:35 +0800 Subject: [PATCH 096/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 1 - 1 file changed, 1 deletion(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 24233531..3393e17e 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -672,7 +672,6 @@ class DN_OLAP(Kernel): "ENABLE_DBSTOR": "TRUE", "DBSTOR_NAMESPACE": os.getenv("GS_CLUSTER_NAME", ""), "SHARED_PATH": "-", - "_DOUBLEWITE": "FALSE", "ENABLE_DBSTOR_BATCH_FLUSH": "TRUE", }) -- Gitee From cf44b02babe7eecd3a567dcd2f80cf5cd8f12405 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 18:03:06 +0800 Subject: [PATCH 097/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/InitInstance.py | 2 +- script/local/Install.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index ff479cf8..305d2a45 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -244,7 +244,7 @@ class initDbNode(LocalBaseOM): # config instance in parallel if self.dss_cons: parallelTool.parallelExecute(self.initInstance, self.dss_cons) - if self.dbstor_cons: + if self.dbstor_mode: self.logger.log("Initializing DBStor component ...") parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) diff --git a/script/local/Install.py b/script/local/Install.py index 61abf25b..6b0c580e 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -694,9 +694,12 @@ class Install(LocalBaseOM): 
dest_addon = os.path.join(dbstor_home, "add-on") if os.path.isdir(src_kmc_shared): FileUtil.createDirectory(dest_addon, True, DefaultValue.KEY_DIRECTORY_MODE) - FileUtil.cpFile(src_kmc_shared, dest_addon) + for fname in os.listdir(src_kmc_shared): + src_path = os.path.join(src_kmc_shared, fname) + dest_path = os.path.join(dest_addon, fname) + FileUtil.cpFile(src_path, dest_path) FileUtil.removeDirectory(src_kmc_shared) - self.logger.debug(f"Moved kmc_shared → {dest_addon}") + self.logger.debug(f"Moved contents of kmc_shared → {dest_addon}") FileUtil.changeOwner(self.user, dbstor_home, True) self.logger.log(f"DBStor client deployed under {dbstor_home} " -- Gitee From c3d7cc204756da1a6cbf614698110b4cb960d01c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Sat, 26 Jul 2025 18:18:55 +0800 Subject: [PATCH 098/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index 1b357dbe..a35814ab 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -635,6 +635,7 @@ class ParallelBaseOM(object): auth_file = os.path.join(dest_kmc, "dbstor_auth.conf") FileUtil.createFileInSafeMode(auth_file) + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, auth_file) lines = [ f"DBSTOR_USER={storage_user}", f"DBSTOR_AUTH_KEY={cipher}" -- Gitee From f176fab892adfeadc4c0b9b9497693dc21234f64 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 11:13:08 +0800 Subject: [PATCH 099/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/LocalBaseOM.py | 6 +++--- script/local/InitInstance.py | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py index 77754b5c..3578c262 100644 --- a/script/gspylib/common/LocalBaseOM.py +++ b/script/gspylib/common/LocalBaseOM.py @@ -115,15 +115,15 @@ class LocalBaseOM(object): self.dss_cons.append(component) def init_dbstor_component(self, dbstor_mode=False): - if not dbstor_mode: + enable_flag = os.getenv("ENABLE_DBSTOR") + if str(enable_flag).strip().lower() not in ("on", "true", "yes", "1"): + self.logger.debug("[DBSTOR] ENABLE_DBSTOR is not set to on, skip DBSTOR initialization") return for _ in self.dbNodeInfo.datanodes: component = DBStor() component.logger = self.logger component.binPath = f"{self.clusterInfo.appPath}/bin" - component.dbstor_mode = dbstor_mode - component.dbstor_config = self.dbstor_config self.dbstor_cons.append(component) def initComponentAttributes(self, component): diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index 305d2a45..d9300999 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -244,9 +244,7 @@ class initDbNode(LocalBaseOM): # config instance in parallel if self.dss_cons: parallelTool.parallelExecute(self.initInstance, self.dss_cons) - if self.dbstor_mode: - self.logger.log("Initializing DBStor component ...") - parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) + parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) parallelTool.parallelExecute(self.initInstance, components) -- Gitee From 10114a0fa0f597f71d70fb491b6215314111389d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: 
Mon, 28 Jul 2025 11:44:33 +0800 Subject: [PATCH 100/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DBStor/dbstor_comp.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index 4042bb45..dfecb7bc 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -111,7 +111,7 @@ class DBStor(BaseComponent): for idx, tool_proc in enumerate(range(7, 11), start=1): inst_id, tool_uuid = run_obtains_lsid(cluster_id, node_id, 2, tool_proc) dst = os.path.join(cfg_dir, f"dbstor_config_tool_{idx}.ini") - FileUtil.copyFile(base_cfg, dst) + FileUtil.cpFile(base_cfg, dst) extra_kv = { "DBSTOR_OWNER_NAME": "dbstor", @@ -133,7 +133,7 @@ class DBStor(BaseComponent): "nomlnx" if link_type == "0" else "mlnx", "libxnetlite.so") dst_lib = os.path.join(dbstor_home, "lib", "libxnetlite.so") - FileUtil.copyFile(src_lib, dst_lib) + FileUtil.cpFile(src_lib, dst_lib) gauss_home = EnvUtil.getEnv("GAUSSHOME") if not gauss_home: @@ -144,11 +144,11 @@ class DBStor(BaseComponent): src_node_cfg_name = "node_config_tcp.xml" if link_type == "0" else "node_config_rdma.xml" src_node_cfg = os.path.join(gauss_home, "share", "dbstor", src_node_cfg_name) - FileUtil.copyFile(src_node_cfg, os.path.join(infra_dir, "node_config.xml")) + FileUtil.cpFile(src_node_cfg, os.path.join(infra_dir, "node_config.xml")) # osd.cfg src_osd = os.path.join(gauss_home, "share", "dbstor", "osd.cfg") - FileUtil.copyFile(src_osd, os.path.join(infra_dir, "osd.cfg")) + FileUtil.cpFile(src_osd, os.path.join(infra_dir, "osd.cfg")) self.logger and self.logger.debug( f"[DBStor] LINK_TYPE={link_type}: libxnetlite.so, node_config.xml, osd.cfg copied.") @@ -167,7 +167,7 @@ class DBStor(BaseComponent): src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") - FileUtil.copyFile(src_cfg, dst_cfg) + FileUtil.cpFile(src_cfg, dst_cfg) update_ini_file(dst_cfg, {"DBSTOR_OWNER_NAME": "cantian"}, DefaultValue.KEY_FILE_MODE, @@ -254,7 +254,7 @@ class DBStor(BaseComponent): src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") - FileUtil.copyFile(src_cfg, dst_cfg) + FileUtil.cpFile(src_cfg, dst_cfg) update_ini_file(dst_cfg, {"DBSTOR_OWNER_NAME": "cm"}, DefaultValue.KEY_FILE_MODE, self.logger) @@ -263,11 +263,11 @@ class DBStor(BaseComponent): src_cfg_name = "node_config_tcp_cm.xml" if link_type == "0" else "node_config_rdma_cm.xml" src_cfg_path = os.path.join(gauss_home, "share", "dbstor", src_cfg_name) dst_cfg_path = os.path.join(dst_infra, "node_config.xml") - FileUtil.copyFile(src_cfg_path, dst_cfg_path) + FileUtil.cpFile(src_cfg_path, dst_cfg_path) src_osd = os.path.join(gauss_home, "share", "dbstor", "osd.cfg") dst_osd = os.path.join(dst_infra, "osd.cfg") - FileUtil.copyFile(src_osd, dst_osd) + FileUtil.cpFile(src_osd, dst_osd) self.logger and self.logger.debug(f"Synced CM dbstor conf → {dst_conf}") -- Gitee From 8bc62a414d2e5770981776e5f31173e611129fe8 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 13:00:40 +0800 Subject: [PATCH 101/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 14 
++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index a35814ab..f95d336c 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -644,4 +644,18 @@ class ParallelBaseOM(object): FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, auth_file) FileUtil.changeOwner(os.environ.get("USER"), auth_file) + + hostList = [dn.name for dn in self.clusterInfo.dbNodes] + local_host = socket.gethostname() + if local_host in hostList: + hostList.remove(local_host) + + if hostList and not self.isSingle: + self.logger.debug(f"Distributing DBStor kmc files to: {hostList}") + + for fname in os.listdir(dest_kmc): + src = os.path.join(dest_kmc, fname) + self.sshTool.scpFiles(src, dest_kmc, hostList) + self.logger.debug("Successfully distributed DBStor kmc content.") + self.logger.debug(f"DBStor auth written to {auth_file}") \ No newline at end of file -- Gitee From 9f99ea9194778e7635a553ba063bf596127ef1d9 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 13:07:27 +0800 Subject: [PATCH 102/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DBStor/dbstor_comp.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index dfecb7bc..0a2302af 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -210,6 +210,12 @@ class DBStor(BaseComponent): storage_user = auth_kv.get("DBSTOR_USER", "") cipher = auth_kv.get("DBSTOR_AUTH_KEY", "") + # 从 GAUSSLOG 环境变量拼接绝对日志路径 + gausslog = EnvUtil.getEnv("GAUSSLOG") or os.getenv("GAUSSLOG") + if not gausslog: + raise Exception("GAUSSLOG environment variable must be set for DBS_LOG_PATH") + dbs_log_path = os.path.join(gausslog, "dbstor") + kv = { "NAMESPACE_FSNAME": fs_names[0], "NAMESPACE_PAGE_FSNAME": fs_names[1], @@ -225,7 +231,7 @@ class DBStor(BaseComponent): "NODE_ID": str(node_id), "CLUSTER_ID": str(cluster_id), "DPU_UUID": dpu_uuid, - "DBS_LOG_PATH": "${GAUSSLOG}/dbstor", + "DBS_LOG_PATH": dbs_log_path, "USER_NAME": storage_user, "PASSWORD": cipher } -- Gitee From 9792591b714bffbde2839163789b908ea7f4068c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 13:28:13 +0800 Subject: [PATCH 103/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/ParallelBaseOM.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/script/gspylib/common/ParallelBaseOM.py b/script/gspylib/common/ParallelBaseOM.py index f95d336c..d4abbfbd 100644 --- a/script/gspylib/common/ParallelBaseOM.py +++ b/script/gspylib/common/ParallelBaseOM.py @@ -648,14 +648,13 @@ class ParallelBaseOM(object): hostList = [dn.name for dn in self.clusterInfo.dbNodes] local_host = socket.gethostname() if local_host in hostList: - hostList.remove(local_host) + hostList.remove(local_host) if hostList and not self.isSingle: - self.logger.debug(f"Distributing DBStor kmc files to: {hostList}") - - for fname in os.listdir(dest_kmc): - src = os.path.join(dest_kmc, fname) - self.sshTool.scpFiles(src, dest_kmc, hostList) - self.logger.debug("Successfully distributed DBStor kmc content.") + mkdir_cmd = f"mkdir -p {dest_kmc}" + 
self.sshTool.executeCommand(mkdir_cmd, hostList=hostList) + parent_dir = os.path.dirname(dest_kmc) + self.sshTool.scpFiles(dest_kmc, parent_dir, hostList) + self.logger.debug(f"Distributed kmc directory to nodes: {hostList}") self.logger.debug(f"DBStor auth written to {auth_file}") \ No newline at end of file -- Gitee From 1cb87a27b64b99a8ddf1c61e7934e4bfb5ab1b94 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 14:23:25 +0800 Subject: [PATCH 104/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index b58b9e06..23dc3fe4 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1872,6 +1872,7 @@ Common options: if enable_dbstor_flag != "on": self.logger.debug("[DBStor] enable_dbstor=off; skip DBStor env export") return + host = NetUtil.GetHostIpOrName() link_type = ClusterConfigFile.getOneClusterConfigItem( "link_type", self.clusterConfig).strip() or "0" @@ -1880,11 +1881,18 @@ Common options: dbstor_home_cfg = ClusterConfigFile.getOneClusterConfigItem( "dbstor_home", self.clusterConfig).strip() + all_nodes = [dn.name for dn in self.clusterInfo.dbNodes] + try: + node_index = all_nodes.index(host) + except ValueError: + node_index = 0 + for prof in (userProfile, self.user_env_file): FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DBSTOR=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*LINK_TYPE=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*STORAGE_VLAN_IP=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*DBSTOR_HOME=.*$") + FileUtil.deleteLine(prof, r"^\s*export\s*NODE_ID=.*$") FileUtil.deleteLine(prof, r"^\s*export\s*LD_LIBRARY_PATH=.*\$DBSTOR_HOME[^ ]*.*$") FileUtil.writeFile(prof, [ @@ -1892,7 +1900,8 @@ Common options: f"export LINK_TYPE={link_type}", f"export STORAGE_VLAN_IP={storage_vlan_ip}", f"export DBSTOR_HOME={dbstor_home_cfg}", - r"export LD_LIBRARY_PATH=$DBSTOR_HOME/add-on:$DBSTOR_HOME/lib:$LD_LIBRARY_PATH" + r"export LD_LIBRARY_PATH=$DBSTOR_HOME/add-on:$DBSTOR_HOME/lib:$LD_LIBRARY_PATH", + f"export NODE_ID={node_index}" ]) self.logger.debug( @@ -1900,7 +1909,6 @@ Common options: f"STORAGE_VLAN_IP={storage_vlan_ip}" ) - host = NetUtil.GetHostIpOrName() root = ClusterConfigFile.initParserXMLFile(self.clusterConfig) status, vlan_str = ClusterConfigFile.readOneClusterConfigItem( root, -- Gitee From df74e6ff3c7974b7182eed6a82dec48844165b15 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 14:45:01 +0800 Subject: [PATCH 105/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DBStor/dbstor_comp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index 0a2302af..02049614 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -215,6 +215,7 @@ class DBStor(BaseComponent): if not gausslog: raise Exception("GAUSSLOG environment variable must be set for DBS_LOG_PATH") dbs_log_path = os.path.join(gausslog, "dbstor") + FileUtil.createDirectory(dbs_log_path, DefaultValue.KEY_DIRECTORY_MODE) kv = { "NAMESPACE_FSNAME": fs_names[0], -- Gitee From 1fb1ad8aea5886459311b958d26b0efb276e8f72 Mon Sep 17 00:00:00 
2001 From: Duanshaohua Date: Mon, 28 Jul 2025 15:06:56 +0800 Subject: [PATCH 106/144] =?UTF-8?q?OM=E5=AF=B9=E6=8E=A5Dbstor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/DBStor/dbstor_comp.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index 02049614..b8397ab7 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -162,18 +162,34 @@ class DBStor(BaseComponent): dst_base = os.path.join(ctdb_home, "dbstor") dst_conf = os.path.join(dst_base, "conf") dst_dbs = os.path.join(dst_conf, "dbs") - for p in (dst_base, dst_conf, dst_dbs): + dst_infra = os.path.join(dst_conf, "infra") + for p in (dst_base, dst_conf, dst_dbs, dst_infra): FileUtil.createDirectory(p, DefaultValue.KEY_DIRECTORY_MODE) src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") - dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") + dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") FileUtil.cpFile(src_cfg, dst_cfg) - update_ini_file(dst_cfg, - {"DBSTOR_OWNER_NAME": "cantian"}, - DefaultValue.KEY_FILE_MODE, - self.logger) + update_ini_file( + dst_cfg, + {"DBSTOR_OWNER_NAME": "cantian"}, + DefaultValue.KEY_FILE_MODE, + self.logger + ) + + src_infra_dir = os.path.join(dbstor_home, "conf", "infra") + if os.path.isdir(src_infra_dir): + for fname in os.listdir(src_infra_dir): + src = os.path.join(src_infra_dir, fname) + dst = os.path.join(dst_infra, fname) + FileUtil.cpFile(src, dst) + else: + self.logger and self.logger.debug( + f"No infra directory at {src_infra_dir}; skipping infra sync." + ) - self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_cfg}") + self.logger and self.logger.debug( + f"Synced Cantian dbstor conf → {dst_cfg} and infra files → {dst_infra}" + ) def _update_dbstor_config(self, cfg_file: str) -> None: -- Gitee From 5039cdfe341746788c6e5607d27f1285b04b79ab Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 16:05:24 +0800 Subject: [PATCH 107/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/CM/CM.py | 10 +-- .../gspylib/component/CM/CM_OLAP/CM_OLAP.py | 66 +++++++------------ 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/script/gspylib/component/CM/CM.py b/script/gspylib/component/CM/CM.py index 60224c57..3f90b654 100644 --- a/script/gspylib/component/CM/CM.py +++ b/script/gspylib/component/CM/CM.py @@ -40,14 +40,14 @@ class CM(BaseComponent): class CmResAttr(): - def __init__(self, script, res_type='APP'): + def __init__(self, script, res_type='DN'): self.resources_type = res_type self.script = script self.check_interval = 1 - self.time_out = 1 - self.restart_times = 3 - self.restart_delay = 45 - self.restart_period = 45 + self.time_out = 120 + self.restart_times = 5 + self.restart_delay = 1 + self.restart_period = 1 def __str__(self): return str(vars(self)).replace(":", '=').replace('\'', '').replace( diff --git a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py index 4ad42f26..a6a50dd5 100644 --- a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py +++ b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py @@ -845,51 +845,35 @@ class CM_OLAP(CM): def get_init_cm_cmd(self): user = pwd.getpwuid(os.getuid()).pw_name gauss_home = 
EnvUtil.getEnvironmentParameterValue('GAUSSHOME', user) - dss_home = EnvUtil.getEnvironmentParameterValue('DSS_HOME', user) - - # Cantian 数据目录:${CTDB_HOME}/data - ctdb_home = EnvUtil.getEnvironmentParameterValue('CTDB_HOME', user) - if not ctdb_home: - raise EnvironmentError("CTDB_HOME is not set – cannot locate Cantian data directory.") - cantian_data = os.path.join(ctdb_home, "data") - + dss_home = EnvUtil.getEnvironmentParameterValue('DSS_HOME', user) # not use realpath - dms_contrl = os.path.join(gauss_home, "bin/cantian_contrl.sh") - dss_contrl = os.path.join(gauss_home, "bin/dss_contrl.sh") - - cmd_list = [ - str(CmResCtrlCmd(name="cantian", attr=CmResAttr(dms_contrl))), - str(CmResCtrlCmd(name="dss", attr=CmResAttr(dss_contrl, res_type="APP"))) + dms_contrl = os.path.join(gauss_home, 'bin/dms_contrl.sh') + dss_contrl = os.path.join(gauss_home, 'bin/dss_contrl.sh') + + cmd = [ + str(CmResCtrlCmd(name='dms_res', attr=CmResAttr(dms_contrl))), + str( + CmResCtrlCmd(name='dss', + attr=CmResAttr(dss_contrl, res_type='APP'))) ] - # 提前解码 dss_nodes_list,避免循环里重复解码 - dss_nodes_list = DssConfig.get_value_b64_handler( - "dss_nodes_list", self.dss_config, action="decode" - ) - for db_inst in self.cluster_info.dbNodes: - dss_id = DssInst.get_current_dss_id(dss_home, db_inst, dss_nodes_list) - - cmd_list.append(str(CmResCtrlCmd( - action="edit", name="dss", - attr=DssInstAttr( - node_id=db_inst.id, - dss_id=dss_id, - dss_home=f"{dss_home};{db_inst.datanodes[0].datadir}" - ) - ))) - - cmd_list.append(str(CmResCtrlCmd( - action="edit", name="cantian", - attr=DssInstAttr( - node_id=db_inst.id, - dss_id=dss_id, - dss_home=f"{dss_home};{cantian_data}" - ) - ))) - - joined_cmd = " ;".join(cmd_list) - return f"source {EnvUtil.getMpprcFile()}; {joined_cmd}" + cmd.append( + str( + CmResCtrlCmd(action='edit', + name='dss', + attr=DssInstAttr( + node_id=db_inst.id, + dss_id=DssInst.get_current_dss_id( + dss_home, db_inst, + DssConfig.get_value_b64_handler( + 'dss_nodes_list', + self.dss_config, + action='decode')), + dss_home="{};{}".format( + dss_home, + db_inst.datanodes[0].datadir))))) + return "source {}; {}".format(EnvUtil.getMpprcFile(), ' ;'.join(cmd)) def init_cm_res_json(self, rm_cm_json=True): cm_resource = os.path.realpath( -- Gitee From 7c54643c81010b91c0ba9dfa66a33f0e8021ac19 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 16:55:01 +0800 Subject: [PATCH 108/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/base_utils/template/cluster_tmp.xml | 2 +- script/base_utils/template/resource_en.json | 2 +- script/base_utils/template/resource_zh.json | 2 +- script/base_utils/template/xml_status.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/script/base_utils/template/cluster_tmp.xml b/script/base_utils/template/cluster_tmp.xml index 33442dc6..5ce3203d 100644 --- a/script/base_utils/template/cluster_tmp.xml +++ b/script/base_utils/template/cluster_tmp.xml @@ -19,7 +19,7 @@ - + diff --git a/script/base_utils/template/resource_en.json b/script/base_utils/template/resource_en.json index df3d9291..3abee9dd 100644 --- a/script/base_utils/template/resource_en.json +++ b/script/base_utils/template/resource_en.json @@ -45,7 +45,7 @@ "intput_dbstor_home": "Please enter dbstor_home dir (default:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "Enter cantian_vlan_ip (nodes separated by ';', multi-IP with '|', e.g. 
10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip cannot be empty", - "input_storage_vlan_ip": "Enter storage_vlan_ip (multi-VLAN '|', multi-IP ',', e.g. 10.10.10.111,10.10.10.112|11.11.11.111,11.11.11.112):", + "input_storage_vlan_ip": "Enter storage_vlan_ip (multi-VLAN '|', multi-IP ',', e.g. 10.10.10.111;10.10.10.112|11.11.11.111;11.11.11.112):", "storage_vlan_ip_empty": "storage_vlan_ip cannot be empty", "input_link_type": "LINK_TYPE (0 = TCP, 1 = RDMA) [default 0]:", "link_type_invalid": "Only 0 or 1 is allowed!", diff --git a/script/base_utils/template/resource_zh.json b/script/base_utils/template/resource_zh.json index 5071aa80..b4a7ea2e 100644 --- a/script/base_utils/template/resource_zh.json +++ b/script/base_utils/template/resource_zh.json @@ -45,7 +45,7 @@ "intput_dbstor_home": "请输入 'dbstor_home' 的路径信息(默认:/opt/openGauss/install/dbstor_home)", "input_cantian_vlan_ip": "请输入 cantian_vlan_ip(节点用 ; 分隔,同节点多IP用 |,示例: 10.10.10.1|11.11.11.1;10.10.10.2|11.11.11.2):", "cantian_vlan_ip_empty": "cantian_vlan_ip 不能为空", - "input_storage_vlan_ip": "请输入 storage_vlan_ip(多VLAN用 |,同VLAN多IP用,示例: 10.10.10.111,10.10.10.112|11.11.11.111,11.11.11.112):", + "input_storage_vlan_ip": "请输入 storage_vlan_ip(多VLAN用 |,同VLAN多IP用,示例: 10.10.10.111;10.10.10.112|11.11.11.111;11.11.11.112):", "storage_vlan_ip_empty": "storage_vlan_ip 不能为空", "input_link_type": "请输入 LINK_TYPE(0=TCP,1=RDMA,默认0):", "link_type_invalid": "仅允许输入 0 或 1!", diff --git a/script/base_utils/template/xml_status.py b/script/base_utils/template/xml_status.py index f1ed137f..f33453ec 100644 --- a/script/base_utils/template/xml_status.py +++ b/script/base_utils/template/xml_status.py @@ -590,7 +590,7 @@ class StorageVlanIpStatus(TemplateStatus): if not user_input: GaussLog.printMessage(XmlConstant.RESOURCE_DATA.get('storage_vlan_ip_empty')) continue - if not check_ip_groups(user_input, '|', ','): + if not check_ip_groups(user_input, '|', ';'): continue XmlConstant.STORAGE_VLAN_IP = user_input return LinkTypeStatus() -- Gitee From c5fea4462b2816fe636692260084688e0960a3c2 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 17:03:38 +0800 Subject: [PATCH 109/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DBStor/dbstor_comp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index b8397ab7..77c9813c 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -242,7 +242,7 @@ class DBStor(BaseComponent): "PAGE_VSTOR": "0", "ARCHIVE_VSTOR": "0", "IS_CONTAINER": "0", - "LINKE_TYPE": link_type, + "LINK_TYPE": link_type, "LOCAL_IP": local_ip, "REMOTE_IP": storage_ip, "NODE_ID": str(node_id), -- Gitee From 81738b1a3c1da84449543bf4eccc523596f91d8d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 17:47:41 +0800 Subject: [PATCH 110/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/DBStor/dbstor_comp.py | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index 77c9813c..71028077 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py 
@@ -162,34 +162,18 @@ class DBStor(BaseComponent): dst_base = os.path.join(ctdb_home, "dbstor") dst_conf = os.path.join(dst_base, "conf") dst_dbs = os.path.join(dst_conf, "dbs") - dst_infra = os.path.join(dst_conf, "infra") - for p in (dst_base, dst_conf, dst_dbs, dst_infra): + for p in (dst_base, dst_conf, dst_dbs): FileUtil.createDirectory(p, DefaultValue.KEY_DIRECTORY_MODE) src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") - dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") + dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") FileUtil.cpFile(src_cfg, dst_cfg) - update_ini_file( - dst_cfg, - {"DBSTOR_OWNER_NAME": "cantian"}, - DefaultValue.KEY_FILE_MODE, - self.logger - ) - - src_infra_dir = os.path.join(dbstor_home, "conf", "infra") - if os.path.isdir(src_infra_dir): - for fname in os.listdir(src_infra_dir): - src = os.path.join(src_infra_dir, fname) - dst = os.path.join(dst_infra, fname) - FileUtil.cpFile(src, dst) - else: - self.logger and self.logger.debug( - f"No infra directory at {src_infra_dir}; skipping infra sync." - ) + update_ini_file(dst_cfg, + {"DBSTOR_OWNER_NAME": "cantian"}, + DefaultValue.KEY_FILE_MODE, + self.logger) - self.logger and self.logger.debug( - f"Synced Cantian dbstor conf → {dst_cfg} and infra files → {dst_infra}" - ) + self.logger and self.logger.debug(f"Synced Cantian dbstor conf → {dst_cfg}") def _update_dbstor_config(self, cfg_file: str) -> None: -- Gitee From 2d53db852260b6c3fae1cc0314a7a559314bbcdd Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 18:59:39 +0800 Subject: [PATCH 111/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 23dc3fe4..15b0baf2 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1813,6 +1813,8 @@ Common options: self._set_share_fs_env(userProfile) self._set_enable_mysql_env(userProfile) self._set_dbstor_env(userProfile) + self._set_cm_env(userProfile) + self._set_dss_env(userProfile) except Exception as e: self.logger.logExit(str(e)) @@ -1859,6 +1861,50 @@ Common options: f"MYSQL_HOME={mysql_home}" ) + def _set_cm_env(self, userProfile: str) -> None: + """ + 如果 XML 中 cmsNum >= 1,则 ENABLE_CM=on。 + """ + root = ClusterConfigFile.initParserXMLFile(self.clusterConfig) + + enable_cm = False + for dn in self.clusterInfo.dbNodes: + status, cms_str = ClusterConfigFile.readOneClusterConfigItem( + root, "cmsNum", "node", dn.name + ) + if status == 0 and cms_str: + try: + if int(cms_str) >= 1: + enable_cm = True + break + except ValueError: + continue + + if enable_cm: + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_CM=.*$") + FileUtil.writeFile(prof, ["export ENABLE_CM=on"]) + self.logger.debug("Flush ENABLE_CM=on") + else: + self.logger.debug("No cmsNum>=1 found; skip ENABLE_CM") + + + def _set_dss_env(self, userProfile: str) -> None: + """ + enable_dss=on + """ + enable_dss_flag = ClusterConfigFile.getOneClusterConfigItem( + "enable_dss", self.clusterConfig + ).strip().lower() or "off" + + if enable_dss_flag == "on": + for prof in (userProfile, self.user_env_file): + FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DSS=.*$") + FileUtil.writeFile(prof, ["export ENABLE_DSS=on"]) + self.logger.debug("Flush ENABLE_DSS=on") + else: + 
self.logger.debug("enable_dss=off; skip ENABLE_DSS") + def _set_dbstor_env(self, userProfile: str) -> None: """ enable_dbstor = on 时写入: -- Gitee From def792dc7d7d1790d8294f8fef9b9001c5955b60 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 19:03:10 +0800 Subject: [PATCH 112/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/Kernel.py | 8 +++++--- script/gspylib/component/MySql/mysql_comp.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index c8ebc40a..01c8721c 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -81,9 +81,11 @@ class Kernel(BaseComponent): # 临时适配,后面需要调整 self.adjust_cm_bitmap() - # if is_dss_mode: - # self.logger.debug("DSS mode start skipped.") - # return + # 存在CM则通过CM拉起 + enable_cm = os.getenv("ENABLE_CM", "").strip().lower() + if enable_cm == "on": + self.logger.debug("[MySQL] ENABLE_CM=on; skip start().") + return # 单进程模式:直接跳过,后面通过mysql拉起 single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() diff --git a/script/gspylib/component/MySql/mysql_comp.py b/script/gspylib/component/MySql/mysql_comp.py index b05be15a..7ed72c31 100644 --- a/script/gspylib/component/MySql/mysql_comp.py +++ b/script/gspylib/component/MySql/mysql_comp.py @@ -51,8 +51,10 @@ class Mysql(BaseComponent): return cluster.dbNodes[0].datanodes[0].datadir def initInstance(self): - # CM 场景通过CM初始化Mysql - return + enable_cm = os.getenv("ENABLE_CM", "").strip().lower() + if enable_cm == "on": + self.logger.debug("[MySQL] ENABLE_CM=on; skip initInstance().") + return enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() # 单进程才需要自动拉起mysql,双进程需要手动拉起mysql @@ -119,6 +121,11 @@ class Mysql(BaseComponent): """ 启动 MySQL """ + enable_cm = os.getenv("ENABLE_CM", "").strip().lower() + if enable_cm == "on": + self.logger.debug("[MySQL] ENABLE_CM=on; skip start().") + return + enable_flag = os.getenv("ENABLE_MYSQL", "").strip().lower() single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "").strip().lower() -- Gitee From c5d82506b1f46d2f3fbf8ddc93dd280a6cf3bffe Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 19:41:40 +0800 Subject: [PATCH 113/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/InitInstance.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/local/InitInstance.py b/script/local/InitInstance.py index d9300999..f07ce8b5 100644 --- a/script/local/InitInstance.py +++ b/script/local/InitInstance.py @@ -244,7 +244,8 @@ class initDbNode(LocalBaseOM): # config instance in parallel if self.dss_cons: parallelTool.parallelExecute(self.initInstance, self.dss_cons) - parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) + if self.dbstor_cons: + parallelTool.parallelExecute(self.initInstance, self.dbstor_cons) parallelTool.parallelExecute(self.initInstance, components) -- Gitee From 57ba4de4c8620b35806cb9060f1ab53495a03131 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 20:15:55 +0800 Subject: [PATCH 114/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 15b0baf2..68eba29d 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1899,11 +1899,11 @@ Common options: if enable_dss_flag == "on": for prof in (userProfile, self.user_env_file): - FileUtil.deleteLine(prof, r"^\s*export\s*ENABLE_DSS=.*$") - FileUtil.writeFile(prof, ["export ENABLE_DSS=on"]) - self.logger.debug("Flush ENABLE_DSS=on") + FileUtil.deleteLine(prof, r"^\s*export\s*DSS_ENABLE=.*$") + FileUtil.writeFile(prof, ["export DSS_ENABLE=on"]) + self.logger.debug("Flush DSS_ENABLE=on") else: - self.logger.debug("enable_dss=off; skip ENABLE_DSS") + self.logger.debug("enable_dss=off; skip DSS_ENABLE") def _set_dbstor_env(self, userProfile: str) -> None: """ -- Gitee From be5151634cb96127fb2e00a069492182e1b8a83d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 28 Jul 2025 21:39:29 +0800 Subject: [PATCH 115/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 15 +++++++++------ script/local/PreInstallUtility.py | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 3393e17e..c32f30f0 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -138,20 +138,21 @@ class DN_OLAP(Kernel): def _patch_create_sql_paths(self, sql_file: str) -> None: """ - 将 create_database.sql 中出现的 'dbfiles1' 统一替换: - • DSS 模式 → '+' - . dbstor 模式 待定 - • nfs 模式 → '/data' + 将 create_database.sql 中出现的 'dbfiles1' 替换为: + • DSS 模式 → '+' + • DBStor 模式 → 跳过替换 + • NFS 模式 → '/data' """ if not os.path.exists(sql_file): raise FileNotFoundError(sql_file) perm = int(FileUtil.get_permission_value(sql_file), 8) + enable_dbstor = os.getenv("ENABLE_DBSTOR", "off").lower() == "on" if self.dss_mode: vg = self._resolve_dss_vg() target = f"+{vg}" - elif self.dbstor_mode: + elif enable_dbstor: return else: ctdb_home = os.getenv("CTDB_HOME") @@ -685,13 +686,15 @@ class DN_OLAP(Kernel): def getDssCantianDict(self) -> dict: params = {"DTC_INTERCONNECT_URL": self.get_ss_inter_url()} + enable_dbstor = os.getenv("ENABLE_DBSTOR", "off").lower() == "on" + if self.dss_mode: vg = self._resolve_dss_vg() self.logger.debug(f"[DSS] resolved private VG = {vg}") params["CONTROL_FILES"] = ( f'(+{vg}/ctrl1, +{vg}/ctrl2, +{vg}/ctrl3)' ) - elif self.dbstor_mode: + elif enable_dbstor: params["CONTROL_FILES"] = "(-ctrl1, -ctrl2, -ctrl3)" else: ctdb_home = os.getenv("CTDB_HOME") diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 68eba29d..f3e06564 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -1944,7 +1944,7 @@ Common options: FileUtil.writeFile(prof, [ "export ENABLE_DBSTOR=on", f"export LINK_TYPE={link_type}", - f"export STORAGE_VLAN_IP={storage_vlan_ip}", + f'export STORAGE_VLAN_IP="{storage_vlan_ip}"', f"export DBSTOR_HOME={dbstor_home_cfg}", r"export LD_LIBRARY_PATH=$DBSTOR_HOME/add-on:$DBSTOR_HOME/lib:$LD_LIBRARY_PATH", f"export NODE_ID={node_index}" @@ -1967,7 +1967,7 @@ Common options: vlan_ip = ",".join(ips) for prof in (userProfile, self.user_env_file): FileUtil.deleteLine(prof, r"^\s*export\s*CANTIAN_VLAN_IP=.*$") - FileUtil.writeFile(prof, [f"export CANTIAN_VLAN_IP={vlan_ip}"]) + FileUtil.writeFile(prof, [f'export 
CANTIAN_VLAN_IP="{vlan_ip}"']) self.logger.debug(f"Flush CANTIAN_VLAN_IP={vlan_ip}") else: self.logger.debug("cantian_vlan_ip empty or not defined for this node; skip env export.") -- Gitee From 48c02911c26c9bc7fb3b39cba39b75669a9bfb9c Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 10:46:02 +0800 Subject: [PATCH 116/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/preinstall/PreinstallImpl.py | 2 +- script/local/PreInstallUtility.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py index ead6ea97..f7e193f1 100644 --- a/script/impl/preinstall/PreinstallImpl.py +++ b/script/impl/preinstall/PreinstallImpl.py @@ -1801,7 +1801,7 @@ class PreinstallImpl: # set arm optimization self.setArmOptimization() # fix server package mode - # self.fixServerPackageOwner() + self.fixServerPackageOwner() # cantian 暂时忽略 # unreg the disk of the dss and about self.dss_init() diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index f3e06564..c4994322 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -3163,7 +3163,8 @@ Common options: :return: """ self.fix_owner_and_permission() - self.backup_om_scripts() + # cantian 没有使用gs_cgroup,直接去掉 + # self.backup_om_scripts() def dss_init(self): ''' -- Gitee From 5858d989eb3ae5c7766b779aed8704eff445b541 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 11:05:28 +0800 Subject: [PATCH 117/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index c4994322..a2822c78 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -3042,7 +3042,7 @@ Common options: FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, toolPath, recursive=True) script_dir = f"{toolPath}/script" - FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, script_dir, recursive=True) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.sha256" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.tar.gz" % toolPath) -- Gitee From 7263e937681b98f4489d42143e5046616b801b84 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 15:56:25 +0800 Subject: [PATCH 118/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index 6b0c580e..178d950f 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -397,12 +397,15 @@ class Install(LocalBaseOM): self.check_clib_bin(clib_bin, bin_) app_bin = os.path.realpath(os.path.join(dss_app, bin_)) if os.path.isfile(clib_bin): - mv_cmd = r'\mv {0} {1}'.format(clib_bin, app_bin) - status, output = subprocess.getstatusoutput(mv_cmd) + cp_cmd = rf'\cp -f {clib_bin} {app_bin}' + status, output = subprocess.getstatusoutput(cp_cmd) if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % mv_cmd + + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cp_cmd + "Error:\n%s" % output) + 
FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, app_bin) + FileUtil.change_caps(f"{DefaultValue.CAP_ADM},{DefaultValue.CAP_WIO}", app_bin) + link_cmd = 'ln -snf {0}/perctrl {1}'.format(dss_app, bin_path) self.logger.debug(f"The cmd of the link: {link_cmd}.") status, output = subprocess.getstatusoutput(link_cmd) -- Gitee From 99b8cb06e25dedeaeafb75e0119f77b12154b388 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 19:44:59 +0800 Subject: [PATCH 119/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/Kernel.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 01c8721c..47e5b3cd 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -450,6 +450,17 @@ class Kernel(BaseComponent): """ self.logger.log("Cleaning instance.") + # 防止在要删除的没有了下面 + gausshome = os.getenv("GAUSSHOME", "").strip() + if gausshome: + parent_dir = os.path.dirname(gausshome.rstrip(os.sep)) + cwd = os.getcwd() + if cwd.startswith(gausshome.rstrip(os.sep) + os.sep): + os.chdir(parent_dir) + self.logger.debug( + f"CWD '{cwd}' under GAUSSHOME detected, switched to '{parent_dir}'" + ) + # tablespace data directory tbsDirList = self.getInstanceTblspcDirs(instNodeName) -- Gitee From ad48a6d05ac129f5e8014da78258ad9ae4e9ee91 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 19:45:20 +0800 Subject: [PATCH 120/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/Kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 47e5b3cd..51a56da6 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -450,7 +450,7 @@ class Kernel(BaseComponent): """ self.logger.log("Cleaning instance.") - # 防止在要删除的没有了下面 + # 防止在要删除的目录了下面 gausshome = os.getenv("GAUSSHOME", "").strip() if gausshome: parent_dir = os.path.dirname(gausshome.rstrip(os.sep)) -- Gitee From 574218277dac1ad0e1c2bf94a479dbcfe56ef1de Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 20:10:12 +0800 Subject: [PATCH 121/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/Kernel.py | 11 ---------- script/impl/uninstall/UninstallImpl.py | 25 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 51a56da6..01c8721c 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -450,17 +450,6 @@ class Kernel(BaseComponent): """ self.logger.log("Cleaning instance.") - # 防止在要删除的目录了下面 - gausshome = os.getenv("GAUSSHOME", "").strip() - if gausshome: - parent_dir = os.path.dirname(gausshome.rstrip(os.sep)) - cwd = os.getcwd() - if cwd.startswith(gausshome.rstrip(os.sep) + os.sep): - os.chdir(parent_dir) - self.logger.debug( - f"CWD '{cwd}' under GAUSSHOME detected, switched to '{parent_dir}'" - ) - # tablespace data directory tbsDirList = self.getInstanceTblspcDirs(instNodeName) diff --git a/script/impl/uninstall/UninstallImpl.py 
b/script/impl/uninstall/UninstallImpl.py index 6a0d122e..f15b48b8 100644 --- a/script/impl/uninstall/UninstallImpl.py +++ b/script/impl/uninstall/UninstallImpl.py @@ -245,6 +245,31 @@ class UninstallImpl: self.logger.debug("No need to delete data.", "constant") return + # 如果当前目录在 GPHOME 及其子目录下,则切换到GPHOME父目录 + gphome = EnvUtil.getEnv("GPHOME") + if gphome: + gphome_real = os.path.realpath(gphome) + parent_dir = os.path.dirname(gphome_real) + cwd = os.getcwd() + if cwd.startswith(gphome_real + os.sep): + os.chdir(parent_dir) + self.logger.debug( + f"CWD '{cwd}' under GPHOME detected, switched to '{parent_dir}'" + ) + + # DBSTOR模式清理 KMC 信号量(key = 0x20161227)和 加密 信号量(key = 0x20161316) + if EnvUtil.getEnv("ENABLE_DBSTOR", "off").lower() == "on": + self.logger.debug("Cleaning DBStor KMC and encryption semaphores.", "constant") + + cmd_kmc = "lsip -s -c | grep 0x20161227 | awk '{print $3}' | xargs -r -n1 ipcrm -s" + CmdExecutor.execCommandWithMode(cmd_kmc, self.sshTool, + self.localMode or self.isSingle, + self.mpprcFile) + cmd_enc = "lsip -s -c | grep 0x20161316 | awk '{print $3}' | xargs -r -n1 ipcrm -s" + CmdExecutor.execCommandWithMode(cmd_enc, self.sshTool, + self.localMode or self.isSingle, + self.mpprcFile) + # Clean instance data cmd = "%s -U %s -l %s" % ( OMCommand.getLocalScript("Local_Clean_Instance"), self.user, -- Gitee From 6d8c5c76d4474a4640a1d89ea4bb09a673163954 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 20:30:32 +0800 Subject: [PATCH 122/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/uninstall/UninstallImpl.py | 39 +++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/script/impl/uninstall/UninstallImpl.py b/script/impl/uninstall/UninstallImpl.py index f15b48b8..9b63a0e5 100644 --- a/script/impl/uninstall/UninstallImpl.py +++ b/script/impl/uninstall/UninstallImpl.py @@ -257,19 +257,6 @@ class UninstallImpl: f"CWD '{cwd}' under GPHOME detected, switched to '{parent_dir}'" ) - # DBSTOR模式清理 KMC 信号量(key = 0x20161227)和 加密 信号量(key = 0x20161316) - if EnvUtil.getEnv("ENABLE_DBSTOR", "off").lower() == "on": - self.logger.debug("Cleaning DBStor KMC and encryption semaphores.", "constant") - - cmd_kmc = "lsip -s -c | grep 0x20161227 | awk '{print $3}' | xargs -r -n1 ipcrm -s" - CmdExecutor.execCommandWithMode(cmd_kmc, self.sshTool, - self.localMode or self.isSingle, - self.mpprcFile) - cmd_enc = "lsip -s -c | grep 0x20161316 | awk '{print $3}' | xargs -r -n1 ipcrm -s" - CmdExecutor.execCommandWithMode(cmd_enc, self.sshTool, - self.localMode or self.isSingle, - self.mpprcFile) - # Clean instance data cmd = "%s -U %s -l %s" % ( OMCommand.getLocalScript("Local_Clean_Instance"), self.user, @@ -289,6 +276,31 @@ class UninstallImpl: self.logger.log("Successfully deleted instances.", "constant") + def clean_dbstor_semaphores(self): + """ + Clean DBStor KMC and encryption semaphores on this node. + Only runs when ENABLE_DBSTOR=on. 
+ """ + if EnvUtil.getEnv("ENABLE_DBSTOR", "off").lower() != "on": + self.logger.debug("ENABLE_DBSTOR is not 'on'; skip DBStor semaphore cleanup.") + return + + self.logger.debug("Cleaning DBStor semaphores (KMC and encryption keys).") + + for sem_key in ("0x20161227", "0x20161316"): + cmd = ( + f"lsip -s -c | grep {sem_key} " + "| awk '{print $3}' " + "| xargs -r -n1 ipcrm -s" + ) + CmdExecutor.execCommandWithMode( + cmd, + self.sshTool, + self.localMode or self.isSingle, + self.mpprcFile + ) + self.logger.debug(f"Removed DBStor semaphore with key {sem_key}.") + def CleanTmpFiles(self): """ function: clean temp files @@ -530,6 +542,7 @@ class UninstallImpl: self.checkUninstall() self.StopCluster() self.CleanInstance() + self.clean_dbstor_semaphores() self.CleanTmpFiles() self.UninstallApp() self.ReCleanEtcdPath() -- Gitee From c761b6da261e343fbf240f503df5e8bdc13ec3d9 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 21:29:00 +0800 Subject: [PATCH 123/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/uninstall/UninstallImpl.py | 27 ++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/script/impl/uninstall/UninstallImpl.py b/script/impl/uninstall/UninstallImpl.py index 9b63a0e5..349885ae 100644 --- a/script/impl/uninstall/UninstallImpl.py +++ b/script/impl/uninstall/UninstallImpl.py @@ -104,6 +104,21 @@ class UninstallImpl: """ # Checking uninstallation self.logger.log("Checking uninstallation.", "addStep") + + # If cwd equals parent_dir or is within its subtree, abort + gphome = EnvUtil.getEnv("GPHOME", "") + if gphome: + real_gphome = os.path.realpath(gphome) + parent_dir = os.path.dirname(real_gphome) + cwd = os.path.realpath(os.getcwd()) + + if cwd == parent_dir or cwd.startswith(parent_dir + os.sep): + raise Exception( + f"Current working directory '{cwd}' is inside the parent directory " + f"of GPHOME ('{parent_dir}').\n" + "Please change to a directory outside of this path and retry the uninstall." 
+ ) + # use check uninstall to check every nodes cmd = "%s -R '%s' -U %s -l %s" % ( OMCommand.getLocalScript("Local_Check_Uninstall"), @@ -245,18 +260,6 @@ class UninstallImpl: self.logger.debug("No need to delete data.", "constant") return - # 如果当前目录在 GPHOME 及其子目录下,则切换到GPHOME父目录 - gphome = EnvUtil.getEnv("GPHOME") - if gphome: - gphome_real = os.path.realpath(gphome) - parent_dir = os.path.dirname(gphome_real) - cwd = os.getcwd() - if cwd.startswith(gphome_real + os.sep): - os.chdir(parent_dir) - self.logger.debug( - f"CWD '{cwd}' under GPHOME detected, switched to '{parent_dir}'" - ) - # Clean instance data cmd = "%s -U %s -l %s" % ( OMCommand.getLocalScript("Local_Clean_Instance"), self.user, -- Gitee From 4a262fb50d9a0d63a3f9c38dee9d28060cab5fb3 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 21:37:04 +0800 Subject: [PATCH 124/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/uninstall/UninstallImpl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/uninstall/UninstallImpl.py b/script/impl/uninstall/UninstallImpl.py index 349885ae..5fa46740 100644 --- a/script/impl/uninstall/UninstallImpl.py +++ b/script/impl/uninstall/UninstallImpl.py @@ -114,7 +114,7 @@ class UninstallImpl: if cwd == parent_dir or cwd.startswith(parent_dir + os.sep): raise Exception( - f"Current working directory '{cwd}' is inside the parent directory " + f"[ERROR ] Current working directory '{cwd}' is inside the parent directory " f"of GPHOME ('{parent_dir}').\n" "Please change to a directory outside of this path and retry the uninstall." ) -- Gitee From d4bd902d6d3d22a97452a65fd5e82cb72fa1f0f0 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Tue, 29 Jul 2025 22:38:29 +0800 Subject: [PATCH 125/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index a2822c78..44ce9a2f 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -3041,9 +3041,8 @@ Common options: FileUtil.changeOwner(self.user, toolPath, recursive=True, link=True) FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, toolPath, recursive=True) - script_dir = f"{toolPath}/script" - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, - script_dir, recursive=True) + FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, + "%s/script/gs_*" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.sha256" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.tar.gz" % toolPath) FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, "%s/*.tar.bz2" % -- Gitee From 594322447cd330dafcae0b292a87bc347d3daedb Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 30 Jul 2025 14:13:59 +0800 Subject: [PATCH 126/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index c32f30f0..013fa39f 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -664,7 +664,9 @@ class DN_OLAP(Kernel): # Enable DSS 
explicitly when we're in DSS mode if self.dss_mode: + vg = self._resolve_dss_vg() cantian_dict["ENABLE_DSS"] = "TRUE" + cantian_dict["SHARED_PATH"] = f"+{vg}" # Enable dbstor if os.getenv("ENABLE_DBSTOR", "off").lower() == "on": -- Gitee From 92f72596f7a5251ef435af2a4c234ffb49e6d5d8 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 30 Jul 2025 17:38:50 +0800 Subject: [PATCH 127/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 44 ++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index e28ed2f2..85379d28 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -594,6 +594,9 @@ class InstallImpl: output: NA """ # check the --autostart parameter + mysql_single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() + is_mysql_single = (mysql_single_flag == "on") + self.context.logger.debug("The start mode is yes, delete cms_need_to_switchover file.") home_bin_path = self.context.clusterInfo.appPath + "/bin/" cmFile = home_bin_path + "cms_need_to_switchover" @@ -605,19 +608,38 @@ class InstallImpl: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s" % str(output)) self.context.logger.debug("Successfully deleting cms_need_to_switchover file.") - # Start cluster applications if self.context.clusterInfo.enable_dcf != 'on': - self.context.cmCons[0].startCluster(self.context.user, - 0, - self.context.time_out, - isSwitchOver=True, - isSingle=self.context.isSingle) + self.context.cmCons[0].startCluster( + self.context.user, + 0, + self.context.time_out, + isSwitchOver=True, + isSingle=self.context.isSingle + ) + if is_mysql_single: + self.context.cmCons[0].startCluster( + self.context.user, + 1, + self.context.time_out, + isSwitchOver=True, + isSingle=self.context.isSingle + ) else: - self.context.cmCons[0].startCluster(self.context.user, - 0, - self.context.time_out, - isSwitchOver=False, - isSingle=self.context.isSingle) + self.context.cmCons[0].startCluster( + self.context.user, + 0, + self.context.time_out, + isSwitchOver=False, + isSingle=self.context.isSingle + ) + if is_mysql_single: + self.context.cmCons[0].startCluster( + self.context.user, + 1, + self.context.time_out, + isSwitchOver=False, + isSingle=self.context.isSingle + ) def startCluster(self): if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo): -- Gitee From be60234cd770e37fc1071a0d10f5bc3a80bf35e1 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Wed, 30 Jul 2025 17:50:46 +0800 Subject: [PATCH 128/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DSS/dss_comp.py | 2 +- .../gspylib/component/Kernel/DN_OLAP/DN_OLAP.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index d26e67ce..85d39890 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -334,7 +334,7 @@ class DssInitCfg(): def __init__(self, inst_id='', dss_home='', - dss_nodes_list='', + dss_nodes_list='0:127.0.0.1:1611', exist_so=True, dss_ssl=True, cert_path='', diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 
013fa39f..7356d8a4 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -420,9 +420,11 @@ class DN_OLAP(Kernel): 3) 关闭 Cantian: • 优雅 shutdown • 若在超时时间内未退出 → 抛出异常 + 4) 若 CANTIAN_MYSQL_SINGLE=on,则在 $MYSQL_HOME 创建 first_start 标识文件 """ - pgdata = self.instInfo.datadir + single_mode = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() == "on" + pgdata = self.instInfo.datadir nomount_cmd = f"nohup cantiand nomount -D {pgdata} >/dev/null 2>&1 &" self.logger.debug(nomount_cmd) st, out = CmdUtil.retryGetstatusoutput(nomount_cmd) @@ -453,6 +455,19 @@ class DN_OLAP(Kernel): self.logger.debug("Cantiand stopped successfully.") + # 4) 单进程模式 → 创建 first_start 标识文件 + if single_mode: + mysql_home = os.getenv("MYSQL_HOME") + if mysql_home: + marker_path = os.path.join(mysql_home, "first_start") + try: + FileUtil.createFile(marker_path, True, DefaultValue.KEY_FILE_MODE) + self.logger.debug(f"[SingleProcess] created marker file: {marker_path}") + except Exception as exc: + self.logger.warning(f"[SingleProcess] failed to create {marker_path}: {exc}") + else: + self.logger.debug("[SingleProcess] MYSQL_HOME is not set; skip first_start marker.") + @Dss.catch_err(exist_so=True) def initInstance(self): """ -- Gitee From 653edbedf2eca033e6f973fece1c51d513670747 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 31 Jul 2025 14:24:18 +0800 Subject: [PATCH 129/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/component/DSS/dss_comp.py | 2 +- script/local/Install.py | 3 --- script/local/PreInstallUtility.py | 24 +++++++++++++++++------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index 85d39890..d26e67ce 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -334,7 +334,7 @@ class DssInitCfg(): def __init__(self, inst_id='', dss_home='', - dss_nodes_list='0:127.0.0.1:1611', + dss_nodes_list='', exist_so=True, dss_ssl=True, cert_path='', diff --git a/script/local/Install.py b/script/local/Install.py index 178d950f..d787ffa7 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -403,9 +403,6 @@ class Install(LocalBaseOM): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cp_cmd + "Error:\n%s" % output) - FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, app_bin) - FileUtil.change_caps(f"{DefaultValue.CAP_ADM},{DefaultValue.CAP_WIO}", app_bin) - link_cmd = 'ln -snf {0}/perctrl {1}'.format(dss_app, bin_path) self.logger.debug(f"The cmd of the link: {link_cmd}.") status, output = subprocess.getstatusoutput(link_cmd) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 44ce9a2f..581a2652 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -933,14 +933,24 @@ Common options: ''' dss_inst_ini = os.path.realpath( - os.path.join(dss_home, 'cfg', 'dss_inst.ini')) + os.path.join(dss_home, 'cfg', 'dss_inst.ini') + ) + + # 若 XML 未配置,回退到单节点默认值 + dss_nodes_list = (self.clusterInfo.dss_config or '').strip() + if not dss_nodes_list: + dss_nodes_list = '0:127.0.0.1:1611' + context = list( - DssInitCfg(dss_id, - dss_home, - self.clusterInfo.dss_config, - dss_ssl=False)) - FileUtil.write_custom_context( - dss_inst_ini, context, authority=DefaultValue.KEY_FILE_MODE_IN_OS) + DssInitCfg( + inst_id=dss_id, + dss_home=dss_home, + 
dss_nodes_list=dss_nodes_list, + dss_ssl=False + ) + ) + + FileUtil.write_custom_context(dss_inst_ini, context, authority=DefaultValue.KEY_FILE_MODE_IN_OS ) def prepare_dss_vg_ini(self, dss_home): ''' -- Gitee From 2c9140476bef81b6d154f0c81d171a32ba30d8a9 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 31 Jul 2025 16:25:53 +0800 Subject: [PATCH 130/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 46 +++++++++--------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 85379d28..2983c790 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -608,38 +608,20 @@ class InstallImpl: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s" % str(output)) self.context.logger.debug("Successfully deleting cms_need_to_switchover file.") - if self.context.clusterInfo.enable_dcf != 'on': - self.context.cmCons[0].startCluster( - self.context.user, - 0, - self.context.time_out, - isSwitchOver=True, - isSingle=self.context.isSingle - ) - if is_mysql_single: - self.context.cmCons[0].startCluster( - self.context.user, - 1, - self.context.time_out, - isSwitchOver=True, - isSingle=self.context.isSingle - ) - else: - self.context.cmCons[0].startCluster( - self.context.user, - 0, - self.context.time_out, - isSwitchOver=False, - isSingle=self.context.isSingle - ) - if is_mysql_single: - self.context.cmCons[0].startCluster( - self.context.user, - 1, - self.context.time_out, - isSwitchOver=False, - isSingle=self.context.isSingle - ) + node_id = 1 if is_mysql_single else 0 + is_switch = (self.context.clusterInfo.enable_dcf != "on") + + self.context.cmCons[0].startCluster( + self.context.user, + node_id, + self.context.time_out, + isSwitchOver=is_switch, + isSingle=self.context.isSingle, + ) + + self.context.logger.debug( + f"Start cluster done (node_id={node_id}, switch_over={is_switch})." 
+ ) def startCluster(self): if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo): -- Gitee From 20f7decf835aea6bb3ce7c8a6b2ca7f01c060dc0 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 31 Jul 2025 21:26:34 +0800 Subject: [PATCH 131/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 2983c790..33c72067 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -595,7 +595,7 @@ class InstallImpl: """ # check the --autostart parameter mysql_single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() - is_mysql_single = (mysql_single_flag == "on") + is_mysql_single = mysql_single_flag in ("on", "true", "yes", "1") self.context.logger.debug("The start mode is yes, delete cms_need_to_switchover file.") home_bin_path = self.context.clusterInfo.appPath + "/bin/" -- Gitee From 0d6b80ce1ce229c127e75cb65b7c43c2acbe23a6 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 31 Jul 2025 21:35:10 +0800 Subject: [PATCH 132/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 33c72067..3f4334c9 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -593,6 +593,8 @@ class InstallImpl: input : NA output: NA """ + os.environ[DefaultValue.MPPRC_FILE_ENV] = self.context.mpprcFile + # check the --autostart parameter mysql_single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() is_mysql_single = mysql_single_flag in ("on", "true", "yes", "1") -- Gitee From cf6d9f2189422d69c18769fbb23793c01b82598d Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Thu, 31 Jul 2025 22:46:59 +0800 Subject: [PATCH 133/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 3f4334c9..61cd9015 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -593,7 +593,19 @@ class InstallImpl: input : NA output: NA """ - os.environ[DefaultValue.MPPRC_FILE_ENV] = self.context.mpprcFile + mpprc = self.context.mpprcFile + mysql_single_flag = "off" + if mpprc and os.path.isfile(mpprc): + try: + mysql_single_flag = subprocess.check_output( + f"bash -c 'source {mpprc}; echo -n $CANTIAN_MYSQL_SINGLE'", + shell=True, + text=True + ).strip().lower() or "off" + except subprocess.CalledProcessError as exc: + self.context.logger.warning( + f"source {mpprc} failed ({exc}); treat mysql_single=off" + ) # check the --autostart parameter mysql_single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() -- Gitee From a7792179ddf8cb8f9030d904d8a21877e4a06687 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 1 Aug 2025 09:11:18 +0800 Subject: [PATCH 134/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/InstallImpl.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/script/impl/install/InstallImpl.py b/script/impl/install/InstallImpl.py index 61cd9015..997de838 100644 --- a/script/impl/install/InstallImpl.py +++ b/script/impl/install/InstallImpl.py @@ -593,6 +593,7 @@ class InstallImpl: input : NA output: NA """ + # check the --autostart parameter mpprc = self.context.mpprcFile mysql_single_flag = "off" if mpprc and os.path.isfile(mpprc): @@ -607,9 +608,8 @@ class InstallImpl: f"source {mpprc} failed ({exc}); treat mysql_single=off" ) - # check the --autostart parameter - mysql_single_flag = os.getenv("CANTIAN_MYSQL_SINGLE", "off").strip().lower() is_mysql_single = mysql_single_flag in ("on", "true", "yes", "1") + self.context.logger.debug("CANTIAN_MYSQL_SINGLE=%r", mysql_single_flag) self.context.logger.debug("The start mode is yes, delete cms_need_to_switchover file.") home_bin_path = self.context.clusterInfo.appPath + "/bin/" -- Gitee From 3fdf51a677ca853f11e0919b5ff2e126c51516c7 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Fri, 1 Aug 2025 10:35:10 +0800 Subject: [PATCH 135/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gspylib/component/DBStor/dbstor_comp.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py index 71028077..f3ba8ceb 100644 --- a/script/gspylib/component/DBStor/dbstor_comp.py +++ b/script/gspylib/component/DBStor/dbstor_comp.py @@ -262,21 +262,32 @@ class DBStor(BaseComponent): src_cfg = os.path.join(dbstor_home, "conf", "dbs", "dbstor_config.ini") dst_cfg = os.path.join(dst_dbs, "dbstor_config.ini") FileUtil.cpFile(src_cfg, dst_cfg) - update_ini_file(dst_cfg, {"DBSTOR_OWNER_NAME": "cm"}, + + cluster_name = os.getenv("GS_CLUSTER_NAME", "") + cluster_id = calc_cluster_id(cluster_name) + node_id = int(os.getenv("NODE_ID", "0")) + inst_id, cluster_uuid = run_obtains_lsid(cluster_id, node_id, 0, 0) + + extra_kv = { + "DBSTOR_OWNER_NAME": "cm", + "CLUSTER_NAME": cluster_name, + "CLUSTER_UUID": cluster_uuid, + "INST_ID": inst_id + } + update_ini_file(dst_cfg, extra_kv, DefaultValue.KEY_FILE_MODE, self.logger) link_type = os.getenv("LINK_TYPE", "0").strip() gauss_home = EnvUtil.getEnv("GAUSSHOME") - src_cfg_name = "node_config_tcp_cm.xml" if link_type == "0" else "node_config_rdma_cm.xml" - src_cfg_path = os.path.join(gauss_home, "share", "dbstor", src_cfg_name) - dst_cfg_path = os.path.join(dst_infra, "node_config.xml") - FileUtil.cpFile(src_cfg_path, dst_cfg_path) + cfg_name = "node_config_tcp_cm.xml" if link_type == "0" else "node_config_rdma_cm.xml" + FileUtil.cpFile(os.path.join(gauss_home, "share", "dbstor", cfg_name), + os.path.join(dst_infra, "node_config.xml")) - src_osd = os.path.join(gauss_home, "share", "dbstor", "osd.cfg") - dst_osd = os.path.join(dst_infra, "osd.cfg") - FileUtil.cpFile(src_osd, dst_osd) + FileUtil.cpFile(os.path.join(gauss_home, "share", "dbstor", "osd.cfg"), + os.path.join(dst_infra, "osd.cfg")) - self.logger and self.logger.debug(f"Synced CM dbstor conf → {dst_conf}") + self.logger and self.logger.debug( + f"Synced CM dbstor conf → {dst_cfg} (owner=cm, cluster={cluster_name}).") def install(self, nodeName: str = "", dbInitParams: str = ""): pass -- Gitee From 12c2d231f21f3cbcd8ac3dd62c2ea019db7d7232 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 17:23:01 +0800 Subject: [PATCH 136/144] 
=?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index d787ffa7..2b7a8b26 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -866,16 +866,23 @@ class Install(LocalBaseOM): def __createStaticConfig(self): """ - function: Save cluster info to static config - input : NA - output: NA + 生成 cluster_static_config 文件 + - 如果是单进程模式(CANTIAN_MYSQL_SINGLE=on),则将 dataNode1 的路径加入 /mysql/data 后缀 """ staticConfigPath = "%s/bin/cluster_static_config" % self.installPath - # save static config nodeId = self.dbNodeInfo.id + + single_flag = os.environ.get("CANTIAN_MYSQL_SINGLE", "off").strip().lower() + self.logger.debug(f"CANTIAN_MYSQL_SINGLE is {single_flag}: patching dataNode1 paths") + if single_flag in ("on", "true", "yes", "1"): + for dbNode in self.clusterInfo.dbNodes: + for dnInst in dbNode.datanodes: + if not dnInst.datadir.endswith("/mysql/data"): + dnInst.datadir = os.path.join(dnInst.datadir, "mysql/data") + self.logger.debug(f"Modified datadir: {dnInst.datadir}") + self.clusterInfo.saveToStaticConfig(staticConfigPath, nodeId) - FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, staticConfigPath) - FileUtil.changeOwner(self.user, staticConfigPath, False) + self.logger.debug("Successfully generated cluster_static_config at %s" % staticConfigPath) def __bakInstallPackage(self): """ -- Gitee From 010f35cc1547aca4f27e35b866076e99a52f3a3f Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 19:21:38 +0800 Subject: [PATCH 137/144] =?UTF-8?q?CM=E5=9B=9E=E9=80=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/Install.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/script/local/Install.py b/script/local/Install.py index 2b7a8b26..d787ffa7 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -866,23 +866,16 @@ class Install(LocalBaseOM): def __createStaticConfig(self): """ - 生成 cluster_static_config 文件 - - 如果是单进程模式(CANTIAN_MYSQL_SINGLE=on),则将 dataNode1 的路径加入 /mysql/data 后缀 + function: Save cluster info to static config + input : NA + output: NA """ staticConfigPath = "%s/bin/cluster_static_config" % self.installPath + # save static config nodeId = self.dbNodeInfo.id - - single_flag = os.environ.get("CANTIAN_MYSQL_SINGLE", "off").strip().lower() - self.logger.debug(f"CANTIAN_MYSQL_SINGLE is {single_flag}: patching dataNode1 paths") - if single_flag in ("on", "true", "yes", "1"): - for dbNode in self.clusterInfo.dbNodes: - for dnInst in dbNode.datanodes: - if not dnInst.datadir.endswith("/mysql/data"): - dnInst.datadir = os.path.join(dnInst.datadir, "mysql/data") - self.logger.debug(f"Modified datadir: {dnInst.datadir}") - self.clusterInfo.saveToStaticConfig(staticConfigPath, nodeId) - self.logger.debug("Successfully generated cluster_static_config at %s" % staticConfigPath) + FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, staticConfigPath) + FileUtil.changeOwner(self.user, staticConfigPath, False) def __bakInstallPackage(self): """ -- Gitee From 0bc8099bd807ffefc2148c5aff20d962312c6421 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 20:21:02 +0800 Subject: [PATCH 138/144] =?UTF-8?q?=E9=9D=99=E6=80=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/OLAP/InstallImplOLAP.py | 63 +++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py index 559d5cc8..aca56c26 100644 --- a/script/impl/install/OLAP/InstallImplOLAP.py +++ b/script/impl/install/OLAP/InstallImplOLAP.py @@ -21,6 +21,7 @@ import subprocess import os import sys import re +import tempfile sys.path.append(sys.path[0] + "/../../../") from gspylib.common.Common import DefaultValue @@ -268,6 +269,66 @@ class InstallImplOLAP(InstallImpl): CmdExecutor.execCommandLocally(' && '.join(cmd)) self.context.logger.log("End to clean up the dss luns.") + def __refresh_static_config_mysql_single(self) -> None: + """当 CANTIAN_MYSQL_SINGLE=on 时刷新 cluster_static_config。""" + mpprc = self.context.mpprcFile + mysql_single_flag = "off" + if mpprc and os.path.isfile(mpprc): + try: + mysql_single_flag = subprocess.check_output( + f"bash -c 'source {mpprc}; echo -n $CANTIAN_MYSQL_SINGLE'", + shell=True, text=True + ).strip().lower() or "off" + except subprocess.CalledProcessError as exc: + self.context.logger.warning( + "source %s failed (%s); treat mysql_single=off", mpprc, exc + ) + + if mysql_single_flag not in ("on", "true", "yes", "1"): + self.context.logger.debug( + "CANTIAN_MYSQL_SINGLE=%r; skip static-config refresh.", + mysql_single_flag + ) + return + + pgdata_path = os.environ.get("PGDATA", "").strip() + if not pgdata_path: + raise EnvironmentError( + "[mysql-single] PGDATA environment variable is not set; " + "cannot refresh cluster_static_config." + ) + + old_path = pgdata_path + new_path = f"{pgdata_path}/mysql/data" + self.context.logger.debug("[mysql-single] PGDATA=%s", old_path) + + xml_src = self.context.xmlFile + if not xml_src or not os.path.isfile(xml_src): + self.context.logger.warning("xmlFile %s not found; skip refresh.", xml_src) + return + + tmp_dir = tempfile.mkdtemp(prefix="mysql_single_xml_") + tmp_xml = os.path.join(tmp_dir, "cluster_single.xml") + FileUtil.cpFile(xml_src, tmp_xml, True) + + content = "".join(FileUtil.readFile(tmp_xml)).replace(old_path, new_path) + FileUtil.writeFile(tmp_xml, [content], "w") + + cmd = ( + f"bash -c 'source {mpprc}; " + f"gs_om -t generateconf -X {tmp_xml} --distribute'" + ) + self.context.logger.debug("[mysql-single] %s", cmd) + st, out = CmdUtil.retryGetstatusoutput(cmd) + if st != 0: + raise Exception( + f"[mysql-single] regenerate cluster_static_config failed:\n{out}" + ) + + self.context.logger.info( + "[mysql-single] cluster_static_config refreshed & distributed." 
+ ) + def initNodeInstance(self): """ function: init instance applications @@ -309,6 +370,8 @@ class InstallImplOLAP(InstallImpl): self.context.sshTool, self.context.isSingle, parallelism=parallelism) + # 单进程mysql需要更新静态配置文件 + self.__refresh_static_config_mysql_single() self.context.logger.debug("Successfully initialized node instance.") def configInstance(self): -- Gitee From 87a513495ed42b580cb507e50c7b7795b19bf333 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 20:40:39 +0800 Subject: [PATCH 139/144] =?UTF-8?q?=E9=9D=99=E6=80=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 581a2652..c9ed620f 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -917,12 +917,15 @@ Common options: if self.clusterInfo.enable_dss == 'on': idx = DssConfig.get_current_dss_id_by_dn(self.clusterInfo.dbNodes, self.dbNodeInfo) - if idx != -1 and not EnvUtil.is_fuzzy_upgrade( - self.user, self.logger, self.mpprcFile): + gauss_env = EnvUtil.getEnvironmentParameterValue('GAUSS_ENV', self.user, env_file=self.mpprcFile) + is_upgrade = (gauss_env == "2") + if self.logger: + self.logger.debug("GAUSS_ENV=%s → is_upgrade=%s", gauss_env, is_upgrade) + + if idx != -1 and not is_upgrade: self.prepare_dss_home_path(idx) else: - self.logger.debug('In dss-mode, the dn does not ' \ - 'exist on the current node or in upgrade.') + self.logger.debug("In dss-mode: DN not on this node or cluster is upgrading." ) self.logger.debug("Successfully created paths for cluster.") -- Gitee From d504809bbc410b84e9c81a5cddb8316c977653ac Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 21:08:01 +0800 Subject: [PATCH 140/144] =?UTF-8?q?=E9=9D=99=E6=80=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/local/PreInstallUtility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index c9ed620f..59a5e8a5 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -920,7 +920,7 @@ Common options: gauss_env = EnvUtil.getEnvironmentParameterValue('GAUSS_ENV', self.user, env_file=self.mpprcFile) is_upgrade = (gauss_env == "2") if self.logger: - self.logger.debug("GAUSS_ENV=%s → is_upgrade=%s", gauss_env, is_upgrade) + self.logger.debug(f"GAUSS_ENV={gauss_env} → is_upgrade={is_upgrade}") if idx != -1 and not is_upgrade: self.prepare_dss_home_path(idx) -- Gitee From caeeb4f5963cf849d1b74abdaecea91bfbb76ba5 Mon Sep 17 00:00:00 2001 From: Duanshaohua Date: Mon, 4 Aug 2025 21:23:28 +0800 Subject: [PATCH 141/144] =?UTF-8?q?=E9=9D=99=E6=80=81=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/install/OLAP/InstallImplOLAP.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py index aca56c26..dabcc0ab 100644 --- a/script/impl/install/OLAP/InstallImplOLAP.py +++ b/script/impl/install/OLAP/InstallImplOLAP.py @@ -318,14 +318,14 @@ class 
From caeeb4f5963cf849d1b74abdaecea91bfbb76ba5 Mon Sep 17 00:00:00 2001
From: Duanshaohua
Date: Mon, 4 Aug 2025 21:23:28 +0800
Subject: Refresh the static configuration file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/impl/install/OLAP/InstallImplOLAP.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py
index aca56c26..dabcc0ab 100644
--- a/script/impl/install/OLAP/InstallImplOLAP.py
+++ b/script/impl/install/OLAP/InstallImplOLAP.py
@@ -318,14 +318,14 @@ class InstallImplOLAP(InstallImpl):
             f"bash -c 'source {mpprc}; "
             f"gs_om -t generateconf -X {tmp_xml} --distribute'"
         )
-        self.context.logger.debug("[mysql-single] %s", cmd)
+        self.context.logger.debug(f"[mysql-single] {cmd}")
 
         st, out = CmdUtil.retryGetstatusoutput(cmd)
         if st != 0:
             raise Exception(
                 f"[mysql-single] regenerate cluster_static_config failed:\n{out}"
             )
-        self.context.logger.info(
+        self.context.logger.debug(
            "[mysql-single] cluster_static_config refreshed & distributed."
        )
--
Gitee

From f0ad8945dcff11f69d219ec1d80aa099743d8215 Mon Sep 17 00:00:00 2001
From: Duanshaohua
Date: Tue, 5 Aug 2025 16:24:39 +0800
Subject: Refresh the static configuration file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/gspylib/component/DBStor/dbstor_comp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/gspylib/component/DBStor/dbstor_comp.py b/script/gspylib/component/DBStor/dbstor_comp.py
index f3ba8ceb..03738732 100644
--- a/script/gspylib/component/DBStor/dbstor_comp.py
+++ b/script/gspylib/component/DBStor/dbstor_comp.py
@@ -266,7 +266,7 @@ class DBStor(BaseComponent):
         cluster_name = os.getenv("GS_CLUSTER_NAME", "")
         cluster_id = calc_cluster_id(cluster_name)
         node_id = int(os.getenv("NODE_ID", "0"))
-        inst_id, cluster_uuid = run_obtains_lsid(cluster_id, node_id, 0, 0)
+        inst_id, cluster_uuid = run_obtains_lsid(cluster_id, 0, 0, 0)
 
         extra_kv = {
             "DBSTOR_OWNER_NAME": "cm",
--
Gitee

From 8e4b9199aabc8b7b806bd293b71a5cc29a43a054 Mon Sep 17 00:00:00 2001
From: Duanshaohua
Date: Tue, 5 Aug 2025 20:58:06 +0800
Subject: Refresh the static configuration file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../gspylib/component/CM/CM_OLAP/CM_OLAP.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
index a6a50dd5..95d19227 100644
--- a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
+++ b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
@@ -115,6 +115,17 @@ class CM_OLAP(CM):
                 'dn_arbitrate_mode': 'share_disk',
                 'ddb_type': '2'
             })
+        elif os.getenv("ENABLE_DBSTOR", "off").strip().lower() in ("on", "true", "yes", "1"):
+            server_para_dict["ddb_type"] = "3"
+            server_para_dict["dbstor_cluster_name"] = os.getenv("GS_CLUSTER_NAME", "")
+
+            fs_names = os.getenv(
+                "DBSTOR_FS", "log_fs;page_fs;share_fs;archive_fs"
+            ).split(";")
+
+            share_fs = fs_names[2] if len(fs_names) >= 3 else "share_fs"
+            server_para_dict["dbstor_share_path"] = f"/{share_fs}/cmserver"
+
         self.setGucConfig(server_para_dict)
         self.logger.debug("Initializing cm_server instance successfully.")
@@ -175,7 +186,8 @@ class CM_OLAP(CM):
         if self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER:
             self.init_cm_server()
         elif self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT:
-            if self.dss_mode:
+            dbstor_mode = os.getenv("ENABLE_DBSTOR", "off").strip().lower() in ("on", "true", "yes", "1")
+            if self.dss_mode or dbstor_mode:
                 self.init_globals()
                 self.init_cm_res_json()
                 self.init_cm_agent()
@@ -850,6 +862,11 @@ class CM_OLAP(CM):
         dms_contrl = os.path.join(gauss_home, 'bin/dms_contrl.sh')
         dss_contrl = os.path.join(gauss_home, 'bin/dss_contrl.sh')
 
+        dbstor_mode = os.getenv("ENABLE_DBSTOR", "off").lower() == "on"
+        if dbstor_mode:
+            cmd = [str(CmResCtrlCmd(name='dms_res', attr=CmResAttr(dms_contrl)))]
+            return "source {}; {}".format(EnvUtil.getMpprcFile(), ' ;'.join(cmd))
+
         cmd = [
             str(CmResCtrlCmd(name='dms_res', attr=CmResAttr(dms_contrl))),
             str(
--
Gitee
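Note: the CM_OLAP patch above parses ENABLE_DBSTOR in two slightly different ways (a truthy-value set during initialization, a strict "on" comparison when building the resource-control command). A shared helper along these lines — purely illustrative, not part of the patch — would keep the parsing in one place:

import os

_TRUTHY = ("on", "true", "yes", "1")

def dbstor_enabled(env=None):
    """Return True when ENABLE_DBSTOR is set to any accepted 'on' value."""
    env = os.environ if env is None else env
    return env.get("ENABLE_DBSTOR", "off").strip().lower() in _TRUTHY

# Illustrative usage: gate both CM agent initialization and the
# dms_res-only resource-control command on the same predicate.
if dbstor_enabled():
    print("DBStor shared-storage mode enabled")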
From 8f85a7f0f16103520d3bbebd30926d4428b5f5b2 Mon Sep 17 00:00:00 2001
From: Duanshaohua
Date: Wed, 6 Aug 2025 09:14:22 +0800
Subject: Refresh the static configuration file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/gspylib/component/CM/CM_OLAP/CM_OLAP.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
index 95d19227..fe64d18a 100644
--- a/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
+++ b/script/gspylib/component/CM/CM_OLAP/CM_OLAP.py
@@ -123,7 +123,10 @@ class CM_OLAP(CM):
             fs_names = os.getenv(
                 "DBSTOR_FS", "log_fs;page_fs;share_fs;archive_fs"
             ).split(";")
 
-            share_fs = fs_names[2] if len(fs_names) >= 3 else "share_fs"
+            if len(fs_names) < 3 or not fs_names[2]:
+                raise Exception("DBSTOR_FS is invalid: missing share_fs filesystem")
+
+            share_fs = fs_names[2]
             server_para_dict["dbstor_share_path"] = f"/{share_fs}/cmserver"
 
         self.setGucConfig(server_para_dict)
--
Gitee
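Note: taken together, the last two CM_OLAP patches read DBSTOR_FS as a semicolon-separated filesystem list and fail fast when the third (share_fs) entry is missing. A self-contained sketch of that lookup follows; the default list and error text are copied from the patch, while the function name and example values are illustrative.

import os

def resolve_dbstor_share_path(env=None):
    """Derive the cm_server share path from the DBSTOR_FS filesystem list."""
    env = os.environ if env is None else env
    fs_names = env.get("DBSTOR_FS", "log_fs;page_fs;share_fs;archive_fs").split(";")
    # Expected layout: log_fs;page_fs;share_fs[;archive_fs] -- the third
    # entry names the shared filesystem that cm_server uses.
    if len(fs_names) < 3 or not fs_names[2]:
        raise Exception("DBSTOR_FS is invalid: missing share_fs filesystem")
    return "/{}/cmserver".format(fs_names[2])

# e.g. DBSTOR_FS="log01;page01;share01" resolves to "/share01/cmserver"
print(resolve_dbstor_share_path({"DBSTOR_FS": "log01;page01;share01"}))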