diff --git a/diff-disk-type-use-diff-config.patch b/diff-disk-type-use-diff-config.patch new file mode 100644 index 0000000000000000000000000000000000000000..70976d380725f99a153b1f66ae5b4f52810e9297 --- /dev/null +++ b/diff-disk-type-use-diff-config.patch @@ -0,0 +1,430 @@ +From e7c1b0095e16369fb09ae62ffa3158be5e8893a1 Mon Sep 17 00:00:00 2001 +From: gaoruoshu +Date: Fri, 11 Oct 2024 10:48:35 +0800 +Subject: [PATCH] diff disk type use diff config + +--- + config/plugins/avg_block_io.ini | 26 +++- + src/python/sentryCollector/collect_plugin.py | 6 + + .../avg_block_io/avg_block_io.py | 144 ++++++++---------- + .../sentryPlugins/avg_block_io/module_conn.py | 19 ++- + .../sentryPlugins/avg_block_io/utils.py | 43 ++++++ + 5 files changed, 146 insertions(+), 92 deletions(-) + +diff --git a/config/plugins/avg_block_io.ini b/config/plugins/avg_block_io.ini +index 858db18..5c4b9b0 100644 +--- a/config/plugins/avg_block_io.ini ++++ b/config/plugins/avg_block_io.ini +@@ -11,13 +11,29 @@ period_time=1 + win_size=30 + win_threshold=6 + +-[latency] +-read_avg_lim=10 +-write_avg_lim=10 ++[latency_nvme_ssd] ++read_avg_lim=300 ++write_avg_lim=300 + read_avg_time=3 + write_avg_time=3 +-read_tot_lim=50 +-write_tot_lim=50 ++read_tot_lim=500 ++write_tot_lim=500 ++ ++[latency_sata_ssd] ++read_avg_lim=10000 ++write_avg_lim=10000 ++read_avg_time=3 ++write_avg_time=3 ++read_tot_lim=50000 ++write_tot_lim=50000 ++ ++[latency_sata_hdd] ++read_avg_lim=15000 ++write_avg_lim=15000 ++read_avg_time=3 ++write_avg_time=3 ++read_tot_lim=50000 ++write_tot_lim=50000 + + [iodump] + read_iodump_lim=0 +diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py +index 31bf11b..bec405a 100644 +--- a/src/python/sentryCollector/collect_plugin.py ++++ b/src/python/sentryCollector/collect_plugin.py +@@ -79,6 +79,12 @@ class DiskType(): + TYPE_SATA_SSD = 1 + TYPE_SATA_HDD = 2 + ++Disk_Type = { ++ DiskType.TYPE_NVME_SSD: "nvme_ssd", ++ DiskType.TYPE_SATA_SSD: 
"sata_ssd", ++ DiskType.TYPE_SATA_HDD: "sata_hdd" ++} ++ + def client_send_and_recv(request_data, data_str_len, protocol): + """client socket send and recv message""" + try: +diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +index cf2ded3..fdad995 100644 +--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py ++++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +@@ -14,8 +14,9 @@ import configparser + import time + + from .stage_window import IoWindow, IoDumpWindow +-from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler +-from .utils import update_avg_and_check_abnormal, get_log_level ++from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name ++from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value ++from sentryCollector.collect_plugin import Disk_Type + + CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini" + +@@ -37,44 +38,40 @@ def read_config_common(config): + disk = [] if disk_name == "default" else disk_name.split(",") + except configparser.NoOptionError: + disk = [] +- logging.warning("Unset disk, set to default") ++ logging.warning("Unset common.disk, set to default") + + try: + stage_name = config.get("common", "stage") + stage = [] if stage_name == "default" else stage_name.split(",") + except configparser.NoOptionError: + stage = [] +- logging.warning("Unset stage, set to read,write") ++ logging.warning("Unset common.stage, set to default") + + if len(disk) > 10: +- logging.warning("Too many disks, record only max 10 disks") ++ logging.warning("Too many common.disks, record only max 10 disks") + disk = disk[:10] + + try: + iotype_name = config.get("common", "iotype").split(",") +- iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write', 'flush', 'discard']] +- 
err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write', 'flush', 'discard']] ++ iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']] ++ err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']] + +- if iotype_list in [None, []]: +- iotype_list = ["read", "write"] +- except configparser.NoOptionError: +- iotype = ["read", "write"] +- logging.warning("Unset iotype, set to default") ++ if err_iotype: ++ report_alarm_fail("Invalid common.iotype config") + +- if err_iotype: +- logging.warning("{} in common.iotype are not valid, set iotype={}".format(err_iotype, iotype_list)) +- ++ except configparser.NoOptionError: ++ iotype_list = ["read", "write"] ++ logging.warning("Unset common.iotype, set to read,write") + + try: + period_time = int(config.get("common", "period_time")) + if not (1 <= period_time <= 300): + raise ValueError("Invalid period_time") + except ValueError: +- period_time = 1 +- logging.warning("Invalid period_time, set to 1s") ++ report_alarm_fail("Invalid common.period_time") + except configparser.NoOptionError: + period_time = 1 +- logging.warning("Unset period_time, use 1s as default") ++ logging.warning("Unset common.period_time, use 1s as default") + + return period_time, disk, stage, iotype_list + +@@ -87,76 +84,56 @@ def read_config_algorithm(config): + try: + win_size = int(config.get("algorithm", "win_size")) + if not (1 <= win_size <= 300): +- raise ValueError("Invalid win_size") ++ raise ValueError("Invalid algorithm.win_size") + except ValueError: +- win_size = 30 +- logging.warning("Invalid win_size, set to 30") ++ report_alarm_fail("Invalid algorithm.win_size config") + except configparser.NoOptionError: + win_size = 30 +- logging.warning("Unset win_size, use 30 as default") ++ logging.warning("Unset algorithm.win_size, use 30 as default") + + try: + win_threshold = int(config.get("algorithm", "win_threshold")) + if win_threshold < 1 or win_threshold > 
300 or win_threshold > win_size: +- raise ValueError("Invalid win_threshold") ++ raise ValueError("Invalid algorithm.win_threshold") + except ValueError: +- win_threshold = 6 +- logging.warning("Invalid win_threshold, set to 6") ++ report_alarm_fail("Invalid algorithm.win_threshold config") + except configparser.NoOptionError: + win_threshold = 6 +- logging.warning("Unset win_threshold, use 6 as default") ++ logging.warning("Unset algorithm.win_threshold, use 6 as default") + + return win_size, win_threshold + + +-def read_config_lat_iodump(io_dic, config): +- """read config file, get [latency] [iodump] section value""" ++def read_config_latency(config): ++ """read config file, get [latency_xxx] section value""" + common_param = {} +- lat_sec = None +- if not config.has_section("latency"): +- logging.warning("Cannot find latency section in config file") +- else: +- lat_sec = config["latency"] +- +- iodump_sec = None +- if not config.has_section("iodump"): +- logging.warning("Cannot find iodump section in config file") +- else: +- iodump_sec = config["iodump"] +- +- if not lat_sec and not iodump_sec: +- return common_param +- +- for io_type in io_dic["iotype_list"]: +- common_param[io_type] = {} +- +- latency_keys = { +- "avg_lim": "{}_avg_lim".format(io_type), +- "avg_time": "{}_avg_time".format(io_type), +- "tot_lim": "{}_tot_lim".format(io_type), +- } +- iodump_key = "{}_iodump_lim".format(io_type) ++ for type_name in Disk_Type: ++ section_name = f"latency_{Disk_Type[type_name]}" ++ if not config.has_section(section_name): ++ report_alarm_fail(f"Cannot find {section_name} section in config file") + +- if iodump_sec and iodump_key in iodump_sec and iodump_sec[iodump_key].isdecimal(): +- common_param[io_type][iodump_key] = int(iodump_sec[iodump_key]) ++ common_param[Disk_Type[type_name]] = get_section_value(section_name, config) ++ return common_param + +- if not lat_sec: +- continue + +- for key_suffix, key_template in latency_keys.items(): +- if key_template in 
lat_sec and lat_sec[key_template].isdecimal(): +- common_param[io_type][key_template] = int(lat_sec[key_template]) ++def read_config_iodump(config): ++ """read config file, get [iodump] section value""" ++ common_param = {} ++ section_name = "iodump" ++ if not config.has_section(section_name): ++ report_alarm_fail(f"Cannot find {section_name} section in config file") + +- return common_param ++ return get_section_value(section_name, config) + + +-def read_config_stage(config, stage, iotype_list): +- """read config file, get [STAGE_NAME] section value""" ++def read_config_stage(config, stage, iotype_list, curr_disk_type): ++ """read config file, get [STAGE_NAME_diskType] section value""" + res = {} +- if not stage in config: ++ section_name = f"{stage}_{curr_disk_type}" ++ if not config.has_section(section_name): + return res + +- for key in config[stage]: ++ for key in config[section_name]: + if config[stage][key].isdecimal(): + res[key] = int(config[stage][key]) + +@@ -171,11 +148,12 @@ def init_io_win(io_dic, config, common_param): + for disk_name in io_dic["disk_list"]: + io_data[disk_name] = {} + io_avg_value[disk_name] = {} ++ curr_disk_type = get_disk_type_by_name(disk_name) + for stage_name in io_dic["stage_list"]: + io_data[disk_name][stage_name] = {} + io_avg_value[disk_name][stage_name] = {} +- # step3. 
解析stage配置 +- curr_stage_param = read_config_stage(config, stage_name, iotype_list) ++ # 解析stage配置 ++ curr_stage_param = read_config_stage(config, stage_name, iotype_list, curr_disk_type) + for rw in iotype_list: + io_data[disk_name][stage_name][rw] = {} + io_avg_value[disk_name][stage_name][rw] = [0, 0] +@@ -187,10 +165,10 @@ def init_io_win(io_dic, config, common_param): + iodump_lim_key = "{}_iodump_lim".format(rw) + + # 获取值,优先从 curr_stage_param 获取,如果不存在,则从 common_param 获取 +- avg_lim_value = curr_stage_param.get(avg_lim_key, common_param.get(rw, {}).get(avg_lim_key)) +- avg_time_value = curr_stage_param.get(avg_time_key, common_param.get(rw, {}).get(avg_time_key)) +- tot_lim_value = curr_stage_param.get(tot_lim_key, common_param.get(rw, {}).get(tot_lim_key)) +- iodump_lim_value = curr_stage_param.get(iodump_lim_key, common_param.get(rw, {}).get(iodump_lim_key)) ++ avg_lim_value = curr_stage_param.get(avg_lim_key, common_param.get(curr_disk_type, {}).get(avg_lim_key)) ++ avg_time_value = curr_stage_param.get(avg_time_key, common_param.get(curr_disk_type, {}).get(avg_time_key)) ++ tot_lim_value = curr_stage_param.get(tot_lim_key, common_param.get(curr_disk_type, {}).get(tot_lim_key)) ++ iodump_lim_value = curr_stage_param.get(iodump_lim_key, common_param.get("iodump", {}).get(iodump_lim_key)) + + if avg_lim_value and avg_time_value and tot_lim_value: + io_data[disk_name][stage_name][rw]["latency"] = IoWindow(window_size=io_dic["win_size"], window_threshold=io_dic["win_threshold"], abnormal_multiple=avg_time_value, abnormal_multiple_lim=avg_lim_value, abnormal_time=tot_lim_value) +@@ -217,28 +195,21 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): + stage_list = [key for key in all_stage_set if key in config_stage] + not_in_stage_list = [key for key in config_stage if key not in all_stage_set] + +- if not config_disk: ++ if not_in_stage_list: ++ report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}") ++ ++ if not 
config_disk and not not_in_disk_list: + disk_list = [key for key in all_disk_set] + +- if not config_stage: ++ if not config_stage and not not_in_stage_list: + stage_list = [key for key in all_stage_set] + + disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list +- stage_list = stage_list[:15] if len(stage_list) > 15 else stage_list +- +- if config_disk and not disk_list: +- logging.warning("Cannot get valid disk by disk={}, set to default".format(config_disk)) +- disk_list, stage_list = get_valid_disk_stage_list(io_dic, [], config_stage) +- +- if config_stage and not stage_list: +- logging.warning("Cannot get valid stage by stage={}, set to default".format(config_stage)) +- disk_list, stage_list = get_valid_disk_stage_list(io_dic, config_disk, []) + + if not stage_list or not disk_list: + report_alarm_fail("Cannot get valid disk name or stage name.") + + log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list) +- log_invalid_keys(not_in_stage_list, 'stage', config_stage, stage_list) + + return disk_list, stage_list + +@@ -310,8 +281,13 @@ def main(): + # step1. 解析公共配置 --- algorithm + io_dic["win_size"], io_dic["win_threshold"] = read_config_algorithm(config) + +- # step2. 循环创建窗口 +- common_param = read_config_lat_iodump(io_dic, config) ++ # step2. 解析公共配置 --- latency_xxx ++ common_param = read_config_latency(config) ++ ++ # step3. 解析公共配置 --- iodump ++ common_param['iodump'] = read_config_iodump(config) ++ ++ # step4. 
循环创建窗口 + io_data, io_avg_value = init_io_win(io_dic, config, common_param) + + main_loop(io_dic, io_data, io_avg_value) +diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py +index 40b3fcc..8d6f429 100644 +--- a/src/python/sentryPlugins/avg_block_io/module_conn.py ++++ b/src/python/sentryPlugins/avg_block_io/module_conn.py +@@ -14,7 +14,7 @@ import sys + import time + + from .utils import is_abnormal, get_win_data, log_slow_win +-from sentryCollector.collect_plugin import is_iocollect_valid, get_io_data, Result_Messages ++from sentryCollector.collect_plugin import is_iocollect_valid, get_io_data, Result_Messages, get_disk_type, Disk_Type + from syssentry.result import ResultLevel, report_result + from xalarm.sentry_notify import xalarm_report, MINOR_ALM, ALARM_TYPE_OCCUR + +@@ -51,7 +51,7 @@ def check_result_validation(res, reason): + try: + json_data = json.loads(res['message']) + except json.JSONDecodeError: +- err_msg = "Failed to {}: invalid return message".format(reason) ++ err_msg = f"Failed to {reason}: invalid return message" + report_alarm_fail(err_msg) + + return json_data +@@ -60,7 +60,7 @@ def check_result_validation(res, reason): + def report_alarm_fail(alarm_info): + """report result to xalarmd""" + report_result(TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": alarm_info})) +- logging.error(alarm_info) ++ logging.critical(alarm_info) + sys.exit(1) + + +@@ -114,3 +114,16 @@ def process_report_data(disk_name, rw, io_data): + + log_slow_win(msg, "unknown") + xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg)) ++ ++ ++def get_disk_type_by_name(disk_name): ++ res = get_disk_type(disk_name) ++ disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}') ++ try: ++ curr_disk_type = int(disk_type_str) ++ if curr_disk_type not in Disk_Type: ++ raise ValueError ++ except ValueError: ++ report_alarm_fail(f"Failed to get disk type for 
{disk_name}") ++ ++ return Disk_Type[curr_disk_type] +\ No newline at end of file +diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py +index 3b7f027..cef1edd 100644 +--- a/src/python/sentryPlugins/avg_block_io/utils.py ++++ b/src/python/sentryPlugins/avg_block_io/utils.py +@@ -26,6 +26,49 @@ LogLevel = { + } + + ++DEFAULT_PARAM = { ++ 'latency_nvme_ssd': { ++ 'read_avg_lim': 300, ++ 'write_avg_lim': 300, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 500, ++ 'write_tot_lim': 500, ++ }, 'latency_sata_ssd' : { ++ 'read_avg_lim': 10000, ++ 'write_avg_lim': 10000, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 50000, ++ 'write_tot_lim': 50000, ++ }, 'latency_sata_hdd' : { ++ 'read_avg_lim': 15000, ++ 'write_avg_lim': 15000, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 50000, ++ 'write_tot_lim': 50000 ++ }, 'iodump': { ++ 'read_iodump_lim': 0, ++ 'write_iodump_lim': 0 ++ } ++} ++ ++ ++def get_section_value(section_name, config): ++ common_param = {} ++ config_sec = config[section_name] ++ for config_key in DEFAULT_PARAM[section_name]: ++ if config_key in config_sec: ++ if not config_sec[config_key].isdecimal(): ++ report_alarm_fail(f"Invalid {section_name}.{config_key} config.") ++ common_param[config_key] = int(config_sec[config_key]) ++ else: ++ logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default") ++ common_param[config_key] = DEFAULT_PARAM[section_name][config_key] ++ return common_param ++ ++ + def get_log_level(filename): + if not os.path.exists(filename): + return logging.INFO +-- +2.27.0 diff --git a/sysSentry.spec b/sysSentry.spec index c015d6a4023fa21fa792f4c5d8643ab433b3102e..893ed0fd256fd7a04da730b03108b5518854639d 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 32 +Release: 33 
License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -44,6 +44,7 @@ Patch31: xalarm-add-alarm-msg-length-to-8192.patch Patch32: add-log-for-improving-maintainability.patch Patch33: add-get_disk_type-and-fix-some-bugs.patch Patch34: ai_block_io-adapt-alarm-module.patch +Patch35: diff-disk-type-use-diff-config.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -298,6 +299,12 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io %changelog +* Fri Oct 11 2024 gaoruoshu - 1.0.2-33 +- Type:requirement +- CVE:NA +- SUG:NA +- DESC:avg_block_io adapt different type of disk, use different config + * Thu Oct 10 2024 heyouzhi - 1.0.2-32 - Type:requirement - CVE:NA