diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/avg_block_io.py index f3ade09686a8afc1960ba149338afe0002fe53c1..cd47919c3fcae03075dc5f7340718a2f0abaad42 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/avg_block_io.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/avg_block_io.py @@ -13,132 +13,13 @@ import signal import configparser import time +from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage from .stage_window import IoWindow, IoDumpWindow from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name -from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value -from sentryCollector.collect_plugin import Disk_Type +from .utils import update_avg_and_check_abnormal CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini" -def log_invalid_keys(not_in_list, keys_name, config_list, default_list): - """print invalid log""" - if config_list and not_in_list: - logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list)) - elif config_list == ["default"]: - logging.warning("Default {} use {}".format(keys_name, default_list)) - - -def read_config_common(config): - """read config file, get [common] section value""" - if not config.has_section("common"): - report_alarm_fail("Cannot find common section in config file") - - try: - disk_name = config.get("common", "disk") - disk = [] if disk_name == "default" else disk_name.split(",") - except configparser.NoOptionError: - disk = [] - logging.warning("Unset common.disk, set to default") - - try: - stage_name = config.get("common", "stage") - stage = [] if stage_name == "default" else stage_name.split(",") - except configparser.NoOptionError: - stage = [] - 
logging.warning("Unset common.stage, set to default") - - if len(disk) > 10: - logging.warning("Too many common.disks, record only max 10 disks") - disk = disk[:10] - - try: - iotype_name = config.get("common", "iotype").split(",") - iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']] - err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']] - - if err_iotype: - report_alarm_fail("Invalid common.iotype config") - - except configparser.NoOptionError: - iotype_list = ["read", "write"] - logging.warning("Unset common.iotype, set to read,write") - - try: - period_time = int(config.get("common", "period_time")) - if not (1 <= period_time <= 300): - raise ValueError("Invalid period_time") - except ValueError: - report_alarm_fail("Invalid common.period_time") - except configparser.NoOptionError: - period_time = 1 - logging.warning("Unset common.period_time, use 1s as default") - - return period_time, disk, stage, iotype_list - - -def read_config_algorithm(config): - """read config file, get [algorithm] section value""" - if not config.has_section("algorithm"): - report_alarm_fail("Cannot find algorithm section in config file") - - try: - win_size = int(config.get("algorithm", "win_size")) - if not (1 <= win_size <= 300): - raise ValueError("Invalid algorithm.win_size") - except ValueError: - report_alarm_fail("Invalid algorithm.win_size config") - except configparser.NoOptionError: - win_size = 30 - logging.warning("Unset algorithm.win_size, use 30 as default") - - try: - win_threshold = int(config.get("algorithm", "win_threshold")) - if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size: - raise ValueError("Invalid algorithm.win_threshold") - except ValueError: - report_alarm_fail("Invalid algorithm.win_threshold config") - except configparser.NoOptionError: - win_threshold = 6 - logging.warning("Unset algorithm.win_threshold, use 6 as default") - - return win_size, win_threshold - - -def 
read_config_latency(config): - """read config file, get [latency_xxx] section value""" - common_param = {} - for type_name in Disk_Type: - section_name = f"latency_{Disk_Type[type_name]}" - if not config.has_section(section_name): - report_alarm_fail(f"Cannot find {section_name} section in config file") - - common_param[Disk_Type[type_name]] = get_section_value(section_name, config) - return common_param - - -def read_config_iodump(config): - """read config file, get [iodump] section value""" - common_param = {} - section_name = "iodump" - if not config.has_section(section_name): - report_alarm_fail(f"Cannot find {section_name} section in config file") - - return get_section_value(section_name, config) - - -def read_config_stage(config, stage, iotype_list, curr_disk_type): - """read config file, get [STAGE_NAME_diskType] section value""" - res = {} - section_name = f"{stage}_{curr_disk_type}" - if not config.has_section(section_name): - return res - - for key in config[section_name]: - if config[stage][key].isdecimal(): - res[key] = int(config[stage][key]) - - return res - def init_io_win(io_dic, config, common_param): """initialize windows of latency, iodump, and dict of avg_value""" @@ -192,24 +73,33 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): disk_list = [key for key in all_disk_set if key in config_disk] not_in_disk_list = [key for key in config_disk if key not in all_disk_set] + if not config_disk and not not_in_disk_list: + disk_list = [key for key in all_disk_set] + + if not disk_list: + report_alarm_fail("Cannot get valid disk name") + + disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list + + if not config_disk: + logging.info(f"Default common.disk using disk={disk_list}") + elif sorted(disk_list) != sorted(config_disk): + logging.warning(f"Set common.disk to {disk_list}") + stage_list = [key for key in all_stage_set if key in config_stage] not_in_stage_list = [key for key in config_stage if key not in all_stage_set] if 
not_in_stage_list: report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}") - if not config_disk and not not_in_disk_list: - disk_list = [key for key in all_disk_set] - - if not config_stage and not not_in_stage_list: + if not config_stage: stage_list = [key for key in all_stage_set] - disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list - - if not stage_list or not disk_list: - report_alarm_fail("Cannot get valid disk name or stage name.") + if not stage_list: + report_alarm_fail("Cannot get valid stage name.") - log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list) + if not config_stage: + logging.info(f"Default common.stage using stage={stage_list}") return disk_list, stage_list @@ -254,9 +144,8 @@ def main(): signal.signal(signal.SIGINT, sig_handler) signal.signal(signal.SIGTERM, sig_handler) - log_level = get_log_level(CONFIG_FILE) + log_level = read_config_log(CONFIG_FILE) log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s" - logging.basicConfig(level=log_level, format=log_format) # 初始化配置读取 @@ -274,6 +163,8 @@ def main(): # 采集模块对接,is_iocollect_valid() io_dic["disk_list"], io_dic["stage_list"] = get_valid_disk_stage_list(io_dic, disk, stage) + logging.debug(f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}") + if "bio" not in io_dic["stage_list"]: report_alarm_fail("Cannot run avg_block_io without bio stage") diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/config.py b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/config.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f45ce5341b8b62c0edd4b8ab385aa1d5dd289d --- /dev/null +++ b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/config.py @@ -0,0 +1,208 @@ +import configparser +import logging +import os + +from .module_conn import report_alarm_fail +from sentryCollector.collect_plugin import Disk_Type + + +CONF_LOG = 'log' +CONF_LOG_LEVEL = 'level' +LogLevel = { + 
"debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL +} + +CONF_COMMON = 'common' +CONF_COMMON_DISK = 'disk' +CONF_COMMON_STAGE = 'stage' +CONF_COMMON_IOTYPE = 'iotype' +CONF_COMMON_PER_TIME = 'period_time' + +CONF_ALGO = 'algorithm' +CONF_ALGO_SIZE = 'win_size' +CONF_ALGO_THRE = 'win_threshold' + +CONF_LATENCY = 'latency_{}' +CONF_IODUMP = 'iodump' + + +DEFAULT_PARAM = { + CONF_LOG: { + CONF_LOG_LEVEL: 'info' + }, CONF_COMMON: { + CONF_COMMON_DISK: 'default', + CONF_COMMON_STAGE: 'default', + CONF_COMMON_IOTYPE: 'read,write', + CONF_COMMON_PER_TIME: 1 + }, CONF_ALGO: { + CONF_ALGO_SIZE: 30, + CONF_ALGO_THRE: 6 + }, 'latency_nvme_ssd': { + 'read_avg_lim': 300, + 'write_avg_lim': 300, + 'read_avg_time': 3, + 'write_avg_time': 3, + 'read_tot_lim': 500, + 'write_tot_lim': 500, + }, 'latency_sata_ssd' : { + 'read_avg_lim': 10000, + 'write_avg_lim': 10000, + 'read_avg_time': 3, + 'write_avg_time': 3, + 'read_tot_lim': 50000, + 'write_tot_lim': 50000, + }, 'latency_sata_hdd' : { + 'read_avg_lim': 15000, + 'write_avg_lim': 15000, + 'read_avg_time': 3, + 'write_avg_time': 3, + 'read_tot_lim': 50000, + 'write_tot_lim': 50000 + }, CONF_IODUMP: { + 'read_iodump_lim': 0, + 'write_iodump_lim': 0 + } +} + + +def get_section_value(section_name, config): + common_param = {} + config_sec = config[section_name] + for config_key in DEFAULT_PARAM[section_name]: + if config_key in config_sec: + if not config_sec[config_key].isdecimal(): + report_alarm_fail(f"Invalid {section_name}.{config_key} config.") + common_param[config_key] = int(config_sec[config_key]) + else: + common_param[config_key] = DEFAULT_PARAM[section_name][config_key] + logging.warning(f"Unset {section_name}.{config_key} in config file, use {common_param[config_key]} as default") + return common_param + + +def read_config_log(filename): + """read config file, get [log] section value""" + default_log_level = 
DEFAULT_PARAM[CONF_LOG][CONF_LOG_LEVEL] + if not os.path.exists(filename): + return LogLevel.get(default_log_level) + + config = configparser.ConfigParser() + config.read(filename) + + log_level = config.get(CONF_LOG, CONF_LOG_LEVEL, fallback=default_log_level) + if log_level.lower() in LogLevel: + return LogLevel.get(log_level.lower()) + return LogLevel.get(default_log_level) + + +def read_config_common(config): + """read config file, get [common] section value""" + if not config.has_section(CONF_COMMON): + report_alarm_fail(f"Cannot find {CONF_COMMON} section in config file") + + try: + disk_name = config.get(CONF_COMMON, CONF_COMMON_DISK).lower() + disk = [] if disk_name == "default" else disk_name.split(",") + except configparser.NoOptionError: + disk = [] + logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_DISK}, set to default") + + try: + stage_name = config.get(CONF_COMMON, CONF_COMMON_STAGE).lower() + stage = [] if stage_name == "default" else stage_name.split(",") + except configparser.NoOptionError: + stage = [] + logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_STAGE}, set to default") + + if len(disk) > 10: + logging.warning(f"Too many {CONF_COMMON}.disks, record only max 10 disks") + disk = disk[:10] + + try: + iotype_name = config.get(CONF_COMMON, CONF_COMMON_IOTYPE).lower().split(",") + iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']] + err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']] + + if err_iotype: + report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_IOTYPE} config") + + except configparser.NoOptionError: + iotype_list = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_IOTYPE].split(",") + logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_IOTYPE}, use {iotype_list} as default") + + try: + period_time = int(config.get(CONF_COMMON, CONF_COMMON_PER_TIME)) + if not (1 <= period_time <= 300): + raise ValueError("Invalid period_time") + except ValueError: + report_alarm_fail(f"Invalid 
{CONF_COMMON}.{CONF_COMMON_PER_TIME}") + except configparser.NoOptionError: + period_time = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_PER_TIME] + logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_PER_TIME}, use {period_time} as default") + + return period_time, disk, stage, iotype_list + + +def read_config_algorithm(config): + """read config file, get [algorithm] section value""" + if not config.has_section(CONF_ALGO): + report_alarm_fail(f"Cannot find {CONF_ALGO} section in config file") + + try: + win_size = int(config.get(CONF_ALGO, CONF_ALGO_SIZE)) + if not (1 <= win_size <= 300): + raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE}") + except ValueError: + report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE} config") + except configparser.NoOptionError: + win_size = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_SIZE] + logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_SIZE}, use {win_size} as default") + + try: + win_threshold = int(config.get(CONF_ALGO, CONF_ALGO_THRE)) + if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size: + raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE}") + except ValueError: + report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE} config") + except configparser.NoOptionError: + win_threshold = DEFAULT_PARAM[CONF_ALGO]['win_threshold'] + logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_THRE}, use {win_threshold} as default") + + return win_size, win_threshold + + +def read_config_latency(config): + """read config file, get [latency_xxx] section value""" + common_param = {} + for type_name in Disk_Type: + section_name = CONF_LATENCY.format(Disk_Type[type_name]) + if not config.has_section(section_name): + report_alarm_fail(f"Cannot find {section_name} section in config file") + + common_param[Disk_Type[type_name]] = get_section_value(section_name, config) + return common_param + + +def read_config_iodump(config): + """read config file, get [iodump] section value""" + if not config.has_section(CONF_IODUMP): + 
report_alarm_fail(f"Cannot find {CONF_IODUMP} section in config file") + + return get_section_value(CONF_IODUMP, config) + + +def read_config_stage(config, stage, iotype_list, curr_disk_type): + """read config file, get [STAGE_NAME_diskType] section value""" + res = {} + section_name = f"{stage}_{curr_disk_type}" + if not config.has_section(section_name): + return res + + for key in config[section_name]: + if config[section_name][key].isdecimal(): + res[key] = int(config[section_name][key]) + + return res diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/module_conn.py b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/module_conn.py index 8d6f429a14252824bf0c4f163754ad5ac712f99f..cbdaad4476309af440b1fdda65ed53a5934b8323 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/module_conn.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/module_conn.py @@ -29,12 +29,16 @@ def sig_handler(signum, _f): def avg_get_io_data(io_dic): """get_io_data from sentryCollector""" + logging.debug(f"send to sentryCollector get_io_data: period={io_dic['period_time']}, " + f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}, iotype={io_dic['iotype_list']}") res = get_io_data(io_dic["period_time"], io_dic["disk_list"], io_dic["stage_list"], io_dic["iotype_list"]) return check_result_validation(res, 'get io data') def avg_is_iocollect_valid(io_dic, config_disk, config_stage): """is_iocollect_valid from sentryCollector""" + logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, " + f"disk={config_disk}, stage={config_stage}") res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage) return check_result_validation(res, 'check config validation') @@ -79,7 +83,7 @@ def process_report_data(disk_name, rw, io_data): # io press ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq'] for stage_name in ctrl_stage: - abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data) + abnormal, abnormal_list 
= is_abnormal((disk_name, stage_name, rw), io_data) if not abnormal: continue msg["reason"] = "IO press" @@ -117,6 +121,7 @@ def process_report_data(disk_name, rw, io_data): def get_disk_type_by_name(disk_name): + logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}") res = get_disk_type(disk_name) disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}') try: @@ -126,4 +131,4 @@ def get_disk_type_by_name(disk_name): except ValueError: report_alarm_fail(f"Failed to get disk type for {disk_name}") - return Disk_Type[curr_disk_type] \ No newline at end of file + return Disk_Type[curr_disk_type] diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/utils.py b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/utils.py index c381c078c1e6de36673392e4d8df5928bd4f672e..1bfd4e82aa2283326bdfd5a20ca25cf0f56fcfb6 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/utils.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/avg_block_io/utils.py @@ -8,84 +8,12 @@ # IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR # PURPOSE. # See the Mulan PSL v2 for more details. 
-import configparser import logging import os AVG_VALUE = 0 AVG_COUNT = 1 -CONF_LOG = 'log' -CONF_LOG_LEVEL = 'level' -LogLevel = { - "debug": logging.DEBUG, - "info": logging.INFO, - "warning": logging.WARNING, - "error": logging.ERROR, - "critical": logging.CRITICAL -} - - -DEFAULT_PARAM = { - 'latency_nvme_ssd': { - 'read_avg_lim': 300, - 'write_avg_lim': 300, - 'read_avg_time': 3, - 'write_avg_time': 3, - 'read_tot_lim': 500, - 'write_tot_lim': 500, - }, 'latency_sata_ssd' : { - 'read_avg_lim': 10000, - 'write_avg_lim': 10000, - 'read_avg_time': 3, - 'write_avg_time': 3, - 'read_tot_lim': 50000, - 'write_tot_lim': 50000, - }, 'latency_sata_hdd' : { - 'read_avg_lim': 15000, - 'write_avg_lim': 15000, - 'read_avg_time': 3, - 'write_avg_time': 3, - 'read_tot_lim': 50000, - 'write_tot_lim': 50000 - }, 'iodump': { - 'read_iodump_lim': 0, - 'write_iodump_lim': 0 - } -} - - -def get_section_value(section_name, config): - common_param = {} - config_sec = config[section_name] - for config_key in DEFAULT_PARAM[section_name]: - if config_key in config_sec: - if not config_sec[config_key].isdecimal(): - report_alarm_fail(f"Invalid {section_name}.{config_key} config.") - common_param[config_key] = int(config_sec[config_key]) - else: - logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default") - common_param[config_key] = DEFAULT_PARAM[section_name][config_key] - return common_param - - -def get_log_level(filename): - if not os.path.exists(filename): - return logging.INFO - - try: - config = configparser.ConfigParser() - config.read(filename) - if not config.has_option(CONF_LOG, CONF_LOG_LEVEL): - return logging.INFO - log_level = config.get(CONF_LOG, CONF_LOG_LEVEL) - - if log_level.lower() in LogLevel: - return LogLevel.get(log_level.lower()) - return logging.INFO - except configparser.Error: - return logging.INFO - def get_nested_value(data, keys): """get data from nested dict"""