From 40b7bdcf11009fe16a80dfa06d3bd7aeeed5b343 Mon Sep 17 00:00:00 2001 From: heyouzhi <1037617413@qq.com> Date: Fri, 20 Sep 2024 16:46:31 +0800 Subject: [PATCH] refactor: add __init__ and format code --- .../ai_threshold_slow_io_detection.ini | 30 +- .../tasks/ai_threshold_slow_io_detection.mod | 8 +- .../ai_threshold_slow_io_detection/README.md | 2 +- .../__init__.py | 0 .../alarm_report.py | 109 +++--- .../config_parser.py | 330 +++++++++-------- .../data_access.py | 183 +++++----- .../detector.py | 105 +++--- .../ai_threshold_slow_io_detection/io_data.py | 156 ++++---- .../sliding_window.py | 230 ++++++------ .../slow_io_detection.py | 289 ++++++++------- .../threshold.py | 341 ++++++++++-------- .../ai_threshold_slow_io_detection/utils.py | 140 +++---- 13 files changed, 1027 insertions(+), 896 deletions(-) create mode 100644 sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py diff --git a/sysSentry-1.0.2/config/plugins/ai_threshold_slow_io_detection.ini b/sysSentry-1.0.2/config/plugins/ai_threshold_slow_io_detection.ini index 31b3664..4ceb90f 100644 --- a/sysSentry-1.0.2/config/plugins/ai_threshold_slow_io_detection.ini +++ b/sysSentry-1.0.2/config/plugins/ai_threshold_slow_io_detection.ini @@ -1,16 +1,16 @@ -[common] -absolute_threshold=40 -slow_io_detect_frequency=1 -log_level=info - -[algorithm] -train_data_duration=0.1 -train_update_duration=0.02 -algorithm_type=boxplot -boxplot_parameter=1.5 -n_sigma_parameter=3 - -[sliding_window] -sliding_window_type=not_continuous -window_size=30 +[common] +absolute_threshold=40 +slow_io_detect_frequency=1 +log_level=info + +[algorithm] +train_data_duration=0.1 +train_update_duration=0.02 +algorithm_type=boxplot +boxplot_parameter=1.5 +n_sigma_parameter=3 + +[sliding_window] +sliding_window_type=not_continuous +window_size=30 window_minimum_threshold=6 \ No newline at end of file diff --git a/sysSentry-1.0.2/config/tasks/ai_threshold_slow_io_detection.mod 
b/sysSentry-1.0.2/config/tasks/ai_threshold_slow_io_detection.mod index 26b7c5d..2729f72 100644 --- a/sysSentry-1.0.2/config/tasks/ai_threshold_slow_io_detection.mod +++ b/sysSentry-1.0.2/config/tasks/ai_threshold_slow_io_detection.mod @@ -1,5 +1,5 @@ -[common] -enabled=yes -task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection -task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection +[common] +enabled=yes +task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection +task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection type=oneshot \ No newline at end of file diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md index 938342e..95c1111 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md @@ -1 +1 @@ -# slow_io_detection +# slow_io_detection diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py index c63945a..da25713 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py @@ -1,49 +1,60 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. - -from syssentry.result import ResultLevel, report_result -import logging -import json - - -class AlarmReport: - TASK_NAME = "SLOW_IO_DETECTION" - - @staticmethod - def report_pass(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} PASS: {info}') - - @staticmethod - def report_fail(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} FAIL: {info}') - - @staticmethod - def report_skip(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} SKIP: {info}') - - @staticmethod - def report_minor_alm(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.MINOR_ALM, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} MINOR_ALM: {info}') - - @staticmethod - def report_major_alm(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.MAJOR_ALM, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} MAJOR_ALM: {info}') - - @staticmethod - def report_critical_alm(info: str): - report_result(AlarmReport.TASK_NAME, ResultLevel.CRITICAL_ALM, json.dumps({"msg": info})) - logging.info(f'Report {AlarmReport.TASK_NAME} CRITICAL_ALM: {info}') - +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. + +from syssentry.result import ResultLevel, report_result +import logging +import json + + +class AlarmReport: + TASK_NAME = "SLOW_IO_DETECTION" + + @staticmethod + def report_pass(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} PASS: {info}") + + @staticmethod + def report_fail(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} FAIL: {info}") + + @staticmethod + def report_skip(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} SKIP: {info}") + + @staticmethod + def report_minor_alm(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.MINOR_ALM, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} MINOR_ALM: {info}") + + @staticmethod + def report_major_alm(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.MAJOR_ALM, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} MAJOR_ALM: {info}") + + @staticmethod + def report_critical_alm(info: str): + report_result( + AlarmReport.TASK_NAME, ResultLevel.CRITICAL_ALM, json.dumps({"msg": info}) + ) + logging.info(f"Report {AlarmReport.TASK_NAME} CRITICAL_ALM: {info}") diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py index d01f43c..d21cac7 100644 --- 
a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py @@ -1,141 +1,189 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. - -import configparser -import logging - - -class ConfigParser: - - DEFAULT_ABSOLUTE_THRESHOLD = 40 - DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1 - DEFAULT_LOG_LEVEL = 'info' - DEFAULT_TRAIN_DATA_DURATION = 24 - DEFAULT_TRAIN_UPDATE_DURATION = 2 - DEFAULT_ALGORITHM_TYPE = 'boxplot' - DEFAULT_N_SIGMA_PARAMETER = 3 - DEFAULT_BOXPLOT_PARAMETER = 1.5 - DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous' - DEFAULT_WINDOW_SIZE = 30 - DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6 - - def __init__(self, config_file_name): - self.__boxplot_parameter = None - self.__window_minimum_threshold = None - self.__window_size = None - self.__sliding_window_type = None - self.__n_sigma_parameter = None - self.__algorithm_type = None - self.__train_update_duration = None - self.__log_level = None - self.__slow_io_detect_frequency = None - self.__absolute_threshold = None - self.__train_data_duration = None - self.__config_file_name = config_file_name - - def read_config_from_file(self): - - con = configparser.ConfigParser() - con.read(self.__config_file_name, encoding='utf-8') - - items_common = dict(con.items('common')) - items_algorithm = dict(con.items('algorithm')) - items_sliding_window = dict(con.items('sliding_window')) - - try: - self.__absolute_threshold = 
int(items_common.get('absolute_threshold', - ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD)) - except ValueError: - self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD - logging.warning('absolute threshold type conversion has error, use default value.') - - try: - self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency', - ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY)) - except ValueError: - self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY - logging.warning('slow_io_detect_frequency type conversion has error, use default value.') - - self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL) - - try: - self.__train_data_duration = float(items_algorithm.get('train_data_duration', - ConfigParser.DEFAULT_TRAIN_DATA_DURATION)) - except ValueError: - self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION - logging.warning('train_data_duration type conversion has error, use default value.') - - try: - self.__train_update_duration = float(items_algorithm.get('train_update_duration', - ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION)) - except ValueError: - self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION - logging.warning('train_update_duration type conversion has error, use default value.') - - try: - self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE) - except ValueError: - self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE - logging.warning('algorithmType type conversion has error, use default value.') - - if self.__algorithm_type == 'n_sigma': - try: - self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter', - ConfigParser.DEFAULT_N_SIGMA_PARAMETER)) - except ValueError: - self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER - logging.warning('n_sigma_parameter type conversion has error, use default value.') - elif self.__algorithm_type == 'boxplot': - 
try: - self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter', - ConfigParser.DEFAULT_BOXPLOT_PARAMETER)) - except ValueError: - self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER - logging.warning('boxplot_parameter type conversion has error, use default value.') - - self.__sliding_window_type = items_sliding_window.get('sliding_window_type', - ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE) - - try: - self.__window_size = int(items_sliding_window.get('window_size', - ConfigParser.DEFAULT_WINDOW_SIZE)) - except ValueError: - self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE - logging.warning('window_size type conversion has error, use default value.') - - try: - self.__window_minimum_threshold = ( - int(items_sliding_window.get('window_minimum_threshold', - ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD))) - except ValueError: - self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD - logging.warning('window_minimum_threshold type conversion has error, use default value.') - - def get_slow_io_detect_frequency(self): - return self.__slow_io_detect_frequency - - def get_algorithm_type(self): - return self.__algorithm_type - - def get_sliding_window_type(self): - return self.__sliding_window_type - - def get_train_data_duration_and_train_update_duration(self): - return self.__train_data_duration, self.__train_update_duration - - def get_window_size_and_window_minimum_threshold(self): - return self.__window_size, self.__window_minimum_threshold - - def get_absolute_threshold(self): - return self.__absolute_threshold - - def get_log_level(self): - return self.__log_level +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. + +import configparser +import logging + + +class ConfigParser: + + DEFAULT_ABSOLUTE_THRESHOLD = 40 + DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1 + DEFAULT_LOG_LEVEL = "info" + DEFAULT_TRAIN_DATA_DURATION = 24 + DEFAULT_TRAIN_UPDATE_DURATION = 2 + DEFAULT_ALGORITHM_TYPE = "boxplot" + DEFAULT_N_SIGMA_PARAMETER = 3 + DEFAULT_BOXPLOT_PARAMETER = 1.5 + DEFAULT_SLIDING_WINDOW_TYPE = "not_continuous" + DEFAULT_WINDOW_SIZE = 30 + DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6 + + def __init__(self, config_file_name): + self.__boxplot_parameter = None + self.__window_minimum_threshold = None + self.__window_size = None + self.__sliding_window_type = None + self.__n_sigma_parameter = None + self.__algorithm_type = None + self.__train_update_duration = None + self.__log_level = None + self.__slow_io_detect_frequency = None + self.__absolute_threshold = None + self.__train_data_duration = None + self.__config_file_name = config_file_name + + def read_config_from_file(self): + + con = configparser.ConfigParser() + con.read(self.__config_file_name, encoding="utf-8") + + items_common = dict(con.items("common")) + items_algorithm = dict(con.items("algorithm")) + items_sliding_window = dict(con.items("sliding_window")) + + try: + self.__absolute_threshold = int( + items_common.get( + "absolute_threshold", ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD + ) + ) + except ValueError: + self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD + logging.warning( + "absolute threshold type conversion has error, use default value." 
+ ) + + try: + self.__slow_io_detect_frequency = int( + items_common.get( + "slow_io_detect_frequency", + ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY, + ) + ) + except ValueError: + self.__slow_io_detect_frequency = ( + ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY + ) + logging.warning( + "slow_io_detect_frequency type conversion has error, use default value." + ) + + self.__log_level = items_common.get("log_level", ConfigParser.DEFAULT_LOG_LEVEL) + + try: + self.__train_data_duration = float( + items_algorithm.get( + "train_data_duration", ConfigParser.DEFAULT_TRAIN_DATA_DURATION + ) + ) + except ValueError: + self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION + logging.warning( + "train_data_duration type conversion has error, use default value." + ) + + try: + self.__train_update_duration = float( + items_algorithm.get( + "train_update_duration", ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION + ) + ) + except ValueError: + self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION + logging.warning( + "train_update_duration type conversion has error, use default value." + ) + + try: + self.__algorithm_type = items_algorithm.get( + "algorithm_type", ConfigParser.DEFAULT_ALGORITHM_TYPE + ) + except ValueError: + self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE + logging.warning( + "algorithmType type conversion has error, use default value." + ) + + if self.__algorithm_type == "n_sigma": + try: + self.__n_sigma_parameter = float( + items_algorithm.get( + "n_sigma_parameter", ConfigParser.DEFAULT_N_SIGMA_PARAMETER + ) + ) + except ValueError: + self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER + logging.warning( + "n_sigma_parameter type conversion has error, use default value." 
+ ) + elif self.__algorithm_type == "boxplot": + try: + self.__boxplot_parameter = float( + items_algorithm.get( + "boxplot_parameter", ConfigParser.DEFAULT_BOXPLOT_PARAMETER + ) + ) + except ValueError: + self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER + logging.warning( + "boxplot_parameter type conversion has error, use default value." + ) + + self.__sliding_window_type = items_sliding_window.get( + "sliding_window_type", ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE + ) + + try: + self.__window_size = int( + items_sliding_window.get( + "window_size", ConfigParser.DEFAULT_WINDOW_SIZE + ) + ) + except ValueError: + self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE + logging.warning("window_size type conversion has error, use default value.") + + try: + self.__window_minimum_threshold = int( + items_sliding_window.get( + "window_minimum_threshold", + ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD, + ) + ) + except ValueError: + self.__window_minimum_threshold = ( + ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD + ) + logging.warning( + "window_minimum_threshold type conversion has error, use default value." 
+ ) + + def get_slow_io_detect_frequency(self): + return self.__slow_io_detect_frequency + + def get_algorithm_type(self): + return self.__algorithm_type + + def get_sliding_window_type(self): + return self.__sliding_window_type + + def get_train_data_duration_and_train_update_duration(self): + return self.__train_data_duration, self.__train_update_duration + + def get_window_size_and_window_minimum_threshold(self): + return self.__window_size, self.__window_minimum_threshold + + def get_absolute_threshold(self): + return self.__absolute_threshold + + def get_log_level(self): + return self.__log_level diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py index a4db935..9f9ebc2 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py @@ -1,91 +1,92 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
- -import json -import logging - -from sentryCollector.collect_plugin import ( - Result_Messages, - get_io_data, - is_iocollect_valid, -) -from .io_data import IOStageData, IOData - -COLLECT_STAGES = [ - "throtl", - "wbt", - "gettag", - "plug", - "bfq", - "hctx", - "requeue", - "rq_driver", - "bio", - "iocost", -] - -def check_collect_valid(period): - data_raw = is_iocollect_valid(period) - if data_raw["ret"] == 0: - try: - data = json.loads(data_raw["message"]) - except Exception as e: - logging.warning(f"get io data failed, {e}") - return [] - return [k for k in data.keys()] - else: - return [] - - -def _get_raw_data(period, disk_list): - return get_io_data( - period, - disk_list, - COLLECT_STAGES, - ["read", "write", "flush", "discard"], - ) - - -def _get_io_stage_data(data): - io_stage_data = IOStageData() - for data_type in ('read', 'write', 'flush', 'discard'): - if data_type in data: - getattr(io_stage_data, data_type).latency = data[data_type][0] - getattr(io_stage_data, data_type).io_dump = data[data_type][1] - getattr(io_stage_data, data_type).io_length = data[data_type][2] - getattr(io_stage_data, data_type).iops = data[data_type][3] - return io_stage_data - - -def get_io_data_from_collect_plug(period, disk_list): - data_raw = _get_raw_data(period, disk_list) - if data_raw["ret"] == 0: - ret = {} - try: - data = json.loads(data_raw["message"]) - except json.decoder.JSONDecodeError as e: - logging.warning(f"get io data failed, {e}") - return None - - for disk in data: - disk_data = data[disk] - disk_ret = IOData() - for k, v in disk_data.items(): - try: - getattr(disk_ret, k) - setattr(disk_ret, k, _get_io_stage_data(v)) - except AttributeError: - logging.debug(f'no attr {k}') - continue - ret[disk] = disk_ret - return ret - logging.warning(f'get io data failed with message: {data_raw["message"]}') - return None +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. 
+# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. + +import json +import logging + +from sentryCollector.collect_plugin import ( + Result_Messages, + get_io_data, + is_iocollect_valid, +) +from .io_data import IOStageData, IOData + +COLLECT_STAGES = [ + "throtl", + "wbt", + "gettag", + "plug", + "bfq", + "hctx", + "requeue", + "rq_driver", + "bio", + "iocost", +] + + +def check_collect_valid(period): + data_raw = is_iocollect_valid(period) + if data_raw["ret"] == 0: + try: + data = json.loads(data_raw["message"]) + except Exception as e: + logging.warning(f"get io data failed, {e}") + return [] + return [k for k in data.keys()] + else: + return [] + + +def _get_raw_data(period, disk_list): + return get_io_data( + period, + disk_list, + COLLECT_STAGES, + ["read", "write", "flush", "discard"], + ) + + +def _get_io_stage_data(data): + io_stage_data = IOStageData() + for data_type in ("read", "write", "flush", "discard"): + if data_type in data: + getattr(io_stage_data, data_type).latency = data[data_type][0] + getattr(io_stage_data, data_type).io_dump = data[data_type][1] + getattr(io_stage_data, data_type).io_length = data[data_type][2] + getattr(io_stage_data, data_type).iops = data[data_type][3] + return io_stage_data + + +def get_io_data_from_collect_plug(period, disk_list): + data_raw = _get_raw_data(period, disk_list) + if data_raw["ret"] == 0: + ret = {} + try: + data = json.loads(data_raw["message"]) + except json.decoder.JSONDecodeError as e: + logging.warning(f"get io data failed, {e}") + return None + + for disk in data: + disk_data = data[disk] + disk_ret = IOData() + for k, v in 
disk_data.items(): + try: + getattr(disk_ret, k) + setattr(disk_ret, k, _get_io_stage_data(v)) + except AttributeError: + logging.debug(f"no attr {k}") + continue + ret[disk] = disk_ret + return ret + logging.warning(f'get io data failed with message: {data_raw["message"]}') + return None diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py index d996f8d..1ece8f7 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py @@ -1,48 +1,57 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
-import logging - -from .io_data import MetricName -from .threshold import Threshold -from .sliding_window import SlidingWindow -from .utils import get_metric_value_from_io_data_dict_by_metric_name - - -class Detector: - _metric_name: MetricName = None - _threshold: Threshold = None - _slidingWindow: SlidingWindow = None - - def __init__(self, metric_name: MetricName, threshold: Threshold, sliding_window: SlidingWindow): - self._metric_name = metric_name - self._threshold = threshold - self._slidingWindow = sliding_window - self._threshold.attach_observer(self._slidingWindow) - - def get_metric_name(self): - return self._metric_name - - def is_slow_io_event(self, io_data_dict_with_disk_name: dict): - logging.debug(f'Enter Detector: {self}') - metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name) - if metric_value > 1e-6: - logging.debug(f'Input metric value: {str(metric_value)}') - self._threshold.push_latest_data_to_queue(metric_value) - detection_result = self._slidingWindow.is_slow_io_event(metric_value) - logging.debug(f'Detection result: {str(detection_result)}') - logging.debug(f'Exit Detector: {self}') - return detection_result - - def __repr__(self): - return (f'disk_name: {self._metric_name.get_disk_name()}, stage_name: {self._metric_name.get_stage_name()},' - f' access_type_name: {self._metric_name.get_io_access_type_name()},' - f' metric_name: {self._metric_name.get_metric_name()}, threshold_type: {self._threshold},' - f' sliding_window_type: {self._slidingWindow}') +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +import logging + +from .io_data import MetricName +from .threshold import Threshold +from .sliding_window import SlidingWindow +from .utils import get_metric_value_from_io_data_dict_by_metric_name + + +class Detector: + _metric_name: MetricName = None + _threshold: Threshold = None + _slidingWindow: SlidingWindow = None + + def __init__( + self, + metric_name: MetricName, + threshold: Threshold, + sliding_window: SlidingWindow, + ): + self._metric_name = metric_name + self._threshold = threshold + self._slidingWindow = sliding_window + self._threshold.attach_observer(self._slidingWindow) + + def get_metric_name(self): + return self._metric_name + + def is_slow_io_event(self, io_data_dict_with_disk_name: dict): + logging.debug(f"Enter Detector: {self}") + metric_value = get_metric_value_from_io_data_dict_by_metric_name( + io_data_dict_with_disk_name, self._metric_name + ) + if metric_value > 1e-6: + logging.debug(f"Input metric value: {str(metric_value)}") + self._threshold.push_latest_data_to_queue(metric_value) + detection_result = self._slidingWindow.is_slow_io_event(metric_value) + logging.debug(f"Detection result: {str(detection_result)}") + logging.debug(f"Exit Detector: {self}") + return detection_result + + def __repr__(self): + return ( + f"disk_name: {self._metric_name.get_disk_name()}, stage_name: {self._metric_name.get_stage_name()}," + f" access_type_name: {self._metric_name.get_io_access_type_name()}," + f" metric_name: {self._metric_name.get_metric_name()}, threshold_type: {self._threshold}," + f" sliding_window_type: {self._slidingWindow}" + ) diff --git 
a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py index b177751..07a10b3 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py @@ -1,74 +1,82 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. - -from dataclasses import dataclass, field -from datetime import datetime -from typing import Optional - - -@dataclass -class BaseData: - latency: Optional[float] = field(default_factory=lambda: None) - io_dump: Optional[int] = field(default_factory=lambda: None) - io_length: Optional[int] = field(default_factory=lambda: None) - iops: Optional[int] = field(default_factory=lambda: None) - - -@dataclass -class IOStageData: - read: BaseData = field(default_factory=lambda: BaseData()) - write: BaseData = field(default_factory=lambda: BaseData()) - flush: BaseData = field(default_factory=lambda: BaseData()) - discard: BaseData = field(default_factory=lambda: BaseData()) - - -@dataclass -class IOData: - throtl: IOStageData = field(default_factory=lambda: IOStageData()) - wbt: IOStageData = field(default_factory=lambda: IOStageData()) - gettag: IOStageData = field(default_factory=lambda: IOStageData()) - iocost: IOStageData = field(default_factory=lambda: IOStageData()) - plug: IOStageData = field(default_factory=lambda: IOStageData()) - bfq: 
IOStageData = field(default_factory=lambda: IOStageData()) - hctx: IOStageData = field(default_factory=lambda: IOStageData()) - requeue: IOStageData = field(default_factory=lambda: IOStageData()) - rq_driver: IOStageData = field(default_factory=lambda: IOStageData()) - bio: IOStageData = field(default_factory=lambda: IOStageData()) - time_stamp: float = field(default_factory=lambda: datetime.now().timestamp()) - - -class MetricName: - _disk_name: str = None - _stage_name: str = None - _io_access_type_name: str = None - _metric_name: str = None - - def __init__(self, disk_name: str, stage_name: str, io_access_type_name: str, metric_name: str): - self._disk_name = disk_name - self._stage_name = stage_name - self._io_access_type_name = io_access_type_name - self._metric_name = metric_name - - def get_disk_name(self): - return self._disk_name - - def get_stage_name(self): - return self._stage_name - - def get_io_access_type_name(self): - return self._io_access_type_name - - def get_metric_name(self): - return self._metric_name - - def __repr__(self): - return (f'disk: {self._disk_name}, stage: {self._stage_name}, io_access_type: {self._io_access_type_name},' - f'metric: {self._metric_name}') +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + + +@dataclass +class BaseData: + latency: Optional[float] = field(default_factory=lambda: None) + io_dump: Optional[int] = field(default_factory=lambda: None) + io_length: Optional[int] = field(default_factory=lambda: None) + iops: Optional[int] = field(default_factory=lambda: None) + + +@dataclass +class IOStageData: + read: BaseData = field(default_factory=lambda: BaseData()) + write: BaseData = field(default_factory=lambda: BaseData()) + flush: BaseData = field(default_factory=lambda: BaseData()) + discard: BaseData = field(default_factory=lambda: BaseData()) + + +@dataclass +class IOData: + throtl: IOStageData = field(default_factory=lambda: IOStageData()) + wbt: IOStageData = field(default_factory=lambda: IOStageData()) + gettag: IOStageData = field(default_factory=lambda: IOStageData()) + iocost: IOStageData = field(default_factory=lambda: IOStageData()) + plug: IOStageData = field(default_factory=lambda: IOStageData()) + bfq: IOStageData = field(default_factory=lambda: IOStageData()) + hctx: IOStageData = field(default_factory=lambda: IOStageData()) + requeue: IOStageData = field(default_factory=lambda: IOStageData()) + rq_driver: IOStageData = field(default_factory=lambda: IOStageData()) + bio: IOStageData = field(default_factory=lambda: IOStageData()) + time_stamp: float = field(default_factory=lambda: datetime.now().timestamp()) + + +class MetricName: + _disk_name: str = None + _stage_name: str = None + _io_access_type_name: str = None + _metric_name: str = None + + def __init__( + self, + disk_name: str, + stage_name: str, + io_access_type_name: str, + metric_name: str, + ): + self._disk_name = disk_name + self._stage_name = stage_name + self._io_access_type_name = io_access_type_name + self._metric_name = metric_name + + def get_disk_name(self): + return self._disk_name + + def get_stage_name(self): + return self._stage_name + + def 
get_io_access_type_name(self): + return self._io_access_type_name + + def get_metric_name(self): + return self._metric_name + + def __repr__(self): + return ( + f"disk: {self._disk_name}, stage: {self._stage_name}, io_access_type: {self._io_access_type_name}," + f"metric: {self._metric_name}" + ) diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py index b537772..047aac5 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py @@ -1,113 +1,117 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
- -from enum import Enum, unique -import numpy as np - - -@unique -class SlidingWindowType(Enum): - NotContinuousSlidingWindow = 0 - ContinuousSlidingWindow = 1 - MedianSlidingWindow = 2 - - -class SlidingWindow: - _ai_threshold = None - _queue_length = None - _queue_threshold = None - _io_data_queue: list = None - _io_data_queue_abnormal_tag: list = None - - def __init__(self, queue_length: int, threshold: int): - self._queue_length = queue_length - self._queue_threshold = threshold - self._io_data_queue = [] - self._io_data_queue_abnormal_tag = [] - - def push(self, data: float): - if len(self._io_data_queue) == self._queue_length: - self._io_data_queue.pop(0) - self._io_data_queue_abnormal_tag.pop(0) - self._io_data_queue.append(data) - self._io_data_queue_abnormal_tag.append(data >= self._ai_threshold if self._ai_threshold is not None else False) - - def update(self, threshold): - if self._ai_threshold == threshold: - return - self._ai_threshold = threshold - self._io_data_queue_abnormal_tag.clear() - for data in self._io_data_queue: - self._io_data_queue_abnormal_tag.append(data >= self._ai_threshold) - - def is_slow_io_event(self, data): - return False, None, None - - def __repr__(self): - return "SlidingWindow" - - -class NotContinuousSlidingWindow(SlidingWindow): - def is_slow_io_event(self, data): - super().push(data) - if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: - return False, self._io_data_queue, self._ai_threshold - if self._io_data_queue_abnormal_tag.count(True) >= self._queue_threshold: - return True, self._io_data_queue, self._ai_threshold - return False, self._io_data_queue, self._ai_threshold - - def __repr__(self): - return "NotContinuousSlidingWindow" - - -class ContinuousSlidingWindow(SlidingWindow): - def is_slow_io_event(self, data): - super().push(data) - if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: - return False, self._io_data_queue, self._ai_threshold - 
consecutive_count = 0 - for tag in self._io_data_queue_abnormal_tag: - if tag: - consecutive_count += 1 - if consecutive_count >= self._queue_threshold: - return True, self._io_data_queue, self._ai_threshold - else: - consecutive_count = 0 - return False, self._io_data_queue, self._ai_threshold - - def __repr__(self): - return "ContinuousSlidingWindow" - - -class MedianSlidingWindow(SlidingWindow): - def is_slow_io_event(self, data): - super().push(data) - if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: - return False, self._io_data_queue, self._ai_threshold - median = np.median(self._io_data_queue) - if median >= self._ai_threshold: - return True, self._io_data_queue, self._ai_threshold - return False, self._io_data_queue, self._ai_threshold - - def __repr__(self): - return "MedianSlidingWindow" - - -class SlidingWindowFactory: - def get_sliding_window(self, sliding_window_type: SlidingWindowType, *args, **kwargs): - if sliding_window_type == SlidingWindowType.NotContinuousSlidingWindow: - return NotContinuousSlidingWindow(*args, **kwargs) - elif sliding_window_type == SlidingWindowType.ContinuousSlidingWindow: - return ContinuousSlidingWindow(*args, **kwargs) - elif sliding_window_type == SlidingWindowType.MedianSlidingWindow: - return MedianSlidingWindow(*args, **kwargs) - else: - return NotContinuousSlidingWindow(*args, **kwargs) +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +from enum import Enum, unique +import numpy as np + + +@unique +class SlidingWindowType(Enum): + NotContinuousSlidingWindow = 0 + ContinuousSlidingWindow = 1 + MedianSlidingWindow = 2 + + +class SlidingWindow: + _ai_threshold = None + _queue_length = None + _queue_threshold = None + _io_data_queue: list = None + _io_data_queue_abnormal_tag: list = None + + def __init__(self, queue_length: int, threshold: int): + self._queue_length = queue_length + self._queue_threshold = threshold + self._io_data_queue = [] + self._io_data_queue_abnormal_tag = [] + + def push(self, data: float): + if len(self._io_data_queue) == self._queue_length: + self._io_data_queue.pop(0) + self._io_data_queue_abnormal_tag.pop(0) + self._io_data_queue.append(data) + self._io_data_queue_abnormal_tag.append( + data >= self._ai_threshold if self._ai_threshold is not None else False + ) + + def update(self, threshold): + if self._ai_threshold == threshold: + return + self._ai_threshold = threshold + self._io_data_queue_abnormal_tag.clear() + for data in self._io_data_queue: + self._io_data_queue_abnormal_tag.append(data >= self._ai_threshold) + + def is_slow_io_event(self, data): + return False, None, None + + def __repr__(self): + return "SlidingWindow" + + +class NotContinuousSlidingWindow(SlidingWindow): + def is_slow_io_event(self, data): + super().push(data) + if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: + return False, self._io_data_queue, self._ai_threshold + if self._io_data_queue_abnormal_tag.count(True) >= self._queue_threshold: + return True, self._io_data_queue, self._ai_threshold + return False, self._io_data_queue, self._ai_threshold + + def __repr__(self): + return "NotContinuousSlidingWindow" + + +class ContinuousSlidingWindow(SlidingWindow): + def is_slow_io_event(self, data): + super().push(data) + if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: + return False, self._io_data_queue, self._ai_threshold + 
consecutive_count = 0 + for tag in self._io_data_queue_abnormal_tag: + if tag: + consecutive_count += 1 + if consecutive_count >= self._queue_threshold: + return True, self._io_data_queue, self._ai_threshold + else: + consecutive_count = 0 + return False, self._io_data_queue, self._ai_threshold + + def __repr__(self): + return "ContinuousSlidingWindow" + + +class MedianSlidingWindow(SlidingWindow): + def is_slow_io_event(self, data): + super().push(data) + if len(self._io_data_queue) < self._queue_length or self._ai_threshold is None: + return False, self._io_data_queue, self._ai_threshold + median = np.median(self._io_data_queue) + if median >= self._ai_threshold: + return True, self._io_data_queue, self._ai_threshold + return False, self._io_data_queue, self._ai_threshold + + def __repr__(self): + return "MedianSlidingWindow" + + +class SlidingWindowFactory: + def get_sliding_window( + self, sliding_window_type: SlidingWindowType, *args, **kwargs + ): + if sliding_window_type == SlidingWindowType.NotContinuousSlidingWindow: + return NotContinuousSlidingWindow(*args, **kwargs) + elif sliding_window_type == SlidingWindowType.ContinuousSlidingWindow: + return ContinuousSlidingWindow(*args, **kwargs) + elif sliding_window_type == SlidingWindowType.MedianSlidingWindow: + return MedianSlidingWindow(*args, **kwargs) + else: + return NotContinuousSlidingWindow(*args, **kwargs) diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py index 72d6dd2..1ba669c 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py @@ -1,133 +1,156 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. 
-# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. - -import time -import signal -import logging - -from .detector import Detector -from .threshold import ThresholdFactory, AbsoluteThreshold -from .sliding_window import SlidingWindowFactory -from .utils import (get_threshold_type_enum, get_sliding_window_type_enum, get_data_queue_size_and_update_size, - get_log_level) -from .config_parser import ConfigParser -from .data_access import get_io_data_from_collect_plug, check_collect_valid -from .io_data import MetricName -from .alarm_report import AlarmReport - -CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini" - - -def sig_handler(signum, frame): - logging.info("receive signal: %d", signum) - AlarmReport().report_fail(f"receive signal: {signum}") - exit(signum) - - -class SlowIODetection: - _config_parser = None - _disk_list = None - _detector_name_list = [] - _detectors = {} - - def __init__(self, config_parser: ConfigParser): - self._config_parser = config_parser - self.__set_log_format() - self.__init_detector_name_list() - self.__init_detector() - - def __set_log_format(self): - log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s" - log_level = get_log_level(self._config_parser.get_log_level()) - logging.basicConfig(level=log_level, format=log_format) - - def __init_detector_name_list(self): - self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency()) - for disk in self._disk_list: - self._detector_name_list.append(MetricName(disk, "bio", "read", "latency")) - self._detector_name_list.append(MetricName(disk, 
"bio", "write", "latency")) - - def __init_detector(self): - train_data_duration, train_update_duration = (self._config_parser. - get_train_data_duration_and_train_update_duration()) - slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency() - threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type()) - data_queue_size, update_size = get_data_queue_size_and_update_size(train_data_duration, - train_update_duration, - slow_io_detection_frequency) - sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type()) - window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold() - - for detector_name in self._detector_name_list: - threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size, - data_queue_update_size=update_size) - sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type, queue_length=window_size, - threshold=window_threshold) - detector = Detector(detector_name, threshold, sliding_window) - # 绝对阈值的阈值初始化 - if isinstance(threshold, AbsoluteThreshold): - threshold.set_threshold(self._config_parser.get_absolute_threshold()) - self._detectors[detector_name] = detector - logging.info(f"add detector: {detector}") - - def launch(self): - while True: - logging.debug('step0. AI threshold slow io event detection is looping.') - - # Step1:获取IO数据 - io_data_dict_with_disk_name = get_io_data_from_collect_plug( - self._config_parser.get_slow_io_detect_frequency(), self._disk_list - ) - logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}') - if io_data_dict_with_disk_name is None: - continue - # Step2:慢IO检测 - logging.debug('step2. 
Start to detection slow io event.') - slow_io_event_list = [] - for metric_name, detector in self._detectors.items(): - result = detector.is_slow_io_event(io_data_dict_with_disk_name) - if result[0]: - slow_io_event_list.append((detector.get_metric_name(), result)) - logging.debug('step2. End to detection slow io event.') - - # Step3:慢IO事件上报 - logging.debug('step3. Report slow io event to sysSentry.') - for slow_io_event in slow_io_event_list: - metric_name: MetricName = slow_io_event[0] - result = slow_io_event[1] - AlarmReport.report_major_alm(f"disk {metric_name.get_disk_name()} has slow io event." - f"stage: {metric_name.get_metric_name()}," - f"type: {metric_name.get_io_access_type_name()}," - f"metric: {metric_name.get_metric_name()}," - f"current window: {result[1]}," - f"threshold: {result[2]}") - logging.error(f"slow io event happen: {str(slow_io_event)}") - - # Step4:等待检测时间 - logging.debug('step4. Wait to start next slow io event detection loop.') - time.sleep(self._config_parser.get_slow_io_detect_frequency()) - - -def main(): - # Step1:注册消息处理函数 - signal.signal(signal.SIGINT, sig_handler) - signal.signal(signal.SIGTERM, sig_handler) - # Step2:断点恢复 - # todo: - - # Step3:读取配置 - config_file_name = CONFIG_FILE - config = ConfigParser(config_file_name) - config.read_config_from_file() - - # Step4:启动慢IO检测 - slow_io_detection = SlowIODetection(config) - slow_io_detection.launch() +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +import time +import signal +import logging + +from .detector import Detector +from .threshold import ThresholdFactory, AbsoluteThreshold +from .sliding_window import SlidingWindowFactory +from .utils import ( + get_threshold_type_enum, + get_sliding_window_type_enum, + get_data_queue_size_and_update_size, + get_log_level, +) +from .config_parser import ConfigParser +from .data_access import get_io_data_from_collect_plug, check_collect_valid +from .io_data import MetricName +from .alarm_report import AlarmReport + +CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini" + + +def sig_handler(signum, frame): + logging.info("receive signal: %d", signum) + AlarmReport().report_fail(f"receive signal: {signum}") + exit(signum) + + +class SlowIODetection: + _config_parser = None + _disk_list = None + _detector_name_list = [] + _detectors = {} + + def __init__(self, config_parser: ConfigParser): + self._config_parser = config_parser + self.__set_log_format() + self.__init_detector_name_list() + self.__init_detector() + + def __set_log_format(self): + log_format = ( + "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s" + ) + log_level = get_log_level(self._config_parser.get_log_level()) + logging.basicConfig(level=log_level, format=log_format) + + def __init_detector_name_list(self): + self._disk_list = check_collect_valid( + self._config_parser.get_slow_io_detect_frequency() + ) + for disk in self._disk_list: + self._detector_name_list.append(MetricName(disk, "bio", "read", "latency")) + self._detector_name_list.append(MetricName(disk, "bio", "write", "latency")) + + def __init_detector(self): + train_data_duration, train_update_duration = ( + self._config_parser.get_train_data_duration_and_train_update_duration() + ) + slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency() + threshold_type = get_threshold_type_enum( + self._config_parser.get_algorithm_type() + ) + data_queue_size, update_size = 
get_data_queue_size_and_update_size( + train_data_duration, train_update_duration, slow_io_detection_frequency + ) + sliding_window_type = get_sliding_window_type_enum( + self._config_parser.get_sliding_window_type() + ) + window_size, window_threshold = ( + self._config_parser.get_window_size_and_window_minimum_threshold() + ) + + for detector_name in self._detector_name_list: + threshold = ThresholdFactory().get_threshold( + threshold_type, + data_queue_size=data_queue_size, + data_queue_update_size=update_size, + ) + sliding_window = SlidingWindowFactory().get_sliding_window( + sliding_window_type, + queue_length=window_size, + threshold=window_threshold, + ) + detector = Detector(detector_name, threshold, sliding_window) + # 绝对阈值的阈值初始化 + if isinstance(threshold, AbsoluteThreshold): + threshold.set_threshold(self._config_parser.get_absolute_threshold()) + self._detectors[detector_name] = detector + logging.info(f"add detector: {detector}") + + def launch(self): + while True: + logging.debug("step0. AI threshold slow io event detection is looping.") + + # Step1:获取IO数据 + io_data_dict_with_disk_name = get_io_data_from_collect_plug( + self._config_parser.get_slow_io_detect_frequency(), self._disk_list + ) + logging.debug(f"step1. Get io data: {str(io_data_dict_with_disk_name)}") + if io_data_dict_with_disk_name is None: + continue + # Step2:慢IO检测 + logging.debug("step2. Start to detection slow io event.") + slow_io_event_list = [] + for metric_name, detector in self._detectors.items(): + result = detector.is_slow_io_event(io_data_dict_with_disk_name) + if result[0]: + slow_io_event_list.append((detector.get_metric_name(), result)) + logging.debug("step2. End to detection slow io event.") + + # Step3:慢IO事件上报 + logging.debug("step3. 
Report slow io event to sysSentry.") + for slow_io_event in slow_io_event_list: + metric_name: MetricName = slow_io_event[0] + result = slow_io_event[1] + AlarmReport.report_major_alm( + f"disk {metric_name.get_disk_name()} has slow io event." + f"stage: {metric_name.get_metric_name()}," + f"type: {metric_name.get_io_access_type_name()}," + f"metric: {metric_name.get_metric_name()}," + f"current window: {result[1]}," + f"threshold: {result[2]}" + ) + logging.error(f"slow io event happen: {str(slow_io_event)}") + + # Step4:等待检测时间 + logging.debug("step4. Wait to start next slow io event detection loop.") + time.sleep(self._config_parser.get_slow_io_detect_frequency()) + + +def main(): + # Step1:注册消息处理函数 + signal.signal(signal.SIGINT, sig_handler) + signal.signal(signal.SIGTERM, sig_handler) + # Step2:断点恢复 + # todo: + + # Step3:读取配置 + config_file_name = CONFIG_FILE + config = ConfigParser(config_file_name) + config.read_config_from_file() + + # Step4:启动慢IO检测 + slow_io_detection = SlowIODetection(config) + slow_io_detection.launch() diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py index ca8cd8f..0f09233 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py @@ -1,160 +1,181 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. 
-# See the Mulan PSL v2 for more details. -import logging -from enum import Enum -import queue -import numpy as np -import math - -from .sliding_window import SlidingWindow - - -class ThresholdState(Enum): - INIT = 0 - START = 1 - - -class Threshold: - threshold = None - data_queue: queue.Queue = None - data_queue_update_size: int = None - new_data_size: int = None - threshold_state: ThresholdState = None - - def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000): - self._observer = None - self.data_queue = queue.Queue(data_queue_size) - self.data_queue_update_size = data_queue_update_size - self.new_data_size = 0 - self.threshold_state = ThresholdState.INIT - self.threshold = math.inf - - def set_threshold(self, threshold): - self.threshold = threshold - self.threshold_state = ThresholdState.START - self.notify_observer() - - def get_threshold(self): - if self.threshold_state == ThresholdState.INIT: - return None - return self.threshold - - def is_abnormal(self, data): - if self.threshold_state == ThresholdState.INIT: - return False - return data >= self.threshold - - # 使用观察者模式,当阈值更新时,自动同步刷新滑窗中的阈值 - def attach_observer(self, observer: SlidingWindow): - self._observer = observer - - def notify_observer(self): - if self._observer is not None: - self._observer.update(self.threshold) - - def push_latest_data_to_queue(self, data): - pass - - def __repr__(self): - return "Threshold" - - -class AbsoluteThreshold(Threshold): - def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000): - super().__init__(data_queue_size, data_queue_update_size) - - def push_latest_data_to_queue(self, data): - pass - - def __repr__(self): - return "AbsoluteThreshold" - - -class BoxplotThreshold(Threshold): - def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000): - super().__init__(data_queue_size, data_queue_update_size) - self.parameter = parameter - - def 
_update_threshold(self): - data = list(self.data_queue.queue) - q1 = np.percentile(data, 25) - q3 = np.percentile(data, 75) - iqr = q3 - q1 - self.threshold = q3 + self.parameter * iqr - if self.threshold_state == ThresholdState.INIT: - self.threshold_state = ThresholdState.START - self.notify_observer() - - def push_latest_data_to_queue(self, data): - try: - self.data_queue.put(data, block=False) - except queue.Full: - self.data_queue.get() - self.data_queue.put(data) - self.new_data_size += 1 - if (self.data_queue.full() and (self.threshold_state == ThresholdState.INIT or - (self.threshold_state == ThresholdState.START and - self.new_data_size >= self.data_queue_update_size))): - self._update_threshold() - self.new_data_size = 0 - - def __repr__(self): - return "BoxplotThreshold" - - -class NSigmaThreshold(Threshold): - def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000): - super().__init__(data_queue_size, data_queue_update_size) - self.parameter = parameter - - def _update_threshold(self): - data = list(self.data_queue.queue) - mean = np.mean(data) - std = np.std(data) - self.threshold = mean + self.parameter * std - if self.threshold_state == ThresholdState.INIT: - self.threshold_state = ThresholdState.START - self.notify_observer() - - def push_latest_data_to_queue(self, data): - try: - self.data_queue.put(data, block=False) - except queue.Full: - self.data_queue.get() - self.data_queue.put(data) - self.new_data_size += 1 - if (self.data_queue.full() and (self.threshold_state == ThresholdState.INIT or - (self.threshold_state == ThresholdState.START and - self.new_data_size >= self.data_queue_update_size))): - self._update_threshold() - self.new_data_size = 0 - - def __repr__(self): - return "NSigmaThreshold" - - -class ThresholdType(Enum): - AbsoluteThreshold = 0 - BoxplotThreshold = 1 - NSigmaThreshold = 2 - - -class ThresholdFactory: - def get_threshold(self, threshold_type: ThresholdType, *args, 
**kwargs): - if threshold_type == ThresholdType.AbsoluteThreshold: - return AbsoluteThreshold(*args, **kwargs) - elif threshold_type == ThresholdType.BoxplotThreshold: - return BoxplotThreshold(*args, **kwargs) - elif threshold_type == ThresholdType.NSigmaThreshold: - return NSigmaThreshold(*args, **kwargs) - else: - raise ValueError(f"Invalid threshold type: {threshold_type}") - +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +import logging +from enum import Enum +import queue +import numpy as np +import math + +from .sliding_window import SlidingWindow + + +class ThresholdState(Enum): + INIT = 0 + START = 1 + + +class Threshold: + threshold = None + data_queue: queue.Queue = None + data_queue_update_size: int = None + new_data_size: int = None + threshold_state: ThresholdState = None + + def __init__( + self, data_queue_size: int = 10000, data_queue_update_size: int = 1000 + ): + self._observer = None + self.data_queue = queue.Queue(data_queue_size) + self.data_queue_update_size = data_queue_update_size + self.new_data_size = 0 + self.threshold_state = ThresholdState.INIT + self.threshold = math.inf + + def set_threshold(self, threshold): + self.threshold = threshold + self.threshold_state = ThresholdState.START + self.notify_observer() + + def get_threshold(self): + if self.threshold_state == ThresholdState.INIT: + return None + return self.threshold + + def is_abnormal(self, data): + if self.threshold_state == ThresholdState.INIT: + return False + return data >= 
self.threshold + + # 使用观察者模式,当阈值更新时,自动同步刷新滑窗中的阈值 + def attach_observer(self, observer: SlidingWindow): + self._observer = observer + + def notify_observer(self): + if self._observer is not None: + self._observer.update(self.threshold) + + def push_latest_data_to_queue(self, data): + pass + + def __repr__(self): + return "Threshold" + + +class AbsoluteThreshold(Threshold): + def __init__( + self, data_queue_size: int = 10000, data_queue_update_size: int = 1000 + ): + super().__init__(data_queue_size, data_queue_update_size) + + def push_latest_data_to_queue(self, data): + pass + + def __repr__(self): + return "AbsoluteThreshold" + + +class BoxplotThreshold(Threshold): + def __init__( + self, + parameter: float = 1.5, + data_queue_size: int = 10000, + data_queue_update_size: int = 1000, + ): + super().__init__(data_queue_size, data_queue_update_size) + self.parameter = parameter + + def _update_threshold(self): + data = list(self.data_queue.queue) + q1 = np.percentile(data, 25) + q3 = np.percentile(data, 75) + iqr = q3 - q1 + self.threshold = q3 + self.parameter * iqr + if self.threshold_state == ThresholdState.INIT: + self.threshold_state = ThresholdState.START + self.notify_observer() + + def push_latest_data_to_queue(self, data): + try: + self.data_queue.put(data, block=False) + except queue.Full: + self.data_queue.get() + self.data_queue.put(data) + self.new_data_size += 1 + if self.data_queue.full() and ( + self.threshold_state == ThresholdState.INIT + or ( + self.threshold_state == ThresholdState.START + and self.new_data_size >= self.data_queue_update_size + ) + ): + self._update_threshold() + self.new_data_size = 0 + + def __repr__(self): + return "BoxplotThreshold" + + +class NSigmaThreshold(Threshold): + def __init__( + self, + parameter: float = 2.0, + data_queue_size: int = 10000, + data_queue_update_size: int = 1000, + ): + super().__init__(data_queue_size, data_queue_update_size) + self.parameter = parameter + + def _update_threshold(self): + data = 
list(self.data_queue.queue) + mean = np.mean(data) + std = np.std(data) + self.threshold = mean + self.parameter * std + if self.threshold_state == ThresholdState.INIT: + self.threshold_state = ThresholdState.START + self.notify_observer() + + def push_latest_data_to_queue(self, data): + try: + self.data_queue.put(data, block=False) + except queue.Full: + self.data_queue.get() + self.data_queue.put(data) + self.new_data_size += 1 + if self.data_queue.full() and ( + self.threshold_state == ThresholdState.INIT + or ( + self.threshold_state == ThresholdState.START + and self.new_data_size >= self.data_queue_update_size + ) + ): + self._update_threshold() + self.new_data_size = 0 + + def __repr__(self): + return "NSigmaThreshold" + + +class ThresholdType(Enum): + AbsoluteThreshold = 0 + BoxplotThreshold = 1 + NSigmaThreshold = 2 + + +class ThresholdFactory: + def get_threshold(self, threshold_type: ThresholdType, *args, **kwargs): + if threshold_type == ThresholdType.AbsoluteThreshold: + return AbsoluteThreshold(*args, **kwargs) + elif threshold_type == ThresholdType.BoxplotThreshold: + return BoxplotThreshold(*args, **kwargs) + elif threshold_type == ThresholdType.NSigmaThreshold: + return NSigmaThreshold(*args, **kwargs) + else: + raise ValueError(f"Invalid threshold type: {threshold_type}") diff --git a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py index 937989d..b5df147 100644 --- a/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py +++ b/sysSentry-1.0.2/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py @@ -1,67 +1,73 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
import logging
from dataclasses import asdict

from .threshold import ThresholdType
from .sliding_window import SlidingWindowType
from .io_data import MetricName, IOData


def get_threshold_type_enum(algorithm_type: str):
    """Translate an algorithm name into the matching ``ThresholdType`` member.

    The comparison is case-insensitive. Names other than ``absolute``,
    ``boxplot`` and ``n_sigma`` are reported at INFO level and resolved to
    ``ThresholdType.BoxplotThreshold``.
    """
    wanted = algorithm_type.lower()
    known_types = {
        'absolute': ThresholdType.AbsoluteThreshold,
        'boxplot': ThresholdType.BoxplotThreshold,
        'n_sigma': ThresholdType.NSigmaThreshold,
    }
    if wanted in known_types:
        return known_types[wanted]
    logging.info('not found correct algorithm type, use default: boxplot.')
    return ThresholdType.BoxplotThreshold


def get_sliding_window_type_enum(sliding_window_type: str):
    """Translate a window name into the matching ``SlidingWindowType`` member.

    The comparison is case-insensitive. Names other than ``not_continuous``,
    ``continuous`` and ``median`` are reported at INFO level and resolved to
    ``SlidingWindowType.NotContinuousSlidingWindow``.
    """
    wanted = sliding_window_type.lower()
    known_types = {
        'not_continuous': SlidingWindowType.NotContinuousSlidingWindow,
        'continuous': SlidingWindowType.ContinuousSlidingWindow,
        'median': SlidingWindowType.MedianSlidingWindow,
    }
    if wanted in known_types:
        return known_types[wanted]
    logging.info('not found correct sliding window type, use default: not_continuous.')
    return SlidingWindowType.NotContinuousSlidingWindow


def get_metric_value_from_io_data_dict_by_metric_name(io_data_dict: dict, metric_name: MetricName):
    """Fetch one metric value from ``io_data_dict``.

    The lookup path is disk name -> stage name -> IO access type name ->
    metric name, each obtained from ``metric_name``. The per-disk entry is
    converted with ``dataclasses.asdict`` before the nested lookups.

    Returns:
        The stored value, or ``None`` when any step raises ``KeyError``.
    """
    try:
        per_disk = io_data_dict[metric_name.get_disk_name()]
        per_stage = asdict(per_disk)[metric_name.get_stage_name()]
        per_access_type = per_stage[metric_name.get_io_access_type_name()]
        return per_access_type[metric_name.get_metric_name()]
    except KeyError:
        return None


def get_data_queue_size_and_update_size(training_data_duration: float, train_update_duration: float,
                                        slow_io_detect_frequency: int):
    """Convert the two durations into sample counts.

    Each duration is multiplied by 60 * 60, divided by
    ``slow_io_detect_frequency`` and truncated with ``int``.

    Returns:
        A ``(data_queue_size, update_size)`` tuple.
    """
    return tuple(
        int(duration * 60 * 60 / slow_io_detect_frequency)
        for duration in (training_data_duration, train_update_duration)
    )


def get_log_level(log_level: str):
    """Return the ``logging`` constant for a level name, or ``None`` if unknown.

    Recognised names (case-insensitive): debug, info, warning, fatal.
    """
    levels_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'fatal': logging.FATAL,
    }
    return levels_by_name.get(log_level.lower())
# coding: utf-8
# Copyright (c) 2024 Huawei Technologies Co., Ltd.
# sysSentry is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
import logging
from dataclasses import asdict

from .threshold import ThresholdType
from .sliding_window import SlidingWindowType
from .io_data import MetricName, IOData


def get_threshold_type_enum(algorithm_type: str):
    """Map a configured algorithm name to a ``ThresholdType`` member.

    Args:
        algorithm_type: Algorithm name; matched case-insensitively against
            ``absolute``, ``boxplot`` and ``n_sigma``.

    Returns:
        The matching ``ThresholdType`` member. Any other name is logged at
        INFO level and falls back to ``ThresholdType.BoxplotThreshold``.
    """
    # Normalise once instead of lower-casing for every comparison.
    normalized = algorithm_type.lower()
    if normalized == "absolute":
        return ThresholdType.AbsoluteThreshold
    if normalized == "boxplot":
        return ThresholdType.BoxplotThreshold
    if normalized == "n_sigma":
        return ThresholdType.NSigmaThreshold
    logging.info("not found correct algorithm type, use default: boxplot.")
    return ThresholdType.BoxplotThreshold


def get_sliding_window_type_enum(sliding_window_type: str):
    """Map a configured window name to a ``SlidingWindowType`` member.

    Args:
        sliding_window_type: Window name; matched case-insensitively against
            ``not_continuous``, ``continuous`` and ``median``.

    Returns:
        The matching ``SlidingWindowType`` member. Any other name is logged
        at INFO level and falls back to
        ``SlidingWindowType.NotContinuousSlidingWindow``.
    """
    normalized = sliding_window_type.lower()
    if normalized == "not_continuous":
        return SlidingWindowType.NotContinuousSlidingWindow
    if normalized == "continuous":
        return SlidingWindowType.ContinuousSlidingWindow
    if normalized == "median":
        return SlidingWindowType.MedianSlidingWindow
    logging.info("not found correct sliding window type, use default: not_continuous.")
    return SlidingWindowType.NotContinuousSlidingWindow


def get_metric_value_from_io_data_dict_by_metric_name(
    io_data_dict: dict, metric_name: MetricName
):
    """Look up a single metric value in ``io_data_dict``.

    The value is addressed by four keys taken from ``metric_name``, in this
    order: disk name, stage name, IO access type name, metric name.

    Args:
        io_data_dict: Mapping from disk name to a per-disk record. The record
            is passed to ``dataclasses.asdict``, so it must be a dataclass
            instance.
        metric_name: Object providing the four key getters used below.

    Returns:
        The stored metric value, or ``None`` if any of the four lookups
        raises ``KeyError``.
    """
    try:
        io_data: IOData = io_data_dict[metric_name.get_disk_name()]
        # asdict() turns the nested dataclasses into plain dicts so the
        # remaining levels can be indexed by name.
        io_stage_data = asdict(io_data)[metric_name.get_stage_name()]
        base_data = io_stage_data[metric_name.get_io_access_type_name()]
        return base_data[metric_name.get_metric_name()]
    except KeyError:
        return None


def get_data_queue_size_and_update_size(
    training_data_duration: float,
    train_update_duration: float,
    slow_io_detect_frequency: int,
):
    """Convert the training durations into numbers of samples.

    Each duration is multiplied by 60 * 60 and divided by
    ``slow_io_detect_frequency``; the quotient is truncated with ``int``.
    (Presumably durations are hours and the frequency is seconds per
    sample -- confirm against the config documentation.)

    Args:
        training_data_duration: Duration covered by the full data queue.
        train_update_duration: Duration of new data between threshold updates.
        slow_io_detect_frequency: Divisor applied to both durations.

    Returns:
        A ``(data_queue_size, update_size)`` tuple of ints.

    Raises:
        ZeroDivisionError: If ``slow_io_detect_frequency`` is 0.
    """
    data_queue_size = int(training_data_duration * 60 * 60 / slow_io_detect_frequency)
    update_size = int(train_update_duration * 60 * 60 / slow_io_detect_frequency)
    return data_queue_size, update_size


def get_log_level(log_level: str):
    """Translate a level name into the corresponding ``logging`` constant.

    Args:
        log_level: Level name, matched case-insensitively.

    Returns:
        ``logging.DEBUG``, ``INFO``, ``WARNING``, ``ERROR``, ``CRITICAL`` or
        ``FATAL`` for the names ``debug``, ``info``, ``warning``, ``error``,
        ``critical`` and ``fatal``; ``None`` for any other name.
    """
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        # "error" and "critical" are standard level names that previously
        # fell through to None.
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
        "fatal": logging.FATAL,
    }
    return levels.get(log_level.lower())