diff --git a/ai_block_io-adapt-alarm-module.patch b/ai_block_io-adapt-alarm-module.patch new file mode 100644 index 0000000000000000000000000000000000000000..f24974b0997f616c98c5734b2cb17f118474379b --- /dev/null +++ b/ai_block_io-adapt-alarm-module.patch @@ -0,0 +1,221 @@ +From 367f8ab8a5ad26d80caf1bc4529c79d279ef0fb1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com> +Date: Thu, 10 Oct 2024 17:21:48 +0800 +Subject: [PATCH] ai_block_io adapt alarm module + +--- + config/tasks/ai_block_io.mod | 4 +- + .../sentryPlugins/ai_block_io/ai_block_io.py | 28 +++++--- + .../sentryPlugins/ai_block_io/alarm_report.py | 65 ++++++++++++++----- + .../sentryPlugins/ai_block_io/data_access.py | 5 +- + .../sentryPlugins/ai_block_io/detector.py | 2 +- + 5 files changed, 73 insertions(+), 31 deletions(-) + +diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod +index 1971d7d..82f4f0b 100644 +--- a/config/tasks/ai_block_io.mod ++++ b/config/tasks/ai_block_io.mod +@@ -2,4 +2,6 @@ + enabled=yes + task_start=/usr/bin/python3 /usr/bin/ai_block_io + task_stop=pkill -f /usr/bin/ai_block_io +-type=oneshot +\ No newline at end of file ++type=oneshot ++alarm_id=1002 ++alarm_clear_time=5 +\ No newline at end of file +diff --git a/src/python/sentryPlugins/ai_block_io/ai_block_io.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py +index 3b00ef3..77104a9 100644 +--- a/src/python/sentryPlugins/ai_block_io/ai_block_io.py ++++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py +@@ -20,14 +20,14 @@ from .utils import get_data_queue_size_and_update_size + from .config_parser import ConfigParser + from .data_access import get_io_data_from_collect_plug, check_collect_valid + from .io_data import MetricName +-from .alarm_report import AlarmReport ++from .alarm_report import Xalarm, Report + + CONFIG_FILE = "/etc/sysSentry/plugins/ai_block_io.ini" + + + def sig_handler(signum, frame): + logging.info("receive signal: %d", signum) +- AlarmReport().report_fail(f"receive signal: {signum}") ++ Report.report_pass(f"receive signal: {signum}, exiting...") + exit(signum) + + +@@ -44,6 +44,10 @@ class SlowIODetection: + + def __init_detector_name_list(self): + self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency()) ++ if self._disk_list is None: ++ Report.report_pass("get available disk error, please check if the collector plug is enable. exiting...") ++ exit(1) ++ + logging.info(f"ai_block_io plug has found disks: {self._disk_list}") + disks_to_detection: list = self._config_parser.get_disks_to_detection() + # 情况1:None,则启用所有磁盘检测 +@@ -101,7 +105,8 @@ class SlowIODetection: + ) + logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}') + if io_data_dict_with_disk_name is None: +- continue ++ Report.report_pass("get io data error, please check if the collector plug is enable. exitting...") ++ exit(1) + + # Step2:慢IO检测 + logging.debug('step2. Start to detection slow io event.') +@@ -117,13 +122,16 @@ class SlowIODetection: + for slow_io_event in slow_io_event_list: + metric_name: MetricName = slow_io_event[0] + result = slow_io_event[1] +- alarm_content = (f"disk {metric_name.get_disk_name()} has slow io event. " +- f"stage is: {metric_name.get_stage_name()}, " +- f"io access type is: {metric_name.get_io_access_type_name()}, " +- f"metric is: {metric_name.get_metric_name()}, " +- f"current window is: {result[1]}, " +- f"threshold is: {result[2]}") +- AlarmReport.report_major_alm(alarm_content) ++ alarm_content = { ++ "driver_name": f"{metric_name.get_disk_name()}", ++ "reason": "disk_slow", ++ "block_stack": f"{metric_name.get_stage_name()}", ++ "io_type": f"{metric_name.get_io_access_type_name()}", ++ "alarm_source": "ai_block_io", ++ "alarm_type": "latency", ++ "details": f"current window is: {result[1]}, threshold is: {result[2]}.", ++ } ++ Xalarm.major(alarm_content) + logging.warning(alarm_content) + + # Step4:等待检测时间 +diff --git a/src/python/sentryPlugins/ai_block_io/alarm_report.py b/src/python/sentryPlugins/ai_block_io/alarm_report.py +index 230c8cd..92bd6e3 100644 +--- a/src/python/sentryPlugins/ai_block_io/alarm_report.py ++++ b/src/python/sentryPlugins/ai_block_io/alarm_report.py +@@ -9,41 +9,72 @@ + # PURPOSE. + # See the Mulan PSL v2 for more details. + +-from syssentry.result import ResultLevel, report_result + import logging + import json + ++from xalarm.sentry_notify import ( ++ xalarm_report, ++ MINOR_ALM, ++ MAJOR_ALM, ++ CRITICAL_ALM, ++ ALARM_TYPE_OCCUR, ++ ALARM_TYPE_RECOVER, ++) ++ ++from syssentry.result import ResultLevel, report_result ++ + +-class AlarmReport: ++class Report: + TASK_NAME = "ai_block_io" + + @staticmethod + def report_pass(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} PASS: {info}') ++ report_result(Report.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info})) ++ logging.info(f'Report {Report.TASK_NAME} PASS: {info}') + + @staticmethod + def report_fail(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} FAIL: {info}') ++ report_result(Report.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info})) ++ logging.info(f'Report {Report.TASK_NAME} FAIL: {info}') + + @staticmethod + def report_skip(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} SKIP: {info}') ++ report_result(Report.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info})) ++ logging.info(f'Report {Report.TASK_NAME} SKIP: {info}') ++ ++ ++class Xalarm: ++ ALARM_ID = 1002 + + @staticmethod +- def report_minor_alm(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.MINOR_ALM, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} MINOR_ALM: {info}') ++ def minor(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, MINOR_ALM, ALARM_TYPE_OCCUR, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} MINOR_ALM: {info_str}") + + @staticmethod +- def report_major_alm(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.MAJOR_ALM, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} MAJOR_ALM: {info}') ++ def major(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, MAJOR_ALM, ALARM_TYPE_OCCUR, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} MAJOR_ALM: {info_str}") + + @staticmethod +- def report_critical_alm(info: str): +- report_result(AlarmReport.TASK_NAME, ResultLevel.CRITICAL_ALM, json.dumps({"msg": info})) +- logging.info(f'Report {AlarmReport.TASK_NAME} CRITICAL_ALM: {info}') ++ def critical(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, CRITICAL_ALM, ALARM_TYPE_OCCUR, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} CRITICAL_ALM: {info_str}") ++ ++ def minor_recover(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, MINOR_ALM, ALARM_TYPE_RECOVER, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} MINOR_ALM Recover: {info_str}") ++ ++ def major_recover(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, MAJOR_ALM, ALARM_TYPE_RECOVER, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} MAJOR_ALM Recover: {info_str}") + ++ def critical_recover(info: dict): ++ info_str = json.dumps(info) ++ xalarm_report(Xalarm.ALARM_ID, CRITICAL_ALM, ALARM_TYPE_RECOVER, info_str) ++ logging.info(f"Report {Xalarm.ALARM_ID} CRITICAL_ALM Recover: {info_str}") +diff --git a/src/python/sentryPlugins/ai_block_io/data_access.py b/src/python/sentryPlugins/ai_block_io/data_access.py +index 01c5315..c7679cd 100644 +--- a/src/python/sentryPlugins/ai_block_io/data_access.py ++++ b/src/python/sentryPlugins/ai_block_io/data_access.py +@@ -42,10 +42,11 @@ def check_collect_valid(period): + data = json.loads(data_raw["message"]) + except Exception as e: + logging.warning(f"get io data failed, {e}") +- return [] ++ return None + return [k for k in data.keys()] + else: +- return [] ++ logging.warning(f"get io data failed, return {data_raw}") ++ return None + + + def _get_raw_data(period, disk_list): +diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py +index a48144f..0ed282b 100644 +--- a/src/python/sentryPlugins/ai_block_io/detector.py ++++ b/src/python/sentryPlugins/ai_block_io/detector.py +@@ -35,7 +35,7 @@ class Detector: + self._count += 1 + if self._count % 15 == 0: + self._count = 0 +- logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.") ++ logging.debug(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.") + logging.debug(f'enter Detector: {self}') + metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name) + if metric_value is None: +-- +2.23.0 + diff --git a/sysSentry.spec b/sysSentry.spec index 66761fa307f87695d55ba361c01cab6aaa8db5f9..c015d6a4023fa21fa792f4c5d8643ab433b3102e 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 31 +Release: 32 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -43,6 +43,7 @@ Patch30: ebpf-fix-alarm-bug.patch Patch31: xalarm-add-alarm-msg-length-to-8192.patch Patch32: add-log-for-improving-maintainability.patch Patch33: add-get_disk_type-and-fix-some-bugs.patch +Patch34: ai_block_io-adapt-alarm-module.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -297,6 +298,12 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io %changelog +* Thu Oct 10 2024 heyouzhi - 1.0.2-32 +- Type:requirement +- CVE:NA +- SUG:NA +- DESC:ai_block_io adapt alarm module + * Thu Oct 10 2024 zhuofeng - 1.0.2-31 - Type:bugfix - CVE:NA