diff --git a/listen-thread-of-collect-module-exits-occasionally.patch b/listen-thread-of-collect-module-exits-occasionally.patch new file mode 100644 index 0000000000000000000000000000000000000000..7bf76a4f855f66d13a92bac373df6dcb0fffc651 --- /dev/null +++ b/listen-thread-of-collect-module-exits-occasionally.patch @@ -0,0 +1,111 @@ +From a79b59f46c56e885d99354be2be0c92eb4108f67 Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Wed, 16 Oct 2024 11:30:30 +0800 +Subject: [PATCH] listen thread of collect module exits occasionally + +--- + src/python/sentryCollector/collect_io.py | 6 ++---- + src/python/sentryCollector/collect_server.py | 18 ++++++++---------- + 2 files changed, 10 insertions(+), 14 deletions(-) + +diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py +index 945ccbc..3d98c50 100644 +--- a/src/python/sentryCollector/collect_io.py ++++ b/src/python/sentryCollector/collect_io.py +@@ -320,7 +320,7 @@ class CollectIo(): + if curr_io_dump > 0: + logging.info(f"ebpf io_dump info : {disk_name}, {stage}, {io_type}, {curr_io_dump}") + IO_GLOBAL_DATA[disk_name][stage][io_type].insert(0, [curr_lat, curr_io_dump, curr_io_length, curr_iops]) +- logging.debug(f"ebpf collect data : {IO_GLOBAL_DATA}") ++ + elapsed_time = time.time() - start_time + sleep_time = self.period_time - elapsed_time + if sleep_time < 0: +@@ -432,8 +432,6 @@ class CollectIo(): + continue + self.append_period_lat(disk_name, stage_list) + +- logging.debug(f"no-lock collect data : {IO_GLOBAL_DATA}") +- + elapsed_time = time.time() - start_time + sleep_time = self.period_time - elapsed_time + if sleep_time < 0: +diff --git a/src/python/sentryCollector/collect_server.py b/src/python/sentryCollector/collect_server.py +index 11d1af0..ad3ac0e 100644 +--- a/src/python/sentryCollector/collect_server.py ++++ b/src/python/sentryCollector/collect_server.py +@@ -64,7 +64,7 @@ class CollectServer(): + self.io_global_data = IO_GLOBAL_DATA + + if len(IO_CONFIG_DATA) == 0: +- logging.error("the collect thread is not started, the data is invalid. ") ++ logging.error("the collect thread is not started, the data is invalid.") + return json.dumps(result_rev) + + period_time = IO_CONFIG_DATA[0] +@@ -75,7 +75,7 @@ class CollectServer(): + stage_list = json.loads(data_struct['stage']) + + if (period < period_time) or (period > period_time * max_save) or (period % period_time): +- logging.error("is_iocollect_valid: period time: %d is invalid", period) ++ logging.error("is_iocollect_valid: period time is invalid, user period: %d, config period_time: %d", period, period_time) + return json.dumps(result_rev) + + for disk_name, stage_info in self.io_global_data.items(): +@@ -96,7 +96,7 @@ class CollectServer(): + self.io_global_data = IO_GLOBAL_DATA + + if len(IO_CONFIG_DATA) == 0: +- logging.error("the collect thread is not started, the data is invalid. ") ++ logging.error("the collect thread is not started, the data is invalid.") + return json.dumps(result_rev) + period_time = IO_CONFIG_DATA[0] + max_save = IO_CONFIG_DATA[1] +@@ -107,11 +107,11 @@ class CollectServer(): + iotype_list = json.loads(data_struct['iotype']) + + if (period < period_time) or (period > period_time * max_save) or (period % period_time): +- logging.error("get_io_data: period time: %d is invalid", period) ++ logging.error("get_io_data: period time is invalid, user period: %d, config period_time: %d", period, period_time) + return json.dumps(result_rev) + + collect_index = period // period_time - 1 +- logging.debug("period: %d, collect_index: %d", period, collect_index) ++ logging.debug("user period: %d, config period_time: %d, collect_index: %d", period, period_time, collect_index) + + for disk_name, stage_info in self.io_global_data.items(): + if disk_name not in disk_list: +@@ -124,7 +124,7 @@ class CollectServer(): + for iotype_name, iotype_info in iotype_info.items(): + if iotype_name not in iotype_list: + continue +- if len(iotype_info) < collect_index: ++ if len(iotype_info) - 1 < collect_index: + continue + result_rev[disk_name][stage_name][iotype_name] = iotype_info[collect_index] + +@@ -250,10 +250,8 @@ class CollectServer(): + except socket.error: + logging.error("server fd create failed") + server_fd = None +- + return server_fd + +- + def server_loop(self): + """main loop""" + logging.info("collect listen thread start") +@@ -277,8 +275,8 @@ class CollectServer(): + self.server_recv(server_fd) + else: + continue +- except socket.error: +- pass ++ except Exception: ++ logging.error('collect listen exception : %s', traceback.format_exc()) + + def stop_thread(self): + self.stop_event.set() +-- +2.33.0 + diff --git a/sysSentry.spec b/sysSentry.spec index 8181877b681c9e3d2c321ffc5ba0f8ce5999f41c..6f25c41ede34d097046e1ee74df0f67a3b579180 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 45 +Release: 46 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -62,6 +62,7 @@ Patch49: fix-word-error.patch Patch50: ai_block_io-fix-some-bugs.patch Patch51: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch Patch52: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch +Patch53: listen-thread-of-collect-module-exits-occasionally.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -334,6 +335,12 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin* %changelog +* Wed Oct 16 2024 zhuofeng - 1.0.2-46 +- Type:bugfix +- CVE:NA +- SUG:NA +- DESC:listen thread of collect module exits occasionally + * Tue Oct 15 2024 gaoruoshu - 1.0.2-45 - Type:bugfix - CVE:NA