diff --git a/Fix-resource-leak.patch b/Fix-resource-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..96cbd5abb54817991770bdbdba0c7b78f38a00fb --- /dev/null +++ b/Fix-resource-leak.patch @@ -0,0 +1,24 @@ +From a86c0e223e9fb3cbb3b53529980df1eb8d2917d0 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:24:35 +0800 +Subject: [PATCH 04/16] Fix resource leak + +--- + src/libs/libxalarm/register_xalarm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/libs/libxalarm/register_xalarm.c b/src/libs/libxalarm/register_xalarm.c +index 3cd7e9d..f43297a 100644 +--- a/src/libs/libxalarm/register_xalarm.c ++++ b/src/libs/libxalarm/register_xalarm.c +@@ -526,6 +526,7 @@ int send_data_to_socket(const char *socket_path, const char *message) + // set socket address + if (memset(&addr, 0, sizeof(struct sockaddr_un)) == NULL) { + fprintf(stderr, "%s: memset info failed.\n", __func__); ++ close(sockfd); + return RETURN_CODE_FAIL; + } + +-- +2.27.0 + diff --git a/Fix-the-use-of-uninitialized-variable-ret.patch b/Fix-the-use-of-uninitialized-variable-ret.patch new file mode 100644 index 0000000000000000000000000000000000000000..49b989c9dc82fe3fe6e4a4c5acb49321e20eff22 --- /dev/null +++ b/Fix-the-use-of-uninitialized-variable-ret.patch @@ -0,0 +1,51 @@ +From 3fdf5ec519893387fd89a8cb3faa34213f935cd2 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:25:45 +0800 +Subject: [PATCH 06/16] Fix the use of uninitialized variable 'ret' + +--- + src/libs/libxalarm/register_xalarm.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/src/libs/libxalarm/register_xalarm.c b/src/libs/libxalarm/register_xalarm.c +index f43297a..13584cb 100644 +--- a/src/libs/libxalarm/register_xalarm.c ++++ b/src/libs/libxalarm/register_xalarm.c +@@ -312,15 +312,13 @@ char *xalarm_getdesc(const struct alarm_info *palarm) + + static int init_report_addr(struct sockaddr_un *alarm_addr, char *report_path) + { +- int ret; 
+- + if (alarm_addr == NULL) { + fprintf(stderr, "%s: alarm_addr is null\n", __func__); + return -1; + } + + if (memset(alarm_addr, 0, sizeof(struct sockaddr_un)) == NULL) { +- fprintf(stderr, "%s: memset alarm_addr failed, ret: %d\n", __func__, ret); ++ fprintf(stderr, "%s: memset alarm_addr failed\n", __func__); + return -1; + } + alarm_addr->sun_family = AF_UNIX; +@@ -332,7 +330,7 @@ static int init_report_addr(struct sockaddr_un *alarm_addr, char *report_path) + int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, + unsigned char ucAlarmType, char *pucParas) + { +- int ret, fd; ++ int ret = 0, fd; + struct alarm_info info; + struct sockaddr_un alarm_addr; + +@@ -349,7 +347,7 @@ int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, + } + + if (memset(&info, 0, sizeof(struct alarm_info)) == NULL) { +- fprintf(stderr, "%s: memset info failed, ret: %d\n", __func__, ret); ++ fprintf(stderr, "%s: memset info failed\n", __func__); + return -1; + } + info.usAlarmId = usAlarmId; +-- +2.27.0 + diff --git a/Use-malloc-to-allocate-memory-as-much-as-possible.patch b/Use-malloc-to-allocate-memory-as-much-as-possible.patch new file mode 100644 index 0000000000000000000000000000000000000000..39d7ec746586b0341a8ede3d3b3f89eb3e4b1ae9 --- /dev/null +++ b/Use-malloc-to-allocate-memory-as-much-as-possible.patch @@ -0,0 +1,149 @@ +From 87f20a47f32adc03464fac4eff4c8450fcf5a45e Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 3 Apr 2025 10:01:15 +0800 +Subject: [PATCH] Use malloc to allocate memory as much as possible + +--- + src/libs/libxalarm/register_xalarm.c | 34 ++++++++++--------- + .../sentry_msg_monitor/sentry_msg_monitor.c | 20 +++++++++-- + 2 files changed, 36 insertions(+), 18 deletions(-) + +diff --git a/src/libs/libxalarm/register_xalarm.c b/src/libs/libxalarm/register_xalarm.c +index f9da783..1ddd0ae 100644 +--- a/src/libs/libxalarm/register_xalarm.c ++++ b/src/libs/libxalarm/register_xalarm.c +@@ -604,13 +604,14 @@ static 
bool is_valid_task_name(const char *task_name) + */ + int report_result(const char *task_name, enum RESULT_LEVEL result_level, const char *report_data) + { ++ int ret = RETURE_CODE_FAIL; + if (result_level < 0 || result_level >= RESULT_LEVEL_NUM) { +- fprintf(stderr, "result_level (%u) is invalid, it must be in [0-5]\n", result_level); +- return RETURE_CODE_FAIL; ++ fprintf(stderr, "result_level (%d) is invalid, it must be in [0-5]\n", result_level); ++ return ret; + } + + if (!is_valid_task_name(task_name)) { +- return RETURE_CODE_FAIL; ++ return ret; + } + + json_object *send_data = json_object_new_object(); +@@ -624,35 +625,36 @@ int report_result(const char *task_name, enum RESULT_LEVEL result_level, const c + const char *result_json_string = json_object_to_json_string(send_data); + if (result_json_string == NULL) { + fprintf(stderr, "%s: json_object_to_json_string return NULL", __func__); +- json_object_put(send_data); +- return RETURE_CODE_FAIL; ++ goto free_json; + } + + int send_data_len = strlen(result_json_string); + if (send_data_len > RESULT_INFO_MAX_LEN) { +- fprintf(stderr, "%s: failed to send result message (%s) to sysSentry! send data is too long (%zu) > (%d)\n", ++ fprintf(stderr, "%s: failed to send result message (%s) to sysSentry! 
send data is too long (%d) > (%d)\n", + __func__, result_json_string, send_data_len, RESULT_INFO_MAX_LEN); +- json_object_put(send_data); +- return RETURE_CODE_FAIL; ++ goto free_json; + } + +- char message[RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN]; +- if (memset(message, 0, RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN) == NULL) { +- fprintf(stderr, "%s: memset message failed", __func__); +- json_object_put(send_data); +- return RETURE_CODE_FAIL; ++ char *message = (char *)calloc(RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN, sizeof(char)); ++ if (!message) { ++ fprintf(stderr, "Failed to allocate memory!"); ++ goto free_json; + } + + sprintf(message, "%s%04d%s", RESULT_INFO_HEAD_MAGIC, send_data_len, result_json_string); + + if (send_data_to_socket(RESULT_REPORT_SOCKET, message)) { + fprintf(stderr, "%s: failed to send result message (%s) to sysSentry!\n", __func__, message); +- json_object_put(send_data); +- return RETURE_CODE_FAIL; ++ goto free_msg; + } + ++ ret = RETURE_CODE_SUCCESS; ++free_msg: ++ free(message); ++ message = NULL; ++free_json: + json_object_put(send_data); +- return RETURE_CODE_SUCCESS; ++ return ret; + } + + int xalarm_register_event(struct alarm_register **register_info, struct alarm_subscription_info id_filter) +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index a307a96..e62c936 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -94,7 +94,7 @@ static int smh_dev_get_fd(void) + static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) + { + int res; +- char nid_str[MSG_STR_MAX_LEN]; ++ char *nid_str = NULL; + size_t offset = 0; + switch (smh_msg->type) { + case SMH_MESSAGE_POWER_OFF: +@@ -105,11 +105,18 @@ static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* s + } + break; + case SMH_MESSAGE_OOM: ++ nid_str = (char *) 
calloc (MSG_STR_MAX_LEN, sizeof(char)); ++ if (!nid_str) { ++ logging_error("Failed to allocate memory!"); ++ return -1; ++ } + for (int i = 0; i < MAX_NUMA_NODES; i++) { + res = snprintf(nid_str + offset, MSG_STR_MAX_LEN - offset, "%d%s", + smh_msg->oom_info.nid[i], (i < MAX_NUMA_NODES - 1) ? "," : ""); + if ((size_t)res >= MSG_STR_MAX_LEN) { + logging_warn("msg str size exceeds the max value\n"); ++ free(nid_str); ++ nid_str = NULL; + return -1; + } + offset += res; +@@ -123,6 +130,8 @@ static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* s + smh_msg->oom_info.timeout, + smh_msg->oom_info.reason + ); ++ free(nid_str); ++ nid_str = NULL; + if ((size_t)res >= MSG_STR_MAX_LEN) { + logging_warn("msg str size exceeds the max value\n"); + return -1; +@@ -183,7 +192,12 @@ static void* sender_thread(void* arg) { + pthread_cleanup_push(sender_cleanup, &fd); + + pthread_t partner_t; +- char str[MSG_STR_MAX_LEN]; ++ char *str = (char *) calloc (MSG_STR_MAX_LEN, sizeof(char)); ++ if (!str) { ++ logging_error("Failed to allocate memory!"); ++ close(fd); ++ goto close_recv; ++ } + + while (1) { + struct sentry_msg_helper_msg smh_msg; +@@ -236,6 +250,8 @@ static void* sender_thread(void* arg) { + + sender_err: + close(fd); ++ free(str); ++ str = NULL; + close_recv: + partner_t = *(pthread_t*)arg; + if (partner_t) +-- +2.27.0 + diff --git a/add-API-to-enable-disable-the-hijacking-function-for.patch b/add-API-to-enable-disable-the-hijacking-function-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..066d19cbca7812d1f60422fa479c3965d077177a --- /dev/null +++ b/add-API-to-enable-disable-the-hijacking-function-for.patch @@ -0,0 +1,54 @@ +From becd842a223a41ae836db76c73eaf456cd9ef63a Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 27 Nov 2025 14:12:41 +0800 +Subject: [PATCH 16/16] add API to enable/disable the hijacking function for + oom, power off and ub mem fault event + +--- + src/services/syssentry/sentryctl | 16 
+++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/src/services/syssentry/sentryctl b/src/services/syssentry/sentryctl +index ec683b7..1e00252 100644 +--- a/src/services/syssentry/sentryctl ++++ b/src/services/syssentry/sentryctl +@@ -61,8 +61,8 @@ def write_proc_file(proc_dir, proc_name, proc_value): + finally: + return exit_code + +-def set_sentry_reporter_proc(proc_value): +- return write_proc_file("sentry_reporter", "ub_mem_fault_with_kill", proc_value) ++def set_sentry_reporter_proc(proc_name, proc_value): ++ return write_proc_file("sentry_reporter", proc_name, proc_value) + + def set_remote_reporter_proc(proc_name, proc_value): + return write_proc_file("sentry_remote_reporter", proc_name, proc_value) +@@ -245,6 +245,9 @@ if __name__ == '__main__': + ], + "sentry_reporter" : [ + {"name" : "ub_mem_fault_with_kill", "type" : "str", "choices": ["on", "off"], "required" : False, "help" : "Enable/Disable sending SIGBUS signal with UB mem event"}, ++ {"name" : "ub_mem_fault", "type" : str, "choices": ["on", "off"], "required" : False, "help": "Enable/Disable UB mem event"}, ++ {"name" : "power_off", "type": str, "choices": ["on", "off"], "required" : False, "help": "Enable/Disable power off event"}, ++ {"name" : "oom", "type" : str, "choices": ["on", "off"], "required" : False, "help": "Enable/Disable oom event"}, + ], + } + parser_set = subparsers.add_parser('set', help='set plugins params') +@@ -326,7 +329,14 @@ if __name__ == '__main__': + elif client_args.set_task == "sentry_uvb_comm": + ret_code += set_uvb_proc(client_args.server_cna) + elif client_args.set_task == "sentry_reporter": +- ret_code += set_sentry_reporter_proc(client_args.ub_mem_fault_with_kill) ++ if client_args.ub_mem_fault_with_kill != None: ++ ret_code += set_sentry_reporter_proc("ub_mem_fault_with_kill", client_args.ub_mem_fault_with_kill) ++ if client_args.ub_mem_fault != None: ++ ret_code += set_sentry_reporter_proc("ub_mem_fault", client_args.ub_mem_fault); 
++ if client_args.power_off != None: ++ ret_code += set_sentry_reporter_proc("power_off", client_args.power_off); ++ if client_args.oom != None: ++ ret_code += set_sentry_reporter_proc("oom", client_args.oom); + sys.exit(ret_code) + else: + parser.print_help() +-- +2.27.0 + diff --git a/add-NONZERO_EXITED-status-for-plugin-exited-with-non.patch b/add-NONZERO_EXITED-status-for-plugin-exited-with-non.patch new file mode 100644 index 0000000000000000000000000000000000000000..99e41e19789f9cedd5cfce32639b94fd786247e6 --- /dev/null +++ b/add-NONZERO_EXITED-status-for-plugin-exited-with-non.patch @@ -0,0 +1,170 @@ +From 4cd232fad19821fb283b8ba724de5156174bbba8 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 19:04:22 +0800 +Subject: [PATCH 09/16] add NONZERO_EXITED status for plugin exited with + non-zero code + +--- + selftest/test/test_sentryctl_type_period.sh | 3 +++ + src/services/syssentry/callbacks.py | 5 +++-- + src/services/syssentry/global_values.py | 13 +++++++------ + src/services/syssentry/mod_status.py | 6 +++--- + src/services/syssentry/syssentry.py | 12 ++++++++---- + 5 files changed, 24 insertions(+), 15 deletions(-) + +diff --git a/selftest/test/test_sentryctl_type_period.sh b/selftest/test/test_sentryctl_type_period.sh +index e6a1d8f..6788fad 100644 +--- a/selftest/test/test_sentryctl_type_period.sh ++++ b/selftest/test/test_sentryctl_type_period.sh +@@ -53,6 +53,9 @@ function do_test() { + sentryctl status test_type_period | grep -w "status: FAILED" + expect_eq $? 0 + ++ sentryctl start test_type_period ++ expect_eq $? 0 ++ + sentryctl stop test_type_period + expect_eq $? 
0 + +diff --git a/src/services/syssentry/callbacks.py b/src/services/syssentry/callbacks.py +index 6ec2c29..fbcc53e 100644 +--- a/src/services/syssentry/callbacks.py ++++ b/src/services/syssentry/callbacks.py +@@ -17,7 +17,8 @@ import logging + + + from .task_map import TasksMap, ONESHOT_TYPE, PERIOD_TYPE +-from .mod_status import EXITED_STATUS, RUNNING_STATUS, WAITING_STATUS, set_runtime_status ++from .mod_status import EXITED_STATUS, NONZERO_EXITED_STATUS, FAILED_STATUS, RUNNING_STATUS, WAITING_STATUS ++from .mod_status import set_runtime_status + from .alarm import get_alarm_result + + +@@ -69,7 +70,7 @@ def task_stop(mod_name): + if not task.load_enabled: + return "failed", "mod is not enabled" + logging.info("%s stop", mod_name) +- if task.runtime_status == EXITED_STATUS: ++ if task.runtime_status in [NONZERO_EXITED_STATUS, EXITED_STATUS, FAILED_STATUS]: + return "success", "task already stopped" + if task.runtime_status == WAITING_STATUS: + set_runtime_status(task.name, EXITED_STATUS) +diff --git a/src/services/syssentry/global_values.py b/src/services/syssentry/global_values.py +index 7cb99e0..92b6237 100644 +--- a/src/services/syssentry/global_values.py ++++ b/src/services/syssentry/global_values.py +@@ -20,7 +20,8 @@ import os + + from .result import ResultLevel, RESULT_LEVEL_ERR_MSG_DICT + from .utils import get_current_time_string +-from .mod_status import set_runtime_status, RUNNING_STATUS ++from .mod_status import set_runtime_status ++from .mod_status import RUNNING_STATUS, EXITED_STATUS, NONZERO_EXITED_STATUS, FAILED_STATUS, WAITING_STATUS + + SENTRY_RUN_DIR = "/var/run/sysSentry" + CTL_SOCKET_PATH = "/var/run/sysSentry/control.sock" +@@ -48,7 +49,7 @@ class InspectTask: + self.type = task_type + self.status = "ERROR" + # runtime information +- self.runtime_status = "EXITED" ++ self.runtime_status = EXITED_STATUS + self.pid = -1 + # task attribute + self.task_start = start_task +@@ -95,7 +96,7 @@ class InspectTask: + self.result_info["details"] = {} + 
if not self.period_enabled: + self.period_enabled = True +- if self.runtime_status in ("EXITED", "FAILED"): ++ if self.runtime_status in (EXITED_STATUS, FAILED_STATUS, NONZERO_EXITED_STATUS): + + if self.conflict != 'up': + ret = self.check_conflict() +@@ -119,7 +120,7 @@ class InspectTask: + logging.error("task %s start Popen error, invalid cmd", cmd_list) + self.result_info["result"] = ResultLevel.FAIL.name + self.result_info["error_msg"] = RESULT_LEVEL_ERR_MSG_DICT.get(ResultLevel.FAIL.name) +- self.runtime_status = "FAILED" ++ self.runtime_status = FAILED_STATUS + return False, "start command is invalid, popen failed" + finally: + if isinstance(logfile, io.TextIOWrapper) and not logfile.closed: +@@ -127,7 +128,7 @@ class InspectTask: + + self.pid = child.pid + logging.debug("start task %s pid %d", self.name, self.pid) +- self.runtime_status = "RUNNING" ++ self.runtime_status = RUNNING_STATUS + if self.heartbeat_interval > 0: + self.last_heartbeat = time.perf_counter() + return True, "start task success" +@@ -136,7 +137,7 @@ class InspectTask: + def stop(self): + """stop""" + self.period_enabled = False +- if self.runtime_status == "RUNNING": ++ if self.runtime_status == RUNNING_STATUS: + cmd_list = self.task_stop.split() + if cmd_list[-1] == "$pid": + cmd_list[-1] = str(self.pid) +diff --git a/src/services/syssentry/mod_status.py b/src/services/syssentry/mod_status.py +index 574493a..c0fcb9d 100644 +--- a/src/services/syssentry/mod_status.py ++++ b/src/services/syssentry/mod_status.py +@@ -19,14 +19,14 @@ from .task_map import TasksMap, ONESHOT_TYPE, PERIOD_TYPE + ONESHOT_MOD_STATUS = ("UNLOADED", "LOADED", "ERROR") + PERIOD_MOD_STATUS = ("UNLOADED", "LOADED", "ERROR") + +-ONESHOT_RUNTIME_STATUS = ("RUNNING", "EXITED", "FAILED") +-PERIOD_RUNTIME_STATUS = ("RUNNING", "WAITING", "FAILED", "EXITED") +- + RUNNING_STATUS = "RUNNING" + EXITED_STATUS = "EXITED" ++NONZERO_EXITED_STATUS = "NONZERO_EXITED" + FAILED_STATUS = "FAILED" + WAITING_STATUS = "WAITING" + 
++ONESHOT_RUNTIME_STATUS = (RUNNING_STATUS, EXITED_STATUS, NONZERO_EXITED_STATUS, FAILED_STATUS) ++PERIOD_RUNTIME_STATUS = (RUNNING_STATUS, WAITING_STATUS, FAILED_STATUS, EXITED_STATUS) + + + def set_task_status(task_name, status_code): +diff --git a/src/services/syssentry/syssentry.py b/src/services/syssentry/syssentry.py +index eab8033..2f371a7 100644 +--- a/src/services/syssentry/syssentry.py ++++ b/src/services/syssentry/syssentry.py +@@ -30,6 +30,7 @@ from .global_values import SENTRY_RUN_DIR, CTL_SOCKET_PATH, SENTRY_RUN_DIR_PERM + from .cron_process import period_tasks_handle + from .callbacks import mod_list_show, task_start, task_get_status, task_stop, task_get_result, task_get_alarm + from .mod_status import get_task_by_pid, set_runtime_status ++from .mod_status import RUNNING_STATUS, EXITED_STATUS, NONZERO_EXITED_STATUS, FAILED_STATUS, WAITING_STATUS + from .load_mods import load_tasks, reload_single_mod + from .heartbeat import (heartbeat_timeout_chk, heartbeat_fd_create, + heartbeat_recv, THB_SOCKET_PATH) +@@ -571,15 +572,18 @@ def sigchld_handler(signum, _f): + if os.WIFEXITED(child_exit_code): + # exit normally with exit() syscall + if task.type == "PERIOD" and task.period_enabled: +- set_runtime_status(task.name, "WAITING") ++ set_runtime_status(task.name, WAITING_STATUS) + else: +- set_runtime_status(task.name, "EXITED") ++ if os.WEXITSTATUS(child_exit_code): ++ set_runtime_status(task.name, NONZERO_EXITED_STATUS) ++ else: ++ set_runtime_status(task.name, EXITED_STATUS) + else: + # exit abnormally + if not task.period_enabled: +- set_runtime_status(task.name, "EXITED") ++ set_runtime_status(task.name, EXITED_STATUS) + else: +- set_runtime_status(task.name, "FAILED") ++ set_runtime_status(task.name, FAILED_STATUS) + task.result_info["end_time"] = get_current_time_string() + except: + break +-- +2.27.0 + diff --git a/add-UB-fault-report-function.patch b/add-UB-fault-report-function.patch new file mode 100644 index 
0000000000000000000000000000000000000000..d6d00ec080889a867803416d7d4deeb7dfed1ae3 --- /dev/null +++ b/add-UB-fault-report-function.patch @@ -0,0 +1,264 @@ +From a8663ee9bb506bf9153001822ad8a46d4af3b6ba Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Sun, 9 Nov 2025 18:05:54 +0800 +Subject: [PATCH 11/16] add UB fault report function + +Feature issue: +https://gitee.com/openeuler/release-management/issues/ID45WQ +--- + src/libs/libxalarm/register_xalarm.h | 1 + + src/sentryPlugins/sentry_msg_monitor/Makefile | 2 +- + .../sentry_msg_monitor/sentry_msg_monitor.c | 111 +++++++++++++++++- + .../sentry_msg_monitor/smh_common_type.h | 40 ++++++- + 4 files changed, 149 insertions(+), 5 deletions(-) + +diff --git a/src/libs/libxalarm/register_xalarm.h b/src/libs/libxalarm/register_xalarm.h +index cbb21b7..1e2818d 100644 +--- a/src/libs/libxalarm/register_xalarm.h ++++ b/src/libs/libxalarm/register_xalarm.h +@@ -27,6 +27,7 @@ + #define ALARM_PANIC_ACK_EVENT 1008 + #define ALARM_KERNEL_REBOOT_EVENT 1009 + #define ALARM_KERNEL_REBOOT_ACK_EVENT 1010 ++#define ALARM_UBUS_MEM_EVENT 1013 + + #define MINOR_ALM 1 + #define MAJOR_ALM 2 +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index 2f423b2..c74ee07 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -15,7 +15,7 @@ + all: sentry_msg_monitor + + sentry_msg_monitor: +- gcc sentry_msg_monitor.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ gcc sentry_msg_monitor.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lobmm -lxalarm -lsentry_log 
-ljson-c -pthread -o sentry_msg_monitor + + clean: + rm -f sentry_msg_monitor +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index bfe9356..5a08078 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -23,6 +23,8 @@ + #include + #include + #include ++#include ++#include + + #include "register_xalarm.h" + #include "log_utils.h" +@@ -31,7 +33,7 @@ + #define TOOL_NAME "sentry_msg_monitor" + #define SMH_DEV_PATH "/dev/sentry_msg_helper" + #define PID_FILE_PATH "/var/run/"TOOL_NAME".pid" +-#define ID_LIST_LENGTH SMH_MESSAGE_MAX ++#define ID_LIST_LENGTH 4 //reboot oom panic kernel_reboot + #define MSG_STR_MAX_LEN 1024 + #define DEFAULT_LOG_LEVEL LOG_INFO + #define MAX_RETRY_NUM 3 +@@ -108,6 +110,50 @@ static int smh_dev_get_fd(void) + return smh_dev_fd; + } + ++static int convert_ubus_type_to_sentry_type(enum ras_err_type ubus_type) ++{ ++ int sentry_type = -1; ++ switch (ubus_type) { ++ case UB_MEM_ATOMIC_DATA_ERR: ++ sentry_type = SENTRY_MEM_ERR_ROUTE; ++ break; ++ case MAR_NOPORT_VLD_INT_ERR: ++ sentry_type = SENTRY_MEM_FLUX_INT; ++ break; ++ case MAR_NEAR_AUTH_FAIL_ERR: ++ sentry_type = SENTRY_MEM_ERR_OUTBOUND_TRANSLATION; ++ break; ++ case MAR_FAR_AUTH_FAIL_ERR: ++ case UB_MEM_FLOW_READ_AUTH_POISON: ++ case UB_MEM_FLOW_READ_AUTH_RESPERR: ++ sentry_type = SENTRY_MEM_ERR_INBOUND_TRANSLATION; ++ break; ++ case MAR_TIMEOUT_ERR: ++ case UB_MEM_TIMEOUT_POISON: ++ case UB_MEM_TIMEOUT_RESPERR: ++ sentry_type = SENTRY_MEM_ERR_TIMEOUT; ++ break; ++ case MAR_ILLEGAL_ACCESS_ERR: ++ sentry_type = SENTRY_MEM_ERR_BUS; ++ break; ++ case REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR: ++ case UB_MEM_READ_DATA_ERR: ++ case UB_MEM_FLOW_POISON: ++ case UB_MEM_READ_DATA_POISON: ++ case UB_MEM_READ_DATA_RESPERR: ++ sentry_type = SENTRY_MEM_ERR_UCE; ++ break; ++ case MAR_FLUX_INT_ERR: ++ case MAR_WITHOUT_CXT_ERR: ++ 
sentry_type = SENTRY_MEM_ERR_NO_REPORT; ++ break; ++ default: ++ logging_warn("Unknown ubus type: %d\n", ubus_type); ++ break; ++ } ++ return sentry_type; ++} ++ + static int convert_power_off_smh_smg_to_str(const struct sentry_msg_helper_msg* smh_msg, char* str) + { + int res; +@@ -171,6 +217,63 @@ static int convert_remote_smh_smg_to_str(const struct sentry_msg_helper_msg* smh + return 0; + } + ++static int convert_ub_mem_err_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) ++{ ++ enum ras_err_type raw_err_type = smh_msg->helper_msg_info.ub_mem_info.raw_ubus_mem_err_type; ++ int sentry_err_type = convert_ubus_type_to_sentry_type(raw_err_type); ++ // return -1 indicates that only logs are recorded, and no alerts are sent to xalam. ++ if (sentry_err_type == SENTRY_MEM_ERR_NO_REPORT) { ++ logging_info("received kernel event raw_ubus_mem_err_type is %d\n", raw_err_type); ++ return -1; ++ } ++ if (sentry_err_type == -1) { ++ logging_error("raw_ubus_mem_err_type to sentry_ubus_mem_err_type failed, " ++ "raw_ubus_mem_err_type: %d, sentry_ubus_mem_err_type: %d\n", ++ raw_err_type, sentry_err_type); ++ return -1; ++ } ++ uint64_t msgid = smh_msg->msgid; ++ uint64_t pa = smh_msg->helper_msg_info.ub_mem_info.pa; ++ ++ mem_id id; ++ unsigned long obmm_offset; ++ int result = obmm_query_memid_by_pa(pa, &id, &obmm_offset); ++ if (result < 0) { ++ logging_error("query memid falied, result: %d, errno: %d (%s)\n", result, errno, strerror(errno)); ++ return -1; ++ } ++ ++ char hex_str[20]; ++ int ret = snprintf(hex_str, sizeof(hex_str), "0x%lx", (long)pa); ++ if (ret < 0) { ++ logging_error("convert pa to string failed\n"); ++ return -1; ++ } ++ struct json_object *root = json_object_new_object(); ++ json_object_object_add(root, "msgid", json_object_new_int64(msgid)); ++ json_object_object_add(root, "sentry_ubus_mem_err_type", json_object_new_int(sentry_err_type)); ++ json_object_object_add(root, "raw_ubus_mem_err_type", json_object_new_int(raw_err_type)); ++ 
json_object_object_add(root, "pa", json_object_new_string(hex_str)); ++ json_object_object_add(root, "memid", json_object_new_int64(id)); ++ ++ const char* json_str = json_object_to_json_string(root); ++ if (json_str == NULL) { ++ logging_error("json_str return NULL\n"); ++ json_object_put(root); ++ return -1; ++ } ++ ++ strncpy(str, json_str, MSG_STR_MAX_LEN - 1); ++ ++ if (strlen(str) >= MSG_STR_MAX_LEN) { ++ logging_error("msg str size exceeds the max value\n"); ++ json_object_put(root); ++ return -1; ++ } ++ json_object_put(root); ++ return 0; ++} ++ + static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) + { + int res; +@@ -185,6 +288,9 @@ static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* s + case SMH_MESSAGE_KERNEL_REBOOT: + res = convert_remote_smh_smg_to_str(smh_msg, str); + break; ++ case SMH_MESSAGE_UB_MEM_ERR: ++ res = convert_ub_mem_err_smh_msg_to_str(smh_msg, str); ++ break; + default: + logging_warn("Unknown msg type: %d\n", smh_msg->type); + return -1; +@@ -242,6 +348,9 @@ static unsigned short convert_msg_type_to_xalarm_type(enum sentry_msg_helper_msg + case SMH_MESSAGE_KERNEL_REBOOT: + xalarm_type = ALARM_KERNEL_REBOOT_EVENT; + break; ++ case SMH_MESSAGE_UB_MEM_ERR: ++ xalarm_type = ALARM_UBUS_MEM_EVENT; ++ break; + default: + logging_warn("Unknown msg type: %d\n", msg_type); + break; +diff --git a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +index 0ccbb12..8035231 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h ++++ b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +@@ -29,16 +29,46 @@ enum { + + #define SMH_MSG_ACK _IO(SMH_TYPE, SMH_CMD_MSG_ACK) + ++enum ras_err_type { ++ UB_MEM_ATOMIC_DATA_ERR = 0, ++ UB_MEM_READ_DATA_ERR, ++ UB_MEM_FLOW_POISON, ++ UB_MEM_FLOW_READ_AUTH_POISON, ++ UB_MEM_FLOW_READ_AUTH_RESPERR, ++ UB_MEM_TIMEOUT_POISON, ++ UB_MEM_TIMEOUT_RESPERR, ++ UB_MEM_READ_DATA_POISON, ++ 
UB_MEM_READ_DATA_RESPERR, ++ MAR_NOPORT_VLD_INT_ERR, ++ MAR_FLUX_INT_ERR, ++ MAR_WITHOUT_CXT_ERR, ++ RSP_BKPRE_OVER_TIMEOUT_ERR, ++ MAR_NEAR_AUTH_FAIL_ERR, ++ MAR_FAR_AUTH_FAIL_ERR, ++ MAR_TIMEOUT_ERR, ++ MAR_ILLEGAL_ACCESS_ERR, ++ REMOTE_READ_DATA_ERR_OR_WRITE_RESPONSE_ERR, ++}; ++ ++enum sentry_ubus_mem_err_type { ++ SENTRY_MEM_ERR_ROUTE, ++ SENTRY_MEM_FLUX_INT, ++ SENTRY_MEM_ERR_OUTBOUND_TRANSLATION, ++ SENTRY_MEM_ERR_INBOUND_TRANSLATION, ++ SENTRY_MEM_ERR_TIMEOUT, ++ SENTRY_MEM_ERR_BUS, ++ SENTRY_MEM_ERR_UCE, ++ SENTRY_MEM_ERR_NO_REPORT = 1000, ++}; ++ + enum sentry_msg_helper_msg_type { + SMH_MESSAGE_POWER_OFF, + SMH_MESSAGE_OOM, + SMH_MESSAGE_PANIC, + SMH_MESSAGE_KERNEL_REBOOT, +- SMH_MESSAGE_MAX, +- // Add ACK events HERE (below SMH_MESSAGE_MAX) ++ SMH_MESSAGE_UB_MEM_ERR, + SMH_MESSAGE_PANIC_ACK, + SMH_MESSAGE_KERNEL_REBOOT_ACK, +- SMH_MESSAGE_UNKNOWN, + }; + + struct sentry_msg_helper_msg { +@@ -59,6 +89,10 @@ struct sentry_msg_helper_msg { + uint32_t cna; + char eid[EID_MAX_LEN]; + } remote_info; ++ struct { ++ uint64_t pa; ++ enum ras_err_type raw_ubus_mem_err_type; ++ } ub_mem_info; + } helper_msg_info; + unsigned long res; + }; +-- +2.27.0 + diff --git a/add-dfx-for-xalarmd-to-rebuild-connection-after-comm.patch b/add-dfx-for-xalarmd-to-rebuild-connection-after-comm.patch new file mode 100644 index 0000000000000000000000000000000000000000..67817c0d6a76183fa33918ccfd609d03a49428bf --- /dev/null +++ b/add-dfx-for-xalarmd-to-rebuild-connection-after-comm.patch @@ -0,0 +1,468 @@ +From de1a9df6d5eeafcf4c242318ea27f1d6bdf033e7 Mon Sep 17 00:00:00 2001 +From: PshySimon +Date: Mon, 24 Feb 2025 19:11:26 +0800 +Subject: [PATCH] add dfx for xalarmd to rebuild connection after communication + disconnection + +--- + config/service/xalarmd.service | 7 +- + src/services/xalarm/xalarm_server.py | 237 +++++++++++++++++++++---- + src/services/xalarm/xalarm_transfer.py | 49 +++-- + 3 files changed, 242 insertions(+), 51 deletions(-) + +diff --git 
a/config/service/xalarmd.service b/config/service/xalarmd.service +index 0665b39..20db997 100644 +--- a/config/service/xalarmd.service ++++ b/config/service/xalarmd.service +@@ -2,8 +2,13 @@ + Description = xalarm daemon for alarm messages forwarding + + [Service] +-ExecStart =/usr/bin/python3 /usr/bin/xalarmd + Type = forking ++ExecStart=/usr/bin/python3 /usr/bin/xalarmd ++ExecStop=/bin/kill ++KillMode=process ++Restart=on-failure ++RestartSec=3s + + [Install] + WantedBy = multi-user.target ++ +diff --git a/src/services/xalarm/xalarm_server.py b/src/services/xalarm/xalarm_server.py +index ba6b8f8..932279c 100644 +--- a/src/services/xalarm/xalarm_server.py ++++ b/src/services/xalarm/xalarm_server.py +@@ -18,14 +18,16 @@ import socket + import os + import logging + import select ++import stat + import threading + ++from time import sleep + from .xalarm_api import alarm_bin2stu, alarm_stu2str + from .xalarm_transfer import ( + check_filter, + transmit_alarm, + wait_for_connection, +- peroid_task_to_cleanup_connections ++ cleanup_closed_connections + ) + + +@@ -35,53 +37,216 @@ SOCK_FILE = "/var/run/xalarm/report" + ALARM_REPORT_LEN = 8216 + ALARM_DIR_PERMISSION = 0o755 + SOCKET_FILE_PERMISSON = 0o666 ++PERMISION_MASK = 0o777 ++PEROID_CHECK_TIME = 3 + ALARM_LISTEN_QUEUE_LEN = 5 ++PEROID_SCANN_TIME = 60 ++fd_to_socket_lock = threading.Lock() + ++def check_permission(path, permission): ++ """check whether the permission of path is right ++ """ ++ return (os.stat(path).st_mode & PERMISION_MASK) == permission ++ ++ ++def check_socket_file(path): ++ if not os.path.exists(path): ++ return False ++ ++ file_stat = os.stat(path) ++ # path is not a socket file ++ if not stat.S_ISSOCK(file_stat.st_mode): ++ return False ++ return True + +-def clear_sock_path(): ++ ++def clear_sock_file(sock_file): + """unlink unix socket if exist + """ ++ if os.path.exists(sock_file): ++ os.unlink(sock_file) ++ ++def clear_sock_conn(sock_fd, epoll_fd): ++ if sock_fd is None: ++ return ++ if 
sock_fd.fileno() == -1: ++ return ++ if epoll_fd is not None: ++ epoll_fd.unregister(sock_fd.fileno()) ++ epoll_fd.close() ++ sock_fd.close() ++ ++ ++def create_sock_conn(sock_file, sock_type): ++ sock_fd, epoll_fd = (None, None) ++ try: ++ sock_fd = socket.socket(socket.AF_UNIX, sock_type) ++ sock_fd.bind(sock_file) ++ ++ if sock_type == socket.SOCK_STREAM: ++ sock_fd.listen(ALARM_LISTEN_QUEUE_LEN) ++ sock_fd.setblocking(False) ++ ++ epoll_fd = select.epoll() ++ epoll_fd.register(sock_fd.fileno(), select.EPOLLIN) ++ os.chmod(sock_file, SOCKET_FILE_PERMISSON) ++ logging.info("socket file %s has been created", sock_file) ++ return sock_fd, epoll_fd ++ except socket.error as e: ++ logging.error("failed to bind %s socket, reason is %s", sock_file, str(e)) ++ clear_sock_conn(sock_fd, epoll_fd) ++ ++ return sock_fd, epoll_fd ++ ++def recover_sock_path_and_permission(): ++ # if directory not exists or permission denied, remake + if not os.path.exists(ALARM_DIR): ++ logging.info("xalarmd run dir not exists, create %s", ALARM_DIR) + os.mkdir(ALARM_DIR) ++ if not check_permission(ALARM_DIR, ALARM_DIR_PERMISSION): ++ logging.info("xalarmd run dir %s permission set not properly, recover as default permission", ALARM_DIR) + os.chmod(ALARM_DIR, ALARM_DIR_PERMISSION) +- if os.path.exists(SOCK_FILE): +- os.unlink(SOCK_FILE) +- if os.path.exists(USER_RECV_SOCK): +- os.unlink(USER_RECV_SOCK) ++ if os.path.exists(SOCK_FILE) and not check_permission(SOCK_FILE, SOCKET_FILE_PERMISSON): ++ logging.info("socket file %s permission %s set not properly, recover as default permission", ++ SOCK_FILE, oct(os.stat(SOCK_FILE).st_mode & PERMISION_MASK)) ++ os.chmod(SOCK_FILE, SOCKET_FILE_PERMISSON) ++ if os.path.exists(USER_RECV_SOCK) and not check_permission(USER_RECV_SOCK, SOCKET_FILE_PERMISSON): ++ logging.info("socket file %s permission %s set not properly, recover as default permission", ++ USER_RECV_SOCK, oct(os.stat(USER_RECV_SOCK).st_mode & PERMISION_MASK)) ++ os.chmod(USER_RECV_SOCK, 
SOCKET_FILE_PERMISSON) + ++def peroid_task_to_cleanup_connections(): ++ global alarm_sock ++ global alarm_epoll ++ global fd_to_socket ++ global conn_thread_should_stop ++ global fd_to_socket_lock ++ logging.info("cleanup thread is running") + +-def server_loop(alarm_config): +- """alarm daemon process loop +- """ +- logging.info("server loop waiting for messages") +- clear_sock_path() ++ while True: ++ sleep(PEROID_SCANN_TIME) ++ # if conn thread stopped, cleanup thread should not cleanup anymore ++ if conn_thread_should_stop.is_set(): ++ continue ++ cleanup_closed_connections(alarm_sock, alarm_epoll, fd_to_socket, fd_to_socket_lock) + +- sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) +- sock.bind(SOCK_FILE) +- os.chmod(SOCK_FILE, SOCKET_FILE_PERMISSON) ++def watch_socket_file_and_dir(): ++ global conn_thread ++ global alarm_epoll ++ global report_epoll ++ global conn_thread_should_stop ++ global report_sock ++ global alarm_sock ++ global fd_to_socket ++ global fd_to_socket_lock ++ while True: ++ try: ++ recover_sock_path_and_permission() ++ if not check_socket_file(SOCK_FILE): ++ logging.info("socket file %s not found or socket file been replaced, recovering ...", SOCK_FILE) ++ clear_sock_conn(report_sock, report_epoll) ++ clear_sock_file(SOCK_FILE) ++ # if create socket failed, will retry to create because socket file was cleared in last step ++ report_sock, report_epoll = create_sock_conn(SOCK_FILE, socket.SOCK_DGRAM) ++ ++ if not check_socket_file(USER_RECV_SOCK): ++ logging.info("socket file %s not found or socket file been replaced, recovering ...", USER_RECV_SOCK) ++ # set conn_thread_should_stop as True ++ conn_thread_should_stop.set() ++ # Ensure that conn_thread has been stopped before clean and release resources ++ conn_thread.join() + +- alarm_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +- alarm_sock.bind(USER_RECV_SOCK) +- os.chmod(USER_RECV_SOCK, SOCKET_FILE_PERMISSON) +- alarm_sock.listen(ALARM_LISTEN_QUEUE_LEN) +- 
alarm_sock.setblocking(False) ++ # Now only transmit_alarm will use this lock ++ # Ensure fd_to_socket dict resource has been released ++ with fd_to_socket_lock: ++ for stored_sock_fd, stored_sock in fd_to_socket.items(): ++ if stored_sock is None: ++ continue ++ if alarm_sock is not None and (stored_sock.fileno() != alarm_sock.fileno()): ++ stored_sock.close() ++ clear_sock_conn(alarm_sock, alarm_epoll) ++ clear_sock_file(USER_RECV_SOCK) ++ ++ alarm_sock, alarm_epoll = create_sock_conn(USER_RECV_SOCK, socket.SOCK_STREAM) ++ fd_to_socket = {alarm_sock.fileno(): alarm_sock,} + +- epoll = select.epoll() +- epoll.register(alarm_sock.fileno(), select.EPOLLIN) +- fd_to_socket = {alarm_sock.fileno(): alarm_sock,} +- thread_should_stop = False ++ # set conn_thread_should_stop as False ++ conn_thread_should_stop.clear() ++ conn_thread = start_wait_for_conn_thread( ++ alarm_sock, ++ alarm_epoll, ++ fd_to_socket, ++ conn_thread_should_stop, ++ fd_to_socket_lock ++ ) ++ except Exception as e: ++ logging.error("Error watch socket file thread: %s", str(e)) ++ ++ sleep(PEROID_CHECK_TIME) + +- conn_thread = threading.Thread(target=wait_for_connection, args=(alarm_sock, epoll, fd_to_socket, thread_should_stop)) ++ ++def start_wait_for_conn_thread(alarm_sock_, alarm_epoll_, ++ fd_to_socket_, conn_thread_should_stop_, fd_to_socket_lock_): ++ conn_thread = threading.Thread( ++ target=wait_for_connection, ++ args=( ++ alarm_sock_, ++ alarm_epoll_, ++ fd_to_socket_, ++ conn_thread_should_stop_, ++ fd_to_socket_lock_) ++ ) + conn_thread.daemon = True + conn_thread.start() ++ return conn_thread ++ ++ ++def server_loop(alarm_config): ++ """alarm daemon process loop ++ """ ++ logging.info("server loop waiting for messages") ++ clear_sock_file(SOCK_FILE) ++ clear_sock_file(USER_RECV_SOCK) ++ recover_sock_path_and_permission() ++ global report_sock ++ global alarm_sock ++ global alarm_epoll ++ global report_epoll ++ global fd_to_socket ++ global conn_thread_should_stop ++ global conn_thread 
++ global fd_to_socket_lock ++ report_sock, report_epoll = create_sock_conn(SOCK_FILE, socket.SOCK_DGRAM) ++ alarm_sock, alarm_epoll = create_sock_conn(USER_RECV_SOCK, socket.SOCK_STREAM) ++ fd_to_socket = {alarm_sock.fileno(): alarm_sock,} ++ ++ conn_thread_should_stop = threading.Event() ++ conn_thread = start_wait_for_conn_thread( ++ alarm_sock, ++ alarm_epoll, ++ fd_to_socket, ++ conn_thread_should_stop, ++ fd_to_socket_lock ++ ) + +- cleanup_thread = threading.Thread(target=peroid_task_to_cleanup_connections, args=(alarm_sock, epoll, fd_to_socket, thread_should_stop)) ++ cleanup_thread = threading.Thread(target=peroid_task_to_cleanup_connections) + cleanup_thread.daemon = True + cleanup_thread.start() + ++ watch_thread = threading.Thread(target=watch_socket_file_and_dir) ++ watch_thread.daemon = True ++ watch_thread.start() ++ + while True: + try: +- data, _ = sock.recvfrom(ALARM_REPORT_LEN) ++ # set timeout as 1 seconds to avoid main process blocked by recvfrom ++ # which will cause socket cannot be rebuild ++ events = report_epoll.poll(1.0) ++ data = None ++ for fileno, event in events: ++ if fileno == report_sock.fileno(): ++ data, _ = report_sock.recvfrom(ALARM_REPORT_LEN) ++ + if not data: + continue + if len(data) != ALARM_REPORT_LEN: +@@ -93,19 +258,29 @@ def server_loop(alarm_config): + logging.info("server recieve report msg, %s", alarm_str) + if not check_filter(alarm_info, alarm_config): + continue +- transmit_alarm(alarm_sock, epoll, fd_to_socket, data, alarm_str) ++ transmit_alarm( ++ alarm_sock, ++ alarm_epoll, ++ fd_to_socket, ++ data, ++ alarm_str, ++ fd_to_socket_lock ++ ) + except Exception as e: + logging.error(f"Error server:{e}") + +- thread_should_stop = True ++ conn_thread_should_stop.set() + conn_thread.join() + cleanup_thread.join() ++ watch_thread.join() + +- epoll.unregister(alarm_sock.fileno()) +- epoll.close() ++ alarm_epoll.unregister(alarm_sock.fileno()) ++ alarm_epoll.close() + alarm_sock.close() + os.unlink(USER_RECV_SOCK) + +- 
sock.close() ++ report_sock.close() ++ ++ + + +diff --git a/src/services/xalarm/xalarm_transfer.py b/src/services/xalarm/xalarm_transfer.py +index ccf16f9..d52a61f 100644 +--- a/src/services/xalarm/xalarm_transfer.py ++++ b/src/services/xalarm/xalarm_transfer.py +@@ -17,14 +17,14 @@ Create: 2023-11-02 + import socket + import logging + import threading ++import errno + from time import sleep + + MIN_ID_NUMBER = 1001 + MAX_ID_NUMBER = 1128 + MAX_CONNECTION_NUM = 100 + TEST_CONNECT_BUFFER_SIZE = 32 +-PEROID_SCANN_TIME = 60 +-LOCK = threading.Lock() ++MAX_RETRY_TIMES = 3 + + + def check_filter(alarm_info, alarm_filter): +@@ -40,7 +40,7 @@ def check_filter(alarm_info, alarm_filter): + return True + + +-def cleanup_closed_connections(server_sock, epoll, fd_to_socket): ++def cleanup_closed_connections(server_sock, epoll, fd_to_socket, fd_to_socket_lock): + """ + clean invalid client socket connections saved in 'fd_to_socket' + :param server_sock: server socket instance of alarm +@@ -48,7 +48,7 @@ def cleanup_closed_connections(server_sock, epoll, fd_to_socket): + :param fd_to_socket: dict instance, used to hold client connections and server connections + """ + to_remove = [] +- with LOCK: ++ with fd_to_socket_lock: + for fileno, connection in fd_to_socket.items(): + if connection is server_sock: + continue +@@ -69,46 +69,40 @@ def cleanup_closed_connections(server_sock, epoll, fd_to_socket): + logging.info(f"cleaned up connection {fileno} for client lost connection.") + + +-def peroid_task_to_cleanup_connections(server_sock, epoll, fd_to_socket, thread_should_stop): +- while not thread_should_stop: +- sleep(PEROID_SCANN_TIME) +- cleanup_closed_connections(server_sock, epoll, fd_to_socket) +- +- +-def wait_for_connection(server_sock, epoll, fd_to_socket, thread_should_stop): ++def wait_for_connection(server_sock, epoll, fd_to_socket, conn_thread_should_stop, fd_to_socket_lock): + """ + thread function for catch and save client connection + :param server_sock: server 
socket instance of alarm + :param epoll: epoll instance, used to unregister invalid client connections + :param fd_to_socket: dict instance, used to hold client connections and server connections +- :param thread_should_stop: bool instance ++ :param conn_thread_should_stop: bool instance + """ +- while not thread_should_stop: ++ logging.info("wait for connection thread is running") ++ while not conn_thread_should_stop.is_set(): + try: + events = epoll.poll(1) +- ++ + for fileno, event in events: + if fileno == server_sock.fileno(): + connection, client_address = server_sock.accept() + # if reach max connection, cleanup closed connections + if len(fd_to_socket) - 1 >= MAX_CONNECTION_NUM: +- cleanup_closed_connections(server_sock, epoll, fd_to_socket) ++ cleanup_closed_connections(server_sock, epoll, fd_to_socket, fd_to_socket_lock) + # if connections still reach max num, close this connection automatically + if len(fd_to_socket) - 1 >= MAX_CONNECTION_NUM: + logging.info(f"connection reach max num of {MAX_CONNECTION_NUM}, closed current connection!") + connection.close() + continue +- with LOCK: ++ with fd_to_socket_lock: + fd_to_socket[connection.fileno()] = connection + logging.info("connection fd %d registered event.", connection.fileno()) + except socket.error as e: + logging.debug(f"socket error, reason is {e}") +- break + except (KeyError, OSError, ValueError) as e: + logging.debug(f"wait for connection failed {e}") + + +-def transmit_alarm(server_sock, epoll, fd_to_socket, bin_data, alarm_str): ++def transmit_alarm(server_sock, epoll, fd_to_socket, bin_data, alarm_str, fd_to_socket_lock): + """ + this function is to broadcast alarm data to client, if fail to send data, remove connections held by fd_to_socket + :param server_sock: server socket instance of alarm +@@ -117,8 +111,9 @@ def transmit_alarm(server_sock, epoll, fd_to_socket, bin_data, alarm_str): + :param bin_data: binary instance, alarm info data in C-style struct format defined in xalarm_api.py + 
""" + to_remove = [] ++ to_retry = [] + +- with LOCK: ++ with fd_to_socket_lock: + for fileno, connection in fd_to_socket.items(): + if connection is not server_sock: + try: +@@ -127,13 +122,29 @@ def transmit_alarm(server_sock, epoll, fd_to_socket, bin_data, alarm_str): + fileno, alarm_str) + except (BrokenPipeError, ConnectionResetError): + to_remove.append(fileno) ++ except socket.error as e: ++ if e.errno == errno.EAGAIN: ++ to_retry.append(connection) ++ else: ++ logging.info("Sending msg failed, fd is %d, alarm msg is %s, reason is: %s", ++ fileno, alarm_str, str(e)) + except Exception as e: + logging.info("Sending msg failed, fd is %d, alarm msg is %s, reason is: %s", + fileno, alarm_str, str(e)) + ++ for connection in to_retry: ++ for i in range(MAX_RETRY_TIMES): ++ try: ++ connection.sendall(bin_data) ++ break ++ except Exception as e: ++ sleep(0.1) ++ logging.info("Sending msg failed for %d times, fd is %d, alarm msg is %s, reason is: %s", ++ i, connection.fileno(), alarm_str, str(e)) + + for fileno in to_remove: + fd_to_socket[fileno].close() + del fd_to_socket[fileno] + logging.info(f"cleaned up connection {fileno} for client lost connection.") + ++ +-- +2.27.0 + diff --git a/add-oom-event-report.patch b/add-oom-event-report.patch new file mode 100644 index 0000000000000000000000000000000000000000..96b554c127b57de10ca9dd451a38c80e3ff763a6 --- /dev/null +++ b/add-oom-event-report.patch @@ -0,0 +1,208 @@ +From 1e8f5bfc2a994ff8f544bbc6fd4ef4b04134ab51 Mon Sep 17 00:00:00 2001 +From: luckky +Date: Mon, 3 Mar 2025 03:09:57 +0000 +Subject: [PATCH] add oom event report + +Signed-off-by: luckky +--- + config/service/sysSentry.service | 2 + + src/libs/libxalarm/register_xalarm.h | 2 + + src/sentryPlugins/sentry_msg_monitor/Makefile | 2 +- + .../sentry_msg_monitor/sentry_msg_monitor.c | 63 +++++++++++++++---- + .../sentry_msg_monitor/smh_common_type.h | 12 +++- + 5 files changed, 66 insertions(+), 15 deletions(-) + +diff --git a/config/service/sysSentry.service 
b/config/service/sysSentry.service +index 1d8338f..7b3f59e 100644 +--- a/config/service/sysSentry.service ++++ b/config/service/sysSentry.service +@@ -1,5 +1,7 @@ + [Unit] + Description=EulerOS System Inspection Frame ++Requires=xalarmd.service ++After=xalarmd.service + + [Service] + ExecStart=/usr/bin/python3 /usr/bin/syssentry +diff --git a/src/libs/libxalarm/register_xalarm.h b/src/libs/libxalarm/register_xalarm.h +index 7a485ff..263fff5 100644 +--- a/src/libs/libxalarm/register_xalarm.h ++++ b/src/libs/libxalarm/register_xalarm.h +@@ -21,6 +21,8 @@ + + #define ALARM_REBOOT_EVENT 1003 + #define ALARM_REBOOT_ACK_EVENT 1004 ++#define ALARM_OOM_EVENT 1005 ++#define ALARM_OOM_ACK_EVENT 1006 + + #define MINOR_ALM 1 + #define MAJOR_ALM 2 +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index 77f4b21..dbc2a57 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -1,7 +1,7 @@ + all: sentry_msg_monitor + + sentry_msg_monitor: +- gcc sentry_msg_monitor.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm/ -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ gcc sentry_msg_monitor.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor + + clean: + rm -f sentry_msg_monitor +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index 514d04d..a307a96 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -16,7 +16,7 @@ + #define SMH_DEV_PATH "/dev/sentry_msg_helper" + #define PID_FILE_PATH "/var/run/"TOOL_NAME".pid" + #define ID_LIST_LENGTH SMH_MESSAGE_MAX +-#define MSG_STR_MAX_LEN 128 ++#define 
MSG_STR_MAX_LEN 1024 + #define DEFAULT_LOG_LEVEL LOG_INFO + #define MAX_RETRY_NUM 3 + #define RETRY_PERIOD 1 +@@ -93,12 +93,43 @@ static int smh_dev_get_fd(void) + + static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) + { +- char msgid_str[32]; +- snprintf(msgid_str, sizeof(msgid_str), "%lu", smh_msg->msgid); +- +- int res = snprintf(str, MSG_STR_MAX_LEN, "%s", msgid_str); +- if ((size_t)res >= MSG_STR_MAX_LEN) { +- logging_warn("msg str size exceeds the max value\n"); ++ int res; ++ char nid_str[MSG_STR_MAX_LEN]; ++ size_t offset = 0; ++ switch (smh_msg->type) { ++ case SMH_MESSAGE_POWER_OFF: ++ res = snprintf(str, MSG_STR_MAX_LEN, "%lu", smh_msg->msgid); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); ++ return -1; ++ } ++ break; ++ case SMH_MESSAGE_OOM: ++ for (int i = 0; i < MAX_NUMA_NODES; i++) { ++ res = snprintf(nid_str + offset, MSG_STR_MAX_LEN - offset, "%d%s", ++ smh_msg->oom_info.nid[i], (i < MAX_NUMA_NODES - 1) ? 
"," : ""); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); ++ return -1; ++ } ++ offset += res; ++ } ++ res = snprintf(str, MSG_STR_MAX_LEN, ++ "%lu_{nr_nid:%d,nid:[%s],sync:%d,timeout:%d,reason:%d}", ++ smh_msg->msgid, ++ smh_msg->oom_info.nr_nid, ++ nid_str, ++ smh_msg->oom_info.sync, ++ smh_msg->oom_info.timeout, ++ smh_msg->oom_info.reason ++ ); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); ++ return -1; ++ } ++ break; ++ default: ++ logging_warn("Unknown msg type: %d\n", smh_msg->type); + return -1; + } + return 0; +@@ -106,26 +137,31 @@ static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* s + + static int convert_str_to_smh_msg(char* str, struct sentry_msg_helper_msg* smh_msg) + { +- if (!(sscanf(str, "%lu_%d", &(smh_msg->msgid), &(smh_msg->res)) == XALARM_MSG_ITEM_CNT)) { ++ int n; ++ if (!(sscanf(str, "%lu_%lu%n", &(smh_msg->msgid), &(smh_msg->res), &n) == XALARM_MSG_ITEM_CNT) ++ || strlen(str) != n) { + logging_warn("Invalid msg str format, str is %s\n", str); + return -1; + } + return 0; + } + +-static unsigned short get_xalarm_us_alarm_id(enum sentry_msg_helper_msg_type msg_type) ++static unsigned short convert_msg_type_to_xalarm_type(enum sentry_msg_helper_msg_type msg_type) + { +- unsigned short alarm_id = 0; ++ unsigned short xalarm_type = 0; + switch (msg_type) + { + case SMH_MESSAGE_POWER_OFF: +- alarm_id = ALARM_REBOOT_EVENT; ++ xalarm_type = ALARM_REBOOT_EVENT; ++ break; ++ case SMH_MESSAGE_OOM: ++ xalarm_type = ALARM_OOM_EVENT; + break; + default: + logging_warn("Unknown msg type: %d\n", msg_type); + break; + } +- return alarm_id; ++ return xalarm_type; + } + + static void sender_cleanup(void* arg) +@@ -171,7 +207,7 @@ static void* sender_thread(void* arg) { + if (ret < 0) { + continue; + } +- unsigned short al_type = get_xalarm_us_alarm_id(smh_msg.type); ++ unsigned short al_type = 
convert_msg_type_to_xalarm_type(smh_msg.type); + if (al_type == 0) { + logging_warn("Send msg to xalarmd failed: Get unknown type msg, skip it\n"); + continue; +@@ -246,6 +282,7 @@ re_register: + .len = ID_LIST_LENGTH + }; + id_filter.id_list[0] = ALARM_REBOOT_ACK_EVENT; ++ id_filter.id_list[1] = ALARM_OOM_ACK_EVENT; + + for (int i = 0; i < MAX_RETRY_NUM; i++) { + ret = xalarm_register_event(®ister_info, id_filter); +diff --git a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +index 8069fb8..bd01556 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h ++++ b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +@@ -4,6 +4,7 @@ + #include + + #define SMH_TYPE ('}') ++#define MAX_NUMA_NODES 8 + + enum { + SMH_CMD_MSG_ACK = 0x10, +@@ -13,13 +14,22 @@ enum { + + enum sentry_msg_helper_msg_type { + SMH_MESSAGE_POWER_OFF, ++ SMH_MESSAGE_OOM, + SMH_MESSAGE_MAX, + }; + + struct sentry_msg_helper_msg { + enum sentry_msg_helper_msg_type type; + uint64_t msgid; +- int res; ++ // reboot_info is empty ++ struct { ++ int nr_nid; ++ int nid[MAX_NUMA_NODES]; ++ int sync; ++ int timeout; ++ int reason; ++ } oom_info; ++ unsigned long res; + }; + + #endif +-- +2.43.0 + diff --git a/add-sentry-msg-monitor.patch b/add-sentry-msg-monitor.patch new file mode 100644 index 0000000000000000000000000000000000000000..a673ca3e31ce8ccac1ee1ac61c7360e1d021f35f --- /dev/null +++ b/add-sentry-msg-monitor.patch @@ -0,0 +1,519 @@ +From c2909d6a596770cebb4245d6feb2595b0681938f Mon Sep 17 00:00:00 2001 +From: luckky +Date: Sun, 16 Feb 2025 20:07:17 +0800 +Subject: [PATCH] add sentry msg monitor + +--- + Makefile | 17 +- + config/env/sentry_msg_monitor.env | 1 + + config/tasks/sentry_msg_monitor.mod | 9 + + src/sentryPlugins/sentry_msg_monitor/Makefile | 9 + + .../sentry_msg_monitor/sentry_msg_monitor.c | 358 ++++++++++++++++++ + .../sentry_msg_monitor/smh_common_type.h | 25 ++ + 6 files changed, 417 insertions(+), 2 
deletions(-) + create mode 100644 config/env/sentry_msg_monitor.env + create mode 100644 config/tasks/sentry_msg_monitor.mod + create mode 100644 src/sentryPlugins/sentry_msg_monitor/Makefile + create mode 100644 src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c + create mode 100644 src/sentryPlugins/sentry_msg_monitor/smh_common_type.h + +diff --git a/Makefile b/Makefile +index 330a7d4..8f6ecff 100644 +--- a/Makefile ++++ b/Makefile +@@ -27,7 +27,7 @@ PYTHON_VERSION := $(shell $(PYBIN) --version 2>&1 | awk '{print $$2}' | cut -d ' + PKGVER := syssentry-$(VERSION)-py$(PYTHON_VERSION) + PKGVEREGG := syssentry-$(VERSION)-py$(PYTHON_VERSION).egg-info + +-all: lib ebpf hbm_online_repair bmc_block_io soc_ring_sentry ++all: lib ebpf hbm_online_repair bmc_block_io soc_ring_sentry sentry_msg_monitor + + lib:libxalarm log + +@@ -52,6 +52,9 @@ bmc_block_io: lib + soc_ring_sentry: lib + cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry/ && make + ++sentry_msg_monitor: lib ++ cd $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor/ && make ++ + install: all dirs isentry + + dirs: +@@ -138,6 +141,11 @@ isentry: + install -m 600 $(CURCONFIGDIR)/plugins/bmc_block_io.ini $(ETCDIR)/sysSentry/plugins/ + install -m 600 $(CURCONFIGDIR)/tasks/bmc_block_io.mod $(ETCDIR)/sysSentry/tasks/ + ++ # sentry_msg_monitor ++ install -m 550 $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor $(BINDIR) ++ install -m 600 $(CURCONFIGDIR)/env/sentry_msg_monitor.env $(ETCDIR)/sysconfig/ ++ install -m 600 $(CURCONFIGDIR)/tasks/sentry_msg_monitor.mod $(ETCDIR)/sysSentry/tasks/ ++ + # pysentry_notify + install -m 550 src/libsentry/python/pySentryNotify/sentry_notify.py $(PYDIR)/xalarm + +@@ -171,7 +179,10 @@ bmc_clean: + srs_clean: + cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry && make clean + +-clean: ebpf_clean hbm_clean bmc_clean srs_clean ++smm_clean: ++ cd $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor && make clean ++ ++clean: ebpf_clean hbm_clean bmc_clean srs_clean smm_clean + rm -rf 
$(CURLIBDIR)/build + rm -rf $(CURSRCDIR)/build + rm -rf $(CURSRCDIR)/libsentry/c/log/build +@@ -187,6 +198,7 @@ uninstall: + rm -rf $(BINDIR)/soc_ring_sentry + rm -rf $(BINDIR)/bmc_block_io + rm -rf $(BINDIR)/ebpf_collector ++ rm -rf $(BINDIR)/sentry_msg_monitor + rm -rf $(LIBINSTALLDIR)/libxalarm.so + rm -rf $(INCLUDEDIR)/xalarm + rm -rf $(LIBINSTALLDIR)/libsentry_log.so +@@ -194,6 +206,7 @@ uninstall: + rm -rf $(ETCDIR)/sysSentry + rm -rf $(ETCDIR)/hbm_online_repair.env + rm -rf $(ETCDIR)/soc_ring_sentry.env ++ rm -rf $(ETCDIR)/sentry_msg_monitor.env + rm -rf $(LOGSAVEDIR)/sysSentry + rm -rf $(PYDIR)/syssentry + rm -rf $(PYDIR)/xalarm +diff --git a/config/env/sentry_msg_monitor.env b/config/env/sentry_msg_monitor.env +new file mode 100644 +index 0000000..013ad2c +--- /dev/null ++++ b/config/env/sentry_msg_monitor.env +@@ -0,0 +1 @@ ++LOG_LEVEL=info +\ No newline at end of file +diff --git a/config/tasks/sentry_msg_monitor.mod b/config/tasks/sentry_msg_monitor.mod +new file mode 100644 +index 0000000..c7dabce +--- /dev/null ++++ b/config/tasks/sentry_msg_monitor.mod +@@ -0,0 +1,9 @@ ++[common] ++enabled=yes ++task_start=/usr/bin/sentry_msg_monitor ++task_stop=kill $pid ++type=period ++interval=10 ++onstart=yes ++env_file=/etc/sysconfig/sentry_msg_monitor.env ++conflict=up +\ No newline at end of file +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +new file mode 100644 +index 0000000..77f4b21 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -0,0 +1,9 @@ ++all: sentry_msg_monitor ++ ++sentry_msg_monitor: ++ gcc sentry_msg_monitor.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm/ -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ ++clean: ++ rm -f sentry_msg_monitor ++ ++.PHONY: all clean +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c 
b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +new file mode 100644 +index 0000000..514d04d +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -0,0 +1,358 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "register_xalarm.h" ++#include "log_utils.h" ++#include "smh_common_type.h" ++ ++#define TOOL_NAME "sentry_msg_monitor" ++#define SMH_DEV_PATH "/dev/sentry_msg_helper" ++#define PID_FILE_PATH "/var/run/"TOOL_NAME".pid" ++#define ID_LIST_LENGTH SMH_MESSAGE_MAX ++#define MSG_STR_MAX_LEN 128 ++#define DEFAULT_LOG_LEVEL LOG_INFO ++#define MAX_RETRY_NUM 3 ++#define RETRY_PERIOD 1 ++#define XALARM_MSG_ITEM_CNT 2 // msgid_res ++struct receiver_cleanup_data { ++ struct alarm_msg *al_msg; ++ struct alarm_register* register_info; ++}; ++ ++static int handle_file_lock(int fd, bool lock) ++{ ++ int ret; ++ struct flock fl; ++ fl.l_type = lock ? F_WRLCK : F_UNLCK; ++ fl.l_whence = SEEK_SET; ++ fl.l_start = 0; ++ fl.l_len = 0; ++ ++ ret = fcntl(fd, F_SETLK, &fl); ++ if (ret < 0) { ++ logging_error("fcntl failed, error msg is %s\n", strerror(errno)); ++ } else { ++ logging_debug("fcntl success, lock ret code is %d\n", ret); ++ } ++ return ret; ++} ++ ++static int check_and_set_pid_file() ++{ ++ int ret, fd; ++ fd = open(PID_FILE_PATH, O_CREAT | O_RDWR, 0600); ++ if (fd < 0) { ++ logging_error("open file %s failed!\n", PID_FILE_PATH); ++ return -1; ++ } ++ ++ ret = handle_file_lock(fd, true); ++ if (ret < 0) { ++ logging_error("%s is already running\n", TOOL_NAME); ++ close(fd); ++ return ret; ++ } ++ ++ return fd; ++} ++ ++static int release_pid_file(int fd) ++{ ++ int ret; ++ ret = handle_file_lock(fd, false); ++ if (ret < 0) { ++ logging_error("release pid file %s lock failed, error msg is %s\n", PID_FILE_PATH, strerror(errno)); ++ return ret; ++ } ++ ++ close(fd); ++ ret = remove(PID_FILE_PATH); ++ if (ret < 0) { ++ logging_error("remove %s failed, error msg is 
%s\n", PID_FILE_PATH, strerror(errno)); ++ } ++ return ret; ++} ++ ++static int smh_dev_get_fd(void) ++{ ++ int smh_dev_fd; ++ smh_dev_fd = open(SMH_DEV_PATH, O_RDWR); ++ if (smh_dev_fd < 0) { ++ logging_error("Failed to open smh_dev_fd for %s.\n", SMH_DEV_PATH); ++ } ++ ++ return smh_dev_fd; ++} ++ ++static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) ++{ ++ char msgid_str[32]; ++ snprintf(msgid_str, sizeof(msgid_str), "%lu", smh_msg->msgid); ++ ++ int res = snprintf(str, MSG_STR_MAX_LEN, "%s", msgid_str); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++static int convert_str_to_smh_msg(char* str, struct sentry_msg_helper_msg* smh_msg) ++{ ++ if (!(sscanf(str, "%lu_%d", &(smh_msg->msgid), &(smh_msg->res)) == XALARM_MSG_ITEM_CNT)) { ++ logging_warn("Invalid msg str format, str is %s\n", str); ++ return -1; ++ } ++ return 0; ++} ++ ++static unsigned short get_xalarm_us_alarm_id(enum sentry_msg_helper_msg_type msg_type) ++{ ++ unsigned short alarm_id = 0; ++ switch (msg_type) ++ { ++ case SMH_MESSAGE_POWER_OFF: ++ alarm_id = ALARM_REBOOT_EVENT; ++ break; ++ default: ++ logging_warn("Unknown msg type: %d\n", msg_type); ++ break; ++ } ++ return alarm_id; ++} ++ ++static void sender_cleanup(void* arg) ++{ ++ logging_debug("In sender thread cleanup\n"); ++ int fd = *(int *)arg; ++ if (fd > 0) { ++ close(fd); ++ } ++ logging_info("Sender thread cleanup over\n"); ++} ++ ++static void* sender_thread(void* arg) { ++ int ret; ++ int fd = smh_dev_get_fd(); ++ if (fd < 0) { ++ goto close_recv; ++ } ++ pthread_cleanup_push(sender_cleanup, &fd); ++ ++ pthread_t partner_t; ++ char str[MSG_STR_MAX_LEN]; ++ ++ while (1) { ++ struct sentry_msg_helper_msg smh_msg; ++ errno = 0; ++ ret = read(fd, &smh_msg, sizeof(struct sentry_msg_helper_msg)); ++ if (ret != sizeof(struct sentry_msg_helper_msg)) { ++ if (errno == ERESTART || errno == EFAULT) { ++ logging_warn("Read 
dev failed, return code (%d): try to read the next one msg from kernel\n", errno); ++ continue; ++ } else if (errno == EAGAIN) { ++ logging_warn("Read dev failed, return code (%d): kernel queue is full, try to read again\n", errno); ++ continue; ++ } else { ++ logging_error("Read dev failed, return code %d\n", errno); ++ goto sender_err; ++ } ++ } ++ logging_debug("Read dev success!\n"); ++ ++ ret = convert_smh_msg_to_str(&smh_msg, str); ++ if (ret < 0) { ++ continue; ++ } ++ unsigned short al_type = get_xalarm_us_alarm_id(smh_msg.type); ++ if (al_type == 0) { ++ logging_warn("Send msg to xalarmd failed: Get unknown type msg, skip it\n"); ++ continue; ++ } ++ for (int i = 0; i < MAX_RETRY_NUM; i++) { ++ ret = xalarm_report_event(al_type, str); ++ if (ret == 0) { ++ logging_info("Send msg success: al_type: %d, str: %s\n", al_type, str); ++ break; ++ } ++ if (ret == -EINVAL) { ++ logging_warn("Send msg to xalarmd failed: (%d) Invalid input value, skip it\n", ret); ++ break; ++ } else if (ret == -ENOTCONN || ret == -ECOMM) { ++ logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, try again\n", ret); ++ sleep(RETRY_PERIOD); ++ } else if (ret < 0) { ++ logging_warn("xalarm_report_event return %d\n", ret); ++ break; ++ } ++ } ++ if (ret == -ENOTCONN || ret == -ECOMM) { ++ logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, skip it\n", ret); ++ } ++ } ++ ++sender_err: ++ close(fd); ++close_recv: ++ partner_t = *(pthread_t*)arg; ++ if (partner_t) ++ pthread_cancel(partner_t); ++ logging_error("Sender thread exited unexpectedly\n"); ++ pthread_cleanup_pop(0); ++ return NULL; ++} ++ ++static void receiver_cleanup(void* arg) ++{ ++ logging_debug("In receiver thread cleanup\n"); ++ struct receiver_cleanup_data* rcd = (struct receiver_cleanup_data*) arg; ++ if (rcd->al_msg) { ++ free(rcd->al_msg); ++ } ++ if (rcd->register_info) { ++ xalarm_unregister_event(rcd->register_info); ++ } ++ logging_info("Receiver thread cleanup over\n"); ++} ++ ++static void* 
receiver_thread(void* arg) { ++ int ret, fd; ++ struct alarm_msg *al_msg; ++ struct sentry_msg_helper_msg smh_msg; ++ pthread_t partner_t; ++ struct alarm_register* register_info; ++ ++ fd = smh_dev_get_fd(); ++ if (fd < 0) { ++ goto close_send; ++ } ++ ++ al_msg = (struct alarm_msg*)malloc(sizeof(struct alarm_msg)); ++ if (!al_msg) { ++ logging_error("malloc al_msg failed!\n"); ++ goto receiver_err; ++ } ++ ++re_register: ++ register_info = NULL; ++ struct alarm_subscription_info id_filter = { ++ .len = ID_LIST_LENGTH ++ }; ++ id_filter.id_list[0] = ALARM_REBOOT_ACK_EVENT; ++ ++ for (int i = 0; i < MAX_RETRY_NUM; i++) { ++ ret = xalarm_register_event(®ister_info, id_filter); ++ if (ret == 0) ++ break; ++ if (ret == -ENOTCONN) { ++ logging_warn("Failed to register xalarm, try to re-register again\n"); ++ sleep(RETRY_PERIOD); ++ } else { ++ logging_error("xalarm_register_event return %d\n", ret); ++ goto receiver_err; ++ } ++ } ++ if (ret == -ENOTCONN) { ++ logging_error("Failed to register xalarm: (%d) bad connection\n", ret); ++ goto receiver_err; ++ } ++ ++ struct receiver_cleanup_data rcd = { ++ .al_msg = al_msg, ++ .register_info = register_info ++ }; ++ pthread_cleanup_push(receiver_cleanup, &rcd); ++ ++ while (1) { ++ ret = xalarm_get_event(al_msg, register_info); ++ if (ret == -ENOTCONN || ret == -ECONNRESET || ret == -EBADF) { ++ logging_warn("Failed to get msg: (%d) Xalarmd service exception, try to re-register\n", ret); ++ xalarm_unregister_event(register_info); ++ goto re_register; ++ } else if (ret < 0) { ++ logging_error("xalarm_get_event return %d\n", ret); ++ goto un_register; ++ } else { ++ logging_info("Get msg: al_type: %d, str: %s\n", al_msg->usAlarmId, al_msg->pucParas); ++ } ++ ++ ret = convert_str_to_smh_msg(al_msg->pucParas, &smh_msg); ++ if (ret < 0) { ++ logging_warn("Convert str failed: Bad format '%s', skip it\n", al_msg->pucParas); ++ continue; ++ } ++ for (int i = 0; i < MAX_RETRY_NUM; i++) { ++ errno = 0; ++ ret = ioctl(fd, 
SMH_MSG_ACK, &smh_msg); ++ if (ret == 0) ++ break; ++ if (errno == ERESTART || errno == ETIME || errno == ENOENT) { ++ logging_warn("Ack to kernel failed: ioctl return %d, skip it\n", errno); ++ break; ++ } else if (errno == EFAULT) { ++ logging_warn("Ack to kernel failed: (%d) Copy from user failed, try again\n", errno); ++ sleep(RETRY_PERIOD); ++ } else if (ret < 0) { ++ logging_error("Ack to kernel failed: ioctl return %d\n", errno); ++ goto un_register; ++ } ++ } ++ if (errno == EFAULT) { ++ logging_warn("Ack to kernel failed: (%d) Copy from user failed, skip it\n", errno); ++ } ++ } ++ ++un_register: ++ xalarm_unregister_event(register_info); ++receiver_err: ++ free(al_msg); ++ close(fd); ++close_send: ++ partner_t = *(pthread_t*)arg; ++ if (partner_t) ++ pthread_cancel(partner_t); ++ logging_error("Receiver thread exited unexpectedly\n"); ++ pthread_cleanup_pop(0); ++ return NULL; ++} ++ ++int main() ++{ ++ int ret, pid_fd; ++ pthread_t sender, receiver; ++ ++ pid_fd = check_and_set_pid_file(); ++ if (pid_fd < 0) { ++ return pid_fd; ++ } ++ ++ ret = pthread_create(&sender, NULL, sender_thread, &receiver); ++ if (ret) { ++ logging_error("Failed to create sender thread"); ++ goto err_release; ++ } ++ ret = pthread_create(&receiver, NULL, receiver_thread, &sender); ++ ++ if (ret) { ++ logging_error("Failed to create receiver thread"); ++ pthread_cancel(sender); ++ pthread_join(sender, NULL); ++ goto err_release; ++ } ++ ++ logging_info("sentry_msg_monitor start!\n"); ++ ++ pthread_join(sender, NULL); ++ pthread_join(receiver, NULL); ++ ++err_release: ++ release_pid_file(pid_fd); ++ return ret; ++} +diff --git a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +new file mode 100644 +index 0000000..8069fb8 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +@@ -0,0 +1,25 @@ ++#ifndef SMH_COMMON_TYPE_H ++#define SMH_COMMON_TYPE_H ++ ++#include ++ ++#define SMH_TYPE ('}') ++ 
++enum { ++ SMH_CMD_MSG_ACK = 0x10, ++}; ++ ++#define SMH_MSG_ACK _IO(SMH_TYPE, SMH_CMD_MSG_ACK) ++ ++enum sentry_msg_helper_msg_type { ++ SMH_MESSAGE_POWER_OFF, ++ SMH_MESSAGE_MAX, ++}; ++ ++struct sentry_msg_helper_msg { ++ enum sentry_msg_helper_msg_type type; ++ uint64_t msgid; ++ int res; ++}; ++ ++#endif +-- +2.27.0 + diff --git a/build-sentry_msg_monitor-only-under-aarch64-architec.patch b/build-sentry_msg_monitor-only-under-aarch64-architec.patch new file mode 100644 index 0000000000000000000000000000000000000000..510565d2e5571caa43bc122e73d15eae74ff4e4f --- /dev/null +++ b/build-sentry_msg_monitor-only-under-aarch64-architec.patch @@ -0,0 +1,68 @@ +From e24be56f37a447f26107b1c16021cb139d5126fa Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 27 Nov 2025 20:59:39 +0800 +Subject: [PATCH] build sentry_msg_monitor only under aarch64 architecture + +--- + Makefile | 6 +++++- + src/sentryPlugins/sentry_msg_monitor/Makefile | 10 ++++++++++ + 2 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index c450999..ee95d4a 100644 +--- a/Makefile ++++ b/Makefile +@@ -26,6 +26,8 @@ PYDIR = $(DESTDIR)$(PREFIX)/lib/$(PYNAME)/site-packages + PYTHON_VERSION := $(shell $(PYBIN) --version 2>&1 | awk '{print $$2}' | cut -d '.' 
-f 1,2) + PKGVER := syssentry-$(VERSION)-py$(PYTHON_VERSION) + PKGVEREGG := syssentry-$(VERSION)-py$(PYTHON_VERSION).egg-info ++ ++ARCH := $(shell uname -m) + + all: lib ebpf hbm_online_repair bmc_block_io soc_ring_sentry sentry_msg_monitor + +@@ -150,11 +152,13 @@ isentry: + install -m 550 $(CURSRCDIR)/sentryPlugins/bmc_block_io/output/bmc_block_io $(BINDIR) + install -m 600 $(CURCONFIGDIR)/plugins/bmc_block_io.ini $(ETCDIR)/sysSentry/plugins/ + install -m 600 $(CURCONFIGDIR)/tasks/bmc_block_io.mod $(ETCDIR)/sysSentry/tasks/ +- ++ ++ifeq ($(ARCH), aarch64) + # sentry_msg_monitor + install -m 550 $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor $(BINDIR) + install -m 600 $(CURCONFIGDIR)/env/sentry_msg_monitor.env $(ETCDIR)/sysconfig/ + install -m 600 $(CURCONFIGDIR)/tasks/sentry_msg_monitor.mod $(ETCDIR)/sysSentry/tasks/ ++endif + + # pysentry_notify + install -m 644 src/libsentry/python/pySentryNotify/sentry_notify.py $(PYDIR)/xalarm +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index a0a6de1..5025f54 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -12,6 +12,15 @@ + # Author: Luckky + # Create: 2025-02-18 + ++ARCH := $(shell uname -m) ++ ++ifneq ($(ARCH), aarch64) ++UNSUPPORTED_TARGETS := all install clean distclean uninstall ++$(UNSUPPORTED_TARGETS): ++ @echo "sentry_msg_monitor only supports aarch64 architecture (current: $(ARCH))" ++ @exit 0 ++.DEFAULT_GOAL := all ++else + all: sentry_msg_monitor + + sentry_msg_monitor: +@@ -19,5 +28,6 @@ sentry_msg_monitor: + + clean: + rm -f sentry_msg_monitor ++endif + + .PHONY: all clean +-- +2.27.0 + diff --git a/fix-Out-of-memory-bounds-access-in-ebpf_collector.patch b/fix-Out-of-memory-bounds-access-in-ebpf_collector.patch new file mode 100644 index 0000000000000000000000000000000000000000..da3a90da66377ad260cf8becb8b21cbdcb401752 --- /dev/null +++ 
b/fix-Out-of-memory-bounds-access-in-ebpf_collector.patch @@ -0,0 +1,30 @@ +From 797f0760d3dc1e396547edffad887e273c5731c6 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Tue, 25 Nov 2025 16:42:28 +0800 +Subject: [PATCH 14/16] fix Out of memory bounds access in ebpf_collector + +--- + .../sentryCollector/ebpf_collector/ebpf_collector.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c +index 445cce7..e000f0f 100644 +--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c ++++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c +@@ -240,7 +240,12 @@ char *read_config_value(const char *file, const char *section, const char *key) + char *value = NULL; + + while (fgets(line, sizeof(line), fp) != NULL) { +- line[strcspn(line, "\n")] = 0; ++ size_t newline_char_idx = strcspn(line, "\n"); ++ if (newline_char_idx == MAX_LINE_LENGTH) { ++ line[MAX_LINE_LENGTH - 1] = '\0'; ++ } else { ++ line[newline_char_idx] = '\0'; ++ } + + if (line[0] == '\0' || line[0] == ';' || line[0] == '#') { + continue; +-- +2.27.0 + diff --git a/fix-an-issue-with-printing-error.patch b/fix-an-issue-with-printing-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..9877a51fe73c2f79765ba2ddacbb2ff676eab15a --- /dev/null +++ b/fix-an-issue-with-printing-error.patch @@ -0,0 +1,26 @@ +From 98852da140c536ef41f801f61430130d28eba3d2 Mon Sep 17 00:00:00 2001 +From: zhuofeng <1107893276@qq.com> +Date: Wed, 12 Mar 2025 02:48:26 +0000 +Subject: [PATCH] fix an issue with printing error + +Signed-off-by: zhuofeng <1107893276@qq.com> +--- + src/services/syssentry/global_values.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/services/syssentry/global_values.py b/src/services/syssentry/global_values.py +index a6da9e3..931b8ab 100644 +--- a/src/services/syssentry/global_values.py ++++ 
b/src/services/syssentry/global_values.py +@@ -116,7 +116,7 @@ class InspectTask: + try: + child = subprocess.Popen(cmd_list, stdout=logfile, stderr=subprocess.STDOUT, close_fds=True, env=self.environ_conf) + except OSError: +- logging.error("task %s start Popen error, invalid cmd") ++ logging.error("task %s start Popen error, invalid cmd", cmd_list) + self.result_info["result"] = ResultLevel.FAIL.name + self.result_info["error_msg"] = RESULT_LEVEL_ERR_MSG_DICT.get(ResultLevel.FAIL.name) + self.runtime_status = "FAILED" +-- +2.43.0 + diff --git a/fix-buffer-overflow-in-checkset_cpulist.patch b/fix-buffer-overflow-in-checkset_cpulist.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7656ead7d653684ff16cd98b2ac19b09ef8a2cd --- /dev/null +++ b/fix-buffer-overflow-in-checkset_cpulist.patch @@ -0,0 +1,25 @@ +From 697f9079081afef834b19116f1286a92f6f2f730 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:25:14 +0800 +Subject: [PATCH 05/16] fix buffer overflow in checkset_cpulist() + +--- + src/sentryPlugins/cpu_sentry/catlib/cli_param_checker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/sentryPlugins/cpu_sentry/catlib/cli_param_checker.c b/src/sentryPlugins/cpu_sentry/catlib/cli_param_checker.c +index 71edf17..6ea35a5 100644 +--- a/src/sentryPlugins/cpu_sentry/catlib/cli_param_checker.c ++++ b/src/sentryPlugins/cpu_sentry/catlib/cli_param_checker.c +@@ -29,7 +29,7 @@ void checkset_cpulist(char *getopt_optarg, catcli_request_body *p_request_body, + regcomp(®, CPULIST_REGEX, REG_EXTENDED); // 编译正则模式串 + const size_t nmatch = 1; // 定义匹配结果最大允许数 + regmatch_t pmatch[1]; // 定义匹配结果在待匹配串中的下标范围 +- char getopt_optarg_copy[strlen(getopt_optarg)]; ++ char getopt_optarg_copy[strlen(getopt_optarg) + 1]; + strcpy(getopt_optarg_copy,getopt_optarg); + int status = regexec(®, getopt_optarg_copy, nmatch, pmatch, 0); + regfree(®); // 释放正则表达式 +-- +2.27.0 + diff --git a/fix-error-code-for-socket-failed.patch 
b/fix-error-code-for-socket-failed.patch new file mode 100644 index 0000000000000000000000000000000000000000..f87e3da11a6580409a47596610c5cb2aa7b2b0a0 --- /dev/null +++ b/fix-error-code-for-socket-failed.patch @@ -0,0 +1,49 @@ +From 63b3b424cee48565ed2278757c03e17f3e83ae4e Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:23:19 +0800 +Subject: [PATCH 02/16] fix error code for socket failed + +--- + src/libs/libxalarm/register_xalarm.c | 5 +++-- + src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/src/libs/libxalarm/register_xalarm.c b/src/libs/libxalarm/register_xalarm.c +index 4204fce..d689d0a 100644 +--- a/src/libs/libxalarm/register_xalarm.c ++++ b/src/libs/libxalarm/register_xalarm.c +@@ -363,7 +363,7 @@ int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + fprintf(stderr, "%s socket create error: %s\n", __func__, strerror(errno)); +- return -1; ++ return -ENODEV; + } + + ret = init_report_addr(&alarm_addr, PATH_REPORT_ALARM); +@@ -779,7 +779,8 @@ int xalarm_report_event(unsigned short usAlarmId, char *pucParas) + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { +- return -ENOTCONN; ++ fprintf(stderr, "%s socket create error: %s\n", __func__, strerror(errno)); ++ return -ENODEV; + } + + ret = init_report_addr(&alarm_addr, PATH_REPORT_ALARM); +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index e62c936..efab827 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -235,7 +235,7 @@ static void* sender_thread(void* arg) { + if (ret == -EINVAL) { + logging_warn("Send msg to xalarmd failed: (%d) Invalid input value, skip it\n", ret); + break; +- } else if (ret == -ENOTCONN || ret == -ECOMM) { ++ } else if (ret == 
-ENOTCONN || ret == -ECOMM || ret == -ENODEV) { + logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, try again\n", ret); + sleep(RETRY_PERIOD); + } else if (ret < 0) { +-- +2.27.0 + diff --git a/fix-log_utils.patch b/fix-log_utils.patch new file mode 100644 index 0000000000000000000000000000000000000000..6df6038e561d7115545ce4271c2f0c2fd1a02697 --- /dev/null +++ b/fix-log_utils.patch @@ -0,0 +1,69 @@ +From ec2ba808a4ff94ff452799a0088851c3a7d79ce7 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:22:55 +0800 +Subject: [PATCH 01/16] fix log_utils + +--- + src/libsentry/c/log/CMakeLists.txt | 2 +- + src/libsentry/c/log/log_utils.c | 2 +- + src/libsentry/c/log/log_utils.h | 10 +++++----- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/src/libsentry/c/log/CMakeLists.txt b/src/libsentry/c/log/CMakeLists.txt +index 6488195..494ff0d 100644 +--- a/src/libsentry/c/log/CMakeLists.txt ++++ b/src/libsentry/c/log/CMakeLists.txt +@@ -1,7 +1,7 @@ + # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. + # Description: cmake file for log_utils +-project(sentry_log) + cmake_minimum_required(VERSION 3.22) ++project(sentry_log) + add_library(sentry_log SHARED log_utils.c) + set_target_properties(sentry_log PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wtrampolines") + set_target_properties(sentry_log PROPERTIES CMAKE_C_FLAGS "-shared -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -g") +diff --git a/src/libsentry/c/log/log_utils.c b/src/libsentry/c/log/log_utils.c +index 935e6d6..0565104 100644 +--- a/src/libsentry/c/log/log_utils.c ++++ b/src/libsentry/c/log/log_utils.c +@@ -23,7 +23,6 @@ void logMessage(LogLevel level, char* file, int line, const char *format, ...) 
+ + void setLogLevel() + { +- currentLogLevel = LOG_INFO; + char* levelStr = getenv(LOG_LEVEL_ENV); + if (levelStr == NULL) { + logMessage(LOG_WARN, __FILE__, __LINE__, "getenv('%s') is NULL, use default log level : %s\n", LOG_LEVEL_ENV, LOG_LEVEL_STRING(LOG_INFO)); +@@ -40,6 +39,7 @@ void setLogLevel() + currentLogLevel = LOG_DEBUG; + logMessage(LOG_INFO,__FILE__, __LINE__,"Set log level : %s\n", LOG_LEVEL_STRING(LOG_DEBUG)); + } else { ++ currentLogLevel = LOG_INFO; + logMessage(LOG_WARN, __FILE__, __LINE__, "unknown log level : %s, use default log level : %s\n", levelStr, LOG_LEVEL_STRING(LOG_INFO)); + } + } +diff --git a/src/libsentry/c/log/log_utils.h b/src/libsentry/c/log/log_utils.h +index 8a56520..2cd74e2 100644 +--- a/src/libsentry/c/log/log_utils.h ++++ b/src/libsentry/c/log/log_utils.h +@@ -8,12 +8,12 @@ + #ifndef _SYSSENTRY_LOG_H + #define _SYSSENTRY_LOG_H + +-#include "stdio.h" ++#include + #include +-#include "string.h" +-#include "stdarg.h" +-#include "time.h" +-#include "libgen.h" ++#include ++#include ++#include ++#include + + typedef enum { + LOG_DEBUG = 0, +-- +2.27.0 + diff --git a/fix-missing-pycache-file.patch b/fix-missing-pycache-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4ad0dbbc50152eefcf96d2c12cfe0f9edb173b7 --- /dev/null +++ b/fix-missing-pycache-file.patch @@ -0,0 +1,73 @@ +From 85d23aa0f18bd79a54d247f39503a49a9e4fa772 Mon Sep 17 00:00:00 2001 +From: hubin +Date: Sat, 22 Mar 2025 16:41:55 +0800 +Subject: [PATCH 4/7] fix missing pycache file + +--- + Makefile | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/Makefile b/Makefile +index 330a7d4..01ed310 100644 +--- a/Makefile ++++ b/Makefile +@@ -68,15 +68,17 @@ dirs: + + isentry: + cd $(CURSRCDIR) && $(PYBIN) setup.py install --prefix=$(PREFIX) -O1 --root=build --record=SENTRY_FILES +- find $(CURSRCDIR)/build/ -type d -name '__pycache__' -exec rm -rf {} + + + # sysSentry主包 + ## 创建目录 + install -d -m 700 
$(LOGSAVEDIR)/sysSentry + install -d -m 700 $(VARLIB)/logrotate-syssentry + install -d -m 700 $(PYDIR)/syssentry ++ install -d -m 700 $(PYDIR)/syssentry/__pycache__ + install -d -m 755 $(PYDIR)/xalarm ++ install -d -m 700 $(PYDIR)/xalarm/__pycache__ + install -d -m 700 $(PYDIR)/sentryCollector ++ install -d -m 700 $(PYDIR)/sentryCollector/__pycache__ + install -d -m 700 $(PYDIR)/$(PKGVEREGG) + install -d -m 700 $(ETCDIR)/sysconfig + +@@ -95,9 +97,12 @@ isentry: + install -m 600 $(CURCONFIGDIR)/service/sentryCollector.service $(SYSTEMDDIR) + + ## 安装python源代码文件到相应的目录 +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/* $(PYDIR)/syssentry +- install -m 555 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/* $(PYDIR)/xalarm +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/* $(PYDIR)/sentryCollector ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/*.py $(PYDIR)/syssentry ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/__pycache__/* $(PYDIR)/syssentry/__pycache__ ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/*.py $(PYDIR)/xalarm ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/__pycache__/* $(PYDIR)/xalarm/__pycache__ ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/*.py $(PYDIR)/sentryCollector ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/__pycache__/* $(PYDIR)/sentryCollector/__pycache__ + install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/$(PKGVEREGG)/* $(PYDIR)/$(PKGVEREGG) + + ## 安装可执行文件 +@@ -111,15 +116,19 @@ isentry: + + # avg_block_io + install -d -m 700 $(PYDIR)/sentryPlugins/avg_block_io ++ install -d -m 700 $(PYDIR)/sentryPlugins/avg_block_io/__pycache__ + install -m 550 $(CURSRCDIR)/build/usr/bin/avg_block_io $(BINDIR) +- install -m 550 
src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/* $(PYDIR)/sentryPlugins/avg_block_io ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/*.py $(PYDIR)/sentryPlugins/avg_block_io ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/__pycache__/* $(PYDIR)/sentryPlugins/avg_block_io/__pycache__ + install -m 600 $(CURCONFIGDIR)/plugins/avg_block_io.ini $(ETCDIR)/sysSentry/plugins/ + install -m 600 $(CURCONFIGDIR)/tasks/avg_block_io.mod $(ETCDIR)/sysSentry/tasks/ + + # ai_block_io + install -d -m 700 $(PYDIR)/sentryPlugins/ai_block_io ++ install -d -m 700 $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ + install -m 550 $(CURSRCDIR)/build/usr/bin/ai_block_io $(BINDIR) +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/* $(PYDIR)/sentryPlugins/ai_block_io ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/*.py $(PYDIR)/sentryPlugins/ai_block_io ++ install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/__pycache__/* $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ + install -m 600 $(CURCONFIGDIR)/plugins/ai_block_io.ini $(ETCDIR)/sysSentry/plugins/ + install -m 600 $(CURCONFIGDIR)/tasks/ai_block_io.mod $(ETCDIR)/sysSentry/tasks/ + +-- +2.43.0 + diff --git a/fix-process-exit-status-and-service-kill-mode.patch b/fix-process-exit-status-and-service-kill-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..34a51292e59e993f326af9c3abdd856773409597 --- /dev/null +++ b/fix-process-exit-status-and-service-kill-mode.patch @@ -0,0 +1,138 @@ +From 2a27a7d986ab4ce427c5157128d0e5c1710b57d2 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Tue, 15 Apr 2025 15:45:15 +0800 +Subject: [PATCH 10/16] fix process exit status and service kill mode + +--- + config/service/sysSentry.service | 2 +- + selftest/test/test_sentryctl_interval_exception.sh | 2 +- + 
selftest/test/test_sentryctl_normal.sh | 2 +- + selftest/test/test_sentryctl_type_oneshot.sh | 2 +- + selftest/test/test_sentryctl_type_period.sh | 6 +++--- + src/services/syssentry/syssentry.py | 12 +++++++----- + 6 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/config/service/sysSentry.service b/config/service/sysSentry.service +index 7b3f59e..335b3da 100644 +--- a/config/service/sysSentry.service ++++ b/config/service/sysSentry.service +@@ -6,7 +6,7 @@ After=xalarmd.service + [Service] + ExecStart=/usr/bin/python3 /usr/bin/syssentry + ExecStop=/bin/kill $MAINPID +-KillMode=process ++KillMode=mixed + Restart=on-failure + RestartSec=10s + +diff --git a/selftest/test/test_sentryctl_interval_exception.sh b/selftest/test/test_sentryctl_interval_exception.sh +index 9066ff8..eaaaa86 100644 +--- a/selftest/test/test_sentryctl_interval_exception.sh ++++ b/selftest/test/test_sentryctl_interval_exception.sh +@@ -40,7 +40,7 @@ function do_test() { + sleep 1 + + sentryctl status test_interval_exception 2>&1 | tee ${tmp_log} | cat +- expect_true "grep -E '(status: EXITED)' ${tmp_log}" ++ expect_true "grep -E '(status: FAILED)' ${tmp_log}" + + } + +diff --git a/selftest/test/test_sentryctl_normal.sh b/selftest/test/test_sentryctl_normal.sh +index efd9f29..77d43ec 100644 +--- a/selftest/test/test_sentryctl_normal.sh ++++ b/selftest/test/test_sentryctl_normal.sh +@@ -34,7 +34,7 @@ function do_test() { + if [ $? 
-eq 0 ] + then + sentryctl status test_normal 2>&1 | tee ${tmp_log} +- expect_true "grep -E '(status: EXITED)' ${tmp_log}" ++ expect_true "grep -E '(status: FAILED)' ${tmp_log}" + fi + } + +diff --git a/selftest/test/test_sentryctl_type_oneshot.sh b/selftest/test/test_sentryctl_type_oneshot.sh +index 500aba4..61b2937 100644 +--- a/selftest/test/test_sentryctl_type_oneshot.sh ++++ b/selftest/test/test_sentryctl_type_oneshot.sh +@@ -33,7 +33,7 @@ function do_test() { + sleep 1 + + sentryctl status test_type_oneshot 2>&1 | tee ${tmp_log} | cat +- expect_true "grep -E '(status: EXITED)' ${tmp_log}" ++ expect_true "grep -E '(status: FAILED)' ${tmp_log}" + + sentryctl start test_type_oneshot + expect_eq $? 0 +diff --git a/selftest/test/test_sentryctl_type_period.sh b/selftest/test/test_sentryctl_type_period.sh +index 6788fad..16e9658 100644 +--- a/selftest/test/test_sentryctl_type_period.sh ++++ b/selftest/test/test_sentryctl_type_period.sh +@@ -33,7 +33,7 @@ function do_test() { + sleep 1 + + sentryctl status test_type_period 2>&1 | tee ${tmp_log} | cat +- expect_true "grep -E '(status: EXITED)' ${tmp_log}" ++ expect_true "grep -E '(status: FAILED)' ${tmp_log}" + + sentryctl start test_type_period + expect_eq $? 0 +@@ -50,7 +50,7 @@ function do_test() { + if pgrep -x "test_task" >/dev/null; then + pkill -x "test_task" + sleep 2 +- sentryctl status test_type_period | grep -w "status: FAILED" ++ sentryctl status test_type_period | grep -w "status: WAITING" + expect_eq $? 0 + + sentryctl start test_type_period +@@ -59,7 +59,7 @@ function do_test() { + sentryctl stop test_type_period + expect_eq $? 0 + +- sentryctl status test_type_period | grep -w "status: EXITED" ++ sentryctl status test_type_period | grep -w "status: FAILED" + expect_eq $? 
0 + fi + +diff --git a/src/services/syssentry/syssentry.py b/src/services/syssentry/syssentry.py +index 2f371a7..831d974 100644 +--- a/src/services/syssentry/syssentry.py ++++ b/src/services/syssentry/syssentry.py +@@ -562,26 +562,28 @@ def sigchld_handler(signum, _f): + """ + while True: + try: +- child_pid, child_exit_code = os.waitpid(-1, os.WNOHANG) ++ child_pid, child_status = os.waitpid(-1, os.WNOHANG) + logging.debug("sigchld pid :%d", child_pid) + task = get_task_by_pid(child_pid) + if not task: + logging.debug("pid %d cannot find task, ignore", child_pid) + break + logging.debug("task name %s", task.name) +- if os.WIFEXITED(child_exit_code): ++ if os.WIFEXITED(child_status): + # exit normally with exit() syscall ++ logging.info("task %s exit with status %d", task.name, os.WEXITSTATUS(child_status)) + if task.type == "PERIOD" and task.period_enabled: + set_runtime_status(task.name, WAITING_STATUS) + else: +- if os.WEXITSTATUS(child_exit_code): ++ if os.WEXITSTATUS(child_status): + set_runtime_status(task.name, NONZERO_EXITED_STATUS) + else: + set_runtime_status(task.name, EXITED_STATUS) + else: + # exit abnormally +- if not task.period_enabled: +- set_runtime_status(task.name, EXITED_STATUS) ++ logging.info("task %s terminated", task.name) ++ if task.type == "PERIOD" and task.period_enabled: ++ set_runtime_status(task.name, WAITING_STATUS) + else: + set_runtime_status(task.name, FAILED_STATUS) + task.result_info["end_time"] = get_current_time_string() +-- +2.27.0 + diff --git a/fix-python-files-permission.patch b/fix-python-files-permission.patch new file mode 100644 index 0000000000000000000000000000000000000000..55ef5e87b0d5c70d1fe4d3ccdb619099d80d1862 --- /dev/null +++ b/fix-python-files-permission.patch @@ -0,0 +1,170 @@ +From 36a0819f8dc58d26f143969f52b3447e54987489 Mon Sep 17 00:00:00 2001 +From: guodashun +Date: Sat, 22 Mar 2025 19:04:18 +0800 +Subject: [PATCH] fix python files permission + +--- + Makefile | 125 
++++++++++++++++++++++++++++--------------------------- + 1 file changed, 63 insertions(+), 62 deletions(-) + +diff --git a/Makefile b/Makefile +index b9ce6ec..c450999 100644 +--- a/Makefile ++++ b/Makefile +@@ -73,18 +73,19 @@ isentry: + cd $(CURSRCDIR) && $(PYBIN) setup.py install --prefix=$(PREFIX) -O1 --root=build --record=SENTRY_FILES + + # sysSentry主包 +- ## 创建目录 +- install -d -m 700 $(LOGSAVEDIR)/sysSentry +- install -d -m 700 $(VARLIB)/logrotate-syssentry +- install -d -m 700 $(PYDIR)/syssentry +- install -d -m 700 $(PYDIR)/syssentry/__pycache__ +- install -d -m 755 $(PYDIR)/xalarm +- install -d -m 700 $(PYDIR)/xalarm/__pycache__ +- install -d -m 700 $(PYDIR)/sentryCollector +- install -d -m 700 $(PYDIR)/sentryCollector/__pycache__ +- install -d -m 700 $(PYDIR)/$(PKGVEREGG) +- install -d -m 700 $(ETCDIR)/sysconfig +- ++ ## 创建目录 ++ install -d -m 700 $(LOGSAVEDIR)/sysSentry ++ install -d -m 700 $(VARLIB)/logrotate-syssentry ++ install -d -m 755 $(PYDIR)/syssentry ++ install -d -m 755 $(PYDIR)/syssentry/__pycache__ ++ install -d -m 755 $(PYDIR)/xalarm/ ++ install -d -m 755 $(PYDIR)/xalarm/__pycache__ ++ install -d -m 755 $(PYDIR)/sentryCollector ++ install -d -m 755 $(PYDIR)/sentryCollector/__pycache__ ++ install -d -m 755 $(PYDIR)/$(PKGVEREGG) ++ install -d -m 700 $(PYDIR)/$(PKGVEREGG) ++ install -d -m 700 $(ETCDIR)/sysconfig ++ + ## 安装配置文件 + install -m 600 $(CURCONFIGDIR)/logrotate-sysSentry.conf $(ETCDIR)/sysSentry/ + install -m 600 $(CURCONFIGDIR)/inspect.conf $(ETCDIR)/sysSentry/ +@@ -97,44 +98,44 @@ isentry: + ## 安装 systemd 服务文件 + install -m 600 $(CURCONFIGDIR)/service/sysSentry.service $(SYSTEMDDIR) + install -m 600 $(CURCONFIGDIR)/service/xalarmd.service $(SYSTEMDDIR) +- install -m 600 $(CURCONFIGDIR)/service/sentryCollector.service $(SYSTEMDDIR) +- +- ## 安装python源代码文件到相应的目录 +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/*.py $(PYDIR)/syssentry +- install -m 550 
src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/__pycache__/* $(PYDIR)/syssentry/__pycache__ +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/*.py $(PYDIR)/xalarm +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/__pycache__/* $(PYDIR)/xalarm/__pycache__ +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/*.py $(PYDIR)/sentryCollector +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/__pycache__/* $(PYDIR)/sentryCollector/__pycache__ +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/$(PKGVEREGG)/* $(PYDIR)/$(PKGVEREGG) +- +- ## 安装可执行文件 +- install -m 550 $(CURSRCDIR)/services/syssentry/sentryctl $(BINDIR) ++ install -m 600 $(CURCONFIGDIR)/service/sentryCollector.service $(SYSTEMDDIR) ++ ++ ## 安装python源代码文件到相应的目录 ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/*.py $(PYDIR)/syssentry ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/syssentry/__pycache__/* $(PYDIR)/syssentry/__pycache__ ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/*.py $(PYDIR)/xalarm ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/xalarm/__pycache__/* $(PYDIR)/xalarm/__pycache__ ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/*.py $(PYDIR)/sentryCollector ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/services/sentryCollector/__pycache__/* $(PYDIR)/sentryCollector/__pycache__ ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/$(PKGVEREGG)/* $(PYDIR)/$(PKGVEREGG) ++ ++ ## 安装可执行文件 ++ install -m 550 $(CURSRCDIR)/services/syssentry/sentryctl $(BINDIR) + install -m 550 $(CURSRCDIR)/build/usr/bin/syssentry $(BINDIR) + install -m 550 $(CURSRCDIR)/build/usr/bin/xalarmd $(BINDIR) + install -m 550 $(CURSRCDIR)/build/usr/bin/sentryCollector $(BINDIR) + @if [ -f 
"$(CURSRCDIR)/services/sentryCollector/ebpf_collector/ebpf_collector" ]; then \ + install -m 550 $(CURSRCDIR)/services/sentryCollector/ebpf_collector/ebpf_collector $(BINDIR); \ +- fi +- +- # avg_block_io +- install -d -m 700 $(PYDIR)/sentryPlugins/avg_block_io +- install -d -m 700 $(PYDIR)/sentryPlugins/avg_block_io/__pycache__ +- install -m 550 $(CURSRCDIR)/build/usr/bin/avg_block_io $(BINDIR) +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/*.py $(PYDIR)/sentryPlugins/avg_block_io +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/__pycache__/* $(PYDIR)/sentryPlugins/avg_block_io/__pycache__ +- install -m 600 $(CURCONFIGDIR)/plugins/avg_block_io.ini $(ETCDIR)/sysSentry/plugins/ +- install -m 600 $(CURCONFIGDIR)/tasks/avg_block_io.mod $(ETCDIR)/sysSentry/tasks/ +- +- # ai_block_io +- install -d -m 700 $(PYDIR)/sentryPlugins/ai_block_io +- install -d -m 700 $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ +- install -m 550 $(CURSRCDIR)/build/usr/bin/ai_block_io $(BINDIR) +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/*.py $(PYDIR)/sentryPlugins/ai_block_io +- install -m 550 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/__pycache__/* $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ +- install -m 600 $(CURCONFIGDIR)/plugins/ai_block_io.ini $(ETCDIR)/sysSentry/plugins/ +- install -m 600 $(CURCONFIGDIR)/tasks/ai_block_io.mod $(ETCDIR)/sysSentry/tasks/ +- ++ fi ++ ++ # avg_block_io ++ install -d -m 755 $(PYDIR)/sentryPlugins/avg_block_io ++ install -d -m 755 $(PYDIR)/sentryPlugins/avg_block_io/__pycache__ ++ install -m 550 $(CURSRCDIR)/build/usr/bin/avg_block_io $(BINDIR) ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/*.py $(PYDIR)/sentryPlugins/avg_block_io ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/avg_block_io/__pycache__/* 
$(PYDIR)/sentryPlugins/avg_block_io/__pycache__ ++ install -m 600 $(CURCONFIGDIR)/plugins/avg_block_io.ini $(ETCDIR)/sysSentry/plugins/ ++ install -m 600 $(CURCONFIGDIR)/tasks/avg_block_io.mod $(ETCDIR)/sysSentry/tasks/ ++ ++ # ai_block_io ++ install -d -m 755 $(PYDIR)/sentryPlugins/ai_block_io ++ install -d -m 755 $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ ++ install -m 550 $(CURSRCDIR)/build/usr/bin/ai_block_io $(BINDIR) ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/*.py $(PYDIR)/sentryPlugins/ai_block_io ++ install -m 644 src/build/usr/lib/$(PYNAME)/site-packages/sentryPlugins/ai_block_io/__pycache__/* $(PYDIR)/sentryPlugins/ai_block_io/__pycache__ ++ install -m 600 $(CURCONFIGDIR)/plugins/ai_block_io.ini $(ETCDIR)/sysSentry/plugins/ ++ install -m 600 $(CURCONFIGDIR)/tasks/ai_block_io.mod $(ETCDIR)/sysSentry/tasks/ ++ + # hbm_online_repair + install -m 550 $(CURSRCDIR)/sentryPlugins/hbm_online_repair/hbm_online_repair $(BINDIR) + install -m 600 $(CURCONFIGDIR)/env/hbm_online_repair.env $(ETCDIR)/sysconfig/ +@@ -153,26 +154,26 @@ isentry: + # sentry_msg_monitor + install -m 550 $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor $(BINDIR) + install -m 600 $(CURCONFIGDIR)/env/sentry_msg_monitor.env $(ETCDIR)/sysconfig/ +- install -m 600 $(CURCONFIGDIR)/tasks/sentry_msg_monitor.mod $(ETCDIR)/sysSentry/tasks/ +- +- # pysentry_notify +- install -m 550 src/libsentry/python/pySentryNotify/sentry_notify.py $(PYDIR)/xalarm +- +- # pysentry_collect +- install -m 550 src/libsentry/python/pySentryCollector/collect_plugin.py $(PYDIR)/sentryCollector +- +- # libxalarm +- install -m 555 $(CURLIBDIR)/build/libxalarm/libxalarm.so $(LIBINSTALLDIR) ++ install -m 600 $(CURCONFIGDIR)/tasks/sentry_msg_monitor.mod $(ETCDIR)/sysSentry/tasks/ ++ ++ # pysentry_notify ++ install -m 644 src/libsentry/python/pySentryNotify/sentry_notify.py $(PYDIR)/xalarm ++ ++ # pysentry_collect ++ install -m 644 
src/libsentry/python/pySentryCollector/collect_plugin.py $(PYDIR)/sentryCollector ++ ++ # libxalarm ++ install -m 555 $(CURLIBDIR)/build/libxalarm/libxalarm.so $(LIBINSTALLDIR) + + # libxalarm-devel + install -d -m 755 $(INCLUDEDIR)/xalarm +- install -m 644 $(CURLIBDIR)/libxalarm/register_xalarm.h $(INCLUDEDIR)/xalarm/ +- +- # pyxalarm +- install -m 555 src/libs/pyxalarm/register_xalarm.py $(PYDIR)/xalarm +- +- # log utils +- install -d -m 700 $(INCLUDEDIR)/libsentry ++ install -m 644 $(CURLIBDIR)/libxalarm/register_xalarm.h $(INCLUDEDIR)/xalarm/ ++ ++ # pyxalarm ++ install -m 644 src/libs/pyxalarm/register_xalarm.py $(PYDIR)/xalarm ++ ++ # log utils ++ install -d -m 700 $(INCLUDEDIR)/libsentry + install -m 644 $(CURSRCDIR)/libsentry/c/log/log_utils.h $(INCLUDEDIR)/libsentry/ + install -m 550 $(CURSRCDIR)/libsentry/c/log/build/libsentry_log.so $(LIBINSTALLDIR) + +-- +2.27.0 + diff --git a/fix-resource-leak-in-hbm_online_repair.patch b/fix-resource-leak-in-hbm_online_repair.patch new file mode 100644 index 0000000000000000000000000000000000000000..acfe36fe2fea6cfc77de79058489f2a5711e395c --- /dev/null +++ b/fix-resource-leak-in-hbm_online_repair.patch @@ -0,0 +1,37 @@ +From aba6165e4125a164353cf4f73ea122d083266ed9 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Tue, 25 Nov 2025 16:29:37 +0800 +Subject: [PATCH 13/16] fix resource leak in hbm_online_repair + +--- + src/sentryPlugins/hbm_online_repair/hbm-ras-events.c | 1 + + src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/sentryPlugins/hbm_online_repair/hbm-ras-events.c b/src/sentryPlugins/hbm_online_repair/hbm-ras-events.c +index b859afb..a677fdb 100644 +--- a/src/sentryPlugins/hbm_online_repair/hbm-ras-events.c ++++ b/src/sentryPlugins/hbm_online_repair/hbm-ras-events.c +@@ -414,6 +414,7 @@ static int init_event_format(struct ras_events *ras, struct tep_handle *pevent, + free(page); + return EINVAL; + } ++ free(page); + return 0; + } + 
+diff --git a/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c b/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c +index 07af481..638dafa 100644 +--- a/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c ++++ b/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c +@@ -193,6 +193,7 @@ static int read_variable_attribute(char *name, char *guid, uint32_t *attribute) + readsize = read(fd, attribute, sizeof(uint32_t)); + if (readsize != sizeof(uint32_t)) { + log(LOG_ERROR, "read attribute of %s failed\n", filename); ++ close(fd); + return -1; + } + +-- +2.27.0 + diff --git a/fix-some-warnings.patch b/fix-some-warnings.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e76d489f4be6ac5046e50fb27084a2ed8fcd200 --- /dev/null +++ b/fix-some-warnings.patch @@ -0,0 +1,53 @@ +From 4d916488a81c110e07a4206146289150fab26daf Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:22:29 +0800 +Subject: [PATCH 7/7] fix some warnings + +--- + src/sentryPlugins/hbm_online_repair/hbm_online_repair.c | 5 ++++- + .../hbm_online_repair/non-standard-hbm-repair.c | 6 +++++- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/src/sentryPlugins/hbm_online_repair/hbm_online_repair.c b/src/sentryPlugins/hbm_online_repair/hbm_online_repair.c +index c6de011..756a5b4 100644 +--- a/src/sentryPlugins/hbm_online_repair/hbm_online_repair.c ++++ b/src/sentryPlugins/hbm_online_repair/hbm_online_repair.c +@@ -52,7 +52,10 @@ int execute_command(const char *command) + return -1; + } + +- fgets(buffer, sizeof(buffer), fp); ++ // check return value ++ if (fgets(buffer, sizeof(buffer), fp) == NULL) { ++ log(LOG_DEBUG, "execute_command: no output (EOF) for '%s'\n", command); ++ } + log(LOG_DEBUG, "output of command %s is: %s\n", command, buffer); + + ret = pclose(fp); +diff --git a/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c b/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c +index 
35bd21a..07af481 100644 +--- a/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c ++++ b/src/sentryPlugins/hbm_online_repair/non-standard-hbm-repair.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "logger.h" + #include "non-standard-hbm-repair.h" +@@ -642,7 +643,10 @@ static int hbmc_get_memory_type(char *path) + char buf[128]; + FILE *file; + +- snprintf(fname, MAX_PATH, "%s/%s", path, "memory_type"); ++ size_t suffix_len = sizeof("/memory_type") - 1; ++ int limit = MAX_PATH - suffix_len - 1; ++ snprintf(fname, MAX_PATH, "%.*s/%s", limit, path, "memory_type"); ++ + file = fopen(fname, "r"); + if (!file) { + log(LOG_WARNING, "HBM: Cannot to open '%s': %s\n", +-- +2.43.0 + diff --git a/fix-sys-exit-bug.patch b/fix-sys-exit-bug.patch new file mode 100644 index 0000000000000000000000000000000000000000..00c206f41bb3d2a8ad8072b2a0fcd562c2c10e63 --- /dev/null +++ b/fix-sys-exit-bug.patch @@ -0,0 +1,109 @@ +From f231e9420da8792ac6990546b7485b9e767a7d69 Mon Sep 17 00:00:00 2001 +From: guodashun +Date: Sun, 23 Mar 2025 13:05:18 +0800 +Subject: [PATCH 6/7] fix sys exit bug + +--- + src/services/syssentry/syssentry.py | 47 +++++++++++++++++------------ + 1 file changed, 28 insertions(+), 19 deletions(-) + +diff --git a/src/services/syssentry/syssentry.py b/src/services/syssentry/syssentry.py +index 55a845b..32707d1 100644 +--- a/src/services/syssentry/syssentry.py ++++ b/src/services/syssentry/syssentry.py +@@ -53,7 +53,7 @@ except ImportError: + BMC_EXIST = False + + +-INSPECTOR = None ++exit_flag = False + + CTL_MSG_HEAD_LEN = 6 + CTL_MSG_MAGIC_LEN = 3 +@@ -500,7 +500,8 @@ def main_loop(): + continue + task.onstart_handle() + +- while True: ++ global exit_flag ++ while not exit_flag: + try: + events_list = epoll_fd.poll(SERVER_EPOLL_TIMEOUT) + for event_fd, _ in events_list: +@@ -544,10 +545,14 @@ def release_pidfile(): + def remove_sock_file(): + """remove sock file + """ ++ for socket_path in (THB_SOCKET_PATH, CTL_SOCKET_PATH, 
CPU_ALARM_SOCKET_PATH, RESULT_SOCKET_PATH, BMC_SOCKET_PATH): ++ try: ++ os.unlink(socket_path) ++ except FileNotFoundError: ++ pass + try: +- os.unlink(THB_SOCKET_PATH) +- os.unlink(CTL_SOCKET_PATH) +- except FileNotFoundError: ++ os.rmdir(SENTRY_RUN_DIR) ++ except Exception: + pass + + +@@ -586,21 +591,10 @@ def sig_handler(signum, _f): + :param _f: + :return: + """ ++ global exit_flag + if signum not in (signal.SIGINT, signal.SIGTERM): + return +- tasks_dict = TasksMap.tasks_dict +- for task_type in tasks_dict: +- for task_name in tasks_dict[task_type]: +- task = tasks_dict[task_type][task_name] +- task.stop() +- if task.pid > 0: +- try: +- os.kill(task.pid, signal.SIGTERM) +- except os.error as os_error: +- logging.debug("sigterm kill error, %s", str(os_error)) +- release_pidfile() +- remove_sock_file() +- sys.exit(0) ++ exit_flag = True + + + def chk_and_set_pidfile(): +@@ -621,6 +615,19 @@ def chk_and_set_pidfile(): + return False + + ++def clean_child(): ++ tasks_dict = TasksMap.tasks_dict ++ for task_type in tasks_dict: ++ for task_name in tasks_dict[task_type]: ++ task = tasks_dict[task_type][task_name] ++ task.stop() ++ if task.pid > 0: ++ try: ++ os.kill(task.pid, signal.SIGTERM) ++ except os.error as os_error: ++ logging.debug("sigterm kill error, %s", str(os_error)) ++ ++ + def main(): + """main + """ +@@ -654,8 +661,10 @@ def main(): + main_loop() + + except Exception: +- logging.error('%s', traceback.format_exc()) ++ pass + finally: + if clientId != -1: + xalarm_unregister(clientId) ++ clean_child() + release_pidfile() ++ remove_sock_file() +-- +2.43.0 + diff --git a/fix-systemctl-stop-error-bug-that-clientId-is-a-loca.patch b/fix-systemctl-stop-error-bug-that-clientId-is-a-loca.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f9ff022b772f3afc47d3b3522001da7b6d25ede --- /dev/null +++ b/fix-systemctl-stop-error-bug-that-clientId-is-a-loca.patch @@ -0,0 +1,47 @@ +From 011e4f092ad0684d92e6895d7bd043bc468d58ef Mon Sep 17 
00:00:00 2001 +From: guodashun +Date: Thu, 20 Mar 2025 15:20:59 +0800 +Subject: [PATCH 3/7] fix systemctl stop error bug that clientId is a local + variable + +--- + src/services/syssentry/alarm.py | 1 - + src/services/syssentry/syssentry.py | 3 +-- + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/src/services/syssentry/alarm.py b/src/services/syssentry/alarm.py +index ad9d11c..a0220a0 100644 +--- a/src/services/syssentry/alarm.py ++++ b/src/services/syssentry/alarm.py +@@ -38,7 +38,6 @@ alarm_list_lock = threading.Lock() + + id_filter = [] + id_base = 1001 +-clientId = -1 + + MILLISECONDS_UNIT_SECONDS = 1000 + MAX_NUM_OF_ALARM_ID = 128 +diff --git a/src/services/syssentry/syssentry.py b/src/services/syssentry/syssentry.py +index 625da9a..55a845b 100644 +--- a/src/services/syssentry/syssentry.py ++++ b/src/services/syssentry/syssentry.py +@@ -40,8 +40,6 @@ from .alarm import alarm_register + + from xalarm.register_xalarm import xalarm_unregister + +-clientId = -1 +- + CPU_EXIST = True + try: + from sentryPlugins.cpu_sentry.cpu_alarm import cpu_alarm_recv +@@ -640,6 +638,7 @@ def main(): + logging.error("get pid file lock failed, exist") + sys.exit(17) + ++ clientId = -1 + try: + signal.signal(signal.SIGINT, sig_handler) + signal.signal(signal.SIGTERM, sig_handler) +-- +2.43.0 + diff --git a/fix-typo.patch b/fix-typo.patch new file mode 100644 index 0000000000000000000000000000000000000000..595ec5b1d1758777a1d78301e2dfdcd751811883 --- /dev/null +++ b/fix-typo.patch @@ -0,0 +1,182 @@ +From 3c7d8c013517ccc75445136f6f1f479cad00cd21 Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:24:19 +0800 +Subject: [PATCH] fix typo + +--- + src/libs/libxalarm/register_xalarm.c | 14 ++++++------- + src/libs/libxalarm/register_xalarm.h | 4 ++-- + .../bmc_block_io/src/cbmcblockio.cpp | 4 ++-- + src/services/xalarm/xalarm_server.py | 20 +++++++++---------- + 4 files changed, 21 insertions(+), 21 deletions(-) + +diff --git 
a/src/libs/libxalarm/register_xalarm.c b/src/libs/libxalarm/register_xalarm.c +index d689d0a..3cd7e9d 100644 +--- a/src/libs/libxalarm/register_xalarm.c ++++ b/src/libs/libxalarm/register_xalarm.c +@@ -520,13 +520,13 @@ int send_data_to_socket(const char *socket_path, const char *message) + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd == -1) { + fprintf(stderr, "failed to create socket\n"); +- return RETURE_CODE_FAIL; ++ return RETURN_CODE_FAIL; + } + + // set socket address + if (memset(&addr, 0, sizeof(struct sockaddr_un)) == NULL) { + fprintf(stderr, "%s: memset info failed.\n", __func__); +- return RETURE_CODE_FAIL; ++ return RETURN_CODE_FAIL; + } + + addr.sun_family = AF_UNIX; +@@ -535,18 +535,18 @@ int send_data_to_socket(const char *socket_path, const char *message) + if (connect(sockfd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) == -1) { + fprintf(stderr, "failed to connect socket %s\n", socket_path); + close(sockfd); +- return RETURE_CODE_FAIL; ++ return RETURN_CODE_FAIL; + } + + // write data + if (write(sockfd, message, strlen(message)) == -1) { + fprintf(stderr, "failed to send data to socket %s\n", socket_path); + close(sockfd); +- return RETURE_CODE_FAIL; ++ return RETURN_CODE_FAIL; + } + + close(sockfd); +- return RETURE_CODE_SUCCESS; ++ return RETURN_CODE_SUCCESS; + } + + +@@ -600,7 +600,7 @@ static bool is_valid_task_name(const char *task_name) + */ + int report_result(const char *task_name, enum RESULT_LEVEL result_level, const char *report_data) + { +- int ret = RETURE_CODE_FAIL; ++ int ret = RETURN_CODE_FAIL; + if (result_level < 0 || result_level >= RESULT_LEVEL_NUM) { + fprintf(stderr, "result_level (%d) is invalid, it must be in [0-5]\n", result_level); + return ret; +@@ -644,7 +644,7 @@ int report_result(const char *task_name, enum RESULT_LEVEL result_level, const c + goto free_msg; + } + +- ret = RETURE_CODE_SUCCESS; ++ ret = RETURN_CODE_SUCCESS; + free_msg: + free(message); + message = NULL; +diff --git 
a/src/libs/libxalarm/register_xalarm.h b/src/libs/libxalarm/register_xalarm.h +index 263fff5..80fd635 100644 +--- a/src/libs/libxalarm/register_xalarm.h ++++ b/src/libs/libxalarm/register_xalarm.h +@@ -130,8 +130,8 @@ enum RESULT_LEVEL { + #define RESULT_INFO_MAX_LEN 4096 + #define RESULT_INFO_LOG_MGS_MAX_LEN 255 + +-#define RETURE_CODE_FAIL (-1) +-#define RETURE_CODE_SUCCESS 0 ++#define RETURN_CODE_FAIL (-1) ++#define RETURN_CODE_SUCCESS 0 + + extern int report_result(const char *task_name, + enum RESULT_LEVEL result_level, +diff --git a/src/sentryPlugins/bmc_block_io/src/cbmcblockio.cpp b/src/sentryPlugins/bmc_block_io/src/cbmcblockio.cpp +index 5d64bde..0b9b578 100644 +--- a/src/sentryPlugins/bmc_block_io/src/cbmcblockio.cpp ++++ b/src/sentryPlugins/bmc_block_io/src/cbmcblockio.cpp +@@ -409,7 +409,7 @@ void CBMCBlockIo::ReportAlarm(const IPMIEvent& event) + json_object_object_add(jObject, JSON_KEY_DETAILS.c_str(), json_object_new_string("{}}")); + const char *jData = json_object_to_json_string(jObject); + int ret = xalarm_Report(m_alarmId, ucAlarmLevel, ucAlarmType, const_cast(jData)); +- if (ret != RETURE_CODE_SUCCESS) { ++ if (ret != RETURN_CODE_SUCCESS) { + BMC_LOG_ERROR << "Failed to xalarm_Report, ret: " << ret; + } + json_object_put(jObject); +@@ -423,7 +423,7 @@ void CBMCBlockIo::ReportResult(int resultLevel, const std::string& msg) + json_object_object_add(jObject, JSON_KEY_MSG.c_str(), json_object_new_string(msg.c_str())); + const char *jData = json_object_to_json_string(jObject); + int ret = report_result(BMC_TASK_NAME.c_str(), level, const_cast(jData)); +- if (ret != RETURE_CODE_SUCCESS) { ++ if (ret != RETURN_CODE_SUCCESS) { + BMC_LOG_ERROR << "Failed to report_result, ret: " << ret; + } + json_object_put(jObject); +diff --git a/src/services/xalarm/xalarm_server.py b/src/services/xalarm/xalarm_server.py +index 932279c..d31cd03 100644 +--- a/src/services/xalarm/xalarm_server.py ++++ b/src/services/xalarm/xalarm_server.py +@@ -37,16 +37,16 @@ SOCK_FILE 
= "/var/run/xalarm/report" + ALARM_REPORT_LEN = 8216 + ALARM_DIR_PERMISSION = 0o755 + SOCKET_FILE_PERMISSON = 0o666 +-PERMISION_MASK = 0o777 +-PEROID_CHECK_TIME = 3 ++PERMISSION_MASK = 0o777 ++PERIOD_CHECK_TIME = 3 + ALARM_LISTEN_QUEUE_LEN = 5 +-PEROID_SCANN_TIME = 60 ++PERIOD_SCANN_TIME = 60 + fd_to_socket_lock = threading.Lock() + + def check_permission(path, permission): + """check whether the permission of path is right + """ +- return (os.stat(path).st_mode & PERMISION_MASK) == permission ++ return (os.stat(path).st_mode & PERMISSION_MASK) == permission + + + def check_socket_file(path): +@@ -108,14 +108,14 @@ def recover_sock_path_and_permission(): + os.chmod(ALARM_DIR, ALARM_DIR_PERMISSION) + if os.path.exists(SOCK_FILE) and not check_permission(SOCK_FILE, SOCKET_FILE_PERMISSON): + logging.info("socket file %s permission %s set not properly, recover as default permission", +- SOCK_FILE, oct(os.stat(SOCK_FILE).st_mode & PERMISION_MASK)) ++ SOCK_FILE, oct(os.stat(SOCK_FILE).st_mode & PERMISSION_MASK)) + os.chmod(SOCK_FILE, SOCKET_FILE_PERMISSON) + if os.path.exists(USER_RECV_SOCK) and not check_permission(USER_RECV_SOCK, SOCKET_FILE_PERMISSON): + logging.info("socket file %s permission %s set not properly, recover as default permission", +- USER_RECV_SOCK, oct(os.stat(USER_RECV_SOCK).st_mode & PERMISION_MASK)) ++ USER_RECV_SOCK, oct(os.stat(USER_RECV_SOCK).st_mode & PERMISSION_MASK)) + os.chmod(USER_RECV_SOCK, SOCKET_FILE_PERMISSON) + +-def peroid_task_to_cleanup_connections(): ++def period_task_to_cleanup_connections(): + global alarm_sock + global alarm_epoll + global fd_to_socket +@@ -124,7 +124,7 @@ def peroid_task_to_cleanup_connections(): + logging.info("cleanup thread is running") + + while True: +- sleep(PEROID_SCANN_TIME) ++ sleep(PERIOD_SCANN_TIME) + # if conn thread stopped, cleanup thread should not cleanup anymore + if conn_thread_should_stop.is_set(): + continue +@@ -182,7 +182,7 @@ def watch_socket_file_and_dir(): + except Exception as e: + 
logging.error("Error watch socket file thread: %s", str(e)) + +- sleep(PEROID_CHECK_TIME) ++ sleep(PERIOD_CHECK_TIME) + + + def start_wait_for_conn_thread(alarm_sock_, alarm_epoll_, +@@ -229,7 +229,7 @@ def server_loop(alarm_config): + fd_to_socket_lock + ) + +- cleanup_thread = threading.Thread(target=peroid_task_to_cleanup_connections) ++ cleanup_thread = threading.Thread(target=period_task_to_cleanup_connections) + cleanup_thread.daemon = True + cleanup_thread.start() + +-- +2.27.0 + diff --git a/fix-xalarmd-stop-failed-by-systemd.patch b/fix-xalarmd-stop-failed-by-systemd.patch new file mode 100644 index 0000000000000000000000000000000000000000..3027985671e5923cd74610fe98c72f35d4098f34 --- /dev/null +++ b/fix-xalarmd-stop-failed-by-systemd.patch @@ -0,0 +1,25 @@ +From d4f28fadee2d536699e21fca49e8e14782f4d19c Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Tue, 18 Mar 2025 20:44:46 +0800 +Subject: [PATCH 2/7] fix xalarmd stop failed by systemd + +--- + config/service/xalarmd.service | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/config/service/xalarmd.service b/config/service/xalarmd.service +index 20db997..6d8418c 100644 +--- a/config/service/xalarmd.service ++++ b/config/service/xalarmd.service +@@ -4,7 +4,7 @@ Description = xalarm daemon for alarm messages forwarding + [Service] + Type = forking + ExecStart=/usr/bin/python3 /usr/bin/xalarmd +-ExecStop=/bin/kill ++ExecStop=/bin/kill $MAINPID + KillMode=process + Restart=on-failure + RestartSec=3s +-- +2.43.0 + diff --git a/report-panic-and-kernel-reboot-event.patch b/report-panic-and-kernel-reboot-event.patch new file mode 100644 index 0000000000000000000000000000000000000000..634cd1f4ff88c49e29d50bbd4e03fda501d1af10 --- /dev/null +++ b/report-panic-and-kernel-reboot-event.patch @@ -0,0 +1,874 @@ +From be8f08df6d3edd52a2b26e0058a4d84997e5957c Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 6 Nov 2025 10:26:25 +0800 +Subject: [PATCH 08/16] report panic and kernel 
reboot event + +Feature issue: +https://gitee.com/openeuler/release-management/issues/ID45WQ + +1. UVB Communication: Dependent on Kernel (cis and odf modules). +2. UMRA Communication: Dependent on Kernel and umdk. +--- + config/tasks/ai_block_io.mod | 2 +- + config/tasks/hbm_online_repair.mod | 2 +- + config/tasks/rasdaemon.mod | 2 +- + config/tasks/sentry_msg_monitor.mod | 2 +- + config/tasks/sentry_urma_comm.mod | 6 + + src/libs/libxalarm/register_xalarm.h | 16 +- + src/sentryPlugins/sentry_msg_monitor/Makefile | 16 +- + .../sentry_msg_monitor/sentry_msg_monitor.c | 256 ++++++++++++------ + .../sentry_msg_monitor/smh_common_type.h | 63 +++-- + src/services/syssentry/mod_status.py | 1 + + src/services/syssentry/sentryctl | 158 ++++++++++- + src/services/syssentry/syssentry.py | 8 +- + 12 files changed, 419 insertions(+), 113 deletions(-) + create mode 100644 config/tasks/sentry_urma_comm.mod + +diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod +index 82f4f0b..947ad39 100644 +--- a/config/tasks/ai_block_io.mod ++++ b/config/tasks/ai_block_io.mod +@@ -4,4 +4,4 @@ task_start=/usr/bin/python3 /usr/bin/ai_block_io + task_stop=pkill -f /usr/bin/ai_block_io + type=oneshot + alarm_id=1002 +-alarm_clear_time=5 +\ No newline at end of file ++alarm_clear_time=5 +diff --git a/config/tasks/hbm_online_repair.mod b/config/tasks/hbm_online_repair.mod +index 4dcef43..5c69fa6 100644 +--- a/config/tasks/hbm_online_repair.mod ++++ b/config/tasks/hbm_online_repair.mod +@@ -6,4 +6,4 @@ type=period + interval=10 + onstart=yes + env_file=/etc/sysconfig/hbm_online_repair.env +-conflict=up +\ No newline at end of file ++conflict=up +diff --git a/config/tasks/rasdaemon.mod b/config/tasks/rasdaemon.mod +index ed6f3c6..f0d8290 100644 +--- a/config/tasks/rasdaemon.mod ++++ b/config/tasks/rasdaemon.mod +@@ -5,4 +5,4 @@ task_stop=kill $pid + type=oneshot + onstart=yes + env_file=/etc/sysconfig/rasdaemon +-conflict=up +\ No newline at end of file ++conflict=up +diff --git 
a/config/tasks/sentry_msg_monitor.mod b/config/tasks/sentry_msg_monitor.mod +index c7dabce..4847255 100644 +--- a/config/tasks/sentry_msg_monitor.mod ++++ b/config/tasks/sentry_msg_monitor.mod +@@ -6,4 +6,4 @@ type=period + interval=10 + onstart=yes + env_file=/etc/sysconfig/sentry_msg_monitor.env +-conflict=up +\ No newline at end of file ++conflict=up +diff --git a/config/tasks/sentry_urma_comm.mod b/config/tasks/sentry_urma_comm.mod +new file mode 100644 +index 0000000..ad775a5 +--- /dev/null ++++ b/config/tasks/sentry_urma_comm.mod +@@ -0,0 +1,6 @@ ++[common] ++enabled=yes ++task_start=modprobe sentry_urma_comm ++task_stop=rmmod sentry_urma_comm ++type=oneshot ++onstart=no +diff --git a/src/libs/libxalarm/register_xalarm.h b/src/libs/libxalarm/register_xalarm.h +index 80fd635..cbb21b7 100644 +--- a/src/libs/libxalarm/register_xalarm.h ++++ b/src/libs/libxalarm/register_xalarm.h +@@ -20,12 +20,16 @@ + #define MEMORY_ALARM_ID 1001 + + #define ALARM_REBOOT_EVENT 1003 +-#define ALARM_REBOOT_ACK_EVENT 1004 +-#define ALARM_OOM_EVENT 1005 +-#define ALARM_OOM_ACK_EVENT 1006 +- +-#define MINOR_ALM 1 +-#define MAJOR_ALM 2 ++#define ALARM_REBOOT_ACK_EVENT 1004 ++#define ALARM_OOM_EVENT 1005 ++#define ALARM_OOM_ACK_EVENT 1006 ++#define ALARM_PANIC_EVENT 1007 ++#define ALARM_PANIC_ACK_EVENT 1008 ++#define ALARM_KERNEL_REBOOT_EVENT 1009 ++#define ALARM_KERNEL_REBOOT_ACK_EVENT 1010 ++ ++#define MINOR_ALM 1 ++#define MAJOR_ALM 2 + #define CRITICAL_ALM 3 + + #define ALARM_TYPE_OCCUR 1 +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index dbc2a57..2f423b2 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -1,7 +1,21 @@ ++# Copyright (c) 2025 Huawei Technologies Co., Ltd. ++# sysSentry is licensed under the Mulan PSL v2. ++# You can use this software according to the terms and conditions of the Mulan PSL v2. 
++# You may obtain a copy of Mulan PSL v2 at: ++# http://license.coscl.org.cn/MulanPSL2 ++# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++# PURPOSE. ++# See the Mulan PSL v2 for more details. ++ ++# Description: build file for sentry_msg_monitor ++# Author: Luckky ++# Create: 2025-02-18 ++ + all: sentry_msg_monitor + + sentry_msg_monitor: +- gcc sentry_msg_monitor.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ gcc sentry_msg_monitor.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor + + clean: + rm -f sentry_msg_monitor +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index efab827..bfe9356 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -1,3 +1,19 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. 
++ ++ * Description: sentry msg monitor ++ * Author: Luckky ++ * Create: 2025-02-18 ++ */ ++ + #include + #include + #include +@@ -20,7 +36,8 @@ + #define DEFAULT_LOG_LEVEL LOG_INFO + #define MAX_RETRY_NUM 3 + #define RETRY_PERIOD 1 +-#define XALARM_MSG_ITEM_CNT 2 // msgid_res ++#define XALARM_GENERAL_MSG_ITEM_CNT 2 // msgid_res ++#define XALARM_PANIC_MSG_ITEM_CNT 4 // msgid_{cna:cna,eid:eid}_res + struct receiver_cleanup_data { + struct alarm_msg *al_msg; + struct alarm_register* register_info; +@@ -82,93 +99,152 @@ static int release_pid_file(int fd) + + static int smh_dev_get_fd(void) + { +- int smh_dev_fd; ++ int smh_dev_fd; + smh_dev_fd = open(SMH_DEV_PATH, O_RDWR); + if (smh_dev_fd < 0) { + logging_error("Failed to open smh_dev_fd for %s.\n", SMH_DEV_PATH); + } + +- return smh_dev_fd; ++ return smh_dev_fd; + } + +-static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) ++static int convert_power_off_smh_smg_to_str(const struct sentry_msg_helper_msg* smh_msg, char* str) ++{ ++ int res; ++ res = snprintf(str, MSG_STR_MAX_LEN, "%lu", smh_msg->msgid); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++static int convert_oom_smh_smg_to_str(const struct sentry_msg_helper_msg* smh_msg, char* str) + { + int res; +- char *nid_str = NULL; + size_t offset = 0; +- switch (smh_msg->type) { +- case SMH_MESSAGE_POWER_OFF: +- res = snprintf(str, MSG_STR_MAX_LEN, "%lu", smh_msg->msgid); +- if ((size_t)res >= MSG_STR_MAX_LEN) { +- logging_warn("msg str size exceeds the max value\n"); +- return -1; +- } +- break; +- case SMH_MESSAGE_OOM: +- nid_str = (char *) calloc (MSG_STR_MAX_LEN, sizeof(char)); +- if (!nid_str) { +- logging_error("Failed to allocate memory!"); +- return -1; +- } +- for (int i = 0; i < MAX_NUMA_NODES; i++) { +- res = snprintf(nid_str + offset, MSG_STR_MAX_LEN - offset, "%d%s", +- smh_msg->oom_info.nid[i], (i < MAX_NUMA_NODES - 1) ? 
"," : ""); +- if ((size_t)res >= MSG_STR_MAX_LEN) { +- logging_warn("msg str size exceeds the max value\n"); +- free(nid_str); +- nid_str = NULL; +- return -1; +- } +- offset += res; +- } +- res = snprintf(str, MSG_STR_MAX_LEN, +- "%lu_{nr_nid:%d,nid:[%s],sync:%d,timeout:%d,reason:%d}", +- smh_msg->msgid, +- smh_msg->oom_info.nr_nid, +- nid_str, +- smh_msg->oom_info.sync, +- smh_msg->oom_info.timeout, +- smh_msg->oom_info.reason +- ); +- free(nid_str); +- nid_str = NULL; ++ ++ char *nid_str = (char *) calloc (MSG_STR_MAX_LEN, sizeof(char)); ++ if (!nid_str) { ++ logging_error("Failed to allocate memory!"); ++ return -1; ++ } ++ for (int i = 0; i < OOM_EVENT_MAX_NUMA_NODES ; i++) { ++ res = snprintf(nid_str + offset, MSG_STR_MAX_LEN - offset, "%d%s", ++ smh_msg->helper_msg_info.oom_info.nid[i], ++ (i < OOM_EVENT_MAX_NUMA_NODES - 1) ? "," : ""); + if ((size_t)res >= MSG_STR_MAX_LEN) { + logging_warn("msg str size exceeds the max value\n"); ++ free(nid_str); ++ nid_str = NULL; + return -1; + } +- break; +- default: +- logging_warn("Unknown msg type: %d\n", smh_msg->type); ++ offset += res; ++ } ++ res = snprintf(str, MSG_STR_MAX_LEN, ++ "%lu_{nr_nid:%d,nid:[%s],sync:%d,timeout:%d,reason:%d}", ++ smh_msg->msgid, ++ smh_msg->helper_msg_info.oom_info.nr_nid, ++ nid_str, ++ smh_msg->helper_msg_info.oom_info.sync, ++ smh_msg->helper_msg_info.oom_info.timeout, ++ smh_msg->helper_msg_info.oom_info.reason); ++ free(nid_str); ++ nid_str = NULL; ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); + return -1; + } + return 0; + } + +-static int convert_str_to_smh_msg(char* str, struct sentry_msg_helper_msg* smh_msg) ++static int convert_remote_smh_smg_to_str(const struct sentry_msg_helper_msg* smh_msg, char* str) + { +- int n; +- if (!(sscanf(str, "%lu_%lu%n", &(smh_msg->msgid), &(smh_msg->res), &n) == XALARM_MSG_ITEM_CNT) +- || strlen(str) != n) { +- logging_warn("Invalid msg str format, str is %s\n", str); ++ int res = 
snprintf(str, MSG_STR_MAX_LEN, "%lu_{cna:%u,eid:%s}", ++ smh_msg->msgid, ++ smh_msg->helper_msg_info.remote_info.cna, ++ smh_msg->helper_msg_info.remote_info.eid); ++ if ((size_t)res >= MSG_STR_MAX_LEN) { ++ logging_warn("msg str size exceeds the max value\n"); + return -1; + } + return 0; + } + ++static int convert_smh_msg_to_str(struct sentry_msg_helper_msg* smh_msg, char* str) ++{ ++ int res; ++ switch (smh_msg->type) { ++ case SMH_MESSAGE_POWER_OFF: ++ res = convert_power_off_smh_smg_to_str(smh_msg, str); ++ break; ++ case SMH_MESSAGE_OOM: ++ res = convert_oom_smh_smg_to_str(smh_msg, str); ++ break; ++ case SMH_MESSAGE_PANIC: ++ case SMH_MESSAGE_KERNEL_REBOOT: ++ res = convert_remote_smh_smg_to_str(smh_msg, str); ++ break; ++ default: ++ logging_warn("Unknown msg type: %d\n", smh_msg->type); ++ return -1; ++ } ++ return res; ++} ++ ++static int convert_str_to_smh_msg(struct alarm_msg *al_msg, struct sentry_msg_helper_msg* smh_msg) ++{ ++ int n, ret = 0; ++ unsigned short alarm_ack_type = al_msg->usAlarmId; ++ switch (alarm_ack_type) { ++ case ALARM_REBOOT_ACK_EVENT: ++ case ALARM_OOM_ACK_EVENT: ++ if (!(sscanf(al_msg->pucParas, "%lu_%lu%n", ++ &(smh_msg->msgid), ++ &(smh_msg->res), ++ &n) == XALARM_GENERAL_MSG_ITEM_CNT) || strlen(al_msg->pucParas) != n) { ++ logging_warn("Invalid msg str format, str is %s\n", al_msg->pucParas); ++ ret = -1; ++ } ++ break; ++ case ALARM_PANIC_ACK_EVENT: ++ case ALARM_KERNEL_REBOOT_ACK_EVENT: ++ if (!(sscanf(al_msg->pucParas, "%lu_{cna:%u,eid:%39[^}]}_%lu%n", ++ &(smh_msg->msgid), ++ &(smh_msg->helper_msg_info.remote_info.cna), ++ smh_msg->helper_msg_info.remote_info.eid, ++ &(smh_msg->res), ++ &n) == XALARM_PANIC_MSG_ITEM_CNT) || strlen(al_msg->pucParas) != n) { ++ logging_warn("Invalid msg str format, str is %s\n", al_msg->pucParas); ++ ret = -1; ++ } ++ break; ++ default: ++ ret = -1; ++ logging_warn("Unknown ack event type: %d\n", alarm_ack_type); ++ } ++ return ret; ++} ++ + static unsigned short 
convert_msg_type_to_xalarm_type(enum sentry_msg_helper_msg_type msg_type) + { + unsigned short xalarm_type = 0; +- switch (msg_type) +- { +- case SMH_MESSAGE_POWER_OFF: +- xalarm_type = ALARM_REBOOT_EVENT; +- break; +- case SMH_MESSAGE_OOM: +- xalarm_type = ALARM_OOM_EVENT; +- break; +- default: +- logging_warn("Unknown msg type: %d\n", msg_type); +- break; ++ switch (msg_type) { ++ case SMH_MESSAGE_POWER_OFF: ++ xalarm_type = ALARM_REBOOT_EVENT; ++ break; ++ case SMH_MESSAGE_OOM: ++ xalarm_type = ALARM_OOM_EVENT; ++ break; ++ case SMH_MESSAGE_PANIC: ++ xalarm_type = ALARM_PANIC_EVENT; ++ break; ++ case SMH_MESSAGE_KERNEL_REBOOT: ++ xalarm_type = ALARM_KERNEL_REBOOT_EVENT; ++ break; ++ default: ++ logging_warn("Unknown msg type: %d\n", msg_type); ++ break; + } + return xalarm_type; + } +@@ -183,8 +259,9 @@ static void sender_cleanup(void* arg) + logging_info("Sender thread cleanup over\n"); + } + +-static void* sender_thread(void* arg) { +- int ret; ++static void* sender_thread(void* arg) ++{ ++ int ret, retry_num; + int fd = smh_dev_get_fd(); + if (fd < 0) { + goto close_recv; +@@ -221,22 +298,26 @@ static void* sender_thread(void* arg) { + if (ret < 0) { + continue; + } +- unsigned short al_type = convert_msg_type_to_xalarm_type(smh_msg.type); +- if (al_type == 0) { ++ unsigned short alarm_type = convert_msg_type_to_xalarm_type(smh_msg.type); ++ if (alarm_type == 0) { + logging_warn("Send msg to xalarmd failed: Get unknown type msg, skip it\n"); + continue; + } ++ ++ retry_num = 0; + for (int i = 0; i < MAX_RETRY_NUM; i++) { +- ret = xalarm_report_event(al_type, str); ++ ret = xalarm_report_event(alarm_type, str); + if (ret == 0) { +- logging_info("Send msg success: al_type: %d, str: %s\n", al_type, str); ++ logging_info("Send msg success: alarm_type: %d, str: %s\n", alarm_type, str); + break; + } + if (ret == -EINVAL) { + logging_warn("Send msg to xalarmd failed: (%d) Invalid input value, skip it\n", ret); + break; + } else if (ret == -ENOTCONN || ret == -ECOMM 
|| ret == -ENODEV) { +- logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, try again\n", ret); ++ retry_num++; ++ logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, start the %dth retry in %d seconds.\n", ++ ret, retry_num, RETRY_PERIOD); + sleep(RETRY_PERIOD); + } else if (ret < 0) { + logging_warn("xalarm_report_event return %d\n", ret); +@@ -244,7 +325,7 @@ static void* sender_thread(void* arg) { + } + } + if (ret == -ENOTCONN || ret == -ECOMM) { +- logging_warn("Send msg to xalarmd failed: (%d) Bad socket conn, skip it\n", ret); ++ logging_warn("Send msg to xalarmd failed after %d retries: Bad socket conn, skip it.\n", retry_num); + } + } + +@@ -254,8 +335,9 @@ sender_err: + str = NULL; + close_recv: + partner_t = *(pthread_t*)arg; +- if (partner_t) ++ if (partner_t) { + pthread_cancel(partner_t); ++ } + logging_error("Sender thread exited unexpectedly\n"); + pthread_cleanup_pop(0); + return NULL; +@@ -274,8 +356,9 @@ static void receiver_cleanup(void* arg) + logging_info("Receiver thread cleanup over\n"); + } + +-static void* receiver_thread(void* arg) { +- int ret, fd; ++static void* receiver_thread(void* arg) ++{ ++ int ret, fd, retry_num; + struct alarm_msg *al_msg; + struct sentry_msg_helper_msg smh_msg; + pthread_t partner_t; +@@ -299,13 +382,18 @@ re_register: + }; + id_filter.id_list[0] = ALARM_REBOOT_ACK_EVENT; + id_filter.id_list[1] = ALARM_OOM_ACK_EVENT; ++ id_filter.id_list[2] = ALARM_PANIC_ACK_EVENT; ++ id_filter.id_list[3] = ALARM_KERNEL_REBOOT_ACK_EVENT; + ++ retry_num = 0; + for (int i = 0; i < MAX_RETRY_NUM; i++) { + ret = xalarm_register_event(®ister_info, id_filter); +- if (ret == 0) ++ if (ret == 0) { + break; ++ } + if (ret == -ENOTCONN) { +- logging_warn("Failed to register xalarm, try to re-register again\n"); ++ retry_num++; ++ logging_warn("Failed to register xalarm, start the %dth retry in %d seconds.\n", retry_num, RETRY_PERIOD); + sleep(RETRY_PERIOD); + } else { + logging_error("xalarm_register_event return 
%d\n", ret); +@@ -313,7 +401,8 @@ re_register: + } + } + if (ret == -ENOTCONN) { +- logging_error("Failed to register xalarm: (%d) bad connection\n", ret); ++ logging_error("Failed to register xalarm after %d retries: bad connection, " ++ "enter the error handling process\n", retry_num); + goto receiver_err; + } + +@@ -333,24 +422,28 @@ re_register: + logging_error("xalarm_get_event return %d\n", ret); + goto un_register; + } else { +- logging_info("Get msg: al_type: %d, str: %s\n", al_msg->usAlarmId, al_msg->pucParas); ++ logging_info("Get msg: alarm_type: %d, str: %s\n", al_msg->usAlarmId, al_msg->pucParas); + } + +- ret = convert_str_to_smh_msg(al_msg->pucParas, &smh_msg); ++ ret = convert_str_to_smh_msg(al_msg, &smh_msg); + if (ret < 0) { + logging_warn("Convert str failed: Bad format '%s', skip it\n", al_msg->pucParas); + continue; + } ++ retry_num = 0; + for (int i = 0; i < MAX_RETRY_NUM; i++) { + errno = 0; + ret = ioctl(fd, SMH_MSG_ACK, &smh_msg); +- if (ret == 0) ++ if (ret == 0) { + break; ++ } + if (errno == ERESTART || errno == ETIME || errno == ENOENT) { + logging_warn("Ack to kernel failed: ioctl return %d, skip it\n", errno); + break; + } else if (errno == EFAULT) { +- logging_warn("Ack to kernel failed: (%d) Copy from user failed, try again\n", errno); ++ retry_num++; ++ logging_warn("Ack to kernel failed: (%d) Copy from user failed, start the %dth retry in %d seconds.\n", ++ errno, retry_num, RETRY_PERIOD); + sleep(RETRY_PERIOD); + } else if (ret < 0) { + logging_error("Ack to kernel failed: ioctl return %d\n", errno); +@@ -358,7 +451,7 @@ re_register: + } + } + if (errno == EFAULT) { +- logging_warn("Ack to kernel failed: (%d) Copy from user failed, skip it\n", errno); ++ logging_warn("Ack to kernel failed after %d retries: Copy from user failed, skip it\n", retry_num); + } + } + +@@ -369,8 +462,9 @@ receiver_err: + close(fd); + close_send: + partner_t = *(pthread_t*)arg; +- if (partner_t) ++ if (partner_t) { + pthread_cancel(partner_t); ++ } + 
logging_error("Receiver thread exited unexpectedly\n"); + pthread_cleanup_pop(0); + return NULL; +@@ -391,8 +485,8 @@ int main() + logging_error("Failed to create sender thread"); + goto err_release; + } +- ret = pthread_create(&receiver, NULL, receiver_thread, &sender); + ++ ret = pthread_create(&receiver, NULL, receiver_thread, &sender); + if (ret) { + logging_error("Failed to create receiver thread"); + pthread_cancel(sender); +diff --git a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +index bd01556..0ccbb12 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h ++++ b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +@@ -1,35 +1,66 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. 
++ ++ * Description: header file for sentry msg monitor ++ * Author: Luckky ++ * Create: 2025-02-18 ++*/ ++ + #ifndef SMH_COMMON_TYPE_H + #define SMH_COMMON_TYPE_H + + #include + + #define SMH_TYPE ('}') +-#define MAX_NUMA_NODES 8 ++#define OOM_EVENT_MAX_NUMA_NODES 8 ++#define EID_MAX_LEN 40 // eid str len 39 + '\0' + + enum { +- SMH_CMD_MSG_ACK = 0x10, ++ SMH_CMD_MSG_ACK = 0x10, + }; + + #define SMH_MSG_ACK _IO(SMH_TYPE, SMH_CMD_MSG_ACK) + + enum sentry_msg_helper_msg_type { +- SMH_MESSAGE_POWER_OFF, +- SMH_MESSAGE_OOM, +- SMH_MESSAGE_MAX, ++ SMH_MESSAGE_POWER_OFF, ++ SMH_MESSAGE_OOM, ++ SMH_MESSAGE_PANIC, ++ SMH_MESSAGE_KERNEL_REBOOT, ++ SMH_MESSAGE_MAX, ++ // Add ACK events HERE (below SMH_MESSAGE_MAX) ++ SMH_MESSAGE_PANIC_ACK, ++ SMH_MESSAGE_KERNEL_REBOOT_ACK, ++ SMH_MESSAGE_UNKNOWN, + }; + + struct sentry_msg_helper_msg { +- enum sentry_msg_helper_msg_type type; +- uint64_t msgid; +- // reboot_info is empty +- struct { +- int nr_nid; +- int nid[MAX_NUMA_NODES]; +- int sync; +- int timeout; +- int reason; +- } oom_info; +- unsigned long res; ++ enum sentry_msg_helper_msg_type type; ++ uint64_t msgid; ++ uint64_t start_send_time; ++ uint64_t timeout_time; ++ // reboot_info is empty ++ union { ++ struct { ++ int nr_nid; ++ int nid[OOM_EVENT_MAX_NUMA_NODES]; ++ int sync; ++ int timeout; ++ int reason; ++ } oom_info; ++ struct { ++ uint32_t cna; ++ char eid[EID_MAX_LEN]; ++ } remote_info; ++ } helper_msg_info; ++ unsigned long res; + }; + + #endif +diff --git a/src/services/syssentry/mod_status.py b/src/services/syssentry/mod_status.py +index 78d7262..574493a 100644 +--- a/src/services/syssentry/mod_status.py ++++ b/src/services/syssentry/mod_status.py +@@ -28,6 +28,7 @@ FAILED_STATUS = "FAILED" + WAITING_STATUS = "WAITING" + + ++ + def set_task_status(task_name, status_code): + """set task status""" + task_type = TasksMap.get_task_type(task_name) +diff --git a/src/services/syssentry/sentryctl b/src/services/syssentry/sentryctl +index c2e3cef..d94a044 100644 +--- 
a/src/services/syssentry/sentryctl ++++ b/src/services/syssentry/sentryctl +@@ -23,11 +23,92 @@ import json + CTL_SOCKET_PATH = "/var/run/sysSentry/control.sock" + MAX_PARAM_LENGTH = 256 + ++MAX_DIE_NUM = 2 ++MAX_UVB_CNA_STR_LEN = 22 ++MAX_URMA_EID_LENGTH = 39 ++MAX_NODES = 32 ++MIN_PANIC_TIMEOUT_MS = 0 ++MAX_PANIC_TIMEOUT_MS = 3600000 ++MIN_KERNEL_REBOOT_TIMEOUT_MS = 0 ++MAX_KERNEL_REBOOT_TIMEOUT_MS = 3600000 ++MIN_CLIENT_JETTY_ID = 3 ++MAX_CLIENT_JETTY_ID = 1023 ++ + RESULT_MSG_DATA_LEN = 4 + CTL_MSG_LEN_LEN = 3 + ALARM_MSG_DATA_LEN = 6 + DEFAULT_ALARM_TIME_RANGE = 10 + ++def write_proc_file(proc_dir, proc_name, proc_value): ++ """ ++ Don't use 'shell=True' for subprocess.run/subprocess.Popen, it's not safe. However, if 'shell=true' ++ is not set, it is difficult to modify the proc file for subprocess.run/subprocess.Popen. ++ """ ++ exit_code = 0 ++ try: ++ with open("/proc/%s/%s" % (proc_dir, proc_name), mode="w") as f: ++ f.write(str(proc_value) + "\n") ++ except PermissionError as e: ++ exit_code = -e.errno ++ print("sentryctl: error: set %s failed for %s, the user does not have the permission!" % (proc_dir, proc_name)) ++ except FileNotFoundError as e: ++ exit_code = -e.errno ++ print("sentryctl: error: set %s failed for %s, the proc file does not exist!" 
% (proc_dir, proc_name)) ++ except Exception as e: ++ exit_code = getattr(e, 'errno', -1) ++ exit_code = -exit_code if exit_code > 0 else -1 ++ print("sentryctl: error: set %s failed for %s" % (proc_dir, proc_name)) ++ finally: ++ return exit_code ++ ++def set_remote_reporter_proc(proc_name, proc_value): ++ return write_proc_file("sentry_remote_reporter", proc_name, proc_value) ++ ++def set_urma_heartbeat(proc_value): ++ return write_proc_file("sentry_urma_comm", "heartbeat", proc_value) ++ ++def set_uvb_proc(server_cna_str): ++ if len(server_cna_str.strip()) == 0: ++ print("Invalid args for server_cna") ++ sys.exit(-1) ++ server_cna_list = server_cna_str.strip().split(";") ++ if len(server_cna_list) > MAX_NODES: ++ print("Exceeded the maximum number (%d) of nodes supported." % MAX_NODES) ++ sys.exit(-1) ++ for cna in server_cna_list: ++ if len(cna.strip()) == 0: ++ print("Find invalid cna (%s) for server_cna" % cna) ++ sys.exit(-1) ++ if len(cna.strip()) > MAX_UVB_CNA_STR_LEN: ++ print("Invalid cna (%s) for server_cna" % cna) ++ sys.exit(-1) ++ server_cna_string =";".join(server_cna_list) ++ return write_proc_file("sentry_uvb_comm", "server_cna", server_cna_string) ++ ++def set_urma_proc(server_eid, client_jetty_id): ++ if len(server_eid.strip()) == 0: ++ print("Invalid args for server_eid, server_eid is empty string") ++ sys.exit(-1) ++ if client_jetty_id < MIN_CLIENT_JETTY_ID or client_jetty_id > MAX_CLIENT_JETTY_ID: ++ print("Invalid args for client_jetty_id") ++ sys.exit(-1) ++ ++ server_eid_list = server_eid.strip().split(";") ++ if len(server_eid_list) > MAX_DIE_NUM: ++ print("Invalid args for server_eid, server_eid contains an extra semicolon.") ++ sys.exit(-1) ++ for server_eid_i in server_eid_list: ++ if len(server_eid_i) == 0: ++ print("Invalid args for server_eid, server_eid is empty string") ++ sys.exit(-1) ++ server_eid_list_i = server_eid_i.split(",") ++ for eid_i in server_eid_list_i: ++ if len(eid_i.strip()) != MAX_URMA_EID_LENGTH: ++ 
print("Invalid args for server_eid, the length of the eid ({}) does not equal {}.".format(eid_i, MAX_URMA_EID_LENGTH)) ++ sys.exit(-1) ++ write_urma_info = " ".join((server_eid, str(client_jetty_id))) ++ return write_proc_file("sentry_urma_comm", "client_info", write_urma_info) ++ + def status_output_format(res_data): + """format output""" + print(f"status: {res_data}") +@@ -140,8 +221,34 @@ if __name__ == '__main__': + parser_get_alarm.add_argument('-d', '--detailed', action='store_true', help='Print Detailed Information') + parser_list = subparsers.add_parser('list', help='show all loaded task mod') + +- client_args = parser.parse_args() ++ set_cmd_args_plugins_info = { ++ "sentry_remote_reporter": [ ++ {"name" : "cna", "type": int, "choices": None, "required" : False, "help": "set cna info, it is uint32_t integer"}, ++ {"name" : "eid", "type": str, "choices": None, "required" : False, "help": "set local eid info"}, ++ {"name" : "uvb_comm", "type": str, "choices": ["on", "off"], "required" : False, "help": "Select the UVB communication mode"}, ++ {"name" : "urma_comm", "type": str, "choices": ["on", "off"], "required" : False, "help": "Select the URMA communication mode"}, ++ {"name" : "panic", "type": str, "choices": ["on", "off"], "required" : False, "help": "Panic event control switch"}, ++ {"name" : "kernel_reboot", "type": str, "choices": ["on", "off"], "required" : False, "help": "Kernel reboot event control switch"}, ++ {"name" : "panic_timeout_ms", "type": int, "choices": None, "required" : False, "help": "set panic timeout, value range is [%d, %d]" %(MIN_PANIC_TIMEOUT_MS, MAX_PANIC_TIMEOUT_MS)}, ++ {"name" : "kernel_reboot_timeout_ms", "type": int, "choices": None, "required" : False, "help": "set kernel reboot timeout, value range is [%d, %d]" %(MIN_KERNEL_REBOOT_TIMEOUT_MS, MAX_KERNEL_REBOOT_TIMEOUT_MS)}, ++ ], ++ "sentry_urma_comm" : [ ++ {"name" : "server_eid", "type": str, "choices": None, "required" : False, "help": "Info about other nodes to be 
connected to the current node"}, ++ {"name" : "client_jetty_id", "type": int, "choices": None, "required" : False, "help": "jetty id of the current node, value range is [%d, %d]" %(MIN_CLIENT_JETTY_ID, MAX_CLIENT_JETTY_ID)}, ++ {"name" : "heartbeat", "type": str, "choices": ["on", "off"], "required" : False, "help": "Heartbeat detection function switch, default off"}, ++ ], ++ "sentry_uvb_comm" : [ ++ {"name": "server_cna", "type": str, "choices": None, "required" : True, "help": "server cna array"}, ++ ], ++ } ++ parser_set = subparsers.add_parser('set', help='set plugins params') ++ parsers_set_plugin_param = parser_set.add_subparsers(dest="set_task") ++ for plugin_name, args_info in set_cmd_args_plugins_info.items(): ++ parser_set_plugin_param = parsers_set_plugin_param.add_parser(plugin_name, help="set task args for %s" % plugin_name) ++ for arg in args_info: ++ parser_set_plugin_param.add_argument("--" + arg["name"], type = arg["type"], choices = arg["choices"], required = arg["required"], help = arg["help"]) + ++ client_args = parser.parse_args() + if client_args.cmd_type == 'list': + req_msg_struct = {"type": "mod_list", "data":""} + elif client_args.cmd_type == 'start': +@@ -165,6 +272,55 @@ if __name__ == '__main__': + } + elif client_args.cmd_type == 'reload': + req_msg_struct = {"type": "reload", "data": client_args.task_name} ++ elif client_args.cmd_type == 'set': ++ ret_code = 0 ++ if client_args.set_task == "sentry_remote_reporter": ++ if client_args.cna != None: ++ if client_args.cna < 0: ++ print("cna should be a number not less than 0.") ++ ret_code = -1 ++ else: ++ ret_code += set_remote_reporter_proc("cna", client_args.cna) ++ if client_args.eid != None: ++ num_of_semicolons = client_args.eid.count(";") ++ if num_of_semicolons > 1: ++ print("invalid value for extraneous semicolon.") ++ sys.exit(-1) ++ eid_list = client_args.eid.split(";") ++ for eid_i in eid_list: ++ if len(eid_i) != MAX_URMA_EID_LENGTH: ++ print("The length of eid must be {}, 
but the detected input length is {}.".format(MAX_URMA_EID_LENGTH, len(eid_i))) ++ sys.exit(-1) ++ ret_code += set_remote_reporter_proc("eid", client_args.eid) ++ if client_args.uvb_comm != None or client_args.urma_comm != None: ++ if client_args.urma_comm == "on": ++ ret_code += set_remote_reporter_proc("urma_comm", client_args.urma_comm) ++ if client_args.uvb_comm == "on": ++ ret_code += set_remote_reporter_proc("uvb_comm", client_args.uvb_comm) ++ if client_args.urma_comm == "off": ++ ret_code += set_remote_reporter_proc("urma_comm", client_args.urma_comm) ++ if client_args.uvb_comm == "off": ++ ret_code += set_remote_reporter_proc("uvb_comm", client_args.uvb_comm) ++ if client_args.panic != None: ++ ret_code += set_remote_reporter_proc("panic", client_args.panic) ++ if client_args.panic_timeout_ms != None: ++ ret_code += set_remote_reporter_proc("panic_timeout", client_args.panic_timeout_ms) ++ if client_args.kernel_reboot != None: ++ ret_code += set_remote_reporter_proc("kernel_reboot", client_args.kernel_reboot) ++ if client_args.kernel_reboot_timeout_ms != None: ++ ret_code += set_remote_reporter_proc("kernel_reboot_timeout", client_args.kernel_reboot_timeout_ms) ++ elif client_args.set_task == "sentry_urma_comm": ++ if client_args.server_eid != None and client_args.client_jetty_id: ++ ret_code += set_urma_proc(client_args.server_eid, client_args.client_jetty_id) ++ elif client_args.server_eid != None or client_args.client_jetty_id: ++ print("Options --server_eid and --client_jetty_id need to be used together") ++ ret_code += -1 ++ if client_args.heartbeat != None: ++ ret_code += set_urma_heartbeat(client_args.heartbeat) ++ elif client_args.set_task == "sentry_uvb_comm": ++ ret_code += set_uvb_proc(client_args.server_cna) ++ ++ sys.exit(ret_code) + else: + parser.print_help() + sys.exit(-1) +diff --git a/src/services/syssentry/syssentry.py b/src/services/syssentry/syssentry.py +index 32707d1..eab8033 100644 +--- a/src/services/syssentry/syssentry.py ++++ 
b/src/services/syssentry/syssentry.py +@@ -645,7 +645,7 @@ def main(): + logging.error("get pid file lock failed, exist") + sys.exit(17) + +- clientId = -1 ++ client_id = -1 + try: + signal.signal(signal.SIGINT, sig_handler) + signal.signal(signal.SIGTERM, sig_handler) +@@ -657,14 +657,14 @@ def main(): + _ = SentryConfig.init_param() + TasksMap.init_task_map() + load_tasks() +- clientId = alarm_register() ++ client_id = alarm_register() + main_loop() + + except Exception: + pass + finally: +- if clientId != -1: +- xalarm_unregister(clientId) ++ if client_id != -1: ++ xalarm_unregister(client_id) + clean_child() + release_pidfile() + remove_sock_file() +-- +2.27.0 + diff --git a/report-power-off-result-to-BMC.patch b/report-power-off-result-to-BMC.patch new file mode 100644 index 0000000000000000000000000000000000000000..e9f82fed56067abeeba69563b9831e0b245e6f61 --- /dev/null +++ b/report-power-off-result-to-BMC.patch @@ -0,0 +1,302 @@ +From a6211c4857efc78504eb00ae955385352c5d56cd Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Thu, 27 Nov 2025 11:29:40 +0800 +Subject: [PATCH 15/16] report power off result to BMC + +Feature issue: +https://gitee.com/openeuler/release-management/issues/ID45WQ +--- + src/sentryPlugins/sentry_msg_monitor/Makefile | 2 +- + .../sentry_msg_monitor/bmc_log_lib.c | 170 ++++++++++++++++++ + .../sentry_msg_monitor/bmc_log_lib.h | 35 ++++ + .../sentry_msg_monitor/sentry_msg_monitor.c | 11 +- + 4 files changed, 215 insertions(+), 3 deletions(-) + create mode 100644 src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.c + create mode 100644 src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.h + +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index 82e0d6e..a0a6de1 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -15,7 +15,7 @@ + all: sentry_msg_monitor + + sentry_msg_monitor: +- gcc sentry_msg_monitor.c ub_fault_lib.c 
-fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lobmm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ gcc sentry_msg_monitor.c ub_fault_lib.c bmc_log_lib.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lobmm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor + + clean: + rm -f sentry_msg_monitor +diff --git a/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.c b/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.c +new file mode 100644 +index 0000000..04fbd62 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.c +@@ -0,0 +1,170 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. 
++ ++ * Description: bmc log lib ++ * Author: sxt1001 ++ * Create: 2025-11-24 ++*/ ++ ++#include ++#include ++#include ++ ++#include "register_xalarm.h" ++#include "log_utils.h" ++#include "bmc_log_lib.h" ++ ++#define BUFFER_MAX_LEN 4096 ++#define SINGLE_ASCII_HEX_STR_LEN 8 ++#define POWER_RESULT_STR_MAX_LEN 55 // power_off,{rmrs_res_str},{rmrs_res} max(rmrs_res_str) = 39 ++#define IPMITOOL_COMMAND_STR_PREFIX_REBOOT "ipmitool raw 0x30 0x92 0xdb 0x07 0x00 0x0f 0x01 " ++#define IPMITOOL_COMMAND_STR_MAX_LEN \ ++ ((POWER_RESULT_STR_MAX_LEN * SINGLE_ASCII_HEX_STR_LEN) + sizeof(IPMITOOL_COMMAND_STR_PREFIX_REBOOT) + 1) ++ ++static const char* rmrs_result_strings[] = { ++ [RMRS_SUCCESS] = "Memory return succeeded", ++ [RMRS_IPC_ERROR] = "Node communication failed", ++ [RMRS_MIGRATE_ERROR] = "Memory migration failed", ++ [RMRS_LACK_LOCAL_MEM_ERROR] = "Local memory shortage error", ++ [RMRS_LACK_REMOTE_MEM_ERROR] = "Borrowed memory shortage error", ++ [RMRS_RESOURCE_COLLECT_ERROR] = "Resource acquisition failed", ++ [RMRS_BORROW_MEM_ERROR] = "Memory borrowing failed or timed out", ++ [RMRS_RETURN_MEM_ERROR] = "Memory return failed or timed out", ++ [RMRS_PARTIAL_SUCCESS] = "Partial node memory return succeeded", ++ [RMRS_MIGRATE_TIMEOUT] = "Memory migration succeeded with timeout", ++}; ++ ++static char* get_rmrs_result_string(enum sentry_rmrs_result_type rmrs_res_type) ++{ ++ if (rmrs_res_type < 0 || rmrs_res_type >= RMRS_UNKNOWN_CODE) { ++ return NULL; ++ } ++ ++ return (char*) rmrs_result_strings[rmrs_res_type]; ++} ++ ++static int string_to_ascii_hex(const char* raw_string, char* ascii_string, int ascii_string_size) ++{ ++ if (!raw_string || !ascii_string || ascii_string_size <= 0) { ++ logging_error("invalid args.\n"); ++ return -1; ++ } ++ ++ int len = strlen(raw_string); ++ for (int i = 0; i < len; i++) { ++ char hex[SINGLE_ASCII_HEX_STR_LEN]; ++ int n = snprintf(hex, sizeof(hex), "0x%02x ", (unsigned char)raw_string[i]); ++ if (n <= 0 || n >= sizeof(hex)) { ++ 
logging_error("snprintf failed, raw character is %c\n", raw_string[i]); ++ return -1; ++ } ++ int available = ascii_string_size - strlen(ascii_string) - 1; ++ if (available < SINGLE_ASCII_HEX_STR_LEN) { ++ logging_error("ascii string len is too short to add new ascii character\n"); ++ return -1; ++ } ++ strncat(ascii_string, hex, SINGLE_ASCII_HEX_STR_LEN); ++ } ++ ++ if (len > 0) { ++ ascii_string[strlen(ascii_string) - 1] = '\0'; ++ } ++ return 0; ++} ++ ++static int execute_command(const char *command) ++{ ++ FILE *fp; ++ char buffer[BUFFER_MAX_LEN] = {0}; ++ int ret = 0; ++ ++ if (!command) { ++ logging_error("invalid args.\n"); ++ return -1; ++ } ++ ++ fp = popen(command, "r"); ++ if (!fp) { ++ logging_error("popen failed\n"); ++ return -1; ++ } ++ ++ if (!fgets(buffer, sizeof(buffer), fp)) { ++ logging_warn("no output (EOF) for ipmitool command\n"); ++ } ++ ++ logging_debug("output of ipmitool command is : %s\n", buffer); ++ ret = pclose(fp); ++ if (ret < 0) { ++ logging_error("pclose failed\n"); ++ return -1; ++ } ++ if (!WIFEXITED(ret)) { ++ logging_error("ipmitool command did not terminate normally\n"); ++ return -1; ++ } ++ ret = -WEXITSTATUS(ret); ++ logging_info("ipmitool command exited with status: %d\n", ret); ++ return ret; ++} ++ ++static int report_power_off_result_to_bmc(enum sentry_rmrs_result_type res) ++{ ++ int ret = 0; ++ ++ char power_off_res_str[POWER_RESULT_STR_MAX_LEN]; ++ char power_off_res_hex_str[POWER_RESULT_STR_MAX_LEN * SINGLE_ASCII_HEX_STR_LEN]; ++ char command_ascii_str[IPMITOOL_COMMAND_STR_MAX_LEN]; ++ ++ char *rmrs_result_string = get_rmrs_result_string(res); ++ if (!rmrs_result_string) { ++ logging_error("Undefined ACK results\n"); ++ return -1; ++ } ++ ++ ret = snprintf(power_off_res_str, sizeof(power_off_res_str), "%s,%s,%d", ++ "power_off", rmrs_result_string, res); ++ if (ret <= 0) { ++ logging_error("snprintf failed\n"); ++ return ret; ++ } ++ ++ ret = string_to_ascii_hex(power_off_res_str, power_off_res_hex_str, 
sizeof(power_off_res_hex_str)); ++ if (ret < 0) { ++ logging_error("Failed to convert power off res raw string to ascii string\n"); ++ return ret; ++ } ++ ++ ret = snprintf(command_ascii_str, sizeof(command_ascii_str), ++ "%s%s", IPMITOOL_COMMAND_STR_PREFIX_REBOOT, power_off_res_hex_str); ++ if (ret <= 0) { ++ logging_error("snprintf failed\n"); ++ return -1; ++ } ++ logging_debug("ipmitool cmd is [%s]\n", command_ascii_str); ++ ++ ret = execute_command(command_ascii_str); ++ if (ret < 0) { ++ logging_error("Failed to report BMC log\n"); ++ return ret; ++ } ++ logging_info("Success to report BMC log\n"); ++ return 0; ++} ++ ++int report_result_to_bmc(int ack_result, int ioctl_result) ++{ ++ if (ack_result == RMRS_SUCCESS && ioctl_result) { ++ // ack success, but ioctl failed ++ ack_result = RMRS_MIGRATE_TIMEOUT; ++ } ++ return report_power_off_result_to_bmc(ack_result); ++} +diff --git a/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.h b/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.h +new file mode 100644 +index 0000000..9606d91 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/bmc_log_lib.h +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. 
++ ++ * Description: header file for power off event ++ * Author: sxt1001 ++ * Create: 2025-11-24 ++*/ ++ ++#ifndef SENTRY_BMC_LOG_LIB_H ++#define SENTRY_BMC_LOG_LIB_H ++ ++enum sentry_rmrs_result_type { ++ RMRS_SUCCESS = 0, ++ RMRS_IPC_ERROR, ++ RMRS_MIGRATE_ERROR, ++ RMRS_LACK_LOCAL_MEM_ERROR, ++ RMRS_LACK_REMOTE_MEM_ERROR, ++ RMRS_RESOURCE_COLLECT_ERROR, ++ RMRS_BORROW_MEM_ERROR, ++ RMRS_RETURN_MEM_ERROR, ++ RMRS_PARTIAL_SUCCESS, ++ RMRS_MIGRATE_TIMEOUT, ++ RMRS_UNKNOWN_CODE, // unknown error, invalid type ++}; ++ ++int report_result_to_bmc(int ack_result, int ioctl_result); ++#endif +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index 6b2bfd7..5544062 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -30,11 +30,12 @@ + #include "log_utils.h" + #include "smh_common_type.h" + #include "ub_fault_lib.h" ++#include "bmc_log_lib.h" + + #define TOOL_NAME "sentry_msg_monitor" + #define SMH_DEV_PATH "/dev/sentry_msg_helper" + #define PID_FILE_PATH "/var/run/"TOOL_NAME".pid" +-#define ID_LIST_LENGTH 4 //reboot oom panic kernel_reboot ++#define ID_LIST_LENGTH 4 // reboot oom panic kernel_reboot + #define MSG_STR_MAX_LEN 1024 + #define DEFAULT_LOG_LEVEL LOG_INFO + #define MAX_RETRY_NUM 3 +@@ -423,7 +424,6 @@ static void* sender_thread(void* arg) + logging_warn("Send msg to xalarmd failed: Get unknown type msg, skip it\n"); + continue; + } +- + retry_num = 0; + for (int i = 0; i < MAX_RETRY_NUM; i++) { + ret = xalarm_report_event(alarm_type, str); +@@ -550,6 +550,7 @@ re_register: + logging_warn("Convert str failed: Bad format '%s', skip it\n", al_msg->pucParas); + continue; + } ++ + retry_num = 0; + for (int i = 0; i < MAX_RETRY_NUM; i++) { + errno = 0; +@@ -567,12 +568,18 @@ re_register: + sleep(RETRY_PERIOD); + } else if (ret < 0) { + logging_error("Ack to kernel failed: ioctl return %d\n", 
errno); ++ if (al_msg->usAlarmId == ALARM_REBOOT_ACK_EVENT) { ++ report_result_to_bmc(smh_msg.res, ret); ++ } + goto un_register; + } + } + if (errno == EFAULT) { + logging_warn("Ack to kernel failed after %d retries: Copy from user failed, skip it\n", retry_num); + } ++ if (al_msg->usAlarmId == ALARM_REBOOT_ACK_EVENT) { ++ report_result_to_bmc(smh_msg.res, ret); ++ } + } + + un_register: +-- +2.27.0 + diff --git a/support-to-send-SIGBUS-signal-for-UB-memory-fault.patch b/support-to-send-SIGBUS-signal-for-UB-memory-fault.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b5b7dd845b152b5975c62c99fb8d0d1674b6443 --- /dev/null +++ b/support-to-send-SIGBUS-signal-for-UB-memory-fault.patch @@ -0,0 +1,494 @@ +From 7290595e4bd5728dbe7e1c8c215e026eb0f84cbd Mon Sep 17 00:00:00 2001 +From: shixuantong +Date: Mon, 10 Nov 2025 17:37:02 +0800 +Subject: [PATCH 12/16] support to send SIGBUS signal for UB memory fault + +Feature issue: +https://gitee.com/openeuler/release-management/issues/ID45WQ +--- + src/libsentry/c/log/log_utils.h | 13 +- + src/sentryPlugins/sentry_msg_monitor/Makefile | 2 +- + .../sentry_msg_monitor/sentry_msg_monitor.c | 20 +- + .../sentry_msg_monitor/smh_common_type.h | 5 +- + .../sentry_msg_monitor/ub_fault_lib.c | 252 ++++++++++++++++++ + .../sentry_msg_monitor/ub_fault_lib.h | 27 ++ + src/services/syssentry/sentryctl | 9 +- + 7 files changed, 316 insertions(+), 12 deletions(-) + create mode 100644 src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.c + create mode 100644 src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.h + +diff --git a/src/libsentry/c/log/log_utils.h b/src/libsentry/c/log/log_utils.h +index 2cd74e2..774fa97 100644 +--- a/src/libsentry/c/log/log_utils.h ++++ b/src/libsentry/c/log/log_utils.h +@@ -31,20 +31,23 @@ typedef enum { + level == LOG_ERROR ? 
"ERROR" : \ + "UNKNOWN_LEVEL") + +-#define PRINT_LOG_PREFIX(level, file, line) do { \ +- time_t t = time(NULL); \ +- struct tm *local_time = localtime(&t); \ +- fprintf(LOG_FD(level), "%d-%02d-%02d %02d:%02d:%02d,000 - %s - [%s:%d] - ", \ ++#define PRINT_LOG_PREFIX(level, file, line) \ ++ do { \ ++ struct timespec ts; \ ++ clock_gettime(CLOCK_REALTIME, &ts); \ ++ struct tm *local_time = localtime(&ts.tv_sec); \ ++ fprintf(LOG_FD(level), "%d-%02d-%02d %02d:%02d:%02d,%03ld - %s - [%s:%d] - ", \ + local_time->tm_year + 1900, \ + local_time->tm_mon + 1, \ + local_time->tm_mday, \ + local_time->tm_hour, \ + local_time->tm_min, \ + local_time->tm_sec, \ ++ ts.tv_nsec / 1000000L, \ + LOG_LEVEL_STRING(level), \ + basename(file), \ + line); \ +-} while (0) ++ } while (0) + + // configure Env for log + #define LOG_LEVEL_ENV "LOG_LEVEL" +diff --git a/src/sentryPlugins/sentry_msg_monitor/Makefile b/src/sentryPlugins/sentry_msg_monitor/Makefile +index c74ee07..82e0d6e 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/Makefile ++++ b/src/sentryPlugins/sentry_msg_monitor/Makefile +@@ -15,7 +15,7 @@ + all: sentry_msg_monitor + + sentry_msg_monitor: +- gcc sentry_msg_monitor.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lobmm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor ++ gcc sentry_msg_monitor.c ub_fault_lib.c -fstack-protector-strong -Wall -D_FORTIFY_SOURCE=2 -O2 -g -Wl,-z,relro -Wl,-z,now -fPIE -pie -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lobmm -lxalarm -lsentry_log -ljson-c -pthread -o sentry_msg_monitor + + clean: + rm -f sentry_msg_monitor +diff --git a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +index 5a08078..6b2bfd7 100644 +--- 
a/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c ++++ b/src/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor.c +@@ -29,6 +29,7 @@ + #include "register_xalarm.h" + #include "log_utils.h" + #include "smh_common_type.h" ++#include "ub_fault_lib.h" + + #define TOOL_NAME "sentry_msg_monitor" + #define SMH_DEV_PATH "/dev/sentry_msg_helper" +@@ -40,6 +41,8 @@ + #define RETRY_PERIOD 1 + #define XALARM_GENERAL_MSG_ITEM_CNT 2 // msgid_res + #define XALARM_PANIC_MSG_ITEM_CNT 4 // msgid_{cna:cna,eid:eid}_res ++#define PYHS_ADDR_HEX_STR_MAX_LEN 20 ++ + struct receiver_cleanup_data { + struct alarm_msg *al_msg; + struct alarm_register* register_info; +@@ -243,7 +246,13 @@ static int convert_ub_mem_err_smh_msg_to_str(struct sentry_msg_helper_msg* smh_m + return -1; + } + +- char hex_str[20]; ++ if (smh_msg->helper_msg_info.ub_mem_info.mem_type == FD_MODE ++ && smh_msg->helper_msg_info.ub_mem_info.fault_with_kill) { ++ logging_info("ub mem event raw type is %d, sending SIGBUS signal to process\n", raw_err_type); ++ find_and_send_sigbus_to_thread(id, obmm_offset); ++ } ++ ++ char hex_str[PYHS_ADDR_HEX_STR_MAX_LEN]; + int ret = snprintf(hex_str, sizeof(hex_str), "0x%lx", (long)pa); + if (ret < 0) { + logging_error("convert pa to string failed\n"); +@@ -401,12 +410,14 @@ static void* sender_thread(void* arg) + goto sender_err; + } + } +- logging_debug("Read dev success!\n"); ++ logging_info("Read dev success!\n"); + + ret = convert_smh_msg_to_str(&smh_msg, str); + if (ret < 0) { + continue; + } ++ logging_info("convert_smh_msg_to_str success, msgid is %u\n", smh_msg.msgid); ++ + unsigned short alarm_type = convert_msg_type_to_xalarm_type(smh_msg.type); + if (alarm_type == 0) { + logging_warn("Send msg to xalarmd failed: Get unknown type msg, skip it\n"); +@@ -417,7 +428,7 @@ static void* sender_thread(void* arg) + for (int i = 0; i < MAX_RETRY_NUM; i++) { + ret = xalarm_report_event(alarm_type, str); + if (ret == 0) { +- logging_info("Send msg success: alarm_type: %d, 
str: %s\n", alarm_type, str); ++ logging_info("Send msg success: alarm_type: %d\n", alarm_type); + break; + } + if (ret == -EINVAL) { +@@ -531,7 +542,7 @@ re_register: + logging_error("xalarm_get_event return %d\n", ret); + goto un_register; + } else { +- logging_info("Get msg: alarm_type: %d, str: %s\n", al_msg->usAlarmId, al_msg->pucParas); ++ logging_info("Get msg: alarm_type: %d\n", al_msg->usAlarmId); + } + + ret = convert_str_to_smh_msg(al_msg, &smh_msg); +@@ -610,5 +621,6 @@ int main() + + err_release: + release_pid_file(pid_fd); ++ logging_info("sentry_msg_monitor end with ret %d!\n", ret); + return ret; + } +diff --git a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +index 8035231..31cd352 100644 +--- a/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h ++++ b/src/sentryPlugins/sentry_msg_monitor/smh_common_type.h +@@ -69,6 +69,7 @@ enum sentry_msg_helper_msg_type { + SMH_MESSAGE_UB_MEM_ERR, + SMH_MESSAGE_PANIC_ACK, + SMH_MESSAGE_KERNEL_REBOOT_ACK, ++ SMH_MESSAGE_UNKNOWN, + }; + + struct sentry_msg_helper_msg { +@@ -89,8 +90,10 @@ struct sentry_msg_helper_msg { + uint32_t cna; + char eid[EID_MAX_LEN]; + } remote_info; +- struct { ++ struct { + uint64_t pa; ++ int mem_type; ++ int fault_with_kill; + enum ras_err_type raw_ubus_mem_err_type; + } ub_mem_info; + } helper_msg_info; +diff --git a/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.c b/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.c +new file mode 100644 +index 0000000..7d96b62 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.c +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. 
++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. ++ ++ * Description: ub fault lib ++ * Author: sxt1001 ++ * Create: 2025-11-10 ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "log_utils.h" ++#include "ub_fault_lib.h" ++ ++#define COMMAND_STR_MAX_LEN 512 ++#define OBMM_SHMDEV_NAME_MAX_LEN 256 ++#define COMMAND_OUTPUT_LINE_MAX_LEN 1024 ++#define ID_CAPACITY 10 ++#define PERM_STRING_LEN 8 ++ ++static int find_processes_by_device(const char *obmm_shmdev_name, pid_t **pids, int *count) ++{ ++ char command[COMMAND_STR_MAX_LEN]; ++ FILE *fp; ++ char line[COMMAND_OUTPUT_LINE_MAX_LEN]; ++ int capacity = ID_CAPACITY; ++ int found = 0; ++ ++ if (!obmm_shmdev_name) { ++ logging_error("obmm_shmdev_name is empty\n"); ++ return -1; ++ } ++ snprintf(command, sizeof(command), "/usr/bin/lsof \"%s\" | awk 'NR > 1 {print $2}'", obmm_shmdev_name); ++ ++ *pids = malloc(capacity * sizeof(pid_t)); ++ if (!*pids) { ++ logging_error("malloc failed for pids\n"); ++ return -1; ++ } ++ ++ fp = popen(command, "r"); ++ if (!fp) { ++ logging_error("popen failed\n"); ++ free(*pids); ++ *pids = NULL; ++ return -1; ++ } ++ ++ while (fgets(line, sizeof(line), fp)) { ++ pid_t pid = atoi(line); ++ ++ int duplicate = 0; ++ for (int i = 0; i < found; i++) { ++ if ((*pids)[i] == pid) { ++ duplicate = 1; ++ break; ++ } ++ } ++ ++ if (!duplicate) { ++ if (found >= capacity) { ++ capacity *= 2; ++ pid_t *new_pids = realloc(*pids, capacity * sizeof(pid_t)); ++ if (!new_pids) { ++ logging_error("realloc failed\n"); ++ break; ++ } ++ *pids = new_pids; ++ } ++ (*pids)[found++] = pid; ++ } ++ } ++ ++ pclose(fp); ++ *count = found; ++ return 0; ++} ++ ++static bool 
is_accessing_faulty_address(const char *obmm_shmdev_name, const char *tid_maps_str, unsigned long obmm_offset, unsigned long *virt_addr) ++{ ++ FILE *tid_maps_fp; ++ char line[COMMAND_OUTPUT_LINE_MAX_LEN]; ++ int found = 0; ++ ++ if (!obmm_shmdev_name || !tid_maps_str) { ++ logging_error("Invalid parameter\n"); ++ return false; ++ } ++ ++ tid_maps_fp = fopen(tid_maps_str, "r"); ++ if (!tid_maps_fp) { ++ logging_error("fopen %s failed\n", tid_maps_str); ++ return false; ++ } ++ ++ while (fgets(line, sizeof(line), tid_maps_fp)) { ++ unsigned long start, end; ++ char perms[PERM_STRING_LEN]; ++ unsigned long offset; ++ char other_maps_line[COMMAND_OUTPUT_LINE_MAX_LEN]; ++ ++ if (!strstr(line, obmm_shmdev_name)) { ++ continue; ++ } ++ ++ // parse maps line ++ if (sscanf(line, "%lx-%lx %7s %lx %1024s", ++ &start, &end, perms, &offset, other_maps_line) < 5) { ++ logging_error("parse [%s] failed.\n", line); ++ continue; ++ } ++ ++ // check offset ++ logging_debug("start to check offset for [%s]\n", line); ++ if (obmm_offset <= ((end - start) + offset) && obmm_offset >= offset) { ++ *virt_addr = start - offset + obmm_offset; ++ found = 1; ++ } ++ ++ if (found) { ++ break; ++ } ++ } ++ fclose(tid_maps_fp); ++ ++ return found ? 
true : false; ++} ++ ++static bool is_positive_integer(const char *str) { ++ if (!str || *str == '\0') ++ return false; ++ ++ while (*str) { ++ if (!isdigit((unsigned char)*str)) { ++ return false; ++ } ++ str++; ++ } ++ return true; ++} ++ ++static int check_process_mapping(const char *obmm_shmdev_name, pid_t pid, unsigned long obmm_offset, ++ unsigned long *virt_addr, pid_t **tids, int *tid_count) ++{ ++ char p_task_path[MAX_PATH]; ++ struct dirent *entry; ++ int capacity = ID_CAPACITY; ++ int found = 0; ++ ++ *tids = malloc(capacity * sizeof(pid_t)); ++ if (!*tids) { ++ logging_error("malloc failed for tids\n"); ++ return -1; ++ } ++ ++ // We need to send a SIGBUS signal to the thread, check /proc/$pid/task/$tid/maps ++ snprintf(p_task_path, sizeof(p_task_path), "/proc/%d/task/", pid); ++ ++ DIR *dir = opendir(p_task_path); ++ if (!dir) { ++ logging_error("opendir %s failed\n", p_task_path); ++ return -1; ++ } ++ ++ while ((entry = readdir(dir)) != NULL) { ++ if (entry->d_type == DT_DIR && is_positive_integer(entry->d_name)) { ++ char tid_maps_path[MAX_PATH]; ++ pid_t tid = atoi(entry->d_name); ++ logging_debug("start to check /proc/%d/task/%d/maps\n", pid, tid); ++ snprintf(tid_maps_path, sizeof(tid_maps_path), "/proc/%d/task/%d/maps", pid, tid); ++ if (is_accessing_faulty_address(obmm_shmdev_name, tid_maps_path, obmm_offset, virt_addr)) { ++ if (found >= capacity) { ++ capacity *= 2; ++ pid_t *new_tids = realloc(*tids, capacity * sizeof(pid_t)); ++ if (!new_tids) { ++ logging_error("realloc new_tids failed\n"); ++ break; ++ } ++ *tids = new_tids; ++ } ++ (*tids)[found++] = tid; ++ } ++ } ++ } ++ *tid_count = found; ++ closedir(dir); ++ return found ? 
0 : -1; ++} ++ ++static int send_sigbus_to_thread(pid_t tid, unsigned long virt_addr) ++{ ++ union sigval value; ++ int ret; ++ ++ value.sival_ptr = (void *)virt_addr; ++ ++ ret = sigqueue(tid, SIGBUS, value); ++ if (ret) { ++ logging_error("sigqueue failed\n"); ++ } ++ return ret; ++} ++ ++int find_and_send_sigbus_to_thread(mem_id memid, unsigned long obmm_offset) ++{ ++ pid_t *pids = NULL; ++ int pid_count = 0; ++ char obmm_shmdev_name[OBMM_SHMDEV_NAME_MAX_LEN]; ++ ++ snprintf(obmm_shmdev_name, sizeof(obmm_shmdev_name), "/dev/obmm_shmdev%lu", memid); ++ ++ if (find_processes_by_device(obmm_shmdev_name, &pids, &pid_count) != 0) { ++ logging_error("find_processes_by_device failed\n"); ++ if (pids) { ++ free(pids); ++ } ++ return -1; ++ } ++ ++ for (int pid_idx = 0; pid_idx < pid_count; pid_idx++) { ++ unsigned long virt_addr; ++ pid_t *tids = NULL; ++ int tid_count = 0; ++ if (check_process_mapping(obmm_shmdev_name, pids[pid_idx], obmm_offset, &virt_addr, &tids, &tid_count) == 0) { ++ for (int tid_idx = 0; tid_idx < tid_count; tid_idx++) { ++ logging_info("Sending SIGBUS to thread %d for process %d\n", tids[tid_idx], pids[pid_idx]); ++ send_sigbus_to_thread(tids[tid_idx], virt_addr); ++ } ++ } ++ if (tids) { ++ free(tids); ++ } ++ } ++ ++ if (pids) { ++ free(pids); ++ } ++ return 0; ++} +diff --git a/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.h b/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.h +new file mode 100644 +index 0000000..eece608 +--- /dev/null ++++ b/src/sentryPlugins/sentry_msg_monitor/ub_fault_lib.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under the Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. 
++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++ * PURPOSE. ++ * See the Mulan PSL v2 for more details. ++ ++ * Description: header file for ub mem event ++ * Author: sxt1001 ++ * Create: 2025-11-10 ++*/ ++ ++#ifndef SENTRY_UB_FAULT_LIB_H ++#include ++#include ++ ++#define MAX_PATH 128 ++#define PAGE_SIZE 4096 ++#define FD_MODE 0 ++#define NUMA_MODE 1 ++ ++int find_and_send_sigbus_to_thread(mem_id memid, unsigned long obmm_offset); ++#endif +diff --git a/src/services/syssentry/sentryctl b/src/services/syssentry/sentryctl +index d94a044..ec683b7 100644 +--- a/src/services/syssentry/sentryctl ++++ b/src/services/syssentry/sentryctl +@@ -61,6 +61,9 @@ def write_proc_file(proc_dir, proc_name, proc_value): + finally: + return exit_code + ++def set_sentry_reporter_proc(proc_value): ++ return write_proc_file("sentry_reporter", "ub_mem_fault_with_kill", proc_value) ++ + def set_remote_reporter_proc(proc_name, proc_value): + return write_proc_file("sentry_remote_reporter", proc_name, proc_value) + +@@ -240,6 +243,9 @@ if __name__ == '__main__': + "sentry_uvb_comm" : [ + {"name": "server_cna", "type": str, "choices": None, "required" : True, "help": "server cna array"}, + ], ++ "sentry_reporter" : [ ++ {"name" : "ub_mem_fault_with_kill", "type" : "str", "choices": ["on", "off"], "required" : False, "help" : "Enable/Disable sending SIGBUS signal with UB mem event"}, ++ ], + } + parser_set = subparsers.add_parser('set', help='set plugins params') + parsers_set_plugin_param = parser_set.add_subparsers(dest="set_task") +@@ -319,7 +325,8 @@ if __name__ == '__main__': + ret_code += set_urma_heartbeat(client_args.heartbeat) + elif client_args.set_task == "sentry_uvb_comm": + ret_code += set_uvb_proc(client_args.server_cna) +- ++ elif 
client_args.set_task == "sentry_reporter": ++ ret_code += set_sentry_reporter_proc(client_args.ub_mem_fault_with_kill) + sys.exit(ret_code) + else: + parser.print_help() +-- +2.27.0 + diff --git a/sysSentry.spec b/sysSentry.spec index 9eb98fb436b1e65caa8be4766148bac35f72dbe4..d346338b265e6e6ee45afc506205178c5400e62f 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.3 -Release: 14 +Release: 15 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -30,6 +30,33 @@ Patch18: Fix-two-code-review-comments.patch Patch19: Add-MulanV2-License-statement.patch Patch20: add-bmc_block_io-and-slow-io-plugin-upgrade.patch Patch21: add-disk-latency-collect.patch +Patch22: add-dfx-for-xalarmd-to-rebuild-connection-after-comm.patch +Patch23: fix-an-issue-with-printing-error.patch +Patch24: add-sentry-msg-monitor.patch +Patch25: add-oom-event-report.patch +Patch26: Use-malloc-to-allocate-memory-as-much-as-possible.patch +Patch27: fix-xalarmd-stop-failed-by-systemd.patch +Patch28: fix-systemctl-stop-error-bug-that-clientId-is-a-loca.patch +Patch29: fix-missing-pycache-file.patch +Patch30: fix-python-files-permission.patch +Patch31: fix-sys-exit-bug.patch +Patch32: fix-some-warnings.patch +Patch33: fix-log_utils.patch +Patch34: fix-error-code-for-socket-failed.patch +Patch35: fix-typo.patch +Patch36: Fix-resource-leak.patch +Patch37: fix-buffer-overflow-in-checkset_cpulist.patch +Patch38: Fix-the-use-of-uninitialized-variable-ret.patch +Patch39: report-panic-and-kernel-reboot-event.patch +Patch40: add-NONZERO_EXITED-status-for-plugin-exited-with-non.patch +Patch41: fix-process-exit-status-and-service-kill-mode.patch +Patch42: add-UB-fault-report-function.patch +Patch43: support-to-send-SIGBUS-signal-for-UB-memory-fault.patch +Patch44: fix-resource-leak-in-hbm_online_repair.patch +Patch45: 
fix-Out-of-memory-bounds-access-in-ebpf_collector.patch +Patch46: report-power-off-result-to-BMC.patch +Patch47: add-API-to-enable-disable-the-hijacking-function-for.patch +Patch48: build-sentry_msg_monitor-only-under-aarch64-architec.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -115,6 +142,18 @@ Requires: sysSentry = %{version}-%{release} %description -n hbm_online_repair This package provides hbm_online_repair for the sysSentry. +%ifarch aarch64 +%package -n sentry_msg_monitor +Summary: A plugin for sysSentry to listen for specific messages +Requires: sysSentry = %{version}-%{release} +Provides: sentry_msg_monitor = %{version} +BuildRequires: libobmm-devel +Requires: lsof libobmm ipmitool + +%description -n sentry_msg_monitor +This package provides a plugin for sysSentry to listen for specific messages +%endif + %package -n bmc_block_io Summary: bmc_block_io for the sysSentry Provides: bmc_block_io = %{version} @@ -218,6 +257,10 @@ rm -rf /var/run/sysSentry | : %exclude %{_sysconfdir}/sysconfig/soc_ring_sentry.env %exclude %{_sysconfdir}/sysSentry/tasks/soc_ring_sentry.mod +# sentry_msg_monitor +%exclude %{_sysconfdir}/sysconfig/sentry_msg_monitor.env +%exclude %{_sysconfdir}/sysSentry/tasks/sentry_msg_monitor.mod + %files -n libxalarm %attr(0555,root,root) %{_libdir}/libxalarm.so @@ -253,6 +296,13 @@ rm -rf /var/run/sysSentry | : %attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod %attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py +%ifarch aarch64 +%files -n sentry_msg_monitor +%attr(0550,root,root) %{_bindir}/sentry_msg_monitor +%attr(0600,root,root) %{_sysconfdir}/sysconfig/sentry_msg_monitor.env +%attr(0600,root,root) %{_sysconfdir}/sysSentry/tasks/sentry_msg_monitor.mod +%endif + %files -n bmc_block_io %attr(0550,root,root) %{_bindir}/bmc_block_io %attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/bmc_block_io.ini @@ -264,6 +314,13 @@ rm -rf /var/run/sysSentry | :
%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/soc_ring_sentry.mod %changelog +* Thu Nov 27 2025 shixuantong - 1.0.3-15 +- Type:feature +- CVE:NA +- SUG:NA +- DESC:add sentry msg monitor + support oom/power off/ub mem fault/panic/reboot event hijacking + * Mon Nov 17 2025 hewanhan - 1.0.3-14 - Type:feature - CVE:NA