From 5a61180494f6ef390bdfb994e4d04d2c4dace65c Mon Sep 17 00:00:00 2001 From: jwolf <523083921@qq.com> Date: Wed, 6 Mar 2024 17:07:37 +0800 Subject: [PATCH] cpu_patrol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除文件 catlib/sched.h 删除文件 catlib/build 删除文件 catlib/plugin/cpu_patrol/sched.h update catlib/cli_executor.c. Signed-off-by: qixing <523083921@qq.com> update README.md. Signed-off-by: qixing <523083921@qq.com> update README.md. Signed-off-by: qixing <523083921@qq.com> optimize prama update catlib/cli_executor.c. Signed-off-by: qixing <523083921@qq.com> update catlib/plugin/cpu_patrol/cpu_patrol_result.c. Signed-off-by: qixing <523083921@qq.com> update catlib/plugin/cpu_patrol/cpu_patrol.c. Signed-off-by: qixing <523083921@qq.com> update catlib/CMakeLists.txt. Signed-off-by: qixing <523083921@qq.com> update catlib/cli_param_checker.c. strtok_r函数会修改输入数据值,改为其copy进行参数检查 Signed-off-by: qixing <523083921@qq.com> update catlib/plugin/cpu_patrol/cpu_patrol_result.c. 同前一个BUG 还是strtok切割导致的 Signed-off-by: qixing <523083921@qq.com> update catlib/cli_param_checker.c. 参数说明修改 Signed-off-by: qixing <523083921@qq.com> update catlib/cli_common.c. 参数说明修改 Signed-off-by: qixing <523083921@qq.com> update catlib/cli_common.c. 大小写 Signed-off-by: qixing <523083921@qq.com> update catlib/cli_executor.c. 函数名test_print_cpu_set 改为print_cpu_set Signed-off-by: qixing <523083921@qq.com> 修改目录 Signed-off-by: qixing <523083921@qq.com> 删除文件 catlib 删除文件 README.en.md 删除文件 README.md 删除文件 sysSentry-1.0.2/src/c/catcli/catlib 修改目录 Signed-off-by: qixing <523083921@qq.com> 删除文件 sysSentry-1.0.2/catlib 新建 src 修改目录 Signed-off-by: qixing <523083921@qq.com> 删除文件 sysSentry-1.0.2/src/.keep update sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.h. 
冗余代码 Signed-off-by: qixing <523083921@qq.com> 修改目录 Signed-off-by: qixing <523083921@qq.com> --- README.en.md | 36 -- ...77\347\224\250\346\211\213\345\206\214.md" | 35 ++ .../src/c/catcli/README.md | 16 +- .../src/c/catcli/catlib/CMakeLists.txt | 35 ++ .../src/c/catcli/catlib/cat_structs.h | 49 +++ sysSentry-1.0.2/src/c/catcli/catlib/catcli.c | 52 +++ .../src/c/catcli/catlib/cli_common.c | 22 ++ .../src/c/catcli/catlib/cli_common.h | 65 ++++ .../src/c/catcli/catlib/cli_executor.c | 69 ++++ .../src/c/catcli/catlib/cli_executor.h | 14 + .../src/c/catcli/catlib/cli_param_checker.c | 121 +++++++ .../src/c/catcli/catlib/cli_param_checker.h | 53 +++ .../src/c/catcli/catlib/plugin/CMakeLists.txt | 3 + .../catlib/plugin/cpu_patrol/CMakeLists.txt | 8 + .../catlib/plugin/cpu_patrol/cpu_patrol.c | 321 ++++++++++++++++++ .../catlib/plugin/cpu_patrol/cpu_patrol.h | 40 +++ .../plugin/cpu_patrol/cpu_patrol_result.c | 287 ++++++++++++++++ .../plugin/cpu_patrol/cpu_patrol_result.h | 43 +++ .../catlib/plugin/mem_patrol/CMakeLists.txt | 0 19 files changed, 1224 insertions(+), 45 deletions(-) delete mode 100644 README.en.md create mode 100644 "sysSentry-1.0.2/Docs/CATCLI\344\275\277\347\224\250\346\211\213\345\206\214.md" rename README.md => sysSentry-1.0.2/src/c/catcli/README.md (66%) create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/CMakeLists.txt create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cat_structs.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/catcli.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_common.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_common.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/CMakeLists.txt create 
mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h create mode 100644 sysSentry-1.0.2/src/c/catcli/catlib/plugin/mem_patrol/CMakeLists.txt diff --git a/README.en.md b/README.en.md deleted file mode 100644 index d6445ed..0000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# sysSentry - -#### Description -sysSentry is a system inspection framework used to manage system inspection tasks. - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. 
The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git "a/sysSentry-1.0.2/Docs/CATCLI\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/sysSentry-1.0.2/Docs/CATCLI\344\275\277\347\224\250\346\211\213\345\206\214.md" new file mode 100644 index 0000000..5a2f254 --- /dev/null +++ "b/sysSentry-1.0.2/Docs/CATCLI\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -0,0 +1,35 @@ +# cpu_patrol + +#### 介绍 +Cpu_patrol is a module used to check cpu, and it depends on the openEuler 5.10. + +#### 软件架构 +软件架构说明 +cat-cli 命令行程序函数调用cpu_patrol动态库进行cpu巡检 + +#### 安装教程 + +1. clone源码或下载源码压缩包解压 +2. 进入catlib目录运行cmake命令`cmake -B ./build/ -S . -D CMAKE_INSTALL_PREFIX=/usr/local -D CMAKE_BUILD_TYPE=Release` +3. 编译及安装`cd build && make && make install` + +#### 使用说明 + +详见命令`cat-cli -h` + +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 新建 Pull Request + + +#### 特技 + +1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md +2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 +5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/sysSentry-1.0.2/src/c/catcli/README.md similarity index 66% rename from README.md rename to sysSentry-1.0.2/src/c/catcli/README.md index 76e6167..5a2f254 100644 --- a/README.md +++ b/sysSentry-1.0.2/src/c/catcli/README.md @@ -1,23 +1,21 @@ -# sysSentry +# cpu_patrol #### 介绍 -sysSentry is a system inspection framework used to manage system inspection tasks. +Cpu_patrol is a module used to check cpu, and it depends on the openEuler 5.10. #### 软件架构 软件架构说明 - +cat-cli 命令行程序函数调用cpu_patrol动态库进行cpu巡检 #### 安装教程 -1. xxxx -2. xxxx -3. xxxx +1. clone源码或下载源码压缩包解压 +2. 进入catlib目录运行cmake命令`cmake -B ./build/ -S . 
-D CMAKE_INSTALL_PREFIX=/usr/local -D CMAKE_BUILD_TYPE=Release` +3. 编译及安装`cd build && make && make install` #### 使用说明 -1. xxxx -2. xxxx -3. xxxx +详见命令`cat-cli -h` #### 参与贡献 diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/CMakeLists.txt b/sysSentry-1.0.2/src/c/catcli/catlib/CMakeLists.txt new file mode 100644 index 0000000..a7d906a --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.14.1) +project(cat-cli C) +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED OFF) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g") +# 父子模块都可以直接如下路径下的头文件 +include_directories(PUBLIC + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/plugin/cpu_patrol) + +# 导入子模块 +add_subdirectory(plugin) + +add_executable(cat-cli) +file(GLOB CLI_SRC *.c) +target_sources(cat-cli PUBLIC ${CLI_SRC}) +# cpu_patrol以独立的so链接 +TARGET_LINK_LIBRARIES(cat-cli pthread -ldl cpu_patrol) + +# 安装相关 +install(TARGETS cat-cli + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + ) +install(TARGETS cpu_patrol LIBRARY DESTINATION lib) + +# 创建软链接 +install(CODE "execute_process(COMMAND ln -sf ${CMAKE_INSTALL_PREFIX}/bin/cat-cli /usr/local/bin/cat-cli)") +install(CODE "execute_process(COMMAND ln -sf ${CMAKE_INSTALL_PREFIX}/lib/libcpu_patrol.so /lib64/libcpu_patrol.so)") + +# 成功安装信息 +install(CODE "MESSAGE(\"[INFO] install successfully, run 'cat-cli -h' to test\")") + + diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cat_structs.h b/sysSentry-1.0.2/src/c/catcli/catlib/cat_structs.h new file mode 100644 index 0000000..a7bc5d0 --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/cat_structs.h @@ -0,0 +1,49 @@ +#ifndef CAT_STRUCTS_H +#define CAT_STRUCTS_H + +#ifndef USERS_M30029087_CLIONPROJECTS_CATHELPER_FOR_HOST_YAOZONG_CATLIB_CAT_STRUCTS_H +#define USERS_M30029087_CLIONPROJECTS_CATHELPER_FOR_HOST_YAOZONG_CATLIB_CAT_STRUCTS_H + +#ifndef CAT_STRUCT_H +#define CAT_STRUCT_H +#define MAX_ERR_LEN 128 +// 统一错误码 +typedef enum { + CAT_OK = 0, 
// Success + CAT_ERR = 101, // Error + CAT_NOT_SUPPORTED = 102, // The feature is not supported + CAT_GENERIC_ERROR = 103, // A generic, unspecified error + CAT_LOAD_LIBRARY_FAIL = 104, // Load library fail + CAT_ALREADY_RUNNING = 105, // AN instance is already running + CAT_INVALID_PARAMETER = 106, // Invalid parameter +} cat_return_t; +// 巡检模块 +typedef enum { + CAT_PATROL_CPU = 0x0001, + CAT_PATROL_MEM = 0x0002, + CAT_PATROL_HBM = 0x0004, + CAT_PATROL_NPU = 0x0008, + CAT_PATROL_UNKNOWN +} cat_patrol_module; + +// cli请求参数封装 +typedef struct catcli_request_body { + cat_patrol_module patrol_module; + int patrol_second; + int cpu_utility; + void *module_params; +} catcli_request_body; + +// 命令行选项错误信息 +typedef struct option_errs { + char patrol_module_err[MAX_ERR_LEN]; + char patrol_time_err[MAX_ERR_LEN]; + char cpulist_err[MAX_ERR_LEN]; + char cpu_usage_percentage_err[MAX_ERR_LEN]; +} option_errs; +#endif + +#endif + +#endif + diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/catcli.c b/sysSentry-1.0.2/src/c/catcli/catlib/catcli.c new file mode 100644 index 0000000..db175df --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/catcli.c @@ -0,0 +1,52 @@ +#include +#include "cat_structs.h" +#include "cli_common.h" +#include "cat_structs.h" +#include "cli_param_checker.h" +#include "cli_executor.h" + + +int main(int argc, char * const argv[]) +{ + if (argc <= 1) { + print_err_help(); + return CAT_ERR; + } + int opt = 0; + struct option long_opts[] = {HELP, PATROL_MODULE, PATROL_TIME, CPUS, CPU_USAGE_PERCENTAGE, END_FLAG}; + char *short_opts = "hm:t:l:u:"; + catcli_request_body request_body = { + .module_params = NULL, + .cpu_utility = 50, + .patrol_module = CAT_PATROL_UNKNOWN, + .patrol_second = 0 + }; + option_errs option_errs = { 0 }; + while ((opt = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { + switch ((char)opt) { + case 'h': + print_opts_help(); + return CAT_OK; + case 'm': + checkset_patrol_type(optarg, &request_body, &option_errs); + break; + 
case 't': + checkset_patrol_time(optarg, &request_body, &option_errs); + break; + case 'l': + checkset_cpulist(optarg, &request_body, &option_errs); + break; + case 'u': + checkset_cpu_usage_percentage(optarg, &request_body, &option_errs); + break; + default: + print_err_help(); + return CAT_ERR; + } + } + int ret = checkParamsDependency(&request_body, &option_errs); + RETURN_NOT_TRUE(ret == CAT_OK, NULL, NULL); // 检查到的参数值有异常不再后续检查 + return execute_request(&request_body); +} + + diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.c b/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.c new file mode 100644 index 0000000..598ecb2 --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.c @@ -0,0 +1,22 @@ +#include +#include "cli_common.h" + +void print_err_help() +{ + printf("See 'cat-cli "); + PRINT_BLUE("--help"); + printf("'\n"); +} + +void print_opts_help() +{ + printf("usage: cat-cli [OPTIONS]\n" + "\n" + "Options:\n" + "-h, --help :Show the help message\n" + "-m, --patrol_module :0x0001(CPU)|0x0002(MEM)|0x0004(HBM)|0x0008(NPU)\n" + "-t, --patrol_second :Patrol time(second),an integer greater than 0\n" + "-l, --cpulist :Specify patrol cpu IDs,\"-l\" is valid when \"-m 0x0001\",eg:\"-l 0-3,7\"\n" + "-u, --cpu_utility :The maximum CPU usage time percentage of the patrol program,the range is (0,100],default:50\n"); +} + diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.h b/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.h new file mode 100644 index 0000000..3ee70df --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/cli_common.h @@ -0,0 +1,65 @@ +#ifndef CATHELPER_CLI_COMMON_H +#define CATHELPER_CLI_COMMON_H + +#include +#include +#include + +#define DECIMAL 10 /* 十进制 */ + +// 期望bool_expression为true,否则return CAT_ERR,(bool_expression可能为表达式!!!) +#define RETURN_NOT_TRUE(bool_expression, to_release, fail_msg, ...) 
\
+    do { \
+        if (!(bool_expression)) { \
+            if (fail_msg != NULL) { \
+                printf(fail_msg, ##__VA_ARGS__), printf("\n"); \
+            } \
+            free(to_release); \
+            return CAT_ERR; \
+        } \
+    } while (0)
+
+// print in red
+#define PRINT_RED(msg, ...) \
+    do { \
+        fprintf(stdout, "\033[31m"); \
+        printf(msg, ##__VA_ARGS__); \
+        fprintf(stdout, "\033[0m"); \
+    } while (0)
+// print in blue
+#define PRINT_BLUE(msg, ...) \
+    do { \
+        fprintf(stdout, "\033[0;32;34m"); \
+        printf(msg, ##__VA_ARGS__); \
+        fprintf(stdout, "\033[0m"); \
+    } while (0)
+
+// print in green
+#define PRINT_GREEN(msg, ...) \
+    do { \
+        fprintf(stdout, "\033[0;32;32m"); \
+        printf(msg, ##__VA_ARGS__); \
+        fprintf(stdout, "\033[0m"); \
+    } while (0)
+
+
+static const struct option HELP = { "help", no_argument, NULL, 'h' };
+static const struct option PATROL_MODULE = { "patrol_module", required_argument, NULL, 'm' };
+static const struct option PATROL_TIME = { "patrol_second", required_argument, NULL, 't' };
+static const struct option CPUS = { "cpulist", required_argument, NULL, 'l' };
+static const struct option CPU_USAGE_PERCENTAGE = {"cpu_utility", required_argument, NULL, 'u' };
+static const struct option END_FLAG = { 0, 0, 0, 0 }; // terminator entry; avoids a segfault when a long option is not matched
+
+/**
+ * Print the hint that points the user to --help after a usage error
+ */
+void print_err_help();
+
+/**
+ * Print the help text for the command line options
+ */
+void print_opts_help();
+
+#endif // CATHELPER_CLI_COMMON_H
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.c b/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.c
new file mode 100644
index 0000000..6f4fbbd
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.c
@@ -0,0 +1,89 @@
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdio.h>
+#include "cpu_patrol.h"
+#include "cli_executor.h"
+
+/**
+ * Print the CPU ids in cpu_set as a compact list such as "1-3,7", preceded by
+ * prefix and terminated by a newline.
+ * @param cpu_set  set to print; execute_request() only passes non-empty sets
+ * @param num_cpus number of CPU ids to scan, starting from 0
+ * @param prefix   text written verbatim in front of the list
+ */
+static void print_cpu_set(cpu_set_t *cpu_set, ssize_t num_cpus, const char *prefix)
+{
+    bool is_begin_set = false;
+    int begin_cpuid = -1;
+    int end_cpuid = -1;
+    printf("%s", prefix); // prefix is data, never the format string
+    for (ssize_t i = 0; i < num_cpus; i++) {
+        if (!CPU_ISSET(i, cpu_set)) {
+            continue;
+        }
+        if (!is_begin_set) {
+            is_begin_set = true;
+            begin_cpuid = (int)i;
+            end_cpuid = begin_cpuid;
+            continue;
+        }
+        if (i == (end_cpuid + 1)) {
+            end_cpuid++;
+            continue;
+        }
+
+        // Gap found: flush the finished range, then open a new one at i.
+        if (begin_cpuid == end_cpuid) {
+            printf("%d,", begin_cpuid);
+        } else {
+            printf("%d-%d,", begin_cpuid, end_cpuid);
+        }
+
+        end_cpuid = (int)i;
+        begin_cpuid = end_cpuid;
+    }
+    // Flush the last (still open) range.
+    if (begin_cpuid == end_cpuid) {
+        printf("%d\n", begin_cpuid);
+    } else {
+        printf("%d-%d\n", begin_cpuid, end_cpuid);
+    }
+}
+
+/**
+ * Run the patrol selected by request_body->patrol_module and print the result.
+ * Only CAT_PATROL_CPU is handled here.
+ * @param request_body request parameters filled in by the option checkers
+ * @return result of lib_cpu_patrol_start(), CAT_NOT_SUPPORTED for other modules,
+ *         CAT_ERR when the cpu set cannot be allocated
+ */
+int execute_request(struct catcli_request_body *request_body)
+{
+    if (request_body->patrol_module != CAT_PATROL_CPU) {
+        puts("Only CPU Patrol is supported currently!");
+        return CAT_NOT_SUPPORTED;
+    }
+
+    // Public CPU_*_S macros instead of the glibc-internal __CPU_* ones.
+    cpu_set_t *cpu_set = CPU_ALLOC(MAX_CPU_LOGIC_CORE);
+    if (cpu_set == NULL) {
+        puts("Allocate cpu set failed!");
+        return CAT_ERR;
+    }
+    size_t size = CPU_ALLOC_SIZE(MAX_CPU_LOGIC_CORE);
+    CPU_ZERO_S(size, cpu_set);
+    int ret = lib_cpu_patrol_start(request_body->module_params, request_body->cpu_utility,
+                                   request_body->patrol_second, cpu_set);
+    int count = CPU_COUNT_S(size, cpu_set);
+    printf("cpu patrol execute %s, isolated cores: %d\n", ret == CAT_OK ? "ok" : "failed", count);
+    const char *prefix = ":";
+    if (count == 0) {
+        puts(prefix);
+    } else {
+        print_cpu_set(cpu_set, MAX_CPU_LOGIC_CORE, prefix);
+    }
+    CPU_FREE(cpu_set);
+    return ret;
+}
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.h b/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.h
new file mode 100644
index 0000000..b0b203d
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/cli_executor.h
@@ -0,0 +1,14 @@
+#ifndef CATHELPER_CLI_EXECUTOR_H
+#define CATHELPER_CLI_EXECUTOR_H
+
+#include "cat_structs.h"
+
+/**
+ * Execute a cli request
+ * @param request_body the packed request parameters
+ * @return
+ */
+int execute_request(struct catcli_request_body *request_body);
+
+#endif // CATHELPER_CLI_EXECUTOR_H
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.c b/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.c
new file mode 100644
index 0000000..a1aa636
--- /dev/null
+++
b/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.c
@@ -0,0 +1,156 @@
+#include <errno.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include "cli_common.h"
+#include "cat_structs.h"
+#include "cli_param_checker.h"
+#define CPU_USAGE_PERCENTAGE_MAX 100
+#define CPULIST_REGEX "^([0-9]+(-[0-9]+)*,?)+$"
+
+/**
+ * Parse text as a decimal integer within [min_value, max_value].
+ * @param out receives the number; left untouched when parsing fails
+ * @return true only if the whole string is a number inside the range
+ */
+static bool parse_long_in_range(const char *text, long min_value, long max_value, long *out)
+{
+    char *end = NULL;
+    errno = 0;
+    long value = strtol(text, &end, DECIMAL);
+    if (end == text || *end != '\0' || errno == ERANGE || value < min_value || value > max_value) {
+        return false;
+    }
+    *out = value;
+    return true;
+}
+
+/**
+ * Validate "-u": an integer in (0,100]. On bad input the message is stored in
+ * errs->cpu_usage_percentage_err and cpu_utility is set to 0, which is the value
+ * checkParamsDependency() treats as invalid.
+ */
+void checkset_cpu_usage_percentage(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs)
+{
+    long cpu_utility = 0;
+    if (!parse_long_in_range(getopt_optarg, 1, CPU_USAGE_PERCENTAGE_MAX, &cpu_utility)) {
+        snprintf(errs->cpu_usage_percentage_err, MAX_ERR_LEN, "%s",
+            "\"cpu_utility \" must be an integer greater in the range (0,100],correct \"-u, --cpu_utility\"\n");
+    }
+    p_request_body->cpu_utility = (int)cpu_utility;
+}
+
+/**
+ * Validate "-l": a comma separated list of cpu ids or ascending ranges, e.g. "0-3,7".
+ * On success module_params points at getopt_optarg; otherwise it is left unchanged and
+ * the reason is stored in errs->cpulist_err.
+ */
+void checkset_cpulist(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs)
+{
+    regex_t cpulist_re = { 0 };
+    if (regcomp(&cpulist_re, CPULIST_REGEX, REG_EXTENDED) != 0) {
+        snprintf(errs->cpulist_err, MAX_ERR_LEN, "%s", "failed to compile the \"cpulist\" check pattern\n");
+        return;
+    }
+    int status = regexec(&cpulist_re, getopt_optarg, 0, NULL, 0); // only match/no-match is needed
+    regfree(&cpulist_re);
+    if (status != 0) {
+        snprintf(errs->cpulist_err, MAX_ERR_LEN, "%s",
+            "\"cpulist\" is invalid format,the correct format should be like '0-3,7',correct \"-l, --cpulist\"\n");
+        return;
+    }
+
+    // strtok_r() overwrites separators, so split a private copy; +1 holds the terminating NUL.
+    char getopt_optarg_copy[strlen(getopt_optarg) + 1];
+    strcpy(getopt_optarg_copy, getopt_optarg);
+    long total_core = sysconf(_SC_NPROCESSORS_CONF);
+    char *savePtr = NULL;
+    for (char *split = strtok_r(getopt_optarg_copy, ",", &savePtr); split != NULL;
+         split = strtok_r(NULL, ",", &savePtr)) {
+        // "a" or "a-b": strtol() stops at '-', the part after it (if any) is the range end.
+        char *dash = strchr(split, '-');
+        long coreid_before = strtol(split, NULL, DECIMAL);
+        long coreid_after = (dash == NULL) ? -1 : strtol(dash + 1, NULL, DECIMAL);
+        // NOTE(review): assumes core ids run from 0 to total_core - 1, so total_core itself is out of range
+        if (coreid_before >= total_core || coreid_after >= total_core) {
+            snprintf(errs->cpulist_err, MAX_ERR_LEN, "%s",
+                "The specified \"cpulist\" contain cpu core id which has exceeded the max cpu core id,correct "
+                "\"-l, --cpulist\"\n");
+            return;
+        }
+        if (coreid_after >= 0 && coreid_before > coreid_after) {
+            snprintf(errs->cpulist_err, MAX_ERR_LEN, "%s",
+                "\"cpulist\" must not contain descending cpuid segment such as \"8-2\",correct \"-l, --cpulist\"\n");
+            return;
+        }
+    }
+    p_request_body->module_params = getopt_optarg;
+}
+
+/**
+ * Validate "-t": an integer in (0,INT_MAX]. On bad input the message is stored in
+ * errs->patrol_time_err and patrol_second is set to 0, which checkParamsDependency()
+ * treats as invalid.
+ */
+void checkset_patrol_time(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs)
+{
+    long second = 0;
+    if (!parse_long_in_range(getopt_optarg, 1, INT_MAX, &second)) {
+        snprintf(errs->patrol_time_err, MAX_ERR_LEN, "%s",
+            "\"patrol_second\" must be a number in the range of (0,INT_MAX] ,correct \"-t, --patrol_second\"\n");
+    }
+    p_request_body->patrol_second = (int)second;
+}
+
+/**
+ * Map "-m" (hex code or case-insensitive name) to a cat_patrol_module. Unknown values
+ * select CAT_PATROL_UNKNOWN and store a message in errs->patrol_module_err.
+ */
+void checkset_patrol_type(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs)
+{
+    if (strcmp(getopt_optarg, "0x0001") == 0 || strcasecmp(getopt_optarg, "CPU") == 0) {
+        p_request_body->patrol_module = CAT_PATROL_CPU;
+    } else if (strcmp(getopt_optarg, "0x0002") == 0 || strcasecmp(getopt_optarg, "MEM") == 0) {
+        p_request_body->patrol_module = CAT_PATROL_MEM;
+    } else if (strcmp(getopt_optarg, "0x0004") == 0 || strcasecmp(getopt_optarg, "HBM") == 0) {
+        p_request_body->patrol_module = CAT_PATROL_HBM;
+    } else if (strcmp(getopt_optarg, "0x0008") == 0 || strcasecmp(getopt_optarg, "NPU") == 0) {
+        p_request_body->patrol_module = CAT_PATROL_NPU;
+    } else {
+        p_request_body->patrol_module = CAT_PATROL_UNKNOWN;
+        snprintf(errs->patrol_module_err, MAX_ERR_LEN, "%s", "unknown patrol module,correct \"-m, --patrol_module\"\n");
+    }
+}
+
+
+int checkParamsDependency(catcli_request_body *p_request_body, option_errs *p_option_errs)
+{
+    bool has_err = false;
+    if
(p_request_body->patrol_module == CAT_PATROL_UNKNOWN) { + PRINT_RED(":%s", p_option_errs->patrol_module_err); + has_err = true; + } + if (p_request_body->cpu_utility <= 0) { + PRINT_RED(":%s", p_option_errs->cpu_usage_percentage_err); + has_err = true; + } + if (p_request_body->patrol_second <= 0) { + PRINT_RED(":%s", p_option_errs->patrol_time_err); + has_err = true; + } + if (p_request_body->module_params == NULL && p_request_body->patrol_module == CAT_PATROL_CPU) { + PRINT_RED(":%s", p_option_errs->cpulist_err); + has_err = true; + } + if (has_err) { + print_opts_help(); + return CAT_INVALID_PARAMETER; + } + return CAT_OK; +} + diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.h b/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.h new file mode 100644 index 0000000..f761478 --- /dev/null +++ b/sysSentry-1.0.2/src/c/catcli/catlib/cli_param_checker.h @@ -0,0 +1,53 @@ +#ifndef CATHELPER_CLI_PARAM_CHECKER_H +#define CATHELPER_CLI_PARAM_CHECKER_H + +#include +#include +#include +#include +#include +#include "cli_common.h" +#include "cat_structs.h" +/** + * 检查cpu使用率百分比 (0,100] + * @param optarg + * @param p_request_body + * @param errs + */ +void checkset_cpu_usage_percentage(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs); + +/** + * 检查cpulist,正确格式如:1,3-7 + * @param optarg + * @param p_request_body + * @param errs + */ +void checkset_cpulist(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs); + +/** + * 检查巡检时间,大于0整数,秒 + * @param optarg + * @param p_request_body + * @param errs + */ +void checkset_patrol_time(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs); + +/** + * 检查巡检类型 0x0001(CPU)|0x0002(MEM)|0x0004(HBM)|0x0008(NPU) + * @param optarg + * @param p_request_body + * @param errs + */ +void checkset_patrol_type(char *getopt_optarg, catcli_request_body *p_request_body, struct option_errs *errs); + +/** + * 参数间依赖检查 + * @param p_request_body + * 
@param p_option_errs
+ * @return
+ */
+int checkParamsDependency(catcli_request_body *p_request_body, option_errs *p_option_errs);
+
+#endif // CATHELPER_CLI_PARAM_CHECKER_H
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/CMakeLists.txt b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/CMakeLists.txt
new file mode 100644
index 0000000..9c8adcd
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory(cpu_patrol)
+add_subdirectory(mem_patrol)
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt
new file mode 100644
index 0000000..1fdde36
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt
@@ -0,0 +1,8 @@
+project(cpu_patrol C)
+
+file(GLOB CPU_PATROL_SRC *.c)
+
+add_library(cpu_patrol SHARED ${CPU_PATROL_SRC})
+
+TARGET_LINK_LIBRARIES(cpu_patrol pthread -ldl)
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.c b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.c
new file mode 100644
index 0000000..22d22bf
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.c
@@ -0,0 +1,367 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <regex.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include "cpu_patrol_result.h"
+#include "cpu_patrol.h"
+
+pthread_mutex_t g_start_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t g_stop_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t g_stop_flag_mutex = PTHREAD_MUTEX_INITIALIZER;
+unsigned short g_CONVERT_TO_PERCENTAGE = 100;
+bool g_stop_flag = false;
+
+
+/* Set g_stop_flag while holding g_stop_flag_mutex. */
+static void set_stop_flag(bool flag)
+{
+    pthread_mutex_lock(&g_stop_flag_mutex);
+
+    g_stop_flag = flag;
+
+    if (pthread_mutex_unlock(&g_stop_flag_mutex) != 0) {
+        CAT_LOG_E("pthread_mutex_unlock g_stop_flag_mutex failed.");
+    }
+}
+
+/**
+ * Write the string config to the file at path.
+ * @return CAT_OK on success, CAT_INVALID_PARAMETER when write() fails with EINVAL,
+ *         CAT_GENERIC_ERROR for any other open/write failure
+ */
+static cat_return_t write_patrol_config(const char *path, const char *config)
+{
+    int fd = open(path, O_RDWR);
+    if (fd < 0) {
+        CAT_LOG_E("Open path[%s] file fail, errno[%d]", path, errno);
+        return CAT_GENERIC_ERROR;
+    }
+
+    ssize_t ret = write(fd, config, strlen(config));
+    if (ret == -1) {
+        // Save errno first: logging and close() may overwrite it.
+        int write_errno = errno;
+        CAT_LOG_E("Write [%s] file fail, config[%s], errno[%d]", path, config, write_errno);
+        close(fd);
+        if (write_errno == EINVAL) {
+            return CAT_INVALID_PARAMETER;
+        }
+        return CAT_GENERIC_ERROR;
+    }
+
+    close(fd);
+    return CAT_OK;
+}
+
+/* Write the cpu list string to KERNEL_INTF_PATH_CPUMASK. */
+static cat_return_t set_patrol_cpumask(const char *cpumask)
+{
+    return write_patrol_config(KERNEL_INTF_PATH_CPUMASK, cpumask);
+}
+
+/* Write patrol_times, formatted as decimal, to KERNEL_INTF_PATH_PATROL_TIMES. */
+static cat_return_t set_patrol_times(unsigned int patrol_times)
+{
+    char tmpstr[DECIMAL_STR_LEN] = {0};
+
+    int ret = snprintf(tmpstr, sizeof(tmpstr), "%u", patrol_times);
+    if (ret <= 0) {
+        CAT_LOG_E("Get patrol_times string fail, %d", ret);
+        return CAT_GENERIC_ERROR;
+    }
+
+    return write_patrol_config(KERNEL_INTF_PATH_PATROL_TIMES, tmpstr);
+}
+
+/* Write utility, formatted as decimal, to KERNEL_INTF_PATH_CPU_UTILITY. */
+static cat_return_t set_patrol_cpu_utility(unsigned int utility)
+{
+    char tmpstr[DECIMAL_STR_LEN] = {0};
+
+    int ret = snprintf(tmpstr, sizeof(tmpstr), "%u", utility);
+    if (ret <= 0) {
+        CAT_LOG_E("Get utility string fail, %d", ret);
+        return CAT_GENERIC_ERROR;
+    }
+
+    return write_patrol_config(KERNEL_INTF_PATH_CPU_UTILITY, tmpstr);
+}
+
+/**
+ * Check that cpumask looks like a cpu list such as "0-3,7".
+ * @return CAT_OK, CAT_INVALID_PARAMETER on NULL or mismatch, CAT_GENERIC_ERROR if the pattern cannot be compiled
+ */
+static cat_return_t check_cpumask(const char *cpumask)
+{
+    if (cpumask == NULL) {
+        CAT_LOG_E("CPU mask check failed: cpumask is NULL");
+        return CAT_INVALID_PARAMETER;
+    }
+
+    regex_t mask_re = { 0 };
+    const char *pattern = "^([0-9]+(-[0-9]+)*,?)+$";
+
+    // compile the pattern
+    if (regcomp(&mask_re, pattern, REG_EXTENDED) != 0) {
+        CAT_LOG_E("regcomp error");
+        return CAT_GENERIC_ERROR;
+    }
+
+    const size_t nmatch = 1; // maximum number of match results
+    regmatch_t pmatch[1] = {0}; // offsets of the match inside cpumask
+    int status = regexec(&mask_re, cpumask, nmatch, pmatch, 0);
+    regfree(&mask_re); // release the compiled pattern
+    if (status != 0) {
+        CAT_LOG_E("CPU mask check failed: '%s' is invalid format", cpumask);
+        return CAT_INVALID_PARAMETER;
+    }
+
+    return CAT_OK;
+}
+
+/* Write '1' (is_start) or '0' to the kernel start_patrol file. */
+static cat_return_t start_patrol(bool is_start)
+{
+    int fd = open("/sys/devices/system/cpu/cpuinspect/start_patrol", O_WRONLY);
+    if (fd < 0) {
+        CAT_LOG_E("Open start_patrol file fail, %s", strerror(errno));
+        return CAT_GENERIC_ERROR;
+    }
+
+    char start_switch = is_start ? '1' : '0';
+    ssize_t ret = write(fd, &start_switch, sizeof(start_switch));
+    if (ret < 0) {
+        CAT_LOG_E("Write start_patrol file fail, %s", strerror(errno));
+        close(fd);
+        return CAT_GENERIC_ERROR;
+    }
+
+    close(fd);
+    return CAT_OK;
+}
+
+/**
+ * Read the first byte of the kernel patrol_complete file.
+ * @return false when the file cannot be opened or the byte is '1'; true otherwise
+ */
+static bool is_patrol_running(void)
+{
+    int fd = open("/sys/devices/system/cpu/cpuinspect/patrol_complete", O_RDONLY);
+    if (fd < 0) {
+        CAT_LOG_E("Open patrol_complete file fail, %s", strerror(errno));
+        return false;
+    }
+
+    // ask whether the previous patrol has finished: '1' finished, '0' not finished
+    char buf = '0';
+    if (read(fd, &buf, sizeof(buf)) > 0) {
+        if (buf == '1') {
+            close(fd);
+            return false;
+        }
+    }
+    close(fd);
+
+    return true;
+}
+
+/**
+ * Validate the caller's arguments and copy them into patrol_para, deriving
+ * patrol_times from cpu_utility.
+ * @return CAT_OK, or CAT_INVALID_PARAMETER for a bad cpumask, patrol_second or cpu_utility
+ */
+static cat_return_t fill_patrol_para(const char *cpumask, int cpu_utility, int patrol_second,
+                                     cpu_patrol_para *patrol_para)
+{
+    if (check_cpumask(cpumask) != CAT_OK) {
+        return CAT_INVALID_PARAMETER;
+    }
+    patrol_para->cpumask = cpumask;
+    if (patrol_second <= 0) {
+        CAT_LOG_E("patrol second must be greater than 0");
+        return CAT_INVALID_PARAMETER;
+    }
+    patrol_para->patrol_second = patrol_second;
+    if (cpu_utility > MAX_CPU_UTILITY || cpu_utility < 0) {
+        CAT_LOG_E("the range of cpu utility is [0,100]");
+        return CAT_INVALID_PARAMETER;
+    }
+    patrol_para->cpu_utility = cpu_utility;
+    // test cases per kernel patrol round, scaled by the cpu utility, at least 1
+    patrol_para->patrol_times = PATROL_TIMES_PER_ITERATION * patrol_para->cpu_utility / g_CONVERT_TO_PERCENTAGE;
+    patrol_para->patrol_times = (patrol_para->patrol_times == 0 ? 1 : patrol_para->patrol_times);
+
+    return CAT_OK;
+}
+
+/**
+ * Push cpumask, patrol_times and cpu_utility to the kernel interface files.
+ * A failure to write cpu_utility is deliberately ignored; the other two are not.
+ */
+static cat_return_t set_patrol_policy(const cpu_patrol_para *patrol_para)
+{
+    cat_return_t ret = set_patrol_cpumask(patrol_para->cpumask);
+    if (ret != CAT_OK) {
+        return ret;
+    }
+    ret = set_patrol_times(patrol_para->patrol_times);
+    if (ret != CAT_OK) {
+        return ret;
+    }
+
+    (void)set_patrol_cpu_utility(patrol_para->cpu_utility);
+
+    CAT_LOG_I("Set policy, cpumask[%s], times[%d], utility[%d%%]", patrol_para->cpumask, patrol_para->patrol_times,
+              patrol_para->cpu_utility);
+
+    return CAT_OK;
+}
+
+/**
+ * SIGTERM/SIGINT handler. signal() requires a void(int) function, while
+ * lib_cpu_patrol_stop() is cat_return_t(void), so it cannot be installed directly.
+ * NOTE(review): lib_cpu_patrol_stop() calls puts() and locks mutexes, which are not
+ * async-signal-safe; the call is kept to preserve the existing stop behaviour.
+ */
+static void handle_stop_signal(int signo)
+{
+    (void)signo;
+    (void)lib_cpu_patrol_stop();
+}
+
+/**
+ * Configure the kernel and repeat patrol rounds until patrol_second seconds have
+ * elapsed or g_stop_flag is set.
+ * @return CAT_OK, the error of set_patrol_policy()/start_patrol(), or CAT_ERR if the clock cannot be read
+ */
+static cat_return_t run(const cpu_patrol_para *patrol_para)
+{
+    CAT_LOG_I("Cpu patrol begin,it will cost about %d(s)", patrol_para->patrol_second);
+    // basic kernel patrol configuration
+    cat_return_t ret = set_patrol_policy(patrol_para);
+    if (ret != CAT_OK) {
+        return ret;
+    }
+    (void) signal(SIGTERM, handle_stop_signal); // kill
+    (void) signal(SIGINT, handle_stop_signal); // ctrl+c
+    // Patrol in several rounds so that the kernel side does not keep patrolling for long after this
+    // scheduling process is killed. The length of one kernel round is proportional to PATROL_TIMES_PER_ITERATION.
+    const int wait_time = 500 * 1000; // 500 ms, in microseconds
+
+    // CLOCK_MONOTONIC is not a calendar time, so its seconds are compared directly
+    // instead of being converted through localtime_r()/mktime().
+    struct timespec start_timespec;
+    if (clock_gettime(CLOCK_MONOTONIC, &start_timespec) == -1) {
+        CAT_LOG_E("get system clock failed");
+        return CAT_ERR;
+    }
+
+    while (!g_stop_flag) {
+        struct timespec end_timespec;
+        if (clock_gettime(CLOCK_MONOTONIC, &end_timespec) == -1) {
+            CAT_LOG_E("get system clock failed");
+            return CAT_ERR;
+        }
+        double diff_time = difftime(end_timespec.tv_sec, start_timespec.tv_sec);
+        if (diff_time >= patrol_para->patrol_second) {
+            break;
+        }
+        ret = start_patrol(true);
+        if (ret != CAT_OK) {
+            break;
+        }
+        // the patrol request is asynchronous: wait 500 ms for it to start before polling
+        usleep(wait_time);
+        // poll every 500 ms until the kernel reports completion
+        while (is_patrol_running()) {
+            usleep(wait_time);
+        }
+        handle_patrol_result();
+    }
+    return ret; // still CAT_OK unless start_patrol() failed
+}
+
+/**
+ * Run one complete patrol: refuse if one is already running, clear old results,
+ * validate the parameters, patrol, then fetch the result into cpu_set.
+ */
+cat_return_t start(const char *cpumask, int cpu_utility, int patrol_second, cpu_set_t *cpu_set)
+{
+    if (is_patrol_running()) {
+        CAT_LOG_E("AN cpu patrol instance is already running.");
+        return CAT_ALREADY_RUNNING;
+    }
+    set_stop_flag(false);
+
+    // 1. clear the result of the previous patrol
+    clear_patrol_result();
+
+    // 2. parse the patrol parameters
+    cpu_patrol_para patrol_para = { 0 };
+    patrol_para.patrol_times = PATROL_TIMES_PER_ITERATION;
+    cat_return_t ret = fill_patrol_para(cpumask, cpu_utility, patrol_second, &patrol_para);
+    if (ret != CAT_OK) {
+        return ret;
+    }
+
+    // 3. run the patrol
+    ret = run(&patrol_para);
+    if (ret != CAT_OK) {
+        return ret;
+    }
+
+    // 4. return the patrol result
+    return get_patrol_result(cpu_set);
+}
+
+/* Set the stop flag and tell the kernel to stop patrolling. */
+cat_return_t stop(void)
+{
+    set_stop_flag(true);
+    return start_patrol(false);
+}
+
+/* Run start() under g_start_mutex; returns CAT_ALREADY_RUNNING if the mutex is already held. */
+cat_return_t lib_cpu_patrol_start(const char *cpumask, int cpu_utility, const int patrol_second,
+                                  cpu_set_t *cpu_set)
+{
+    // only one patrol thread is allowed
+    if (pthread_mutex_trylock(&g_start_mutex) != 0) {
+        CAT_LOG_E("Resource busy, an cpu patrol instance is already running.");
+        return CAT_ALREADY_RUNNING;
+    }
+
+    cat_return_t ret = start(cpumask, cpu_utility, patrol_second, cpu_set);
+
+    if (pthread_mutex_unlock(&g_start_mutex) != 0) {
+        CAT_LOG_E("pthread_mutex_unlock g_start_mutex failed.");
+    }
+
+    return ret;
+}
+
+/* Run stop() under g_stop_mutex; returns CAT_OK right away if the mutex is already held. */
+cat_return_t lib_cpu_patrol_stop(void)
+{
+    puts("system will stop cpu patrol soon");
+    // a stop is already in progress: report success
+    if (pthread_mutex_trylock(&g_stop_mutex) != 0) {
+        return CAT_OK;
+    }
+
+    cat_return_t ret = stop();
+
+    if (pthread_mutex_unlock(&g_stop_mutex) != 0) {
+        CAT_LOG_E("pthread_mutex_unlock g_stop_mutex failed.");
+    }
+
+    return ret;
+}
+
+
diff --git
a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.h b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.h
new file mode 100644
index 0000000..f7acd1e
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol.h
@@ -0,0 +1,40 @@
+#ifndef CPU_PATROL_H
+#define CPU_PATROL_H
+
+#include <sched.h>
+#include "cat_structs.h"
+#define MAX_PATROL_TIMES 36000 // number of patrol case executions; best results need at least 36000
+// MAX_PATROL_TIMES is split into several iterations; this is the number of patrol cases the kernel
+// executes per iteration. At 100% CPU utility one iteration takes about 1 second (fool-proofing).
+#define PATROL_TIMES_PER_ITERATION 200
+#define DECIMAL_STR_LEN 11
+#define DEFAULT_CPU_UTILITY 50 // default CPU utility of the kernel patrol process
+#define MIN_CPU_UTILITY 1 // minimum CPU utility of the kernel patrol process
+#define MAX_CPU_UTILITY 100 // maximum CPU utility of the kernel patrol process
+#define MAX_CPU_LOGIC_CORE 1024 // maximum number of logical CPU cores
+#define KERNEL_INTF_PATH_CPUMASK "/sys/devices/system/cpu/cpuinspect/cpumask"
+#define KERNEL_INTF_PATH_PATROL_TIMES "/sys/devices/system/cpu/cpuinspect/patrol_times"
+#define KERNEL_INTF_PATH_CPU_UTILITY "/sys/devices/system/cpu/cpuinspect/cpu_utility"
+
+typedef struct {
+    const char *cpumask; // cpu core list, eg. 0-7,9-127
+    unsigned int cpu_utility; // maximum CPU utility of the kernel patrol process
+    unsigned int patrol_times; // number of CPU patrol cases executed by the kernel
+    unsigned int patrol_second; // patrol duration (seconds)
+} cpu_patrol_para;
+
+/*
+ * Start a CPU patrol.
+ * input:  cpumask: range of CPU cores to patrol, format "55,32,16,1-3,13-17";
+ *         cpu_utility: maximum CPU utility of the kernel patrol process, range [1-100];
+ *                      if the value is invalid, DEFAULT_CPU_UTILITY is used
+ *         patrol_time: patrol duration (seconds), an integer greater than 1
+ * output: cpu_set: returns the list of cores isolated by the patrol
+ */
+cat_return_t lib_cpu_patrol_start(const char *cpumask, int cpu_utility, int patrol_time,
+    cpu_set_t *cpu_set);
+
+/*
+ * Stop the patrol
+ */
+cat_return_t lib_cpu_patrol_stop(void);
+
+
+#endif
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c
new file mode 100644
index 0000000..6f161df
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c
@@ -0,0 +1,287 @@
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "cat_structs.h"
+#include "cpu_patrol_result.h"
+
+core_list_st g_isolated_core_list = { 0 };
+
+/*
+ * Insert a core id into the list, keeping the list sorted in ascending
+ * order and free of duplicates.
+ * Returns CAT_OK when the id was stored, was already present, or is core 0
+ * (core 0 is skipped with a warning); CAT_ERR when the id is negative or
+ * the list is full.
+ */
+static cat_return_t insert_core_to_list(core_list_st *core_list, int coreid)
+{
+    if (coreid == 0) {
+        CAT_LOG_W("Core %d is a special core and cannot be isolated", coreid);
+        return CAT_OK;
+    }
+    if (coreid < 0) {
+        CAT_LOG_E("Insert error, core id(%d)", coreid);
+        return CAT_ERR;
+    }
+
+    // Find the first slot whose value is not smaller than the new id
+    unsigned short pos = 0;
+    while ((pos < core_list->current_nums) && (core_list->order_list[pos] < (unsigned int)coreid)) {
+        pos++;
+    }
+    // Already present: nothing to insert
+    if ((pos < core_list->current_nums) && (core_list->order_list[pos] == (unsigned int)coreid)) {
+        return CAT_OK;
+    }
+    if (core_list->current_nums >= MAX_ISOLATE_CORES_PER_PATROL) {
+        CAT_LOG_E("Insert error, core id(%d)", coreid);
+        return CAT_ERR;
+    }
+
+    // Shift the tail one slot to the right (zero bytes when appending at the end)
+    (void)memmove(&core_list->order_list[pos + 1], &core_list->order_list[pos],
+        (size_t)(core_list->current_nums - pos) * sizeof(core_list->order_list[0]));
+    core_list->order_list[pos] = (unsigned int)coreid;
+    core_list->current_nums++;
+
+    return CAT_OK;
+}
+
+/*
+ * Parse a patrol result such as "1-3,7,9,12-15" into fault_list: the text
+ * is split on commas, and every item is either one core id or a
+ * "first-last" range.
+ * buf is modified in place by strtok_r(). Malformed items are logged and
+ * skipped. Always returns CAT_OK.
+ */
+static cat_return_t parse_patrol_result(char *buf, core_list_st *fault_list)
+{
+    const int number_base = 10; // core ids are decimal
+    char *save_ptr = NULL;
+
+    for (char *item = strtok_r(buf, ",", &save_ptr); item != NULL; item = strtok_r(NULL, ",", &save_ptr)) {
+        char *end = NULL;
+        errno = 0;
+        long first = strtol(item, &end, number_base);
+        if ((end == item) || (errno == ERANGE) || (first < 0) || (first > INT_MAX)) {
+            CAT_LOG_E("Invalid core id in patrol result item [%s]", item);
+            continue;
+        }
+
+        // A single id is handled as the range first-first; "N-" is also a single id
+        long last = first;
+        if ((*end == '-') && (end[1] != '\0')) {
+            const char *range_end = end + 1;
+            errno = 0;
+            last = strtol(range_end, &end, number_base);
+            if ((end == range_end) || (errno == ERANGE) || (last < 0) || (last > INT_MAX)) {
+                CAT_LOG_E("Invalid core range in patrol result item [%s]", item);
+                continue;
+            }
+        }
+
+        for (long core = first; core <= last; core++) {
+            // An insert error means the list is full; the rest of the range would fail as well
+            if (insert_core_to_list(fault_list, (int)core) != CAT_OK) {
+                break;
+            }
+        }
+    }
+    return CAT_OK;
+}
+
+/*
+ * Read the kernel patrol result file into buf as a NUL-terminated string,
+ * without the trailing newline.
+ * Returns CAT_OK on success, CAT_ERR when the arguments are invalid, the
+ * file cannot be opened, or nothing could be read.
+ */
+static cat_return_t get_result(char *buf, int buf_len)
+{
+    if ((buf == NULL) || (buf_len <= 0)) {
+        CAT_LOG_E("Invalid result buffer");
+        return CAT_ERR;
+    }
+
+    int fd = open("/sys/devices/system/cpu/cpuinspect/result", O_RDONLY);
+    if (fd < 0) {
+        CAT_LOG_E("Open patrol result file fail, %s", strerror(errno));
+        return CAT_ERR;
+    }
+
+    // Keep one byte for the terminator
+    ssize_t count = read(fd, buf, (size_t)buf_len - 1);
+    close(fd);
+    if (count <= 0) {
+        CAT_LOG_E("Read error, count %d", (int)count);
+        return CAT_ERR;
+    }
+    buf[count] = '\0';
+    // The data returned by read normally ends with '\n'; drop it only when present
+    if (buf[count - 1] == '\n') {
+        buf[count - 1] = '\0';
+    }
+
+    return CAT_OK;
+}
+
+/*
+ * Open the file named by CPU_PATH_FORMAT for the given cpu with the given
+ * open(2) flags. Returns the file descriptor, or -1 on failure.
+ */
+static int open_cpu_sys_file(unsigned int cpu, int oflag)
+{
+    char path[MAX_CPU_SYS_FILE_PATH_LEN] = {0};
+    int ret = snprintf(path, sizeof(path), CPU_PATH_FORMAT, cpu);
+    if ((ret <= 0) || ((size_t)ret >= sizeof(path))) {
+        CAT_LOG_E("Get cpu sys file path fail, %d", ret);
+        return -1;
+    }
+
+    int fd = open(path, oflag);
+    if (fd < 0) {
+        CAT_LOG_E("Open cpu sys file fail, %s", strerror(errno));
+        return -1;
+    }
+
+    return fd;
+}
+
+// Get the state of the given cpu (mainly whether it is online or offline).
+// Returns the first byte of the cpu's online file, or CPU_STATE_UNKNOWN on failure.
+static char get_cpu_core_status(unsigned int cpu)
+{
+    // Open the online file
+    int fd = open_cpu_sys_file(cpu, O_RDONLY);
+    if (fd == -1) {
+        return CPU_STATE_UNKNOWN;
+    }
+    char buf = CPU_STATE_UNKNOWN;
+    if (read(fd, &buf, sizeof(buf)) <= 0) {
+        CAT_LOG_E("Read cpu state fail");
+        close(fd);
+        return CPU_STATE_UNKNOWN;
+    }
+    close(fd);
+
+    return buf;
+}
+
+/*
+ * Take the given cpu offline by writing CPU_STATE_OFFLINE to its online
+ * file, then read the state back.
+ * Returns CAT_OK only when the state read back is CPU_STATE_OFFLINE.
+ */
+static cat_return_t do_cpu_core_offline(unsigned int cpu)
+{
+    int fd = open_cpu_sys_file(cpu, O_RDWR);
+    if (fd == -1) {
+        return CAT_ERR;
+    }
+
+    const char state = CPU_STATE_OFFLINE;
+    ssize_t rc = write(fd, &state, sizeof(state));
+    int saved_errno = errno; // close() below may overwrite errno
+    close(fd);
+    if (rc < 0) {
+        CAT_LOG_E("CPU%u offline failed, errno:%d", cpu, saved_errno);
+        return CAT_ERR;
+    }
+    /* Check that the cpu really went offline */
+    if (get_cpu_core_status(cpu) == CPU_STATE_OFFLINE) {
+        return CAT_OK;
+    }
+
+    return CAT_ERR;
+}
+
+/*
+ * Isolate the fault cores found by the patrol and add every core that was
+ * successfully taken offline to isolated_core_list.
+ * Core 0, ids beyond the configured processor count and cores that are not
+ * currently online are skipped.
+ */
+void isolate_cpu_core(core_list_st *isolated_core_list, const core_list_st *fault_list)
+{
+    long total_core = sysconf(_SC_NPROCESSORS_CONF);
+    if (total_core < 0) {
+        CAT_LOG_E("Get total cpu cores failed.");
+        return;
+    }
+    for (unsigned short i = 0; i < fault_list->current_nums; i++) {
+        unsigned int core = fault_list->order_list[i];
+        // Core 0 is never isolated
+        if ((core >= (unsigned long)total_core) || (core == 0)) {
+            CAT_LOG_E("Isolate cpu core failed, invalid core id(%u)", core);
+            continue;
+        }
+        if (get_cpu_core_status(core) != CPU_STATE_ONLINE) {
+            continue;
+        }
+        if (do_cpu_core_offline(core) == CAT_OK) {
+            (void)insert_core_to_list(isolated_core_list, (int)core);
+            // NOTE(review): the message text looks truncated; it is kept unchanged
+            // in case a consumer of the log depends on it - confirm.
+            CAT_LOG_I(":%d", core);
+        }
+    }
+}
+
+/*
+ * Convert the list 1,2,3,10,22,23,24 into the string "1-3,10,22-24".
+ * The text is built in a PATROL_RESULT_LEN buffer and copied to out_str,
+ * truncated to out_str_len - 1 characters and always NUL-terminated when
+ * out_str_len is not 0.
+ */
+static cat_return_t get_core_list_str(const core_list_st *core_list, char *out_str, unsigned short out_str_len)
+{
+    if (core_list->current_nums == 0) {
+        *out_str = '\0';
+        return CAT_OK;
+    }
+
+    char buf[PATROL_RESULT_LEN] = {0};
+    char tmp_buf[PATROL_RESULT_LEN] = {0};
+    unsigned int begin_cpuid = core_list->order_list[0];
+    unsigned int end_cpuid = begin_cpuid;
+
+    for (unsigned short i = 1; i < core_list->current_nums; i++) {
+        // Extend the current run while the ids are consecutive
+        if (core_list->order_list[i] == (end_cpuid + 1)) {
+            end_cpuid++;
+            continue;
+        }
+
+        if (begin_cpuid == end_cpuid) {
+            (void)snprintf(tmp_buf, sizeof(tmp_buf), "%u", begin_cpuid);
+        } else {
+            (void)snprintf(tmp_buf, sizeof(tmp_buf), "%u-%u", begin_cpuid, end_cpuid);
+        }
+        (void)strncat(buf, tmp_buf, sizeof(buf) - strlen(buf) - 1);
+        (void)strncat(buf, ",", sizeof(buf) - strlen(buf) - 1);
+
+        end_cpuid = core_list->order_list[i];
+        begin_cpuid = end_cpuid;
+    }
+    // Flush the last run
+    if (begin_cpuid == end_cpuid) {
+        (void)snprintf(tmp_buf, sizeof(tmp_buf), "%u", begin_cpuid);
+    } else {
+        (void)snprintf(tmp_buf, sizeof(tmp_buf), "%u-%u", begin_cpuid, end_cpuid);
+    }
+    (void)strncat(buf, tmp_buf, sizeof(buf) - strlen(buf) - 1);
+
+    // snprintf always terminates the destination, unlike strncpy
+    (void)snprintf(out_str, out_str_len, "%s", buf);
+    return CAT_OK;
+}
+
+/*
+ * Set the bit of every core in core_list in cpu_set. cpu_set is not
+ * cleared first and is left untouched when the list is empty.
+ * Returns CAT_ERR when there are cores to report but cpu_set is NULL.
+ */
+static cat_return_t get_core_list(const core_list_st *core_list, cpu_set_t *cpu_set)
+{
+    if (core_list->current_nums == 0) {
+        return CAT_OK;
+    }
+    if (cpu_set == NULL) {
+        CAT_LOG_E("Invalid cpu set");
+        return CAT_ERR;
+    }
+
+    for (unsigned short i = 0; i < core_list->current_nums; i++) {
+        CPU_SET(core_list->order_list[i], cpu_set);
+    }
+
+    return CAT_OK;
+}
+
+/*
+ * Read the result of the last patrol iteration and isolate the fault
+ * cores it reports. Does nothing when the result cannot be read or is empty.
+ */
+void handle_patrol_result(void)
+{
+    char buf[PATROL_RESULT_LEN] = {0};
+    if (get_result(buf, PATROL_RESULT_LEN) != CAT_OK) {
+        return;
+    }
+
+    if (buf[0] == '\0') {
+        return;
+    }
+
+    // Log the fault cores found during the patrol
+    CAT_LOG_W("Found fault cores:[%s]", buf);
+
+    // Collect the fault cores reported by the patrol
+    core_list_st fault_list = { 0 };
+    if (parse_patrol_result(buf, &fault_list) != CAT_OK) {
+        return;
+    }
+
+    // Isolate the fault cores and add the ones successfully isolated to the isolated list
+    isolate_cpu_core(&g_isolated_core_list, &fault_list);
+}
+
+/*
+ * Return the list of fault cores isolated by the patrol
+ */
+cat_return_t get_patrol_result(cpu_set_t *isolated_cpu_set)
+{
+    return get_core_list(&g_isolated_core_list, isolated_cpu_set);
+}
+
+/*
+ * Reset the list of isolated cores to empty
+ */
+void clear_patrol_result(void)
+{
+    (void)memset(&g_isolated_core_list, 0, sizeof(g_isolated_core_list));
+}
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h
b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h
new file mode 100644
index 0000000..f18d5ce
--- /dev/null
+++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h
@@ -0,0 +1,43 @@
+#ifndef CPU_PATROL_RESULT_H
+#define CPU_PATROL_RESULT_H
+
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include "cat_structs.h"
+
+#define CPU_PATH_FORMAT "/sys/devices/system/cpu/cpu%d/online"
+#define PATROL_RESULT_LEN 512
+#define MAX_CPU_SYS_FILE_PATH_LEN 256
+
+// State characters compared with the first byte of a cpu's online file
+typedef enum {
+    CPU_STATE_OFFLINE = '0',
+    CPU_STATE_ONLINE = '1',
+    CPU_STATE_UNKNOWN = '2'
+} cpu_core_state;
+
+// Base name of a path: the part after the last '/', or the whole string when there is none
+#define FILE_NAME(x) ((strrchr(x, '/') == NULL) ? (x) : (strrchr(x, '/') + 1))
+// Print "[level] file line function: message" followed by a newline to stdout
+#define CAT_LOG(level, ...) \
+    do { \
+        printf("[%s] %s %d %s: ", level, FILE_NAME(__FILE__), __LINE__, __FUNCTION__); \
+        printf(__VA_ARGS__); \
+        printf("\n"); \
+    } while (0)
+
+#define CAT_LOG_I(...) CAT_LOG("INFO", __VA_ARGS__)
+#define CAT_LOG_W(...) CAT_LOG("WARN", __VA_ARGS__)
+#define CAT_LOG_E(...) CAT_LOG("ERROR", __VA_ARGS__)
+
+// Maximum number of fault cores that can be isolated in one patrol; the probability of
+// detecting more than 2 fault cores in a single patrol is very low
+#define MAX_ISOLATE_CORES_PER_PATROL 64
+typedef struct {
+    unsigned int order_list[MAX_ISOLATE_CORES_PER_PATROL]; // core ids in ascending order
+    unsigned short current_nums; // number of valid entries in order_list
+} core_list_st;
+
+void handle_patrol_result(void);
+// Return the list of fault cores isolated by the patrol
+cat_return_t get_patrol_result(cpu_set_t *isolated_cpu_set);
+void clear_patrol_result(void);
+
+#endif
+
+
diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/mem_patrol/CMakeLists.txt b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/mem_patrol/CMakeLists.txt
new file mode 100644
index 0000000..e69de29
-- 
Gitee