diff --git a/add-huge-page-aggregation.patch b/add-huge-page-aggregation.patch
new file mode 100644
index 0000000000000000000000000000000000000000..373ee8558170bdab6fa7b4c921fba6dee19eab05
--- /dev/null
+++ b/add-huge-page-aggregation.patch
@@ -0,0 +1,274 @@
+From b8fac2aa55b97d3bfe22c5a96f84c98f8f3a2e94 Mon Sep 17 00:00:00 2001
+From: zhuo <1107893276@qq.com>
+Date: Tue, 9 Sep 2025 14:57:02 +0800
+Subject: [PATCH] add huge page aggregation
+
+---
+ .../non-standard-hbm-repair.c | 196 +++++++++++++++++-
+ 1 file changed, 193 insertions(+), 3 deletions(-)
+
+diff --git a/src/c/hbm_online_repair/non-standard-hbm-repair.c b/src/c/hbm_online_repair/non-standard-hbm-repair.c
+index 97cb9a7..cce1e0b 100644
+--- a/src/c/hbm_online_repair/non-standard-hbm-repair.c
++++ b/src/c/hbm_online_repair/non-standard-hbm-repair.c
+@@ -10,6 +10,7 @@
+ #include
+ #include
+ #include
++#include
+ 
+ #include "logger.h"
+ #include "non-standard-hbm-repair.h"
+@@ -510,10 +511,66 @@ static int notice_BMC(const struct hisi_common_error_section *err, uint8_t repai
+     return 0;
+ }
+ 
+-static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err)
++/*
++ * Look up the NUMA node that owns phys_addr by scanning the output of
++ * `lsmem --output-all --raw`: the first column is parsed as a hex
++ * "start-end" range and the sixth column as the node id.
++ *
++ * Returns the node id, or -1 if popen fails or no range contains phys_addr.
++ */
++static int query_numa_node(unsigned long long phys_addr)
++{
++    FILE *fp = popen("lsmem --output-all --raw 2>/dev/null", "r");
++    if (!fp) {
++        log(LOG_ERROR, "lsmem --output-all popen failed");
++        return -1;
++    }
++
++    char line[1024];
++    int found = 0, node = -1;
++
++    // skip title
++    fgets(line, sizeof(line), fp);
++
++    while (fgets(line, sizeof(line), fp)) {
++        /* Same width as phys_addr so the range comparison cannot truncate. */
++        unsigned long long start, end;
++        char range[64], node_str[16];
++
++        if (sscanf(line, "%63s %*s %*s %*s %*s %15s", range, node_str) != 2)
++            continue;
++
++        if (sscanf(range, "%llx-%llx", &start, &end) != 2)
++            continue;
++
++        if (sscanf(node_str, "%d", &node) != 1)
++            continue;
++
++        if (phys_addr >= start && phys_addr <= end) {
++            log(LOG_DEBUG, "Physical Address 0x%llX -> NUMA Node: %d\n", phys_addr, node);
++            found = 1;
++            break;
++        }
++    }
++    pclose(fp);
++
++    if (!found) {
++        log(LOG_WARNING, "NUMA node not found for address 0x%llX\n", phys_addr);
++        return -1;
++    }
++
++    return node;
++}
++
++/*
++ * node_arr receives the distinct NUMA nodes found for the isolated addresses;
++ * *out_count is set to the number of entries stored when the end of the function is reached.
++ */
++static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err, int *node_arr, int *out_count)
+ {
+     unsigned long long paddr;
+-    int ret;
++    int ret, node;
++    int local_count = 0;
+     bool is_acls = err->reg_array[HBM_REPAIR_REQ_TYPE] & (HBM_CE_ACLS | HBM_PSUE_ACLS);
+     int required_isolate_size = (is_acls ? HBM_ACLS_ADDR_NUM : HBM_SPPR_ADDR_NUM) * DEFAULT_PAGE_SIZE_KB;
+     int hardware_corrupted_size = get_hardware_corrupted_size();
+@@ -533,6 +590,11 @@ static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err)
+         paddr <<= TYPE_UINT32_WIDTH;
+         paddr += err->reg_array[HBM_ADDL];
+ 
++        /* The node only feeds hugepage aggregation; a failed lookup must not block isolation. */
++        node = query_numa_node(paddr);
++        if (node >= 0) {
++            node_arr[local_count++] = node;
++        }
+         ret = write_file("/sys/kernel/page_eject", "offline_page", paddr);
+         if (ret < 0) {
+             notice_BMC(err, ISOLATE_FAILED_OTHER_REASON);
+@@ -547,6 +609,17 @@ static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err)
+             paddr = err->reg_array[2 * i + HBM_ADDH];
+             paddr <<= TYPE_UINT32_WIDTH;
+             paddr += err->reg_array[2 * i + HBM_ADDL];
++            node = query_numa_node(paddr);
++            /* Record each node once; a failed lookup must not skip isolating the page. */
++            bool record = (node >= 0);
++            for (int j = 0; record && j < local_count; j++) {
++                if (node_arr[j] == node) {
++                    record = false;
++                }
++            }
++            if (record) {
++                node_arr[local_count++] = node;
++            }
+             ret = write_file("/sys/kernel/page_eject", "offline_page", paddr);
+             if (ret < 0) {
+                 all_success = false;
+@@ -559,6 +632,7 @@ static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err)
+             ret = -1;
+         }
+     }
++    *out_count = local_count;
+     return ret < 0 ? ret : 0;
+ }
+ 
+@@ -676,6 +750,90 @@ err:
+     return type;
+ }
+ 
++/*
++ * Open the per-node 2048kB nr_hugepages sysfs file for reading and writing.
++ * On success stores the stream in *hugefp (the caller must fclose it) and
++ * returns 0; returns -1 if the file cannot be opened.
++ */
++static int open_hugepages(int node, FILE **hugefp)
++{
++    char path[256];
++    snprintf(path, sizeof(path), "/sys/devices/system/node/node%d/hugepages/hugepages-2048kB/nr_hugepages", node);
++
++    *hugefp = fopen(path, "r+");
++    if (!*hugefp) {
++        log(LOG_ERROR, "failed to open %s hugepages file\n", path);
++        return -1;
++    }
++
++    return 0;
++}
++
++/*
++ * Read the current nr_hugepages value of the given node.
++ * Returns the non-negative page count, or -1 if the file cannot be opened
++ * or read, or does not hold a decimal number that fits in an int.
++ */
++static int read_hugepages_value(int node)
++{
++    FILE *hugefp = NULL;
++    int res = open_hugepages(node, &hugefp);
++    if (res < 0) {
++        return -1;
++    }
++
++    char buffer[64];
++    if (fgets(buffer, sizeof(buffer), hugefp) == NULL) {
++        log(LOG_ERROR, "Failed to read\n");
++        fclose(hugefp);
++        return -1;
++    }
++    fclose(hugefp);
++
++    size_t len = strlen(buffer);
++    if (len > 0 && buffer[len - 1] == '\n') {
++        buffer[len - 1] = '\0';
++    }
++
++    /* Reset errno so the check and the log below reflect this strtol call only. */
++    errno = 0;
++    char *end = NULL;
++    long nr_hugepages = strtol(buffer, &end, 10);
++    if (end == buffer || *end != '\0' || errno != 0 ||
++        nr_hugepages < 0 || nr_hugepages != (int)nr_hugepages) {
++        log(LOG_ERROR, "Invalid value in %s, errno: %d(%s)\n", buffer, errno, strerror(errno));
++        return -1;
++    }
++
++    return (int)nr_hugepages;
++}
++
++/*
++ * Write nr_hugepages to the node's nr_hugepages file.
++ * Returns 0 on success, -1 if the file cannot be opened or the write fails.
++ */
++static int write_hugepages_value(int node, int nr_hugepages)
++{
++    FILE *hugefp = NULL;
++    int res = open_hugepages(node, &hugefp);
++    if (res < 0) {
++        return -1;
++    }
++
++    int ret = fprintf(hugefp, "%d\n", nr_hugepages);
++    /* stdio buffers the data; a failed write may only surface when fclose flushes. */
++    int close_ret = fclose(hugefp);
++
++    if (ret <= 0 || close_ret != 0) {
++        log(LOG_ERROR, "Failed to write value %d\n", nr_hugepages);
++        return -1;
++    }
++
++    log(LOG_DEBUG, "Successfully wrote hugepages value %d to node %d\n", nr_hugepages, node);
++    return 0;
++}
++
++
+ static void hbm_repair_handler(const struct hisi_common_error_section *err)
+ {
+     log(LOG_DEBUG, "Received ACLS/SPPR flat mode repair request, try to repair\n");
+@@ -686,11 +844,22 @@ static void hbm_repair_handler(const struct hisi_common_error_section *err)
+     int ret;
+     bool find_device = false, find_hbm_mem = false, addr_in_hbm_device = false;
+ 
+-    ret = hbmc_hbm_page_isolate(err);
++    int node_arr[HBM_SPPR_ADDR_NUM];
++    int out_count = 0, node, original_nr_hugepages, repair_nr_hugepages, aggregated_nr_hugepages;
++    ret = hbmc_hbm_page_isolate(err, node_arr, &out_count);
+     if (ret < 0) {
+         return;
+     }
+ 
++    for (int i = 0; i < out_count; i++) {
++        node = node_arr[i];
++        original_nr_hugepages = read_hugepages_value(node);
++        if (original_nr_hugepages < 0) {
++            return;
++        }
++        log(LOG_INFO, "original hugepages on %d node is: %d\n", node, original_nr_hugepages);
++    }
++
+     dir = opendir(sys_dev_path);
+     if (!dir) {
+         log(LOG_WARNING, "Can't read '%s': %s\n",
+@@ -730,6 +899,27 @@ static void hbm_repair_handler(const struct hisi_common_error_section *err)
+         notice_BMC(err, REPAIR_FAILED_INVALID_PARAM);
+     }
+ 
++    /* Write each node's current count back; leave via break so closedir() below always runs. */
++    for (int i = 0; i < out_count; i++) {
++        node = node_arr[i];
++        repair_nr_hugepages = read_hugepages_value(node);
++        if (repair_nr_hugepages < 0) {
++            break;
++        }
++        log(LOG_INFO, "repair hugepages on %d node is: %d\n", node, repair_nr_hugepages);
++
++        ret = write_hugepages_value(node, repair_nr_hugepages);
++        if (ret < 0) {
++            break;
++        }
++
++        aggregated_nr_hugepages = read_hugepages_value(node);
++        if (aggregated_nr_hugepages < 0) {
++            break;
++        }
++        log(LOG_INFO, "aggregated hugepages on %d node is: %d\n", node, aggregated_nr_hugepages);
++    }
++
+     closedir(dir);
+ }
+ 
+-- 
+2.43.0
+
diff --git a/sysSentry.spec b/sysSentry.spec
index f4c7b0ba8545435ed2822e99253a13ab5a9cee04..bad8216ce796ee5b9844e899220bb384fb301355 100644
--- a/sysSentry.spec
+++ b/sysSentry.spec
@@ -4,7 +4,7 @@
 Summary: System Inspection Framework
 Name: sysSentry
 Version: 1.0.2
-Release: 35
+Release: 36
 License: Mulan PSL v2
 Group: System Environment/Daemons
 Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@@ -46,6 +46,7 @@ Patch33: ai-block-io-exit-when-stage-is-not-supported.patch
 Patch34: fix-period-task-some-bugs.patch
 Patch35: fix-env_file-and-environ_conf.patch
 Patch36: fix-cpu_sentry-result-when-found_fault_cores_number-.patch
+Patch37: add-huge-page-aggregation.patch
 
 BuildRequires: cmake gcc-c++
 BuildRequires: python3 python3-setuptools
@@ -363,6 +364,12 @@ rm -rf %{buildroot}
 %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
 
 %changelog
+* Tue Sep 09 2025 zhuo <1107893276@qq.com> - 1.0.2-36
+- Type:requirement
+- CVE:NA
+- SUG:NA
+- DESC:add huge page aggregation
+
 * Sat May 17 2025 shixuantong - 1.0.2-35
 - Type:bugfix
 - CVE:NA