From 8f4db6a6cc149d85d3d1eb4f3057a9114f56f2a7 Mon Sep 17 00:00:00 2001 From: yu-liang-bin Date: Wed, 6 Aug 2025 16:10:10 +0800 Subject: [PATCH] bug fix memory --- torch_npu/profiler/analysis/_profiler_config.py | 4 +++- torch_npu/profiler/analysis/_profiling_parser.py | 2 ++ .../profiler/analysis/prof_common_func/_constant.py | 1 + .../profiler/analysis/prof_view/_memory_view_parser.py | 10 +++++++--- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/torch_npu/profiler/analysis/_profiler_config.py b/torch_npu/profiler/analysis/_profiler_config.py index aa3f76c986..fc95cf283f 100644 --- a/torch_npu/profiler/analysis/_profiler_config.py +++ b/torch_npu/profiler/analysis/_profiler_config.py @@ -156,7 +156,9 @@ class ProfilerConfig: self._rank_id = info_json.get(Constant.RANK_ID, -1) def load_timediff_info(self, profiler_path: str, info_json: dict): - self._localtime_diff = CANNFileParser(profiler_path).get_localtime_diff() + if (Constant.NPU_ACTIVITIES in info_json.get(Constant.CONFIG, {}).get(Constant.COMMON_CONFIG, {}) + .get(Constant.ACTIVITIES, [])): + self._localtime_diff = CANNFileParser(profiler_path).get_localtime_diff() end_info = info_json.get(Constant.END_INFO, {}) if not self._localtime_diff and end_info: self._localtime_diff = int(end_info.get(Constant.FWK_END_TIME, 0)) - int( diff --git a/torch_npu/profiler/analysis/_profiling_parser.py b/torch_npu/profiler/analysis/_profiling_parser.py index 4458d7559c..809db5e992 100644 --- a/torch_npu/profiler/analysis/_profiling_parser.py +++ b/torch_npu/profiler/analysis/_profiling_parser.py @@ -7,6 +7,7 @@ from .prof_common_func._cann_package_manager import CannPackageManager from .prof_common_func._path_manager import ProfilerPathManager from .prof_common_func._task_manager import ConcurrentTasksManager from .prof_common_func._log import ProfilerLogger +from .prof_common_func._utils import no_exception_func from .prof_config._parser_config import ParserConfig from .prof_parse._cann_file_parser import CANNFileParser from ._profiler_config import ProfilerConfig @@ -82,6 +83,7 @@ class ProfilingParser: if re.match(patten, filename) and os.path.isfile(os.path.join(cann_path, filename)): PathManager.remove_file_safety(os.path.join(cann_path, filename)) + @no_exception_func() def analyse_profiling_data(self): ProfilerLogger.init(self._profiler_path, "ProfilingParser") self.logger = ProfilerLogger.get_instance() diff --git a/torch_npu/profiler/analysis/prof_common_func/_constant.py b/torch_npu/profiler/analysis/prof_common_func/_constant.py index 1a62c54d6f..37abe8cf04 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_constant.py +++ b/torch_npu/profiler/analysis/prof_common_func/_constant.py @@ -102,6 +102,7 @@ class Constant(object): RANK_ID = "rank_id" COMMON_CONFIG = "common_config" ACTIVITIES = "activities" + NPU_ACTIVITIES = "ProfilerActivity.NPU" EXPERIMENTAL_CONFIG = "experimental_config" PROFILER_LEVEL = '_profiler_level' AI_CORE_METRICS = '_aic_metrics' diff --git a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py index 47255efd09..1eb390df4c 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py @@ -29,6 +29,7 @@ class MemoryViewParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) + self._activities = param_dict.get(Constant.ACTIVITIES, []) self.size_record_list = [] self.pta_record_list = [] self.ge_record_list = [] @@ -140,13 +141,16 @@ class MemoryViewParser(BaseParser): """ add ge memory and app memory from cann files """ - npu_app_memory_file_set = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.NPU_MEMORY) + npu_app_memory_file_set = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.NPU_MEMORY) \ + if Constant.NPU_ACTIVITIES in self._activities else set() app_record_data = self._get_data_from_file(npu_app_memory_file_set, NpuMemoryBean) self.size_record_list.extend(app_record_data) self._add_device_type_for_npu() - ge_memory_record_file = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.GE_MEMORY_RECORD) + ge_memory_record_file = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.GE_MEMORY_RECORD) \ + if Constant.NPU_ACTIVITIES in self._activities else set() self.split_component_ge(self._get_data_from_file(ge_memory_record_file, GeMemoryRecordBean, bean_list=True)) - ge_op_memory_file = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.GE_OPERATOR_MEMORY) + ge_op_memory_file = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.GE_OPERATOR_MEMORY) \ + if Constant.NPU_ACTIVITIES in self._activities else set() self.memory_data.extend(self._get_data_from_file(ge_op_memory_file, GeOpMemoryBean)) def _init_pta_data(self): -- Gitee