diff --git a/RELEASE.md b/RELEASE.md index ebcc0d4ee01bef81a4f3d96c3376ad9be0bf987e..f5fe023f4cfc5c9f7a9e31b5973cfe57e25dcf38 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,61 @@ [查看中文](./RELEASE_CN.md) +## MindSpore Insight 2.3.0 Release Notes + +### Major Features and Improvements + +#### Profiler + +- [STABLE] In PyNative mode, Timeline integrates host profiling information, including task time and user-side call stack information. + +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! + +## MindSpore Insight 2.3.0-rc2 Release Notes + +### Bug Fixes + +- [I9JTLU] Fixed the "system error" reported on the summary timeline download page. + +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! + +## MindSpore Insight 2.3.0-rc1 Release Notes + +### Major Features and Improvements + +#### Profiler + +- [BETA] Dynamically start and stop profiling. Users can collect profiling data in real time according to the training situation, reducing the amount of data collected. +- [BETA] Profiling the communication operator time-consuming matrix. Users can find cluster communication performance bottlenecks by analyzing the communication operator time-consuming matrix. +- [BETA] Improve the performance of Ascend environment in parsing profiling data. +- [BETA] Supports offline analysis of data generated by Profiling. Users can collect data first and then parse the data as needed. +- [BETA] Supports collecting performance data of HBM, PCIe, and l2_cache to enrich performance analysis indicators. 
+ +#### Dump + +- [BETA] The statistical information saved by Dump records MD5 values, and users can determine small differences in tensor values through MD5 values. +- [BETA] Dump supports the bfloat16 data type and supports users to locate bfloat16 type operator accuracy issues. + +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! + ## MindSpore Insight 2.2.0 Release Notes ### Major Features and Improvements @@ -88,7 +143,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. Contributions of any kind are welcome! -## MindInsight 2.0.0-alpha Release Notes +## MindSpore Insight 2.0.0-alpha Release Notes ### Major Features and Improvements @@ -113,7 +168,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. Contributions of any kind are welcome! -## MindInsight 1.9.0 Release Notes +## MindSpore Insight 1.9.0 Release Notes ### Major Features and Improvements @@ -142,7 +197,7 @@ Special thanks to Zhongwei Wang, Rongchen Zhu, Jiaying Lu, Zhiyong Wang, Yating Contributions of any kind are welcome! -## MindInsight 1.8.0 Release Notes +## MindSpore Insight 1.8.0 Release Notes ### Major Features and Improvements @@ -173,7 +228,7 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! -## MindInsight 1.7.0 Release Notes +## MindSpore Insight 1.7.0 Release Notes ### Major Features and Improvements @@ -198,9 +253,9 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! 
-# MindInsight 1.6.0 +# MindSpore Insight 1.6.0 -## MindInsight 1.6.0 Release Notes +## MindSpore Insight 1.6.0 Release Notes ### Major Features and Improvements @@ -241,9 +296,9 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! -# MindInsight 1.5.0 +# MindSpore Insight 1.5.0 -## MindInsight 1.5.0 Release Notes +## MindSpore Insight 1.5.0 Release Notes ### Major Features and Improvements @@ -275,7 +330,7 @@ Contributions of any kind are welcome! ##### Command Line Interface -reviously, we don't set memory limit for offline debugger. In order to use offline debugger in limited environment, we provide with memory limit options when start MindInsight server. View the [Offline Debugger Tutorial](https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.5/debugger_offline.html). +Previously, we don't set memory limit for offline debugger. In order to use offline debugger in limited environment, we provide with memory limit options when start MindSpore Insight server. View the [Offline Debugger Tutorial](https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.5/debugger_offline.html). New start command options: @@ -296,9 +351,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.4.0 +# MindSpore Insight 1.4.0 -## MindInsight 1.4.0 Release Notes +## MindSpore Insight 1.4.0 Release Notes ### Major Features and Improvements @@ -324,9 +379,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.3.0 +# MindSpore Insight 1.3.0 -## MindInsight 1.3.0 Release Notes +## MindSpore Insight 1.3.0 Release Notes ### Major Features and Improvements @@ -355,7 +410,7 @@ Contributions of any kind are welcome! 
#### Build & Installation -- [STABLE] Unified MindInsight installation package, supporting multiple Linux distributions, CPU architectures(x86/ARM), and Python versions(3.7/3.8/3.9). +- [STABLE] Unified MindSpore Insight installation package, supporting multiple Linux distributions, CPU architectures(x86/ARM), and Python versions(3.7/3.8/3.9). ### API Change @@ -387,9 +442,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.2.0 +# MindSpore Insight 1.2.0 -## MindInsight 1.2.0 Release Notes +## MindSpore Insight 1.2.0 Release Notes ### Major Features and Improvements @@ -449,9 +504,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.1.0 +# MindSpore Insight 1.1.0 -## MindInsight 1.1.0 Release Notes +## MindSpore Insight 1.1.0 Release Notes ### Major Features and Improvements @@ -515,9 +570,9 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Longfei Li, Yongxiong Liang, Chongming Contributions of any kind are welcome! -# MindInsight 1.0.0 +# MindSpore Insight 1.0.0 -## MindInsight 1.0.0 Release Notes +## MindSpore Insight 1.0.0 Release Notes ### Major Features and Improvements @@ -540,9 +595,9 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Lia Contributions of any kind are welcome! -# MindInsight 0.7.0-beta +# MindSpore Insight 0.7.0-beta -## MindInsight 0.7.0 Release Notes +## MindSpore Insight 0.7.0 Release Notes ### Major Features and Improvements @@ -564,9 +619,9 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li Contributions of any kind are welcome! 
-# MindInsight 0.6.0-beta +# MindSpore Insight 0.6.0-beta -## MindInsight 0.6.0 Release Notes +## MindSpore Insight 0.6.0 Release Notes ### Major Features and Improvements @@ -589,9 +644,9 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li Contributions of any kind are welcome! -# MindInsight 0.5.0-beta +# MindSpore Insight 0.5.0-beta -## MindInsight 0.5.0 Release Notes +## MindSpore Insight 0.5.0 Release Notes ### Major Features and Improvements @@ -623,15 +678,15 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon Contributions of any kind are welcome! -# MindInsight 0.3.0-alpha +# MindSpore Insight 0.3.0-alpha -## MindInsight 0.3.0 Release Notes +## MindSpore Insight 0.3.0 Release Notes ### Major Features and Improvements - Profiling - Provide easy to use apis for profiling start/stop and profiling data analyse (on Ascend only). - - Provide operators performance display and analysis on MindInsight UI. + - Provide operators performance display and analysis on MindSpore Insight UI. - Large scale network computation graph visualization. - Optimize summary record implementation and improve its performance. - Improve lineage usability @@ -657,15 +712,15 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon Contributions of any kind are welcome! -# MindInsight 0.2.0-alpha +# MindSpore Insight 0.2.0-alpha -## MindInsight 0.2.0 Release Notes +## MindSpore Insight 0.2.0 Release Notes ### Major Features and Improvements - Parameter distribution graph (Histogram). - Now you can use [`HistogramSummary`](https://www.mindspore.cn/docs/en/master/api_python/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindInsight to record and visualize distribution info of tensors. See our [tutorial](https://www.mindspore.cn/mindinsight/docs/en/master/index.html). 
+ Now you can use [`HistogramSummary`](https://www.mindspore.cn/docs/en/master/api_python/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindSpore Insight to record and visualize distribution info of tensors. See our [tutorial](https://www.mindspore.cn/mindinsight/docs/en/master/index.html). - Lineage support Custom information - GPU support @@ -687,9 +742,9 @@ Ye Huang, Weifeng Huang, Zhenzhong Kou, Pengting Luo, Hongzhang Li, Yongxiong Li Contributions of any kind are welcome! -# MindInsight 0.1.0-alpha +# MindSpore Insight 0.1.0-alpha -## MindInsight 0.1.0 Release Notes +## MindSpore Insight 0.1.0 Release Notes - Training process observation - Provides and displays training process information, including computational graphs and training process indicators. diff --git a/RELEASE_CN.md b/RELEASE_CN.md index 35c8ab4bfd9ae35fef7e4c5691ef6c230165d974..ab08c51905d95cbcf69231a2882bbe5d1fbac2a1 100644 --- a/RELEASE_CN.md +++ b/RELEASE_CN.md @@ -2,7 +2,62 @@ [View English](./RELEASE.md) -## MindSpore MindInsight 2.2.0 Release Notes +## MindSpore Insight 2.3.0 Release Notes + +### 主要特性和增强 + +#### Profiler + +- [STABLE] PyNative模式下Timeline整合host profiling信息,包括任务耗时、用户侧调用栈信息。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! + +## MindSpore Insight 2.3.0-rc2 Release Notes + +### Bug Fixes + +- [I9JTLU] 修复summary timeline下载页面报"system error"的错误。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! 
+ +## MindSpore Insight 2.3.0-rc1 Release Notes + +### 主要特性和增强 + +#### Profiler + +- [BETA] 动态启停profiling,用户可以根据训练情况实时采集profiling 数据,减少采集数据量。 +- [BETA] Profiling通信算子耗时矩阵,用户通过分析通信算子耗时矩阵,找出集群通信性能瓶颈。 +- [BETA] 提高昇腾环境解析Profiling数据的性能。 +- [BETA] 支持离线解析Profiling生成的数据,用户可以先采集数据,然后根据需要再解析数据。 +- [BETA] 支持采集HBM、PCIe、l2_cache性能数据,丰富性能分析指标。 + +#### Dump + +- [BETA] Dump保存的统计信息记录MD5值,用户可以通过MD5值确定张量值的微小差异。 +- [BETA] Dump支持bfloat16数据类型,支撑用户定位bfloat16类型的算子精度问题。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! + +## MindSpore Insight 2.2.0 Release Notes ### 主要特性和增强 @@ -88,7 +143,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. 欢迎以任何形式对项目提供贡献! -## MindInsight 2.0.0-alpha Release Notes +## MindSpore Insight 2.0.0-alpha Release Notes ### 主要特性和增强 @@ -113,7 +168,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. 欢迎以任何形式对项目提供贡献! -## MindInsight 1.9.0 Release Notes +## MindSpore Insight 1.9.0 Release Notes ### 主要特性和增强 @@ -140,7 +195,7 @@ Special thanks to Zhongwei Wang, Rongchen Zhu, Jiaying Lu, Zhiyong Wang, Yating 欢迎以任何形式对项目提供贡献! -## MindInsight 1.8.0 Release Notes +## MindSpore Insight 1.8.0 Release Notes ### 主要特性和增强 @@ -169,7 +224,7 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan 欢迎以任何形式对项目提供贡献! 
-## MindInsight 1.7.0 Release Notes +## MindSpore Insight 1.7.0 Release Notes ### 主要特性及改进 diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index 8db3eed8302b5671a6ddeab8baacbabb5501e0c2..199f8a1b59592b2f03509835c327b967834851f9 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -155,8 +155,7 @@ def get_training_trace_graph(): if os.path.exists(profiler_info_file): with open(profiler_info_file, 'r', encoding='utf-8') as file: profiler_info = json.loads(file.read()) - if profiler_info.get("context_mode", "graph").lower() == "pynative" or len(profiler_info.get("graph_ids", - [])) > 1: + if profiler_info.get("context_mode", "graph").lower() == "pynative": return jsonify(graph_info) if profiler_info.get("is_heterogeneous", False): graph_info = {'is_heterogeneous': True} @@ -174,12 +173,6 @@ def get_training_trace_graph(): }}) graph_info['summary'] = analyser.summary graph_info['point_info'] = analyser.point_info(graph_type) - graph_info['is_heterogeneous'] = False - - # In heterogeneous training scene, do not display step trace data. 
- cpu_op_type_file_name = f"cpu_op_type_info_{device_id}.csv" - if cpu_op_type_file_name in os.listdir(profiler_dir_abs): - graph_info = {'is_heterogeneous': True} return jsonify(graph_info) @@ -584,7 +577,8 @@ def get_msprof_timeline(): rank_list = request.args.get("rank_list", None) model_list = request.args.get("model_list", None) kind = request.args.get("kind", None) - merge_model = request.args.get("merge_model", True) + merge_model = request.args.get("merge_model", 'true') + scope_name = request.args.get("scope_name", 'false') if rank_list: rank_list = [int(rank_id) for rank_id in rank_list.split(',')] @@ -600,10 +594,14 @@ def get_msprof_timeline(): else: merge_model = True + if scope_name == 'false': + scope_name = False + else: + scope_name = True + analyser = AnalyserFactory.instance().get_analyser( 'msprof_timeline', profiler_dir_abs, None) - - timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model) + timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model, scope_name) return jsonify(timeline) @@ -840,12 +838,12 @@ def get_cluster_step_trace_info(): device_id = condition.get("device_id", "0") to_int(device_id, 'device_id') - # In heterogeneous training scene, do not display cluster step trace data. 
- cpu_op_type_file_name_prefix = "cpu_op_type_info_" - for item in os.listdir(profiler_dir_abs): - if cpu_op_type_file_name_prefix in item: - step_trace_info = {'is_heterogeneous': True} - return jsonify(step_trace_info) + profiler_info_file = os.path.join(profiler_dir_abs, f'profiler_info_{device_id}.json') + if os.path.exists(profiler_info_file): + with open(profiler_info_file, 'r', encoding='utf-8') as file: + profiler_info = json.loads(file.read()) + if profiler_info.get("is_heterogeneous", False): + return jsonify({'is_heterogeneous': True}) analyser = AnalyserFactory.instance().get_analyser( 'cluster_step_trace', profiler_dir_abs, device_id diff --git a/mindinsight/datavisual/data_transform/graph/msgraph.py b/mindinsight/datavisual/data_transform/graph/msgraph.py index 25bafcfaf87851b04b0daea36e9aee418f717acb..5ef65a9ec5b90a0fad8abb9bbcba90454c527439 100644 --- a/mindinsight/datavisual/data_transform/graph/msgraph.py +++ b/mindinsight/datavisual/data_transform/graph/msgraph.py @@ -66,7 +66,7 @@ class MSGraph(Graph): name = f'{node_proto.op_type}-op{node_id}' node_name = Node.create_node_name(node_proto.scope, name) - if node_proto.full_name and node_proto.op_type != NodeTypeEnum.LOAD.value: + if node_proto.full_name: node_name = node_proto.full_name if node_proto.full_name and any( diff --git a/mindinsight/debugger/debugger_services/debugger_offline_server.py b/mindinsight/debugger/debugger_services/debugger_offline_server.py index 5be4b7e5c35d7871d00e4227e553b8d65662f837..c17c62197b3390b88dd26d29912ae2020fe3c51e 100644 --- a/mindinsight/debugger/debugger_services/debugger_offline_server.py +++ b/mindinsight/debugger/debugger_services/debugger_offline_server.py @@ -231,6 +231,7 @@ class DebuggerOfflineManager: try: graphs = self._data_loader.load_graphs(threshold=settings.MAX_GRAPH_NODE_SIZE) except DebuggerNodeTooLarge as err: + self._metadata_stream.max_graph_node_size = settings.MAX_GRAPH_NODE_SIZE self._update_state(ServerStatus.NODE_TOO_LARGE) 
log.exception(err) return diff --git a/mindinsight/debugger/stream_handler/metadata_handler.py b/mindinsight/debugger/stream_handler/metadata_handler.py index 0215f47fa2db4ba8874d2aadcf8462a767c749bd..61b0e2fd44845b28a5b8ff26de49891db6dbc95f 100644 --- a/mindinsight/debugger/stream_handler/metadata_handler.py +++ b/mindinsight/debugger/stream_handler/metadata_handler.py @@ -40,6 +40,7 @@ class MetadataHandler(StreamHandlerBase): # maximum step number among all devices self._max_step_num = 0 self._debugger_type = DebuggerServerMode.ONLINE.value + self.max_graph_node_size = 0 @property def debugger_type(self): @@ -220,10 +221,12 @@ class MetadataHandler(StreamHandlerBase): 'graph_name': self.graph_name, 'recommendation_confirmed': self._recommendation_confirmed, 'debugger_version': self.debugger_version, - 'data_version': self.data_version + 'data_version': self.data_version, } if self.debugger_type == 'offline': metadata['total_step_num'] = self.max_step_num + if self.state == ServerStatus.NODE_TOO_LARGE.value: + metadata['max_graph_node_size'] = self.max_graph_node_size else: if not isinstance(filter_condition, list): filter_condition = [filter_condition] diff --git a/mindinsight/profiler/analyser/analyser.py b/mindinsight/profiler/analyser/analyser.py index e27e26f0f12a5ab3d0b4ed3c83a3850d50c6c855..75f8c2aee23bb590d98e176535ae6f445094d02f 100644 --- a/mindinsight/profiler/analyser/analyser.py +++ b/mindinsight/profiler/analyser/analyser.py @@ -35,7 +35,7 @@ class AicoreTypeAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. """ - _col_names = ['op_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] + _col_names = ['kernel_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] _file_name_aicore_type_time = 'aicore_intermediate_{}_type.csv' def _load(self): @@ -111,8 +111,8 @@ class AicoreDetailAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. 
""" - _col_names = ['op_name', 'op_type', 'avg_execution_time', 'execution_frequency', 'MFLOPs(10^6 cube)', - 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'full_op_name', 'op_info'] + _col_names = ['op_name', 'kernel_name', 'kernel_type', 'avg_execution_time', 'execution_frequency', + 'MFLOPs(10^6 cube)', 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'op_info'] _file_name_aicore_detail_time = 'aicore_intermediate_{}_detail.csv' _file_name_flops = 'flops_{}.txt' _file_name_framework_info = 'framework_raw_{}.csv' @@ -141,8 +141,13 @@ class AicoreDetailAnalyser(BaseAnalyser): self._filter(filter_condition) type_detail_cache = {} + is_display_full_op_name = filter_condition.get( + 'is_display_full_op_name', True + ) + kernel_type_idx = 2 if is_display_full_op_name else 1 + avg_exec_time_idx = 3 if is_display_full_op_name else 2 for detail_info in self._result: - op_type = detail_info[1] + op_type = detail_info[kernel_type_idx] if op_type not in op_type_order: continue infos = type_detail_cache.get(op_type) @@ -156,7 +161,7 @@ class AicoreDetailAnalyser(BaseAnalyser): detail_infos = type_detail_cache.get(op_type) if detail_infos is None: continue - detail_infos.sort(key=lambda item: item[2], reverse=True) + detail_infos.sort(key=lambda item: item[avg_exec_time_idx], reverse=True) result.extend(detail_infos) return { @@ -239,9 +244,9 @@ class AicoreDetailAnalyser(BaseAnalyser): return self._default_filter(item, filter_condition) def _inner_map(item: list): - inner_item = item[0:8] + inner_item = item[1:9] if is_display_full_op_name: - inner_item.append(item[8]) + inner_item.insert(0, item[0]) if is_display_detail: inner_item.append(item[9]) return inner_item @@ -268,9 +273,9 @@ class AicoreDetailAnalyser(BaseAnalyser): is_display_full_op_name (bool): Whether to display the operator full name. 
""" - self._display_col_names = self._col_names[0:8] + self._display_col_names = self._col_names[1:9] if is_display_full_op_name: - self._display_col_names.append(self._col_names[8]) + self._display_col_names.insert(0, self._col_names[0]) if is_display_detail: self._display_col_names.append(self._col_names[9]) @@ -285,8 +290,8 @@ class AicoreDetailAnalyser(BaseAnalyser): Returns: list[Union[str, float]], the converted data. """ - return [row[3], row[4], row[5], row[6], - json.loads(row[7]) if row[7] else None] + return [row[4], row[5], row[6], row[7], + json.loads(row[8]) if row[8] else None] def _get_op_detail_info(self, row, framework_infos, flops_infos): """ @@ -303,21 +308,21 @@ class AicoreDetailAnalyser(BaseAnalyser): framework_info = framework_infos.get(row[0]) flops_info = flops_infos.get(row[0], ['-', '-', '-', '-']) if len(flops_info) > 3: - return [framework_info[1], framework_info[2], + return [framework_info[0], framework_info[1], framework_info[2], self._format_float_data(float(row[1]) * self._ms_to_us), self._format_float_data(int(row[2])), self._format_float_data(flops_info[0]), self._format_float_data(flops_info[1]), self._format_float_data(flops_info[2]), self._format_float_data(flops_info[3]), - framework_info[0], framework_info[4]] - return [framework_info[1], framework_info[2], + framework_info[4]] + return [framework_info[0], framework_info[1], framework_info[2], self._format_float_data(float(row[1]) * self._ms_to_us), self._format_float_data(int(row[2])), self._format_float_data(flops_info[0]), self._format_float_data(flops_info[1]), self._format_float_data(flops_info[2]), - framework_info[3], framework_info[0], framework_info[4]] + framework_info[3], framework_info[4]] class AicpuTypeAnalyser(BaseAnalyser): @@ -332,7 +337,7 @@ class AicpuTypeAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. 
""" - _col_names = ['op_type', 'total_time', 'execution_frequency', 'percent'] + _col_names = ['kernel_type', 'total_time', 'execution_frequency', 'percent'] _file_name_aicpu_time = 'aicpu_intermediate_{}.csv' def _load(self): @@ -404,7 +409,7 @@ class AicpuDetailAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. """ - _col_names = ['op_name', 'op_type', 'avg_execution_time', 'dispatch_time', + _col_names = ['kernel_name', 'kernel_type', 'avg_execution_time', 'dispatch_time', 'execution_frequency'] _file_name_aicpu_time = 'aicpu_intermediate_{}.csv' diff --git a/mindinsight/profiler/analyser/cluster_analyser.py b/mindinsight/profiler/analyser/cluster_analyser.py index f97a61dbfbed959b29dcbc04d4c669ac4fe4f1ab..0ce85b08d6975b5d4496c1a17abc3489091e51d0 100644 --- a/mindinsight/profiler/analyser/cluster_analyser.py +++ b/mindinsight/profiler/analyser/cluster_analyser.py @@ -172,10 +172,11 @@ class ClusterStepTraceAnalyser(ClusterAnalyser): # step_trace_info[7]: fp_and_bp time # step_trace_info[8]: tail time # divided by 1e5, the unit becomes a millisecond + iter_total_time = round(float(step_trace_info[3]) / 1e5, 4) iteration_interval = round(float(step_trace_info[6]) / 1e5, 4) fp_and_bp = round(float(step_trace_info[7]) / 1e5, 4) tail = round(float(step_trace_info[8]) / 1e5, 4) - step_trace_info = [iteration_interval, fp_and_bp, tail] + step_trace_info = [iteration_interval, fp_and_bp, tail, iter_total_time] return step_trace_info def _get_cluster_step_bottleneck_info(self, step_num, stage_id): diff --git a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 9e3057c379a8b18191bfc75f9af2b556ac679662..736cb6205f58202381f937867df3a3a03d4e0317 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -13,35 +13,32 @@ # limitations under the License. 
# ============================================================================ """The Timeline Analyser.""" +import csv import json import os import glob +import re +import time +from decimal import Decimal +from concurrent.futures import ThreadPoolExecutor from marshmallow import ValidationError from mindinsight.profiler.analyser.base_analyser import BaseAnalyser from mindinsight.profiler.common.log import logger -from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path -def get_absolute_ts_start_info(pro_path) -> float: +def get_diff_time(rank_id, prof_path): """ Get difference time between ranks """ - start_json = None - for root, _, files in os.walk(pro_path): - for file in files: - if "start_info" in file and ".done" not in file: - start_json = os.path.join(root, file) - break - if start_json: - with open(start_json, "r+") as f: - info = json.load(f) - ts_us = float(info.get("collectionTimeBegin", 0)) - ts_ns = float(info.get("clockMonotonicRaw", 0)) - if not ts_us and not ts_ns: - return 0 - return ts_us - ts_ns / 1000 - return 0 + profiler_info_file = os.path.join(prof_path, os.pardir, f'profiler_info_{rank_id}.json') + if not os.path.exists(profiler_info_file): + return Decimal(0).quantize(Decimal('0.000')) + + with open(profiler_info_file, 'r') as fr: + diff_time = json.load(fr).get('diff_time', 0) + + return Decimal(diff_time).quantize(Decimal('0.000')) def get_rank_id_from_info_json(pro_path): @@ -77,14 +74,14 @@ def get_timeline_info(prof_dirs): timeline_info = {} for prof_path in prof_dirs: - rank_id, device_id = get_rank_id_from_info_json(prof_path) - ts_difference_us = get_absolute_ts_start_info(prof_path) + rank_id, _ = get_rank_id_from_info_json(prof_path) + ts_difference_us = get_diff_time(rank_id, prof_path) if rank_id is None: logger.warning('Could not find the rank id in %s, ignore this file.', prof_path) continue if rank_id not in timeline_info or (rank_id in timeline_info and prof_path > 
timeline_info.get(rank_id)[0]): - prof_path = os.path.join(prof_path, f'device_{device_id}') + prof_path = os.path.join(prof_path, 'mindstudio_profiler_output') timeline_info[rank_id] = (prof_path, ts_difference_us) return timeline_info @@ -92,17 +89,27 @@ def get_timeline_info(prof_dirs): def get_job_dir(parent_path): job_path_list = glob.glob(fr'{parent_path}/PROF_*_*') - timeline_info = get_timeline_info(job_path_list) - return timeline_info + return get_timeline_info(job_path_list) + +def get_newest_file(file_list): + ''' + Find the newest files + :param file_list: + :return: + ''' + newest_file_list = [] + newest_timestamp = '0' + for file_path in file_list: + timestamp = file_path.split('.')[0].split('/')[-1].split('_')[-1] + newest_timestamp = max(timestamp, newest_timestamp) -def get_newest_file(file_list, split_num=4): - new_file_list = {} for file_path in file_list: - key = '_'.join(file_path.split('/')[-1].split('_')[:split_num]) - if key not in new_file_list or new_file_list[key] < file_path: - new_file_list[key] = file_path - return list(new_file_list.values()) + if file_path.split('.')[0].split('/')[-1].split('_')[-1] == newest_timestamp: + newest_file_list.append(file_path) + + newest_file_list.sort() + return newest_file_list class MsprofTimelineAnalyser(BaseAnalyser): @@ -110,130 +117,148 @@ class MsprofTimelineAnalyser(BaseAnalyser): Analyse timeline data from file. 
""" - def _load(self): - """Load data according to the parsed profiling files.""" - - def _filter(self, filter_condition): + def __init__(self, profiling_dir, device_id=None): + super(MsprofTimelineAnalyser, self).__init__(profiling_dir, device_id) + self.top_scope_name = ('Default', 'Gradients', 'recompute_Default') + self.step_trace_index = 1 + self.cann_index = 2 + self.scope_index = 3 + self.ascend_hardware_index = 4 + self.hccl_index = 5 + self.cpu_index = 6 + self.overlap_index = 7 + + def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True, scope_name=False): """ - Filter the profiling data according to the filter condition. - - Args: - filter_condition (dict): The filter condition. + Get the merged timeline """ - def _parse_step_trace_merge_model(self, raw_data, model_list): - """ - Get step trace by merge models - """ - pid = None - tids = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M": - pid = event.get('pid') - elif event.get('name') == 'thread_name' and event.get("ph") == "M": - arg_name = event.get('args', {}).get('name') - if not model_list or (arg_name and int(arg_name.split(':')[-1].strip()) in model_list): - tids[event.get('tid')] = arg_name - return pid, tids + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) - def _parse_step_trace_not_merge_model(self, raw_data, model_list): - """ - Get step trace by not merge models - """ - tids = [] - for event in raw_data: - if event.get('name') == 'thread_name' and event.get("ph") == "M": - arg_name = event.get('args', {}).get('name') - if not model_list or (arg_name and int(arg_name.split(':')[-1].strip()) in model_list): - tids.append(event.get('tid')) - return tids + if rank_list: + new_sub_dirs = {} + for key, value in sub_dirs.items(): + if key in rank_list: + new_sub_dirs[key] = value + sub_dirs = new_sub_dirs - def _parse_step_trace_data(self, step_trace_file, difference_ts, model_list, merge_model): - 
""" - parse step trace data - """ + if not sub_dirs: + logger.error('Could not found any rank from %s', rank_list) + return [] + + if kind == 'summary': + start = time.time() + summary_data = self._get_summary_timeline_data(sub_dirs, merge_model) + logger.info("Summary timeline time consuming: %s", time.time() - start) + return summary_data + + if kind == 'detail': + start = time.time() + detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model, scope_name) + logger.info("Detail timeline time consuming: %s", time.time() - start) + return detail_data + return [] + + def parse_cpu_timeline(self, file_list, rank_id, difference_ts, scope_name): + """Load cpu operator data from file""" + ms_to_us = 1e3 + ps_to_ns = 1e-3 + new_pid = int(f'{self.cpu_index}{rank_id}') + process_list = [{"name": "process_name", + "pid": new_pid, + "args": { + "name": f"CPU OP Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid, + "args": {"sort_index": self.cpu_index}, "ph": "M"} + ] + tid_set = set() + thread_list = [] + new_timeline = [] + scope_data = [] try: - step_trace_file = validate_and_normalize_path( - step_trace_file, raise_key='Invalid timeline path, could not found step trace file.' - ) flags = os.O_RDONLY - with os.fdopen(os.open(step_trace_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - new_events = [] - if merge_model: - pid, tids = self._parse_step_trace_merge_model(raw_data, model_list) - - if not pid: - logger.error('Could not found process_name pid. 
method: _parse_step_trace_data') - return [] - - process_meta = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Step Trace" - }, - "ph": "M" - } - - thread_meta = { - "name": "thread_name", - "pid": pid, - "tid": pid, - "args": { - "name": "iterations" - }, - "ph": "M" - } - - new_events = [process_meta, thread_meta] - for event in raw_data: - if event.get('ph') == 'M' or event.get('tid') not in tids: - continue - - event_name = event.get('name').strip() - if event_name.startswith('Iteration') and len(event_name.split(' ')) == 2: - event['name'] = f"{tids.get(event.get('tid'))} {event_name}" - - if difference_ts and event.get('ts'): - event['ts'] += difference_ts - - event['tid'] = pid - - new_events.append(event) - - else: - tids = self._parse_step_trace_not_merge_model(raw_data, model_list) - - for event in raw_data: - if (event.get('name') == 'process_name' and event.get("ph") == "M") or \ - event.get('tid') in tids: - if difference_ts and event.get('ts'): - event['ts'] += difference_ts - new_events.append(event) + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + for line in fr: + op_list = line.strip().split(';') + op_full_name = op_list[0] + time_arr = op_list[-1] + time_arr = time_arr.split(" ") + for time_str in time_arr: + ts, dur, tid = time_str.split(",") + ts = Decimal(ts).quantize(Decimal('0.000')) * Decimal(ps_to_ns).quantize(Decimal('0.000')) + + if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): + te = ts + Decimal(dur).quantize(Decimal('0.000')) + scope_data.append((op_full_name.split('/')[:-1], ts, te)) + + ts -= difference_ts + + if int(tid) not in tid_set: + tid_set.add(int(tid)) + thread_list.append({"name": "thread_name", + "pid": new_pid, + "tid": int(tid), + "ph": "M", + 'args': {'name': f'thread {tid}'} + }) + + new_timeline.append({'name': op_list[0], + 'pid': new_pid, + 'tid': int(tid), + 'ph': 'X', + 'ts': str(ts), + 'dur': float(dur) * 
ms_to_us, + 'args': + {'type': op_list[1]} + }) + break - return new_events - except ValidationError as err: - logger.error('parse_step_trace_data failed! please theck. detail: %s', err) - raise ValidationError from err + return process_list + thread_list + new_timeline, scope_data - except (IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_step_trace_data failed! please theck. detail: %s', err) + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_cann_data failed! please theck. detail: %s', err) return [] - def _parse_overlap_analysis_data(self, file_list, difference_ts): + def get_option(self): + """ + Get the option values + """ + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) + rank_list = list(sub_dirs.keys()) + rank_list.sort() + + _, model_merged = self._get_models(sub_dirs) + model_list = list(model_merged) + model_list.sort() + + return {'rank_list': rank_list, 'model_list': model_list} + + def _load(self): + """Load data according to the parsed profiling files.""" + + def _filter(self, filter_condition): + """ + Filter the profiling data according to the filter condition. + + Args: + filter_condition (dict): The filter condition. + """ + + def _parse_overlap_analysis_data(self, file_list, rank_id, difference_ts): """ parse overlap analysis data """ try: - file_list = [validate_and_normalize_path( - file_path, raise_key='Invalid timeline path, could not found msprof json file.' - ) for file_path in file_list] flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) pid = None for event in raw_data: @@ -243,374 +268,507 @@ class MsprofTimelineAnalyser(BaseAnalyser): break if not pid: - logger.error('Could not found process_name pid. 
method: _parse_overlap_analysis_data') + logger.warning('Could not found process_name pid. method: _parse_overlap_analysis_data') return [] - process_name = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Overlap Analysis" - }, - "ph": "M" - } + new_events = [] + new_pid = int(f'{self.overlap_index}{rank_id}') + for event in raw_data: + if event.get('pid') != pid: + continue - thread_name = [{ - "name": "thread_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Computing" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 1, - "args": { - "name": "Communication" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 2, - "args": { - "name": "Communication(Not Overlapped)" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 3, - "args": { - "name": "Free" - }, - "ph": "M" - }] - new_events = [process_name] + thread_name - - tid_mapper = { - 'Computing': 0, - 'Communication': 1, - 'Communication(Not Overlapped)': 2, - 'Free': 3 - } - - for msprof_file in file_list: - flags = os.O_RDONLY - with os.fdopen(os.open(msprof_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - for event in raw_data: - if event.get('ph') == 'M': - continue - - if event.get('name') in tid_mapper: - event['pid'] = pid - event['tid'] = tid_mapper.get(event.get('name')) - - if difference_ts and event.get('ts'): - event['ts'] += difference_ts - new_events.append(event) - return new_events + if event.get('name') == 'process_name' and event.get("ph") == "M": + event["args"]["name"] += f" Rank{rank_id}" - except ValidationError as err: - logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) - raise ValidationError from err + if event.get('name') == 'process_sort_index' and event.get("ph") == "M": + event["args"]["sort_index"] = self.overlap_index - except (IOError, OSError, json.JSONDecodeError) as err: + event['pid'] = new_pid + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts += difference_ts + event['ts'] = str(ts) + + new_events.append(event) + + return new_events + + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_overlap_analysis_data failed! please theck. detail: %s', err) return [] - def _parse_ascend_hardware_data(self, file_list, difference_ts): + def _parse_step_trace_metadata(self, raw_data, model_list): """ - parse ascend hardware data + Get step trace by merge models """ - try: - file_list = [validate_and_normalize_path( - file_path, raise_key='Invalid timeline path, could not found task json file.' - ) for file_path in file_list] - flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) + pattern1 = re.compile(r'Step Trace\(Model ID:(\d)+\)') + pattern2 = re.compile(r'(\d)+') + tid_mapper = {} + pid = None + for event in raw_data: + if event.get("ph") != "M": + continue - pid = None - tid_mapper = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'Ascend Hardware': - pid = event.get('pid') + if event.get('name') == 'process_name': + pid = event.get('pid') + continue + + if event.get('name') == 'thread_name': + arg_name = event.get('args', {}).get('name') + arg_name = re.search(pattern1, arg_name) + if not arg_name: + continue + model_id = re.search(pattern2, arg_name.group()) + if not model_id: + continue + model_id = model_id.group() + tid = event.get('tid') + if not model_list or int(model_id) in model_list: + tid_mapper[tid] = f'Model {model_id}' + + return tid_mapper, pid + + def 
_parse_step_trace_merge(self, old_pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts): + """merge step trace data""" + new_events = [{ + "name": "process_name", + "pid": new_pid, + "args": { + "name": f"Step Trace Rank{rank_id}" + }, + "ph": "M" + }, { + "name": "process_sort_index", + "pid": new_pid, + "args": { + "sort_index": self.step_trace_index + }, + "ph": "M" + }, { + "name": "thread_name", + "pid": new_pid, + "tid": 0, + "args": { + "name": "iterations" + }, + "ph": "M" + }] - if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - 'Stream' in event.get('args').get('name'): - thread_name = event.get('args').get('name') - if event.get('tid') not in tid_mapper: - tid_mapper[event.get('tid')] = thread_name + for event in raw_data: + arg_name = tid_mapper.get(event.get('tid')) + if event.get('ph') == 'M' or event.get('pid') != old_pid or not arg_name: + continue + + event_name = event.get('name').strip() + if event.get('ph') == 'X' and event_name.startswith('Iteration') and len( + event_name.split(' ')) == 2: + event['name'] = f"{arg_name} {event_name}" + + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts -= difference_ts + event['ts'] = str(ts) + event['pid'] = new_pid + event['tid'] = 0 + new_events.append(event) + return new_events + + def _parse_step_trace_not_merge(self, old_pid, new_pid, rank_id, raw_data, difference_ts): + """not merge step trace data""" + new_events = [] + for event in raw_data: + if event.get('pid') != old_pid: + continue + if event.get('name') == 'process_name' and event.get('ph') == 'M': + event['args']['name'] = f"Step Trace Rank{rank_id}" + elif event.get('name') == 'process_sort_index' and event.get('ph') == 'M': + event['args']['sort_index'] = self.step_trace_index + + event['pid'] = new_pid + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts -= difference_ts + event['ts'] = str(ts) + new_events.append(event) + return new_events + + def 
_parse_step_trace_data(self, file_list, rank_id, difference_ts, model_list, merge_model): + """ + parse step trace data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + tid_mapper, pid = self._parse_step_trace_metadata(raw_data, model_list) if not pid: - logger.error('Could not found process_name pid. method: _parse_ascend_hardware_data') + logger.error('Could not found process_name pid. method: _parse_step_trace_data') return [] - process_name = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Ascend Hardware" - }, - "ph": "M" - } - - thread_name_list = [{ - "name": "thread_name", - "pid": pid, - "tid": k, - "args": { - "name": v - }, - "ph": "M" - } for k, v in tid_mapper.items()] + new_pid = int(f'{self.step_trace_index}{rank_id}') - new_events = [process_name] + thread_name_list + if merge_model: + return self._parse_step_trace_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) - for msprof_file in file_list: - with open(msprof_file, 'r') as fr: - raw_data = json.load(fr) + return self._parse_step_trace_not_merge(pid, new_pid, rank_id, raw_data, difference_ts) - for event in raw_data: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_step_trace_data failed! please theck. 
detail: %s', err) + return [] - if event.get('ph') == 'M': - continue + def _parse_msprof_metadata(self, new_pid_hardware, raw_data): + """ + Get msprof by merge models + """ + tid_mapper_hardware = {} + pid_hardware = None + pid_hccl = None + pid_cann = None + pid_overlap = None + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Ascend Hardware': + pid_hardware = event.get('pid') - event['pid'] = pid + elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ + 'Stream' in event.get('args').get('name'): + event['pid'] = new_pid_hardware + tid_mapper_hardware.update({event.get('tid'): event}) - if difference_ts and event.get('ts'): - event['ts'] += difference_ts - new_events.append(event) + elif event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'HCCL': + pid_hccl = event.get('pid') - return new_events + elif event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'CANN': + pid_cann = event.get('pid') - except ValidationError as err: - logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) - raise ValidationError from err + elif event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Overlap Analysis': + pid_overlap = event.get('pid') - except (IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_ascend_hardware_data failed! please theck. 
detail: %s', err) - return [] + result = (pid_hardware, tid_mapper_hardware, pid_hccl, pid_cann, pid_overlap) + return result - def _parse_hccl_data(self, file_list, difference_ts): + def _parse_msprof_raw_data(self, raw_data, difference_ts, tid_mapper_hardware, model_list, scope_name, **kwargs): """ - parse hccl data + Parse the msprof raw data """ - try: - file_list[0] = validate_and_normalize_path( - file_list[0], raise_key='Invalid timeline path, could not found hccl json file.' - ) - flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - pid = None - tid_mapper = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'HCCL': - pid = event.get('pid') - elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - ('Plane' in event.get('args').get('name') or 'Communication' in event.get('args').get('name')) \ - and event.get('tid') not in tid_mapper: - tid_mapper[event.get('tid')] = event.get('args').get('name') + new_events_hardware = [] + new_events_hccl = [] + new_events_cann = [] + new_events_overlap = [] - if not pid: - logger.error('Could not found process_name pid. 
method: _parse_hccl_data') - return [] + scope_data = [] - process_name = { + for event in raw_data: + model_id = event.get('args', {}).get('Model Id') + is_process = event.get('ph') == 'M' \ + and (event.get('name') == 'process_name' or event.get('name') == 'process_sort_index') + if is_process or (model_list and model_id not in model_list): + continue + + op_full_name = event.get('name') + if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000')) + scope_data.append((op_full_name.split('/')[:-1], ts, te)) + + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts -= difference_ts + event['ts'] = str(ts) + + if event.get('pid') == kwargs.get('pid_hardware') and event.get('ph') != 'M' \ + and event.get('tid') in tid_mapper_hardware: + event['pid'] = kwargs.get('new_pid_hardware') + new_events_hardware.append(event) + + elif event.get('pid') == kwargs.get('pid_hccl'): + event['pid'] = kwargs.get('new_pid_hccl') + new_events_hccl.append(event) + + elif not model_list and event.get('pid') == kwargs.get('pid_cann'): + event['pid'] = kwargs.get('new_pid_cann') + new_events_cann.append(event) + + elif not model_list and event.get('pid') == kwargs.get('pid_overlap'): + event['pid'] = kwargs.get('new_pid_overlap') + new_events_overlap.append(event) + + return new_events_hardware + new_events_hccl + new_events_cann + new_events_overlap, scope_data + + def _parse_msprof_data(self, file_list, rank_id, difference_ts, model_list, scope_name): + """ + parse ascend hardware and hccl and cann data + """ + flags = os.O_RDONLY + raw_data = [] + + new_pid_hardware = int(f'{self.ascend_hardware_index}{rank_id}') + new_pid_hccl = int(f'{self.hccl_index}{rank_id}') + new_pid_cann = int(f'{self.cann_index}{rank_id}') + new_pid_overlap = int(f'{self.overlap_index}{rank_id}') + new_metadata = [ + { "name": 
"process_name", - "pid": pid, - "tid": 0, + "pid": new_pid_hardware, "args": { - "name": "HCCL" + "name": f"Ascend Hardware Rank{rank_id}" }, "ph": "M" - } - - thread_name_list = [{ - "name": "thread_name", - "pid": pid, - "tid": k, + }, {"name": "process_sort_index", "pid": new_pid_hardware, + "args": {"sort_index": self.ascend_hardware_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_hccl, + "args": { + "name": f"HCCL Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid_hccl, + "args": {"sort_index": self.hccl_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_cann, "args": { - "name": v + "name": f"CANN Rank{rank_id}" }, "ph": "M" - } for k, v in tid_mapper.items()] + }, {"name": "process_sort_index", "pid": new_pid_cann, + "args": {"sort_index": self.cann_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_overlap, + "args": { + "name": f"Overlap Analysis Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid_overlap, + "args": {"sort_index": self.overlap_index}, "ph": "M"} + ] - new_events = [process_name] + thread_name_list + try: + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + pid_hardware, tid_mapper_hardware, pid_hccl, pid_cann, pid_overlap \ + = self._parse_msprof_metadata(new_pid_hardware, raw_data) - for hccl_file in file_list: - hccl_file = validate_and_normalize_path( - hccl_file, raise_key='Invalid timeline path, could not found hccl json file.' - ) - flags = os.O_RDONLY - with os.fdopen(os.open(hccl_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) + is_pid_valid = not pid_hardware and not pid_hccl and pid_cann and pid_overlap - for event in raw_data: - if event.get('ph') == 'M': - continue - event['pid'] = pid + if is_pid_valid: + logger.error('Could not found process_name pid. 
method: _parse_msprof_data') + return [] - if difference_ts and event.get('ts'): - event['ts'] += difference_ts - new_events.append(event) + pid_dict = {'pid_hardware': pid_hardware, 'pid_hccl': pid_hccl, + 'pid_cann': pid_cann, 'pid_overlap': pid_overlap, + 'new_pid_hardware': new_pid_hardware, 'new_pid_hccl': new_pid_hccl, + 'new_pid_cann': new_pid_cann, 'new_pid_overlap': new_pid_overlap} - return new_events + new_events, scope_data = self._parse_msprof_raw_data(raw_data, difference_ts, tid_mapper_hardware, + model_list, scope_name, **pid_dict) + return new_metadata + list(tid_mapper_hardware.values()) + new_events, scope_data - except ValidationError as err: - logger.error('parse_hccl_data failed! please theck. detail: %s', err) - raise ValidationError from err + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: + logger.error('_parse_msprof_data failed! please theck. detail: %s', err) + return [] - except (IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_hccl_data failed! please theck. 
detail: %s', err) + def _parse_scope_info(self, scope_data, rank_id, difference_ts): + """parse scope layer""" + if not scope_data: return [] + new_pid = int(f'{self.scope_index}{rank_id}') + scope_data.sort(key=lambda x: x[1]) + process_list = [ + {"name": "process_name", + "pid": new_pid, + "args": { + "name": f"Scope Layer Rank{rank_id}" + }, + "ph": "M"}, + {"name": "process_sort_index", + "pid": new_pid, + "args": {"sort_index": self.scope_index}, + "ph": "M"} + ] - def _get_summary_timeline_data(self, sub_dirs, model_list, merge_model): + new_events = [] + layer_stack = [] + for layer_name in scope_data[0][0]: + layer_stack.append([layer_name, scope_data[0][1], scope_data[0][2]]) + + for op in scope_data[1:]: + if op[1] < layer_stack[0][2]: + # 并行算子只保留前面的 + continue + flag = True # 判断上层是否合并, 上层不合并下层也不合并 + for layer_depth, layer_name in enumerate(op[0]): + if layer_depth >= len(layer_stack): + layer_stack.append([layer_name, op[1], op[2]]) + else: + if layer_stack[layer_depth][0] == layer_name and flag: + layer_stack[layer_depth][2] = op[2] # 合并 + else: + ts = layer_stack[layer_depth][1] + ts -= difference_ts + new_events.append({ + "name": layer_stack[layer_depth][0], + "pid": new_pid, + "tid": layer_depth, + "ph": "X", + "ts": str(ts), + "dur": float(layer_stack[layer_depth][2] - layer_stack[layer_depth][1]) + }) + layer_stack[layer_depth] = [layer_name, op[1], op[2]] + flag = False + + thread_list = [] + for index, layer in enumerate(layer_stack): + thread_list.extend([{ + "name": "thread_name", + "pid": new_pid, + "tid": index, + "args": { + "name": f"layer{index}" + }, + "ph": "M" + }, { + "name": "thread_sort_index", + "pid": new_pid, + "tid": index, + "args": {"sort_index": index}, + "ph": "M" + }]) + if layer: + ts = layer[1] + ts -= difference_ts + new_events.append({ + "name": layer[0], + "pid": new_pid, + "tid": index, + "ph": "X", + "ts": str(ts), + "dur": float(layer[2] - layer[1]) + }) + + return process_list + thread_list + new_events + + def 
_get_summary_timeline_data(self, sub_dirs, merge_model): """ Get summary timeline Returns: json, the content of timeline data. """ - timeline_data = {} - for rank_id, (job_dir, difference_ts) in sub_dirs.items(): - data_list = [] - - # get step trace - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*_*_*.json' - file_list = glob.glob(step_trace_file_name) - if not file_list: - logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_step_trace_data(file_list[0], difference_ts, model_list, merge_model)) - - # get overlap analysis - file_list = [] - if model_list: - for model_id in model_list: - overlap_file_name = fr'{job_dir}/timeline/msprof_*_{model_id}_*.json' - file_list.extend(glob.glob(overlap_file_name)) - else: - overlap_file_name = fr'{job_dir}/timeline/msprof_*_*_*.json' - file_list.extend(glob.glob(overlap_file_name)) - - if not file_list: - logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_overlap_analysis_data(get_newest_file(file_list), difference_ts)) - - timeline_data[rank_id] = data_list + task_list = [] + timeline_data = [] + with ThreadPoolExecutor() as pool: + for rank_id, (job_dir, difference_ts) in sub_dirs.items(): + + # get step trace + step_trace_file_name = fr'{job_dir}/step_trace_*.json' + file_list = get_newest_file(glob.glob(step_trace_file_name)) + + if not file_list: + logger.warning('Could not find step trace file in %s', job_dir) + else: + task_list.append(pool.submit(self._parse_step_trace_data, file_list, + rank_id, difference_ts, None, + merge_model)) + + # get overlap analysis + overlap_file_name = fr'{job_dir}/msprof_*.json' + file_list = get_newest_file(glob.glob(overlap_file_name)) + + if not file_list: + logger.warning('Could not find overlap analysis file in %s', job_dir) + else: + task_list.append(pool.submit(self._parse_overlap_analysis_data, file_list, + 
rank_id, difference_ts)) + + all_done = list(range(len(task_list))) + while all_done: + for ind, t in enumerate(task_list): + if ind in all_done and t.done(): + timeline_data.extend(t.result()) + all_done.remove(ind) return timeline_data - def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model): + def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model, scope_name): """ Get detail timeline Returns: json, the content of timeline data. """ - # get summary timeline data. include step_trace data and overlap data - summary_data = self._get_summary_timeline_data(sub_dirs, model_list, merge_model) - - timeline_data = {} - for rank_id, (job_dir, difference_ts) in sub_dirs.items(): - data_list = [] - - # get Ascend Hardware - file_list_hardware = [] - # get hccl - file_list_hccl = [] - - if model_list: - for model_id in model_list: - hardware_file_name = fr'{job_dir}/timeline/task_time_*_{model_id}_*.json' - file_list_hardware.extend(glob.glob(hardware_file_name)) - - hccl_file_name = fr'{job_dir}/timeline/hccl_*_{model_id}_*.json' - file_list_hccl.extend(glob.glob(hccl_file_name)) - else: - hardware_file_name = fr'{job_dir}/timeline/task_time_*_*_*.json' - file_list_hardware.extend(glob.glob(hardware_file_name)) - - hccl_file_name = fr'{job_dir}/timeline/hccl_*_*_*.json' - file_list_hccl.extend(glob.glob(hccl_file_name)) - - if not file_list_hardware: - logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware, 5), - difference_ts)) - - if not file_list_hccl: - logger.error('Could not find hccl file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_hccl_data(get_newest_file(file_list_hccl), difference_ts)) - - timeline_data[rank_id] = data_list - - detail_data = {} - for rank_id, data_d in timeline_data.items(): - data_s = summary_data.get(rank_id) - detail_data[rank_id] = data_s + 
data_d - - return detail_data - - def _merge_timeline(self, timeline_data): - """ - merge all timeline data - """ - new_events = [] - for rank_id, events in timeline_data.items(): - - for event in events: - # 区分不同rank的同一进程的pid - event["pid"] = int(''.join(x for x in str(event.get("pid")) if x.isdigit()) + str(rank_id)) - - # 进程名加上rank_id区分不同rank - if event.get("name") == "process_name" and event.get("ph") == "M": - event["args"]["name"] += f" rank{rank_id}" - new_events.append(event) - return new_events - - def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True): - """ - Get the merged timeline - """ - - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) + timeline_data = [] + task_list = [] - if rank_list: - new_sub_dirs = {} - for key, value in sub_dirs.items(): - if key in rank_list: - new_sub_dirs[key] = value - sub_dirs = new_sub_dirs - - if not sub_dirs: - logger.error('Could not found any rank from %s', rank_list) - return [] - - if kind == 'summary': - summary_data = self._get_summary_timeline_data(sub_dirs, model_list, merge_model) - return self._merge_timeline(summary_data) + _, model_merged = self._get_models(sub_dirs) + model_list_all = list(model_merged) + if model_list_all: + model_list_all.sort() + if model_list: + model_list.sort() + if model_list_all == model_list: + model_list = None + + with ThreadPoolExecutor() as pool: + for rank_id, (job_dir, difference_ts) in sub_dirs.items(): + all_scope_data = [] # 所有带scope的算子 + + # get step_trace data + step_trace_file_name = fr'{job_dir}/step_trace_*.json' + file_list_step_trace = get_newest_file(glob.glob(step_trace_file_name)) + if not file_list_step_trace: + logger.warning('Could not find step trace file in %s', job_dir) + else: + task_list.append(pool.submit(self._parse_step_trace_data, file_list_step_trace, + rank_id, difference_ts, model_list, merge_model)) + + # get Ascend Hardware 、Hccl、CANN、overlap + msprof_file_name = fr'{job_dir}/msprof_*.json' + 
file_list_msprof = get_newest_file(glob.glob(msprof_file_name)) + if not file_list_msprof: + logger.warning('Could not find msprof file in %s', job_dir) + else: + ascend_timeline, scope_data = self._parse_msprof_data(file_list_msprof, + rank_id, difference_ts, model_list, + scope_name) + timeline_data.extend(ascend_timeline) + all_scope_data.extend(scope_data) + + if not model_list: + # get cpu op + cpu_op_file_name = fr'{self._profiling_dir}/cpu_op_execute_timestamp_{rank_id}.txt' + file_list = glob.glob(cpu_op_file_name) + + if not file_list: + logger.warning('Could not find cpu op file in %s', job_dir) + else: + cpu_timeline, scope_data = self.parse_cpu_timeline(get_newest_file(file_list), + rank_id, difference_ts, scope_name) + timeline_data.extend(cpu_timeline) + all_scope_data.extend(scope_data) + + # parse scope info + task_list.append(pool.submit(self._parse_scope_info, all_scope_data, + rank_id, difference_ts)) + + all_done = list(range(len(task_list))) + while all_done: + for ind, t in enumerate(task_list): + if ind in all_done and t.done(): + timeline_data.extend(t.result()) + all_done.remove(ind) - if kind == 'detail': - detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model) - return self._merge_timeline(detail_data) - return [] + return timeline_data def _get_models(self, sub_dirs): """ @@ -619,29 +777,18 @@ class MsprofTimelineAnalyser(BaseAnalyser): model_dict = {} model_merged = set() for rank_id, (job_dir, _) in sub_dirs.items(): - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*_*_*.json' - file_list = glob.glob(step_trace_file_name) + step_trace_file_name = fr'{job_dir}/step_trace_*.csv' + file_list = get_newest_file(glob.glob(step_trace_file_name)) + if not file_list: + continue model_set = set() - for file_name in file_list: - last_name = file_name.rsplit('/', maxsplit=1)[-1] - last_name_suffix = last_name.split(f'step_trace_')[-1] - model_id = last_name_suffix.split('_')[1] - model_set.add(int(model_id)) + 
with open(file_list[0], 'r', newline='') as fr: + reader = csv.DictReader(fr, delimiter=',', quotechar='"') + for row in reader: + model_id = row.get('Model ID') + if model_id: + model_set.add(float(model_id)) + model_dict[rank_id] = model_set model_merged.update(model_set) return model_dict, model_merged - - def get_option(self): - """ - Get the option values - """ - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) - rank_list = list(sub_dirs.keys()) - rank_list.sort() - - _, model_merged = self._get_models(sub_dirs) - model_list = list(model_merged) - model_list.sort() - - return {'rank_list': rank_list, 'model_list': model_list} diff --git a/mindinsight/profiler/analyser/timeline_processor.py b/mindinsight/profiler/analyser/timeline_processor.py index 33589ed1231b9e12fef2d477010b343a3d811e42..b85801e98ff83ddc1493f6526860bcfabdcc53ed 100644 --- a/mindinsight/profiler/analyser/timeline_processor.py +++ b/mindinsight/profiler/analyser/timeline_processor.py @@ -45,22 +45,24 @@ class TimelineService: operator_time_maps = {} for device_name, cur_op_nodes in self.op_nodes.items(): - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in self.all_data.get(device_name): if item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break operator_time_maps[device_name] = {} for item in cur_op_nodes: - if step_start < item['ts'] < step_end or\ - item['dur'] < step_start < item['ts'] + item['dur']: - operator_time_maps.get(device_name)[item['name']] = {"st": item['ts'],\ - "ed": item['ts'] + item['dur'], "dur": item['dur']} - max_time = max(max_time, item['ts'] + item['dur']) - min_time = min(min_time, item['ts']) + ts = float(item['ts']) + dur = float(item['dur']) + if step_start < ts < step_end or dur < step_start < ts + dur: + operator_time_maps.get(device_name)[item['name']] = {"st": ts, + "ed": ts + dur, + "dur": dur} + 
max_time = max(max_time, ts + dur) + min_time = min(min_time, ts) def cmp(a): return int(re.search(r'\d+', a[0]).group()) @@ -80,10 +82,10 @@ class TimelineService: aggre_node['st_min'] = min(aggre_node.get('st_min'), node.get('st')) aggre_node['ed_max'] = max(aggre_node.get('ed_max'), node.get('ed')) aggre_node['ed_min'] = min(aggre_node.get('ed_min'), node.get('ed')) - aggre_node['st_avg'] = (node['st'] + aggre_node['st_avg'] *\ - aggre_node['n']) / (aggre_node['n'] + 1) - aggre_node['ed_avg'] = (node['ed'] + aggre_node['ed_avg'] *\ - aggre_node['n']) / (aggre_node['n'] + 1) + aggre_node['st_avg'] = (node['st'] + aggre_node['st_avg'] * + aggre_node['n']) / (aggre_node['n'] + 1) + aggre_node['ed_avg'] = (node['ed'] + aggre_node['ed_avg'] * + aggre_node['n']) / (aggre_node['n'] + 1) aggre_node['n'] += 1 else: data[node_name] = { @@ -96,8 +98,8 @@ class TimelineService: 'n': 1 } stage_data[stage_name] = {"data": data, "devices": device_names} - TimelineData = collections.namedtuple('TimelineData',\ - ['operator_time_maps', 'min_time', 'max_time', 'stage_data']) + TimelineData = collections.namedtuple('TimelineData', + ['operator_time_maps', 'min_time', 'max_time', 'stage_data']) timeline_data = TimelineData(operator_time_maps, min_time, max_time, stage_data) return timeline_data @@ -151,8 +153,9 @@ class TimelineService: minn = float('inf') for item in cur_one_step_op: if 'name' in item and 'AllReduce' in item['name']: - if item['ts'] < minn: - minn = item['ts'] + ts = float(item['ts']) + if ts < minn: + minn = ts min_all_reduce = item if min_all_reduce == '': continue @@ -162,8 +165,8 @@ class TimelineService: for item in one_step_op.get(device_name2): if item['name'] == min_all_reduce['name']: visited.add(device_name2) - min_all_reduce['ed'] = min_all_reduce['ts'] + min_all_reduce['dur'] - item['ed'] = item['ts'] + item['dur'] + min_all_reduce['ed'] = float(min_all_reduce['ts']) + float(min_all_reduce['dur']) + item['ed'] = float(item['ts']) + float(item['dur']) 
self.align_info[device_name2] = min_all_reduce['ed'] - item['ed'] stages.append(device_name2) break @@ -174,7 +177,7 @@ class TimelineService: for device_name, cur_data in self.all_data.items(): for item in cur_data: if 'ts' in item: - item['ts'] += self.align_info.get(device_name) + item['ts'] = float(item['ts']) + self.align_info.get(device_name) self.stage_device_map = {} def cmp(a): @@ -196,15 +199,15 @@ class TimelineService: ret = {} for device_name, cur_op_nodes in self.op_nodes.items(): ret[device_name] = [] - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in self.all_data.get(device_name): if item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break for item in cur_op_nodes: - if item['ts'] > step_start and item['ts'] < step_end: + if step_start < float(item['ts']) < step_end: ret.get(device_name).append(item) return ret @@ -218,16 +221,16 @@ class TimelineService: ret = {} for device_name, cur_data in self.all_data.items(): ret[device_name] = [] - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in cur_data: if item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break for item in cur_data: if 'scope_level' in item: - if item['ts'] > step_start and item['ts'] < step_end: + if step_start < float(item['ts']) < step_end: ret.get(device_name).append(item) return ret @@ -247,7 +250,7 @@ def _find_scope(cur_scope_by_level, op): l = 0 r = len(intervals) - 1 ans = -1 - t = op['ts'] + t = float(op['ts']) while l <= r: mid = (l + r) >> 1 if intervals[mid][0] <= t: @@ -255,6 +258,6 @@ def _find_scope(cur_scope_by_level, op): l = mid + 1 else: r = mid - 1 - if ans != -1 and intervals[ans][0] + intervals[ans][1] >= op['ts'] + op['dur']: + if ans != -1 and intervals[ans][0] + intervals[ans][1] >= float(op['ts']) 
+ float(op['dur']): return cur_scope, True return "", False diff --git a/mindinsight/profiler/common/util.py b/mindinsight/profiler/common/util.py index 0101d1bfe7c6b29184d9225bd2f63de0af3a5d01..a339010f0939a04446a6929aae33c231dab99a82 100644 --- a/mindinsight/profiler/common/util.py +++ b/mindinsight/profiler/common/util.py @@ -251,6 +251,25 @@ def get_profile_data_version(profiler_dir): return {} +def get_all_export_flag(profiler_dir): + """ get the flag what means whether support all-export mode. """ + + profile_info_pattern = re.compile(r"profiler_info_(\d+).json") + profile_info_file = None + for f_name in os.listdir(profiler_dir): + re_match = re.match(profile_info_pattern, f_name) + if re_match: + profile_info_file = re_match.group() + break + if profile_info_file: + full_path = os.path.join(profiler_dir, profile_info_file) + with open(full_path, 'r') as fr: + data = json.load(fr) + return data.get('all_export', False) + + return False + + def get_parallel_message(profiler_dir): """get the parallel message""" diff --git a/mindinsight/profiler/proposer/allproposers/parallel_proposer.py b/mindinsight/profiler/proposer/allproposers/parallel_proposer.py index 3f291ffd7303462d98833d8488ca233a6d0db02e..296cc5fe8c47ad696e8c8463f964c80a0b0d3d44 100644 --- a/mindinsight/profiler/proposer/allproposers/parallel_proposer.py +++ b/mindinsight/profiler/proposer/allproposers/parallel_proposer.py @@ -116,7 +116,10 @@ class ParallelProposer(Proposer): percent = 0.0 for step_interval in step_intervals: # get how much more than the average - proportion = step_interval["step_interval"] / avg_step_interval - 1 + if avg_step_interval == 0: + proportion = 0.0 + else: + proportion = step_interval["step_interval"] / avg_step_interval - 1 if proportion > self._step_interval_threshold and proportion > percent: rank_id = step_interval["rank_id"] val = step_interval["step_interval"] diff --git a/mindinsight/ui/src/components/operator-unit.vue 
b/mindinsight/ui/src/components/operator-unit.vue index 39efec02f55b7b966aa1baf3cf1e36a7216ed058..965f1110a19277560daa05ce40281598ae266e45 100644 --- a/mindinsight/ui/src/components/operator-unit.vue +++ b/mindinsight/ui/src/components/operator-unit.vue @@ -734,7 +734,7 @@ export default { }, pageTotal: 0, op_filter_condition: { - op_type: { + kernel_type: { in: [k[0]], }, }, diff --git a/mindinsight/ui/src/locales/en-us.json b/mindinsight/ui/src/locales/en-us.json index 3382a5b7d4fd2e51a0bd9262f869215fa9cdd94f..f4e53a4101268fdcdb8e449d2ab2935afcc3e967 100644 --- a/mindinsight/ui/src/locales/en-us.json +++ b/mindinsight/ui/src/locales/en-us.json @@ -308,7 +308,9 @@ "classificationOperator": "Type", "card": " ", "searchByType": "Enter operator type", + "searchByKernelType": "Enter kernel type", "searchByName": "Enter operator name", + "searchByKernelName": "Enter kernel name", "operatorInfo": "Operator", "kernelInfo": "Kernel", "searchByCoreName": "Enter kernel name", @@ -326,6 +328,7 @@ "titleText": "Profiling - Single-host", "profilingDashboard": "Profiling Dashboard", "showAverage": "Average value", + "iterTotalTime": "Step Total Time", "iterationGapTime": "Step Interval", "fpBpTime": "Forward and Backward Propagation", "fpTime": "Forward Propagation", @@ -630,6 +633,7 @@ "modelList": "subgraphs: ", "kind": "data kind: ", "mergeModel": "merge subgraphs: ", + "scopeName": "split op scope layer: ", "select": "default select all", "flopsScopeTipOne": "Node movement: Drag the scope node to move it to the specified position.", "flopsScopeTipTwo": "Reset: Restore the graphics to the Initial state.", diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index e1bff9e4a7d4c667da31ecc19c92e81a5a8fc608..4f2b4f4b27006784c40143d8da981e9dea6d44ed 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -307,7 +307,9 @@ "classificationOperator": "分类", "card": "卡", "searchByType": "请输入算子类型搜索", + 
"searchByKernelType": "请输入Kernel类型搜索", "searchByName": "请输入算子名称搜索", + "searchByKernelName": "请输入Kernel名称搜索", "searchByCoreName": "请输入内核名称搜索", "searchByCoreFullName": "请输入算子全名搜索", "operatorInfo": "算子信息", @@ -325,6 +327,7 @@ "titleText": "性能分析 - 单机", "profilingDashboard": "性能看板", "showAverage": "展示平均值", + "iterTotalTime": "迭代总耗时", "iterationGapTime": "迭代间隙时间", "fpBpTime": "前向+反向时间", "fpTime": "前向时间", @@ -630,6 +633,7 @@ "modelList": "子图:", "kind": "数据类型:", "mergeModel": "合并多子图:", + "scopeName": "切分算子scope层级:", "select": "默认选择全部:", "flopsScopeTipOne": "节点移动:通过拖动scope节点,将其移动到指定位置。", "flopsScopeTipTwo": "重置:将图形恢复到初始状态。", diff --git a/mindinsight/ui/src/mixins/debugger-mixin.vue b/mindinsight/ui/src/mixins/debugger-mixin.vue index 188f53fd7c264eb28ad306c5c86e4297add6671a..63ca4895125f87139eae10b5709af1627ac51737 100644 --- a/mindinsight/ui/src/mixins/debugger-mixin.vue +++ b/mindinsight/ui/src/mixins/debugger-mixin.vue @@ -624,6 +624,7 @@ export default { if (res.data.metadata.state === this.state.node_too_large) { this.dialogVisible = true; this.nodeDataIsLarge = true; + this.maxGraphNodeSize = res.data.metadata.max_graph_node_size; return; } this.dealMetadata(res.data.metadata); diff --git a/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue b/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue index 356cf169fdd5e23cd300c1e1374f4a07300c131a..600ae84b58e3c7a45b56dcdcd26485783b2cfd99 100644 --- a/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue +++ b/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue @@ -714,7 +714,6 @@ export default { this.queryTimelineInfo(); this.initPieChart(); this.getProccessSummary(); - // this.queryTrainingTrace(); if(this.isDynamic){ this.$nextTick(() => { this.initDynamicShape(); @@ -1077,7 +1076,6 @@ export default { } } else { this.svg.totalHeight = 0; - // this.svg.noData = true; this.svg.data = []; this.svg.initOver = true; this.removeTrace(); @@ -1500,7 +1498,7 
@@ export default { step_filter: ["1"], }, }; - let details = [];// + let details = []; let series = []; let legend = []; let ssChart = []; @@ -1591,7 +1589,7 @@ export default { dispaly_op_type: this.topOperatorValueGPU, }, }; - let details = [];// + let details = []; let series = []; let legend = []; RequestService.queryDynamicShapeGPU(params).then( @@ -1635,9 +1633,7 @@ export default { details.push(content) } ); - // this.getFormatterDetailData(row,isSort); } - // this.operatorOptions.xAxis.data = series[0].data.map((_v, i) => i + 1); this.operatorOptions.series = series; this.operatorOptions.legend.data = legend; @@ -1650,7 +1646,6 @@ export default { this.operatorOptions.legend.tooltip.formatter = (params) =>{ return this.formatLegendTip(params); }; - // search this.$nextTick(() => { this.chartObj.setOption(this.operatorOptions, true); this.drawChart(); diff --git a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue index 36bda447cffcefe5366c95bb15c61ada86288361..b6c8e732bbd79c3e8c28501d2a77964412805df1 100644 --- a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue @@ -80,6 +80,7 @@ export default { this.$t('profiling.iterationGapTime'), this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], title: this.$t('profilingCluster.stepChartTitle'), }, // Chart object of performance window @@ -149,10 +150,6 @@ export default { }; RequestService.getClusterInfo(params) .then((res) => { - if (typeof res.data === 'object' && res.data.is_heterogeneous) { - this.performanceState = HETEROGENEOUS; - return; - } if (res?.data?.info?.length > 0) { let chartData = []; const parallelMode = res.data['parallel-mode']; @@ -164,6 +161,7 @@ export default { this.$t('profiling.iterationGapTime'), 
this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], }, 'model-parallel': { diff --git a/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue b/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue index 4e48bcef34287e8aadf5ddf97b1614f391d3be39..5a7475ed1c4676d75b72814e7f003c64232b3b1e 100644 --- a/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue +++ b/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue @@ -227,6 +227,7 @@ export default { iteration_interval: this.$t('profiling.iterationGapTime'), fp_and_bp: this.$t('profiling.fpBpTime'), tail: this.$t('profiling.tailTime'), + iter_total_time: this.$t('profiling.iterTotalTime'), communication_alone: this.$t('profilingCluster.communicationAloneTime'), computation: this.$t('profilingCluster.computationTime'), receive_alone: this.$t('profilingCluster.receiveAloneTime'), @@ -278,8 +279,9 @@ export default { this.$t('profiling.iterationGapTime'), this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], - cols: ['iteration_interval', 'fp_and_bp', 'tail'], + cols: ['iteration_interval', 'fp_and_bp', 'tail', 'iter_total_time'], tips: [ { label: this.$t('profiling.iterationGapTime'), diff --git a/mindinsight/ui/src/views/profiling/single/performance/operator.vue b/mindinsight/ui/src/views/profiling/single/performance/operator.vue index cd987c3fc3391e3f7be72caa1f567deba7187a0c..28d4b1d06946e9e58d0e1f4d6a866bb4659d4b24 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/operator.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/operator.vue @@ -157,55 +157,55 @@ export default { coreSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { 
label: - this.$t('operator.searchByName') + + this.$t('operator.searchByKernelName') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_name', + type: 'kernel_name', }, }, cpuSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, }, hostSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { label: - this.$t('operator.searchByName') + + this.$t('operator.searchByKernelName') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_name', + type: 'kernel_name', }, }, aicoreOpChart:{ diff --git a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue index d0b14b890796f4a001eb82321d2930d12700d996..9573620657695357c175bdbf1faafdd90f396e8b 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue @@ -73,7 +73,7 @@ limitations under the License. alt="" />
{{$t("public.dataLoading")}}
-{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noStepStraceData")}}
+{{$t("public.noStepStraceData")}}
{{$t("public.dataLoading")}}
-{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noData")}}
+{{$t("public.noData")}}
{{$t("public.dataLoading")}}
-{{$t("public.noData")}}
-