From b50b312833bb4676744ed2873db631a38225249b Mon Sep 17 00:00:00 2001 From: z30020733 Date: Tue, 5 Dec 2023 17:14:52 +0800 Subject: [PATCH 01/19] Adapt to msprof all-export interface. --- mindinsight/backend/profiler/profile_api.py | 11 +- .../analyser/msprof_timeline_analyser.py | 524 +++++++++++++++++- mindinsight/profiler/common/util.py | 19 + .../performance/performance-dashboard.vue | 252 ++++----- 4 files changed, 664 insertions(+), 142 deletions(-) diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index 8db3eed8..e7bc3b7b 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -31,7 +31,7 @@ from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir, \ - check_train_job_and_profiler_dir, get_profile_data_version + check_train_job_and_profiler_dir, get_profile_data_version, get_all_export_flag from mindinsight.profiler.common.validator.validate import validate_condition, validate_ui_proc from mindinsight.profiler.common.validator.validate import validate_minddata_pipeline_condition from mindinsight.profiler.common.validator.validate_path import \ @@ -600,8 +600,13 @@ def get_msprof_timeline(): else: merge_model = True - analyser = AnalyserFactory.instance().get_analyser( - 'msprof_timeline', profiler_dir_abs, None) + flag = get_all_export_flag(profiler_dir_abs) + if flag: + analyser = AnalyserFactory.instance().get_analyser( + 'msprof_timeline', profiler_dir_abs, None) + else: + analyser = AnalyserFactory.instance().get_analyser( + 'msprof_timeline_old', profiler_dir_abs, None) timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model) diff --git 
a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 9e3057c3..0883eed8 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -13,9 +13,15 @@ # limitations under the License. # ============================================================================ """The Timeline Analyser.""" +import csv import json import os import glob +import re +import time +from decimal import Decimal +import logging as logger +from concurrent.futures import ThreadPoolExecutor from marshmallow import ValidationError from mindinsight.profiler.analyser.base_analyser import BaseAnalyser @@ -36,11 +42,11 @@ def get_absolute_ts_start_info(pro_path) -> float: if start_json: with open(start_json, "r+") as f: info = json.load(f) - ts_us = float(info.get("collectionTimeBegin", 0)) - ts_ns = float(info.get("clockMonotonicRaw", 0)) + ts_us = Decimal(info.get("collectionTimeBegin", 0)).quantize(Decimal('0.000')) + ts_ns = Decimal(info.get("clockMonotonicRaw", 0)).quantize(Decimal('0.000')) if not ts_us and not ts_ns: return 0 - return ts_us - ts_ns / 1000 + return ts_us - ts_ns / Decimal(1000) return 0 @@ -92,20 +98,19 @@ def get_timeline_info(prof_dirs): def get_job_dir(parent_path): job_path_list = glob.glob(fr'{parent_path}/PROF_*_*') - timeline_info = get_timeline_info(job_path_list) - return timeline_info + return get_timeline_info(job_path_list) -def get_newest_file(file_list, split_num=4): +def get_newest_file(file_list): new_file_list = {} for file_path in file_list: - key = '_'.join(file_path.split('/')[-1].split('_')[:split_num]) + key = '_'.join(file_path.split('.')[0].split('/')[-1].split('_')[:-1]) if key not in new_file_list or new_file_list[key] < file_path: new_file_list[key] = file_path return list(new_file_list.values()) -class MsprofTimelineAnalyser(BaseAnalyser): +class MsprofTimelineOldAnalyser(BaseAnalyser): """ Analyse 
timeline data from file. """ @@ -198,7 +203,8 @@ class MsprofTimelineAnalyser(BaseAnalyser): event['name'] = f"{tids.get(event.get('tid'))} {event_name}" if difference_ts and event.get('ts'): - event['ts'] += difference_ts + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) event['tid'] = pid @@ -312,7 +318,8 @@ class MsprofTimelineAnalyser(BaseAnalyser): event['tid'] = tid_mapper.get(event.get('name')) if difference_ts and event.get('ts'): - event['ts'] += difference_ts + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) new_events.append(event) return new_events @@ -387,7 +394,8 @@ class MsprofTimelineAnalyser(BaseAnalyser): event['pid'] = pid if difference_ts and event.get('ts'): - event['ts'] += difference_ts + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) new_events.append(event) return new_events @@ -464,7 +472,8 @@ class MsprofTimelineAnalyser(BaseAnalyser): event['pid'] = pid if difference_ts and event.get('ts'): - event['ts'] += difference_ts + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) new_events.append(event) return new_events @@ -550,7 +559,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): if not file_list_hardware: logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) else: - data_list.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware, 5), + data_list.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), difference_ts)) if not file_list_hccl: @@ -645,3 +654,492 @@ class MsprofTimelineAnalyser(BaseAnalyser): model_list.sort() return {'rank_list': rank_list, 'model_list': model_list} + + +class MsprofTimelineAnalyser(BaseAnalyser): + """ + Analyse timeline data from file. 
+ """ + + def _load(self): + """Load data according to the parsed profiling files.""" + + def _filter(self, filter_condition): + """ + Filter the profiling data according to the filter condition. + + Args: + filter_condition (dict): The filter condition. + """ + + def _parse_step_trace_metadata(self, raw_data, model_list): + """ + Get step trace by merge models + """ + pattern1 = re.compile(r'Step Trace\(Model ID:(\d)+\)') + pattern2 = re.compile(r'(\d)+') + metadata = {} + pid = None + for event in raw_data: + if event.get("ph") != "M": + continue + + if event.get('name') == 'process_name': + pid = event.get('pid') + + if pid not in metadata: + metadata[pid] = {} + continue + + if event.get('name') == 'thread_name': + arg_name = event.get('args', {}).get('name') + arg_name = re.search(pattern1, arg_name) + if not arg_name: + continue + model_id = re.search(pattern2, arg_name.group()) + if not model_id: + continue + model_id = model_id.group() + tid = event.get('tid') + if not model_list or int(model_id) in model_list: + metadata[event.get('pid')][tid] = f'Model {model_id}' + + return metadata, pid + + def _parse_step_trace_merge(self, rank_id, old_pid, new_pid, raw_data, metadata, difference_ts): + """parse data with merge model mode """ + + new_events = [{ + "name": "process_name", + "pid": new_pid, + "args": { + "name": f"Step Trace Rank{rank_id}" + }, + "ph": "M" + }, { + "name": "thread_name", + "pid": new_pid, + "tid": 0, + "args": { + "name": "iterations" + }, + "ph": "M" + }] + + for event in raw_data: + arg_name = metadata.get(old_pid, {}).get(event.get('tid')) + if event.get('ph') == 'M' or event.get('pid') != old_pid or not arg_name: + continue + + event_name = event.get('name').strip() + if event.get('ph') == 'X' and event_name.startswith('Iteration') and len( + event_name.split(' ')) == 2: + event['name'] = f"{arg_name} {event_name}" + + if difference_ts and event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = 
str(ts + difference_ts) + event['pid'] = new_pid + event['tid'] = 0 + new_events.append(event) + return new_events + + def _parse_step_trace_notmerge(self, rank_id, old_pid, new_pid, raw_data, metadata, difference_ts): + """parse data with not merge model mode """ + + new_events = [] + for event in raw_data: + arg_name = metadata.get(old_pid, {}).get(event.get('tid')) + if event.get('pid') != old_pid or not arg_name: + continue + if event.get('name') == 'process_name' and event.get('ph') == 'M': + event['args']['name'] = f"Step Trace Rank{rank_id}" + + event['pid'] = new_pid + + if difference_ts and event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) + new_events.append(event) + return new_events + + def _parse_step_trace_data(self, file_list, rank_id, difference_ts, model_list, merge_model): + """ + parse step trace data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + metadata, old_pid = self._parse_step_trace_metadata(raw_data, model_list) + if not metadata: + logger.error('Could not found process_name pid. method: _parse_step_trace_data') + return [] + + new_pid = int(f'2{rank_id}') + if merge_model: + return self._parse_step_trace_merge(rank_id, old_pid, new_pid, raw_data, metadata, difference_ts) + return self._parse_step_trace_notmerge(rank_id, old_pid, new_pid, raw_data, metadata, difference_ts) + except ValidationError as err: + logger.error('parse_step_trace_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_step_trace_data failed! please theck. 
detail: %s', err) + return [] + + def _parse_overlap_analysis_data(self, file_list, rank_id, difference_ts): + """ + parse overlap analysis data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + pid = None + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Overlap Analysis': + pid = event.get('pid') + break + + if not pid: + print('Could not found process_name pid. method: _parse_overlap_analysis_data') + return [] + + new_events = [] + new_pid = int(f'1{rank_id}') + for event in raw_data: + if event.get('pid') != pid: + continue + + if event.get('name') == 'process_name' and event.get("ph") == "M": + event["args"]["name"] += f" Rank{rank_id}" + + event['pid'] = new_pid + if difference_ts and event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) + + new_events.append(event) + + return new_events + + except ValidationError as err: + print('parse_overlap_analysis_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + print('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) + return [] + + def _parse_ascend_hardware_data(self, file_list, rank_id, difference_ts, model_list): + """ + parse ascend hardware data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + new_events = [] + pid = None + tid_mapper = {} + tid_set = set() + new_pid = int(f'3{rank_id}') + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Ascend Hardware': + pid = event.get('pid') + continue + + if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ + 'Stream' in event.get('args').get('name'): + event['pid'] = new_pid + tid_mapper.update({event.get('tid'): event}) + continue + + if event.get("ph") != "M": + model_id = event.get('args', {}).get('Model Id') + tid = event.get('tid') + if model_list and model_id not in model_list: + continue + + if difference_ts and event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) + event['pid'] = new_pid + tid_set.add(tid) + new_events.append(event) + + if not pid: + logger.error('Could not found process_name pid. method: _parse_ascend_hardware_data') + return [] + + new_metadata = [{ + "name": "process_name", + "pid": new_pid, + "args": { + "name": f"Ascend Hardware Rank{rank_id}" + }, + "ph": "M" + }] + + for tid in tid_set: + new_metadata.append(tid_mapper.get(tid)) + + return new_metadata + new_events + + except ValidationError as err: + logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_ascend_hardware_data failed! please theck. 
detail: %s', err) + return [] + + def _parse_hccl_data(self, file_list, rank_id, difference_ts, model_list): + """ + parse hccl data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + pid = None + tid_mapper = {} + tid_set = set() + new_events = [] + new_pid = int(f'4{rank_id}') + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'HCCL': + pid = event.get('pid') + continue + + if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ + ('Plane' in event.get('args').get('name') or 'Communication' in event.get('args').get('name')): + event['pid'] = new_pid + tid_mapper.update({event.get('tid'): event}) + continue + + if event.get("ph") != "M": + model_id = event.get('args', {}).get('model id') + tid = event.get('tid') + if model_list and model_id not in model_list: + continue + + if difference_ts and event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) + + event['pid'] = new_pid + tid_set.add(tid) + new_events.append(event) + + if not pid: + logger.error('Could not found process_name pid. method: _parse_hccl_data') + return [] + + new_metadata = [{ + "name": "process_name", + "pid": new_pid, + "args": { + "name": f"HCCL Rank{rank_id}" + }, + "ph": "M" + }] + + for tid in tid_set: + new_metadata.append(tid_mapper.get(tid)) + + return new_metadata + new_events + + except ValidationError as err: + logger.error('parse_hccl_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_hccl_data failed! please theck. detail: %s', err) + return [] + + def _get_summary_timeline_data(self, sub_dirs, merge_model): + """ + Get summary timeline + Returns: + json, the content of timeline data. 
+ """ + task_list = [] + timeline_data = [] + with ThreadPoolExecutor() as pool: + for rank_id, (job_dir, difference_ts) in sub_dirs.items(): + + # get step trace + step_trace_file_name = fr'{job_dir}/timeline/step_trace_*.json' + file_list = glob.glob(step_trace_file_name) + if not file_list: + logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_step_trace_data, get_newest_file(file_list), + rank_id, difference_ts, None, + merge_model)) + + # get overlap analysis + overlap_file_name = fr'{job_dir}/timeline/msprof_*.json' + file_list = glob.glob(overlap_file_name) + + if not file_list: + logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_overlap_analysis_data, get_newest_file(file_list), + rank_id, difference_ts)) + + all_done = list(range(len(task_list))) + print(all_done) + while all_done: + for ind, t in enumerate(task_list): + if ind in all_done and t.done(): + timeline_data.extend(t.result()) + all_done.remove(ind) + + return timeline_data + + def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model): + """ + Get detail timeline + Returns: + json, the content of timeline data. 
+ """ + + timeline_data = [] + task_list = [] + with ThreadPoolExecutor() as pool: + for rank_id, (job_dir, difference_ts) in sub_dirs.items(): + + # get step_trace data + step_trace_file_name = fr'{job_dir}/timeline/step_trace_*.json' + file_list_step_trace = glob.glob(step_trace_file_name) + if not file_list_step_trace: + logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_step_trace_data, get_newest_file(file_list_step_trace), + rank_id, difference_ts, model_list, + merge_model)) + + # timeline_data.extend(self._parse_step_trace_data(get_newest_file(file_list_step_trace), + # rank_id, difference_ts, model_list, + # merge_model)) + + # get Ascend Hardware + hardware_file_name = fr'{job_dir}/timeline/task_time_*.json' + file_list_hardware = glob.glob(hardware_file_name) + if not file_list_hardware: + logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_ascend_hardware_data, get_newest_file(file_list_hardware), + rank_id, difference_ts, model_list)) + + # timeline_data.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), + # rank_id, difference_ts, model_list)) + + # get hccl + hccl_file_name = fr'{job_dir}/timeline/hccl_*.json' + file_list_hccl = glob.glob(hccl_file_name) + if not file_list_hccl: + logger.error('Could not find hccl file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_hccl_data, get_newest_file(file_list_hccl), + rank_id, difference_ts, model_list)) + + # timeline_data.extend(self._parse_hccl_data(get_newest_file(file_list_hccl), + # rank_id, difference_ts, model_list)) + + all_done = list(range(len(task_list))) + print(all_done) + while all_done: + for ind, t in enumerate(task_list): + if ind in all_done and t.done(): + timeline_data.extend(t.result()) + all_done.remove(ind) + + return 
timeline_data + + def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True): + """ + Get the merged timeline + """ + + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) + + if rank_list: + new_sub_dirs = {} + for key, value in sub_dirs.items(): + if key in rank_list: + new_sub_dirs[key] = value + sub_dirs = new_sub_dirs + + if not sub_dirs: + logger.error('Could not found any rank from %s', rank_list) + return [] + + if kind == 'summary': + start = time.time() + summary_data = self._get_summary_timeline_data(sub_dirs, merge_model) + print(time.time() - start) + return summary_data + + if kind == 'detail': + start = time.time() + detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model) + print(time.time() - start) + return detail_data + return [] + + def _get_models(self, sub_dirs): + """ + Get all models + """ + model_dict = {} + model_merged = set() + for rank_id, (job_dir, _) in sub_dirs.items(): + step_trace_file_name = fr'{job_dir}/summary/step_trace_*.csv' + file_list = glob.glob(step_trace_file_name) + file_name = max(file_list) + model_set = set() + with open(file_name, 'r', newline='') as fr: + reader = csv.DictReader(fr, delimiter=',', quotechar='"') + for row in reader: + model_id = row.get('Model ID') + if model_id: + model_set.add(int(model_id)) + + model_dict[rank_id] = model_set + model_merged.update(model_set) + return model_dict, model_merged + + def get_option(self): + """ + Get the option values + """ + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) + rank_list = list(sub_dirs.keys()) + rank_list.sort() + + _, model_merged = self._get_models(sub_dirs) + model_list = list(model_merged) + model_list.sort() + + return {'rank_list': rank_list, 'model_list': model_list} diff --git a/mindinsight/profiler/common/util.py b/mindinsight/profiler/common/util.py index 0101d1bf..a339010f 100644 --- a/mindinsight/profiler/common/util.py +++ 
b/mindinsight/profiler/common/util.py @@ -251,6 +251,25 @@ def get_profile_data_version(profiler_dir): return {} +def get_all_export_flag(profiler_dir): + """ get the flag what means whether support all-export mode. """ + + profile_info_pattern = re.compile(r"profiler_info_(\d+).json") + profile_info_file = None + for f_name in os.listdir(profiler_dir): + re_match = re.match(profile_info_pattern, f_name) + if re_match: + profile_info_file = re_match.group() + break + if profile_info_file: + full_path = os.path.join(profiler_dir, profile_info_file) + with open(full_path, 'r') as fr: + data = json.load(fr) + return data.get('all_export', False) + + return False + + def get_parallel_message(profiler_dir): """get the parallel message""" diff --git a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue index d0b14b89..0845cf8c 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue @@ -373,6 +373,132 @@ limitations under the License.
+ + +
+
{{ $t('profiling.timeLineMsprof') }}
+
+ +
+
+
{{$t("profiling.features")}}
+
{{$t("profiling.timelineMsprofTips.title1")}}
+
{{$t("profiling.timelineMsprofTips.content11")}}
+
{{$t("profiling.timelineMsprofTips.content12")}}
+
{{$t("profiling.timelineMsprofTips.content13")}}
+
{{$t("profiling.timelineMsprofTips.content14")}}
+
+
{{$t("profiling.timelineMsprofTips.title2")}}
+
+ {{$t("profiling.timelineMsprofTips.content21.part1")}} + {{$t("profiling.timelineMsprofTips.content21.part2")}} + {{$t("profiling.timelineMsprofTips.content21.part3")}} +
+
{{$t("profiling.timelineMsprofTips.content22")}}
+
+
{{$t("profiling.timelineMsprofTips.title3")}}
+
{{$t("profiling.timelineMsprofTips.content31")}}
+
{{$t("profiling.timelineMsprofTips.content32")}}
+
{{$t("profiling.timelineMsprofTips.content33")}}
+
+
{{$t("profiling.timelineMsprofTips.title4")}}
+
{{$t("profiling.timelineMsprofTips.content41")}}
+
{{$t("profiling.timelineMsprofTips.content42")}}
+
{{$t("profiling.timelineMsprofTips.content43")}}
+
{{$t("profiling.timelineMsprofTips.content44")}}
+
+
+ +
+
+
+ +
+
+
+ + +
+
+ {{$t('profiling.rankList')}} + + + + + + +
+
+ {{$t('profiling.modelList')}} + + + + + + +
+
+ {{$t('profiling.kind')}} + + + + + + +
+
+ {{$t('profiling.mergeModel')}} + + + + + + +
+
+ +
+
+ +
+

{{$t("public.dataLoading")}}

+

{{$t("public.noData")}}

+
+
{{ $t('profiling.timeLine') }}
@@ -508,132 +634,6 @@ limitations under the License.

{{$t("public.noData")}}

- -
-
{{ $t('profiling.timeLineMsprof') }}
-
- -
-
-
{{$t("profiling.features")}}
-
{{$t("profiling.timelineMsprofTips.title1")}}
-
{{$t("profiling.timelineMsprofTips.content11")}}
-
{{$t("profiling.timelineMsprofTips.content12")}}
-
{{$t("profiling.timelineMsprofTips.content13")}}
-
{{$t("profiling.timelineMsprofTips.content14")}}
-
-
{{$t("profiling.timelineMsprofTips.title2")}}
-
- {{$t("profiling.timelineMsprofTips.content21.part1")}} - {{$t("profiling.timelineMsprofTips.content21.part2")}} - {{$t("profiling.timelineMsprofTips.content21.part3")}} -
-
{{$t("profiling.timelineMsprofTips.content22")}}
-
-
{{$t("profiling.timelineMsprofTips.title3")}}
-
{{$t("profiling.timelineMsprofTips.content31")}}
-
{{$t("profiling.timelineMsprofTips.content32")}}
-
{{$t("profiling.timelineMsprofTips.content33")}}
-
-
{{$t("profiling.timelineMsprofTips.title4")}}
-
{{$t("profiling.timelineMsprofTips.content41")}}
-
{{$t("profiling.timelineMsprofTips.content42")}}
-
{{$t("profiling.timelineMsprofTips.content43")}}
-
{{$t("profiling.timelineMsprofTips.content44")}}
-
-
- -
-
-
- -
-
-
- - -
-
- {{$t('profiling.rankList')}} - - - - - - -
-
- {{$t('profiling.modelList')}} - - - - - - -
-
- {{$t('profiling.kind')}} - - - - - - -
-
- {{$t('profiling.mergeModel')}} - - - - - - -
-
- -
-
- -
-

{{$t("public.dataLoading")}}

-

{{$t("public.noData")}}

-
- -
-- Gitee From 11f9c79c6477d8ac95fb06f953c29212d17f9113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Mon, 11 Dec 2023 21:19:45 +0800 Subject: [PATCH 02/19] [MI] fix debuger node large warning --- .../debugger/debugger_services/debugger_offline_server.py | 1 + mindinsight/debugger/stream_handler/metadata_handler.py | 5 ++++- mindinsight/ui/src/mixins/debugger-mixin.vue | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mindinsight/debugger/debugger_services/debugger_offline_server.py b/mindinsight/debugger/debugger_services/debugger_offline_server.py index 5be4b7e5..c17c6219 100644 --- a/mindinsight/debugger/debugger_services/debugger_offline_server.py +++ b/mindinsight/debugger/debugger_services/debugger_offline_server.py @@ -231,6 +231,7 @@ class DebuggerOfflineManager: try: graphs = self._data_loader.load_graphs(threshold=settings.MAX_GRAPH_NODE_SIZE) except DebuggerNodeTooLarge as err: + self._metadata_stream.max_graph_node_size = settings.MAX_GRAPH_NODE_SIZE self._update_state(ServerStatus.NODE_TOO_LARGE) log.exception(err) return diff --git a/mindinsight/debugger/stream_handler/metadata_handler.py b/mindinsight/debugger/stream_handler/metadata_handler.py index 0215f47f..61b0e2fd 100644 --- a/mindinsight/debugger/stream_handler/metadata_handler.py +++ b/mindinsight/debugger/stream_handler/metadata_handler.py @@ -40,6 +40,7 @@ class MetadataHandler(StreamHandlerBase): # maximum step number among all devices self._max_step_num = 0 self._debugger_type = DebuggerServerMode.ONLINE.value + self.max_graph_node_size = 0 @property def debugger_type(self): @@ -220,10 +221,12 @@ class MetadataHandler(StreamHandlerBase): 'graph_name': self.graph_name, 'recommendation_confirmed': self._recommendation_confirmed, 'debugger_version': self.debugger_version, - 'data_version': self.data_version + 'data_version': self.data_version, } if self.debugger_type == 'offline': metadata['total_step_num'] = self.max_step_num + if self.state == 
ServerStatus.NODE_TOO_LARGE.value: + metadata['max_graph_node_size'] = self.max_graph_node_size else: if not isinstance(filter_condition, list): filter_condition = [filter_condition] diff --git a/mindinsight/ui/src/mixins/debugger-mixin.vue b/mindinsight/ui/src/mixins/debugger-mixin.vue index 188f53fd..63ca4895 100644 --- a/mindinsight/ui/src/mixins/debugger-mixin.vue +++ b/mindinsight/ui/src/mixins/debugger-mixin.vue @@ -624,6 +624,7 @@ export default { if (res.data.metadata.state === this.state.node_too_large) { this.dialogVisible = true; this.nodeDataIsLarge = true; + this.maxGraphNodeSize = res.data.metadata.max_graph_node_size; return; } this.dealMetadata(res.data.metadata); -- Gitee From 78fd11acff3e9893330c0c777a9b3e0618f2fe1b Mon Sep 17 00:00:00 2001 From: maning202007 Date: Sat, 23 Dec 2023 19:47:13 +0800 Subject: [PATCH 03/19] Fix the issue for Load operator name --- mindinsight/datavisual/data_transform/graph/msgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindinsight/datavisual/data_transform/graph/msgraph.py b/mindinsight/datavisual/data_transform/graph/msgraph.py index 25bafcfa..5ef65a9e 100644 --- a/mindinsight/datavisual/data_transform/graph/msgraph.py +++ b/mindinsight/datavisual/data_transform/graph/msgraph.py @@ -66,7 +66,7 @@ class MSGraph(Graph): name = f'{node_proto.op_type}-op{node_id}' node_name = Node.create_node_name(node_proto.scope, name) - if node_proto.full_name and node_proto.op_type != NodeTypeEnum.LOAD.value: + if node_proto.full_name: node_name = node_proto.full_name if node_proto.full_name and any( -- Gitee From 7b9d4c2a219c82fb301f494dcfd1ca8f12fce31a Mon Sep 17 00:00:00 2001 From: MooYeh Date: Wed, 3 Jan 2024 18:31:39 +0800 Subject: [PATCH 04/19] MS Profiler Tracing --- mindinsight/profiler/analyser/analyser.py | 39 +++-- .../ui/src/components/operator-unit.vue | 2 +- mindinsight/ui/src/locales/en-us.json | 2 + mindinsight/ui/src/locales/zh-cn.json | 2 + .../profiling/single/performance/operator.vue 
| 24 +-- tests/st/func/profiler/test_aicpu_analyser.py | 4 +- tests/st/func/profiler/test_op_analyser.py | 16 +- .../profiler/analyser/test_aicpu_analyser.py | 8 +- .../analyser/test_analyser_aicore_detail.py | 40 ++--- .../analyser/test_analyser_aicore_type.py | 20 +-- .../profiler/aicore_intermediate_1_detail.csv | 2 +- .../profiler/aicore_intermediate_1_type.csv | 2 +- tests/utils/resource/profiler/flops_0.txt | 2 +- tests/utils/resource/profiler/flops_1.txt | 2 +- .../resource/profiler/framework_raw_0.csv | 10 +- .../resource/profiler/framework_raw_1.csv | 22 +-- .../profiler/aicore_intermediate_1_detail.csv | 2 +- .../profiler/aicore_intermediate_1_type.csv | 2 +- .../run_1/normal_run/profiler/flops_1.txt | 2 +- .../normal_run/profiler/framework_raw_1.csv | 164 +++++++++--------- .../profiler/aicpu_intermediate_1.csv | 2 +- 21 files changed, 189 insertions(+), 180 deletions(-) diff --git a/mindinsight/profiler/analyser/analyser.py b/mindinsight/profiler/analyser/analyser.py index e27e26f0..75f8c2ae 100644 --- a/mindinsight/profiler/analyser/analyser.py +++ b/mindinsight/profiler/analyser/analyser.py @@ -35,7 +35,7 @@ class AicoreTypeAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. """ - _col_names = ['op_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] + _col_names = ['kernel_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] _file_name_aicore_type_time = 'aicore_intermediate_{}_type.csv' def _load(self): @@ -111,8 +111,8 @@ class AicoreDetailAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. 
""" - _col_names = ['op_name', 'op_type', 'avg_execution_time', 'execution_frequency', 'MFLOPs(10^6 cube)', - 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'full_op_name', 'op_info'] + _col_names = ['op_name', 'kernel_name', 'kernel_type', 'avg_execution_time', 'execution_frequency', + 'MFLOPs(10^6 cube)', 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'op_info'] _file_name_aicore_detail_time = 'aicore_intermediate_{}_detail.csv' _file_name_flops = 'flops_{}.txt' _file_name_framework_info = 'framework_raw_{}.csv' @@ -141,8 +141,13 @@ class AicoreDetailAnalyser(BaseAnalyser): self._filter(filter_condition) type_detail_cache = {} + is_display_full_op_name = filter_condition.get( + 'is_display_full_op_name', True + ) + kernel_type_idx = 2 if is_display_full_op_name else 1 + avg_exec_time_idx = 3 if is_display_full_op_name else 2 for detail_info in self._result: - op_type = detail_info[1] + op_type = detail_info[kernel_type_idx] if op_type not in op_type_order: continue infos = type_detail_cache.get(op_type) @@ -156,7 +161,7 @@ class AicoreDetailAnalyser(BaseAnalyser): detail_infos = type_detail_cache.get(op_type) if detail_infos is None: continue - detail_infos.sort(key=lambda item: item[2], reverse=True) + detail_infos.sort(key=lambda item: item[avg_exec_time_idx], reverse=True) result.extend(detail_infos) return { @@ -239,9 +244,9 @@ class AicoreDetailAnalyser(BaseAnalyser): return self._default_filter(item, filter_condition) def _inner_map(item: list): - inner_item = item[0:8] + inner_item = item[1:9] if is_display_full_op_name: - inner_item.append(item[8]) + inner_item.insert(0, item[0]) if is_display_detail: inner_item.append(item[9]) return inner_item @@ -268,9 +273,9 @@ class AicoreDetailAnalyser(BaseAnalyser): is_display_full_op_name (bool): Whether to display the operator full name. 
""" - self._display_col_names = self._col_names[0:8] + self._display_col_names = self._col_names[1:9] if is_display_full_op_name: - self._display_col_names.append(self._col_names[8]) + self._display_col_names.insert(0, self._col_names[0]) if is_display_detail: self._display_col_names.append(self._col_names[9]) @@ -285,8 +290,8 @@ class AicoreDetailAnalyser(BaseAnalyser): Returns: list[Union[str, float]], the converted data. """ - return [row[3], row[4], row[5], row[6], - json.loads(row[7]) if row[7] else None] + return [row[4], row[5], row[6], row[7], + json.loads(row[8]) if row[8] else None] def _get_op_detail_info(self, row, framework_infos, flops_infos): """ @@ -303,21 +308,21 @@ class AicoreDetailAnalyser(BaseAnalyser): framework_info = framework_infos.get(row[0]) flops_info = flops_infos.get(row[0], ['-', '-', '-', '-']) if len(flops_info) > 3: - return [framework_info[1], framework_info[2], + return [framework_info[0], framework_info[1], framework_info[2], self._format_float_data(float(row[1]) * self._ms_to_us), self._format_float_data(int(row[2])), self._format_float_data(flops_info[0]), self._format_float_data(flops_info[1]), self._format_float_data(flops_info[2]), self._format_float_data(flops_info[3]), - framework_info[0], framework_info[4]] - return [framework_info[1], framework_info[2], + framework_info[4]] + return [framework_info[0], framework_info[1], framework_info[2], self._format_float_data(float(row[1]) * self._ms_to_us), self._format_float_data(int(row[2])), self._format_float_data(flops_info[0]), self._format_float_data(flops_info[1]), self._format_float_data(flops_info[2]), - framework_info[3], framework_info[0], framework_info[4]] + framework_info[3], framework_info[4]] class AicpuTypeAnalyser(BaseAnalyser): @@ -332,7 +337,7 @@ class AicpuTypeAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. 
""" - _col_names = ['op_type', 'total_time', 'execution_frequency', 'percent'] + _col_names = ['kernel_type', 'total_time', 'execution_frequency', 'percent'] _file_name_aicpu_time = 'aicpu_intermediate_{}.csv' def _load(self): @@ -404,7 +409,7 @@ class AicpuDetailAnalyser(BaseAnalyser): Raises: ProfilerPathErrorException: If the profiling dir is invalid. """ - _col_names = ['op_name', 'op_type', 'avg_execution_time', 'dispatch_time', + _col_names = ['kernel_name', 'kernel_type', 'avg_execution_time', 'dispatch_time', 'execution_frequency'] _file_name_aicpu_time = 'aicpu_intermediate_{}.csv' diff --git a/mindinsight/ui/src/components/operator-unit.vue b/mindinsight/ui/src/components/operator-unit.vue index 39efec02..965f1110 100644 --- a/mindinsight/ui/src/components/operator-unit.vue +++ b/mindinsight/ui/src/components/operator-unit.vue @@ -734,7 +734,7 @@ export default { }, pageTotal: 0, op_filter_condition: { - op_type: { + kernel_type: { in: [k[0]], }, }, diff --git a/mindinsight/ui/src/locales/en-us.json b/mindinsight/ui/src/locales/en-us.json index 3382a5b7..ab5bf2c6 100644 --- a/mindinsight/ui/src/locales/en-us.json +++ b/mindinsight/ui/src/locales/en-us.json @@ -308,7 +308,9 @@ "classificationOperator": "Type", "card": " ", "searchByType": "Enter operator type", + "searchByKernelType": "Enter kernel type", "searchByName": "Enter operator name", + "searchByKernelName": "Enter kernel name", "operatorInfo": "Operator", "kernelInfo": "Kernel", "searchByCoreName": "Enter kernel name", diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index e1bff9e4..0a66de07 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -307,7 +307,9 @@ "classificationOperator": "分类", "card": "卡", "searchByType": "请输入算子类型搜索", + "searchByKernelType": "请输入Kernel类型搜索", "searchByName": "请输入算子名称搜索", + "searchByKernelName": "请输入Kernel名称搜索", "searchByCoreName": "请输入内核名称搜索", "searchByCoreFullName": "请输入算子全名搜索", 
"operatorInfo": "算子信息", diff --git a/mindinsight/ui/src/views/profiling/single/performance/operator.vue b/mindinsight/ui/src/views/profiling/single/performance/operator.vue index cd987c3f..28d4b1d0 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/operator.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/operator.vue @@ -157,55 +157,55 @@ export default { coreSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { label: - this.$t('operator.searchByName') + + this.$t('operator.searchByKernelName') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_name', + type: 'kernel_name', }, }, cpuSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, }, hostSearch: { all: { label: - this.$t('operator.searchByType') + + this.$t('operator.searchByKernelType') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_type', + type: 'kernel_type', }, detail: { label: - this.$t('operator.searchByName') + + this.$t('operator.searchByKernelName') + this.$t('symbols.leftbracket') + this.$t('public.caseMode') + this.$t('symbols.rightbracket'), - type: 'op_name', + type: 'kernel_name', }, }, aicoreOpChart:{ diff --git a/tests/st/func/profiler/test_aicpu_analyser.py b/tests/st/func/profiler/test_aicpu_analyser.py index 
7a336800..3c64f2fc 100644 --- a/tests/st/func/profiler/test_aicpu_analyser.py +++ b/tests/st/func/profiler/test_aicpu_analyser.py @@ -54,7 +54,7 @@ class TestAicpuAnalyser: def test_query_aicpu_type(self): """Test the function of querying AICPU operator type information.""" expect_result = { - 'col_name': ['op_type', 'execution_time', 'execution_frequency', 'percent'], + 'col_name': ['kernel_type', 'execution_time', 'execution_frequency', 'percent'], 'object': [ ['InitData', 7906.0, 1, 89.84], ['GetNext', 590.5, 2, 6.71], @@ -74,7 +74,7 @@ class TestAicpuAnalyser: def test_query_aicpu_detail(self): """Test the function of querying AICPU operator type information.""" expect_result = { - 'col_name': ['op_name', 'op_type', 'total_time', + 'col_name': ['kernel_name', 'kernel_type', 'total_time', 'dispatch_time', 'execution_frequency'], 'size': 3 } diff --git a/tests/st/func/profiler/test_op_analyser.py b/tests/st/func/profiler/test_op_analyser.py index 3357b914..e5dc553b 100644 --- a/tests/st/func/profiler/test_op_analyser.py +++ b/tests/st/func/profiler/test_op_analyser.py @@ -27,19 +27,19 @@ from tests.st.func.profiler.conftest import BASE_SUMMARY_DIR OP_GATHER_V2_INFO = { 'col_name': - ['op_name', 'op_type', 'avg_execution_time', 'execution_frequency', - 'MFLOPs(10^6 cube)', 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'full_op_name', + ['op_name', 'kernel_name', 'kernel_type', 'avg_execution_time', 'execution_frequency', + 'MFLOPs(10^6 cube)', 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'op_info'], 'object': [ [ - 'AssignAdd-op203', 'AssignAdd', 1.79, 3, '-', '-', '-', '-', 'Default/AssignAdd-op203', + 'Default/AssignAdd-op203', 'AssignAdd-op203', 'AssignAdd', 1.79, 3, '-', '-', '-', '-', {'input_0': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]}, 'input_1': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]}, 'output_0': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]} } ], [ - 
'AssignAdd-op206', 'AssignAdd', 1.283, 3, '-', '-', '-', '-', 'Default/AssignAdd-op206', + 'Default/AssignAdd-op206', 'AssignAdd-op206', 'AssignAdd', 1.283, 3, '-', '-', '-', '-', {'input_0': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]}, 'input_1': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]}, 'output_0': {'format': 'DEFAULT', 'data_type': 'INT32', 'shape': [1]} @@ -77,7 +77,7 @@ class TestOpAnalyser: """Test the function of querying AICORE operator type information.""" expect_result = { 'col_name': - ['op_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'], + ['kernel_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'], 'object': [['MatMul', 2807.82, 25, 5760.0, 112.313], ['Cast', 104.32, 27, 214.0, 3.864], ['TransData', 86.12, 9, 177.0, 9.569], @@ -123,7 +123,7 @@ class TestOpAnalyser: """Test the function of querying AICORE operator type information.""" expect_result = { 'col_name': - ['op_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'], + ['kernel_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'], 'object': [ ['MatMul', 2807.82, 25, 5760.0, 112.313], ['Mul', 4.29, 3, 9.0, 1.43] @@ -132,7 +132,7 @@ class TestOpAnalyser: } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['Mul'] } }, @@ -155,7 +155,7 @@ class TestOpAnalyser: expect_result = OP_GATHER_V2_INFO condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['AssignAdd'] } }, diff --git a/tests/ut/profiler/analyser/test_aicpu_analyser.py b/tests/ut/profiler/analyser/test_aicpu_analyser.py index 8133274f..d3fe6ad6 100644 --- a/tests/ut/profiler/analyser/test_aicpu_analyser.py +++ b/tests/ut/profiler/analyser/test_aicpu_analyser.py @@ -47,7 +47,7 @@ class TestAicpuAnalyser: def test_query_aicpu_type(self): """Test the function of querying AICPU operator type information.""" expect_result = { - 'col_name': ['op_type', 
'execution_time', 'execution_frequency', 'percent'], + 'col_name': ['kernel_type', 'execution_time', 'execution_frequency', 'percent'], 'object': [ ['InitData', 7906.0, 1, 89.84], ], @@ -55,7 +55,7 @@ class TestAicpuAnalyser: } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['init'] } } @@ -66,14 +66,14 @@ class TestAicpuAnalyser: def test_query_aicpu_detail(self): """Test the function of querying AICPU operator detail information.""" expect_result = { - 'col_name': ['op_name', 'op_type', 'total_time', + 'col_name': ['kernel_name', 'kernel_type', 'total_time', 'dispatch_time', 'execution_frequency'], 'size': 1 } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['get'] } } diff --git a/tests/ut/profiler/analyser/test_analyser_aicore_detail.py b/tests/ut/profiler/analyser/test_analyser_aicore_detail.py index 3b325b8c..d91c4ed4 100644 --- a/tests/ut/profiler/analyser/test_analyser_aicore_detail.py +++ b/tests/ut/profiler/analyser/test_analyser_aicore_detail.py @@ -21,8 +21,8 @@ from unittest import TestCase from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory from tests.ut.profiler import PROFILER_DIR -COL_NAMES = ['op_name', 'op_type', 'avg_execution_time', 'execution_frequency', 'MFLOPs(10^6 cube)', - 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'full_op_name', 'op_info'] +COL_NAMES = ['op_name', 'kernel_name', 'kernel_type', 'avg_execution_time', 'execution_frequency', + 'MFLOPs(10^6 cube)', 'GFLOPS(10^9 cube)', 'MFLOPs(10^6 vector)', 'GFLOPS(10^9 vector)', 'op_info'] def get_detail_infos(indexes=None, sort_name=None, sort_type=True): @@ -52,9 +52,9 @@ def get_detail_infos(indexes=None, sort_name=None, sort_type=True): for fm_info, detail_info, flops_line in zip(fm_csv_reader, detail_csv_reader, flops_reader): flops = flops_line.strip().split(',') cache.append( - [fm_info[4], fm_info[5], round(float(detail_info[1]) * 1e3, 3), 
int(detail_info[2]), float(flops[1]), - float(flops[2]), float(flops[3]), float(flops[4]), - fm_info[3], json.loads(fm_info[7]) if fm_info[7] else None] + [fm_info[4], fm_info[5], fm_info[6], round(float(detail_info[1]) * 1e3, 3), int(detail_info[2]), + float(flops[1]), float(flops[2]), float(flops[3]), float(flops[4]), + json.loads(fm_info[8]) if fm_info[8] else None] ) if indexes: @@ -99,7 +99,7 @@ class TestAicoreDetailAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['MatMul'] } } @@ -109,7 +109,7 @@ class TestAicoreDetailAnalyser(TestCase): condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'not_in': ['AtomicAddrClean', 'Cast', 'TransData', 'Conv2D'] } } @@ -119,7 +119,7 @@ class TestAicoreDetailAnalyser(TestCase): condition = { 'filter_condition': { - 'op_name': { + 'kernel_name': { 'partial_match_str_in': ['op9'] } } @@ -145,12 +145,12 @@ class TestAicoreDetailAnalyser(TestCase): expect_result = { 'col_name': COL_NAMES, - 'object': get_detail_infos(sort_name='op_name', sort_type=False), + 'object': get_detail_infos(sort_name='kernel_name', sort_type=False), 'size': 10 } condition = { 'sort_condition': { - 'name': 'op_name', + 'name': 'kernel_name', 'type': 'ascending' } } @@ -198,7 +198,7 @@ class TestAicoreDetailAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_name': { + 'kernel_name': { 'partial_match_str_in': ['Atomic', 'Conv'] } }, @@ -222,10 +222,10 @@ class TestAicoreDetailAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['Conv2D', 'AtomicAddrClean', 'TransData'] }, - 'op_name': { + 'kernel_name': { 'partial_match_str_in': ['Atomic', 'Conv'] } }, @@ -247,7 +247,7 @@ class TestAicoreDetailAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['MatMul'] }, 'is_display_detail': False @@ -257,13 +257,13 @@ class TestAicoreDetailAnalyser(TestCase): self.assertDictEqual(expect_result, 
result) expect_result = { - 'col_name': COL_NAMES[0:8], - 'object': [item[0:8] for item in detail_infos], + 'col_name': COL_NAMES[1:9], + 'object': [item[1:9] for item in detail_infos], 'size': 1 } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['MatMul'] }, 'is_display_detail': False, @@ -278,12 +278,12 @@ class TestAicoreDetailAnalyser(TestCase): """Test the success of the querying and sorting function by operator type.""" detail_infos = get_detail_infos(indexes=[9, 0, 2, 1, 5, 3, 4]) expect_result = { - 'col_name': COL_NAMES[0:8], - 'object': [item[0:8] for item in detail_infos] + 'col_name': COL_NAMES[1:9], + 'object': [item[1:9] for item in detail_infos] } filter_condition = { - 'op_type': { + 'kernel_type': { 'in': ['AtomicAddrClean', 'Cast', 'MatMul'], 'not_in': ['TransData'] }, diff --git a/tests/ut/profiler/analyser/test_analyser_aicore_type.py b/tests/ut/profiler/analyser/test_analyser_aicore_type.py index 676a208b..6f27e23a 100644 --- a/tests/ut/profiler/analyser/test_analyser_aicore_type.py +++ b/tests/ut/profiler/analyser/test_analyser_aicore_type.py @@ -20,7 +20,7 @@ from unittest import TestCase from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory from tests.ut.profiler import PROFILER_DIR -COL_NAMES = ['op_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] +COL_NAMES = ['kernel_type', 'total_time', 'execution_frequency', 'total_percent', 'avg_time'] def get_type_infos(indexes=None, sort_name=None, sort_type=True): @@ -93,7 +93,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['Cast'] } } @@ -108,7 +108,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'not_in': ['Cast'] } } @@ -123,7 +123,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['C'] } } @@ 
-140,7 +140,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'in': ['Cast', 'Conv2D'] } } @@ -155,7 +155,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'not_in': ['Cast', 'Conv2D'] } } @@ -170,7 +170,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['Trans', 'Conv'] } } @@ -182,11 +182,11 @@ class TestAicoreTypeAnalyser(TestCase): """Test the success of the querying function.""" expect_result = { 'col_name': COL_NAMES, - 'object': get_type_infos(sort_name='op_type', sort_type=True), + 'object': get_type_infos(sort_name='kernel_type', sort_type=True), 'size': 5} condition = { 'sort_condition': { - 'name': 'op_type', + 'name': 'kernel_type', 'type': 'descending' } } @@ -248,7 +248,7 @@ class TestAicoreTypeAnalyser(TestCase): } condition = { 'filter_condition': { - 'op_type': { + 'kernel_type': { 'partial_match_str_in': ['C'] } }, diff --git a/tests/utils/resource/profiler/aicore_intermediate_1_detail.csv b/tests/utils/resource/profiler/aicore_intermediate_1_detail.csv index 42a64606..358d41cd 100755 --- a/tests/utils/resource/profiler/aicore_intermediate_1_detail.csv +++ b/tests/utils/resource/profiler/aicore_intermediate_1_detail.csv @@ -1,4 +1,4 @@ -full_op_time,execution_time,execution_frequency +full_kernel_time,execution_time,execution_frequency Default/AtomicAddrClean-op104,0.00133,1 Default/AtomicAddrClean-op105,0.000987,1 Default/AtomicAddrClean-op106,0.001129,1 diff --git a/tests/utils/resource/profiler/aicore_intermediate_1_type.csv b/tests/utils/resource/profiler/aicore_intermediate_1_type.csv index ad7efca9..8021b96f 100755 --- a/tests/utils/resource/profiler/aicore_intermediate_1_type.csv +++ b/tests/utils/resource/profiler/aicore_intermediate_1_type.csv @@ -1,4 +1,4 @@ -op_type,total_time,execution_frequency,percent 
+kernel_type,total_time,execution_frequency,percent AtomicAddrClean,0.007283,6,0.49 Cast,0.053395,13,3.63 TransData,0.121800,5,8.23 diff --git a/tests/utils/resource/profiler/flops_0.txt b/tests/utils/resource/profiler/flops_0.txt index 56e7d657..73eb07d6 100644 --- a/tests/utils/resource/profiler/flops_0.txt +++ b/tests/utils/resource/profiler/flops_0.txt @@ -1,4 +1,4 @@ -op_full_name, FLOPs, FLOPS, FLOPS utilization +full_kernel_name, FLOPs, FLOPS, FLOPS utilization Default/Cast-op6, 333, 333, 33 Default/TransData-op7, 333, 333, 33 Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5, 333, 333, 33 diff --git a/tests/utils/resource/profiler/flops_1.txt b/tests/utils/resource/profiler/flops_1.txt index fa697675..7604dc55 100644 --- a/tests/utils/resource/profiler/flops_1.txt +++ b/tests/utils/resource/profiler/flops_1.txt @@ -1,4 +1,4 @@ -op_full_name, MFLOPs(10^6 cube), GFLOPS(10^9 cube), FLOPS utilization(cube), MFLOPs(10^6 vector), GFLOPS(10^9 vector), FLOPS utilization(vec) +full_kernel_name, MFLOPs(10^6 cube), GFLOPS(10^9 cube), FLOPS utilization(cube), MFLOPs(10^6 vector), GFLOPS(10^9 vector), FLOPS utilization(vec) Default/AtomicAddrClean-op104, 333, 333, 33, 333, 333, 33 Default/AtomicAddrClean-op105, 333, 333, 33, 333, 333, 33 Default/AtomicAddrClean-op106, 333, 333, 33, 333, 333, 33 diff --git a/tests/utils/resource/profiler/framework_raw_0.csv b/tests/utils/resource/profiler/framework_raw_0.csv index 762bc693..e3f87d88 100755 --- a/tests/utils/resource/profiler/framework_raw_0.csv +++ b/tests/utils/resource/profiler/framework_raw_0.csv @@ -1,5 +1,5 @@ -task_id,stream_id,block_dim,full_op_name,op_name,op_type,subgraph,op_info -51517,0,32,Default/Cast-op6,Cast-op6,Cast,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,3,224,224""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,3,224,224""}}" 
-51518,0,32,Default/TransData-op7,TransData-op7,TransData,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,3,224,224""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,224,224,16""}}" -51519,0,32,Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5,Cast-op5,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""49,4,16,16""}, ""output_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""49,4,16,16""}}" -51522,0,4,Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/0-ResidualBlock/conv1-Conv2d/Cast-op28,Cast-op28,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""4,4,16,16""}, ""output_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""4,4,16,16""}}" +task_id,stream_id,block_dim,full_kernel_name,op_name,kernel_name,kernel_type,subgraph,op_info +51517,0,32,Default/Cast-op6,Default/Cast-op6,Cast-op6,Cast,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,3,224,224""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,3,224,224""}}" +51518,0,32,Default/TransData-op7,Default/TransData-op7,TransData-op7,TransData,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,3,224,224""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,224,224,16""}}" +51519,0,32,Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5,Default/network-WithLossCell/_backbone-ResNet/conv1-Conv2d/Cast-op5,Cast-op5,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""49,4,16,16""}, ""output_0"": {""format"": ""FracZ"", 
""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""49,4,16,16""}}" +51522,0,4,Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/0-ResidualBlock/conv1-Conv2d/Cast-op28,Default/network-WithLossCell/_backbone-ResNet/layer1-SequentialCell/0-ResidualBlock/conv1-Conv2d/Cast-op28,Cast-op28,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""4,4,16,16""}, ""output_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""4,4,16,16""}}" diff --git a/tests/utils/resource/profiler/framework_raw_1.csv b/tests/utils/resource/profiler/framework_raw_1.csv index 5b60ecef..f45ab959 100755 --- a/tests/utils/resource/profiler/framework_raw_1.csv +++ b/tests/utils/resource/profiler/framework_raw_1.csv @@ -1,11 +1,11 @@ -task_id,stream_id,block_dim,full_op_name,op_name,op_type,subgraph,op_info -30290,0,1,Default/AtomicAddrClean-op104,AtomicAddrClean-op104,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": """"}}" -30295,0,1,Default/AtomicAddrClean-op105,AtomicAddrClean-op105,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""10""}}" -30300,0,1,Default/AtomicAddrClean-op106,AtomicAddrClean-op106,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84""}}" -30268,0,32,Default/Cast-op10,Cast-op10,Cast,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,1,32,32""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}}" -30271,0,9,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op12,Cast-op12,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""25,1,16,16""}, ""output_0"": {""format"": ""FracZ"", 
""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""25,1,16,16""}}" -30320,0,32,Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Cast-op53,Cast-op53,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,1,28,28,16""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,28,28,16""}}" -30269,0,32,Default/TransData-op11,TransData-op11,TransData,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}}" -30308,0,32,Gradients/Default/network-WithLossCell/_backbone-LeNet5/gradReshape/TransData-op44,TransData-op44,TransData,Gradients,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,16,5,5""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,5,5,16""}}" -30272,0,32,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op13,Conv2D-op13,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32,16""}, ""input_1"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""25,1,16,16""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,28,28,16""}}" -30286,0,1,Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op9,MatMul-op9,MatMul,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,120""}, ""input_1"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84,120""}, ""input_2"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": 
""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,84""}}" +task_id,stream_id,block_dim,full_kernel_name,op_name,kernel_name,kernel_type,subgraph,op_info +30290,0,1,Default/AtomicAddrClean-op104,Default/AtomicAddrClean-op104,AtomicAddrClean-op104,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": """"}}" +30295,0,1,Default/AtomicAddrClean-op105,Default/AtomicAddrClean-op105,AtomicAddrClean-op105,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""10""}}" +30300,0,1,Default/AtomicAddrClean-op106,Default/AtomicAddrClean-op106,AtomicAddrClean-op106,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84""}}" +30268,0,32,Default/Cast-op10,Default/Cast-op10,Cast-op10,Cast,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,1,32,32""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}}" +30271,0,9,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op12,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op12,Cast-op12,Cast,Default,"{""input_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""25,1,16,16""}, ""output_0"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""25,1,16,16""}}" +30320,0,32,Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Cast-op53,Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Cast-op53,Cast-op53,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,1,28,28,16""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,28,28,16""}}" 
+30269,0,32,Default/TransData-op11,Default/TransData-op11,TransData-op11,TransData,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32""}}" +30308,0,32,Gradients/Default/network-WithLossCell/_backbone-LeNet5/gradReshape/TransData-op44,Gradients/Default/network-WithLossCell/_backbone-LeNet5/gradReshape/TransData-op44,TransData-op44,TransData,Gradients,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,16,5,5""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,5,5,16""}}" +30272,0,32,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op13,Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op13,Conv2D-op13,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,32,32,16""}, ""input_1"": {""format"": ""FracZ"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""25,1,16,16""}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""NUMBER_TYPE_FLOAT16"", ""shape"": ""32,1,28,28,16""}}" +30286,0,1,Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op9,Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op9,MatMul-op9,MatMul,Default,"{""input_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,120""}, ""input_1"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84,120""}, ""input_2"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""84""}, ""output_0"": {""format"": ""DefaultFormat"", ""data_type"": ""NUMBER_TYPE_FLOAT32"", ""shape"": ""32,84""}}" diff --git a/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_detail.csv 
b/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_detail.csv index b122d096..5887db13 100644 --- a/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_detail.csv +++ b/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_detail.csv @@ -1,4 +1,4 @@ -full_op_name,execution_time,execution_frequency +full_kernel_name,execution_time,execution_frequency Default/GetNext-op6,5e-05,3 Default/StreamSend-op199,1e-05,3 Default/StreamRecv-op202,5e-05,3 diff --git a/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_type.csv b/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_type.csv index ee6c5587..9b462c12 100644 --- a/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_type.csv +++ b/tests/utils/resource/run_1/normal_run/profiler/aicore_intermediate_1_type.csv @@ -1,4 +1,4 @@ -op_type,total_time,execution_frequency,percent +kernel_type,total_time,execution_frequency,percent GetNext,0.00015,3,0.00 StreamSend,0.00003,3,0.00 StreamRecv,0.00015,3,0.00 diff --git a/tests/utils/resource/run_1/normal_run/profiler/flops_1.txt b/tests/utils/resource/run_1/normal_run/profiler/flops_1.txt index b8214348..4e7c744f 100644 --- a/tests/utils/resource/run_1/normal_run/profiler/flops_1.txt +++ b/tests/utils/resource/run_1/normal_run/profiler/flops_1.txt @@ -1,4 +1,4 @@ -op_full_name, FLOPs, FLOPS, FLOPS utilization +full_kernel_name, FLOPs, FLOPS, FLOPS utilization Default/AssignAdd-op414, 333, 333, 33 Default/network-TrainStepWrap/optimizer_d-Adam/Mul-op29, 333, 333, 33 Default/network-TrainStepWrap/optimizer_d-Adam/Assign-op30, 333, 333, 33 diff --git a/tests/utils/resource/run_1/normal_run/profiler/framework_raw_1.csv b/tests/utils/resource/run_1/normal_run/profiler/framework_raw_1.csv index 92bbdd0b..fff14ef2 100644 --- a/tests/utils/resource/run_1/normal_run/profiler/framework_raw_1.csv +++ b/tests/utils/resource/run_1/normal_run/profiler/framework_raw_1.csv @@ -1,82 +1,82 @@ 
-task_id,stream_id,block_dim,full_op_name,op_name,op_type,subgraph,op_info,graph_id -2,8,1,Default/InitDataSetQueue-op0,InitDataSetQueue-op0,InitDataSetQueue,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 -2,8,1,Default/StreamSwitch-op198,StreamSwitch-op198,StreamSwitch,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 -2,88,1,Default/GetNext-op6,GetNext-op6,GetNext,Default,"{""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 -4,88,1,Default/GetNext-op6,GetNext-op6,GetNext,Default,"{""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 -5,88,1,Default/StreamSend-op199,StreamSend-op199,StreamSend,Default,{},11 -2,90,1,Default/StreamSwitch-op200,StreamSwitch-op200,StreamSwitch,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 -3,90,1,Default/EndGraph-op201,EndGraph-op201,EndGraph,Default,{},11 -3,78,1,Default/StreamRecv-op202,StreamRecv-op202,StreamRecv,Default,{},11 -4,78,1,Default/AssignAdd-op203,AssignAdd-op203,AssignAdd,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 -5,78,1,Default/StreamActive-op204,StreamActive-op204,StreamActive,Default,{},11 
-2,72,1,Default/Profiling-op208,Profiling-op208,Profiling,Default,{},11 -3,72,1,Default/Profiling-op209,Profiling-op209,Profiling,Default,{},11 -4,72,32,Default/TransData-op114,TransData-op114,TransData,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32, 16]}}",11 -5,72,4,Default/TensorMove-op89,TensorMove-op89,TensorMove,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 -6,72,1,Default/StreamActive-op205,StreamActive-op205,StreamActive,Default,{},11 -7,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/OneHotD-op85,OneHotD-op85,OneHotD,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 -8,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op168,Cast-op168,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}}",11 -9,72,7,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op169,Cast-op169,Cast,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}}",11 
-10,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op10,Conv2D-op10,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}}",11 -11,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op91,ReluV2-op91,ReluV2,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 28, 28, 2]}}",11 -12,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op173,Cast-op173,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 -13,72,2,Default/AtomicAddrClean-op188,AtomicAddrClean-op188,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}}",11 -14,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op92,MaxPoolWithArgmax-op92,MaxPoolWithArgmax,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""output_1"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 14, 16]}}",11 
-15,72,7,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op170,Cast-op170,Cast,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}}",11 -16,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Conv2D-op13,Conv2D-op13,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}}",11 -17,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op93,ReluV2-op93,ReluV2,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 10, 10, 2]}}",11 -18,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op177,Cast-op177,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 -19,72,1,Default/AtomicAddrClean-op196,AtomicAddrClean-op196,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}}",11 -20,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op94,MaxPoolWithArgmax-op94,MaxPoolWithArgmax,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 
16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""output_1"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 3, 16]}}",11 -21,72,25,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op176,Cast-op176,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}}",11 -22,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/TransData-op143,TransData-op143,TransData,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 16, 5, 5]}}",11 -23,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op95,MatMul-op95,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 -24,72,4,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op96,Relu-op96,Relu,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 -25,72,1,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op97,MatMul-op97,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", 
""shape"": [84, 120]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 -26,72,3,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op98,Relu-op98,Relu,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 -27,72,1,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op99,MatMul-op99,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 -28,72,2,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op100,SoftmaxCrossEntropyWithLogits-op100,SoftmaxCrossEntropyWithLogits,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 -29,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/Mul-op101,Mul-op101,Mul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1]}, 
""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 -30,72,1,Default/AtomicAddrClean-op189,AtomicAddrClean-op189,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}}",11 -31,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/ReduceMeanD-op82,ReduceMeanD-op82,ReduceMeanD,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}}",11 -32,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op52,MatMul-op52,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}}",11 -33,72,1,Default/AtomicAddrClean-op195,AtomicAddrClean-op195,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 -34,72,6,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op49,BiasAddGrad-op49,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 -35,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op102,ApplyMomentumD-op102,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": 
{""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 -36,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op34,MatMul-op34,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 -37,72,3,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op35,ReluGrad-op35,ReluGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 -38,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op103,ApplyMomentumD-op103,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}}",11 
-39,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op58,MatMul-op58,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}}",11 -40,72,1,Default/AtomicAddrClean-op193,AtomicAddrClean-op193,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 -41,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op55,BiasAddGrad-op55,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 -42,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op104,ApplyMomentumD-op104,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 -43,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op36,MatMul-op36,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", 
""shape"": [84, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 -44,72,4,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op37,ReluGrad-op37,ReluGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 -45,72,10,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op105,ApplyMomentumD-op105,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}}",11 -46,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op38,MatMul-op38,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}}",11 -47,72,30,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op64,MatMul-op64,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": 
{""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}}",11 -48,72,1,Default/AtomicAddrClean-op191,AtomicAddrClean-op191,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 -49,72,16,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op61,BiasAddGrad-op61,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 -50,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op106,ApplyMomentumD-op106,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 -51,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/flatten-Flatten/gradReshape/TransData-op155,TransData-op155,TransData,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 16, 5, 5]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}}",11 -52,72,13,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op178,Cast-op178,Cast,Gradients,"{""input_0"": {""format"": 
""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}}",11 -53,72,3,Default/AtomicAddrClean-op192,AtomicAddrClean-op192,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 -54,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op107,MaxPoolGradWithArgmax-op107,MaxPoolGradWithArgmax,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""input_2"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 3, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 -55,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op174,Cast-op174,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}}",11 -56,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op180,FusionOp_ReluGradV2_Cast-op180,FusionOp_ReluGradV2_Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 10, 10, 2]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 
-57,72,32,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op109,ApplyMomentumD-op109,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}}",11 -58,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropInputD-op83,Conv2DBackpropInputD-op83,Conv2DBackpropInputD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}}",11 -59,72,1,Default/AtomicAddrClean-op190,AtomicAddrClean-op190,AtomicAddrClean,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 -60,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilterD-op86,Conv2DBackpropFilterD-op86,Conv2DBackpropFilterD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 
-61,72,7,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op110,ApplyMomentumD-op110,ApplyMomentumD,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 -62,72,17,Default/AtomicAddrClean-op197,AtomicAddrClean-op197,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 -63,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op111,MaxPoolGradWithArgmax-op111,MaxPoolGradWithArgmax,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""input_2"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 14, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 -64,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op175,Cast-op175,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}}",11 
-65,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op185,FusionOp_ReluGradV2_Cast-op185,FusionOp_ReluGradV2_Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 28, 28, 2]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 -66,72,1,Default/AtomicAddrClean-op194,AtomicAddrClean-op194,AtomicAddrClean,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 -67,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilterD-op84,Conv2DBackpropFilterD-op84,Conv2DBackpropFilterD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 -68,72,7,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op113,ApplyMomentumD-op113,ApplyMomentumD,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 
-69,72,1,Default/Profiling-op210,Profiling-op210,Profiling,Default,{},11 -70,72,1,Default/Profiling-op211,Profiling-op211,Profiling,Default,{},11 -71,72,1,Default/AssignAdd-op206,AssignAdd-op206,AssignAdd,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 -72,72,1,Default/StreamActive-op207,StreamActive-op207,StreamActive,Default,{},11 +task_id,stream_id,block_dim,full_kernel_name,op_name,kernel_name,kernel_type,subgraph,op_info,graph_id +2,8,1,Default/InitDataSetQueue-op0,Default/InitDataSetQueue-op0,InitDataSetQueue-op0,InitDataSetQueue,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 +2,8,1,Default/StreamSwitch-op198,Default/StreamSwitch-op198,StreamSwitch-op198,StreamSwitch,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 +2,88,1,Default/GetNext-op6,Default/GetNext-op6,GetNext-op6,GetNext,Default,"{""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 +4,88,1,Default/GetNext-op6,Default/GetNext-op6,GetNext-op6,GetNext,Default,"{""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 +5,88,1,Default/StreamSend-op199,Default/StreamSend-op199,StreamSend-op199,StreamSend,Default,{},11 +2,90,1,Default/StreamSwitch-op200,Default/StreamSwitch-op200,StreamSwitch-op200,StreamSwitch,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", 
""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 +3,90,1,Default/EndGraph-op201,Default/EndGraph-op201,EndGraph-op201,EndGraph,Default,{},11 +3,78,1,Default/StreamRecv-op202,Default/StreamRecv-op202,StreamRecv-op202,StreamRecv,Default,{},11 +4,78,1,Default/AssignAdd-op203,Default/AssignAdd-op203,AssignAdd-op203,AssignAdd,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 +5,78,1,Default/StreamActive-op204,Default/StreamActive-op204,StreamActive-op204,StreamActive,Default,{},11 +2,72,1,Default/Profiling-op208,Default/Profiling-op208,Profiling-op208,Profiling,Default,{},11 +3,72,1,Default/Profiling-op209,Default/Profiling-op209,Profiling-op209,Profiling,Default,{},11 +4,72,32,Default/TransData-op114,Default/TransData-op114,TransData-op114,TransData,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32, 16]}}",11 +5,72,4,Default/TensorMove-op89,Default/TensorMove-op89,TensorMove-op89,TensorMove,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [32]}}",11 +6,72,1,Default/StreamActive-op205,Default/StreamActive-op205,StreamActive-op205,StreamActive,Default,{},11 +7,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/OneHotD-op85,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/OneHotD-op85,OneHotD-op85,OneHotD,Gradients,"{""input_0"": {""format"": ""DEFAULT"", 
""data_type"": ""INT32"", ""shape"": [32]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 +8,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op168,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op168,Cast-op168,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 32, 32, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}}",11 +9,72,7,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op169,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op169,Cast-op169,Cast,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}}",11 +10,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op10,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Conv2D-op10,Conv2D-op10,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}}",11 +11,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op91,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op91,ReluV2-op91,ReluV2,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 
28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 28, 28, 2]}}",11 +12,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op173,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op173,Cast-op173,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 +13,72,2,Default/AtomicAddrClean-op188,Default/AtomicAddrClean-op188,AtomicAddrClean-op188,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}}",11 +14,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op92,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op92,MaxPoolWithArgmax-op92,MaxPoolWithArgmax,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""output_1"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 14, 16]}}",11 +15,72,7,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op170,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op170,Cast-op170,Cast,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}}",11 
+16,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Conv2D-op13,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Conv2D-op13,Conv2D-op13,Conv2D,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}}",11 +17,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op93,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReluV2-op93,ReluV2-op93,ReluV2,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 10, 10, 2]}}",11 +18,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op177,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op177,Cast-op177,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 +19,72,1,Default/AtomicAddrClean-op196,Default/AtomicAddrClean-op196,AtomicAddrClean-op196,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}}",11 
+20,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op94,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op94,MaxPoolWithArgmax-op94,MaxPoolWithArgmax,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""output_1"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 3, 16]}}",11 +21,72,25,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op176,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op176,Cast-op176,Cast,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}}",11 +22,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/TransData-op143,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/TransData-op143,TransData-op143,TransData,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 16, 5, 5]}}",11 +23,72,32,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op95,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op95,MatMul-op95,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, 
""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 +24,72,4,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op96,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op96,Relu-op96,Relu,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 +25,72,1,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op97,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op97,MatMul-op97,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 +26,72,3,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op98,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/Relu-op98,Relu-op98,Relu,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 +27,72,1,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op99,Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/MatMul-op99,MatMul-op99,MatMul,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""output_0"": {""format"": 
""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 +28,72,2,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op100,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op100,SoftmaxCrossEntropyWithLogits-op100,SoftmaxCrossEntropyWithLogits,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 +29,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/Mul-op101,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/Mul-op101,Mul-op101,Mul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}}",11 +30,72,1,Default/AtomicAddrClean-op189,Default/AtomicAddrClean-op189,AtomicAddrClean-op189,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}}",11 
+31,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/ReduceMeanD-op82,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSparseSoftmaxCrossEntropyWithLogits/ReduceMeanD-op82,ReduceMeanD-op82,ReduceMeanD,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}}",11 +32,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op52,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op52,MatMul-op52,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}}",11 +33,72,1,Default/AtomicAddrClean-op195,Default/AtomicAddrClean-op195,AtomicAddrClean-op195,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 +34,72,6,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op49,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op49,BiasAddGrad-op49,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 +35,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op102,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op102,ApplyMomentumD-op102,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", 
""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10]}}",11 +36,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op34,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op34,MatMul-op34,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 10]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 +37,72,3,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op35,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op35,ReluGrad-op35,ReluGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}}",11 +38,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op103,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op103,ApplyMomentumD-op103,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", 
""shape"": [10, 84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [10, 84]}}",11 +39,72,1,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op58,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op58,MatMul-op58,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}}",11 +40,72,1,Default/AtomicAddrClean-op193,Default/AtomicAddrClean-op193,AtomicAddrClean-op193,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 +41,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op55,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op55,BiasAddGrad-op55,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 +42,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op104,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op104,ApplyMomentumD-op104,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_1"": {""format"": 
""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84]}}",11 +43,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op36,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op36,MatMul-op36,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 84]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 +44,72,4,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op37,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op37,ReluGrad-op37,ReluGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}}",11 +45,72,10,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op105,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op105,ApplyMomentumD-op105,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_2"": {""format"": ""DEFAULT"", 
""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [84, 120]}}",11 +46,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op38,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op38,MatMul-op38,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}}",11 +47,72,30,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op64,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul-op64,MatMul-op64,MatMul,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 400]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}}",11 +48,72,1,Default/AtomicAddrClean-op191,Default/AtomicAddrClean-op191,AtomicAddrClean-op191,AtomicAddrClean,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 
+49,72,16,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op61,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op61,BiasAddGrad-op61,BiasAddGrad,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 120]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 +50,72,1,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op106,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op106,ApplyMomentumD-op106,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120]}}",11 +51,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/flatten-Flatten/gradReshape/TransData-op155,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/flatten-Flatten/gradReshape/TransData-op155,TransData-op155,TransData,Gradients,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [32, 16, 5, 5]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}}",11 
+52,72,13,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op178,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op178,Cast-op178,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 5, 5, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}}",11 +53,72,3,Default/AtomicAddrClean-op192,Default/AtomicAddrClean-op192,AtomicAddrClean-op192,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 +54,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op107,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op107,MaxPoolGradWithArgmax-op107,MaxPoolGradWithArgmax,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 5, 5, 16]}, ""input_2"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 3, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 +55,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op174,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op174,Cast-op174,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}}",11 
+56,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op180,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op180,FusionOp_ReluGradV2_Cast-op180,FusionOp_ReluGradV2_Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 10, 10, 2]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}}",11 +57,72,32,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op109,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op109,ApplyMomentumD-op109,ApplyMomentumD,Default,"{""input_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}, ""output_1"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": [120, 400]}}",11 +58,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropInputD-op83,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropInputD-op83,Conv2DBackpropInputD-op83,Conv2DBackpropInputD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT16"", ""shape"": [25, 1, 
16, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}}",11 +59,72,1,Default/AtomicAddrClean-op190,Default/AtomicAddrClean-op190,AtomicAddrClean-op190,AtomicAddrClean,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +60,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilterD-op86,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilterD-op86,Conv2DBackpropFilterD-op86,Conv2DBackpropFilterD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 10, 10, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +61,72,7,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op110,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op110,ApplyMomentumD-op110,ApplyMomentumD,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +62,72,17,Default/AtomicAddrClean-op197,Default/AtomicAddrClean-op197,AtomicAddrClean-op197,AtomicAddrClean,Default,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", 
""shape"": [32, 1, 28, 28, 16]}}",11 +63,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op111,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/MaxPoolGradWithArgmax-op111,MaxPoolGradWithArgmax-op111,MaxPoolGradWithArgmax,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 14, 14, 16]}, ""input_2"": {""format"": ""NC1HWC0"", ""data_type"": ""UINT16"", ""shape"": [32, 1, 4, 14, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 +64,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op175,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPool/Cast-op175,Cast-op175,Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}}",11 +65,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op185,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/FusionOp_ReluGradV2_Cast-op185,FusionOp_ReluGradV2_Cast-op185,FusionOp_ReluGradV2_Cast,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT32"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""UINT8"", ""shape"": [32, 1, 28, 28, 2]}, ""output_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}}",11 
+66,72,1,Default/AtomicAddrClean-op194,Default/AtomicAddrClean-op194,AtomicAddrClean-op194,AtomicAddrClean,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +67,72,32,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilterD-op84,Gradients/Default/network-TrainOneStepCell/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilterD-op84,Conv2DBackpropFilterD-op84,Conv2DBackpropFilterD,Gradients,"{""input_0"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 28, 28, 16]}, ""input_1"": {""format"": ""NC1HWC0"", ""data_type"": ""FLOAT16"", ""shape"": [32, 1, 32, 32, 16]}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +68,72,7,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op113,Default/network-TrainOneStepCell/optimizer-Momentum/ApplyMomentumD-op113,ApplyMomentumD-op113,ApplyMomentumD,Default,"{""input_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_2"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""input_3"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""input_4"": {""format"": ""DEFAULT"", ""data_type"": ""FLOAT32"", ""shape"": []}, ""output_0"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}, ""output_1"": {""format"": ""FRAC_Z"", ""data_type"": ""FLOAT32"", ""shape"": [25, 1, 16, 16]}}",11 +69,72,1,Default/Profiling-op210,Default/Profiling-op210,Profiling-op210,Profiling,Default,{},11 +70,72,1,Default/Profiling-op211,Default/Profiling-op211,Profiling-op211,Profiling,Default,{},11 +71,72,1,Default/AssignAdd-op206,Default/AssignAdd-op206,AssignAdd-op206,AssignAdd,Default,"{""input_0"": 
{""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""input_1"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}, ""output_0"": {""format"": ""DEFAULT"", ""data_type"": ""INT32"", ""shape"": [1]}}",11 +72,72,1,Default/StreamActive-op207,Default/StreamActive-op207,StreamActive-op207,StreamActive,Default,{},11 diff --git a/tests/utils/resource/run_2/normal_run/profiler/aicpu_intermediate_1.csv b/tests/utils/resource/run_2/normal_run/profiler/aicpu_intermediate_1.csv index 49023581..062d1583 100644 --- a/tests/utils/resource/run_2/normal_run/profiler/aicpu_intermediate_1.csv +++ b/tests/utils/resource/run_2/normal_run/profiler/aicpu_intermediate_1.csv @@ -1,4 +1,4 @@ -serial_number,op_type,total_time,dispatch_time,execution_time,run_start,run_end +serial_number,kernel_type,total_time,dispatch_time,execution_time,run_start,run_end 1,InitData,7.906,0.15,0.22,154901853050,154901853210 2,GetNext,0.888,0.132,0.22,154907895189,154907895707 3,EndOfSequence,0.282,0.078,0.22,154907896287,154907896335 -- Gitee From 280f3f50fc8050f25e5f28096d9d77b1193a96b2 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Tue, 16 Jan 2024 10:12:17 +0800 Subject: [PATCH 05/19] Ascend timeline normalization and optimization. 
--- .../analyser/msprof_timeline_analyser.py | 617 +++++++++++++----- 1 file changed, 448 insertions(+), 169 deletions(-) diff --git a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 0883eed8..9920870d 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -44,8 +44,6 @@ def get_absolute_ts_start_info(pro_path) -> float: info = json.load(f) ts_us = Decimal(info.get("collectionTimeBegin", 0)).quantize(Decimal('0.000')) ts_ns = Decimal(info.get("clockMonotonicRaw", 0)).quantize(Decimal('0.000')) - if not ts_us and not ts_ns: - return 0 return ts_us - ts_ns / Decimal(1000) return 0 @@ -202,9 +200,10 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): if event_name.startswith('Iteration') and len(event_name.split(' ')) == 2: event['name'] = f"{tids.get(event.get('tid'))} {event_name}" - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) event['tid'] = pid @@ -216,8 +215,9 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): for event in raw_data: if (event.get('name') == 'process_name' and event.get("ph") == "M") or \ event.get('tid') in tids: - if difference_ts and event.get('ts'): - event['ts'] += difference_ts + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + event['ts'] = str(ts + difference_ts) new_events.append(event) return new_events @@ -316,10 +316,10 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): if event.get('name') in tid_mapper: event['pid'] = pid event['tid'] = tid_mapper.get(event.get('name')) - - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) 
new_events.append(event) return new_events @@ -392,10 +392,10 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): continue event['pid'] = pid - - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) new_events.append(event) return new_events @@ -471,7 +471,7 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): continue event['pid'] = pid - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) event['ts'] = str(ts + difference_ts) new_events.append(event) @@ -661,6 +661,131 @@ class MsprofTimelineAnalyser(BaseAnalyser): Analyse timeline data from file. """ + def __init__(self, profiling_dir, device_id=None): + super(MsprofTimelineAnalyser, self).__init__(profiling_dir, device_id) + self.top_scope_name = ('Default', 'Gradients', 'recompute_Default') + self.step_trace_index = 1 + self.cann_index = 2 + self.scope_index = 3 + self.ascend_hardware_index = 4 + self.hccl_index = 5 + self.cpu_index = 6 + self.overlap_index = 7 + + def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True, scope_name=False): + """ + Get the merged timeline + """ + + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) + + if rank_list: + new_sub_dirs = {} + for key, value in sub_dirs.items(): + if key in rank_list: + new_sub_dirs[key] = value + sub_dirs = new_sub_dirs + + if not sub_dirs: + logger.error('Could not found any rank from %s', rank_list) + return [] + + if kind == 'summary': + start = time.time() + summary_data = self._get_summary_timeline_data(sub_dirs, merge_model) + logger.info("Summary timeline time consuming: %s", time.time() - start) + return summary_data + + if kind == 'detail': + start = time.time() + detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model, scope_name) + logger.info("Detail timeline 
time consuming: %s", time.time() - start) + return detail_data + return [] + + def parse_cpu_timeline(self, file_list, rank_id, difference_ts, scope_name): + """Load cpu operator data from file""" + ms_to_us = 1e3 + ps_to_ns = 1e-3 + new_pid = int(f'{self.cpu_index}{rank_id}') + process_list = [{"name": "process_name", + "pid": new_pid, + "args": { + "name": f"CPU OP Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid, + "args": {"sort_index": self.cpu_index}, "ph": "M"} + ] + tid_set = set() + thread_list = [] + new_timeline = [] + scope_data = [] + try: + flags = os.O_RDONLY + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + for line in fr: + op_list = line.strip().split(';') + op_full_name = op_list[0] + time_arr = op_list[-1] + time_arr = time_arr.split(" ") + for time_str in time_arr: + ts, dur, tid = time_str.split(",") + ts = Decimal(ts).quantize(Decimal('0.000')) * Decimal(ps_to_ns).quantize(Decimal('0.000')) + + if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): + te = ts + Decimal(dur).quantize(Decimal('0.000')) + scope_data.append((op_full_name.split('/')[:-1], ts, te)) + + ts += difference_ts + + if int(tid) not in tid_set: + tid_set.add(int(tid)) + thread_list.append({"name": "thread_name", + "pid": new_pid, + "tid": int(tid), + "ph": "M", + 'args': {'name': f'thread {tid}'} + }) + + new_timeline.append({'name': op_list[0], + 'pid': new_pid, + 'tid': int(tid), + 'ph': 'X', + 'ts': str(ts), + 'dur': float(dur) * ms_to_us, + 'args': + {'type': op_list[1]} + }) + break + + return process_list + thread_list + new_timeline, scope_data + + except ValidationError as err: + logger.error('parse_cann_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_cann_data failed! please theck. 
detail: %s', err) + return [] + + def get_option(self): + """ + Get the option values + """ + # get all job path, like PROF_* + sub_dirs = get_job_dir(self._profiling_dir) + rank_list = list(sub_dirs.keys()) + rank_list.sort() + + _, model_merged = self._get_models(sub_dirs) + model_list = list(model_merged) + model_list.sort() + + return {'rank_list': rank_list, 'model_list': model_list} + def _load(self): """Load data according to the parsed profiling files.""" @@ -678,7 +803,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): """ pattern1 = re.compile(r'Step Trace\(Model ID:(\d)+\)') pattern2 = re.compile(r'(\d)+') - metadata = {} + tid_mapper = {} pid = None for event in raw_data: if event.get("ph") != "M": @@ -686,9 +811,6 @@ class MsprofTimelineAnalyser(BaseAnalyser): if event.get('name') == 'process_name': pid = event.get('pid') - - if pid not in metadata: - metadata[pid] = {} continue if event.get('name') == 'thread_name': @@ -702,13 +824,12 @@ class MsprofTimelineAnalyser(BaseAnalyser): model_id = model_id.group() tid = event.get('tid') if not model_list or int(model_id) in model_list: - metadata[event.get('pid')][tid] = f'Model {model_id}' - - return metadata, pid + tid_mapper[tid] = f'Model {model_id}' - def _parse_step_trace_merge(self, rank_id, old_pid, new_pid, raw_data, metadata, difference_ts): - """parse data with merge model mode """ + return tid_mapper, pid + def _parse_step_trace_merge(self, old_pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts): + """merge step trace data""" new_events = [{ "name": "process_name", "pid": new_pid, @@ -716,6 +837,13 @@ class MsprofTimelineAnalyser(BaseAnalyser): "name": f"Step Trace Rank{rank_id}" }, "ph": "M" + }, { + "name": "process_sort_index", + "pid": new_pid, + "args": { + "sort_index": self.step_trace_index + }, + "ph": "M" }, { "name": "thread_name", "pid": new_pid, @@ -727,7 +855,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): }] for event in raw_data: - arg_name = metadata.get(old_pid, 
{}).get(event.get('tid')) + arg_name = tid_mapper.get(event.get('tid')) if event.get('ph') == 'M' or event.get('pid') != old_pid or not arg_name: continue @@ -736,30 +864,32 @@ class MsprofTimelineAnalyser(BaseAnalyser): event_name.split(' ')) == 2: event['name'] = f"{arg_name} {event_name}" - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) event['pid'] = new_pid event['tid'] = 0 new_events.append(event) return new_events - def _parse_step_trace_notmerge(self, rank_id, old_pid, new_pid, raw_data, metadata, difference_ts): - """parse data with not merge model mode """ - + def _parse_step_trace_not_merge(self, old_pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts): + """not merge step trace data""" new_events = [] for event in raw_data: - arg_name = metadata.get(old_pid, {}).get(event.get('tid')) + arg_name = tid_mapper.get(event.get('tid')) if event.get('pid') != old_pid or not arg_name: continue if event.get('name') == 'process_name' and event.get('ph') == 'M': event['args']['name'] = f"Step Trace Rank{rank_id}" + elif event.get('name') == 'process_sort_index' and event.get('ph') == 'M': + event['args']['sort_index'] = self.step_trace_index event['pid'] = new_pid - - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) new_events.append(event) return new_events @@ -774,15 +904,18 @@ class MsprofTimelineAnalyser(BaseAnalyser): with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: raw_data.extend(json.load(fr)) - metadata, old_pid = self._parse_step_trace_metadata(raw_data, model_list) - if not metadata: + tid_mapper, pid = self._parse_step_trace_metadata(raw_data, model_list) + if not pid: logger.error('Could not found process_name pid. 
method: _parse_step_trace_data') return [] - new_pid = int(f'2{rank_id}') + new_pid = int(f'{self.step_trace_index}{rank_id}') + if merge_model: - return self._parse_step_trace_merge(rank_id, old_pid, new_pid, raw_data, metadata, difference_ts) - return self._parse_step_trace_notmerge(rank_id, old_pid, new_pid, raw_data, metadata, difference_ts) + return self._parse_step_trace_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) + + return self._parse_step_trace_not_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) + except ValidationError as err: logger.error('parse_step_trace_data failed! please theck. detail: %s', err) raise ValidationError from err @@ -810,11 +943,11 @@ class MsprofTimelineAnalyser(BaseAnalyser): break if not pid: - print('Could not found process_name pid. method: _parse_overlap_analysis_data') + logger.warning('Could not found process_name pid. method: _parse_overlap_analysis_data') return [] new_events = [] - new_pid = int(f'1{rank_id}') + new_pid = int(f'{self.overlap_index}{rank_id}') for event in raw_data: if event.get('pid') != pid: continue @@ -822,81 +955,103 @@ class MsprofTimelineAnalyser(BaseAnalyser): if event.get('name') == 'process_name' and event.get("ph") == "M": event["args"]["name"] += f" Rank{rank_id}" + if event.get('name') == 'process_sort_index' and event.get("ph") == "M": + event["args"]["sort_index"] = self.overlap_index + event['pid'] = new_pid - if difference_ts and event.get('ts'): + if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) + ts += difference_ts + event['ts'] = str(ts) new_events.append(event) return new_events except ValidationError as err: - print('parse_overlap_analysis_data failed! please theck. detail: %s', err) + logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) raise ValidationError from err except (IOError, OSError, json.JSONDecodeError) as err: - print('parse_overlap_analysis_data failed! please theck. detail: %s', err) + logger.error('parse_overlap_analysis_data failed! please theck. detail: %s', err) return [] - def _parse_ascend_hardware_data(self, file_list, rank_id, difference_ts, model_list): + def _parse_ascend_hardware_metadata(self, new_pid, raw_data): + """ + Get ascend hardware by merge models + """ + tid_mapper = {} + pid = None + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Ascend Hardware': + pid = event.get('pid') + + elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ + 'Stream' in event.get('args').get('name'): + event['pid'] = new_pid + tid_mapper.update({event.get('tid'): event}) + return pid, tid_mapper + + def _parse_ascend_hardware_data(self, file_list, rank_id, difference_ts, model_list, scope_name): """ parse ascend hardware data """ + flags = os.O_RDONLY + raw_data = [] + + new_events = [] + tid_set = set() + new_pid = int(f'{self.ascend_hardware_index}{rank_id}') + new_metadata = [{ + "name": "process_name", + "pid": new_pid, + "args": { + "name": f"Ascend Hardware Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid, + "args": {"sort_index": self.ascend_hardware_index}, "ph": "M"}] + scope_data = [] + model_id_set = set() try: - flags = os.O_RDONLY - raw_data = [] for file_path in file_list: with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: raw_data.extend(json.load(fr)) - new_events = [] - pid = None - tid_mapper = {} - tid_set = set() - new_pid = int(f'3{rank_id}') - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'Ascend Hardware': - pid = event.get('pid') - continue - - if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - 
'Stream' in event.get('args').get('name'): - event['pid'] = new_pid - tid_mapper.update({event.get('tid'): event}) - continue - - if event.get("ph") != "M": - model_id = event.get('args', {}).get('Model Id') - tid = event.get('tid') - if model_list and model_id not in model_list: - continue - - if difference_ts and event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) - event['pid'] = new_pid - tid_set.add(tid) - new_events.append(event) + pid, tid_mapper = self._parse_ascend_hardware_metadata(new_pid, raw_data) if not pid: logger.error('Could not found process_name pid. method: _parse_ascend_hardware_data') return [] - new_metadata = [{ - "name": "process_name", - "pid": new_pid, - "args": { - "name": f"Ascend Hardware Rank{rank_id}" - }, - "ph": "M" - }] + for event in raw_data: + model_id = event.get('args', {}).get('Model Id') + model_id_set.add(model_id) + if event.get("ph") == "M" or (model_list and model_id not in model_list): + continue - for tid in tid_set: - new_metadata.append(tid_mapper.get(tid)) + op_full_name = event.get('name') + if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000')) + scope_data.append((op_full_name.split('/')[:-1], ts, te)) - return new_metadata + new_events + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts += difference_ts + event['ts'] = str(ts) + event['pid'] = new_pid + tid_set.add(event.get('tid')) + new_events.append(event) + + for tid in tid_set: + thread_event = tid_mapper.get(tid) + if thread_event is None: + thread_event = {"name": "thread_name", "pid": new_pid, + "tid": tid, "args": {"name": f"Stream {tid}"}, "ph": "M"} + new_metadata.append(thread_event) + return new_metadata + new_events, scope_data except ValidationError as err: logger.error('parse_ascend_hardware_data 
failed! please theck. detail: %s', err) @@ -921,37 +1076,40 @@ class MsprofTimelineAnalyser(BaseAnalyser): tid_mapper = {} tid_set = set() new_events = [] - new_pid = int(f'4{rank_id}') + new_pid = int(f'{self.hccl_index}{rank_id}') + model_id_set = set() + for event in raw_data: if event.get('name') == 'process_name' and event.get("ph") == "M" and \ event.get('args').get('name') == 'HCCL': pid = event.get('pid') - continue - if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ + elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ ('Plane' in event.get('args').get('name') or 'Communication' in event.get('args').get('name')): event['pid'] = new_pid tid_mapper.update({event.get('tid'): event}) - continue - - if event.get("ph") != "M": - model_id = event.get('args', {}).get('model id') - tid = event.get('tid') - if model_list and model_id not in model_list: - continue - - if difference_ts and event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) - - event['pid'] = new_pid - tid_set.add(tid) - new_events.append(event) if not pid: logger.error('Could not found process_name pid. 
method: _parse_hccl_data') return [] + for event in raw_data: + model_id = event.get('args', {}).get('model id') + model_id_set.add(model_id) + if event.get("ph") == "M" or (model_list and model_id not in model_list): + continue + + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts += difference_ts + event['ts'] = str(ts) + + event['pid'] = new_pid + new_events.append(event) + + tid = event.get('tid') + tid_set.add(tid) + new_metadata = [{ "name": "process_name", "pid": new_pid, @@ -959,11 +1117,11 @@ class MsprofTimelineAnalyser(BaseAnalyser): "name": f"HCCL Rank{rank_id}" }, "ph": "M" - }] + }, {"name": "process_sort_index", "pid": new_pid, + "args": {"sort_index": self.hccl_index}, "ph": "M"}] for tid in tid_set: new_metadata.append(tid_mapper.get(tid)) - return new_metadata + new_events except ValidationError as err: @@ -974,6 +1132,137 @@ class MsprofTimelineAnalyser(BaseAnalyser): logger.error('parse_hccl_data failed! please theck. detail: %s', err) return [] + def _parse_cann_data(self, file_list, rank_id, difference_ts): + """ + pid: 1 rank + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + pid = None + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'CANN': + pid = event.get('pid') + break + + if not pid: + logger.warning('Could not found process_name pid. 
method: _parse_cann_data') + return [] + + new_events = [] + new_pid = int(f'{self.cann_index}{rank_id}') + for event in raw_data: + if event.get('pid') != pid: + continue + if event.get('name') == 'process_name' and event.get("ph") == "M": + event["args"]["name"] += f" Rank{rank_id}" + + if event.get('name') == 'process_sort_index' and event.get("ph") == "M": + event["args"]["sort_index"] = self.cann_index + + event['pid'] = new_pid + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts += difference_ts + event['ts'] = str(ts) + + new_events.append(event) + + return new_events + + except ValidationError as err: + logger.error('parse_cann_data failed! please theck. detail: %s', err) + raise ValidationError from err + + except (IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_cann_data failed! please theck. detail: %s', err) + return [] + + def _parse_scope_info(self, scope_data, rank_id, difference_ts): + """parse scope layer""" + if not scope_data: + return [] + new_pid = int(f'{self.scope_index}{rank_id}') + scope_data.sort(key=lambda x: x[1]) + process_list = [ + {"name": "process_name", + "pid": new_pid, + "args": { + "name": f"Scope Layer Rank{rank_id}" + }, + "ph": "M"}, + {"name": "process_sort_index", + "pid": new_pid, + "args": {"sort_index": self.scope_index}, + "ph": "M"} + ] + + new_events = [] + layer_stack = [] + for layer_name in scope_data[0][0]: + layer_stack.append([layer_name, scope_data[0][1], scope_data[0][2]]) + + for op in scope_data[1:]: + if op[1] < layer_stack[0][2]: + # 并行算子只保留前面的 + continue + flag = True # 判断上层是否合并, 上层不合并下层也不合并 + for layer_depth, layer_name in enumerate(op[0]): + if layer_depth >= len(layer_stack): + layer_stack.append([layer_name, op[1], op[2]]) + else: + if layer_stack[layer_depth][0] == layer_name and flag: + layer_stack[layer_depth][2] = op[2] # 合并 + else: + ts = layer_stack[layer_depth][1] + ts += difference_ts + new_events.append({ + "name": 
layer_stack[layer_depth][0], + "pid": new_pid, + "tid": layer_depth, + "ph": "X", + "ts": str(ts), + "dur": float(layer_stack[layer_depth][2] - layer_stack[layer_depth][1]) + }) + layer_stack[layer_depth] = [layer_name, op[1], op[2]] + flag = False + + thread_list = [] + for index, layer in enumerate(layer_stack): + thread_list.extend([{ + "name": "thread_name", + "pid": new_pid, + "tid": index, + "args": { + "name": f"layer{index}" + }, + "ph": "M" + }, { + "name": "thread_sort_index", + "pid": new_pid, + "tid": index, + "args": {"sort_index": index}, + "ph": "M" + }]) + if layer: + ts = layer[1] + ts += difference_ts + new_events.append({ + "name": layer[0], + "pid": new_pid, + "tid": index, + "ph": "X", + "ts": str(ts), + "dur": float(layer[2] - layer[1]) + }) + + return process_list + thread_list + new_events + def _get_summary_timeline_data(self, sub_dirs, merge_model): """ Get summary timeline @@ -1006,7 +1295,6 @@ class MsprofTimelineAnalyser(BaseAnalyser): rank_id, difference_ts)) all_done = list(range(len(task_list))) - print(all_done) while all_done: for ind, t in enumerate(task_list): if ind in all_done and t.done(): @@ -1015,7 +1303,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): return timeline_data - def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model): + def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model, scope_name): """ Get detail timeline Returns: @@ -1024,8 +1312,19 @@ class MsprofTimelineAnalyser(BaseAnalyser): timeline_data = [] task_list = [] + + _, model_merged = self._get_models(sub_dirs) + model_list_all = list(model_merged) + if model_list_all: + model_list_all.sort() + if model_list: + model_list.sort() + if model_list_all == model_list: + model_list = None + with ThreadPoolExecutor() as pool: for rank_id, (job_dir, difference_ts) in sub_dirs.items(): + all_scope_data = [] # 所有带scope的算子 # get step_trace data step_trace_file_name = fr'{job_dir}/timeline/step_trace_*.json' @@ -1034,12 +1333,7 @@ class 
MsprofTimelineAnalyser(BaseAnalyser): logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) else: task_list.append(pool.submit(self._parse_step_trace_data, get_newest_file(file_list_step_trace), - rank_id, difference_ts, model_list, - merge_model)) - - # timeline_data.extend(self._parse_step_trace_data(get_newest_file(file_list_step_trace), - # rank_id, difference_ts, model_list, - # merge_model)) + rank_id, difference_ts, model_list, merge_model)) # get Ascend Hardware hardware_file_name = fr'{job_dir}/timeline/task_time_*.json' @@ -1047,11 +1341,11 @@ class MsprofTimelineAnalyser(BaseAnalyser): if not file_list_hardware: logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) else: - task_list.append(pool.submit(self._parse_ascend_hardware_data, get_newest_file(file_list_hardware), - rank_id, difference_ts, model_list)) - - # timeline_data.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), - # rank_id, difference_ts, model_list)) + ascend_timeline, scope_data = self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), + rank_id, difference_ts, model_list, + scope_name) + timeline_data.extend(ascend_timeline) + all_scope_data.extend(scope_data) # get hccl hccl_file_name = fr'{job_dir}/timeline/hccl_*.json' @@ -1062,11 +1356,43 @@ class MsprofTimelineAnalyser(BaseAnalyser): task_list.append(pool.submit(self._parse_hccl_data, get_newest_file(file_list_hccl), rank_id, difference_ts, model_list)) - # timeline_data.extend(self._parse_hccl_data(get_newest_file(file_list_hccl), - # rank_id, difference_ts, model_list)) + if not model_list: + # get CANN + cann_file_name = fr'{job_dir}/timeline/msprof_*.json' + file_list = glob.glob(cann_file_name) + + if not file_list: + logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_cann_data, get_newest_file(file_list), + 
rank_id, difference_ts)) + + # get overlap analysis + overlap_file_name = fr'{job_dir}/timeline/msprof_*.json' + file_list = glob.glob(overlap_file_name) + if not file_list: + logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) + else: + task_list.append(pool.submit(self._parse_overlap_analysis_data, get_newest_file(file_list), + rank_id, difference_ts)) + + # get cpu op + cpu_op_file_name = fr'{self._profiling_dir}/cpu_op_execute_timestamp_{rank_id}.txt' + file_list = glob.glob(cpu_op_file_name) + + if not file_list: + logger.warning('Could not find cpu op file in %s', job_dir) + else: + cpu_timeline, scope_data = self.parse_cpu_timeline(get_newest_file(file_list), + rank_id, difference_ts, scope_name) + timeline_data.extend(cpu_timeline) + all_scope_data.extend(scope_data) + + # parse scope info + task_list.append(pool.submit(self._parse_scope_info, all_scope_data, + rank_id, difference_ts)) all_done = list(range(len(task_list))) - print(all_done) while all_done: for ind, t in enumerate(task_list): if ind in all_done and t.done(): @@ -1075,38 +1401,6 @@ class MsprofTimelineAnalyser(BaseAnalyser): return timeline_data - def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True): - """ - Get the merged timeline - """ - - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) - - if rank_list: - new_sub_dirs = {} - for key, value in sub_dirs.items(): - if key in rank_list: - new_sub_dirs[key] = value - sub_dirs = new_sub_dirs - - if not sub_dirs: - logger.error('Could not found any rank from %s', rank_list) - return [] - - if kind == 'summary': - start = time.time() - summary_data = self._get_summary_timeline_data(sub_dirs, merge_model) - print(time.time() - start) - return summary_data - - if kind == 'detail': - start = time.time() - detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model) - print(time.time() - start) - return detail_data - return [] - def 
_get_models(self, sub_dirs): """ Get all models @@ -1128,18 +1422,3 @@ class MsprofTimelineAnalyser(BaseAnalyser): model_dict[rank_id] = model_set model_merged.update(model_set) return model_dict, model_merged - - def get_option(self): - """ - Get the option values - """ - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) - rank_list = list(sub_dirs.keys()) - rank_list.sort() - - _, model_merged = self._get_models(sub_dirs) - model_list = list(model_merged) - model_list.sort() - - return {'rank_list': rank_list, 'model_list': model_list} -- Gitee From 70c142f44f9401494310b99413889dbe493ce579 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Tue, 16 Jan 2024 20:56:14 +0800 Subject: [PATCH 06/19] Timeline normalization and optimization for UI --- mindinsight/backend/profiler/profile_api.py | 11 +- mindinsight/ui/src/locales/en-us.json | 1 + mindinsight/ui/src/locales/zh-cn.json | 1 + .../performance/performance-dashboard.vue | 285 ++---------------- 4 files changed, 36 insertions(+), 262 deletions(-) diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index e7bc3b7b..4f8c9918 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -585,6 +585,7 @@ def get_msprof_timeline(): model_list = request.args.get("model_list", None) kind = request.args.get("kind", None) merge_model = request.args.get("merge_model", True) + scope_name = request.args.get("scope_name", False) if rank_list: rank_list = [int(rank_id) for rank_id in rank_list.split(',')] @@ -600,16 +601,20 @@ def get_msprof_timeline(): else: merge_model = True + if scope_name == 'false': + scope_name = False + else: + scope_name = True + flag = get_all_export_flag(profiler_dir_abs) if flag: analyser = AnalyserFactory.instance().get_analyser( 'msprof_timeline', profiler_dir_abs, None) + timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model, scope_name) else: 
analyser = AnalyserFactory.instance().get_analyser( 'msprof_timeline_old', profiler_dir_abs, None) - - timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model) - + timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model) return jsonify(timeline) diff --git a/mindinsight/ui/src/locales/en-us.json b/mindinsight/ui/src/locales/en-us.json index 3382a5b7..99e4a824 100644 --- a/mindinsight/ui/src/locales/en-us.json +++ b/mindinsight/ui/src/locales/en-us.json @@ -630,6 +630,7 @@ "modelList": "subgraphs: ", "kind": "data kind: ", "mergeModel": "merge subgraphs: ", + "scopeName": "split op scope layer: ", "select": "default select all", "flopsScopeTipOne": "Node movement: Drag the scope node to move it to the specified position.", "flopsScopeTipTwo": "Reset: Restore the graphics to the Initial state.", diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index e1bff9e4..35aca0e5 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -630,6 +630,7 @@ "modelList": "子图:", "kind": "数据类型:", "mergeModel": "合并多子图:", + "scopeName": "切分算子scope层级:", "select": "默认选择全部:", "flopsScopeTipOne": "节点移动:通过拖动scope节点,将其移动到指定位置。", "flopsScopeTipTwo": "重置:将图形恢复到初始状态。", diff --git a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue index 0845cf8c..9ef8a44a 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue @@ -374,7 +374,7 @@ limitations under the License.
- +
{{ $t('profiling.timeLineMsprof') }}
@@ -426,7 +426,7 @@ limitations under the License.
+ v-if="!timelineInfoMsprof.noData">
{{$t('profiling.rankList')}} @@ -487,123 +487,12 @@ limitations under the License.
-
- -
-
- -
-

{{$t("public.dataLoading")}}

-

{{$t("public.noData")}}

-
- -
-
{{ $t('profiling.timeLine') }}
-
- -
-
-
{{$t("profiling.features")}}
-
{{$t("profiling.timelineTips.title1")}}
-
{{$t("profiling.timelineTips.content11")}}
-
{{$t("profiling.timelineTips.content12")}}
-
{{$t("profiling.timelineTips.content13")}}
-
{{$t("profiling.timelineTips.content14")}}
-
-
{{$t("profiling.timelineTips.title2")}}
-
- {{$t("profiling.timelineTips.content21.part1")}} - {{$t("profiling.timelineTips.content21.part2")}} - {{$t("profiling.timelineTips.content21.part3")}} -
-
{{$t("profiling.timelineTips.content22")}}
-
- {{$t("profiling.timelineTips.content23.part1")}} - {{$t("profiling.timelineTips.content23.part2")}} - {{$t("profiling.timelineTips.content23.part3")}} - {{$t("profiling.timelineTips.content23.part4")}} - {{$t("profiling.timelineTips.content23.part5")}} - {{$t("profiling.timelineTips.content23.part6")}} - {{$t("profiling.timelineTips.content23.part7")}} -
-
-
{{$t("profiling.timelineTips.title3")}}
-
{{$t("profiling.timelineTips.content31")}}
-
{{$t("profiling.timelineTips.content32")}}
-
-
{{$t("profiling.timelineTips.title4")}}
-
{{$t("profiling.timelineTips.content41")}}
-
{{$t("profiling.timelineTips.content42")}}
-
{{$t("profiling.timelineTips.content43")}}
-
{{$t("profiling.timelineTips.content44")}}
-
{{$t("profiling.timelineTips.content45")}}
-
{{$t("profiling.timelineTips.content46")}}
-
{{$t("profiling.timelineTips.content47")}}
-
{{$t("profiling.timelineTips.content48")}}
-
{{$t("profiling.timelineTips.content49")}}
-
{{$t("profiling.timelineTips.content410")}}
-
{{$t("profiling.timelineTips.content411")}}
-
{{$t("profiling.timelineTips.content412")}}
-
-
-
{{$t("profiling.features")}}
-
{{$t("profiling.timelineTips.title1")}}
-
{{$t("profiling.pynativeTimelineTips.content11")}}
-
{{$t("profiling.pynativeTimelineTips.content12")}}
-
-
{{$t("profiling.timelineTips.title2")}}
-
- {{$t("profiling.timelineTips.content21.part1")}} - {{$t("profiling.timelineTips.content21.part2")}} - {{$t("profiling.timelineTips.content21.part3")}} -
-
{{$t("profiling.timelineTips.content22")}}
-
- {{$t("profiling.timelineTips.content23.part1")}} - {{$t("profiling.timelineTips.content23.part2")}} - {{$t("profiling.timelineTips.content23.part3")}} - {{$t("profiling.timelineTips.content23.part4")}} - {{$t("profiling.timelineTips.content23.part5")}} - {{$t("profiling.timelineTips.content23.part6")}} - {{$t("profiling.timelineTips.content23.part7")}} -
-
-
{{$t("profiling.timelineTips.title3")}}
-
{{$t("profiling.pynativeTimelineTips.content31")}}
-
-
{{$t("profiling.timelineTips.title4")}}
-
{{$t("profiling.pynativeTimelineTips.content41")}}
-
{{$t("profiling.pynativeTimelineTips.content42")}}
-
{{$t("profiling.pynativeTimelineTips.content43")}}
-
-
- -
-
-
- -
-
-
- -
-
- {{$t('profiling.scopeNameNum')}} - - + {{$t('profiling.scopeName')}} + + + @@ -611,29 +500,17 @@ limitations under the License.
-
- {{$t('profiling.opTotalTime')}}{{timelineInfo.totalTime}}ms -
-
- {{$t('profiling.streamNum')}}{{timelineInfo.streamNum}} -
-
- {{$t('profiling.opNum')}}{{timelineInfo.opNum}} -
-
- {{$t('profiling.opTimes')}}{{timelineInfo.opTimes + $t('profiling.times')}} -
+
+ v-if="timelineInfoMsprof.noData && !isPynative">
-

{{$t("public.dataLoading")}}

-

{{$t("public.noData")}}

+

{{$t("public.dataLoading")}}

+

{{$t("public.noData")}}

-
@@ -710,23 +587,6 @@ export default { colorList: CommonProperty.pieColorArr[this.$store.state.themeIndex], initOver: false, // Is initialization complete }, - timeLine: { - // Time line data - data: null, - waiting: false, // Is it waiting for interface return - disable: true, - }, - timelineInfo: { - // Time line information - totalTime: 0, - streamNum: 0, - opNum: 0, // Number of operators - opTimes: 0, // Operator time consuming - noData: true, - initOver: false, // Is initialization complete - scopeNameNum: '', - scopeNameNumArr: [], - }, timeLineMsprof: { // Time line data data: null, @@ -735,6 +595,8 @@ export default { }, timelineInfoMsprof: { // Time line information + noData: true, + initOver: false, rankListSelected: [], rankList: [], modelListSelected: [], @@ -755,6 +617,14 @@ export default { value: false, label: 'No' }], + scopeNameSelected: false, + scopeNameArr: [{ + value: true, + label: 'Yes' + }, { + value: false, + label: 'No' + }], }, processSummary: { // Data of process summary @@ -863,7 +733,6 @@ export default { if (isInteger(newValue)) { this.svg.initOver = false; this.pieChart.initOver = false; - this.timelineInfo.initOver = false; this.processSummary.initOver = false; this.init(); } else { @@ -872,9 +741,7 @@ export default { this.svg.noData = true; this.svg.initOver = true; this.pieChart.initOver = true; - this.timelineInfo.initOver = true; this.processSummary.initOver = true; - this.timeLine.waiting = false; } } }, @@ -886,7 +753,6 @@ export default { * Initialization function */ init() { - this.queryTimelineInfo(); if (this.isDynamic) { this.$nextTick(() => { this.initShapeStep(); @@ -1570,83 +1436,7 @@ export default { this.svg.resizeTimer = null; }, 500); }, - /** - * Query the data of time line - */ - queryTimelineInfo() { - const params = { - dir: this.trainInfo.path, - device_id: this.rankID, - }; - RequestService.queryTimelineInfo(params) - .then((res) => { - this.timelineInfo.initOver = true; - if (res && res.data) { - 
this.timelineInfo.noData = false; - this.timelineInfo.totalTime = - this.toFixedFun(res.data.total_time, 4) || (res.data.total_time === 0 ? 0 : '--'); - this.timelineInfo.streamNum = res.data.num_of_streams || (res.data.num_of_streams === 0 ? 0 : '--'); - this.timelineInfo.opNum = res.data.num_of_ops || (res.data.num_of_ops === 0 ? 0 : '--'); - this.timelineInfo.opTimes = res.data.op_exe_times || (res.data.op_exe_times === 0 ? 0 : '--'); - if (res.data.max_scope_name_num >= 0) { - this.timelineInfo.scopeNameNum = res.data.max_scope_name_num; - this.timelineInfo.scopeNameNumArr = Array(res.data.max_scope_name_num + 1) - .fill() - .map((value, key) => { - return { - label: key, - value: key, - }; - }); - this.queryTimeline(); - } else { - this.timeLine.disable = true; - } - } else { - this.timelineInfo.noData = true; - this.timeLine.disable = true; - } - }) - .catch(() => { - this.timelineInfo.noData = true; - this.timelineInfo.initOver = true; - this.timeLine.disable = true; - }); - }, - queryTimeline() { - this.timeLine.waiting = true; - this.timeLine.disable = true; - const params = { - dir: this.trainInfo.path, - device_id: this.rankID, - scope_name_num: this.timelineInfo.scopeNameNum, - }; - RequestService.queryTimeline(params) - .then((res) => { - this.timeLine.waiting = false; - if (res && res.data && res.data.length) { - this.timeLine.data = JSON.stringify(res.data); - this.timeLine.disable = false; - } - }) - .catch(() => { - this.timeLine.waiting = false; - }); - }, - /** - * Download timeline data file - */ - downloadTimelineFile() { - const downloadLink = document.createElement('a'); - downloadLink.download = this.getDocName(); - downloadLink.style.display = 'none'; - const blob = new Blob([this.timeLine.data]); - downloadLink.href = URL.createObjectURL(blob); - document.body.appendChild(downloadLink); - downloadLink.click(); - document.body.removeChild(downloadLink); - }, /** * Query the data of msprof time line */ @@ -1656,7 +1446,7 @@ export default { 
}; RequestService.queryMsprofTimelineOption(params) .then((res) => { - this.timelineInfo.initOver = true; + this.timelineInfoMsprof.initOver = true; if (res && res.data && res.data.rank_list && res.data.model_list) { this.timelineInfoMsprof.noData = false; this.timeLineMsprof.disable = false; @@ -1681,7 +1471,6 @@ export default { }) .catch(() => { this.timelineInfoMsprof.noData = true; - this.timelineInfoMsprof.initOver = true; this.timeLineMsprof.disable = true; }); }, @@ -1694,14 +1483,15 @@ export default { model_list: this.timelineInfoMsprof.modelListSelected, kind: this.timelineInfoMsprof.kindSelected, merge_model: this.timelineInfoMsprof.mergeModelSelected, + scope_name: this.timelineInfoMsprof.scopeNameSelected, }; RequestService.queryMsprofTimeline(params) .then((res) => { this.timeLineMsprof.waiting = false; if (res && res.data && res.data.length) { this.timeLineMsprof.data = JSON.stringify(res.data); + this.downloadTimelineMsprofFile(); this.timeLineMsprof.disable = false; - this.downloadTimelineMsprofFile() } }) .catch(() => { @@ -1758,30 +1548,7 @@ export default { this.processSummary.noData = false; } }, - /** - * Generate a download file name - * @return {String} - */ - getDocName() { - const dealNumber = (value) => { - const prefix = value < 10 ? 
'0' : ''; - return prefix + value; - }; - const replacedPrefix = './'; - let dir = this.trainInfo.dir; - if (dir === replacedPrefix) dir = ' '; - if (dir.startsWith(replacedPrefix)) dir = dir.replace(replacedPrefix, ''); - const date = new Date(); - const year = date.getFullYear(); - const mouth = dealNumber(date.getMonth() + 1); - const day = dealNumber(date.getDate()); - const hour = dealNumber(date.getHours()); - const minute = dealNumber(date.getMinutes()); - const second = dealNumber(date.getSeconds()); - const millisecond = date.getMilliseconds(); - const timestamp = `${year}${mouth}${day}${hour}${minute}${second}${millisecond}`; - return `timeline_${dir}_${this.rankID}_scope-num-${this.timelineInfo.scopeNameNum}_${timestamp}.json`; - }, + /** * Generate a download msprof file name * @return {String} -- Gitee From 596ccb77f9d713842a5ad326822fd6b0fcf664f4 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Wed, 17 Jan 2024 16:50:20 +0800 Subject: [PATCH 07/19] Ascend timeline normalization and optimization support pynative mode. 
--- mindinsight/backend/profiler/profile_api.py | 4 +- .../analyser/msprof_timeline_analyser.py | 67 +++++-------------- .../performance/performance-dashboard.vue | 10 +-- 3 files changed, 23 insertions(+), 58 deletions(-) diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index 4f8c9918..b8be6eba 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -584,8 +584,8 @@ def get_msprof_timeline(): rank_list = request.args.get("rank_list", None) model_list = request.args.get("model_list", None) kind = request.args.get("kind", None) - merge_model = request.args.get("merge_model", True) - scope_name = request.args.get("scope_name", False) + merge_model = request.args.get("merge_model", 'true') + scope_name = request.args.get("scope_name", 'false') if rank_list: rank_list = [int(rank_id) for rank_id in rank_list.split(',')] diff --git a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 9920870d..393ec352 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -221,11 +221,8 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): new_events.append(event) return new_events - except ValidationError as err: - logger.error('parse_step_trace_data failed! please theck. detail: %s', err) - raise ValidationError from err - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_step_trace_data failed! please theck. detail: %s', err) return [] @@ -323,11 +320,7 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): new_events.append(event) return new_events - except ValidationError as err: - logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_overlap_analysis_data failed! please theck. detail: %s', err) return [] @@ -400,11 +393,7 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): return new_events - except ValidationError as err: - logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) return [] @@ -478,11 +467,7 @@ class MsprofTimelineOldAnalyser(BaseAnalyser): return new_events - except ValidationError as err: - logger.error('parse_hccl_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_hccl_data failed! please theck. detail: %s', err) return [] @@ -763,11 +748,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): return process_list + thread_list + new_timeline, scope_data - except ValidationError as err: - logger.error('parse_cann_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_cann_data failed! please theck. detail: %s', err) return [] @@ -916,11 +897,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): return self._parse_step_trace_not_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) - except ValidationError as err: - logger.error('parse_step_trace_data failed! please theck. 
detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_step_trace_data failed! please theck. detail: %s', err) return [] @@ -968,11 +945,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): return new_events - except ValidationError as err: - logger.error('parse_overlap_analysis_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_overlap_analysis_data failed! please theck. detail: %s', err) return [] @@ -1053,11 +1026,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): new_metadata.append(thread_event) return new_metadata + new_events, scope_data - except ValidationError as err: - logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_ascend_hardware_data failed! please theck. detail: %s', err) return [] @@ -1124,11 +1093,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): new_metadata.append(tid_mapper.get(tid)) return new_metadata + new_events - except ValidationError as err: - logger.error('parse_hccl_data failed! please theck. detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_hccl_data failed! please theck. detail: %s', err) return [] @@ -1175,11 +1140,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): return new_events - except ValidationError as err: - logger.error('parse_cann_data failed! please theck. 
detail: %s', err) - raise ValidationError from err - - except (IOError, OSError, json.JSONDecodeError) as err: + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_cann_data failed! please theck. detail: %s', err) return [] @@ -1410,9 +1371,13 @@ class MsprofTimelineAnalyser(BaseAnalyser): for rank_id, (job_dir, _) in sub_dirs.items(): step_trace_file_name = fr'{job_dir}/summary/step_trace_*.csv' file_list = glob.glob(step_trace_file_name) - file_name = max(file_list) model_set = set() - with open(file_name, 'r', newline='') as fr: + if not file_list: + model_dict[rank_id] = model_set + model_merged.update(model_set) + continue + + with open(max(file_list), 'r', newline='') as fr: reader = csv.DictReader(fr, delimiter=',', quotechar='"') for row in reader: model_id = row.get('Model ID') diff --git a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue index 9ef8a44a..de791888 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue @@ -375,7 +375,7 @@ limitations under the License.
-
+
{{ $t('profiling.timeLineMsprof') }}
-
+
{{$t('profiling.modelList')}}
-
+
{{$t('profiling.mergeModel')}}
+ v-if="timelineInfoMsprof.noData">
@@ -725,7 +725,7 @@ export default { setTimeout(() => { this.$bus.$on('collapse', this.resizeTrace); }, 500); - if (!this.isPynative) this.queryMsprofTimelineOption(); + this.queryMsprofTimelineOption(); }, watch: { rankID: { -- Gitee From 5d5e549c77c19d69cb8896c8cbb14df7024ea1d3 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Mon, 11 Mar 2024 14:42:24 +0800 Subject: [PATCH 08/19] Adapt the msprof changes. --- mindinsight/backend/profiler/profile_api.py | 18 +- .../analyser/msprof_timeline_analyser.py | 995 +++--------------- 2 files changed, 181 insertions(+), 832 deletions(-) diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index b8be6eba..5a71758a 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -31,7 +31,7 @@ from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir, \ - check_train_job_and_profiler_dir, get_profile_data_version, get_all_export_flag + check_train_job_and_profiler_dir, get_profile_data_version from mindinsight.profiler.common.validator.validate import validate_condition, validate_ui_proc from mindinsight.profiler.common.validator.validate import validate_minddata_pipeline_condition from mindinsight.profiler.common.validator.validate_path import \ @@ -155,8 +155,7 @@ def get_training_trace_graph(): if os.path.exists(profiler_info_file): with open(profiler_info_file, 'r', encoding='utf-8') as file: profiler_info = json.loads(file.read()) - if profiler_info.get("context_mode", "graph").lower() == "pynative" or len(profiler_info.get("graph_ids", - [])) > 1: + if profiler_info.get("context_mode", "graph").lower() == "pynative": return jsonify(graph_info) if 
profiler_info.get("is_heterogeneous", False): graph_info = {'is_heterogeneous': True} @@ -606,15 +605,10 @@ def get_msprof_timeline(): else: scope_name = True - flag = get_all_export_flag(profiler_dir_abs) - if flag: - analyser = AnalyserFactory.instance().get_analyser( - 'msprof_timeline', profiler_dir_abs, None) - timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model, scope_name) - else: - analyser = AnalyserFactory.instance().get_analyser( - 'msprof_timeline_old', profiler_dir_abs, None) - timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model) + analyser = AnalyserFactory.instance().get_analyser( + 'msprof_timeline', profiler_dir_abs, None) + timeline = analyser.get_merged_timeline(rank_list, model_list, kind, merge_model, scope_name) + return jsonify(timeline) diff --git a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 393ec352..8449609e 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -20,32 +20,25 @@ import glob import re import time from decimal import Decimal -import logging as logger from concurrent.futures import ThreadPoolExecutor from marshmallow import ValidationError from mindinsight.profiler.analyser.base_analyser import BaseAnalyser from mindinsight.profiler.common.log import logger -from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path -def get_absolute_ts_start_info(pro_path) -> float: +def get_diff_time(rank_id, prof_path): """ Get difference time between ranks """ - start_json = None - for root, _, files in os.walk(pro_path): - for file in files: - if "start_info" in file and ".done" not in file: - start_json = os.path.join(root, file) - break - if start_json: - with open(start_json, "r+") as f: - info = json.load(f) - ts_us = Decimal(info.get("collectionTimeBegin", 
0)).quantize(Decimal('0.000')) - ts_ns = Decimal(info.get("clockMonotonicRaw", 0)).quantize(Decimal('0.000')) - return ts_us - ts_ns / Decimal(1000) - return 0 + profiler_info_file = os.path.join(prof_path, os.pardir, f'profiler_info_{rank_id}.json') + if not os.path.exists(profiler_info_file): + return Decimal(0).quantize(Decimal('0.000')) + + with open(profiler_info_file, 'r') as fr: + diff_time = json.load(fr).get('diff_time', 0) + + return Decimal(diff_time).quantize(Decimal('0.000')) def get_rank_id_from_info_json(pro_path): @@ -81,14 +74,14 @@ def get_timeline_info(prof_dirs): timeline_info = {} for prof_path in prof_dirs: - rank_id, device_id = get_rank_id_from_info_json(prof_path) - ts_difference_us = get_absolute_ts_start_info(prof_path) + rank_id, _ = get_rank_id_from_info_json(prof_path) + ts_difference_us = get_diff_time(rank_id, prof_path) if rank_id is None: logger.warning('Could not find the rank id in %s, ignore this file.', prof_path) continue if rank_id not in timeline_info or (rank_id in timeline_info and prof_path > timeline_info.get(rank_id)[0]): - prof_path = os.path.join(prof_path, f'device_{device_id}') + prof_path = os.path.join(prof_path, 'mindstudio_profiler_output') timeline_info[rank_id] = (prof_path, ts_difference_us) return timeline_info @@ -100,545 +93,23 @@ def get_job_dir(parent_path): def get_newest_file(file_list): - new_file_list = {} + ''' + Find the newest files + :param file_list: + :return: + ''' + newest_file_list = [] + newest_timestamp = '0' for file_path in file_list: - key = '_'.join(file_path.split('.')[0].split('/')[-1].split('_')[:-1]) - if key not in new_file_list or new_file_list[key] < file_path: - new_file_list[key] = file_path - return list(new_file_list.values()) - - -class MsprofTimelineOldAnalyser(BaseAnalyser): - """ - Analyse timeline data from file. 
- """ - - def _load(self): - """Load data according to the parsed profiling files.""" - - def _filter(self, filter_condition): - """ - Filter the profiling data according to the filter condition. - - Args: - filter_condition (dict): The filter condition. - """ - - def _parse_step_trace_merge_model(self, raw_data, model_list): - """ - Get step trace by merge models - """ - pid = None - tids = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M": - pid = event.get('pid') - elif event.get('name') == 'thread_name' and event.get("ph") == "M": - arg_name = event.get('args', {}).get('name') - if not model_list or (arg_name and int(arg_name.split(':')[-1].strip()) in model_list): - tids[event.get('tid')] = arg_name - return pid, tids - - def _parse_step_trace_not_merge_model(self, raw_data, model_list): - """ - Get step trace by not merge models - """ - tids = [] - for event in raw_data: - if event.get('name') == 'thread_name' and event.get("ph") == "M": - arg_name = event.get('args', {}).get('name') - if not model_list or (arg_name and int(arg_name.split(':')[-1].strip()) in model_list): - tids.append(event.get('tid')) - return tids - - def _parse_step_trace_data(self, step_trace_file, difference_ts, model_list, merge_model): - """ - parse step trace data - """ - try: - step_trace_file = validate_and_normalize_path( - step_trace_file, raise_key='Invalid timeline path, could not found step trace file.' - ) - flags = os.O_RDONLY - with os.fdopen(os.open(step_trace_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - new_events = [] - if merge_model: - pid, tids = self._parse_step_trace_merge_model(raw_data, model_list) - - if not pid: - logger.error('Could not found process_name pid. 
method: _parse_step_trace_data') - return [] - - process_meta = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Step Trace" - }, - "ph": "M" - } - - thread_meta = { - "name": "thread_name", - "pid": pid, - "tid": pid, - "args": { - "name": "iterations" - }, - "ph": "M" - } - - new_events = [process_meta, thread_meta] - for event in raw_data: - if event.get('ph') == 'M' or event.get('tid') not in tids: - continue - - event_name = event.get('name').strip() - if event_name.startswith('Iteration') and len(event_name.split(' ')) == 2: - event['name'] = f"{tids.get(event.get('tid'))} {event_name}" - - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) - - event['tid'] = pid - - new_events.append(event) - - else: - tids = self._parse_step_trace_not_merge_model(raw_data, model_list) - - for event in raw_data: - if (event.get('name') == 'process_name' and event.get("ph") == "M") or \ - event.get('tid') in tids: - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) - new_events.append(event) - - return new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_step_trace_data failed! please theck. detail: %s', err) - return [] - - def _parse_overlap_analysis_data(self, file_list, difference_ts): - """ - parse overlap analysis data - """ - try: - file_list = [validate_and_normalize_path( - file_path, raise_key='Invalid timeline path, could not found msprof json file.' 
- ) for file_path in file_list] - flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - pid = None - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'Overlap Analysis': - pid = event.get('pid') - break - - if not pid: - logger.error('Could not found process_name pid. method: _parse_overlap_analysis_data') - return [] - - process_name = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Overlap Analysis" - }, - "ph": "M" - } - - thread_name = [{ - "name": "thread_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Computing" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 1, - "args": { - "name": "Communication" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 2, - "args": { - "name": "Communication(Not Overlapped)" - }, - "ph": "M" - }, { - "name": "thread_name", - "pid": pid, - "tid": 3, - "args": { - "name": "Free" - }, - "ph": "M" - }] - new_events = [process_name] + thread_name - - tid_mapper = { - 'Computing': 0, - 'Communication': 1, - 'Communication(Not Overlapped)': 2, - 'Free': 3 - } - - for msprof_file in file_list: - flags = os.O_RDONLY - with os.fdopen(os.open(msprof_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - for event in raw_data: - if event.get('ph') == 'M': - continue - - if event.get('name') in tid_mapper: - event['pid'] = pid - event['tid'] = tid_mapper.get(event.get('name')) - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) - new_events.append(event) - return new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) - return [] - - def _parse_ascend_hardware_data(self, file_list, difference_ts): - """ - parse ascend hardware data - """ - try: - file_list = [validate_and_normalize_path( - file_path, raise_key='Invalid timeline path, could not found task json file.' - ) for file_path in file_list] - flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - pid = None - tid_mapper = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'Ascend Hardware': - pid = event.get('pid') - - if event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - 'Stream' in event.get('args').get('name'): - thread_name = event.get('args').get('name') - if event.get('tid') not in tid_mapper: - tid_mapper[event.get('tid')] = thread_name - - if not pid: - logger.error('Could not found process_name pid. method: _parse_ascend_hardware_data') - return [] - - process_name = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "Ascend Hardware" - }, - "ph": "M" - } - - thread_name_list = [{ - "name": "thread_name", - "pid": pid, - "tid": k, - "args": { - "name": v - }, - "ph": "M" - } for k, v in tid_mapper.items()] - - new_events = [process_name] + thread_name_list - - for msprof_file in file_list: - with open(msprof_file, 'r') as fr: - raw_data = json.load(fr) - - for event in raw_data: - - if event.get('ph') == 'M': - continue - - event['pid'] = pid - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) - new_events.append(event) - - return new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_ascend_hardware_data failed! please theck. 
detail: %s', err) - return [] - - def _parse_hccl_data(self, file_list, difference_ts): - """ - parse hccl data - """ - try: - file_list[0] = validate_and_normalize_path( - file_list[0], raise_key='Invalid timeline path, could not found hccl json file.' - ) - flags = os.O_RDONLY - with os.fdopen(os.open(file_list[0], flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - pid = None - tid_mapper = {} - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'HCCL': - pid = event.get('pid') - - elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - ('Plane' in event.get('args').get('name') or 'Communication' in event.get('args').get('name')) \ - and event.get('tid') not in tid_mapper: - tid_mapper[event.get('tid')] = event.get('args').get('name') - - if not pid: - logger.error('Could not found process_name pid. method: _parse_hccl_data') - return [] - - process_name = { - "name": "process_name", - "pid": pid, - "tid": 0, - "args": { - "name": "HCCL" - }, - "ph": "M" - } - - thread_name_list = [{ - "name": "thread_name", - "pid": pid, - "tid": k, - "args": { - "name": v - }, - "ph": "M" - } for k, v in tid_mapper.items()] - - new_events = [process_name] + thread_name_list - - for hccl_file in file_list: - hccl_file = validate_and_normalize_path( - hccl_file, raise_key='Invalid timeline path, could not found hccl json file.' - ) - flags = os.O_RDONLY - with os.fdopen(os.open(hccl_file, flags, 0o200), 'r') as fr: - raw_data = json.load(fr) - - for event in raw_data: - if event.get('ph') == 'M': - continue - event['pid'] = pid - - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - event['ts'] = str(ts + difference_ts) - new_events.append(event) - - return new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_hccl_data failed! please theck. 
detail: %s', err) - return [] - - def _get_summary_timeline_data(self, sub_dirs, model_list, merge_model): - """ - Get summary timeline - Returns: - json, the content of timeline data. - """ - timeline_data = {} - for rank_id, (job_dir, difference_ts) in sub_dirs.items(): - data_list = [] - - # get step trace - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*_*_*.json' - file_list = glob.glob(step_trace_file_name) - if not file_list: - logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_step_trace_data(file_list[0], difference_ts, model_list, merge_model)) - - # get overlap analysis - file_list = [] - if model_list: - for model_id in model_list: - overlap_file_name = fr'{job_dir}/timeline/msprof_*_{model_id}_*.json' - file_list.extend(glob.glob(overlap_file_name)) - else: - overlap_file_name = fr'{job_dir}/timeline/msprof_*_*_*.json' - file_list.extend(glob.glob(overlap_file_name)) - - if not file_list: - logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_overlap_analysis_data(get_newest_file(file_list), difference_ts)) - - timeline_data[rank_id] = data_list - - return timeline_data - - def _get_detail_timeline_data(self, sub_dirs, model_list, merge_model): - """ - Get detail timeline - Returns: - json, the content of timeline data. - """ - - # get summary timeline data. 
include step_trace data and overlap data - summary_data = self._get_summary_timeline_data(sub_dirs, model_list, merge_model) - - timeline_data = {} - for rank_id, (job_dir, difference_ts) in sub_dirs.items(): - data_list = [] - - # get Ascend Hardware - file_list_hardware = [] - # get hccl - file_list_hccl = [] - - if model_list: - for model_id in model_list: - hardware_file_name = fr'{job_dir}/timeline/task_time_*_{model_id}_*.json' - file_list_hardware.extend(glob.glob(hardware_file_name)) - - hccl_file_name = fr'{job_dir}/timeline/hccl_*_{model_id}_*.json' - file_list_hccl.extend(glob.glob(hccl_file_name)) - else: - hardware_file_name = fr'{job_dir}/timeline/task_time_*_*_*.json' - file_list_hardware.extend(glob.glob(hardware_file_name)) - - hccl_file_name = fr'{job_dir}/timeline/hccl_*_*_*.json' - file_list_hccl.extend(glob.glob(hccl_file_name)) - - if not file_list_hardware: - logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), - difference_ts)) - - if not file_list_hccl: - logger.error('Could not find hccl file in %s/device_%s/timeline', job_dir, rank_id) - else: - data_list.extend(self._parse_hccl_data(get_newest_file(file_list_hccl), difference_ts)) - - timeline_data[rank_id] = data_list - - detail_data = {} - for rank_id, data_d in timeline_data.items(): - data_s = summary_data.get(rank_id) - detail_data[rank_id] = data_s + data_d - - return detail_data - - def _merge_timeline(self, timeline_data): - """ - merge all timeline data - """ - new_events = [] - for rank_id, events in timeline_data.items(): - - for event in events: - # 区分不同rank的同一进程的pid - event["pid"] = int(''.join(x for x in str(event.get("pid")) if x.isdigit()) + str(rank_id)) - - # 进程名加上rank_id区分不同rank - if event.get("name") == "process_name" and event.get("ph") == "M": - event["args"]["name"] += f" rank{rank_id}" - new_events.append(event) - return 
new_events - - def get_merged_timeline(self, rank_list, model_list, kind, merge_model=True): - """ - Get the merged timeline - """ - - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) - - if rank_list: - new_sub_dirs = {} - for key, value in sub_dirs.items(): - if key in rank_list: - new_sub_dirs[key] = value - sub_dirs = new_sub_dirs - - if not sub_dirs: - logger.error('Could not found any rank from %s', rank_list) - return [] - - if kind == 'summary': - summary_data = self._get_summary_timeline_data(sub_dirs, model_list, merge_model) - return self._merge_timeline(summary_data) - - if kind == 'detail': - detail_data = self._get_detail_timeline_data(sub_dirs, model_list, merge_model) - return self._merge_timeline(detail_data) - return [] - - def _get_models(self, sub_dirs): - """ - Get all models - """ - model_dict = {} - model_merged = set() - for rank_id, (job_dir, _) in sub_dirs.items(): - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*_*_*.json' - file_list = glob.glob(step_trace_file_name) - model_set = set() - for file_name in file_list: - last_name = file_name.rsplit('/', maxsplit=1)[-1] - last_name_suffix = last_name.split(f'step_trace_')[-1] - model_id = last_name_suffix.split('_')[1] - model_set.add(int(model_id)) - model_dict[rank_id] = model_set - model_merged.update(model_set) - return model_dict, model_merged - - def get_option(self): - """ - Get the option values - """ - # get all job path, like PROF_* - sub_dirs = get_job_dir(self._profiling_dir) - rank_list = list(sub_dirs.keys()) - rank_list.sort() + timestamp = file_path.split('.')[0].split('/')[-1].split('_')[-1] + newest_timestamp = max(timestamp, newest_timestamp) - _, model_merged = self._get_models(sub_dirs) - model_list = list(model_merged) - model_list.sort() + for file_path in file_list: + if file_path.split('.')[0].split('/')[-1].split('_')[-1] == newest_timestamp: + newest_file_list.append(file_path) - return {'rank_list': rank_list, 'model_list': 
model_list} + newest_file_list.sort() + return newest_file_list class MsprofTimelineAnalyser(BaseAnalyser): @@ -724,7 +195,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): te = ts + Decimal(dur).quantize(Decimal('0.000')) scope_data.append((op_full_name.split('/')[:-1], ts, te)) - ts += difference_ts + ts -= difference_ts if int(tid) not in tid_set: tid_set.add(int(tid)) @@ -847,19 +318,18 @@ class MsprofTimelineAnalyser(BaseAnalyser): if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts + ts -= difference_ts event['ts'] = str(ts) event['pid'] = new_pid event['tid'] = 0 new_events.append(event) return new_events - def _parse_step_trace_not_merge(self, old_pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts): + def _parse_step_trace_not_merge(self, old_pid, new_pid, rank_id, raw_data, difference_ts): """not merge step trace data""" new_events = [] for event in raw_data: - arg_name = tid_mapper.get(event.get('tid')) - if event.get('pid') != old_pid or not arg_name: + if event.get('pid') != old_pid: continue if event.get('name') == 'process_name' and event.get('ph') == 'M': event['args']['name'] = f"Step Trace Rank{rank_id}" @@ -869,7 +339,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): event['pid'] = new_pid if event.get('ts'): ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts + ts -= difference_ts event['ts'] = str(ts) new_events.append(event) return new_events @@ -895,253 +365,170 @@ class MsprofTimelineAnalyser(BaseAnalyser): if merge_model: return self._parse_step_trace_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) - return self._parse_step_trace_not_merge(pid, new_pid, rank_id, raw_data, tid_mapper, difference_ts) + return self._parse_step_trace_not_merge(pid, new_pid, rank_id, raw_data, difference_ts) except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: logger.error('parse_step_trace_data failed! please theck. 
detail: %s', err) return [] - def _parse_overlap_analysis_data(self, file_list, rank_id, difference_ts): - """ - parse overlap analysis data - """ - try: - flags = os.O_RDONLY - raw_data = [] - for file_path in file_list: - with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: - raw_data.extend(json.load(fr)) - - pid = None - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'Overlap Analysis': - pid = event.get('pid') - break - - if not pid: - logger.warning('Could not found process_name pid. method: _parse_overlap_analysis_data') - return [] - - new_events = [] - new_pid = int(f'{self.overlap_index}{rank_id}') - for event in raw_data: - if event.get('pid') != pid: - continue - - if event.get('name') == 'process_name' and event.get("ph") == "M": - event["args"]["name"] += f" Rank{rank_id}" - - if event.get('name') == 'process_sort_index' and event.get("ph") == "M": - event["args"]["sort_index"] = self.overlap_index - - event['pid'] = new_pid - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) - - new_events.append(event) - - return new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) - return [] - - def _parse_ascend_hardware_metadata(self, new_pid, raw_data): + def _parse_msprof_metadata(self, new_pid_hardware, raw_data): """ - Get ascend hardware by merge models + Get msprof by merge models """ - tid_mapper = {} - pid = None + tid_mapper_hardware = {} + pid_hardware = None + pid_hccl = None + pid_cann = None + pid_overlap = None for event in raw_data: if event.get('name') == 'process_name' and event.get("ph") == "M" and \ event.get('args').get('name') == 'Ascend Hardware': - pid = event.get('pid') + pid_hardware = event.get('pid') elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ 'Stream' in event.get('args').get('name'): - event['pid'] = new_pid - tid_mapper.update({event.get('tid'): event}) - return pid, tid_mapper + event['pid'] = new_pid_hardware + tid_mapper_hardware.update({event.get('tid'): event}) - def _parse_ascend_hardware_data(self, file_list, rank_id, difference_ts, model_list, scope_name): - """ - parse ascend hardware data - """ - flags = os.O_RDONLY - raw_data = [] + elif event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'HCCL': + pid_hccl = event.get('pid') - new_events = [] - tid_set = set() - new_pid = int(f'{self.ascend_hardware_index}{rank_id}') - new_metadata = [{ - "name": "process_name", - "pid": new_pid, - "args": { - "name": f"Ascend Hardware Rank{rank_id}" - }, - "ph": "M" - }, {"name": "process_sort_index", "pid": new_pid, - "args": {"sort_index": self.ascend_hardware_index}, "ph": "M"}] - scope_data = [] - model_id_set = set() - try: - for file_path in file_list: - with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: - raw_data.extend(json.load(fr)) + elif event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'CANN': + pid_cann = event.get('pid') - pid, tid_mapper = self._parse_ascend_hardware_metadata(new_pid, raw_data) + elif event.get('name') == 'process_name' 
and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Overlap Analysis': + pid_overlap = event.get('pid') - if not pid: - logger.error('Could not found process_name pid. method: _parse_ascend_hardware_data') - return [] + result = (pid_hardware, tid_mapper_hardware, pid_hccl, pid_cann, pid_overlap) + return result - for event in raw_data: - model_id = event.get('args', {}).get('Model Id') - model_id_set.add(model_id) - if event.get("ph") == "M" or (model_list and model_id not in model_list): - continue + def _parse_msprof_raw_data(self, raw_data, difference_ts, tid_mapper_hardware, model_list, scope_name, **kwargs): + """ + Parse the msprof raw data + """ - op_full_name = event.get('name') - if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000')) - scope_data.append((op_full_name.split('/')[:-1], ts, te)) - - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) - event['pid'] = new_pid - tid_set.add(event.get('tid')) - new_events.append(event) - - for tid in tid_set: - thread_event = tid_mapper.get(tid) - if thread_event is None: - thread_event = {"name": "thread_name", "pid": new_pid, - "tid": tid, "args": {"name": f"Stream {tid}"}, "ph": "M"} - new_metadata.append(thread_event) - return new_metadata + new_events, scope_data + new_events_hardware = [] + new_events_hccl = [] + new_events_cann = [] + new_events_overlap = [] - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_ascend_hardware_data failed! please theck. 
detail: %s', err) - return [] + scope_data = [] - def _parse_hccl_data(self, file_list, rank_id, difference_ts, model_list): - """ - parse hccl data - """ - try: - flags = os.O_RDONLY - raw_data = [] - for file_path in file_list: - with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: - raw_data.extend(json.load(fr)) + for event in raw_data: + model_id = event.get('args', {}).get('Model Id') + is_process = event.get('ph') == 'M' \ + and (event.get('name') == 'process_name' or event.get('name') == 'process_sort_index') + if is_process or (model_list and model_id not in model_list): + continue - pid = None - tid_mapper = {} - tid_set = set() - new_events = [] - new_pid = int(f'{self.hccl_index}{rank_id}') - model_id_set = set() + op_full_name = event.get('name') + if scope_name and op_full_name and op_full_name.startswith(self.top_scope_name): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000')) + scope_data.append((op_full_name.split('/')[:-1], ts, te)) - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'HCCL': - pid = event.get('pid') + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts -= difference_ts + event['ts'] = str(ts) - elif event.get('name') == 'thread_name' and event.get("ph") == "M" and \ - ('Plane' in event.get('args').get('name') or 'Communication' in event.get('args').get('name')): - event['pid'] = new_pid - tid_mapper.update({event.get('tid'): event}) + if event.get('pid') == kwargs.get('pid_hardware') and event.get('ph') != 'M' \ + and event.get('tid') in tid_mapper_hardware: + event['pid'] = kwargs.get('new_pid_hardware') + new_events_hardware.append(event) - if not pid: - logger.error('Could not found process_name pid. 
method: _parse_hccl_data') - return [] + elif event.get('pid') == kwargs.get('pid_hccl'): + event['pid'] = kwargs.get('new_pid_hccl') + new_events_hccl.append(event) - for event in raw_data: - model_id = event.get('args', {}).get('model id') - model_id_set.add(model_id) - if event.get("ph") == "M" or (model_list and model_id not in model_list): - continue + elif not model_list and event.get('pid') == kwargs.get('pid_cann'): + event['pid'] = kwargs.get('new_pid_cann') + new_events_cann.append(event) - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) + elif not model_list and event.get('pid') == kwargs.get('pid_overlap'): + event['pid'] = kwargs.get('new_pid_overlap') + new_events_overlap.append(event) - event['pid'] = new_pid - new_events.append(event) + return new_events_hardware + new_events_hccl + new_events_cann + new_events_overlap, scope_data - tid = event.get('tid') - tid_set.add(tid) + def _parse_msprof_data(self, file_list, rank_id, difference_ts, model_list, scope_name): + """ + parse ascend hardware and hccl and cann data + """ + flags = os.O_RDONLY + raw_data = [] - new_metadata = [{ + new_pid_hardware = int(f'{self.ascend_hardware_index}{rank_id}') + new_pid_hccl = int(f'{self.hccl_index}{rank_id}') + new_pid_cann = int(f'{self.cann_index}{rank_id}') + new_pid_overlap = int(f'{self.overlap_index}{rank_id}') + new_metadata = [ + { "name": "process_name", - "pid": new_pid, + "pid": new_pid_hardware, + "args": { + "name": f"Ascend Hardware Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid_hardware, + "args": {"sort_index": self.ascend_hardware_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_hccl, "args": { "name": f"HCCL Rank{rank_id}" }, "ph": "M" - }, {"name": "process_sort_index", "pid": new_pid, - "args": {"sort_index": self.hccl_index}, "ph": "M"}] - - for tid in tid_set: - new_metadata.append(tid_mapper.get(tid)) - return 
new_metadata + new_events - - except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_hccl_data failed! please theck. detail: %s', err) - return [] + }, {"name": "process_sort_index", "pid": new_pid_hccl, + "args": {"sort_index": self.hccl_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_cann, + "args": { + "name": f"CANN Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid_cann, + "args": {"sort_index": self.cann_index}, "ph": "M"}, + { + "name": "process_name", + "pid": new_pid_overlap, + "args": { + "name": f"Overlap Analysis Rank{rank_id}" + }, + "ph": "M" + }, {"name": "process_sort_index", "pid": new_pid_overlap, + "args": {"sort_index": self.overlap_index}, "ph": "M"} + ] - def _parse_cann_data(self, file_list, rank_id, difference_ts): - """ - pid: 1 rank - """ try: - flags = os.O_RDONLY - raw_data = [] for file_path in file_list: with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: raw_data.extend(json.load(fr)) - pid = None - for event in raw_data: - if event.get('name') == 'process_name' and event.get("ph") == "M" and \ - event.get('args').get('name') == 'CANN': - pid = event.get('pid') - break - - if not pid: - logger.warning('Could not found process_name pid. 
method: _parse_cann_data') - return [] - - new_events = [] - new_pid = int(f'{self.cann_index}{rank_id}') - for event in raw_data: - if event.get('pid') != pid: - continue - if event.get('name') == 'process_name' and event.get("ph") == "M": - event["args"]["name"] += f" Rank{rank_id}" + pid_hardware, tid_mapper_hardware, pid_hccl, pid_cann, pid_overlap \ + = self._parse_msprof_metadata(new_pid_hardware, raw_data) - if event.get('name') == 'process_sort_index' and event.get("ph") == "M": - event["args"]["sort_index"] = self.cann_index + is_pid_valid = not pid_hardware and not pid_hccl and pid_cann and pid_overlap - event['pid'] = new_pid - if event.get('ts'): - ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) - ts += difference_ts - event['ts'] = str(ts) + if is_pid_valid: + logger.error('Could not found process_name pid. method: _parse_msprof_data') + return [] - new_events.append(event) + pid_dict = {'pid_hardware': pid_hardware, 'pid_hccl': pid_hccl, + 'pid_cann': pid_cann, 'pid_overlap': pid_overlap, + 'new_pid_hardware': new_pid_hardware, 'new_pid_hccl': new_pid_hccl, + 'new_pid_cann': new_pid_cann, 'new_pid_overlap': new_pid_overlap} - return new_events + new_events, scope_data = self._parse_msprof_raw_data(raw_data, difference_ts, tid_mapper_hardware, + model_list, scope_name, **pid_dict) + return new_metadata + list(tid_mapper_hardware.values()) + new_events, scope_data except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: - logger.error('parse_cann_data failed! please theck. detail: %s', err) + logger.error('_parse_msprof_data failed! please theck. 
detail: %s', err) return [] def _parse_scope_info(self, scope_data, rank_id, difference_ts): @@ -1181,7 +568,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): layer_stack[layer_depth][2] = op[2] # 合并 else: ts = layer_stack[layer_depth][1] - ts += difference_ts + ts -= difference_ts new_events.append({ "name": layer_stack[layer_depth][0], "pid": new_pid, @@ -1212,7 +599,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): }]) if layer: ts = layer[1] - ts += difference_ts + ts -= difference_ts new_events.append({ "name": layer[0], "pid": new_pid, @@ -1236,23 +623,24 @@ class MsprofTimelineAnalyser(BaseAnalyser): for rank_id, (job_dir, difference_ts) in sub_dirs.items(): # get step trace - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*.json' - file_list = glob.glob(step_trace_file_name) + step_trace_file_name = fr'{job_dir}/step_trace_*.json' + file_list = get_newest_file(glob.glob(step_trace_file_name)) + if not file_list: - logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) + logger.error('Could not find step trace file in %s', job_dir) else: - task_list.append(pool.submit(self._parse_step_trace_data, get_newest_file(file_list), + task_list.append(pool.submit(self._parse_step_trace_data, file_list, rank_id, difference_ts, None, merge_model)) # get overlap analysis - overlap_file_name = fr'{job_dir}/timeline/msprof_*.json' - file_list = glob.glob(overlap_file_name) + overlap_file_name = fr'{job_dir}/msprof_*.json' + file_list = get_newest_file(glob.glob(overlap_file_name)) if not file_list: - logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) + logger.error('Could not find overlap analysis file in %s', job_dir) else: - task_list.append(pool.submit(self._parse_overlap_analysis_data, get_newest_file(file_list), + task_list.append(pool.submit(self._parse_overlap_analysis_data, file_list, rank_id, difference_ts)) all_done = list(range(len(task_list))) @@ -1288,55 +676,27 @@ class 
MsprofTimelineAnalyser(BaseAnalyser): all_scope_data = [] # 所有带scope的算子 # get step_trace data - step_trace_file_name = fr'{job_dir}/timeline/step_trace_*.json' - file_list_step_trace = glob.glob(step_trace_file_name) + step_trace_file_name = fr'{job_dir}/step_trace_*.json' + file_list_step_trace = get_newest_file(glob.glob(step_trace_file_name)) if not file_list_step_trace: - logger.error('Could not find step trace file in %s/device_%s/timeline', job_dir, rank_id) + logger.error('Could not find step trace file in %s', job_dir) else: - task_list.append(pool.submit(self._parse_step_trace_data, get_newest_file(file_list_step_trace), + task_list.append(pool.submit(self._parse_step_trace_data, file_list_step_trace, rank_id, difference_ts, model_list, merge_model)) - # get Ascend Hardware - hardware_file_name = fr'{job_dir}/timeline/task_time_*.json' - file_list_hardware = glob.glob(hardware_file_name) - if not file_list_hardware: - logger.error('Could not find ascend hardware file in %s/device_%s/timeline', job_dir, rank_id) + # get Ascend Hardware 、Hccl、CANN、overlap + msprof_file_name = fr'{job_dir}/msprof_*.json' + file_list_msprof = get_newest_file(glob.glob(msprof_file_name)) + if not file_list_msprof: + logger.error('Could not find msprof file in %s', job_dir) else: - ascend_timeline, scope_data = self._parse_ascend_hardware_data(get_newest_file(file_list_hardware), - rank_id, difference_ts, model_list, - scope_name) + ascend_timeline, scope_data = self._parse_msprof_data(file_list_msprof, + rank_id, difference_ts, model_list, + scope_name) timeline_data.extend(ascend_timeline) all_scope_data.extend(scope_data) - # get hccl - hccl_file_name = fr'{job_dir}/timeline/hccl_*.json' - file_list_hccl = glob.glob(hccl_file_name) - if not file_list_hccl: - logger.error('Could not find hccl file in %s/device_%s/timeline', job_dir, rank_id) - else: - task_list.append(pool.submit(self._parse_hccl_data, get_newest_file(file_list_hccl), - rank_id, difference_ts, model_list)) - 
if not model_list: - # get CANN - cann_file_name = fr'{job_dir}/timeline/msprof_*.json' - file_list = glob.glob(cann_file_name) - - if not file_list: - logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) - else: - task_list.append(pool.submit(self._parse_cann_data, get_newest_file(file_list), - rank_id, difference_ts)) - - # get overlap analysis - overlap_file_name = fr'{job_dir}/timeline/msprof_*.json' - file_list = glob.glob(overlap_file_name) - if not file_list: - logger.error('Could not find overlap analysis file in %s/device_%s/timeline', job_dir, rank_id) - else: - task_list.append(pool.submit(self._parse_overlap_analysis_data, get_newest_file(file_list), - rank_id, difference_ts)) - # get cpu op cpu_op_file_name = fr'{self._profiling_dir}/cpu_op_execute_timestamp_{rank_id}.txt' file_list = glob.glob(cpu_op_file_name) @@ -1369,20 +729,15 @@ class MsprofTimelineAnalyser(BaseAnalyser): model_dict = {} model_merged = set() for rank_id, (job_dir, _) in sub_dirs.items(): - step_trace_file_name = fr'{job_dir}/summary/step_trace_*.csv' - file_list = glob.glob(step_trace_file_name) + step_trace_file_name = fr'{job_dir}/step_trace_*.csv' + file_list = get_newest_file(glob.glob(step_trace_file_name)) model_set = set() - if not file_list: - model_dict[rank_id] = model_set - model_merged.update(model_set) - continue - - with open(max(file_list), 'r', newline='') as fr: + with open(file_list[0], 'r', newline='') as fr: reader = csv.DictReader(fr, delimiter=',', quotechar='"') for row in reader: model_id = row.get('Model ID') if model_id: - model_set.add(int(model_id)) + model_set.add(float(model_id)) model_dict[rank_id] = model_set model_merged.update(model_set) -- Gitee From f9a0f6533f37fd1b52c060193d43cdbf59e12966 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Mon, 18 Mar 2024 14:32:29 +0800 Subject: [PATCH 09/19] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=9B=86=E7=BE=A4?= 
=?UTF-8?q?=E8=BF=AD=E4=BB=A3=E8=BD=A8=E8=BF=B9=E9=A1=B5=E9=9D=A2bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../profiler/analyser/cluster_analyser.py | 3 +- .../profiler/analyser/timeline_processor.py | 63 ++++++++++--------- .../allproposers/parallel_proposer.py | 5 +- mindinsight/ui/src/locales/en-us.json | 1 + mindinsight/ui/src/locales/zh-cn.json | 1 + .../performance/performance-dashboard.vue | 2 + .../cluster/performance/step-trace.vue | 4 +- .../st/func/profiler/test_cluster_analyser.py | 2 +- 8 files changed, 47 insertions(+), 34 deletions(-) diff --git a/mindinsight/profiler/analyser/cluster_analyser.py b/mindinsight/profiler/analyser/cluster_analyser.py index f97a61db..0ce85b08 100644 --- a/mindinsight/profiler/analyser/cluster_analyser.py +++ b/mindinsight/profiler/analyser/cluster_analyser.py @@ -172,10 +172,11 @@ class ClusterStepTraceAnalyser(ClusterAnalyser): # step_trace_info[7]: fp_and_bp time # step_trace_info[8]: tail time # divided by 1e5, the unit becomes a millisecond + iter_total_time = round(float(step_trace_info[3]) / 1e5, 4) iteration_interval = round(float(step_trace_info[6]) / 1e5, 4) fp_and_bp = round(float(step_trace_info[7]) / 1e5, 4) tail = round(float(step_trace_info[8]) / 1e5, 4) - step_trace_info = [iteration_interval, fp_and_bp, tail] + step_trace_info = [iteration_interval, fp_and_bp, tail, iter_total_time] return step_trace_info def _get_cluster_step_bottleneck_info(self, step_num, stage_id): diff --git a/mindinsight/profiler/analyser/timeline_processor.py b/mindinsight/profiler/analyser/timeline_processor.py index 33589ed1..b85801e9 100644 --- a/mindinsight/profiler/analyser/timeline_processor.py +++ b/mindinsight/profiler/analyser/timeline_processor.py @@ -45,22 +45,24 @@ class TimelineService: operator_time_maps = {} for device_name, cur_op_nodes in self.op_nodes.items(): - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in 
self.all_data.get(device_name): if item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break operator_time_maps[device_name] = {} for item in cur_op_nodes: - if step_start < item['ts'] < step_end or\ - item['dur'] < step_start < item['ts'] + item['dur']: - operator_time_maps.get(device_name)[item['name']] = {"st": item['ts'],\ - "ed": item['ts'] + item['dur'], "dur": item['dur']} - max_time = max(max_time, item['ts'] + item['dur']) - min_time = min(min_time, item['ts']) + ts = float(item['ts']) + dur = float(item['dur']) + if step_start < ts < step_end or dur < step_start < ts + dur: + operator_time_maps.get(device_name)[item['name']] = {"st": ts, + "ed": ts + dur, + "dur": dur} + max_time = max(max_time, ts + dur) + min_time = min(min_time, ts) def cmp(a): return int(re.search(r'\d+', a[0]).group()) @@ -80,10 +82,10 @@ class TimelineService: aggre_node['st_min'] = min(aggre_node.get('st_min'), node.get('st')) aggre_node['ed_max'] = max(aggre_node.get('ed_max'), node.get('ed')) aggre_node['ed_min'] = min(aggre_node.get('ed_min'), node.get('ed')) - aggre_node['st_avg'] = (node['st'] + aggre_node['st_avg'] *\ - aggre_node['n']) / (aggre_node['n'] + 1) - aggre_node['ed_avg'] = (node['ed'] + aggre_node['ed_avg'] *\ - aggre_node['n']) / (aggre_node['n'] + 1) + aggre_node['st_avg'] = (node['st'] + aggre_node['st_avg'] * + aggre_node['n']) / (aggre_node['n'] + 1) + aggre_node['ed_avg'] = (node['ed'] + aggre_node['ed_avg'] * + aggre_node['n']) / (aggre_node['n'] + 1) aggre_node['n'] += 1 else: data[node_name] = { @@ -96,8 +98,8 @@ class TimelineService: 'n': 1 } stage_data[stage_name] = {"data": data, "devices": device_names} - TimelineData = collections.namedtuple('TimelineData',\ - ['operator_time_maps', 'min_time', 'max_time', 'stage_data']) + TimelineData = collections.namedtuple('TimelineData', + ['operator_time_maps', 'min_time', 'max_time', 'stage_data']) 
timeline_data = TimelineData(operator_time_maps, min_time, max_time, stage_data) return timeline_data @@ -151,8 +153,9 @@ class TimelineService: minn = float('inf') for item in cur_one_step_op: if 'name' in item and 'AllReduce' in item['name']: - if item['ts'] < minn: - minn = item['ts'] + ts = float(item['ts']) + if ts < minn: + minn = ts min_all_reduce = item if min_all_reduce == '': continue @@ -162,8 +165,8 @@ class TimelineService: for item in one_step_op.get(device_name2): if item['name'] == min_all_reduce['name']: visited.add(device_name2) - min_all_reduce['ed'] = min_all_reduce['ts'] + min_all_reduce['dur'] - item['ed'] = item['ts'] + item['dur'] + min_all_reduce['ed'] = float(min_all_reduce['ts']) + float(min_all_reduce['dur']) + item['ed'] = float(item['ts']) + float(item['dur']) self.align_info[device_name2] = min_all_reduce['ed'] - item['ed'] stages.append(device_name2) break @@ -174,7 +177,7 @@ class TimelineService: for device_name, cur_data in self.all_data.items(): for item in cur_data: if 'ts' in item: - item['ts'] += self.align_info.get(device_name) + item['ts'] = float(item['ts']) + self.align_info.get(device_name) self.stage_device_map = {} def cmp(a): @@ -196,15 +199,15 @@ class TimelineService: ret = {} for device_name, cur_op_nodes in self.op_nodes.items(): ret[device_name] = [] - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in self.all_data.get(device_name): if item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break for item in cur_op_nodes: - if item['ts'] > step_start and item['ts'] < step_end: + if step_start < float(item['ts']) < step_end: ret.get(device_name).append(item) return ret @@ -218,16 +221,16 @@ class TimelineService: ret = {} for device_name, cur_data in self.all_data.items(): ret[device_name] = [] - step_start = 0 + step_start = 0.0 step_end = float('inf') for item in cur_data: if 
item['name'] == step: - step_start = item['ts'] - step_end = item['ts'] + item['dur'] + step_start = float(item['ts']) + step_end = step_start + float(item['dur']) break for item in cur_data: if 'scope_level' in item: - if item['ts'] > step_start and item['ts'] < step_end: + if step_start < float(item['ts']) < step_end: ret.get(device_name).append(item) return ret @@ -247,7 +250,7 @@ def _find_scope(cur_scope_by_level, op): l = 0 r = len(intervals) - 1 ans = -1 - t = op['ts'] + t = float(op['ts']) while l <= r: mid = (l + r) >> 1 if intervals[mid][0] <= t: @@ -255,6 +258,6 @@ def _find_scope(cur_scope_by_level, op): l = mid + 1 else: r = mid - 1 - if ans != -1 and intervals[ans][0] + intervals[ans][1] >= op['ts'] + op['dur']: + if ans != -1 and intervals[ans][0] + intervals[ans][1] >= float(op['ts']) + float(op['dur']): return cur_scope, True return "", False diff --git a/mindinsight/profiler/proposer/allproposers/parallel_proposer.py b/mindinsight/profiler/proposer/allproposers/parallel_proposer.py index 3f291ffd..296cc5fe 100644 --- a/mindinsight/profiler/proposer/allproposers/parallel_proposer.py +++ b/mindinsight/profiler/proposer/allproposers/parallel_proposer.py @@ -116,7 +116,10 @@ class ParallelProposer(Proposer): percent = 0.0 for step_interval in step_intervals: # get how much more than the average - proportion = step_interval["step_interval"] / avg_step_interval - 1 + if avg_step_interval == 0: + proportion = 0.0 + else: + proportion = step_interval["step_interval"] / avg_step_interval - 1 if proportion > self._step_interval_threshold and proportion > percent: rank_id = step_interval["rank_id"] val = step_interval["step_interval"] diff --git a/mindinsight/ui/src/locales/en-us.json b/mindinsight/ui/src/locales/en-us.json index 47e7e8da..f4e53a41 100644 --- a/mindinsight/ui/src/locales/en-us.json +++ b/mindinsight/ui/src/locales/en-us.json @@ -328,6 +328,7 @@ "titleText": "Profiling - Single-host", "profilingDashboard": "Profiling Dashboard", 
"showAverage": "Average value", + "iterTotalTime": "Step Total Time", "iterationGapTime": "Step Interval", "fpBpTime": "Forward and Backward Propagation", "fpTime": "Forward Propagation", diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index 0c6ccc08..4f2b4f4b 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -327,6 +327,7 @@ "titleText": "性能分析 - 单机", "profilingDashboard": "性能看板", "showAverage": "展示平均值", + "iterTotalTime": "迭代总耗时", "iterationGapTime": "迭代间隙时间", "fpBpTime": "前向+反向时间", "fpTime": "前向时间", diff --git a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue index 36bda447..dc527db6 100644 --- a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue @@ -80,6 +80,7 @@ export default { this.$t('profiling.iterationGapTime'), this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], title: this.$t('profilingCluster.stepChartTitle'), }, // Chart object of performance window @@ -164,6 +165,7 @@ export default { this.$t('profiling.iterationGapTime'), this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], }, 'model-parallel': { diff --git a/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue b/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue index 4e48bcef..5a7475ed 100644 --- a/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue +++ b/mindinsight/ui/src/views/profiling/cluster/performance/step-trace.vue @@ -227,6 +227,7 @@ export default { iteration_interval: this.$t('profiling.iterationGapTime'), fp_and_bp: this.$t('profiling.fpBpTime'), tail: this.$t('profiling.tailTime'), + iter_total_time: 
this.$t('profiling.iterTotalTime'), communication_alone: this.$t('profilingCluster.communicationAloneTime'), computation: this.$t('profilingCluster.computationTime'), receive_alone: this.$t('profilingCluster.receiveAloneTime'), @@ -278,8 +279,9 @@ export default { this.$t('profiling.iterationGapTime'), this.$t('profiling.fpBpTime'), this.$t('profiling.tailTime'), + this.$t('profiling.iterTotalTime') ], - cols: ['iteration_interval', 'fp_and_bp', 'tail'], + cols: ['iteration_interval', 'fp_and_bp', 'tail', 'iter_total_time'], tips: [ { label: this.$t('profiling.iterationGapTime'), diff --git a/tests/st/func/profiler/test_cluster_analyser.py b/tests/st/func/profiler/test_cluster_analyser.py index f94f4282..bc43e066 100644 --- a/tests/st/func/profiler/test_cluster_analyser.py +++ b/tests/st/func/profiler/test_cluster_analyser.py @@ -59,7 +59,7 @@ class TestClusterAnalyser: } expect_result = { "total_step_num": '1', - "info": [{"step_trace_info": [0.0, 23.6681, 0.041], + "info": [{"step_trace_info": [0.0, 23.6681, 0.041, 23.7091], "rank_id": 1, "profiler_dir": "profiler" }], -- Gitee From db4e632dabde88d4b22ae2a9b98a168d3eaa8fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Fri, 29 Mar 2024 16:20:50 +0800 Subject: [PATCH 10/19] r2.3 rc1 version --- mindinsight/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindinsight/_version.py b/mindinsight/_version.py index 861fce7a..7879c07c 100644 --- a/mindinsight/_version.py +++ b/mindinsight/_version.py @@ -14,4 +14,4 @@ # ============================================================================ """Mindinsight version module.""" -VERSION = '2.3.0' +VERSION = '2.3.0rc1' -- Gitee From 02168cd2880e533b8e6a9224f5a903a7075a177d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Sun, 7 Apr 2024 19:43:12 +0800 Subject: [PATCH 11/19] add r2.3 release note --- RELEASE.md | 25 +++++++++++++++++++++++++ RELEASE_CN.md | 25 +++++++++++++++++++++++++ 
2 files changed, 50 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index ebcc0d4e..004dc725 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,31 @@ [查看中文](./RELEASE_CN.md) +## MindInsight 2.3.0-rc1 Release Notes + +### Major Features and Improvements + +#### Profiler + +- [BETA] Dynamically start and stop profiling. Users can collect profiling data in real time according to the training situation, reducing the amount of data collected. +- [BETA] Profiling the communication operator time-consuming matrix. Users can find cluster communication performance bottlenecks by analyzing the communication operator time-consuming matrix. +- [BETA] Improve the performance of Ascend environment in parsing profiling data. +- [BETA] Supports offline analysis of data generated by Profiling. Users can collect data first and then parse the data as needed. +- [BETA] Supports collecting performance data of HBM, PCIe, and l2_cache to enrich performance analysis indicators. + +#### Dump + +- [BETA] The statistical information saved by Dump records MD5 values, and users can determine small differences in tensor values through MD5 values. +- [BETA] Dump supports the float16 data type and supports users to locate float16 type operator accuracy issues. + +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! 
+ ## MindSpore Insight 2.2.0 Release Notes ### Major Features and Improvements diff --git a/RELEASE_CN.md b/RELEASE_CN.md index 35c8ab4b..ea99efee 100644 --- a/RELEASE_CN.md +++ b/RELEASE_CN.md @@ -2,6 +2,31 @@ [View English](./RELEASE.md) +## MindInsight 2.3.0-rc1 Release Notes + +### 主要特性和增强 + +#### Profiler + +- [BETA] 动态启停profiling,用户可以根据训练情况实时采集profiling 数据,减少采集数据量。 +- [BETA] Profiling通信算子耗时矩阵,用户通过分析通信算子耗时矩阵,找出集群通信性能瓶颈。 +- [BETA] 提高昇腾环境解析Profiling数据的性能。 +- [BETA] 支持离线解析Profiling生成的数据,用户可以先采集数据,然后根据需要再解析数据。 +- [BETA] 支持采集HBM、PCIe、l2_cache性能数据,丰富性能分析指标。 + +#### Dump + +- [BETA] Dump保存的统计信息记录MD5值,用户可以通过MD5值确定张量值的微小差异。 +- [BETA] Dump支持bfloat16数据类型,支撑用户定位bfloat16类型的算子精度问题。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! + ## MindSpore MindInsight 2.2.0 Release Notes ### 主要特性和增强 -- Gitee From 7b5fbcf78bf179f90471645856153cf1a513ccc8 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Mon, 29 Apr 2024 15:29:29 +0800 Subject: [PATCH 12/19] fix bug that summary timeline load failed. --- .../analyser/msprof_timeline_analyser.py | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/mindinsight/profiler/analyser/msprof_timeline_analyser.py b/mindinsight/profiler/analyser/msprof_timeline_analyser.py index 8449609e..736cb620 100644 --- a/mindinsight/profiler/analyser/msprof_timeline_analyser.py +++ b/mindinsight/profiler/analyser/msprof_timeline_analyser.py @@ -249,6 +249,54 @@ class MsprofTimelineAnalyser(BaseAnalyser): filter_condition (dict): The filter condition. 
""" + def _parse_overlap_analysis_data(self, file_list, rank_id, difference_ts): + """ + parse overlap analysis data + """ + try: + flags = os.O_RDONLY + raw_data = [] + for file_path in file_list: + with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr: + raw_data.extend(json.load(fr)) + + pid = None + for event in raw_data: + if event.get('name') == 'process_name' and event.get("ph") == "M" and \ + event.get('args').get('name') == 'Overlap Analysis': + pid = event.get('pid') + break + + if not pid: + logger.warning('Could not found process_name pid. method: _parse_overlap_analysis_data') + return [] + + new_events = [] + new_pid = int(f'{self.overlap_index}{rank_id}') + for event in raw_data: + if event.get('pid') != pid: + continue + + if event.get('name') == 'process_name' and event.get("ph") == "M": + event["args"]["name"] += f" Rank{rank_id}" + + if event.get('name') == 'process_sort_index' and event.get("ph") == "M": + event["args"]["sort_index"] = self.overlap_index + + event['pid'] = new_pid + if event.get('ts'): + ts = Decimal(event.get('ts')).quantize(Decimal('0.000')) + ts += difference_ts + event['ts'] = str(ts) + + new_events.append(event) + + return new_events + + except (ValidationError, IOError, OSError, json.JSONDecodeError) as err: + logger.error('parse_overlap_analysis_data failed! please theck. 
detail: %s', err) + return [] + def _parse_step_trace_metadata(self, raw_data, model_list): """ Get step trace by merge models @@ -627,7 +675,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): file_list = get_newest_file(glob.glob(step_trace_file_name)) if not file_list: - logger.error('Could not find step trace file in %s', job_dir) + logger.warning('Could not find step trace file in %s', job_dir) else: task_list.append(pool.submit(self._parse_step_trace_data, file_list, rank_id, difference_ts, None, @@ -638,7 +686,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): file_list = get_newest_file(glob.glob(overlap_file_name)) if not file_list: - logger.error('Could not find overlap analysis file in %s', job_dir) + logger.warning('Could not find overlap analysis file in %s', job_dir) else: task_list.append(pool.submit(self._parse_overlap_analysis_data, file_list, rank_id, difference_ts)) @@ -679,7 +727,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): step_trace_file_name = fr'{job_dir}/step_trace_*.json' file_list_step_trace = get_newest_file(glob.glob(step_trace_file_name)) if not file_list_step_trace: - logger.error('Could not find step trace file in %s', job_dir) + logger.warning('Could not find step trace file in %s', job_dir) else: task_list.append(pool.submit(self._parse_step_trace_data, file_list_step_trace, rank_id, difference_ts, model_list, merge_model)) @@ -688,7 +736,7 @@ class MsprofTimelineAnalyser(BaseAnalyser): msprof_file_name = fr'{job_dir}/msprof_*.json' file_list_msprof = get_newest_file(glob.glob(msprof_file_name)) if not file_list_msprof: - logger.error('Could not find msprof file in %s', job_dir) + logger.warning('Could not find msprof file in %s', job_dir) else: ascend_timeline, scope_data = self._parse_msprof_data(file_list_msprof, rank_id, difference_ts, model_list, @@ -731,6 +779,8 @@ class MsprofTimelineAnalyser(BaseAnalyser): for rank_id, (job_dir, _) in sub_dirs.items(): step_trace_file_name = fr'{job_dir}/step_trace_*.csv' file_list = 
get_newest_file(glob.glob(step_trace_file_name)) + if not file_list: + continue model_set = set() with open(file_list[0], 'r', newline='') as fr: reader = csv.DictReader(fr, delimiter=',', quotechar='"') -- Gitee From 9005a8bfb159ad12b9ff42b2f4272dde81daed29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Tue, 30 Apr 2024 18:32:00 +0800 Subject: [PATCH 13/19] r2.3rc2_version --- mindinsight/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindinsight/_version.py b/mindinsight/_version.py index 7879c07c..77cbeeff 100644 --- a/mindinsight/_version.py +++ b/mindinsight/_version.py @@ -14,4 +14,4 @@ # ============================================================================ """Mindinsight version module.""" -VERSION = '2.3.0rc1' +VERSION = '2.3.0rc2' -- Gitee From 754740ddf365fc9ec96e0a4ee8b629758b60e071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Mon, 6 May 2024 20:17:44 +0800 Subject: [PATCH 14/19] add r2.3rc2 realse note --- RELEASE.md | 84 ++++++++++++++++++++++++++++++--------------------- RELEASE_CN.md | 26 ++++++++++++---- 2 files changed, 69 insertions(+), 41 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 004dc725..f0ad51a7 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,7 +2,21 @@ [查看中文](./RELEASE_CN.md) -## MindInsight 2.3.0-rc1 Release Notes +## MindSpore Insight 2.3.0-rc2 Release Notes + +### Bug Fixes + +- [I9JTLU] Fixed the "system error" reported on the summary timeline download page. + +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! + +## MindSpore Insight 2.3.0-rc1 Release Notes ### Major Features and Improvements @@ -113,7 +127,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. 
Contributions of any kind are welcome! -## MindInsight 2.0.0-alpha Release Notes +## MindSpore Insight 2.0.0-alpha Release Notes ### Major Features and Improvements @@ -138,7 +152,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. Contributions of any kind are welcome! -## MindInsight 1.9.0 Release Notes +## MindSpore Insight 1.9.0 Release Notes ### Major Features and Improvements @@ -167,7 +181,7 @@ Special thanks to Zhongwei Wang, Rongchen Zhu, Jiaying Lu, Zhiyong Wang, Yating Contributions of any kind are welcome! -## MindInsight 1.8.0 Release Notes +## MindSpore Insight 1.8.0 Release Notes ### Major Features and Improvements @@ -198,7 +212,7 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! -## MindInsight 1.7.0 Release Notes +## MindSpore Insight 1.7.0 Release Notes ### Major Features and Improvements @@ -223,9 +237,9 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! -# MindInsight 1.6.0 +# MindSpore Insight 1.6.0 -## MindInsight 1.6.0 Release Notes +## MindSpore Insight 1.6.0 Release Notes ### Major Features and Improvements @@ -266,9 +280,9 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan Contributions of any kind are welcome! -# MindInsight 1.5.0 +# MindSpore Insight 1.5.0 -## MindInsight 1.5.0 Release Notes +## MindSpore Insight 1.5.0 Release Notes ### Major Features and Improvements @@ -300,7 +314,7 @@ Contributions of any kind are welcome! ##### Command Line Interface -reviously, we don't set memory limit for offline debugger. In order to use offline debugger in limited environment, we provide with memory limit options when start MindInsight server. View the [Offline Debugger Tutorial](https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.5/debugger_offline.html). +reviously, we don't set memory limit for offline debugger. 
In order to use offline debugger in limited environment, we provide with memory limit options when start MindSpore Insight server. View the [Offline Debugger Tutorial](https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.5/debugger_offline.html). New start command options: @@ -321,9 +335,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.4.0 +# MindSpore Insight 1.4.0 -## MindInsight 1.4.0 Release Notes +## MindSpore Insight 1.4.0 Release Notes ### Major Features and Improvements @@ -349,9 +363,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.3.0 +# MindSpore Insight 1.3.0 -## MindInsight 1.3.0 Release Notes +## MindSpore Insight 1.3.0 Release Notes ### Major Features and Improvements @@ -380,7 +394,7 @@ Contributions of any kind are welcome! #### Build & Installation -- [STABLE] Unified MindInsight installation package, supporting multiple Linux distributions, CPU architectures(x86/ARM), and Python versions(3.7/3.8/3.9). +- [STABLE] Unified MindSpore Insight installation package, supporting multiple Linux distributions, CPU architectures(x86/ARM), and Python versions(3.7/3.8/3.9). ### API Change @@ -412,9 +426,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! -# MindInsight 1.2.0 +# MindSpore Insight 1.2.0 -## MindInsight 1.2.0 Release Notes +## MindSpore Insight 1.2.0 Release Notes ### Major Features and Improvements @@ -474,9 +488,9 @@ Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Mi Contributions of any kind are welcome! 
-# MindInsight 1.1.0 +# MindSpore Insight 1.1.0 -## MindInsight 1.1.0 Release Notes +## MindSpore Insight 1.1.0 Release Notes ### Major Features and Improvements @@ -540,9 +554,9 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Longfei Li, Yongxiong Liang, Chongming Contributions of any kind are welcome! -# MindInsight 1.0.0 +# MindSpore Insight 1.0.0 -## MindInsight 1.0.0 Release Notes +## MindSpore Insight 1.0.0 Release Notes ### Major Features and Improvements @@ -565,9 +579,9 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Lia Contributions of any kind are welcome! -# MindInsight 0.7.0-beta +# MindSpore Insight 0.7.0-beta -## MindInsight 0.7.0 Release Notes +## MindSpore Insight 0.7.0 Release Notes ### Major Features and Improvements @@ -589,9 +603,9 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li Contributions of any kind are welcome! -# MindInsight 0.6.0-beta +# MindSpore Insight 0.6.0-beta -## MindInsight 0.6.0 Release Notes +## MindSpore Insight 0.6.0 Release Notes ### Major Features and Improvements @@ -614,9 +628,9 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li Contributions of any kind are welcome! -# MindInsight 0.5.0-beta +# MindSpore Insight 0.5.0-beta -## MindInsight 0.5.0 Release Notes +## MindSpore Insight 0.5.0 Release Notes ### Major Features and Improvements @@ -648,15 +662,15 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon Contributions of any kind are welcome! -# MindInsight 0.3.0-alpha +# MindSpore Insight 0.3.0-alpha -## MindInsight 0.3.0 Release Notes +## MindSpore Insight 0.3.0 Release Notes ### Major Features and Improvements - Profiling - Provide easy to use apis for profiling start/stop and profiling data analyse (on Ascend only). - - Provide operators performance display and analysis on MindInsight UI. + - Provide operators performance display and analysis on MindSpore Insight UI. 
- Large scale network computation graph visualization. - Optimize summary record implementation and improve its performance. - Improve lineage usability @@ -682,15 +696,15 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon Contributions of any kind are welcome! -# MindInsight 0.2.0-alpha +# MindSpore Insight 0.2.0-alpha -## MindInsight 0.2.0 Release Notes +## MindSpore Insight 0.2.0 Release Notes ### Major Features and Improvements - Parameter distribution graph (Histogram). - Now you can use [`HistogramSummary`](https://www.mindspore.cn/docs/en/master/api_python/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindInsight to record and visualize distribution info of tensors. See our [tutorial](https://www.mindspore.cn/mindinsight/docs/en/master/index.html). + Now you can use [`HistogramSummary`](https://www.mindspore.cn/docs/en/master/api_python/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindSpore Insight to record and visualize distribution info of tensors. See our [tutorial](https://www.mindspore.cn/mindinsight/docs/en/master/index.html). - Lineage support Custom information - GPU support @@ -712,9 +726,9 @@ Ye Huang, Weifeng Huang, Zhenzhong Kou, Pengting Luo, Hongzhang Li, Yongxiong Li Contributions of any kind are welcome! -# MindInsight 0.1.0-alpha +# MindSpore Insight 0.1.0-alpha -## MindInsight 0.1.0 Release Notes +## MindSpore Insight 0.1.0 Release Notes - Training process observation - Provides and displays training process information, including computational graphs and training process indicators. 
diff --git a/RELEASE_CN.md b/RELEASE_CN.md index ea99efee..7193c80b 100644 --- a/RELEASE_CN.md +++ b/RELEASE_CN.md @@ -2,7 +2,21 @@ [View English](./RELEASE.md) -## MindInsight 2.3.0-rc1 Release Notes +## MindSpore Insight 2.3.0-rc2 Release Notes + +### Bug Fixes + +- [I9JTLU] 修复summary timeline下载页面报"system error"的错误。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! + +## MindSpore Insight 2.3.0-rc1 Release Notes ### 主要特性和增强 @@ -27,7 +41,7 @@ Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxian 欢迎以任何形式对项目提供贡献! -## MindSpore MindInsight 2.2.0 Release Notes +## MindSpore Insight 2.2.0 Release Notes ### 主要特性和增强 @@ -113,7 +127,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. 欢迎以任何形式对项目提供贡献! -## MindInsight 2.0.0-alpha Release Notes +## MindSpore Insight 2.0.0-alpha Release Notes ### 主要特性和增强 @@ -138,7 +152,7 @@ Ning Ma, Chuting Liu, Jiaxing Zhu, Qingxiang Zang, Yaomin Mao. 欢迎以任何形式对项目提供贡献! -## MindInsight 1.9.0 Release Notes +## MindSpore Insight 1.9.0 Release Notes ### 主要特性和增强 @@ -165,7 +179,7 @@ Special thanks to Zhongwei Wang, Rongchen Zhu, Jiaying Lu, Zhiyong Wang, Yating 欢迎以任何形式对项目提供贡献! -## MindInsight 1.8.0 Release Notes +## MindSpore Insight 1.8.0 Release Notes ### 主要特性和增强 @@ -194,7 +208,7 @@ Special thanks to Zhiyong Wang, Zhongwei Wang, Rusheng Pan, Yating Wei, Luoxuan 欢迎以任何形式对项目提供贡献! 
-## MindInsight 1.7.0 Release Notes +## MindSpore Insight 1.7.0 Release Notes ### 主要特性及改进 -- Gitee From e560ca38c1a4b98bc34a5314e61adeeb14db9015 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Wed, 12 Jun 2024 16:39:21 +0800 Subject: [PATCH 15/19] =?UTF-8?q?Ascend=E5=9C=BA=E6=99=AF=E5=85=BC?= =?UTF-8?q?=E5=AE=B9=E5=BC=82=E6=9E=84=E5=9C=BA=E6=99=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindinsight/backend/profiler/profile_api.py | 18 ++++++------------ .../single/profiling-dashboard.vue | 9 ++------- .../performance/performance-dashboard.vue | 4 ---- .../performance/performance-dashboard.vue | 7 ++----- 4 files changed, 10 insertions(+), 28 deletions(-) diff --git a/mindinsight/backend/profiler/profile_api.py b/mindinsight/backend/profiler/profile_api.py index 5a71758a..199f8a1b 100644 --- a/mindinsight/backend/profiler/profile_api.py +++ b/mindinsight/backend/profiler/profile_api.py @@ -173,12 +173,6 @@ def get_training_trace_graph(): }}) graph_info['summary'] = analyser.summary graph_info['point_info'] = analyser.point_info(graph_type) - graph_info['is_heterogeneous'] = False - - # In heterogeneous training scene, do not display step trace data. - cpu_op_type_file_name = f"cpu_op_type_info_{device_id}.csv" - if cpu_op_type_file_name in os.listdir(profiler_dir_abs): - graph_info = {'is_heterogeneous': True} return jsonify(graph_info) @@ -844,12 +838,12 @@ def get_cluster_step_trace_info(): device_id = condition.get("device_id", "0") to_int(device_id, 'device_id') - # In heterogeneous training scene, do not display cluster step trace data. 
- cpu_op_type_file_name_prefix = "cpu_op_type_info_" - for item in os.listdir(profiler_dir_abs): - if cpu_op_type_file_name_prefix in item: - step_trace_info = {'is_heterogeneous': True} - return jsonify(step_trace_info) + profiler_info_file = os.path.join(profiler_dir_abs, f'profiler_info_{device_id}.json') + if os.path.exists(profiler_info_file): + with open(profiler_info_file, 'r', encoding='utf-8') as file: + profiler_info = json.loads(file.read()) + if profiler_info.get("is_heterogeneous", False): + return jsonify({'is_heterogeneous': True}) analyser = AnalyserFactory.instance().get_analyser( 'cluster_step_trace', profiler_dir_abs, device_id diff --git a/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue b/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue index 356cf169..600ae84b 100644 --- a/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue +++ b/mindinsight/ui/src/views/profiling-gpu/single/profiling-dashboard.vue @@ -714,7 +714,6 @@ export default { this.queryTimelineInfo(); this.initPieChart(); this.getProccessSummary(); - // this.queryTrainingTrace(); if(this.isDynamic){ this.$nextTick(() => { this.initDynamicShape(); @@ -1077,7 +1076,6 @@ export default { } } else { this.svg.totalHeight = 0; - // this.svg.noData = true; this.svg.data = []; this.svg.initOver = true; this.removeTrace(); @@ -1500,7 +1498,7 @@ export default { step_filter: ["1"], }, }; - let details = [];// + let details = []; let series = []; let legend = []; let ssChart = []; @@ -1591,7 +1589,7 @@ export default { dispaly_op_type: this.topOperatorValueGPU, }, }; - let details = [];// + let details = []; let series = []; let legend = []; RequestService.queryDynamicShapeGPU(params).then( @@ -1635,9 +1633,7 @@ export default { details.push(content) } ); - // this.getFormatterDetailData(row,isSort); } - // this.operatorOptions.xAxis.data = series[0].data.map((_v, i) => i + 1); this.operatorOptions.series = series; 
this.operatorOptions.legend.data = legend; @@ -1650,7 +1646,6 @@ export default { this.operatorOptions.legend.tooltip.formatter = (params) =>{ return this.formatLegendTip(params); }; - // search this.$nextTick(() => { this.chartObj.setOption(this.operatorOptions, true); this.drawChart(); diff --git a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue index dc527db6..b6c8e732 100644 --- a/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/cluster/performance/performance-dashboard.vue @@ -150,10 +150,6 @@ export default { }; RequestService.getClusterInfo(params) .then((res) => { - if (typeof res.data === 'object' && res.data.is_heterogeneous) { - this.performanceState = HETEROGENEOUS; - return; - } if (res?.data?.info?.length > 0) { let chartData = []; const parallelMode = res.data['parallel-mode']; diff --git a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue index de791888..95736206 100644 --- a/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/single/performance/performance-dashboard.vue @@ -73,7 +73,7 @@ limitations under the License. alt="" />

{{$t("public.dataLoading")}}

-

{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noStepStraceData")}}

+

{{$t("public.noStepStraceData")}}

{{$t("public.dataLoading")}}

-

{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noData")}}

+

{{$t("public.noData")}}

{ this.svg.initOver = true; - this.isHeterogeneous = res.data.is_heterogeneous; if (res && res.data && res.data.training_trace_graph && res.data.training_trace_graph.length) { this.svg.noData = false; this.removeTrace(); @@ -1112,7 +1110,6 @@ export default { this.totalSteps = '--'; this.totalTime = '--'; this.tailPercent = '--'; - this.isHeterogeneous = false; }, ); }, -- Gitee From 1b06471e00f53c023fbc6c72f7735d6f176087cb Mon Sep 17 00:00:00 2001 From: z30020733 Date: Tue, 18 Jun 2024 11:43:18 +0800 Subject: [PATCH 16/19] pandas <= 1.3.5 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0a955d13..75206772 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ pyyaml >= 5.3.1 scipy >= 1.5.2 scikit-learn >= 0.23.1 Werkzeug > 2.1.0 -pandas >= 1.0.4 , <= 1.2.5 +pandas >= 1.0.4 , <= 1.3.5 treelib >= 1.6.1 grpcio >= 1.39.0 XlsxWriter >= 1.3.2 \ No newline at end of file -- Gitee From 9cab0967285212f591648dd4fe5084976ad1a7e1 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Wed, 19 Jun 2024 16:53:29 +0800 Subject: [PATCH 17/19] Modify requirements for adapting python3.10.x. 
--- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 75206772..034122e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ itsdangerous >= 1.1.0 Jinja2 >= 2.10.1 MarkupSafe >= 1.1.1 marshmallow >= 3.10.0 -numpy >= 1.17.0 +numpy >= 1.17.0, <= 1.26.4 pillow >= 6.2.0 protobuf >= 3.13.0 , <= 3.20.1 psutil >= 5.7.0 @@ -13,7 +13,7 @@ pyyaml >= 5.3.1 scipy >= 1.5.2 scikit-learn >= 0.23.1 Werkzeug > 2.1.0 -pandas >= 1.0.4 , <= 1.3.5 +pandas >= 1.0.4 , <= 1.3.3 treelib >= 1.6.1 grpcio >= 1.39.0 XlsxWriter >= 1.3.2 \ No newline at end of file -- Gitee From da39797a3411fee71accfd34e5f0807f2ba42aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E5=BA=86=E9=A6=99?= Date: Wed, 19 Jun 2024 17:51:59 +0800 Subject: [PATCH 18/19] change r2.3.0version --- mindinsight/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindinsight/_version.py b/mindinsight/_version.py index 77cbeeff..861fce7a 100644 --- a/mindinsight/_version.py +++ b/mindinsight/_version.py @@ -14,4 +14,4 @@ # ============================================================================ """Mindinsight version module.""" -VERSION = '2.3.0rc2' +VERSION = '2.3.0' -- Gitee From 5fdb4b2d1c466677b6010eccdb67cac8376cb459 Mon Sep 17 00:00:00 2001 From: z30020733 Date: Fri, 28 Jun 2024 14:51:43 +0800 Subject: [PATCH 19/19] Add the RELEASE NOTE of version 2.3.0. --- RELEASE.md | 16 ++++++++++++++++ RELEASE_CN.md | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index f0ad51a7..f5fe023f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,22 @@ [查看中文](./RELEASE_CN.md) +## MindSpore Insight 2.3.0 Release Notes + +### Major Features and Improvements + +#### Profiler + +- [STABLE] In PyNative mode, Timeline integrates host profiling information, including task time and user-side call stack information. 
+ +### Contributors + +Thanks goes to these wonderful people: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +Contributions of any kind are welcome! + ## MindSpore Insight 2.3.0-rc2 Release Notes ### Bug Fixes diff --git a/RELEASE_CN.md b/RELEASE_CN.md index 7193c80b..ab08c519 100644 --- a/RELEASE_CN.md +++ b/RELEASE_CN.md @@ -2,6 +2,22 @@ [View English](./RELEASE.md) +## MindSpore Insight 2.3.0 Release Notes + +### 主要特性和增强 + +#### Profiler + +- [STABLE] PyNative模式下Timeline整合host profiling信息,包括任务耗时、用户侧调用栈信息。 + +### 贡献者 + +感谢以下人员做出的贡献: + +Ning Ma, Jiaxing Zhu, Jiarong Ji, Yanming Miao, Nan Wang, XiaoXian Jin, Qingxiang Zang, DaWei Fan, XinYu Shi, KaiDa Qiu, Wei Zhang, XianQi Zhou, Chen Mao, XiHan Peng. + +欢迎以任何形式对项目提供贡献! + ## MindSpore Insight 2.3.0-rc2 Release Notes ### Bug Fixes -- Gitee