From dc6030c81ed901ec02fa219732b1f1d93a0fd064 Mon Sep 17 00:00:00 2001
From: Sunboquan
Date: Mon, 9 Oct 2023 11:37:10 +0800
Subject: [PATCH] add input method & add README pics

cluster_analysis.py: wrap CommunicationGroupGenerator(...).generate() in
try/except RuntimeError. On failure the message is printed and the
analysis continues with empty communication_group, communication_ops
and collective_group_dict instead of returning early.

pytorch_data_preprocessor.py: collect directories whose name ends with
"ascend_pt" recursively via os.walk and keep them as paths joined with
their walk root; get_rank_id now lists dir_name directly instead of
joining it with self.path.

file_manager.py: move the os.path.exists check ahead of the os.access
check so it is performed regardless of the isdir flag.
---
Review notes (ignored by git am):
 * The submitted version assigned the empty defaults in a `finally:`
   clause. `finally` also runs when generate() succeeds, so the freshly
   computed results were always discarded. The defaults are now assigned
   only in the `except RuntimeError` branch; the hunk header and the
   diffstat below were adjusted for the removed line.
 * The post-image blob hash on the cluster_analysis.py `index` line is
   the one from the submitted version and no longer matches this hunk.
 * Indentation was reconstructed from whitespace-collapsed text; confirm
   it against the target tree before applying.

 profiler/cluster_analyse/cluster_analysis.py         | 11 +++++++----
 .../pytorch_data_preprocessor.py                     | 11 ++++++-----
 profiler/cluster_analyse/common_func/file_manager.py |  6 +++---
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py
index 861bdeac6..67f3256ee 100644
--- a/profiler/cluster_analyse/cluster_analysis.py
+++ b/profiler/cluster_analyse/cluster_analysis.py
@@ -33,11 +33,14 @@ class Interface:
         if not data_map:
             print("Can not get rank info or profiling data.")
             return
-        communication_group, collective_group_dict, communication_ops = \
-            CommunicationGroupGenerator(self.collection_path, data_map).generate()
-        if not collective_group_dict:
+        try:
+            communication_group, collective_group_dict, communication_ops = \
+                CommunicationGroupGenerator(self.collection_path, data_map).generate()
+        except RuntimeError:
             print("Can not get communication info from ranks")
-            return
+            communication_group = {}
+            communication_ops = []
+            collective_group_dict = {}
         params = {
             Constant.COLLECTION_PATH: self.collection_path,
             Constant.DATA_MAP: data_map,
diff --git a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py
index 2870048a5..c10bd5142 100644
--- a/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py
+++ b/profiler/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py
@@ -27,10 +27,11 @@ class PytorchDataPreprocessor:
 
     def get_data_map(self) -> dict:
         FileManager.check_file_or_directory_path(self.path, isdir=True)
-
-        collector_dirs = [dir_name for dir_name in os.listdir(self.path) if os.path.isdir(os.path.join(self.path, dir_name))]
-        ascend_pt_dirs = [dir_name for dir_name in collector_dirs if dir_name.endswith("ascend_pt")]
-
+        ascend_pt_dirs = []
+        for root, dirs, files in os.walk(self.path):
+            for dir_name in dirs:
+                if dir_name.endswith("ascend_pt"):
+                    ascend_pt_dirs.append(os.path.join(root, dir_name))
         rank_id_map = defaultdict(list)
         for dir_name in ascend_pt_dirs:
             rank_id = self.get_rank_id(dir_name)
@@ -46,7 +47,7 @@ class PytorchDataPreprocessor:
         return ret_dict
 
     def get_rank_id(self, dir_name: str) -> int:
-        files = os.listdir(os.path.join(self.path, dir_name))
+        files = os.listdir(dir_name)
         for file_name in files:
             if file_name.startswith(self.PROFILER_INFO_HEAD) and file_name.endswith(self.PROFILER_INFO_EXTENSION):
                 return int(file_name[len(self.PROFILER_INFO_HEAD): -1 * len(self.PROFILER_INFO_EXTENSION)])
diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py
index 3ac6b843a..8f20f35fb 100644
--- a/profiler/cluster_analyse/common_func/file_manager.py
+++ b/profiler/cluster_analyse/common_func/file_manager.py
@@ -33,6 +33,9 @@ class FileManager:
         Exception Description:
             when invalid data throw exception
         """
+        if not os.path.exists(path):
+            raise RuntimeError('{} is not exist.'.format(path))
+
         if not os.access(path, os.R_OK):
             raise RuntimeError(
                 'The path {} does not have permission to read. Please check the path permission'.format(path))
@@ -46,9 +49,6 @@ class FileManager:
             raise RuntimeError(msg)
 
         if isdir:
-            if not os.path.exists(path):
-                raise RuntimeError('The path {} is not exist.'.format(path))
-
             if not os.path.isdir(path):
                 raise RuntimeError('The path {} is not a directory.'.format(path))
 
-- 
Gitee