From 2ac1bf563846e71a720caeae58111b4d76a3788a Mon Sep 17 00:00:00 2001
From: fanxiaotong <fanxiaotong32@163.com>
Date: Wed, 25 Oct 2023 11:31:17 +0800
Subject: [PATCH 1/2] fix bug

---
 .../cluster_prof_Info_analysis.py             | 30 ++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
index 88ff0dd3f..e52836044 100644
--- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
+++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
@@ -42,6 +42,10 @@ class FormDataProcessor:
         for f in self.files:
             if "mindstudio_profiler_output" in f:
                 continue
+            # 判断csv文件大小
+            if not check_file_readable(f):
+                continue
+
             # 读取CSV文件
             df = pd.read_csv(f)
             # 保留需要的列
@@ -51,12 +55,18 @@ class FormDataProcessor:
                 print(f"{f}文件没有所需的列，请确认profiling数据的正确性:\n,以下列可能不存在{columns_to_keep}\n")
                 continue
             # 从文件名提取设备ID
+            try:
+                df['device_id'] = self.getDeviceId(f)
+            except:
+                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[device_]这一级文件夹\n")
+                continue
             # 添加新列 "device_id"
-            df['device_id'] = self.getDeviceId(f)
-            df['node_id'] = self.getNodeId(f)
-
+            try:
+                df['node_id'] = self.getNodeId(f)
+            except:
+                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[node_*]这一级文件夹\n")
+                continue
             # 将数据添加到最终的数据框中
-            
             all_data = all_data.append(df, ignore_index=True)
         return all_data
 
@@ -78,6 +88,14 @@ class FormDataProcessor:
     def getRankNum(self):
         return len(self.files)
 
+    def check_file_readable(self, file_path):
+        if not os.access(path, os.R_OK):
+            print(f"the path \"{file_path}\" does not have permission to read")
+            return False
+        if os.path.getsize(path) > MAX_READFILE_BYTES:
+            print(f"the path \"{file_path}\" is to large, Please check the path")
+            return False
+        return True
 
 # 表驱动，获取不同芯片类型不同交付件的所需的列
 class ViewInfoManager:
@@ -163,6 +181,8 @@ class TimeToCsvAnalyzer(OpSummaryAnalyzerBase):
         for column in self.columns_to_view:
             view_data[column + '_range'] = view_data[column + '_max'] - view_data[column + '_min']
         view_data.to_csv(self.result_dir + "/cluster_duration_time_analysis.csv", index=False)
+        # 该文件权限设置为只读权限，不允许修改
+        os.chmod(self.result_dir + "/cluster_duration_time_analysis.csv", stat.S_IROTH)
         return view_data
 
 
@@ -211,6 +231,8 @@ class StatisticalInfoToHtmlAnalyzer(OpSummaryAnalyzerBase):
                           width=int(rank_num * 100 * col_num),
                           title_text="Op Performance Comparison")
         plot(fig, filename=self.result_dir + "/" + column + "_Info.html")
+        # 该文件权限设置为只读权限，不允许修改
+        os.chmod(self.result_dir + "/" + column + "_Info.html", stat.S_IROTH)
 
     def getCalNum(self, rank_num):
         # 计算每行应该画多少个子图
-- 
Gitee


From 1635dcf026ae04c02cbe12bd3460887282d8586e Mon Sep 17 00:00:00 2001
From: fanxiaotong <fanxiaotong32@163.com>
Date: Wed, 25 Oct 2023 15:15:36 +0800
Subject: [PATCH 2/2] fix bug

---
 .../cluster_prof_Info_analysis.py             | 37 +++++++++++--------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
index e52836044..830e2ab59 100644
--- a/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
+++ b/profiler/cluster_analyse/cluster_kernels_analysis/cluster_prof_Info_analysis.py
@@ -22,9 +22,12 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from plotly.offline import plot
 import os
+import stat
 
 import warnings
 
+MAX_READ_FILE_BYTES = 64 * 1024 * 1024
+
 
 class FormDataProcessor:
     def __init__(self, path, form_name):
@@ -43,7 +46,7 @@ class FormDataProcessor:
             if "mindstudio_profiler_output" in f:
                 continue
             # 判断csv文件大小
-            if not check_file_readable(f):
+            if not self.check_file_readable(f):
                 continue
 
             # 读取CSV文件
@@ -58,16 +61,16 @@ class FormDataProcessor:
             try:
                 df['device_id'] = self.getDeviceId(f)
             except:
-                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[device_]这一级文件夹\n")
+                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[device_]这一级文件夹,具体操作指导见readme\n")
                 continue
             # 添加新列 "device_id"
             try:
                 df['node_id'] = self.getNodeId(f)
             except:
-                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[node_*]这一级文件夹\n")
+                print(f"文件 \"{f}\" 的路径或者是文件夹名没有按照要求，请确保存在[node*]这一级文件夹,具体操作指导见readme\n")
                 continue
             # 将数据添加到最终的数据框中
-            all_data = all_data.append(df, ignore_index=True)
+            all_data = all_data._append(df, ignore_index=True)
         return all_data
 
     def getChipType(self):
@@ -89,10 +92,10 @@ class FormDataProcessor:
         return len(self.files)
 
     def check_file_readable(self, file_path):
-        if not os.access(path, os.R_OK):
+        if not os.access(file_path, os.R_OK):
             print(f"the path \"{file_path}\" does not have permission to read")
             return False
-        if os.path.getsize(path) > MAX_READFILE_BYTES:
+        if os.path.getsize(file_path) > MAX_READ_FILE_BYTES:
             print(f"the path \"{file_path}\" is to large, Please check the path")
             return False
         return True
@@ -252,6 +255,10 @@ class DeliverableGenerator:
 
     def run(self):
         summary_data = self.formProcess.readSummaryData(self.columns_to_keep)
+        # 判断summarydata 数据是否为空，如果是空， 说明所有csv读取数据都失败了
+        if summary_data.empty:
+            print("没有符合要求的csv表格数据，请排查您的PROFILING数据")
+            return
         rank_num = self.formProcess.getRankNum()
         for analyzer in self.analyzers:
             analyzer.GenerateDeliverable(summary_data, rank_num)
@@ -277,15 +284,15 @@ class DeliverableGenerator:
 
 
 def main():
-        # 解析命令行参数
-        parser = argparse.ArgumentParser()
-        parser.add_argument("--dir", "-d", default=None, help="root dir of PROF_* data")
-        parser.add_argument("--top_n", "-n", default=10, help="how many operators to show", type=int)
-        parser.add_argument("--type", "-t", default='html', help="compare ratio or aicore-time", type=str)
-        args = parser.parse_args()
-
-        deviverable_gen = DeliverableGenerator(args)
-        deviverable_gen.run()
+    # 解析命令行参数
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dir", "-d", default=None, help="root dir of PROF_* data")
+    parser.add_argument("--top_n", "-n", default=10, help="how many operators to show", type=int)
+    parser.add_argument("--type", "-t", default='html', help="compare ratio or aicore-time", type=str)
+    args = parser.parse_args()
+
+    deviverable_gen = DeliverableGenerator(args)
+    deviverable_gen.run()
 
 if __name__ == "__main__":
     main()
-- 
Gitee