From da2bdac7020ecb534b1db131c7a1c135dc1142dd Mon Sep 17 00:00:00 2001
From: xli <1184313039@qq.com>
Date: Tue, 27 Dec 2022 13:10:17 +0800
Subject: [PATCH] update mask rcnn 1.8

---
 .../Faster_Mask_RCNN_for_PyTorch/README.md    |  64 ++++---
 .../detectron2/layers/mask_ops.py             |   6 +-
 .../detectron2/utils/memory.py                |   5 +-
 .../test/train_full_8p.sh                     |   4 +-
 .../test/train_mask_rcnn_performance_1p.sh    | 146 +++++++++++++++
 .../test/train_mask_rcnn_performance_8p.sh    | 167 ++++++++++++++++++
 6 files changed, 363 insertions(+), 29 deletions(-)
 create mode 100644 PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_1p.sh
 create mode 100644 PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_8p.sh

diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/README.md
index a51b47c93f..0d1a33d801 100644
--- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/README.md
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/README.md
@@ -47,9 +47,9 @@ FasterRCNN是一个业界领先的目标检测网络，他继承了FastRCNN的
 
   | 配套       | 版本                                                         |
   | ---------- | ------------------------------------------------------------ |
-  | 固件与驱动 | [5.1.RC2](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
-  | CANN       | [5.1.RC2](https://www.hiascend.com/software/cann/commercial?version=5.1.RC2) |
-  | PyTorch    | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/)|
+  | 固件与驱动 | [6.0.0.alpha002](https://www.hiascend.com/zh/hardware/firmware-drivers?tag=community&pId=5&mId=6&cann=23e6d0e47a1911edade3fa163edea0c5&ver=1.0.13.alpha) |
+  | CANN       | [6.0.0.alpha002](https://www.hiascend.com/software/cann/community) |
+  | PyTorch    | [1.8.1](https://gitee.com/ascend/pytorch/tree/master/)       |
 
 - 环境准备指导。
 
@@ -115,24 +115,33 @@ python3.7 -m pip install -e Faster_Mask_RCNN_for_PyTorch
 
    该模型支持单机单卡训练和单机8卡训练。    
    mask_rcnn启动训练    
-   - 单机单卡训练
-
-        启动单卡训练。
-
+   
+   - 单机单卡性能
+   
+        启动1卡性能。
+   
         ```
-        bash ./test/train_full_1p.sh --data_path=数据集路径  
+        bash ./test/train_mask_rcnn_performance_1p.sh --data_path=数据集路径  
         ```
-
+   
    - 单机8卡训练
-
+   
         启动8卡训练。
-
+   
         ```
         bash ./test/train_full_8p.sh --data_path=数据集路径  
         ```
-
-   - 多机多卡性能数据获取流程
    
+   - 单机8卡性能
+   
+     启动8卡性能。
+     
+     ```
+     bash ./test/train_mask_rcnn_performance_8p.sh --data_path=数据集路径  
+     ```
+     
+   - 多机多卡性能数据获取流程
+
          ```shell
          	1. 安装环境
          	2. 开始训练，每个机器所请按下面提示进行配置
@@ -141,21 +150,21 @@ python3.7 -m pip install -e Faster_Mask_RCNN_for_PyTorch
 
     faster_rcnn启动训练     
    - 单机单卡训练
-
+   
      启动单卡训练。
-
+   
      ```
      bash ./test/train_faster_rcnn_full_1p.sh --data_path=数据集路径  
      ```
-
+   
    - 单机8卡训练
-
+   
      启动8卡训练。
-
+   
      ```
      bash ./test/train_faster_rcnn_full_8p.sh --data_path=数据集路径 
      ```
-
+   
    - 多机多卡性能数据获取流程
    
      ```shell
@@ -163,11 +172,11 @@ python3.7 -m pip install -e Faster_Mask_RCNN_for_PyTorch
      2. 开始训练，每个机器所请按下面提示进行配置
      bash ./test/train_faster_rcnn_performance_multinodes.sh  --data_path=数据集路径 --batch_size=单卡batch_size*所有卡数 --nnodes=机器总数量 --node_rank=当前机器rank(0,1,2..) --local_addr=当前机器IP(需要和master_addr处于同一网段) --master_addr=主节点IP
      ```
-
+   
    --data\_path参数填写数据集路径。
-
+   
    模型训练脚本参数说明如下。
-
+   
    ```
    公共参数：
     AMP                                                                # 开启混合精度
@@ -187,7 +196,7 @@ python3.7 -m pip install -e Faster_Mask_RCNN_for_PyTorch
 
 # 训练结果展示
 
-**表 2**  训练结果展示表
+**表 2**  faster_rcnn训练结果展示表
 
 | NAME    | Acc@1 |  FPS | Epochs | AMP_Type |
 | ------- | ----- | ---: | ------ | -------: |
@@ -196,6 +205,15 @@ python3.7 -m pip install -e Faster_Mask_RCNN_for_PyTorch
 | 8p-NPU1.5 | 26.773 | 76.5 | -    |        O2 |
 | 8p-NPU1.8  | 27 | 86.3 | -    |       O2 |
 
+**表 3**  mask_rcnn训练结果展示表
+
+| NAME      | Acc@1 |     FPS | Epochs | AMP_Type |
+| --------- | ----- | ------: | ------ | -------: |
+| 1p-NPU1.5 | -     |   6.531 | -      |       O2 |
+| 1p-NPU1.8 | -     |   6.538 | -      |       O2 |
+| 8p-NPU1.5 | 26.6  | 31.8831 | -      |       O2 |
+| 8p-NPU1.8 | 27.1  | 32.9152 | -      |       O2 |
+
 # 版本说明
 
 ## 变更
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py
index f6ad215987..dfd1212937 100755
--- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py
@@ -71,9 +71,9 @@ def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True):
     gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))
     gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))
     grid = torch.stack([gx, gy], dim=3)
-
-    img_masks = F.grid_sample(masks.to(dtype=torch.float32), grid, align_corners=False)
-
+    
+    img_masks = F.grid_sample(masks.to(dtype=torch.float32), grid, padding_mode='border', align_corners=False)
+      
     if skip_empty:
         return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int))
     else:
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py
index ca782a292e..0838c00a7a 100755
--- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py
@@ -79,6 +79,7 @@ def retry_if_cuda_oom(func):
 
     @wraps(func)
     def wrapped(*args, **kwargs):
+        logger = logging.getLogger(__name__)
         with _ignore_torch_cuda_oom():
             return func(*args, **kwargs)
 
@@ -86,9 +87,9 @@ def retry_if_cuda_oom(func):
         torch.cuda.empty_cache()
         with _ignore_torch_cuda_oom():
             return func(*args, **kwargs)
-
+     
         # Try on CPU. This slows down the code significantly, therefore print a notice.
-        logger = logging.getLogger(__name__)
+       
         logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func)))
         new_args = (maybe_to_cpu(x) for x in args)
         new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()}
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_8p.sh
index 6cf024f8ab..2394b4c75f 100644
--- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_8p.sh
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_8p.sh
@@ -12,6 +12,8 @@ else
 fi
 
 # 指定训练所使用的npu device卡id
+Network="Mask_RCNN_for_PyTorch"
+export RANK_SIZE=8
 device_id=0
 batch_size=64
 #参数校验，不需要修改
@@ -70,7 +72,7 @@ nohup python3.7 tools/train_net.py \
         OPT_LEVEL O2 \
         LOSS_SCALE_VALUE 64 \
         SOLVER.IMS_PER_BATCH 64 \
-        SOLVER.MAX_ITER 10250 \
+        SOLVER.MAX_ITER 12250 \
         SEED 1234 \
         MODEL.RPN.NMS_THRESH 0.8 \
         MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_1p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_1p.sh
new file mode 100644
index 0000000000..f781e365fa
--- /dev/null
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_1p.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+############### Resolve the directory the training script runs from ###############
+# Run from the directory that contains test/ (cd up one level if launched inside test/); test_path_dir is the path of the test/ directory
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+# Collective-communication parameter; no need to modify
+export RANK_SIZE=1
+
+
+# Dataset path; leave empty here (it is filled in from the --data_path argument); no need to modify
+data_path=""
+
+# Basic parameters; review and adjust per model
+# Network name (used to build CaseName and written to the summary log)
+Network="Mask_RCNN_for_PyTorch"
+# Training batch size (passed as SOLVER.IMS_PER_BATCH)
+batch_size=8
+# Number of training steps (passed as SOLVER.MAX_ITER)
+train_steps=4000
+
+# Argument parsing; "$@" keeps each argument intact even if it contains spaces
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    fi
+done
+
+# Abort if --data_path was not passed in; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+# ID of the NPU device card used for training
+device_id=0
+
+# Use a pre-set ASCEND_DEVICE_ID if present (dynamic allocation), otherwise fall back to the manually specified device_id; no need to modify
+if [ $ASCEND_DEVICE_ID ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ ${device_id} ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be config"
+    exit 1
+fi
+################# Create a fresh log output directory (any existing one is removed first); no need to modify #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+
+# Patch builtin.py and the model yaml in place so the COCO dataset paths and the R-101 weights path point under $data_path
+sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
+wait
+
+# Source the NPU environment variables unless etp_running_flag is "true" (i.e. when not running on the platform)
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+# Training start time (seconds since epoch); no need to modify
+start_time=$(date +%s)
+nohup python3.7 tools/train_net.py \
+        --config-file  configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \
+        AMP 1 \
+        OPT_LEVEL O2 \
+        LOSS_SCALE_VALUE 64 \
+        MODEL.DEVICE npu:$ASCEND_DEVICE_ID \
+        SOLVER.IMS_PER_BATCH $batch_size \
+        SOLVER.MAX_ITER $train_steps \
+        SEED 1234 \
+        MODEL.RPN.NMS_THRESH 0.8 \
+        MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \
+        MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \
+        DATALOADER.NUM_WORKERS 8 \
+        SOLVER.BASE_LR 0.0025 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+wait
+
+# Restore the dataset paths and the weights path in builtin.py and the model yaml to their original values
+sed -i "s|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|WEIGHTS: \"$data_path/R-101.pkl\"|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|g" $cur_path/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
+
+# Training end time and end-to-end duration in seconds; no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+# Average FPS: mean of the last field of every training-log line containing "FPS"; review per model
+FPS=`grep FPS $test_path_dir/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print  sum/NR}'`
+
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Training accuracy: text after the last "=" on the first "Average Precision" log line; review per model
+train_accuracy=`grep "Average Precision" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "=" '{print $NF}'|head -1`
+# Print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Result summary for stability/accuracy monitoring
+# Test-case information; the CaseName suffix is 'perf' because this is the performance script (matches the 8p performance script)
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no need to modify
+ActualFPS=${FPS}
+# Per-iteration training time, computed as BatchSize * 1000 / FPS; no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'`
+
+# Extract the total_loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep total_loss $test_path_dir/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $test_path_dir/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $test_path_dir/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_8p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_8p.sh
new file mode 100644
index 0000000000..5b6e8a0a35
--- /dev/null
+++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_mask_rcnn_performance_8p.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+############### Resolve the directory the training script runs from ###############
+# Run from the directory that contains test/ (cd up one level if launched inside test/); test_path_dir is the path of the test/ directory
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+# Collective-communication parameter; no need to modify
+export RANK_SIZE=8
+
+# Dataset path; leave empty here (it is filled in from the --data_path argument); no need to modify
+data_path=""
+
+
+# Basic parameters; review and adjust per model
+# Network name (used to build CaseName and written to the summary log)
+Network="Mask_RCNN_for_PyTorch"
+# Training epochs (NOTE(review): not referenced anywhere else in this script)
+train_epochs=1
+# Training batch size (passed as SOLVER.IMS_PER_BATCH)
+batch_size=64
+# Number of training steps (passed as SOLVER.MAX_ITER)
+train_steps=200
+
+# Argument parsing; "$@" keeps each argument intact even if it contains spaces
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=${para#*=}
+    elif [[ $para == --over_dump* ]];then
+        over_dump=${para#*=}
+        over_dump_path=${cur_path}/test/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=${para#*=}
+        data_dump_path=${cur_path}/test/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=${para#*=}
+    elif [[ $para == --profiling* ]];then
+        profiling=${para#*=}
+        profiling_dump_path=${cur_path}/test/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    fi
+done
+
+# Abort if --data_path was not passed in; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be config"
+    exit 1
+fi
+
+# ID of the NPU device card used for training
+device_id=0
+
+# Use a pre-set ASCEND_DEVICE_ID if present (dynamic allocation), otherwise fall back to the manually specified device_id; no need to modify
+if [ $ASCEND_DEVICE_ID ];then
+    echo "device id is ${ASCEND_DEVICE_ID}"
+elif [ ${device_id} ];then
+    export ASCEND_DEVICE_ID=${device_id}
+    echo "device id is ${ASCEND_DEVICE_ID}"
+else
+    echo "[Error] device id must be config"
+    exit 1
+fi
+
+################# Create a fresh log output directory with a ckpt/ subdirectory (any existing one is removed first); no need to modify #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
+fi
+
+
+# Patch builtin.py and the model yaml in place so the COCO dataset paths and the R-101 weights path point under $data_path
+sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
+wait
+
+cd $cur_path/
+# Source the NPU environment variables unless etp_running_flag is "true" (i.e. when not running on the platform)
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source  ${test_path_dir}/env_npu.sh
+fi
+
+# Training start time (seconds since epoch); no need to modify
+start_time=$(date +%s)
+nohup python3.7 tools/train_net.py \
+        --config-file  configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \
+        --device-ids 0 1 2 3 4 5 6 7 \
+        --num-gpus 8 \
+        AMP 1 \
+        OPT_LEVEL O2 \
+        LOSS_SCALE_VALUE 64 \
+        SOLVER.IMS_PER_BATCH $batch_size \
+        SOLVER.MAX_ITER $train_steps \
+        SEED 1234 \
+        MODEL.RPN.NMS_THRESH 0.8 \
+        MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \
+        MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \
+        DATALOADER.NUM_WORKERS 8 \
+        SOLVER.BASE_LR 0.02 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+wait
+# Restore the dataset paths and the weights path in builtin.py and the model yaml to their original values
+sed -i "s|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|g" $cur_path/detectron2/data/datasets/builtin.py
+sed -i "s|WEIGHTS: \"$data_path/R-101.pkl\"|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|g" $cur_path/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
+wait
+# Training end time and end-to-end duration in seconds; no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+# Average FPS: mean of the last field of every training-log line containing "FPS"; review per model
+FPS=`grep FPS $test_path_dir/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print  sum/NR}'`
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Training accuracy: text after the last "=" on the first "Average Precision" log line; review per model
+train_accuracy=`grep "Average Precision" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "=" '{print $NF}'|head -1`
+# Print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Result summary for stability/accuracy monitoring
+# Test-case information; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no need to modify
+ActualFPS=${FPS}
+# Per-iteration training time, computed as BatchSize * 1000 / FPS; no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'`
+
+# Extract the total_loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep total_loss $test_path_dir/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $test_path_dir/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $test_path_dir/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log
-- 
Gitee