From f782a9f6d92df69409fdfbc3bf11a7ac6c55aaf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8C=83=E6=96=87=E7=84=98?= <fanwentao11@h-partners.com>
Date: Sat, 15 Mar 2025 15:02:34 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E6=8E=89npu=20profiling=E9=87=87?=
 =?UTF-8?q?=E9=9B=86=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

requirements.txt添加ninja，加快mmcv编译安装
---
 model_examples/BEVDet/BEVDet.patch     | 402 ++++++++++++++++++++++++-
 model_examples/BEVDet/requirements.txt |   1 +
 2 files changed, 390 insertions(+), 13 deletions(-)

diff --git a/model_examples/BEVDet/BEVDet.patch b/model_examples/BEVDet/BEVDet.patch
index 280e31d2..b0a57fb6 100644
--- a/model_examples/BEVDet/BEVDet.patch
+++ b/model_examples/BEVDet/BEVDet.patch
@@ -124,19 +124,6 @@ index 4d97026..be10ecd 100644
              model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
  
      # build runner
-diff --git a/mmdet3d/core/hook/__init__.py b/mmdet3d/core/hook/__init__.py
-index 0b85670..168b207 100644
---- a/mmdet3d/core/hook/__init__.py
-+++ b/mmdet3d/core/hook/__init__.py
-@@ -3,6 +3,7 @@ from .ema import MEGVIIEMAHook
- from .utils import is_parallel
- from .sequentialcontrol import SequentialControlHook
- from .syncbncontrol import SyncbnControlHook
-+from .profiler_hook_npu import ProfilerHookNPU
- 
- __all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook',
--           'SyncbnControlHook']
-+           'SyncbnControlHook', 'ProfilerHookNPU']
 diff --git a/mmdet3d/datasets/pipelines/loading.py b/mmdet3d/datasets/pipelines/loading.py
 index b9357ff..3f23888 100644
 --- a/mmdet3d/datasets/pipelines/loading.py
@@ -608,6 +595,395 @@ index bda8bfe..9015b9e 100644
  
          scores = scores.permute(0, 2, 3, 1)  # (B, N, K, M)
  
+diff --git a/test/env_npu.sh b/test/env_npu.sh
+new file mode 100644
+index 0000000..5c16419
+--- /dev/null
++++ b/test/env_npu.sh
+@@ -0,0 +1,51 @@
++#!/bin/bash
++CANN_INSTALL_PATH_CONF='/etc/Ascend/ascend_cann_install.info'
++# Read the CANN install root from the install-info file when it exists; otherwise use /usr/local/Ascend.
++if [ -f $CANN_INSTALL_PATH_CONF ]; then
++    CANN_INSTALL_PATH=$(cat $CANN_INSTALL_PATH_CONF | grep Install_Path | cut -d "=" -f 2)
++else
++    CANN_INSTALL_PATH="/usr/local/Ascend"
++fi
++# Load the ascend-toolkit environment when its "latest" directory exists; otherwise fall back to nnae.
++if [ -d ${CANN_INSTALL_PATH}/ascend-toolkit/latest ]; then
++    source ${CANN_INSTALL_PATH}/ascend-toolkit/set_env.sh
++else
++    source ${CANN_INSTALL_PATH}/nnae/set_env.sh
++fi
++
++
++# Print host-side logs to the console (stdout): 0 = off, 1 = on
++export ASCEND_SLOG_PRINT_TO_STDOUT=0
++# Default log level: 0 = debug, 1 = info, 2 = warning, 3 = error
++export ASCEND_GLOBAL_LOG_LEVEL=3
++# Event log switch: 0 = off, 1 = on
++export ASCEND_GLOBAL_EVENT_ENABLE=0
++# Task queue switch (original note: 0 = off, 1 = on). NOTE(review): 2 is set here, outside the listed values - confirm its meaning
++export TASK_QUEUE_ENABLE=2
++# PTCopy switch: 0 = off, 1 = on
++export PTCOPY_ENABLE=1
++# Combined flag switch: 0 = off, 1 = on
++export COMBINED_ENABLE=1
++# Controls whether special scenarios need recompilation; not meant to be modified
++export DYNAMIC_OP="ADD#MUL"
++# HCCL whitelist switch: 1 = whitelist disabled, 0 = whitelist enabled
++export HCCL_WHITELIST_DISABLE=1
++export HCCL_IF_IP=$(hostname -I |awk '{print $1}')  # first address printed by hostname -I
++
++# Enable CPU core binding
++export CPU_AFFINITY_CONF=1
++
++export OMP_NUM_THREADS=16
++export MKL_NUM_THREADS=16
++
++# Set the device-side log level to error for devices 0 through 7
++msnpureport -g error -d 0
++msnpureport -g error -d 1
++msnpureport -g error -d 2
++msnpureport -g error -d 3
++msnpureport -g error -d 4
++msnpureport -g error -d 5
++msnpureport -g error -d 6
++msnpureport -g error -d 7
++# Disable device-side event logging
++msnpureport -e disable
+diff --git a/test/train_1p.sh b/test/train_1p.sh
+new file mode 100644
+index 0000000..b726985
+--- /dev/null
++++ b/test/train_1p.sh
+@@ -0,0 +1,167 @@
++#!/bin/bash
++# Single-device (1p) training launcher: default settings.
++# Directory the script is invoked from
++cur_path=`pwd`
++# NPU device id used for training
++device_id=0
++
++# Collective-communication parameters
++export RANK_SIZE=1
++export JOB_ID=10087
++RANK_ID_START=0  # not referenced again in this script
++# Performance-mode switch; overridden by --performance (any non-zero value enables it)
++performance=0
++
++# Basic parameters
++batch_size=1
++# Number of training epochs (forwarded as runner.max_epochs)
++max_epochs=24
++
++
++# Print usage and exit when the first argument is --help or -h (a space after echo is required, otherwise the usage line is run as an unknown command)
++if [[ $1 == --help || $1 == -h ]];then
++    echo "usage:./train_1p.sh <args>"
++    echo " "
++    echo "parameter explain:
++    --py_config               train config
++    --performance              switch to performance mode when != 0
++    --work_dir                 set output dir for training
++    -h/--help		             show help message
++    "
++    exit 1
++fi
++
++# Parse arguments of the form --key=value; the value is the text after the first '='
++for para in $*
++do
++    if [[ $para == --py_config* ]];then
++        py_config=`echo ${para#*=}`
++    elif [[ $para == --performance* ]];then
++        performance=`echo ${para#*=}`
++    elif [[ $para == --work_dir* ]];then
++        work_dir=`echo ${para#*=}`
++    fi
++done
++# Performance mode trains for a single epoch only
++if (($performance!=0)); then
++    max_epochs=1
++fi
++
++# Abort when --py_config was not supplied
++if [[ $py_config == "" ]];then
++    echo "[Error] para \"py_config\" must be config"
++    exit 1
++fi
++
++# Config name: basename of py_config up to its first '.'
++config_name=`echo $py_config | awk -F "/" '{print $NF}' | awk -F "." '{print $1}'`
++# Network name (same as the config name)
++Network=$config_name
++
++# Resolve the device id: keep ASCEND_DEVICE_ID when it is already set, otherwise export the device_id configured in this script
++if [ $ASCEND_DEVICE_ID ];then
++    echo "device id is ${ASCEND_DEVICE_ID}"
++elif [ ${device_id} ];then
++    export ASCEND_DEVICE_ID=${device_id}
++    echo "device id is ${ASCEND_DEVICE_ID}"
++else
++    echo "[Error] device id must be config"
++    exit 1
++fi
++# Training output goes to <work_dir>/train_1p/<config_name>, with "output" as the default root
++if [[ $work_dir == "" ]];then
++    work_dir="output/train_1p/$config_name"
++else
++    work_dir="${work_dir}/train_1p/$config_name"
++fi
++
++test_path_dir=$cur_path  # root under which output/<device id>/ is created
++ASCEND_DEVICE_ID=$device_id  # NOTE(review): overrides any ASCEND_DEVICE_ID accepted by the earlier device-id check - confirm this is intended
++export ASCEND_RT_VISIBLE_DEVICES=$ASCEND_DEVICE_ID
++if [ ! -d ${test_path_dir}/output ];then
++    mkdir ${test_path_dir}/output
++fi
++if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
++    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}  # discard results of a previous run for this device id
++    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
++else
++    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
++fi
++
++
++# Training start time (seconds since the epoch)
++start_time=$(date +%s)
++# Source the NPU environment script unless etp_running_flag=true is present in the environment (non-platform runs)
++check_etp_flag=`env | grep etp_running_flag`
++etp_flag=`echo ${check_etp_flag#*=}`
++if [ x"${etp_flag}" != x"true" ];then
++    source ${test_path_dir}/test/env_npu.sh
++fi
++
++
++# Set environment variables
++echo "Device ID: $ASCEND_DEVICE_ID"
++export RANK_ID=$RANK_ID  # RANK_ID is never assigned in this script; this re-exports whatever value the caller provided
++export WORLD_SIZE=1
++# Launch training via tools/dist_train.sh, passing the config, WORLD_SIZE, the work dir and the epoch override
++bash ./tools/dist_train.sh ${py_config} ${WORLD_SIZE} \
++--work-dir ${work_dir} \
++--cfg-options runner.max_epochs=$max_epochs
++
++
++# Training end time and end-to-end duration in seconds
++end_time=$(date +%s)
++e2e_time=$(( $end_time - $start_time ))
++# Pick the .log file under work_dir whose path sorts last (reverse sort, first entry)
++log_file=`find ${work_dir} -regex ".*\.log" | sort -r | head -n 1`
++
++# Print results
++echo "------------------ Final result ------------------"
++# Performance: average the value after " time: " over the 'Epoch ' log lines (lines containing 'Epoch [1][1' are skipped), then FPS = batch_size / average
++FPS=`grep -a 'Epoch '  ${log_file}|awk -F " time: " '!/Epoch \[1\]\[1/ {print $NF}'|awk -F " " '{print $1}' | awk '{ sum += $0; n++} END { if (n > 0) print sum / n;}'`
++FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${FPS}'}'`
++# Print
++echo "Final Performance images/sec : $FPS"
++echo "E2E Training Duration sec : $e2e_time"
++
++# Summary for performance monitoring
++# Training case information
++DeviceType=`uname -m`
++CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'map'
++
++## Collect performance data
++# Throughput
++ActualFPS=${FPS}
++# Training time per iteration in milliseconds (batch_size * 1000 / FPS)
++TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
++echo "TrainingTime for step(ms) : $TrainingTime"
++
++# Extract the value after "loss: " from the 'Epoch ' log lines and append it to train_${CaseName}_loss.txt
++grep "Epoch " ${log_file}|awk -F "loss: " '!/Epoch \[1\]\[1/ {print $NF}' | awk -F "," '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
++
++# Loss of the last iteration (last line of the loss file)
++ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
++
++# Write the key information to ${CaseName}.log (the first echo truncates the file, the rest append)
++echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "BatchSize = ${batch_size}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++
++# Accuracy evaluation, skipped in performance mode. NOTE(review): dist_test.sh is passed 8 as its third argument in this single-device script - confirm
++if (($performance==0));then
++  eval_log_file=`echo ${test_path_dir}/output/$ASCEND_DEVICE_ID/eval_${CaseName}.log`
++  chmod +x ./tools/dist_test.sh
++  nohup ./tools/dist_test.sh ${py_config} ${work_dir}/epoch_${max_epochs}_ema.pth 8 --eval mAP >> ${eval_log_file} 2>&1 &
++  wait
++  # Training accuracy: last field of the last 'mAP: ' line in the evaluation log
++  train_accuracy=`grep -a 'mAP: ' ${eval_log_file}|awk 'END {print}'|awk -F " " '{print $NF}'`
++  # Print
++  echo "Final Train Accuracy : ${train_accuracy}"
++  echo "mAP = ${train_accuracy}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++fi
+diff --git a/test/train_8p.sh b/test/train_8p.sh
+new file mode 100644
+index 0000000..9597828
+--- /dev/null
++++ b/test/train_8p.sh
+@@ -0,0 +1,153 @@
++#!/bin/bash
++# Eight-device (8p) training launcher: default settings.
++# Directory the script is invoked from
++cur_path=`pwd`
++
++# Collective-communication parameters
++export RANK_SIZE=8
++export JOB_ID=10087
++RANK_ID_START=0  # not referenced again in this script
++# Performance-mode switch; overridden by --performance (any non-zero value enables it)
++performance=0
++
++# Basic parameters
++batch_size=8
++# Number of training epochs (forwarded as runner.max_epochs)
++max_epochs=24
++
++# Print usage and exit when the first argument is --help or -h (a space after echo is required, otherwise the usage line is run as an unknown command)
++if [[ $1 == --help || $1 == -h ]];then
++    echo "usage:./train_8p.sh <args>"
++    echo " "
++    echo "parameter explain:
++    --py_config               train config
++    --performance              switch to performance mode when != 0
++    --work_dir                 set output dir for training
++    -h/--help		             show help message
++    "
++    exit 1
++fi
++
++# Parse arguments of the form --key=value; the value is the text after the first '='
++for para in $*
++do
++    if [[ $para == --py_config* ]];then
++        py_config=`echo ${para#*=}`
++    elif [[ $para == --performance* ]];then
++        performance=`echo ${para#*=}`
++    elif [[ $para == --work_dir* ]];then
++        work_dir=`echo ${para#*=}`
++    fi
++done
++# Performance mode trains for a single epoch only
++if (($performance!=0)); then
++    max_epochs=1
++fi
++
++# Abort when --py_config was not supplied
++if [[ $py_config == "" ]];then
++    echo "[Error] para \"py_config\" must be config"
++    exit 1
++fi
++
++# Config name: basename of py_config up to its first '.'
++config_name=`echo $py_config | awk -F "/" '{print $NF}' | awk -F "." '{print $1}'`
++# Network name, same as the config name
++Network=$config_name
++# Training output goes to <work_dir>/train_8p/<config_name>, with "output" as the default root
++if [[ $work_dir == "" ]];then
++    work_dir="output/train_8p/$config_name"
++else
++    work_dir="${work_dir}/train_8p/$config_name"
++fi
++
++test_path_dir=$cur_path  # root under which output/<device id>/ is created
++ASCEND_DEVICE_ID=0  # used below only to name the output/0 result directory
++
++if [ ! -d ${test_path_dir}/output ];then
++    mkdir ${test_path_dir}/output
++fi
++if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
++    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}  # discard results of a previous run
++    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
++else
++    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt
++fi
++
++
++# Training start time (seconds since the epoch)
++start_time=$(date +%s)
++# Source the NPU environment script unless etp_running_flag=true is present in the environment (non-platform runs)
++check_etp_flag=`env | grep etp_running_flag`
++etp_flag=`echo ${check_etp_flag#*=}`
++if [ x"${etp_flag}" != x"true" ];then
++    source ${test_path_dir}/test/env_npu.sh
++fi
++
++
++# Set environment variables
++echo "Device ID: $ASCEND_DEVICE_ID"
++export RANK_ID=$RANK_ID  # RANK_ID is never assigned in this script; this re-exports whatever value the caller provided
++export WORLD_SIZE=8
++# Launch training via tools/dist_train.sh, passing the config, WORLD_SIZE, the work dir and the epoch override
++bash ./tools/dist_train.sh ${py_config} ${WORLD_SIZE} \
++--work-dir ${work_dir} \
++--cfg-options runner.max_epochs=$max_epochs
++
++
++# Training end time and end-to-end duration in seconds
++end_time=$(date +%s)
++e2e_time=$(( $end_time - $start_time ))
++# Pick the .log file under work_dir whose path sorts last (reverse sort, first entry)
++log_file=`find ${work_dir} -regex ".*\.log" | sort -r | head -n 1`
++
++# Print results
++echo "------------------ Final result ------------------"
++# Performance: average the value after " time: " over the 'Epoch ' log lines (lines containing 'Epoch [1][1' are skipped), then FPS = batch_size * 8 / average. NOTE(review): batch_size is already 8 - confirm the extra factor of 8
++FPS=`grep -a 'Epoch '  ${log_file}|awk -F " time: " '!/Epoch \[1\]\[1/ {print $NF}'|awk -F " " '{print $1}' | awk '{ sum += $0; n++} END { if (n > 0) print sum / n;}'`
++FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${FPS}'}'`
++# Print
++echo "Final Performance images/sec : $FPS"
++echo "E2E Training Duration sec : $e2e_time"
++
++# Summary for performance monitoring
++# Training case information
++DeviceType=`uname -m`
++CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'map'
++
++## Collect performance data
++# Throughput
++ActualFPS=${FPS}
++# Training time per iteration in milliseconds (batch_size * 1000 * 8 / FPS)
++TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000*8/'${FPS}'}'`
++echo "TrainingTime for step(ms) : $TrainingTime"
++
++# Extract the value after "loss: " from the 'Epoch ' log lines and append it to train_${CaseName}_loss.txt
++grep "Epoch " ${log_file}|awk -F "loss: " '!/Epoch \[1\]\[1/ {print $NF}' | awk -F "," '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
++
++# Loss of the last iteration (last line of the loss file)
++ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
++
++# Write the key information to ${CaseName}.log (the first echo truncates the file, the rest append)
++echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "BatchSize = ${batch_size}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++
++# Accuracy evaluation, skipped in performance mode; dist_test.sh is run in the background and waited for
++if (($performance==0));then
++  eval_log_file=`echo ${test_path_dir}/output/$ASCEND_DEVICE_ID/eval_${CaseName}.log`
++  chmod +x ./tools/dist_test.sh
++  nohup ./tools/dist_test.sh ${py_config} ${work_dir}/epoch_${max_epochs}_ema.pth 8 --eval mAP >> ${eval_log_file} 2>&1 &
++  wait
++  # Training accuracy: last field of the last 'mAP: ' line in the evaluation log
++  train_accuracy=`grep -a 'mAP: ' ${eval_log_file}|awk 'END {print}'|awk -F " " '{print $NF}'`
++  # Print
++  echo "Final Train Accuracy : ${train_accuracy}"
++  echo "mAP = ${train_accuracy}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
++fi
 diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py
 index 69d8b31..5149884 100644
 --- a/tests/test_utils/test_box3d.py
diff --git a/model_examples/BEVDet/requirements.txt b/model_examples/BEVDet/requirements.txt
index eb5a4463..bb2413fb 100644
--- a/model_examples/BEVDet/requirements.txt
+++ b/model_examples/BEVDet/requirements.txt
@@ -23,3 +23,4 @@ absl-py
 yapf
 mmdet==2.28.2
 mmsegmentation==0.30.0
+ninja
\ No newline at end of file
-- 
Gitee