From 1bd6e4253647b9cd2c7d9f16805bba78452fe9b8 Mon Sep 17 00:00:00 2001 From: bailang Date: Fri, 19 May 2023 15:16:39 +0800 Subject: [PATCH] =?UTF-8?q?[=E4=BC=97=E6=99=BA][PyTorch]SSD-Resnet?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E4=BF=AE=E5=A4=8D=E7=B2=BE=E5=BA=A6=E5=80=BC?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E4=B8=8D=E5=88=B0=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: bailang --- .../SSD-Resnet/test/train_full_8p.sh | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/PyTorch/contrib/cv/detection/SSD-Resnet/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/SSD-Resnet/test/train_full_8p.sh index fd61d10824..ead01c3905 100644 --- a/PyTorch/contrib/cv/detection/SSD-Resnet/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/SSD-Resnet/test/train_full_8p.sh @@ -1,23 +1,23 @@ #!/bin/bash -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 +################鍩虹閰嶇疆鍙傛暟锛岄渶瑕佹ā鍨嬪瑙嗕慨鏀################## +# 蹇呴夊瓧娈(蹇呴』鍦ㄦ澶勫畾涔夌殑鍙傛暟): Network batch_size RANK_SIZE +# 缃戠粶鍚嶇О锛屽悓鐩綍鍚嶇О Network="SSD-Resnet_ID3530_for_Pytorch" -# 训练batch_size +# 璁粌batch_size batch_size=32 -# 训练使用的npu卡数 +# 璁粌浣跨敤鐨刵pu鍗℃暟 export RANK_SIZE=8 data_path="/opt/npu/dataset/coco/" -# 训练epoch +# 璁粌epoch train_epochs=90 -# 指定训练所使用的npu device卡id +# 鎸囧畾璁粌鎵浣跨敤鐨刵pu device鍗d device_id=8 -# 加载数据进程数 +# 鍔犺浇鏁版嵁杩涚▼鏁 workers=16 -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +# 鍙傛暟鏍¢獙锛宒ata_path涓哄繀浼犲弬鏁帮紝鍏朵粬鍙傛暟鐨勫鍒犵敱妯″瀷鑷韩鍐冲畾锛涙澶勬柊澧炲弬鏁伴渶鍦ㄤ笂闈㈡湁瀹氫箟骞惰祴鍊 for para in $* do if [[ $para == --device_id* ]];then @@ -29,12 +29,12 @@ do fi done -# 校验是否传入data_path,不需要修改 +# 鏍¢獙鏄惁浼犲叆data_path,涓嶉渶瑕佷慨鏀 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" exit 1 fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +# 鏍¢獙鏄惁鎸囧畾浜哾evice_id,鍒嗗姩鎬佸垎閰峝evice_id涓庢墜鍔ㄦ寚瀹歞evice_id,姝ゅ涓嶉渶瑕佷慨鏀 if [ $ASCEND_DEVICE_ID ];then echo "device id is ${ASCEND_DEVICE_ID}" elif [ ${device_id} ];then @@ -47,8 +47,8 @@ fi -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +###############鎸囧畾璁粌鑴氭湰鎵ц璺緞############### +# cd鍒颁笌test鏂囦欢澶瑰悓灞傜骇鐩綍涓嬫墽琛岃剼鏈紝鎻愰珮鍏煎鎬э紱test_path_dir涓哄寘鍚玹est鏂囦欢澶圭殑璺緞 cur_path=`pwd` cur_path_last_dirname=${cur_path##*/} if [ x"${cur_path_last_dirname}" == x"test" ];then @@ -60,7 +60,7 @@ else fi -#################创建日志输出目录,不需要修改################# +#################鍒涘缓鏃ュ織杈撳嚭鐩綍锛屼笉闇瑕佷慨鏀################# if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID @@ -69,10 +69,10 @@ else fi -#################启动训练脚本################# -#训练开始时间,不需要修改 +#################鍚姩璁粌鑴氭湰################# +#璁粌寮濮嬫椂闂达紝涓嶉渶瑕佷慨鏀 start_time=$(date +%s) -# 非平台场景时source 环境变量 +# 闈炲钩鍙板満鏅椂source 鐜鍙橀噺 check_etp_flag=`env | grep etp_running_flag` etp_flag=`echo ${check_etp_flag#*=}` if [ x"${etp_flag}" != x"true" ];then @@ -104,43 +104,43 @@ done wait -##################获取训练数据################ -#训练结束时间,不需要修改 +##################鑾峰彇璁粌鏁版嵁################ +#璁粌缁撴潫鏃堕棿锛屼笉闇瑕佷慨鏀 end_time=$(date +%s) e2e_time=$(( $end_time - $start_time )) -#结果打印 +#缁撴灉鎵撳嵃 echo "------------------ Final result ------------------" -#输出性能FPS +#杈撳嚭鎬ц兘FPS FPS=`grep -a 'sec:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk -F " " '{print $15}' | tail -n +3 | awk '{sum+=$1} END {print sum/NR}'` -#打印 +#鎵撳嵃 echo "Final Performance images/sec : $FPS" -#输出训练精度 -train_accuracy=`grep -a 'Current AP' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $3}'|awk 'END {print}'` -#打印 +#杈撳嚭璁粌绮惧害 +train_accuracy=`grep -a 'Current AP' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "AP: " '{print $NF}'|awk -F " " '{print $1}'|awk 'END {print}'` +#鎵撳嵃 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" -#性能看护结果汇总 -#训练用例信息,不需要修改 +#鎬ц兘鐪嬫姢缁撴灉姹囨 +#璁粌鐢ㄤ緥淇℃伅锛屼笉闇瑕佷慨鏀 BatchSize=${batch_size} DeviceType=`uname -m` CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' -##获取性能数据,不需要修改 -#吞吐量 +##鑾峰彇鎬ц兘鏁版嵁锛屼笉闇瑕佷慨鏀 +#鍚炲悙閲 ActualFPS=${FPS} -#单迭代训练时长 +#鍗曡凯浠h缁冩椂闀 TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中, +#浠巘rain_$ASCEND_DEVICE_ID.log鎻愬彇Loss鍒皌rain_${CaseName}_loss.txt涓紝 grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 +#鏈鍚庝竴涓凯浠oss鍊硷紝涓嶉渶瑕佷慨鏀 ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -#关键信息打印到${CaseName}.log中,不需要修改 +#鍏抽敭淇℃伅鎵撳嵃鍒${CaseName}.log涓紝涓嶉渶瑕佷慨鏀 echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -- Gitee