From 3a2ab3e0c405a2babf51618d60171c41b2201383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:06:35 +0000 Subject: [PATCH 01/12] update ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh index 7f44f2c59..b385ad12a 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh @@ -128,14 +128,14 @@ do #执行训练脚本,需要模型审视修改 corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` - #let a=RANK_ID*${corenum}/8 - #let b=RANK_ID+1 - #let c=b*${corenum}/8-1 - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi #--max_train_steps=$max_train_steps \ - python3.7 ${cur_path}/../src/mains/res50.py \ + nohup ${bind_core} python3.7 ${cur_path}/../src/mains/res50.py \ --config_file=$config_file \ --iterations_per_loop=$iterations_per_loop \ --debug=$debug \ -- Gitee From 8f70dc00bb533bfd23b6dc585e220c83dce8d77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:11:46 +0000 Subject: [PATCH 02/12] update AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh index 71ba419cf..e4f872acb 100644 --- a/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/AlexNet_ID0072_for_TensorFlow/test/train_full_8p.sh @@ -124,7 +124,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi -python3.7 ${cur_path}/../train.py --rank_size=8 \ +nohup ${bind_core} python3.7 ${cur_path}/../train.py --rank_size=8 \ --epochs_between_evals=1 \ --mode=train \ --max_epochs=150 \ -- Gitee From e2f8ed505fb858006804726a7f94d13ade82e336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:14:40 +0000 Subject: [PATCH 03/12] update EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh index 77c6e25a6..e23728d37 100644 --- a/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/EfficientNet_B0_ID0009_for_TensorFlow/test/train_full_8p.sh @@ -137,7 +137,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi - nohup python3.7 efficientnet/main_npu.py \ + nohup ${bind_core} python3.7 efficientnet/main_npu.py \ --data_dir=${data_path} \ --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --mode=train_and_eval \ -- Gitee From 90f162ee8ca653a4178652c5956ce25930d8c260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:20:04 +0000 Subject: [PATCH 04/12] update GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh index f4506c7dc..12b149eaa 100644 --- a/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/GoogleNet_ID0051_for_TensorFlow/test/train_full_8p.sh @@ -83,6 +83,9 @@ do mkdir -p ${autotune_dump_path}/rl elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -115,9 +118,18 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,需要模型审视修改 - nohup python3.7 train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 train.py \ --rank_size=$RANK_SIZE \ --mode=train_and_evaluate \ --max_epochs=200 \ -- Gitee From 820caa6a55a2b6fbb018b33d3087fb3caa61dd62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:25:15 +0000 Subject: [PATCH 05/12] update DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh index f258be9af..e1280f0f6 100644 --- a/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/test/train_full_8p.sh @@ -109,14 +109,20 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #let a=RANK_ID*12 #let b=RANK_ID+1 #let c=b*12-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - - python3.7 ./train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 ./train.py \ --data_dir=${data_path} \ --rank_size=${RANK_SIZE} \ --iterations_per_loop=1000 \ -- Gitee From 9c9b38cb2f764bf34b53d8ff7f587addfa8b89c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:28:53 +0000 Subject: [PATCH 06/12] update MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh index 4eca34746..d36ce3b7d 100644 --- a/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/MobileNetV2_ID0074_for_TensorFlow/test/train_full_8p.sh @@ -123,6 +123,7 @@ do DEVICE_INDEX=$DEVICE_ID export DEVICE_INDEX=${DEVICE_INDEX} + #创建DeviceID输出目录,不需要修改 if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} @@ -134,17 +135,18 @@ do mkdir -p results/$ASCEND_DEVICE_ID sed -i 's|results|results/'$ASCEND_DEVICE_ID'|g' train.py - + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - #corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` - #let a=RANK_ID*${corenum}/8 - #let b=RANK_ID+1 - #let c=b*${corenum}/8-1 - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi - python3.7 train.py \ + corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l` + let a=RANK_ID*${corenum}/8 + let b=RANK_ID+1 + let c=b*${corenum}/8-1 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 train.py \ --dataset_dir=$data_path \ --max_epoch=$train_epochs \ --model_name="mobilenet_v2" \ -- Gitee From f4a0ed2d2a1a45c42b7ff62782c1d82112a133b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:31:34 +0000 
Subject: [PATCH 07/12] update Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh index 16ac5783d..b01f752e2 100644 --- a/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/Resnet50v1.5_ID1721_for_TensorFlow/test/train_full_8p.sh @@ -76,6 +76,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -112,11 +115,19 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune - - nohup python3 imagenet_main.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3 imagenet_main.py \ --resnet_size=50 \ --resnet_version=1 \ --max_train_steps=$train_steps \ -- Gitee From 2d1566ba12f03f1c5ef2c210cd514b8ca6ed3b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:32:56 +0000 Subject: [PATCH 08/12] 
update ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh index b341f3926..7b585c7dd 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet101_ID0063_for_TensorFlow/test/train_full_8p.sh @@ -121,18 +121,18 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi - # 绑核,不需要的绑核的模型删除,需要模型审视修改 - #corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` - #let a=RANK_ID*${corenum}/${RANK_SIZE} - #let b=RANK_ID+1 - #let c=b*${corenum}/${RANK_SIZE}-1 + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - #if [ "x${bind_core}" != x ];then - # bind_core="taskset -c $a-$c" - #fi - python3.7 r1/resnet/imagenet_main.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 r1/resnet/imagenet_main.py \ --resnet_size=101 \ --batch_size=${batch_size} \ --num_gpus=1 \ -- Gitee From 34c555f551338629c03d0d2a251de072e9946c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:34:57 +0000 Subject: [PATCH 09/12] update VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh index 31afa627e..fd7d65e81 100644 --- a/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/VGG16_ID0068_for_TensorFlow/test/train_full_8p.sh @@ -118,11 +118,18 @@ do mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - - python3.7 $cur_path/../train.py \ + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3.7 $cur_path/../train.py \ --rank_size=8 \ --mode=train_and_evaluate \ --max_epochs=150 \ -- Gitee From 6c5588305fe1061610c2d2f2599b929eb96171d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:37:29 +0000 Subject: [PATCH 10/12] update ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh index 10c84b674..528ea03cf 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNext50_ID0070_for_TensorFlow/test/train_full_8p.sh @@ -150,7 +150,7 @@ do if [ "x${bind_core}" != x ];then bind_core="taskset -c $a-$c" fi - ${bind_core} python3.7 res50.py \ + nohup ${bind_core} python3.7 res50.py \ --config_file=$config_file \ --max_train_steps=$max_train_steps \ --iterations_per_loop=$iterations_per_loop \ -- Gitee From 4055839b90cdf2fba888cecde9878b36c3a59f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 11:41:19 +0000 Subject: [PATCH 11/12] update InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh index efc2118f7..a1f83e068 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/InceptionV3_ID0491_for_TensorFlow/test/train_full_8p.sh @@ -78,6 +78,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done #校验是否传入data_path,不需要修改 @@ -107,9 +110,17 @@ do else mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt fi - + + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - python3 Incetpion_V3.py --dataset_dir=$data_path --epoch_num=$train_epochs --NPU_DEVICE_INDEX=$ASCEND_DEVICE_ID --npu_nums=8 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi + nohup ${bind_core} python3 Incetpion_V3.py --dataset_dir=$data_path --epoch_num=$train_epochs --NPU_DEVICE_INDEX=$ASCEND_DEVICE_ID --npu_nums=8 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait -- Gitee From 70dca568b114869e40fea509584e6a8a3b5ad27b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B4=AA=E9=A3=9E?= <11094814+hong-fei1997@user.noreply.gitee.com> Date: Tue, 14 Feb 2023 
11:43:35 +0000 Subject: [PATCH 12/12] update InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 洪飞 <11094814+hong-fei1997@user.noreply.gitee.com> --- .../test/train_full_8p.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh index f99f54788..f39f15a6c 100644 --- a/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/InceptionV4_ID0002_for_TensorFlow/test/train_full_8p.sh @@ -85,6 +85,9 @@ do cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ elif [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" fi done @@ -130,10 +133,18 @@ do fi + # 绑核,不需要的绑核的模型删除,需要模型审视修改 + corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` + let a=RANK_ID*${corenum}/${RANK_SIZE} + let b=RANK_ID+1 + let c=b*${corenum}/${RANK_SIZE}-1 #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + if [ "x${bind_core}" != x ];then + bind_core="taskset -c $a-$c" + fi #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - python3.7 train.py --rank_size=8 \ + nohup ${bind_core} python3.7 train.py --rank_size=8 \ --mode=train_and_evaluate \ --max_epochs=$train_epochs \ --T_max=100 \ -- Gitee