diff --git a/examples/verl_plugin/README.md b/examples/verl_plugin/README.md index 736c836ae8fc5c5c98bd7a387253936f7cad1e26..4015634b78cab90ad28e4e78491cf4031b9de20e 100644 --- a/examples/verl_plugin/README.md +++ b/examples/verl_plugin/README.md @@ -104,7 +104,7 @@ git checkout 97b65c63c729c61ca607315cf7084012aabc6bba pip install -r requirements-npu.txt pip install -v -e . # for x86 machine [Optional] -# pip install -r requirements-npu.txt --trusted-host download.pytorch.org --trusted-host +# pip install -r requirements-npu.txt --trusted-host download.pytorch.org --trusted-host mirrors.huaweicloud.com # pip install -v -e . --trusted-host download.pytorch.org --trusted-host mirrors.huaweicloud.com cd .. @@ -231,7 +231,7 @@ python ./examples/data_preprocess/geo3k.py --local_dir=./data/geo3k - 若多机运行,*仅需主节点*需运行此脚本 ```bash - bash examples/grpo_trainer/train_qwen2_5_vl_7b_grpo_full.sh --data_path=xxx ---model_path=xxx + bash examples/grpo_trainer/train_qwen2_5_vl_7b_grpo_full.sh --data_path=xxx --model_path=xxx ``` > *注意:所有节点的代码、权重、数据等路径的层级要保持一致,且启动ray的时候都位于verl目录下* diff --git a/examples/verl_plugin/scripts/ray_start.sh b/examples/verl_plugin/scripts/ray_start.sh index 9f204866952d6306e41cb33ee3fafaed3a05475f..74537000dba8d932d7afcecc45c2f1c77a8e2602 100644 --- a/examples/verl_plugin/scripts/ray_start.sh +++ b/examples/verl_plugin/scripts/ray_start.sh @@ -76,5 +76,3 @@ else fi done fi - -sleep 600 diff --git a/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_full.sh b/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_full.sh index b4421a865e586e5ccba4e6fc3614116847fe56fa..fba5ea25a2f34561b4f874b8046ce82f14f97dcf 100644 --- a/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_full.sh +++ b/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_full.sh @@ -108,4 +108,4 @@ python3 -m verl.trainer.main_ppo \ trainer.val_before_train=False \ actor_rollout_ref.model.enable_activation_offload=True \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=$sp_size 
\ - actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size > train_qwen2_5_vl_32b_grpo_full.log 2>&1 & \ No newline at end of file + actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size 2>&1 | tee train_qwen2_5_vl_32b_grpo_full.log & \ No newline at end of file diff --git a/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_performance.sh b/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_performance.sh index d0b9ce74385baa2dff72c82c2662daaebdd1d73c..34fdd0aebd1e87b732d8c0e964eb05c54378ca1e 100644 --- a/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_performance.sh +++ b/examples/verl_plugin/scripts/train_qwen2_5_vl_32b_grpo_performance.sh @@ -109,7 +109,7 @@ python3 -m verl.trainer.main_ppo \ trainer.val_before_train=False \ actor_rollout_ref.model.enable_activation_offload=True \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=$sp_size \ - actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size > train_qwen2_5_vl_32b_grpo_performance.log 2>&1 & + actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size 2>&1 | tee train_qwen2_5_vl_32b_grpo_performance.log & wait TPS=`grep 'perf/throughput:' train_qwen2_5_vl_32b_grpo_performance.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | tail -n 5 | awk '{sum+=$1} END {print sum/NR}'` diff --git a/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_full.sh b/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_full.sh index 88e7be4dc04c98ca8a3338584ece508b352e859a..8c7ed5ca3272ce4dd70dacee3c841da1846bdc38 100644 --- a/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_full.sh +++ b/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_full.sh @@ -106,4 +106,4 @@ python3 -m verl.trainer.main_ppo \ trainer.device=npu \ trainer.val_before_train=False \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=$sp_size \ - actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size > train_qwen2_5_vl_7b_grpo_full.log 2>&1 & \ No newline at end of file + 
actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size 2>&1 | tee train_qwen2_5_vl_7b_grpo_full.log & \ No newline at end of file diff --git a/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_performance.sh b/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_performance.sh index 59689cb2108c6fa579992ea88a688a937fcc796b..3abdc5ae22d877055184390618d7c1a960ed4204 100644 --- a/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_performance.sh +++ b/examples/verl_plugin/scripts/train_qwen2_5_vl_7b_grpo_performance.sh @@ -108,7 +108,7 @@ python3 -m verl.trainer.main_ppo \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=$sp_size \ actor_rollout_ref.ref.ulysses_sequence_parallel_size=$sp_size \ actor_rollout_ref.actor.entropy_from_logits_with_chunking=True \ - actor_rollout_ref.ref.entropy_from_logits_with_chunking=True > train_qwen2_5_vl_7b_grpo_performance.log 2>&1 & + actor_rollout_ref.ref.entropy_from_logits_with_chunking=True 2>&1 | tee train_qwen2_5_vl_7b_grpo_performance.log & wait TPS=`grep 'perf/throughput:' train_qwen2_5_vl_7b_grpo_performance.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | tail -n 5 | awk '{sum+=$1} END {print sum/NR}'`