From 99496b64e5c19d51c59faddfb2649c46a4d21915 Mon Sep 17 00:00:00 2001
From: "siyuan.lei"
Date: Thu, 21 Aug 2025 05:35:05 +0000
Subject: [PATCH] support llama3-8B for 4.3.0

---
 nlp/llm/llama3_8b/openorca/README.md          | 44 +++++++++++++++++++
 nlp/llm/llama3_8b/openorca/train_sft_llama.sh | 31 +++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 nlp/llm/llama3_8b/openorca/README.md
 create mode 100644 nlp/llm/llama3_8b/openorca/train_sft_llama.sh

diff --git a/nlp/llm/llama3_8b/openorca/README.md b/nlp/llm/llama3_8b/openorca/README.md
new file mode 100644
index 000000000..c1b126179
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/README.md
@@ -0,0 +1,44 @@
+# Llama3-8B (OpenRLHF)
+
+## Model Description
+
+Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. It utilizes
+an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on
+sequences of 8,192 tokens and using a 128K-token vocabulary, it excels in various natural language tasks. The model
+incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human
+preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in
+language understanding and generation.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.06 |
+
+## Model Preparation
+
+### Install OpenRLHF
+
+```sh
+# install OpenRLHF v0.5.7 from source
+git clone https://github.com/OpenRLHF/OpenRLHF.git -b v0.5.7
+cd OpenRLHF
+pip install -e .
+```
+
+## Model Training
+
+```sh
+# Make sure 16 BI-V150 GPUs are available.
+cp *.sh OpenRLHF/examples/scripts/
+cd OpenRLHF/examples/scripts/
+
+# train sft
+bash train_sft_llama.sh
+```
+
+Tip: if you hit an out-of-memory (OOM) error during training, reduce `micro_train_batch_size` accordingly.
+
+## References
+
+- [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF)
diff --git a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
new file mode 100644
index 000000000..5eb67be28
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
@@ -0,0 +1,31 @@
+set -x
+
+read -r -d '' training_commands <<EOF