From d4b215fb5612510868f8dc70589b071e8df5a602 Mon Sep 17 00:00:00 2001
From: fengliangjun
Date: Sat, 10 Jun 2023 11:26:55 +0800
Subject: [PATCH] readme update

---
 README.md                                    | 56 ++++++++++++++++++--
 examples/gpt_task/pretrain_gpt_1p.sh         |  1 -
 examples/gpt_task/pretrain_gpt_td_8p.sh      |  1 -
 examples/llama_task/pretrain_llama_1p.sh     |  1 -
 examples/llama_task/pretrain_llama_ptd_8p.sh |  1 -
 examples/llama_task/pretrain_llama_td_8p.sh  |  1 -
 6 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 4f13884a9..ed38a5dfb 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,58 @@
 ## AscendSpeed
-DeepSpeed version of NVIDIA's Megatron-LM that adds additional support for several features such as MoE model training, Curriculum Learning, 3D Parallelism, and others. The ```AscendSpeed/examples/``` folder includes example scripts about the features supported by DeepSpeed.
+AscendSpeed is an acceleration library for large language models on the Ascend platform, developed on top of Megatron and DeepSpeed. GPT and LLaMA with the PTD parallel strategy are currently supported.
+
+
+### Quick Start
+1. Clone the repository to your local server:
+```bash
+git clone https://gitee.com/ascend/AscendSpeed.git
+cd AscendSpeed
+mkdir logs
+mkdir ckpt
+```
+
+2. Download the [dataset](https://onebox.huawei.com/p/8cb41da9ed8ae1579fbc50d2a2bdebd7) and extract it:
+```bash
+mv dataset AscendSpeed
+cd dataset
+tar -xzvf enwiki-gpt.tar.gz
+```
+
+3. Create and activate a conda environment:
+```bash
+conda create -n py37 python=3.7
+conda activate py37
+```
+
+4. Install PyTorch and its dependencies: \
+(choose the PyTorch packages according to your server architecture, X86 or Arm; the "[2023060213](https://onebox.huawei.com/p/d79535eac8fb159399febca1afa81f95)" version is used here)
+```bash
+pip install apex-0.1_ascend_XXXX-cp37-cp37m-linux_XXXX.whl
+pip install torch-1.11.0+cpu-cp37-cp37m-linux_XXXXX.whl
+pip install torch_npu-1.11.0.post1.devXXXX-cp37-cp37m-linux_XXXX.whl
+
+pip install -r requirements
+```
+
+5. Install DeepSpeed and its Ascend adapter: \
+(the DeepSpeed adapter v0.9.2 can also be downloaded from [here](https://onebox.huawei.com/p/6cdf01273f5d4edbd38a6da7b972c8f9))
+```bash
+pip install deepspeed==0.9.2
+git clone https://gitee.com/ascend/DeepSpeed.git (DeepSpeed adapter v0.9.2, not master!)
+cd DeepSpeed
+vim setup.py (on line 24, change "required_dp_ver = '0.6.0'" to "required_dp_ver = '0.9.2'")
+python setup.py develop
+```
+
+6. Start your task:
+```bash
+sh examples/gpt_task/pretrain_gpt_ptd_8p.sh (demo GPT with 200M parameters)
+sh examples/llama_task/pretrain_llama_ptd_8p.sh (demo LLaMA with 250M parameters)
+```
+
+
+
 
-### Run on Azure and AzureML
-To try out DeepSpeed on Azure, this fork of Megatron offers easy-to-use recipes and bash scripts. We strongly recommend to start with AzureML recipe in the ```examples/azureml``` folder. If you have a custom infrastructure (e.g. HPC clusters) or Azure VM based environment, please refer to the bash scripts in the ```examples/azure``` folder.
 
 Below is Megatron-LM's original README:
 ------
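The wheel names in step 4 above depend on whether the server is X86 or Arm, and step 5 patches the adapter's setup.py by hand. A minimal sketch of scripting both checks follows; it assumes GNU sed and that the version string appears exactly as quoted in step 5.

```bash
# Print the host architecture before downloading wheels:
# "x86_64" means the X86 packages, "aarch64" means the Arm packages.
uname -m

# Non-interactive alternative to the vim edit in step 5, run inside the
# cloned DeepSpeed adapter directory: bump the required DeepSpeed version.
sed -i "s/required_dp_ver = '0.6.0'/required_dp_ver = '0.9.2'/" setup.py
python setup.py develop
```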
diff --git a/examples/gpt_task/pretrain_gpt_1p.sh b/examples/gpt_task/pretrain_gpt_1p.sh
index a15c047bf..10d1f0b0e 100644
--- a/examples/gpt_task/pretrain_gpt_1p.sh
+++ b/examples/gpt_task/pretrain_gpt_1p.sh
@@ -1,6 +1,5 @@
 # This is an example: basic gpt
 # without parameter specific and any parallel technologies
-
 export LD_LIBRARY_PATH=/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
diff --git a/examples/gpt_task/pretrain_gpt_td_8p.sh b/examples/gpt_task/pretrain_gpt_td_8p.sh
index a965a80e1..9b973a0a9 100644
--- a/examples/gpt_task/pretrain_gpt_td_8p.sh
+++ b/examples/gpt_task/pretrain_gpt_td_8p.sh
@@ -1,6 +1,5 @@
 # This is an example: train gpt using TD,
 # the number of parameters is not aligned
-
 export LD_LIBRARY_PATH=/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
diff --git a/examples/llama_task/pretrain_llama_1p.sh b/examples/llama_task/pretrain_llama_1p.sh
index 9d03d7412..735d0c8df 100644
--- a/examples/llama_task/pretrain_llama_1p.sh
+++ b/examples/llama_task/pretrain_llama_1p.sh
@@ -1,6 +1,5 @@
 # This is an example: basic llama
 # without parameter specific and any parallel technologies
-
 export LD_LIBRARY_PATH=/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
diff --git a/examples/llama_task/pretrain_llama_ptd_8p.sh b/examples/llama_task/pretrain_llama_ptd_8p.sh
index 4924f730c..73efe04e2 100644
--- a/examples/llama_task/pretrain_llama_ptd_8p.sh
+++ b/examples/llama_task/pretrain_llama_ptd_8p.sh
@@ -1,6 +1,5 @@
 # This is an example: train llama using PTD,
 # the number of parameters is not aligned
-
 export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
diff --git a/examples/llama_task/pretrain_llama_td_8p.sh b/examples/llama_task/pretrain_llama_td_8p.sh
index fe1545e60..9cfec2938 100644
--- a/examples/llama_task/pretrain_llama_td_8p.sh
+++ b/examples/llama_task/pretrain_llama_td_8p.sh
@@ -1,6 +1,5 @@
 # This is an example: train llama using TD,
 # the number of parameters is not aligned
-
 export LD_LIBRARY_PATH=/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
 export HCCL_CONNECT_TIMEOUT=1200
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
--
Gitee
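For reference, every example script touched above starts from the same environment preamble. A minimal launch sketch using that preamble is shown below; the paths assume the default Miniconda and Ascend toolkit locations from the scripts, and the log file name is only illustrative (the logs directory is created in step 1 of the Quick Start).

```bash
# Environment preamble shared by the example scripts (adjust paths to your installation).
export LD_LIBRARY_PATH=/usr/local/lib:/root/miniconda3/lib:$LD_LIBRARY_PATH
export HCCL_CONNECT_TIMEOUT=1200                    # generous HCCL connect timeout for multi-NPU startup
source /usr/local/Ascend/ascend-toolkit/set_env.sh  # load the Ascend toolkit environment

# Launch the 8-NPU LLaMA demo and keep a copy of the console output under logs/.
sh examples/llama_task/pretrain_llama_ptd_8p.sh | tee logs/pretrain_llama_ptd_8p.log
```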