From d2441e030c791ad484f25425da43e22d0fcb762f Mon Sep 17 00:00:00 2001 From: mr-lin314 <798948055@qq.com> Date: Fri, 29 Aug 2025 15:22:01 +0800 Subject: [PATCH 1/4] [docs]add mindspeed_rl commit id --- examples/rl/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/rl/README.md b/examples/rl/README.md index 27443a61..592b4f4b 100644 --- a/examples/rl/README.md +++ b/examples/rl/README.md @@ -92,6 +92,7 @@ cd .. # 下载MindSpeed RL git clone https://gitee.com/ascend/MindSpeed-RL.git cd MindSpeed-RL +git checkout 6cb2e0d61eed19460d24575f65005930b8335329 pip install -r requirements.txt cp -r mindspeed_rl ../MindSpeed-MM/ cd .. -- Gitee From f15628caa1d5933fed6139d2c7c145419f97d71c Mon Sep 17 00:00:00 2001 From: mr-lin314 <798948055@qq.com> Date: Fri, 29 Aug 2025 16:20:51 +0800 Subject: [PATCH 2/4] [docs]add mindspeed_rl commit id --- examples/rl/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/rl/README.md b/examples/rl/README.md index 592b4f4b..e931d9e3 100644 --- a/examples/rl/README.md +++ b/examples/rl/README.md @@ -85,7 +85,6 @@ git clone https://gitee.com/ascend/MindSpeed.git cd MindSpeed # checkout commit from MindSpeed core_r0.12.1 git checkout 6d63944cb2470a0bebc38dfb65299b91329b8d92 -pip install -r requirements.txt cp -r mindspeed ../MindSpeed-MM/ cd .. @@ -135,6 +134,7 @@ pip install numpy==1.26.4 source /usr/local/Ascend/ascend-toolkit/set_env.sh source /usr/local/Ascend/nnal/atb/set_env.sh cd vllm-ascend +pip install -r requirements-dev.txt # 因为前面已经安装了对应的torch_npu版本,这里需要将vllm_ascend文件中的requirements.txt中的torch-npu==2.5.1注释 # 安装时若编译失败,可以关闭编译:export COMPILE_CUSTOM_KERNELS=0 python setup.py develop -- Gitee From 07a0bea35eebbae59e3626f02bbb6dbeae8196c2 Mon Sep 17 00:00:00 2001 From: mr-lin314 <798948055@qq.com> Date: Fri, 29 Aug 2025 17:00:26 +0800 Subject: [PATCH 3/4] [docs]add mindspeed_rl commit id --- examples/rl/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/README.md b/examples/rl/README.md index e931d9e3..5890b5c1 100644 --- a/examples/rl/README.md +++ b/examples/rl/README.md @@ -56,6 +56,8 @@ cp -r megatron ../MindSpeed-MM/ cd .. cd MindSpeed-MM +# 安装mm-convert工具 +pip install -e . --no-deps mkdir -p logs data ckpt cd .. ``` @@ -97,8 +99,6 @@ cp -r mindspeed_rl ../MindSpeed-MM/ cd .. cd MindSpeed-MM -# 安装mm-convert工具 -pip install -e . --no-deps # MindSpeed/Megatron 中文件替换(必选) bash examples/rl/scripts/copy_adaptor_to_mindspeed.sh cd .. -- Gitee From 78553d7b00afd69c16207eef83470699b82ec155 Mon Sep 17 00:00:00 2001 From: mr-lin314 <798948055@qq.com> Date: Sat, 30 Aug 2025 09:58:08 +0800 Subject: [PATCH 4/4] [docs]add mindspeed_rl commit id --- examples/rl/code/adaptor.py | 39 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/examples/rl/code/adaptor.py b/examples/rl/code/adaptor.py index 0c7660e4..5026fec1 100644 --- a/examples/rl/code/adaptor.py +++ b/examples/rl/code/adaptor.py @@ -29,6 +29,14 @@ def dot_product_attention_forward_impl( attention_bias: Tensor = None, packed_seq_params: Optional[PackedSeqParams] = None, ): + if packed_seq_params is None: + seq_length, bsz, n_head, head_dim = ( + query.shape[0], query.shape[1], query.shape[2], query.shape[3] + ) + else: + seq_length, n_head, head_dim = ( + query.shape[0], query.shape[1], query.shape[2] + ) use_remove_padding = getattr(self.config, 'use_remove_padding', False) if use_remove_padding: from mindspeed.utils import get_actual_seq_len @@ -50,10 +58,6 @@ def dot_product_attention_forward_impl( sparse_mode=3)[0].reshape(seq_length, bsz, -1) else: - seq_length, bsz, n_head, head_dim = ( - query.shape[0], query.shape[1], query.shape[2], query.shape[3] - ) - sparse_mode = self.config.sparse_mode if attn_mask_type == AttnMaskType.no_mask: sparse_mode = 0 # default mask @@ -64,17 +68,15 @@ def dot_product_attention_forward_impl( else self.softmax_scale ) - if packed_seq_params is not None: # TND - actual_seq_qlen = packed_seq_params.cu_seqlens_q.tolist() - actual_seq_kvlen = packed_seq_params.cu_seqlens_kv.tolist() - query, key, value = ( - [ - rearrange(x, 's b h d -> (b s) h d') - for x in [query, key, value] - ] - ) + if packed_seq_params is not None: # TND + if isinstance(packed_seq_params.cu_seqlens_q, list): + actual_seq_qlen = packed_seq_params.cu_seqlens_q + actual_seq_kvlen = packed_seq_params.cu_seqlens_kv + else: + actual_seq_qlen = packed_seq_params.cu_seqlens_q.tolist() + actual_seq_kvlen = packed_seq_params.cu_seqlens_kv.tolist() shape_order = 'TND' - else: # SBH + else: # SBH actual_seq_qlen = None actual_seq_kvlen = None query, key, value = ( @@ -102,13 +104,4 @@ def dot_product_attention_forward_impl( actual_seq_kvlen=actual_seq_kvlen )[0] - if packed_seq_params is not None: - output = ( - rearrange( - output, - '(b s) h d -> s b (h d)', - s=seq_length, b=bsz - ) - ) - return output -- Gitee