From d82dcf38e758f78fba5097e330c4b47ac6ebfa06 Mon Sep 17 00:00:00 2001
From: Erpim
Date: Mon, 29 Sep 2025 21:19:23 +0800
Subject: [PATCH] fix ep pin_memory

---
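Note (kept below the --- so it stays out of the commit message): the
common thread in this patch is that pin_memory() is only valid for
host-resident tensors, so every call site now checks the tensor's
device before pinning. A minimal sketch of the pattern, assuming an
MSAdapter-style tensor that exposes the internal `_ms_device` attribute
and a `pin_memory()` method as used in vllm_mindspore/lora/models.py
below (`pin_if_cpu` is a hypothetical helper name, not part of this
patch):

    def pin_if_cpu(tensor):
        # Hypothetical helper illustrating the guard added in this patch.
        # pin_memory() copies a tensor into page-locked host memory; that
        # is only meaningful for tensors living on the CPU, so check the
        # device tag first. `_ms_device` is the internal attribute the
        # diff below relies on.
        if tensor._ms_device == "CPU":
            return tensor.pin_memory()
        return tensor

Pinned host memory allows the subsequent host-to-device copies to run
asynchronously, which is presumably why mindformers.py now routes the
attention/FFN padding-index tensors through the existing convert_pin()
helper as well ("ep" in the subject likely refers to expert parallelism,
where some of these tensors may already reside on device). The `* 1`
defensive copies on input_ids and block_tables are dropped at the same
time, presumably because the explicit copy is no longer needed once the
tensors go through convert_pin().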
 .jenkins/test/config/dependent_packages.yaml       |  2 +-
 tests/mindformers                                  |  2 +-
 tests/st/python/utils.py                           |  2 --
 vllm_mindspore/lora/models.py                      |  9 +++++----
 .../model_executor/models/mf_models/mindformers.py | 13 ++++++-------
 5 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/.jenkins/test/config/dependent_packages.yaml b/.jenkins/test/config/dependent_packages.yaml
index bb8a8003..8aad948e 100644
--- a/.jenkins/test/config/dependent_packages.yaml
+++ b/.jenkins/test/config/dependent_packages.yaml
@@ -5,7 +5,7 @@ mindspore_gs:
   'https://repo.mindspore.cn/mindspore/golden-stick/version/202509/20250901/master_20250901221800_3e34fd43040b0c5d296e6bc1a82212deae3ee041_newest/'
 msadapter:
-  'https://repo.mindspore.cn/mindspore/msadapter/version/202509/20250927/master_20250927010016_9d331d34b4371f874360a0ba2a9b8a543bd28964_newest/'
+  'https://repo.mindspore.cn/mindspore/msadapter/version/202509/20250929/master_20250929203817_1b4f3bc61383eab75bd823ba591e15fd09afa24a_newest/'
 vllm:
   'https://repo.mindspore.cn/mirrors/vllm/version/202507/20250715/v0.9.1/'
diff --git a/tests/mindformers b/tests/mindformers
index b06ca2b5..cd39c154 160000
--- a/tests/mindformers
+++ b/tests/mindformers
@@ -1 +1 @@
-Subproject commit b06ca2b51da0e10aa479ca1a6a289393eee7ca3e
+Subproject commit cd39c1543c3c29b57da6e305f0902096a046be35
diff --git a/tests/st/python/utils.py b/tests/st/python/utils.py
index ae3cb138..84354ea1 100644
--- a/tests/st/python/utils.py
+++ b/tests/st/python/utils.py
@@ -79,8 +79,6 @@ class EnvVarManager:
         else:
             os.environ["PYTHONPATH"] = mindformers_path

-        os.environ['MS_ENABLE_TRACE_MEMORY'] = "off"
-
         # Update environments.
         for var_name, value in env_vars.items():
             self.set_env_var(var_name, value)
diff --git a/vllm_mindspore/lora/models.py b/vllm_mindspore/lora/models.py
index 621f609a..7a687019 100644
--- a/vllm_mindspore/lora/models.py
+++ b/vllm_mindspore/lora/models.py
@@ -77,7 +77,8 @@ def from_lora_tensors(
             if embeddings_module:
                 lora_embeddings_tensor = embeddings[
                     embedding_modules[embeddings_module]]
-                if pin_memory:
+                if pin_memory and \
+                    lora_embeddings_tensor._ms_device == "CPU":
                     lora_embeddings_tensor = (
                         lora_embeddings_tensor.pin_memory())
         loras[module_name] = LoRALayerWeights.from_config(
@@ -87,12 +88,12 @@
             # vllm-mindspore remove tensor device
             loras[module_name].bias = tensor.to(dtype=dtype).t()
             bias = tensor.to(dtype=dtype).t()
-            if pin_memory:
+            if pin_memory and bias._ms_device == "CPU":
                 bias = bias.pin_memory()
             loras[module_name].bias = bias
         elif is_lora_a:
             loras[module_name].lora_a = tensor.to(dtype=dtype).t()
-            if pin_memory:
+            if pin_memory and loras[module_name].lora_a._ms_device == "CPU":
                 loras[module_name].lora_a = loras[
                     module_name].lora_a.pin_memory()
         else:
@@ -105,7 +106,7 @@
                 addition = target_embedding_padding - lora_b.shape[1]
                 loras[module_name].lora_b = mint.nn.functional.pad(
                     lora_b, (0, addition))
-            if pin_memory:
+            if pin_memory and loras[module_name].lora_b._ms_device == "CPU":
                 loras[module_name].lora_b = loras[
                     module_name].lora_b.pin_memory()

diff --git a/vllm_mindspore/model_executor/models/mf_models/mindformers.py b/vllm_mindspore/model_executor/models/mf_models/mindformers.py
index db2af89b..41d2a636 100644
--- a/vllm_mindspore/model_executor/models/mf_models/mindformers.py
+++ b/vllm_mindspore/model_executor/models/mf_models/mindformers.py
@@ -262,10 +262,10 @@ class MindFormersForCausalLM(MsModelBase, SupportsPP):
         (attn_padding_idx, attn_unpadding_idx, ffn_padding_idx,
          ffn_unpadding_idx) = self._get_padding_index(q_seq_lens)

-        model_inputs["attn_padding_idx"] = attn_padding_idx
-        model_inputs["attn_unpadding_idx"] = attn_unpadding_idx
-        model_inputs["ffn_padding_idx"] = ffn_padding_idx
-        model_inputs["ffn_unpadding_idx"] = ffn_unpadding_idx
+        model_inputs["attn_padding_idx"] = convert_pin(attn_padding_idx)
+        model_inputs["attn_unpadding_idx"] = convert_pin(attn_unpadding_idx)
+        model_inputs["ffn_padding_idx"] = convert_pin(ffn_padding_idx)
+        model_inputs["ffn_unpadding_idx"] = convert_pin(ffn_unpadding_idx)

         return model_inputs

@@ -317,11 +317,10 @@ class MindFormersForCausalLM(MsModelBase, SupportsPP):
             is_prefill, position_ids, query_lens_np, seq_lens_np)

         model_inputs = {}
-        model_inputs["input_ids"] = convert_pin(input_ids.astype(ms.int32) * 1)
+        model_inputs["input_ids"] = convert_pin(input_ids.astype(ms.int32))
         model_inputs["batch_valid_length"] = convert_pin(
             ms.from_numpy(seq_lens_np))
-        model_inputs["block_tables"] = convert_pin(attn_metadata.block_tables *
-                                                   1)
+        model_inputs["block_tables"] = convert_pin(attn_metadata.block_tables)
         model_inputs["slot_mapping"] = convert_pin(attn_metadata.slot_mapping)
         model_inputs["positions"] = convert_pin(position_ids)
         model_inputs["q_seq_lens"] = convert_pin(q_seq_lens)
--
Gitee