From 70fd17c31b4fe3f31c4564e014ae05e5cd36dd8b Mon Sep 17 00:00:00 2001
From: superxf
Date: Thu, 4 Dec 2025 14:57:14 +0800
Subject: [PATCH] support qwen3-vl 4B

---
 vllm_mindspore/model_executor/models/qwen2.py    | 13 ++++++++++++-
 vllm_mindspore/model_executor/models/qwen3_vl.py |  3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/vllm_mindspore/model_executor/models/qwen2.py b/vllm_mindspore/model_executor/models/qwen2.py
index 925f97cc8..50a00c99d 100644
--- a/vllm_mindspore/model_executor/models/qwen2.py
+++ b/vllm_mindspore/model_executor/models/qwen2.py
@@ -60,6 +60,7 @@ from vllm_mindspore.model_executor.models.model_base import (NativeModel)
 from vllm_mindspore.model_executor.models.utils import (
     PPMissingLayer, make_empty_intermediate_tensors_factory, make_layers,
     maybe_prefix)
+from vllm_mindspore.utils import is_310p


 class Qwen2MLP(nn.Cell):
@@ -411,7 +412,17 @@ class Qwen2Model(nn.Cell):
                                         default_weight_loader)
                 weight_loader(param, loaded_weight)
             loaded_params.add(name)
-
+            # On 310p, the lm_head and embed weights require different
+            # handling, so the lm_head weight needs to be loaded separately.
+            if (self.config.tie_word_embeddings
+                    and "embed_tokens" in name
+                    and "lm_head.weight" in params_dict):
+                lm_head_key = "lm_head.weight"
+                if is_310p():
+                    lm_head_param = params_dict[lm_head_key]
+                    weight_loader(lm_head_param, loaded_weight)
+
+                loaded_params.add(lm_head_key)
         return loaded_params


diff --git a/vllm_mindspore/model_executor/models/qwen3_vl.py b/vllm_mindspore/model_executor/models/qwen3_vl.py
index def6f57d9..d41435cef 100644
--- a/vllm_mindspore/model_executor/models/qwen3_vl.py
+++ b/vllm_mindspore/model_executor/models/qwen3_vl.py
@@ -1035,7 +1035,8 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
         self.lm_head = ParallelLMHead(config.vocab_size,
                                       config.hidden_size,
                                       quant_config=quant_config)
-        if self.config.tie_word_embeddings:
+        # On 310p, the lm_head and embed weights require different handling.
+        if self.config.tie_word_embeddings and not is_310p():
             self.lm_head.weight = self.model.embed_tokens.weight
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.make_empty_intermediate_tensors = (
--
Gitee
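
Reviewer note: below is a minimal sketch of the branch structure the two hunks
introduce, for anyone unfamiliar with the 310p constraint. It is an
illustration under assumptions, not project code: Param, build_lm_head, and
load_embed_weight are made-up stand-ins, and is_310p() is stubbed in place of
vllm_mindspore.utils.is_310p. Only the control flow mirrors the patch: tie at
construction time off 310p, copy at load time on 310p.

# Minimal sketch (assumptions, not project code).

class Param:
    """Toy stand-in for a framework parameter object."""

    def __init__(self, data=None):
        self.data = data


def is_310p() -> bool:
    # Assumption: pretend we are running on a 310p device.
    return True


def build_lm_head(embed_tokens: Param, tie_word_embeddings: bool) -> Param:
    # Construction time (qwen3_vl.py hunk): off 310p the lm_head simply
    # aliases the embedding weight; on 310p it stays a separate parameter.
    if tie_word_embeddings and not is_310p():
        return embed_tokens
    return Param()


def load_embed_weight(params: dict, loaded_weight,
                      tie_word_embeddings: bool) -> set:
    # Load time (qwen2.py hunk): the checkpoint tensor is copied into the
    # embedding and, on 310p, also into the separate lm_head parameter.
    loaded = set()
    params["model.embed_tokens.weight"].data = loaded_weight
    loaded.add("model.embed_tokens.weight")
    if tie_word_embeddings and "lm_head.weight" in params:
        if is_310p():
            params["lm_head.weight"].data = loaded_weight
        loaded.add("lm_head.weight")
    return loaded

The point of the split is that on 310p the two parameters cannot share
storage, so tying is skipped at construction and compensated for by the extra
copy during weight loading.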