From 70fd17c31b4fe3f31c4564e014ae05e5cd36dd8b Mon Sep 17 00:00:00 2001
From: superxf
Date: Thu, 4 Dec 2025 14:57:14 +0800
Subject: [PATCH] support qwen3-vl 4B

---
 vllm_mindspore/model_executor/models/qwen2.py    | 13 ++++++++++++-
 vllm_mindspore/model_executor/models/qwen3_vl.py |  3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/vllm_mindspore/model_executor/models/qwen2.py b/vllm_mindspore/model_executor/models/qwen2.py
index 925f97cc8..50a00c99d 100644
--- a/vllm_mindspore/model_executor/models/qwen2.py
+++ b/vllm_mindspore/model_executor/models/qwen2.py
@@ -60,6 +60,7 @@ from vllm_mindspore.model_executor.models.model_base import (NativeModel)
 from vllm_mindspore.model_executor.models.utils import (
     PPMissingLayer, make_empty_intermediate_tensors_factory, make_layers,
     maybe_prefix)
+from vllm_mindspore.utils import is_310p


 class Qwen2MLP(nn.Cell):
@@ -411,7 +412,17 @@ class Qwen2Model(nn.Cell):
                                         default_weight_loader)
                 weight_loader(param, loaded_weight)
             loaded_params.add(name)
-
+            # On 310p, the lm_head and embed weights require different
+            # handling, so the lm_head weight needs to be loaded separately.
+            if (self.config.tie_word_embeddings
+                    and "embed_tokens" in name
+                    and "lm_head.weight" in params_dict):
+                lm_head_key = "lm_head.weight"
+                if is_310p():
+                    lm_head_param = params_dict[lm_head_key]
+                    weight_loader(lm_head_param, loaded_weight)
+
+                loaded_params.add(lm_head_key)
         return loaded_params


diff --git a/vllm_mindspore/model_executor/models/qwen3_vl.py b/vllm_mindspore/model_executor/models/qwen3_vl.py
index def6f57d9..d41435cef 100644
--- a/vllm_mindspore/model_executor/models/qwen3_vl.py
+++ b/vllm_mindspore/model_executor/models/qwen3_vl.py
@@ -1035,7 +1035,8 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
         self.lm_head = ParallelLMHead(config.vocab_size,
                                       config.hidden_size,
                                       quant_config=quant_config)
-        if self.config.tie_word_embeddings:
+        # On 310p, the lm_head and embed weights require different handling.
+        if self.config.tie_word_embeddings and not is_310p():
             self.lm_head.weight = self.model.embed_tokens.weight
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.make_empty_intermediate_tensors = (
--
Gitee
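
Reviewer note: below is a minimal sketch of the branch structure the two hunks
introduce, for anyone unfamiliar with the 310p constraint. It is an
illustration under assumptions, not project code: Param, build_lm_head, and
load_embed_weight are made-up stand-ins, and is_310p() is stubbed in place of
vllm_mindspore.utils.is_310p. Only the control flow mirrors the patch: tie at
construction time off 310p, copy at load time on 310p.

# Minimal sketch (assumptions, not project code).

class Param:
    """Toy stand-in for a framework parameter object."""

    def __init__(self, data=None):
        self.data = data


def is_310p() -> bool:
    # Assumption: pretend we are running on a 310p device.
    return True


def build_lm_head(embed_tokens: Param, tie_word_embeddings: bool) -> Param:
    # Construction time (qwen3_vl.py hunk): off 310p the lm_head simply
    # aliases the embedding weight; on 310p it stays a separate parameter.
    if tie_word_embeddings and not is_310p():
        return embed_tokens
    return Param()


def load_embed_weight(params: dict, loaded_weight,
                      tie_word_embeddings: bool) -> set:
    # Load time (qwen2.py hunk): the checkpoint tensor is copied into the
    # embedding and, on 310p, also into the separate lm_head parameter.
    loaded = set()
    params["model.embed_tokens.weight"].data = loaded_weight
    loaded.add("model.embed_tokens.weight")
    if tie_word_embeddings and "lm_head.weight" in params:
        if is_310p():
            params["lm_head.weight"].data = loaded_weight
        loaded.add("lm_head.weight")
    return loaded

The point of the split is that on 310p the two parameters cannot share
storage, so tying is skipped at construction and compensated for by the extra
copy during weight loading.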