diff --git a/vllm_mindspore/model_executor/models/qwen2.py b/vllm_mindspore/model_executor/models/qwen2.py
index 925f97cc8c6a53ea03a871dbc88a348ee3394fa8..50a00c99df56ef8d6ca4b9f2d7f793065477abc8 100644
--- a/vllm_mindspore/model_executor/models/qwen2.py
+++ b/vllm_mindspore/model_executor/models/qwen2.py
@@ -60,6 +60,7 @@ from vllm_mindspore.model_executor.models.model_base import (NativeModel)
 from vllm_mindspore.model_executor.models.utils import (
     PPMissingLayer, make_empty_intermediate_tensors_factory, make_layers,
     maybe_prefix)
+from vllm_mindspore.utils import is_310p
 
 
 class Qwen2MLP(nn.Cell):
@@ -411,7 +412,17 @@ class Qwen2Model(nn.Cell):
                                         default_weight_loader)
                 weight_loader(param, loaded_weight)
             loaded_params.add(name)
-
+            # lm_head and embed weights require different handling on
+            # 310P, so the lm_head weight must be loaded separately.
+            if (self.config.tie_word_embeddings
+                    and "embed_tokens" in name
+                    and "lm_head.weight" in params_dict):
+                lm_head_key = "lm_head.weight"
+                if is_310p():
+                    lm_head_param = params_dict[lm_head_key]
+                    weight_loader(lm_head_param, loaded_weight)
+
+                loaded_params.add(lm_head_key)
         return loaded_params
 
 
diff --git a/vllm_mindspore/model_executor/models/qwen3_vl.py b/vllm_mindspore/model_executor/models/qwen3_vl.py
index def6f57d93fe939eb56579e2257f41d6bccfdd17..d41435cef2fde84127ba43f494497f0e480b9bb8 100644
--- a/vllm_mindspore/model_executor/models/qwen3_vl.py
+++ b/vllm_mindspore/model_executor/models/qwen3_vl.py
@@ -1035,7 +1035,8 @@ class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
         self.lm_head = ParallelLMHead(config.vocab_size,
                                       config.hidden_size,
                                       quant_config=quant_config)
-        if self.config.tie_word_embeddings:
+        # On 310P, lm_head and embed weights require different handling.
+        if self.config.tie_word_embeddings and not is_310p():
             self.lm_head.weight = self.model.embed_tokens.weight
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.make_empty_intermediate_tensors = (
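The two hunks implement the same change from opposite ends: when `tie_word_embeddings` is set, the usual shortcut of aliasing `lm_head.weight` to `embed_tokens.weight` is skipped on 310P (where, per the diff's comment, the two weights require different handling), and `load_weights` instead copies the embedding weight into the separate `lm_head` parameter and marks it as loaded. Below is a minimal, self-contained sketch of that tie-vs-copy pattern; `TinyConfig`, `TinyModel`, and the `on_310p` flag are illustrative stand-ins, not the actual vllm_mindspore classes.

```python
import numpy as np


class TinyConfig:
    tie_word_embeddings = True


class TinyModel:
    """Toy stand-in showing the tie-vs-copy pattern from the diff."""

    def __init__(self, config: TinyConfig, on_310p: bool):
        self.config = config
        self.on_310p = on_310p
        self.params = {
            "embed_tokens.weight": np.zeros((8, 4)),  # vocab x hidden
            "lm_head.weight": np.zeros((8, 4)),
        }
        if config.tie_word_embeddings and not on_310p:
            # Usual case (mirrors the qwen3_vl.py hunk): the output
            # projection aliases the input embedding, so loading one
            # loads both.
            self.params["lm_head.weight"] = self.params["embed_tokens.weight"]

    def load_weights(self, weights):
        loaded = set()
        for name, w in weights:
            self.params[name][...] = w
            loaded.add(name)
            # Mirrors the qwen2.py hunk: a tied checkpoint has no
            # lm_head entry, so mark it as loaded here and, on 310P
            # only, copy the embedding weight into the separate
            # lm_head parameter.
            if (self.config.tie_word_embeddings
                    and name == "embed_tokens.weight"
                    and "lm_head.weight" in self.params):
                if self.on_310p:
                    self.params["lm_head.weight"][...] = w
                loaded.add("lm_head.weight")
        return loaded


# On 310P the two parameters stay distinct but end up with equal
# values; elsewhere they are literally the same array.
model = TinyModel(TinyConfig(), on_310p=True)
ckpt = [("embed_tokens.weight", np.ones((8, 4)))]
assert model.load_weights(ckpt) == {"embed_tokens.weight", "lm_head.weight"}
assert np.array_equal(model.params["lm_head.weight"],
                      model.params["embed_tokens.weight"])
```

Note that, matching the diff, `loaded.add("lm_head.weight")` runs whenever the embeddings are tied, not only on 310P: on other platforms the alias means the weight really was loaded, and recording it keeps the returned set consistent across platforms.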