diff --git a/vllm_mindspore/model_executor/layers/quantization/smooth_quant_modelslim.py b/vllm_mindspore/model_executor/layers/quantization/smooth_quant_modelslim.py index afd71bf531ebb6f332d4260fb0cdd95b38507085..c72c9f1c58ec536adc295622a2fc05b6e8a10102 100644 --- a/vllm_mindspore/model_executor/layers/quantization/smooth_quant_modelslim.py +++ b/vllm_mindspore/model_executor/layers/quantization/smooth_quant_modelslim.py @@ -91,7 +91,7 @@ class SmoothQuantModelSlimConfig(QuantizationConfig): @staticmethod def get_config_filenames() -> list[str]: - return ["quant_model_description.json"] + return ["quant_model_description.json", "quantization_description.json"] @classmethod def from_config(cls, config: dict[str, @@ -124,7 +124,7 @@ class SmoothQuantModelSlimConfig(QuantizationConfig): def _build_layer_quant_key(prefix: str) -> str: # Split the fused qkv projection into the standard q projection. - prefix = prefix.replace("language_model.model", "model") + prefix = prefix.replace("language_model.model", "model.language_model") prefix = prefix.replace("qkv_proj", "q_proj") # Collapse gate+up projection to the canonical gate projection. prefix = prefix.replace("gate_up_proj", "gate_proj") diff --git a/vllm_mindspore/model_executor/models/glm4_1v.py b/vllm_mindspore/model_executor/models/glm4_1v.py index baaef706e833840153ed1a7598af5eaacaeb6b56..d1eb246aa6b12246ee44ea5f5b3e2da5af304d7d 100644 --- a/vllm_mindspore/model_executor/models/glm4_1v.py +++ b/vllm_mindspore/model_executor/models/glm4_1v.py @@ -1529,11 +1529,6 @@ class Glm4vForConditionalGeneration(NativeModel, SupportsMultiModal): if "visual." in name: self.visual.load_weights([(name, weight)], params_dict) else: - if "language_model" not in name: - name = name.replace("model.layers", "model.language_model.layers") - name = name.replace("model.norm", "model.language_model.norm") - name = name.replace("model.embed_tokens", "model.language_model.embed_tokens") - name = name.replace("output_layer", "lm_head") self.model.load_weights([(name, weight)], params_dict) return None