diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py index 421e039c511775bf098a9a0e15b3bff83592fa55..e877479f870709c5599da8c9a8647218693a24ea 100644 --- a/vllm_mindspore/model_executor/models/model_base.py +++ b/vllm_mindspore/model_executor/models/model_base.py @@ -78,7 +78,7 @@ class MLAAttentionWrapper(AttentionWrapper): if not self.use_ringmla: self.kv_cache = [ ( - ms.mint.zeros( + create_kv_cache( self.kv_shape, # type: ignore[misc] dtype=vllm_config.model_config.dtype), ) for _ in range(vllm_config.parallel_config.pipeline_parallel_size)