diff --git a/vllm_mindspore/v1/worker/gpu_model_runner.py b/vllm_mindspore/v1/worker/gpu_model_runner.py index 9b8ffd281ae42bde4c4c95e843e0f3e13a3c6092..b4ae7d2ba0a285cb88969e40e58ff7ce3e61ead6 100644 --- a/vllm_mindspore/v1/worker/gpu_model_runner.py +++ b/vllm_mindspore/v1/worker/gpu_model_runner.py @@ -509,7 +509,7 @@ def initialize_kv_cache_tensors( if is_310p(): kv_caches = _allocate_nz_kv_cache_tensors(self, kv_cache_config) elif getattr(getattr(self.vllm_config, "quant_config", None), \ - "fa3_quant", False): + "fa3_quant", False): kv_caches = _allocate_nz_kv_cache_tensors_fa3(self, kv_cache_config) else: # Initialize the memory buffer for KV cache