From 4814f99c1ac1131fb614cf87e82cf56de4c2e983 Mon Sep 17 00:00:00 2001 From: zlq2020 Date: Mon, 22 Sep 2025 09:07:23 +0800 Subject: [PATCH] change block-size default value --- .../cases_parallel/vllm_qwen2_5_vl_7b_v1.py | 24 +++++++++++++++---- .../model_executor/models/registry.py | 5 ++-- vllm_mindspore/platforms/ascend.py | 3 ++- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py index daffc478..5dcf3030 100644 --- a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py @@ -52,6 +52,12 @@ PROMPT_TEMPLATE = ( "What is in the image?<|im_end|>\n" "<|im_start|>assistant\n") +PROMPT_TEMPLATE_2 = ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>" + "\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>" + "Is there anyone in the picture?<|im_end|>\n" + "<|im_start|>assistant\n") + video_path = "/home/workspace/mindspore_dataset/video_file/korean_eating.mp4" model_path = "/home/workspace/mindspore_dataset/weight/Qwen2.5-VL-7B-Instruct" @@ -75,12 +81,20 @@ def generate_llm_engine(enforce_eager=False, tensor_parallel_size=1): def forward_and_check(llm): - inputs = [{ - "prompt": PROMPT_TEMPLATE, - "multi_modal_data": { - "image": pil_image() + inputs = [ + { + "prompt": PROMPT_TEMPLATE, + "multi_modal_data": { + "image": pil_image() + }, }, - }] + { + "prompt": PROMPT_TEMPLATE_2, + "multi_modal_data": { + "image": pil_image() + }, + }, + ] # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=128, top_k=1) diff --git a/vllm_mindspore/model_executor/models/registry.py b/vllm_mindspore/model_executor/models/registry.py index f3564e0f..cd6d9253 100644 --- a/vllm_mindspore/model_executor/models/registry.py +++ b/vllm_mindspore/model_executor/models/registry.py @@ -38,7 +38,8 @@ try: if name.startswith("mcore_") ] except ImportError as e: - logger.warning("Error when importing MindSpore Transformers: %s", e) + logger.info("Can't get model support list from MindSpore Transformers: %s", + e) if is_mindformers_model_backend(): raise ImportError from e mf_supported = False @@ -48,7 +49,7 @@ try: from mindone import transformers # noqa: F401 mindone_supported = True except ImportError as e: - logger.warning("Error when importing MindSpore ONE: %s", e) + logger.info("No MindSpore ONE: %s", e) if is_mindone_model_backend(): raise ImportError from e mindone_supported = False diff --git a/vllm_mindspore/platforms/ascend.py b/vllm_mindspore/platforms/ascend.py index 8e39c3d1..0df8f3e8 100644 --- a/vllm_mindspore/platforms/ascend.py +++ b/vllm_mindspore/platforms/ascend.py @@ -100,7 +100,8 @@ class AscendPlatform(Platform): cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: - cache_config.block_size = 16 + # default value: 16 -> 128 for better performance + cache_config.block_size = 128 model_config = vllm_config.model_config model_config.disable_cascade_attn = True -- Gitee