diff --git a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py
index daffc4782158015008df32c2ea7a757e211e39ba..5dcf3030025109f9bd30baddbf5185491db2b7b6 100644
--- a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py
+++ b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py
@@ -52,6 +52,12 @@ PROMPT_TEMPLATE = (
     "What is in the image?<|im_end|>\n"
     "<|im_start|>assistant\n")
 
+PROMPT_TEMPLATE_2 = (
+    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
+    "\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
+    "Is there anyone in the picture?<|im_end|>\n"
+    "<|im_start|>assistant\n")
+
 video_path = "/home/workspace/mindspore_dataset/video_file/korean_eating.mp4"
 model_path = "/home/workspace/mindspore_dataset/weight/Qwen2.5-VL-7B-Instruct"
 
@@ -75,12 +81,20 @@ def generate_llm_engine(enforce_eager=False, tensor_parallel_size=1):
 
 
 def forward_and_check(llm):
-    inputs = [{
-        "prompt": PROMPT_TEMPLATE,
-        "multi_modal_data": {
-            "image": pil_image()
+    inputs = [
+        {
+            "prompt": PROMPT_TEMPLATE,
+            "multi_modal_data": {
+                "image": pil_image()
+            },
         },
-    }]
+        {
+            "prompt": PROMPT_TEMPLATE_2,
+            "multi_modal_data": {
+                "image": pil_image()
+            },
+        },
+    ]
 
     # Create a sampling params object.
     sampling_params = SamplingParams(temperature=0.0, max_tokens=128, top_k=1)
diff --git a/vllm_mindspore/model_executor/models/registry.py b/vllm_mindspore/model_executor/models/registry.py
index f3564e0f220f6499aaae8e0e906efabf3e0aec1d..cd6d92537e09913dfc93a911e3e27034a5af381a 100644
--- a/vllm_mindspore/model_executor/models/registry.py
+++ b/vllm_mindspore/model_executor/models/registry.py
@@ -38,7 +38,8 @@ try:
         if name.startswith("mcore_")
     ]
 except ImportError as e:
-    logger.warning("Error when importing MindSpore Transformers: %s", e)
+    logger.info("Can't get model support list from MindSpore Transformers: %s",
+                e)
     if is_mindformers_model_backend():
         raise ImportError from e
     mf_supported = False
@@ -48,7 +49,7 @@ try:
     from mindone import transformers  # noqa: F401
     mindone_supported = True
 except ImportError as e:
-    logger.warning("Error when importing MindSpore ONE: %s", e)
+    logger.info("MindSpore ONE is not available: %s", e)
     if is_mindone_model_backend():
         raise ImportError from e
     mindone_supported = False
diff --git a/vllm_mindspore/platforms/ascend.py b/vllm_mindspore/platforms/ascend.py
index 8e39c3d1560d7de3b653ac22eefd5fdc51cbd3ea..0df8f3e8548f829157c408c3db2fa0923c4b7727 100644
--- a/vllm_mindspore/platforms/ascend.py
+++ b/vllm_mindspore/platforms/ascend.py
@@ -100,7 +100,8 @@ class AscendPlatform(Platform):
 
         cache_config = vllm_config.cache_config
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            # Default block size changed from 16 to 128 for better performance.
+            cache_config.block_size = 128
 
         model_config = vllm_config.model_config
         model_config.disable_cascade_attn = True
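A note on the block_size change above: the new default of 128 only takes
effect when no block size was configured, because the guard checks
cache_config.block_size is None. Below is a minimal sketch of how a caller
can still pin the old value, assuming the standard vLLM LLM entry point
(which forwards extra keyword arguments such as block_size to EngineArgs);
the model id and prompt here are illustrative:

    from vllm import LLM, SamplingParams

    # Setting block_size explicitly bypasses the Ascend platform default
    # (now 128): AscendPlatform only overrides block_size when it is None.
    llm = LLM(model="Qwen/Qwen2.5-VL-7B-Instruct", block_size=16)

    # Same sampling setup as the test above.
    sampling_params = SamplingParams(temperature=0.0, max_tokens=128, top_k=1)
    outputs = llm.generate(["What is in the image?"], sampling_params)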