diff --git a/backport-CVE-2025-61620.patch b/backport-CVE-2025-61620.patch
new file mode 100644
index 0000000000000000000000000000000000000000..f60827a1c057228d3d9928afd17bf73401264a96
--- /dev/null
+++ b/backport-CVE-2025-61620.patch
@@ -0,0 +1,297 @@
+From 7977e5027c2250a4abc1f474c5619c40b4e5682f Mon Sep 17 00:00:00 2001
+From: Russell Bryant
+Date: Sat, 27 Sep 2025 06:46:49 -0400
+Subject: [PATCH] Add filtering for chat template kwargs (#25794)
+
+Signed-off-by: Isotr0py
+Co-authored-by: Isotr0py
+---
+ tests/entrypoints/test_chat_utils.py | 85 +++++++++++++++++++++++++
+ vllm/entrypoints/chat_utils.py | 55 +++++++++++++++-
+ vllm/entrypoints/openai/api_server.py | 1 +
+ vllm/entrypoints/openai/cli_args.py | 7 ++
+ vllm/entrypoints/openai/serving_chat.py | 14 +++-
+ 5 files changed, 158 insertions(+), 4 deletions(-)
+
+diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
+index 4929466..7408406 100644
+--- a/tests/entrypoints/test_chat_utils.py
++++ b/tests/entrypoints/test_chat_utils.py
+@@ -12,6 +12,7 @@ from vllm.entrypoints.chat_utils import (_try_extract_ast, load_chat_template,
+                                          parse_chat_messages,
+                                          parse_chat_messages_futures,
+                                          resolve_chat_template_content_format,
++                                         resolve_chat_template_kwargs,
+                                          resolve_hf_chat_template)
+ from vllm.entrypoints.llm import apply_hf_chat_template
+ from vllm.multimodal import MultiModalDataDict
+@@ -28,6 +29,7 @@ ULTRAVOX_MODEL_ID = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
+ QWEN2AUDIO_MODEL_ID = "Qwen/Qwen2-Audio-7B-Instruct"
+ QWEN2VL_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
+ QWEN25VL_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
++QWEN3_MODEL_ID = "Qwen/Qwen3-8B"
+ MLLAMA_MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+ LLAMA_GUARD_MODEL_ID = "meta-llama/Llama-Guard-3-1B"
+ HERMES_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B"
+@@ -854,6 +856,89 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
+     assert isinstance(chat_template, str)
+ 
+ 
++@pytest.mark.parametrize(
++    "model, expected_kwargs",
++    [
++        (
++            QWEN2VL_MODEL_ID,
++            {
++                "add_vision_id", "add_generation_prompt",
++                "continue_final_message", "tools"
++            },
++        ),
++        (
++            QWEN3_MODEL_ID,
++            {
++                "enable_thinking", "add_generation_prompt",
++                "continue_final_message", "tools"
++            },
++        ),
++    ],
++)
++def test_resolve_hf_chat_template_kwargs(sample_json_schema, model,
++                                         expected_kwargs):
++    """checks that chat_template is a dict type for HF models."""
++    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
++    model_info.check_available_online(on_fail="skip")
++
++    tools = ([{
++        "type": "function",
++        "function": {
++            "name": "dummy_function_name",
++            "description": "This is a dummy function",
++            "parameters": sample_json_schema,
++        },
++    }])
++
++    chat_template_kwargs = {
++        # both unused
++        "unsed_kwargs_1": 123,
++        "unsed_kwargs_2": "abc",
++        # should not appear
++        "chat_template": "{% Hello world! %}",
%}", ++ # used by tokenizer ++ "continue_final_message": True, ++ "tools": tools, ++ # both used by Qwen2-VL and Qwen3 ++ "add_generation_prompt": True, ++ # only used by Qwen2-VL ++ "add_vision_id": True, ++ # only used by Qwen3 ++ "enable_thinking": True, ++ } ++ ++ model_config = ModelConfig( ++ model, ++ tokenizer=model_info.tokenizer or model, ++ tokenizer_mode=model_info.tokenizer_mode, ++ revision=model_info.revision, ++ trust_remote_code=model_info.trust_remote_code, ++ hf_overrides=model_info.hf_overrides, ++ skip_tokenizer_init=model_info.skip_tokenizer_init, ++ enforce_eager=model_info.enforce_eager, ++ dtype=model_info.dtype) ++ ++ # Build the tokenizer ++ tokenizer = get_tokenizer( ++ model, ++ trust_remote_code=model_config.trust_remote_code, ++ ) ++ ++ # Test detecting the tokenizer's chat_template ++ chat_template = resolve_hf_chat_template( ++ tokenizer, ++ chat_template=None, ++ tools=tools, ++ model_config=model_config, ++ ) ++ resolved_chat_template_kwargs = resolve_chat_template_kwargs( ++ tokenizer, ++ chat_template=chat_template, ++ chat_template_kwargs=chat_template_kwargs, ++ ) ++ assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs ++ ++ + # NOTE: Qwen2-Audio default chat template is specially defined inside + # processor class instead of using `tokenizer_config.json` + # yapf: disable +diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py +index 95c806c..58f0b94 100644 +--- a/vllm/entrypoints/chat_utils.py ++++ b/vllm/entrypoints/chat_utils.py +@@ -11,7 +11,12 @@ from pathlib import Path + from typing import (Any, Callable, Generic, Literal, Optional, TypeVar, Union, + cast) + ++import jinja2 ++import jinja2.ext ++import jinja2.meta + import jinja2.nodes ++import jinja2.parser ++import jinja2.sandbox + import transformers.utils.chat_template_utils as hf_chat_utils + # yapf conflicts with isort for this block + # yapf: disable +@@ -45,7 +50,7 @@ from vllm.transformers_utils.chat_templates import ( + # yapf: enable + from vllm.transformers_utils.processor import cached_get_processor + from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer +-from vllm.utils import deprecate_kwargs, random_uuid ++from vllm.utils import deprecate_kwargs, random_uuid, supports_kw + + logger = init_logger(__name__) + +@@ -1206,6 +1211,46 @@ def parse_chat_messages_futures( + return conversation, mm_tracker.all_mm_data() + + ++# adapted from https://github.com/huggingface/transformers/blob/v4.56.2/src/transformers/utils/chat_template_utils.py#L398-L412 ++# only preserve the parse function used to resolve chat template kwargs ++class AssistantTracker(jinja2.ext.Extension): ++ tags = {"generation"} ++ ++ def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.CallBlock: ++ lineno = next(parser.stream).lineno ++ body = parser.parse_statements(["name:endgeneration"], drop_needle=True) ++ call = self.call_method("_generation_support") ++ call_block = jinja2.nodes.CallBlock(call, [], [], body) ++ return call_block.set_lineno(lineno) ++ ++ ++def resolve_chat_template_kwargs( ++ tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], ++ chat_template: str, ++ chat_template_kwargs: dict[str, Any], ++) -> dict[str, Any]: ++ fn_kw = { ++ k for k in chat_template_kwargs ++ if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False) ++ } ++ ++ env = jinja2.sandbox.ImmutableSandboxedEnvironment( ++ trim_blocks=True, ++ lstrip_blocks=True, ++ extensions=[AssistantTracker, jinja2.ext.loopcontrols], ++ ) ++ 
++    parsed_content = env.parse(chat_template)
++    template_vars = jinja2.meta.find_undeclared_variables(parsed_content)
++
++    # We exclude chat_template from kwargs here, because
++    # chat template has been already resolved at this stage
++    unexpected_vars = {"chat_template"}
++    accept_vars = (fn_kw | template_vars) - unexpected_vars
++    return {
++        k: v for k, v in chat_template_kwargs.items() if k in accept_vars
++    }
++
++
+ @deprecate_kwargs(
+     "trust_remote_code",
+     additional_message="Please use `model_config.trust_remote_code` instead.",
+@@ -1236,13 +1281,17 @@ def apply_hf_chat_template(
+             "does not define one.")
+ 
+     try:
+-
++        resolved_kwargs = resolve_chat_template_kwargs(
++            tokenizer=tokenizer,
++            chat_template=hf_chat_template,
++            chat_template_kwargs=kwargs,
++        )
+         return tokenizer.apply_chat_template(
+             conversation=conversation,  # type: ignore[arg-type]
+             tools=tools,  # type: ignore[arg-type]
+             chat_template=hf_chat_template,
+             tokenize=tokenize,
+-            **kwargs,
++            **resolved_kwargs,
+         )
+ 
+     # External library exceptions can sometimes occur despite the framework's
+diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
+index 62f1c6a..887d55e 100644
+--- a/vllm/entrypoints/openai/api_server.py
++++ b/vllm/entrypoints/openai/api_server.py
+@@ -1185,6 +1185,7 @@ async def init_app_state(
+         request_logger=request_logger,
+         chat_template=resolved_chat_template,
+         chat_template_content_format=args.chat_template_content_format,
++        trust_request_chat_template=args.trust_request_chat_template,
+         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
+         enable_auto_tools=args.enable_auto_tool_choice,
+         tool_parser=args.tool_call_parser,
+diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
+index ca70e78..d462d82 100644
+--- a/vllm/entrypoints/openai/cli_args.py
++++ b/vllm/entrypoints/openai/cli_args.py
+@@ -155,6 +155,13 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
+         '* "openai" will render the content as a list of dictionaries, '
+         'similar to OpenAI schema. '
+         'Example: ``[{"type": "text", "text": "Hello world!"}]``')
++    parser.add_argument(
++        "--trust-request-chat-template",
++        action="store_true",
++        default=False,
++        help='Whether to trust the chat template provided in the request. If False,'
++        'the server will always use the chat template specified by ``--chat-template``'
++        'or the ones from tokenizer.')
+     parser.add_argument("--response-role",
+                         type=optional_type(str),
+                         default="assistant",
+diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
+index 79eac18..7470c44 100644
+--- a/vllm/entrypoints/openai/serving_chat.py
++++ b/vllm/entrypoints/openai/serving_chat.py
+@@ -59,6 +59,7 @@ class OpenAIServingChat(OpenAIServing):
+         request_logger: Optional[RequestLogger],
+         chat_template: Optional[str],
+         chat_template_content_format: ChatTemplateContentFormatOption,
++        trust_request_chat_template: bool = False,
+         return_tokens_as_token_ids: bool = False,
+         reasoning_parser: str = "",
+         enable_auto_tools: bool = False,
+@@ -74,6 +75,7 @@
+         self.response_role = response_role
+         self.chat_template = chat_template
+         self.chat_template_content_format: Final = chat_template_content_format
++        self.trust_request_chat_template = trust_request_chat_template
+ 
+         # set up tool use
+         self.enable_auto_tools: bool = enable_auto_tools
+@@ -176,6 +178,16 @@
+                 tool.model_dump() for tool in request.tools
+             ]
+ 
++            request_chat_template = request.chat_template
++            chat_template_kwargs = request.chat_template_kwargs
++            if not self.trust_request_chat_template and (
++                    request_chat_template is not None or
++                    (chat_template_kwargs and
++                     chat_template_kwargs.get("chat_template") is not None)):
++                return self.create_error_response(
++                    "Chat template is passed with request, but "
++                    "--trust-request-chat-template is not set. "
++                    "Refused request with untrusted chat template.")
+             (
+                 conversation,
+                 request_prompts,
+@@ -184,7 +196,7 @@
+                 request,
+                 tokenizer,
+                 request.messages,
+-                chat_template=request.chat_template or self.chat_template,
++                chat_template=request_chat_template or self.chat_template,
+                 chat_template_content_format=self.chat_template_content_format,
+                 add_generation_prompt=request.add_generation_prompt,
+                 continue_final_message=request.continue_final_message,
+-- 
+2.43.0
+
diff --git a/vllm.spec b/vllm.spec
index 3ebe76062fc164bf2a2807874c306f91d1f5c480..f8ca89642af7c6f083bd7368dc1c74312e0ac970 100644
--- a/vllm.spec
+++ b/vllm.spec
@@ -3,12 +3,14 @@
 
 Name: vllm
 Version: 0.9.1
-Release: 1
+Release: 2
 Summary: Powerful engine for LLMs
 License: (Apache-2.0 AND BSD-3-Clause) OR BSD-3-CLause
 URL: https://github.com/vllm-project/vllm
 Source0: https://gitee.com/src-openeuler/vllm/raw/master/vllm-%{version}.tar.gz
 
+Patch0001: backport-CVE-2025-61620.patch
+
 BuildArch: noarch
 
 %description
@@ -28,6 +30,7 @@ Buildrequires: python3-pytorch
 
 %prep
 %autosetup -n %{name}-%{version} -N
+%autopatch -p1
 
 %build
 export SETUPTOOLS_SCM_PRETEND_VERSION=%{version}
@@ -69,6 +72,12 @@ mv %{buildroot}/filelist.lst .
 
 %files -n python3-%{_name} -f filelist.lst
 
 %changelog
+* Thu Oct 9 2025 wangziliang - 0.9.1-2
+- Type:CVE
+- CVE:CVE-2025-61620
+- SUG:NA
+- DESC:fix CVE-2025-61620
+
 * Fri Jul 4 2025 gongzequn - 0.9.1-1
 - Change the baseline version to 0.9.1