From 89b3fface1f1a91602c94c22916b3018e7a8c6d9 Mon Sep 17 00:00:00 2001
From: tronzhang
Date: Tue, 30 Sep 2025 16:28:36 +0800
Subject: [PATCH] do not support structured output now

---
 vllm_mindspore/__init__.py            |  5 ++-
 vllm_mindspore/config.py              | 33 ------------------
 vllm_mindspore/v1/engine/processor.py | 50 +++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 34 deletions(-)
 create mode 100644 vllm_mindspore/v1/engine/processor.py

diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index efd917b0a..58c78e076 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -282,7 +282,7 @@ vllm.v1.utils.CoreEngineActorManager.__init__ = core_engine_actor_manager_init
 from .config import (_verify_quantization, _verify_args, vllm_config_post_init,
                      vllm_config_get_quantization_config, model_post_init,
                      _get_and_verify_dtype, stateless_init_dp_group,
-                     has_unfinished_dp, v1_process_validate_sampling_params)
+                     has_unfinished_dp)

 vllm.config.ModelConfig._verify_quantization = _verify_quantization
 vllm.config.VllmConfig.__post_init__ = vllm_config_post_init
@@ -571,8 +571,11 @@ DPAsyncMPClient.get_core_engine_for_request = get_core_engine_for_request
 DPAsyncMPClient.add_request_async = add_request_async
 DPAsyncMPClient.process_engine_outputs = staticmethod(process_engine_outputs)

+from vllm_mindspore.v1.engine.processor import (
+    v1_process_validate_sampling_params, v1_process_validate_structured_output)
 from vllm.v1.engine.processor import Processor
 Processor._validate_sampling_params = v1_process_validate_sampling_params
+Processor._validate_structured_output = v1_process_validate_structured_output

 check_ready()

diff --git a/vllm_mindspore/config.py b/vllm_mindspore/config.py
index a8beae818..7f93164b6 100644
--- a/vllm_mindspore/config.py
+++ b/vllm_mindspore/config.py
@@ -34,8 +34,6 @@ from vllm.config import (_STR_DTYPE_TO_TORCH_DTYPE, CacheConfig,
                          CompilationConfig, CompilationLevel, VllmConfig,
                          _find_dtype, _resolve_auto_dtype, get_attr_docs)
 from vllm.logger import init_logger
-from vllm.lora.request import LoRARequest
-from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid

 from vllm_mindspore.utils import is_310p
@@ -465,34 +463,3 @@ class _CacheConfig(CacheConfig):
     """Data type for kv cache storage. If "auto", will use model data type.
     CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2.
     ROCm (AMD GPU) supports fp8 (=fp8_e4m3)."""
-
-
-def v1_process_validate_sampling_params(
-    self,
-    params: SamplingParams,
-    lora_request: Optional[LoRARequest],
-) -> None:
-
-    model_config = self.vllm_config.model_config
-    vocab_size = model_config.get_vocab_size()
-    if params.top_k > vocab_size:
-        raise ValueError(
-            f"top_k cannot be greater than vocabulary size({vocab_size}), "
-            f"but got {params.top_k}.")
-    scheduler_config = self.vllm_config.scheduler_config
-    max_num_seqs = scheduler_config.max_num_seqs
-    if params.n > max_num_seqs:
-        raise ValueError(f"SchedulerConfig.n cannot be greater than "
-                         f"max_num_seqs({max_num_seqs}), but got {params.n}.")
-
-    self._validate_structured_output(params)
-    self._validate_logit_bias(params)
-
-    if params.allowed_token_ids is None:
-        return
-    if not params.allowed_token_ids:
-        raise ValueError("allowed_token_ids is not None and empty!")
-    tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
-    vocab_size = len(tokenizer)
-    if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
-        raise ValueError("allowed_token_ids contains out-of-vocab token id!")
diff --git a/vllm_mindspore/v1/engine/processor.py b/vllm_mindspore/v1/engine/processor.py
new file mode 100644
index 000000000..16033bac2
--- /dev/null
+++ b/vllm_mindspore/v1/engine/processor.py
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright 2025 Huawei Technologies Co., Ltd.
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Functions are adapted from
+# https://github.com/vllm-project/vllm/blob/v0.9.1/vllm/v1/engine/processor.py
+
+from typing import Optional
+
+from vllm.lora.request import LoRARequest
+from vllm.sampling_params import SamplingParams
+
+
+def v1_process_validate_sampling_params(
+    self,
+    params: SamplingParams,
+    lora_request: Optional[LoRARequest],
+) -> None:
+
+    model_config = self.vllm_config.model_config
+    vocab_size = model_config.get_vocab_size()
+    if params.top_k > vocab_size:
+        raise ValueError(
+            f"top_k cannot be greater than vocabulary size({vocab_size}), "
+            f"but got {params.top_k}.")
+    scheduler_config = self.vllm_config.scheduler_config
+    max_num_seqs = scheduler_config.max_num_seqs
+    if params.n > max_num_seqs:
+        raise ValueError(f"SchedulerConfig.n cannot be greater than "
+                         f"max_num_seqs({max_num_seqs}), but got {params.n}.")
+
+    self._validate_structured_output(params)
+    self._validate_logit_bias(params)
+
+    if params.allowed_token_ids is None:
+        return
+    if not params.allowed_token_ids:
+        raise ValueError("allowed_token_ids is not None and empty!")
+    tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
+    vocab_size = len(tokenizer)
+    if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids):
+        raise ValueError("allowed_token_ids contains out-of-vocab token id!")
+
+
+def v1_process_validate_structured_output(self,
+                                          params: SamplingParams) -> None:
+    if not params.guided_decoding or not self.decoding_config:
+        return
+    raise ValueError(
+        "vLLM-MindSpore Plugin does not support structured output now.")
-- 
Gitee
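
Note (illustration only, not part of the patch): after this change, any request that sets guided decoding is rejected up front by the patched Processor._validate_structured_output instead of failing later in the engine. A minimal caller-side sketch of the expected behaviour follows; it assumes vLLM v0.9.1 with the vLLM-MindSpore plugin installed, and the model name is only a placeholder.

    from vllm import LLM, SamplingParams
    from vllm.sampling_params import GuidedDecodingParams

    # Placeholder model; any model served through the plugin behaves the same.
    llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")

    # Plain sampling still passes the patched _validate_sampling_params checks.
    out = llm.generate(["Hello"], SamplingParams(max_tokens=16))
    print(out[0].outputs[0].text)

    # Structured output is now rejected during request validation.
    guided = SamplingParams(
        guided_decoding=GuidedDecodingParams(json={"type": "object"}))
    try:
        llm.generate(["Return a JSON object"], guided)
    except ValueError as err:
        # "vLLM-MindSpore Plugin does not support structured output now."
        print(err)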