From 684968c1c475a2ee20175bd7f917bd16d0ce6586 Mon Sep 17 00:00:00 2001
From: tronzhang <zhangzhaochuang@huawei.com>
Date: Sat, 11 Oct 2025 14:43:37 +0800
Subject: [PATCH] add condition to control whether dispatching requests only by
 p0 or not

---
 vllm_mindspore/__init__.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index efd917b0..2bfd6754 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -22,6 +22,8 @@ import warnings
 import msadapter  # noqa: F401
 from vllm_mindspore.ray_patch import patch_ray
 
+is_dispatch_req_all_depend_p0 = True
+
 patch_ray()
 
 if "vllm" in sys.modules:
@@ -565,11 +567,15 @@ vllm.entrypoints.cli.serve.CoreEngine = MsCoreEngine
 vllm.v1.engine.core_client.CoreEngine = MsCoreEngine
 vllm.v1.utils.CoreEngine = MsCoreEngine
 
-from vllm.v1.engine.core_client import DPAsyncMPClient
+if is_dispatch_req_all_depend_p0:
+    # Dispatch the request based on the status stored on p0,
+    # instead of load-balance statuses published by p1.
+    from vllm.v1.engine.core_client import DPAsyncMPClient
 
-DPAsyncMPClient.get_core_engine_for_request = get_core_engine_for_request
-DPAsyncMPClient.add_request_async = add_request_async
-DPAsyncMPClient.process_engine_outputs = staticmethod(process_engine_outputs)
+    DPAsyncMPClient.get_core_engine_for_request = get_core_engine_for_request
+    DPAsyncMPClient.add_request_async = add_request_async
+    DPAsyncMPClient.process_engine_outputs = staticmethod(
+        process_engine_outputs)
 
 from vllm.v1.engine.processor import Processor
 
-- 
Gitee