From 4dbb5be5b93c9ca839d71d4a8e93a560b78eccf2 Mon Sep 17 00:00:00 2001
From: Dwade
Date: Thu, 10 Apr 2025 12:44:02 +0800
Subject: [PATCH] resource-isolation-v073

---
 resource-isolation-v073.patch | 111 ++++++++++++++++++++++++++++++++++
 vllm.spec                     |   7 ++-
 2 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 resource-isolation-v073.patch

diff --git a/resource-isolation-v073.patch b/resource-isolation-v073.patch
new file mode 100644
index 0000000..0defb77
--- /dev/null
+++ b/resource-isolation-v073.patch
@@ -0,0 +1,111 @@
+diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
+index ff38e3b..7732693 100644
+--- a/vllm/worker/worker.py
++++ b/vllm/worker/worker.py
+@@ -34,6 +34,98 @@ from vllm.worker.worker_base import (LocalOrDistributedWorkerBase, WorkerBase,
+ 
+ logger = init_logger(__name__)
+ 
++import subprocess
++import psutil
++
++# PCI device IDs of Ascend NPUs, mapped to their product names.
++npu_device_dict = {"d801": "910A", "d802": "910B", "d803": "910C", "d100": "310", "d500": "310P"}
++
++def execute_command(cmd_list):
++    try:
++        with subprocess.Popen(cmd_list, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
++            out, _ = p.communicate(timeout=1000)
++            return out.decode()
++    except FileNotFoundError as e:
++        raise RuntimeError(f"Failed to execute command, because {e}.")
++
++def get_numa_map():
++    numa_topo_out = execute_command(["npu-smi", "info", "-t", "topo"]).strip().split("\n")
++
++    line_no = 0
++    npu_no = 0
++    numa_to_npu_map = {}
++    numa_number = 0
++    max_cpu = 0
++
++    # Read the total CPU count and the NUMA node count from lscpu.
++    numa_node = execute_command(["lscpu"]).strip().split("\n")
++    for val in numa_node:
++        if val.startswith("CPU(s):"):
++            max_cpu = int(val.split(" ")[-1]) - 1
++        if val.startswith("NUMA"):
++            numa_number = int(val.split(" ")[-1])
++            break
++
++    # Group NPUs by the CPU affinity range reported by npu-smi.
++    npu_max_cpu_no = 0
++    for val in numa_topo_out:
++        line_no += 1
++        line = ''.join(val.split())
++        if line.startswith("NPU") and line_no > 1:
++            cpu_range = line[33:]
++            npu_max_cpu_no = max(npu_max_cpu_no, int(cpu_range.split("-")[1]))
++            if numa_to_npu_map.get(cpu_range, None) is None:
++                numa_to_npu_map[cpu_range] = list()
++            numa_to_npu_map[cpu_range].append(npu_no)
++            npu_no += 1
++
++    # If the NPU-affine ranges reach the highest CPU, the binding ranges sit
++    # below them; otherwise they sit above them.
++    npu_max_cpu = npu_max_cpu_no == max_cpu
++    shared_mode = npu_no > numa_number
++
++    npu_to_core_map = {}
++    for key, val in numa_to_npu_map.items():
++        cpu_range = key.split("-")
++        total_core_num = int(cpu_range[1]) - int(cpu_range[0]) + 1
++        cpu_start = int(cpu_range[0]) + total_core_num if not npu_max_cpu else int(cpu_range[0]) - total_core_num
++        if shared_mode:
++            # Several NPUs share one NUMA node: split its cores evenly.
++            core_num_per_npu = total_core_num // len(val)
++        else:
++            core_num_per_npu = total_core_num
++        core_start = cpu_start
++        for npu in val:
++            npu_to_core_map[npu] = [core_start, core_start + core_num_per_npu]
++            core_start += core_num_per_npu
++
++    return npu_to_core_map
++
++def bind_cpu(rank):
++    device_type = os.getenv("DEVICE_TYPE", "NPU")
++    if device_type != "NPU":
++        logger.info(f"Current platform {device_type} is not supported")
++        return
++    # Identify the NPU model from lspci; only Ascend 910A/910B are supported.
++    pci_out = execute_command(["lspci"]).strip().split("\n")
++    npu_device_type = ""
++    for val in pci_out:
++        if "Processing accelerators" in val and "Device" in val:
++            npu_device_type = npu_device_dict[val.split(" ")[-3]]
++            break
++    if npu_device_type != "910B" and npu_device_type != "910A":
++        logger.info(f"Current platform {npu_device_type} is not supported")
++        return
++
++    # Pin this worker's process to the CPU range reserved for its rank.
++    rank_cpu_maps = get_numa_map()
++
++    cpu_range = rank_cpu_maps[rank]
++    cpu_list = list(range(cpu_range[0], cpu_range[1]))
++    current_process = psutil.Process()
++    current_process.cpu_affinity(cpu_list)
++
++    logger.info(f"bind process {current_process.pid} in rank {rank} to cpu: {cpu_list}")
+ 
+ class Worker(LocalOrDistributedWorkerBase):
+     """A worker class that executes (a partition of) the model on a GPU.
+@@ -56,6 +148,7 @@ class Worker(LocalOrDistributedWorkerBase):
+         self.parallel_config.rank = rank
+         self.local_rank = local_rank
+         self.rank = rank
++        bind_cpu(local_rank)
+         self.distributed_init_method = distributed_init_method
+         self.is_driver_worker = is_driver_worker
+         if self.model_config.trust_remote_code:
diff --git a/vllm.spec b/vllm.spec
index b0fec1d..577f99a 100644
--- a/vllm.spec
+++ b/vllm.spec
@@ -3,12 +3,14 @@
 
 Name:           vllm
 Version:        0.7.3
-Release:        1
+Release:        2
 Summary:        Powerful engine for LLMs
 License:        (Apache-2.0 AND BSD-3-Clause) OR BSD-3-CLause
 URL:            https://github.com/vllm-project/vllm
 Source0:        https://gitee.com/src-openeuler/vllm/raw/master/vllm-%{version}.tar.gz
 
+Patch0001:      resource-isolation-v073.patch
+
 BuildArch:      noarch
 
 %description
@@ -69,6 +71,9 @@ mv %{buildroot}/filelist.lst .
 
 %files -n python3-%{_name} -f filelist.lst
 
 %changelog
+* Thu Apr 10 2025 wangchunyang - 0.7.3-2
+- Add CPU-side resource isolation optimizations to vLLM; supported accelerators: Ascend 910A and 910B
+
 * Tue Apr 8 2025 renwenjie - 0.7.3-1
 - Change the baseline version to 0.7.3
--
Gitee