From 4dbb5be5b93c9ca839d71d4a8e93a560b78eccf2 Mon Sep 17 00:00:00 2001
From: Dwade
Date: Thu, 10 Apr 2025 12:44:02 +0800
Subject: [PATCH] resource-isolation-v073

---
 resource-isolation-v073.patch | 111 ++++++++++++++++++++++++++++++++++
 vllm.spec                     |   7 ++-
 2 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 resource-isolation-v073.patch

diff --git a/resource-isolation-v073.patch b/resource-isolation-v073.patch
new file mode 100644
index 0000000..0defb77
--- /dev/null
+++ b/resource-isolation-v073.patch
@@ -0,0 +1,111 @@
+diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
+index ff38e3b..7732693 100644
+--- a/vllm/worker/worker.py
++++ b/vllm/worker/worker.py
+@@ -34,6 +34,98 @@ from vllm.worker.worker_base import (LocalOrDistributedWorkerBase, WorkerBase,
+ 
+ logger = init_logger(__name__)
+ 
++import subprocess
++import psutil
++
++# PCI device IDs of Ascend NPUs, mapped to their product names.
++npu_device_dict = {"d801": "910A", "d802": "910B", "d803": "910C", "d100": "310", "d500": "310P"}
++
++def execute_command(cmd_list):
++    try:
++        with subprocess.Popen(cmd_list, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
++            out, _ = p.communicate(timeout=1000)
++            return out.decode()
++    except FileNotFoundError as e:
++        raise RuntimeError(f"Failed to execute command, because {e}.")
++
++def get_numa_map():
++    numa_topo_out = execute_command(["npu-smi", "info", "-t", "topo"]).strip().split("\n")
++
++    line_no = 0
++    npu_no = 0
++    numa_to_npu_map = {}
++    numa_number = 0
++    max_cpu = 0
++
++    # Read the total CPU count and the NUMA node count from lscpu.
++    numa_node = execute_command(["lscpu"]).strip().split("\n")
++    for val in numa_node:
++        if val.startswith("CPU(s):"):
++            max_cpu = int(val.split(" ")[-1]) - 1
++        if val.startswith("NUMA"):
++            numa_number = int(val.split(" ")[-1])
++            break
++
++    # Group NPUs by the CPU affinity range reported by npu-smi.
++    npu_max_cpu_no = 0
++    for val in numa_topo_out:
++        line_no += 1
++        line = ''.join(val.split())
++        if line.startswith("NPU") and line_no > 1:
++            cpu_range = line[33:]
++            npu_max_cpu_no = max(npu_max_cpu_no, int(cpu_range.split("-")[1]))
++            if numa_to_npu_map.get(cpu_range, None) is None:
++                numa_to_npu_map[cpu_range] = list()
++            numa_to_npu_map[cpu_range].append(npu_no)
++            npu_no += 1
++
++    # If the NPU-affine ranges reach the highest CPU, the binding ranges sit
++    # below them; otherwise they sit above them.
++    npu_max_cpu = npu_max_cpu_no == max_cpu
++    shared_mode = npu_no > numa_number
++
++    npu_to_core_map = {}
++    for key, val in numa_to_npu_map.items():
++        cpu_range = key.split("-")
++        total_core_num = int(cpu_range[1]) - int(cpu_range[0]) + 1
++        cpu_start = int(cpu_range[0]) + total_core_num if not npu_max_cpu else int(cpu_range[0]) - total_core_num
++        if shared_mode:
++            # Several NPUs share one NUMA node: split its cores evenly.
++            core_num_per_npu = total_core_num // len(val)
++        else:
++            core_num_per_npu = total_core_num
++        core_start = cpu_start
++        for npu in val:
++            npu_to_core_map[npu] = [core_start, core_start + core_num_per_npu]
++            core_start += core_num_per_npu
++
++    return npu_to_core_map
++
++def bind_cpu(rank):
++    device_type = os.getenv("DEVICE_TYPE", "NPU")
++    if device_type != "NPU":
++        logger.info(f"Current platform {device_type} is not supported")
++        return
++    # Identify the NPU model from lspci; only Ascend 910A/910B are supported.
++    pci_out = execute_command(["lspci"]).strip().split("\n")
++    npu_device_type = ""
++    for val in pci_out:
++        if "Processing accelerators" in val and "Device" in val:
++            npu_device_type = npu_device_dict[val.split(" ")[-3]]
++            break
++    if npu_device_type != "910B" and npu_device_type != "910A":
++        logger.info(f"Current platform {npu_device_type} is not supported")
++        return
++
++    # Pin this worker's process to the CPU range reserved for its rank.
++    rank_cpu_maps = get_numa_map()
++
++    cpu_range = rank_cpu_maps[rank]
++    cpu_list = list(range(cpu_range[0], cpu_range[1]))
++    current_process = psutil.Process()
++    current_process.cpu_affinity(cpu_list)
++
++    logger.info(f"bind process {current_process.pid} in rank {rank} to cpu: {cpu_list}")
+ 
+ class Worker(LocalOrDistributedWorkerBase):
+     """A worker class that executes (a partition of) the model on a GPU.
+@@ -56,6 +148,7 @@ class Worker(LocalOrDistributedWorkerBase):
+         self.parallel_config.rank = rank
+         self.local_rank = local_rank
+         self.rank = rank
++        bind_cpu(local_rank)
+         self.distributed_init_method = distributed_init_method
+         self.is_driver_worker = is_driver_worker
+         if self.model_config.trust_remote_code:
diff --git a/vllm.spec b/vllm.spec
index b0fec1d..577f99a 100644
--- a/vllm.spec
+++ b/vllm.spec
@@ -3,12 +3,14 @@
 
 Name:           vllm
 Version:        0.7.3
-Release:        1
+Release:        2
 Summary:        Powerful engine for LLMs
 License:        (Apache-2.0 AND BSD-3-Clause) OR BSD-3-CLause
 URL:            https://github.com/vllm-project/vllm
 Source0:        https://gitee.com/src-openeuler/vllm/raw/master/vllm-%{version}.tar.gz
 
+Patch0001:      resource-isolation-v073.patch
+
 BuildArch:      noarch
 
 %description
@@ -69,6 +71,9 @@ mv %{buildroot}/filelist.lst .
 
 %files -n python3-%{_name} -f filelist.lst
 
 %changelog
+* Thu Apr 10 2025 wangchunyang - 0.7.3-2
+- Add CPU-side resource isolation optimizations to vLLM; supported accelerators: Ascend 910A and 910B
+
 * Tue Apr 8 2025 renwenjie - 0.7.3-1
 - Change the baseline version to 0.7.3
--
Gitee