diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index 5892937a8957829c8a47ae1df13ada581137f7e6..8bd06a48501f9c6a2ccfd7959eae34d6eb57dec1 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -59,6 +59,19 @@ import vllm.v1.engine.core
 from vllm_mindspore.v1.engine.core import shutdown
 vllm.v1.engine.core.DPEngineCoreProc.shutdown = shutdown
 
+import vllm.lora.utils
+from vllm_mindspore.lora.utils import get_supported_lora_modules_ms
+vllm.lora.utils.get_supported_lora_modules = get_supported_lora_modules_ms
+
+from vllm_mindspore.lora.models import LoRAModel_ms, LoRAModelManager_ms
+from vllm_mindspore.lora.worker_manager import _load_adapter_ms
+from vllm.lora.models import LoRAModelManager
+from vllm.lora.worker_manager import WorkerLoRAManager
+LoRAModelManager._create_lora_modules = LoRAModelManager_ms._create_lora_modules
+LoRAModelManager._set_adapter_mapping = LoRAModelManager_ms._set_adapter_mapping
+WorkerLoRAManager._load_adapter = _load_adapter_ms
+vllm.lora.models.LoRAModel = LoRAModel_ms
+
 from vllm_mindspore.utils import (
     direct_register_custom_op,
     make_tensor_with_pad,
diff --git a/vllm_mindspore/lora/__init__.py b/vllm_mindspore/lora/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/vllm_mindspore/lora/models.py b/vllm_mindspore/lora/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b2073ca1f188c625215f36b396d68cdc47c6847
--- /dev/null
+++ b/vllm_mindspore/lora/models.py
@@ -0,0 +1,11 @@
+from vllm.lora.models import LoRAModelManager, LoRAModel
+from vllm_mindspore.utils import is_mindformers_model_backend
+from mindformers.parallel_core.inference.lora.models import LoRAModel as MFLoRAModel
+from mindformers.parallel_core.inference.lora.models import vLLMLoRAModelManager
+
+if is_mindformers_model_backend():
+    LoRAModel_ms = MFLoRAModel
+    LoRAModelManager_ms = vLLMLoRAModelManager
+else:
+    LoRAModel_ms = LoRAModel
+    LoRAModelManager_ms = LoRAModelManager
\ No newline at end of file
diff --git a/vllm_mindspore/lora/utils.py b/vllm_mindspore/lora/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8c45057e5e11af082a23871aa2ccc605a8ed93d
--- /dev/null
+++ b/vllm_mindspore/lora/utils.py
@@ -0,0 +1,8 @@
+from vllm.lora.utils import get_supported_lora_modules
+from vllm_mindspore.utils import is_mindformers_model_backend
+from mindformers.parallel_core.inference.lora.lora_utils import get_mf_supported_lora_modules
+
+if is_mindformers_model_backend():
+    get_supported_lora_modules_ms = get_mf_supported_lora_modules
+else:
+    get_supported_lora_modules_ms = get_supported_lora_modules
\ No newline at end of file
diff --git a/vllm_mindspore/lora/worker_manager.py b/vllm_mindspore/lora/worker_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc86937b9b0b9932309505d4ab75657a5fc4d78e
--- /dev/null
+++ b/vllm_mindspore/lora/worker_manager.py
@@ -0,0 +1,31 @@
+from typing import List
+from vllm.lora.request import LoRARequest
+from vllm_mindspore.utils import is_mindformers_model_backend
+from mindformers.parallel_core.inference.lora.models import load_lora_ckpt
+
+def _load_adapter_ms(self, lora_request: LoRARequest):
+    supported_lora_modules = (
+        self._adapter_manager.supported_lora_modules)
+    packed_modules_mapping = (
+        self._adapter_manager.packed_modules_mapping)
+    expected_lora_modules: List[str] = []
+    for module in supported_lora_modules:
+        if module in packed_modules_mapping:
+            expected_lora_modules.extend(
+                packed_modules_mapping[module])
+        else:
+            expected_lora_modules.append(module)
+
+    expected_lora_modules = list(set(expected_lora_modules))
+    lora = load_lora_ckpt(lora_request,
+                          self.lora_config,
+                          self.vocab_size,
+                          expected_lora_modules,
+                          self.max_position_embeddings,
+                          self.embedding_modules,
+                          self.embedding_padding_modules)
+    return lora
+
+if not is_mindformers_model_backend():
+    from vllm.lora.worker_manager import WorkerLoRAManager
+    _load_adapter_ms = WorkerLoRAManager._load_adapter
\ No newline at end of file
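
Note: a quick sanity-check sketch (not part of the patch) of how the monkey patches above are expected to resolve once `import vllm_mindspore` has run. The `*_ms` names come from the new `vllm_mindspore/lora/` modules; which concrete implementation they point to depends on whether the MindFormers model backend is active, but both branches assign the same aliases, so the assertions below should hold either way.

```python
# Sanity-check sketch, assuming `import vllm_mindspore` applies the patches above.
import vllm_mindspore  # noqa: F401  (side effect: monkey-patches vllm)

import vllm.lora.models
import vllm.lora.utils
from vllm.lora.worker_manager import WorkerLoRAManager

from vllm_mindspore.lora.models import LoRAModel_ms, LoRAModelManager_ms
from vllm_mindspore.lora.utils import get_supported_lora_modules_ms
from vllm_mindspore.lora.worker_manager import _load_adapter_ms

# Each patch target from vllm_mindspore/__init__.py now points at a *_ms symbol.
assert vllm.lora.utils.get_supported_lora_modules is get_supported_lora_modules_ms
assert vllm.lora.models.LoRAModel is LoRAModel_ms
assert (vllm.lora.models.LoRAModelManager._create_lora_modules
        is LoRAModelManager_ms._create_lora_modules)
assert WorkerLoRAManager._load_adapter is _load_adapter_ms
```

When the MindFormers backend is not selected, the `*_ms` aliases fall back to the upstream vLLM symbols, so the native LoRA path is left untouched.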
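
For reference, a minimal end-to-end usage sketch of what these hooks enable. It relies only on vLLM's public multi-LoRA API (`LLM(enable_lora=True)` plus `LoRARequest`); the model path, adapter name/path, and rank/slot limits below are placeholders, not values taken from this patch.

```python
# Minimal usage sketch; all paths and names below are placeholders.
import vllm_mindspore  # import before vllm so the LoRA patches are applied
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

llm = LLM(
    model="/path/to/base_model",  # placeholder base model path
    enable_lora=True,             # routes through the patched LoRA code paths
    max_loras=1,
    max_lora_rank=8,
)

outputs = llm.generate(
    ["Give me a short introduction to large language models."],
    SamplingParams(temperature=0.0, max_tokens=64),
    # LoRARequest(name, id, path); adapter name and path are placeholders.
    lora_request=LoRARequest("demo_adapter", 1, "/path/to/lora_adapter"),
)
print(outputs[0].outputs[0].text)
```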