From 04af5bc1766e6db133979f8c555745db70cb2dc1 Mon Sep 17 00:00:00 2001 From: yiguangzheng Date: Tue, 11 Nov 2025 10:06:54 +0800 Subject: [PATCH] feat: load weight from buffer --- .../mindspore_lite/mindspore_lite.Model.rst | 69 ++++- include/api/model.h | 18 +- mindspore-lite/python/api/model.py | 181 ++++++++--- mindspore-lite/python/src/model_pybind.cc | 40 ++- .../src/extendrt/cxx_api/model/model.cc | 19 ++ .../src/extendrt/cxx_api/model/model_impl.cc | 118 ++++--- .../src/extendrt/cxx_api/model/model_impl.h | 34 +- .../src/litert/cxx_api/model/model.cc | 6 + .../python_api/test_model_build_buffer.py | 291 ++++++++++++++++++ 9 files changed, 678 insertions(+), 98 deletions(-) create mode 100644 mindspore-lite/test/st/python/python_api/test_model_build_buffer.py diff --git a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst index 6cd3171b..0efe1522 100644 --- a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst +++ b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst @@ -5,6 +5,54 @@ mindspore_lite.Model `Model` 类定义MindSpore Lite模型,便于计算图管理。 + .. py:method:: build_from_buffer(model_bytes, weight_bytes=None, model_type=None, context=None, config_path="", config_dict=None) + + 从缓冲区加载并构建模型。 + + 参数: + - **model_bytes** (Bytes) - 定义输入模型的缓冲区。 + - **weight_bytes** (Bytes, 可选) - 定义输入模型权重的缓冲区。默认值:``None``。 + - **model_type** (ModelType, 可选) - 定义输入模型文件的类型。选项有 ``ModelType::MINDIR`` 。默认值:``None``。有关详细信息,请参见 `模型类型 `_ 。 + - **context** (Context,可选) - 定义上下文,用于在执行期间传递选项。默认值: ``None`` ,表示设置target为cpu的Context。 + - **config_path** (str,可选) - 定义配置文件的路径,用于在构建模型期间传递用户定义选项。在以下场景中,用户可能需要设置参数。例如:"/home/user/config.txt"。默认值: ``""`` 。 + + 进行混合精度推理的设置,配置文件内容及说明如下: + + .. 
code-block:: + + [execution_plan] + [op_name1]=data_type:float16(名字为op_name1的算子设置数据类型为float16) + [op_name2]=data_type:float32(名字为op_name2的算子设置数据类型为float32) + + - **config_dict** (dict,可选) - 配置参数字典,当使用该字典配置参数时,优先级高于配置文件。默认值: ``None`` 。 + + 推理配置rank table。配置文件中的内容及说明如下: + + .. code-block:: + + [ascend_context] + rank_table_file=[path_a](使用路径a的rank table) + + 同时配置参数字典中如下: + + .. code-block:: + + config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}} + + 那么配置参数字典中路径b的rank table将覆盖配置文件中的路径a的rank table。 + + 异常: + - **TypeError** - `model_bytes` 不是Bytes类型。 + - **TypeError** - `weight_bytes` 既不是Bytes类型也不是 ``None`` 。 + - **TypeError** - `model_type` 不是ModelType类型。 + - **TypeError** - `context` 既不是Context类型也不是 ``None`` 。 + - **TypeError** - `config_path` 不是str类型。 + - **RuntimeError** - `model_bytes` 长度为0。 + - **RuntimeError** - `model_type` 的值不是 ``ModelType::MINDIR`` 。 + - **RuntimeError** - `config_path` 文件路径不存在。 + - **RuntimeError** - 从 `config_path` 加载配置文件失败。 + - **RuntimeError** - 从缓冲区加载并构建模型失败。 + .. py:method:: build_from_file(model_path, model_type, context=None, config_path="", config_dict=None, dec_key=None, dec_mode="AES-GCM", dec_num_parallel=0) 从文件加载并构建模型。 @@ -15,24 +63,13 @@ mindspore_lite.Model - **context** (Context,可选) - 定义上下文,用于在执行期间传递选项。默认值: ``None`` 。 ``None`` 表示设置target为cpu的Context。 - **config_path** (str,可选) - 定义配置文件的路径,用于在构建模型期间传递用户定义选项。在以下场景中,用户可能需要设置参数。例如:"/home/user/config.txt"。默认值: ``""`` 。 - - **用法1** - 进行混合精度推理的设置,配置文件内容及说明如下: + 进行混合精度推理的设置,配置文件内容及说明如下: - .. code-block:: - - [execution_plan] - [op_name1]=data_type:float16(名字为op_name1的算子设置数据类型为float16) - [op_name2]=data_type:float32(名字为op_name2的算子设置数据类型为float32) - - - **用法2** - 在使用GPU推理时,进行TensorRT设置,配置文件内容及说明如下: - - .. code-block:: + .. 
code-block:: - [ms_cache] - serialize_path=[serialization model path](序列化模型的存储路径) - [gpu_context] - input_shape=input_name:[input_dim](模型输入维度,用于动态shape) - dynamic_dims=[min_dim~max_dim](模型输入的动态维度范围,用于动态shape) - opt_dims=[opt_dim](模型最优输入维度,用于动态shape) + [execution_plan] + [op_name1]=data_type:float16(名字为op_name1的算子设置数据类型为float16) + [op_name2]=data_type:float32(名字为op_name2的算子设置数据类型为float32) - **config_dict** (dict,可选) - 配置参数字典,当使用该字典配置参数时,优先级高于配置文件。 diff --git a/include/api/model.h b/include/api/model.h index d92c04b3..678b4183 100644 --- a/include/api/model.h +++ b/include/api/model.h @@ -51,13 +51,29 @@ class MS_API Model { /// \param[in] data_size Define bytes number of model buffer. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. Only /// ModelType::kMindIR_Lite is valid for Device-side Inference. Cloud-side Inference supports options - /// ModelType::kMindIR and ModelType::kMindIR_Lite, but option odelType::kMindIR_Lite will be removed in future + /// ModelType::kMindIR and ModelType::kMindIR_Lite, but option ModelType::kMindIR_Lite will be removed in future /// iterations. \param[in] model_context Define the context used to store options during execution. /// /// \return Status. kSuccess: build success, kLiteModelRebuild: build model repeatedly, Other: other types of errors. Status Build(const void *model_data, size_t data_size, ModelType model_type, const std::shared_ptr &model_context = nullptr); + /// \brief Build a model from model buffer so that it can run on a device. + /// + /// \param[in] model_data Define the buffer read from a model file. + /// \param[in] data_size Define bytes number of model buffer. + /// \param[in] weight_data Define the buffer read from a weight file. + /// \param[in] weight_size Define bytes number of weight buffer. + /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. 
Only + /// ModelType::kMindIR_Lite is valid for Device-side Inference. Cloud-side Inference supports options + /// ModelType::kMindIR and ModelType::kMindIR_Lite, but option ModelType::kMindIR_Lite will be removed in future + /// iterations. + /// \param[in] model_context Define the context used to store options during execution. + /// + /// \return Status. kSuccess: build success, kLiteModelRebuild: build model repeatedly, Other: other types of errors. + Status Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context = nullptr); + /// \brief Load and build a model from model buffer so that it can run on a device. /// /// \param[in] model_path Define the model path. diff --git a/mindspore-lite/python/api/model.py b/mindspore-lite/python/api/model.py index 9bbf1926..09054d5a 100644 --- a/mindspore-lite/python/api/model.py +++ b/mindspore-lite/python/api/model.py @@ -135,26 +135,13 @@ class Model(BaseModel): options during build model. In the following scenarios, users may need to set the parameter. For example, "/home/user/config.txt". Default: ``""``. - - Usage 1: Set mixed precision inference. The content and description of the configuration file are as - follows: + Set mixed precision inference. The content and description of the configuration file are as follows: - .. code-block:: - - [execution_plan] - [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16) - [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32) - - - Usage 2: When GPU inference, set the configuration of TensorRT. The content and description of the - configuration file are as follows: - - .. code-block:: + .. 
code-block:: - [ms_cache] - serialize_Path=[serialization model path](storage path of serialization model) - [gpu_context] - input_shape=input_Name: [input_dim] (Model input dimension, for dynamic shape) - dynamic_Dims=[min_dim~max_dim] (dynamic dimension range of model input, for dynamic shape) - opt_Dims=[opt_dim] (the optimal input dimension of the model, for dynamic shape) + [execution_plan] + [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16) + [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32) config_dict (dict, optional): When you set config in this dict, the priority is higher than the configuration items in config_path. @@ -224,21 +211,154 @@ class Model(BaseModel): model_type_ = _c_lite_wrapper.ModelType.kMindIR_Lite if model_type is ModelType.MINDIR: model_type_ = _c_lite_wrapper.ModelType.kMindIR + + self._apply_config(config_path, config_dict) + + if dec_key: + check_isinstance("dec_key", dec_key, bytes) + check_isinstance("dec_mode", dec_mode, str) + check_isinstance("dec_num_parallel", dec_num_parallel, int) + check_empty_string("dec_mode", dec_mode) + ret = self._model.build_from_file_with_decrypt( + self.model_path_, model_type_, context._context._inner_context, + dec_key, len(dec_key), dec_mode, dec_num_parallel) + else: + ret = self._model.build_from_file( + self.model_path_, model_type_, context._context._inner_context) + if not ret.IsOk(): + raise RuntimeError( + f"build_from_file failed! Error is {ret.ToString()}") + + @set_env + def build_from_buffer( + self, + model_bytes, + weight_bytes=None, + model_type=None, + context=None, + config_path="", + config_dict: dict = None, + ): + """ + Load and build a model from buffer. + + Args: + model_bytes (Bytes): Bytes of the mindir model when build from buffer. + weight_bytes (Bytes, optional): Bytes of the separate weight when build from buffer. Default: ``None``. 
+ model_type (ModelType, optional): Define the type of input model file. Option is ``ModelType.MINDIR``. + Default: ``None``. For details, see + `ModelType `_ . + context (Context, optional): Define the context used to transfer options during execution. + Default: ``None``. ``None`` means the Context with cpu target. + config_path (str, optional): Define the config file path. The config file is used to transfer user defined + options during build model. In the following scenarios, users may need to set the parameter. + For example, "/home/user/config.txt". Default: ``""``. + + Set mixed precision inference. The content and description of the configuration + file are as follows: + + .. code-block:: + + [execution_plan] + [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16) + [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32) + + + config_dict (dict, optional): When you set config in this dict, the priority is higher than the + configuration items in config_path. Default: ``None``. + + Set rank table file for inference. The content of the configuration file is as follows: + + .. code-block:: + + [ascend_context] + rank_table_file=[path_a](storage initial path of the rank table file) + + When set + + .. code-block:: + + config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}} + + The path_b from the config_dict will be used to compile the model. + + Raises: + TypeError: `model_bytes` is not a Bytes. + TypeError: `weight_bytes` is neither a Bytes nor ``None``. + TypeError: `model_type` is not a ModelType. + TypeError: `context` is neither a Context nor ``None``. + TypeError: `config_path` is not a str. + RuntimeError: Length of `model_bytes` is 0. + RuntimeError: Value of `model_type` is not ``ModelType.MINDIR``. + RuntimeError: `config_path` does not exist. + RuntimeError: Failed to load the configuration file from `config_path`.
+ RuntimeError: Failed to load and build the model from the buffer. + + Examples: + >>> # Testcase 1: build from buffer with a single file mindir model. + >>> import mindspore_lite as mslite + >>> with open("mobilenetv2.mindir", "rb") as f: + >>> model_bytes = f.read() + >>> model = mslite.Model() + >>> model.build_from_buffer(model_bytes, None, mslite.ModelType.MINDIR) + >>> print(model) + model_path: None. + >>> # Testcase 2: build from buffer with a separated weight model. + >>> import mindspore_lite as mslite + >>> with open("sd1.5_unet.onnx_graph.mindir", "rb") as f: + >>> model_bytes = f.read() + >>> with open("sd1.5_unet.onnx_variables/data_0", "rb") as f: + >>> weight_bytes = f.read() + >>> model = mslite.Model() + >>> context = mslite.Context() + >>> context.target = ["ascend"] + >>> model.build_from_buffer(model_bytes, weight_bytes, mslite.ModelType.MINDIR, context) + >>> print(model) + model_path: None. + """ + check_isinstance("model_bytes", model_bytes, bytes) + if len(model_bytes) == 0: + raise RuntimeError("build_from_buffer failed, model_bytes is empty.") + + check_isinstance("weight_bytes", weight_bytes, bytes, enable_none=True) + check_isinstance("model_type", model_type, ModelType) + if model_type != ModelType.MINDIR: + raise RuntimeError("build_from_buffer failed, model_type should be MINDIR") + + model_type_ = _c_lite_wrapper.ModelType.kMindIR + + if context is None: + context = Context() + check_isinstance("context", context, Context) + check_isinstance("config_path", config_path, str) + + self.provider = context.ascend.provider + + self.model_path_ = None + + self._apply_config(config_path, config_dict) + + ret = self._model.build_from_buff(model_bytes, weight_bytes, model_type_, context._context._inner_context) + if not ret.IsOk(): + raise RuntimeError(f"build_from_buffer failed! 
Error is {ret.ToString()}") + + def _apply_config(self, config_path, config_dict): + """ + apply config for build + """ if config_path: if not os.path.exists(config_path): - raise RuntimeError( - "build_from_file failed, config_path does not exist!") + raise RuntimeError("build_from_file failed, config_path does not exist!") ret = self._model.load_config(config_path) if not ret.IsOk(): - raise RuntimeError( - f"load configuration failed! Error is {ret.ToString()}") + raise RuntimeError(f"load configuration failed! Error is {ret.ToString()}") parse_res = _parse_update_weight_config_name(config_path) if parse_res is not None and len(parse_res) >= 2: update_names, self.lora_name_map = parse_res[0], parse_res[1] if config_dict is None: config_dict = {"ascend_context": {"variable_weights_list": update_names}} else: - config_dict['ascend_context']["variable_weights_list"] = update_names + config_dict["ascend_context"]["variable_weights_list"] = update_names if config_dict: check_isinstance("config_dict", config_dict, dict) @@ -253,21 +373,6 @@ class Model(BaseModel): if not ret.IsOk(): raise RuntimeError(f"update configuration failed! Error is {ret.ToString()}.") - if dec_key: - check_isinstance("dec_key", dec_key, bytes) - check_isinstance("dec_mode", dec_mode, str) - check_isinstance("dec_num_parallel", dec_num_parallel, int) - check_empty_string("dec_mode", dec_mode) - ret = self._model.build_from_file_with_decrypt( - self.model_path_, model_type_, context._context._inner_context, - dec_key, len(dec_key), dec_mode, dec_num_parallel) - else: - ret = self._model.build_from_file( - self.model_path_, model_type_, context._context._inner_context) - if not ret.IsOk(): - raise RuntimeError( - f"build_from_file failed! Error is {ret.ToString()}") - def get_outputs(self): """ Obtains all output information Tensors of the model. 
diff --git a/mindspore-lite/python/src/model_pybind.cc b/mindspore-lite/python/src/model_pybind.cc index 08b137d5..63cf6a94 100644 --- a/mindspore-lite/python/src/model_pybind.cc +++ b/mindspore-lite/python/src/model_pybind.cc @@ -153,6 +153,42 @@ Status PyModelBuild(Model *model, const std::string &model_path, ModelType model return kSuccess; } +Status PyModelBuildFromBuffer(Model *model, py::bytes model_bytes, py::object weight_bytes, ModelType model_type, + const std::shared_ptr &model_context) { + if (model_bytes.is_none()) { + MS_LOG(ERROR) << "model_bytes is None."; + return kLiteError; + } + void *model_ptr = nullptr; + ssize_t model_size = 0; + PYBIND11_BYTES_AS_STRING_AND_SIZE(model_bytes.ptr(), reinterpret_cast(&model_ptr), &model_size); + if (model_ptr == nullptr) { + MS_LOG(ERROR) << "model_ptr is nullptr."; + return kLiteError; + } + if (model_size == 0) { + MS_LOG(ERROR) << "model_size is 0."; + return kLiteError; + } + void *weight_ptr = nullptr; + ssize_t weight_size = 0; + if (!weight_bytes.is_none() && !py::isinstance(weight_bytes)) { + MS_LOG(ERROR) << "weight_bytes should be bytes or None."; + return kLiteError; + } + if (!weight_bytes.is_none()) { + PYBIND11_BYTES_AS_STRING_AND_SIZE(weight_bytes.ptr(), reinterpret_cast(&weight_ptr), &weight_size); + if (weight_ptr == nullptr) { + MS_LOG(ERROR) << "weight_ptr is nullptr."; + return kLiteError; + } + } + + py::gil_scoped_release release; + auto ret = model->Build(model_ptr, model_size, weight_ptr, weight_size, model_type, model_context); + return ret; +} + std::vector PyExecGetInputs(ModelExecutor *executor) { if (executor == nullptr) { MS_LOG(ERROR) << "ModelExecutor object cannot be nullptr!"; @@ -252,9 +288,7 @@ void ModelPyBind(const py::module &m) { (void)py::class_>(m, "ModelBind") .def(py::init<>()) - .def("build_from_buff", - py::overload_cast &>(&Model::Build), - py::call_guard()) + .def("build_from_buff", PyModelBuildFromBuffer) .def("build_from_file", py::overload_cast 
&>(&Model::Build), py::call_guard()) diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model.cc b/mindspore-lite/src/extendrt/cxx_api/model/model.cc index b3f1ebce..ebd882d1 100644 --- a/mindspore-lite/src/extendrt/cxx_api/model/model.cc +++ b/mindspore-lite/src/extendrt/cxx_api/model/model.cc @@ -92,6 +92,25 @@ Status Model::Build(const void *model_data, size_t data_size, ModelType model_ty } } +Status Model::Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context) { + if (impl_ == nullptr) { + MS_LOG(ERROR) << "Model implement is null."; + return kLiteNullptr; + } + try { + Status ret = impl_->Build(model_data, data_size, weight_data, weight_size, model_type, model_context); + if (ret != kSuccess) { + MS_LOG(ERROR) << "impl_->Build failed! ret = " << ret; + return ret; + } + return kSuccess; + } catch (const std::exception &exe) { + MS_LOG(ERROR) << "Catch exception: " << exe.what(); + return kCoreFailed; + } +} + Status Model::Build(const std::vector &model_path, ModelType model_type, const std::shared_ptr &model_context) { if (impl_ == nullptr) { diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc index c126365c..ad985911 100644 --- a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc +++ b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc @@ -134,6 +134,14 @@ Status PrimitivePyToC(const FuncGraphPtr &func_graph) { } return kSuccess; } + +std::string WeightBufferParamsDisplayStr(const void *weight_data, size_t weight_size) { + std::stringstream ss; + ss << (weight_data == nullptr ? " weight_data is nullptr." 
: " weight_data is not nullptr.") + << " weight_size: " << weight_size; + return ss.str(); +} + } // namespace void ModelImpl::SetMsContext() { @@ -190,7 +198,51 @@ ConverterPlugin::ConverterFunc ConverterPlugin::GetConverterFuncInner() { ModelImpl::ModelImpl() : graph_(nullptr), session_(nullptr), context_(nullptr) {} -FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t model_size, ModelType model_type, +FuncGraphPtr ModelImpl::DispatchLoadGraph(const void *model_buff, size_t model_size, const void *weight_data, + size_t weight_size, const std::string &model_path) { + std::string weight_path = "./"; + auto mindir_path = GetConfig(lite::kConfigModelFileSection, lite::kConfigMindIRPathKey); + std::string base_path = ""; + if (!mindir_path.empty()) { + base_path = mindir_path; + } else { + // user does not set mindir_path, convert from model_path + base_path = model_path; + } + FuncGraphPtr func_graph; + std::string user_info_string; + bool build_from_file = weight_data == nullptr && weight_size == 0 && !base_path.empty(); + bool build_from_buffer_model = weight_data == nullptr && weight_size == 0 && base_path.empty(); + bool build_from_buffer_model_weight = weight_data != nullptr && weight_size != 0 && base_path.empty(); + std::unique_lock l(g_load_mindir_lock); + MindIRLoader mindir_loader(true, nullptr, 0, kDecModeAesGcm, false); + bool ret = false; + if (build_from_file) { + if (base_path.find("/") != std::string::npos) { + weight_path = base_path.substr(0, base_path.rfind("/")); + } + MS_LOG(INFO) << "model will build from file."; + ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_path, &func_graph, &user_info_string); + } else if (build_from_buffer_model || build_from_buffer_model_weight) { + MS_LOG(INFO) << "model will build from buffer."; + ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_data, weight_size, &func_graph, &user_info_string); + } else { + MS_LOG(ERROR) << "cannot determine how to build model." 
+ << " got:" << WeightBufferParamsDisplayStr(weight_data, weight_size) << " model_path: \"" + << model_path << "\""; + } + if (!ret || func_graph == nullptr) { + MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model."; + return nullptr; + } + if (!user_info_string.empty()) { + SetModelInfo(lite::KModelUserInfo, user_info_string); + } + return func_graph; +} + +FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t model_size, const void *weight_data, + size_t weight_size, ModelType model_type, const std::shared_ptr &model_context, const std::string &model_path) { if (model_type != kMindIR) { @@ -203,18 +255,7 @@ FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t mod MS_LOG(ERROR) << "UpdateSharingWorkspaceConfig failed!"; return nullptr; } - auto mindir_path = GetConfig(lite::kConfigModelFileSection, lite::kConfigMindIRPathKey); - std::string weight_path = "./"; - std::string base_path = ""; - if (!mindir_path.empty()) { - base_path = mindir_path; - } else { - // user does not set mindir_path, convert from model_path - base_path = model_path; - } - if (base_path.find("/") != std::string::npos) { - weight_path = base_path.substr(0, base_path.rfind("/")); - } + auto dump_path = GetConfig(lite::kAscendContextSection, lite::kDumpPathKey); if (!dump_path.empty()) { auto dir_pos = model_path.find_last_of('/'); @@ -224,20 +265,12 @@ FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t mod (void)UpdateConfig(lite::kAscendContextSection, std::pair(lite::kDumpModelNameKey, model_name)); } - FuncGraphPtr func_graph; - std::string user_info_string; - { - std::unique_lock l(g_load_mindir_lock); - MindIRLoader mindir_loader(true, nullptr, 0, kDecModeAesGcm, false); - auto ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_path, &func_graph, &user_info_string); - if (!ret || func_graph == nullptr) { - MS_LOG(ERROR) << "Failed to load MindIR model, please check the 
validity of the model."; - return nullptr; - } - if (!user_info_string.empty()) { - SetModelInfo(lite::KModelUserInfo, user_info_string); - } + FuncGraphPtr func_graph = DispatchLoadGraph(model_buff, model_size, weight_data, weight_size, model_path); + if (func_graph == nullptr) { + MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model."; + return nullptr; } + if (func_graph->get_attr(lite::kDynamicDimsKey) != nullptr) { auto dynamic_dims = GetValue(func_graph->get_attr(lite::kDynamicDimsKey)); SetModelInfo(lite::kDynamicDimsKey, dynamic_dims); @@ -411,14 +444,23 @@ void ModelImpl::UpdateProvider() { } } } - -Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, ModelType model_type, +Status ModelImpl::CheckBuildFromBuffer(ModelType model_type, const void *weight_data, size_t weight_size) { + if (model_type != kMindIR && (weight_data != nullptr || weight_size != 0)) { + MS_LOG(ERROR) << "Build from weight buffer is not support model_type:" << model_type + << ". 
got: " << WeightBufferParamsDisplayStr(weight_data, weight_size); + return kLiteParamInvalid; + } + return kSuccess; +} +Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, const void *weight_data, + size_t weight_size, ModelType model_type, const std::shared_ptr &model_context, const std::string &model_path) { MS_CHECK_TRUE_MSG(model_buff != nullptr, kLiteError, "The input model buffer is nullptr!"); MS_CHECK_TRUE_MSG(model_size != 0, kLiteError, "The input model buffer size is 0!"); - if (model_context == nullptr) { - MS_LOG(ERROR) << "Invalid context pointers!"; - return kLiteError; + MS_CHECK_TRUE_MSG(model_context != nullptr, kLiteError, "Invalid context pointers!"); + auto ret = CheckBuildFromBuffer(model_type, weight_data, weight_size); + if (ret != kSuccess) { + return ret; } std::lock_guard lock(mutex_); if (session_) { @@ -457,7 +499,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M MS_LOG(ERROR) << "Create session failed!"; return kLiteError; } - Status ret; + if (model_type == kMindIR_Lite) { ret = session_->CompileGraph(model_buff, model_size, &graph_id_); if (ret != kSuccess) { @@ -475,7 +517,8 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M } if (model_type != kOM) { - func_graph = LoadGraphByBufferImpl(model_buff, model_size, model_type, model_context, model_path); + func_graph = + LoadGraphByBufferImpl(model_buff, model_size, weight_data, weight_size, model_type, model_context, model_path); if (func_graph == nullptr) { MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model."; return kLiteError; @@ -648,7 +691,12 @@ Status ModelImpl::Build(const FuncGraphPtr &func_graph, const std::shared_ptr &model_context) { - return BuildByBufferImpl(model_data, data_size, model_type, model_context); + return BuildByBufferImpl(model_data, data_size, nullptr, 0, model_type, model_context); +} + +Status ModelImpl::Build(const void 
*model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context) { + return BuildByBufferImpl(model_data, data_size, weight_data, weight_size, model_type, model_context); } Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type, @@ -668,7 +716,7 @@ Status ModelImpl::Build(const std::string &model_path, ModelType model_type, MS_LOG(ERROR) << "Failed to read buffer from model file."; return kLiteError; } - return BuildByBufferImpl(buffer.Data(), buffer.DataSize(), model_type, model_context, model_path); + return BuildByBufferImpl(buffer.Data(), buffer.DataSize(), nullptr, 0, model_type, model_context, model_path); } Status ModelImpl::ConvertGraphOnline(const FuncGraphPtr &func_graph, const std::shared_ptr &model_context) { diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h index bd3fa763..c79d03bd 100644 --- a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h +++ b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h @@ -71,6 +71,20 @@ class ModelImpl { Status Build(const void *model_data, size_t data_size, ModelType model_type, const std::shared_ptr &model_context); + /// \brief Build a model from model buffer so that it can run on a device. + /// + /// \param[in] model_data Define the buffer read from a model file. + /// \param[in] data_size Define bytes number of model buffer. + /// \param[in] weight_data Define the buffer read from a weight file. + /// \param[in] weight_size Define bytes number of weight buffer. + /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. + /// Only ModelType::kMindIR is valid for Lite. + /// \param[in] model_context Define the context used to store options during execution. + /// + /// \return Status. 
+ Status Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context); + /// \brief Build a model from a encrypted weight file and a graph buffer so that it can run on a device. /// /// \param[in] model_data Define the buffer of the loaded mindir_graph @@ -225,13 +239,19 @@ class ModelImpl { /// /// \param[in] model_data Define the buffer read from a model file. /// \param[in] data_size Define bytes number of model buffer. + /// \param[in] weight_data Define the buffer read from a weight file. + /// \param[in] weight_size Define bytes number of weight buffer. /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. - /// Only ModelType::kMindIR is valid for Lite. \param[in] model_context Define the context used to store options - /// during execution. \param[in] model_path Define the model_path, this param is used for net and weight divided case. + /// Only ModelType::kMindIR is valid for Lite. + /// \param[in] model_context Define the context used to store options + /// during execution. + /// \param[in] model_path Define the model_path, this param is used for net and weight divided case. /// /// \return value of config as string type. - Status BuildByBufferImpl(const void *model_data, size_t data_size, ModelType model_type, - const std::shared_ptr &model_context, const std::string &model_path = ""); + Status BuildByBufferImpl(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context, + const std::string &model_path = ""); + /// \brief Model build by buffer implementation for encrypted file, unified model build flow. /// /// \param[in] model_data Define the buffer read from a model file. 
@@ -246,7 +266,8 @@ class ModelImpl { const std::shared_ptr &model_context, const std::string &model_path, const CryptoInfo &cryptoInfo); - FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t data_size, ModelType model_type, + FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t data_size, const void *weight_data, + size_t weight_size, ModelType model_type, const std::shared_ptr &model_context, const std::string &model_path); FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t model_size, ModelType model_type, @@ -272,6 +293,9 @@ class ModelImpl { Status UpdateSharingWorkspaceConfig(const void *model_buff, size_t model_size, const std::string &model_path); void UpdateProvider(); + FuncGraphPtr DispatchLoadGraph(const void *model_buff, size_t model_size, const void *weight_data, size_t weight_size, + const std::string &model_path); + Status CheckBuildFromBuffer(ModelType model_type, const void *weight_data, size_t weight_size); friend class Model; friend class Serialization; diff --git a/mindspore-lite/src/litert/cxx_api/model/model.cc b/mindspore-lite/src/litert/cxx_api/model/model.cc index ea5aed46..7a4dd8a2 100644 --- a/mindspore-lite/src/litert/cxx_api/model/model.cc +++ b/mindspore-lite/src/litert/cxx_api/model/model.cc @@ -159,6 +159,12 @@ Status Model::Build(const void *model_data, size_t data_size, ModelType model_ty return kSuccess; } +Status Model::Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size, + ModelType model_type, const std::shared_ptr &model_context) { + MS_LOG(ERROR) << "Build with weight buffer is only support for mindspore_lite's ascend backend."; + return kLiteError; +} + Status Model::Build(const std::vector &model_path, ModelType model_type, const std::shared_ptr &model_context, const Key &dec_key, const std::vector &dec_mode, const std::vector &cropto_lib_path) { diff --git a/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py 
b/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py new file mode 100644 index 00000000..2ffaabed --- /dev/null +++ b/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py @@ -0,0 +1,291 @@ +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Test load from buffer +""" + +from typing import Tuple, List, Dict +from dataclasses import dataclass, replace +from pathlib import Path +import subprocess +import pytest +import mindspore_lite as mslite +import numpy as np +from utils import expect_error + + +@dataclass +class ModelArgs: + model_path: str + weight_path: str = None + model_type: str = mslite.ModelType.MINDIR + device: str = "ascend" + use_ge: bool = False + config: dict = None + device_id: int = None + inputs: Tuple[np.ndarray] = None + + +def _prepare_build_context(context, model_build_args: ModelArgs): + context.target = [model_build_args.device] + if model_build_args.device == "ascend": + if model_build_args.use_ge: + context.ascend.provider = "ge" + if model_build_args.device_id is not None: + context.ascend.device_id = model_build_args.device_id + + +def build_model_from_file(model_build_args: ModelArgs): + """ + build model + """ + context = mslite.Context() + model = mslite.Model() + + _prepare_build_context(context, model_build_args) + + model.build_from_file( + model_build_args.model_path, + 
model_build_args.model_type, + context, + config_dict=model_build_args.config, + ) + return model + + +def build_model_from_buffer(model_build_args: ModelArgs): + """ + build model + """ + context = mslite.Context() + model = mslite.Model() + + _prepare_build_context(context, model_build_args) + + with open(model_build_args.model_path, "rb") as f: + model_bytes = f.read() + + weight_bytes = None + if model_build_args.weight_path is not None: + with open(model_build_args.weight_path, "rb") as f: + weight_bytes = f.read() + + model.build_from_buffer( + model_bytes, + weight_bytes, + model_build_args.model_type, + context, + config_dict=model_build_args.config, + ) + return model + + +def _fill_model_build_args(obj, output_dir, device_id): + """ + prepare model build args + """ + model_path = obj.model_path.format(output_dir=output_dir) + weight_path = obj.weight_path.format(output_dir=output_dir) if obj.weight_path is not None else None + return replace(obj, model_path=model_path, weight_path=weight_path, device_id=device_id) + + +@pytest.fixture(scope="module", autouse=True) +def module_setup_and_teardown_fixture(so_path, mindir_dir, output_dir): + """ + module setup + convert bert model + """ + # setup + # convert bert model + fmk = "ONNX" + model_path = Path(mindir_dir) / "bert_model.onnx" + acl_output_path = Path(output_dir) / "bert_model.onnx" + acl_optimize = "ascend_oriented" + acl_cmd = [ + Path(so_path) / "tools/converter/converter/converter_lite", + f"--optimize={acl_optimize}", + f"--modelFile={model_path}", + f"--outputFile={acl_output_path}", + f"--fmk={fmk}", + ] + subprocess.run(acl_cmd, check=True) + + cpu_output_path = Path(output_dir) / "bert_model.onnx.cpu" + cpu_optimize = "general" + cpu_cmd = [ + Path(so_path) / "tools/converter/converter/converter_lite", + f"--optimize={cpu_optimize}", + f"--modelFile={model_path}", + f"--outputFile={cpu_output_path}", + f"--fmk={fmk}", + ] + subprocess.run(cpu_cmd, check=True) + empty_mindir = Path(output_dir) 
/ "emptyfile" + empty_mindir.unlink(missing_ok=True) + empty_mindir.touch() + yield + + +@pytest.mark.parametrize( + "args", + ( + ModelArgs( + "{output_dir}/sd1.5_unet.onnx_graph.mindir", + "{output_dir}/sd1.5_unet.onnx_variables/data_0", + inputs=( + np.ones((2, 4, 64, 64)).astype(np.float32), + np.ones((1,)).astype(np.float32), + np.ones((2, 77, 768)).astype(np.float32), + ), + ), + ModelArgs( + "{output_dir}/bert_model.onnx.mindir", + None, + inputs=( + np.ones((1, 128)).astype(np.int32), + np.ones((1, 128)).astype(np.int32), + np.ones((1, 128)).astype(np.int32), + ), + ), + ModelArgs( + "{output_dir}/bert_model.onnx.mindir", + "{output_dir}/sd1.5_unet.onnx_variables/data_0", + inputs=( + np.ones((1, 128)).astype(np.int32), + np.ones((1, 128)).astype(np.int32), + np.ones((1, 128)).astype(np.int32), + ), + ), + ), +) +def test_build_from_buffer_correct(args: ModelArgs, output_dir: str, device_id: List[int]): + """ + test model build from buffer + """ + model_build_args = _fill_model_build_args(args, output_dir, device_id[0]) + + assert model_build_args.inputs is not None + + model_from_file = build_model_from_file(model_build_args) + model_from_buffer = build_model_from_buffer(model_build_args) + + model_input = [mslite.Tensor(tensor=i, device=f"ascend:{device_id[0]}") for i in model_build_args.inputs] + + output_file = model_from_file.predict(model_input) + output_buffer = model_from_buffer.predict(model_input) + + for of, ob in zip(output_file, output_buffer): + np.testing.assert_allclose(of.get_data_to_numpy(), ob.get_data_to_numpy()) + + +@pytest.mark.parametrize( + "args,error_type,msg", + ( + ( + ModelArgs( + "{output_dir}/sd1.5_unet.onnx_graph.mindir", + None, + ), + RuntimeError, + "build_from_buffer failed! 
Error is Common error code.", + ), + ( + ModelArgs( + "{output_dir}/sd1.5_unet.onnx_graph.mindir", + "{output_dir}/emptyfile", + ), + RuntimeError, + "build_from_buffer failed! Error is Common error code.", + ), + ( + ModelArgs( + "{output_dir}/emptyfile", + None, + ), + RuntimeError, + "build_from_buffer failed, model_bytes is empty.", + ), + ), +) +def test_build_from_buffer_lack_weight( + args: ModelArgs, error_type: Exception, msg: str, output_dir: str, device_id: List[int] +): + """ + test model build from buffer. lack weight + """ + model_build_args = _fill_model_build_args(args, output_dir, device_id[0]) + + with expect_error(error_type) as exec_info: + build_model_from_buffer(model_build_args) + assert msg in str(exec_info.value) + + +@pytest.mark.parametrize( + "build_args,error_type,msg", + ( + # model_bytes + ({"model_bytes": None}, TypeError, "model_bytes must be bytes"), + ({"model_bytes": bytes()}, RuntimeError, "build_from_buffer failed, model_bytes is empty."), + # weight_bytes + ({"model_bytes": b"0", "weight_bytes": str()}, TypeError, "weight_bytes must be bytes"), + # model_type + ( + {"model_bytes": b"0"}, + TypeError, + "model_type must be ModelType", + ), + ( + {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR_LITE}, + RuntimeError, + "build_from_buffer failed, model_type should be MINDIR", + ), + # context + ( + {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "context": str()}, + TypeError, + "context must be Context", + ), + # config_path + ( + {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "config_path": 1}, + TypeError, + "config_path must be str", + ), + # config_dict + ( + {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "config_dict": 1}, + TypeError, + "config_dict must be dict", + ), + ), +) +def test_build_from_buffer_arg_type( + build_args: Dict, error_type: Exception, msg: str, output_dir: str, device_id: List[int] +): + """ + test model build from buffer. check args type. 
+ """ + args = ModelArgs(str()) + model_build_args = _fill_model_build_args(args, output_dir, device_id[0]) + + with expect_error(error_type) as exec_info: + context = mslite.Context() + model = mslite.Model() + + _prepare_build_context(context, model_build_args) + + model.build_from_buffer(**build_args) + + assert msg in str(exec_info.value) -- Gitee