From 04af5bc1766e6db133979f8c555745db70cb2dc1 Mon Sep 17 00:00:00 2001
From: yiguangzheng <yiguangzheng@huawei.com>
Date: Tue, 11 Nov 2025 10:06:54 +0800
Subject: [PATCH] feat: load weight from buffer

---
 .../mindspore_lite/mindspore_lite.Model.rst   |  69 ++++-
 include/api/model.h                           |  18 +-
 mindspore-lite/python/api/model.py            | 181 ++++++++---
 mindspore-lite/python/src/model_pybind.cc     |  40 ++-
 .../src/extendrt/cxx_api/model/model.cc       |  19 ++
 .../src/extendrt/cxx_api/model/model_impl.cc  | 118 ++++---
 .../src/extendrt/cxx_api/model/model_impl.h   |  34 +-
 .../src/litert/cxx_api/model/model.cc         |   6 +
 .../python_api/test_model_build_buffer.py     | 291 ++++++++++++++++++
 9 files changed, 678 insertions(+), 98 deletions(-)
 create mode 100644 mindspore-lite/test/st/python/python_api/test_model_build_buffer.py

diff --git a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst
index 6cd3171b..0efe1522 100644
--- a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst
+++ b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Model.rst
@@ -5,6 +5,54 @@ mindspore_lite.Model
 
     `Model` 类定义MindSpore Lite模型，便于计算图管理。
 
+    .. py:method:: build_from_buffer(model_bytes, weight_bytes=None, model_type=None, context=None, config_path="", config_dict=None)
+
+        从缓冲区加载并构建模型。
+
+        参数：
+            - **model_bytes** (Bytes) - 定义输入模型的缓冲区。
+            - **weight_bytes** (Bytes, 可选) - 定义输入模型权重的缓冲区。默认值：``None``。
+            - **model_type** (ModelType, 可选) - 定义输入模型文件的类型。选项有 ``ModelType::MINDIR`` 。默认值：``None``。有关详细信息，请参见 `模型类型 <https://mindspore.cn/lite/api/zh-CN/master/mindspore_lite/mindspore_lite.ModelType.html>`_ 。
+            - **context** (Context，可选) - 定义上下文，用于在执行期间传递选项。默认值： ``None`` ，表示设置target为cpu的Context。
+            - **config_path** (str，可选) - 定义配置文件的路径，用于在构建模型期间传递用户定义选项。在以下场景中，用户可能需要设置参数。例如："/home/user/config.txt"。默认值： ``""`` 。
+
+              进行混合精度推理的设置，配置文件内容及说明如下：
+
+              .. code-block::
+
+                  [execution_plan]
+                  [op_name1]=data_type:float16（名字为op_name1的算子设置数据类型为float16）
+                  [op_name2]=data_type:float32（名字为op_name2的算子设置数据类型为float32）
+
+            - **config_dict** (dict，可选) - 配置参数字典，当使用该字典配置参数时，优先级高于配置文件。默认值：``None``。
+
+              推理配置rank table。配置文件中的内容及说明如下：
+
+              .. code-block::
+
+                  [ascend_context]
+                  rank_table_file=[path_a]（使用路径a的rank table）
+
+              同时配置参数字典中如下：
+
+              .. code-block::
+
+                  config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}}
+
+              那么配置参数字典中路径b的rank table将覆盖配置文件中的路径a的rank table。
+
+        异常：
+            - **TypeError** - `model_bytes` 不是Bytes类型。
+            - **TypeError** - `weight_bytes` 既不是Bytes类型也不是``None``。
+            - **TypeError** - `model_type` 不是ModelType类型。
+            - **TypeError** - `context` 既不是Context类型也不是 ``None`` 。
+            - **TypeError** - `config_path` 不是str类型。
+            - **RuntimeError** - `model_bytes` 长度为0。
+            - **RuntimeError** - `model_type` 的值不是``ModelType::MINDIR``。
+            - **RuntimeError** - `config_path` 文件路径不存在。
+            - **RuntimeError** - 从 `config_path` 加载配置文件失败。
+            - **RuntimeError** - 从缓冲区加载并构建模型失败。
+
     .. py:method:: build_from_file(model_path, model_type, context=None, config_path="", config_dict=None, dec_key=None, dec_mode="AES-GCM", dec_num_parallel=0)
 
         从文件加载并构建模型。
@@ -15,24 +63,13 @@ mindspore_lite.Model
             - **context** (Context，可选) - 定义上下文，用于在执行期间传递选项。默认值： ``None`` 。 ``None`` 表示设置target为cpu的Context。
             - **config_path** (str，可选) - 定义配置文件的路径，用于在构建模型期间传递用户定义选项。在以下场景中，用户可能需要设置参数。例如："/home/user/config.txt"。默认值： ``""`` 。
 
-              - **用法1** - 进行混合精度推理的设置，配置文件内容及说明如下：
+              进行混合精度推理的设置，配置文件内容及说明如下：
 
-                .. code-block::
-
-                    [execution_plan]
-                    [op_name1]=data_type:float16（名字为op_name1的算子设置数据类型为float16）
-                    [op_name2]=data_type:float32（名字为op_name2的算子设置数据类型为float32）
-
-              - **用法2** - 在使用GPU推理时，进行TensorRT设置，配置文件内容及说明如下：
-
-                .. code-block::
+              .. code-block::
 
-                    [ms_cache]
-                    serialize_path=[serialization model path]（序列化模型的存储路径）
-                    [gpu_context]
-                    input_shape=input_name:[input_dim]（模型输入维度，用于动态shape）
-                    dynamic_dims=[min_dim~max_dim]（模型输入的动态维度范围，用于动态shape）
-                    opt_dims=[opt_dim]（模型最优输入维度，用于动态shape）
+                  [execution_plan]
+                  [op_name1]=data_type:float16（名字为op_name1的算子设置数据类型为float16）
+                  [op_name2]=data_type:float32（名字为op_name2的算子设置数据类型为float32）
 
             - **config_dict** (dict，可选) - 配置参数字典，当使用该字典配置参数时，优先级高于配置文件。
 
diff --git a/include/api/model.h b/include/api/model.h
index d92c04b3..678b4183 100644
--- a/include/api/model.h
+++ b/include/api/model.h
@@ -51,13 +51,29 @@ class MS_API Model {
   /// \param[in] data_size Define bytes number of model buffer.
   /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. Only
   /// ModelType::kMindIR_Lite is valid for Device-side Inference. Cloud-side Inference supports options
-  /// ModelType::kMindIR and ModelType::kMindIR_Lite, but option odelType::kMindIR_Lite will be removed in future
+  /// ModelType::kMindIR and ModelType::kMindIR_Lite, but option ModelType::kMindIR_Lite will be removed in future
   /// iterations. \param[in] model_context Define the context used to store options during execution.
   ///
   /// \return Status. kSuccess: build success, kLiteModelRebuild: build model repeatedly, Other: other types of errors.
   Status Build(const void *model_data, size_t data_size, ModelType model_type,
                const std::shared_ptr<Context> &model_context = nullptr);
 
+  /// \brief Build a model from model buffer so that it can run on a device.
+  ///
+  /// \param[in] model_data Define the buffer read from a model file.
+  /// \param[in] data_size Define bytes number of model buffer.
+  /// \param[in] weight_data Define the buffer read from a weight file.
+  /// \param[in] weight_size Define bytes number of weight buffer.
+  /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite. Only
+  ///     ModelType::kMindIR_Lite is valid for Device-side Inference. Cloud-side Inference supports options
+  ///     ModelType::kMindIR and ModelType::kMindIR_Lite, but option ModelType::kMindIR_Lite will be removed in future
+  ///     iterations.
+  /// \param[in] model_context Define the context used to store options during execution.
+  ///
+  /// \return Status. kSuccess: build success, kLiteModelRebuild: build model repeatedly, Other: other types of errors.
+  Status Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+               ModelType model_type, const std::shared_ptr<Context> &model_context = nullptr);
+
   /// \brief Load and build a model from model buffer so that it can run on a device.
   ///
   /// \param[in] model_path Define the model path.
diff --git a/mindspore-lite/python/api/model.py b/mindspore-lite/python/api/model.py
index 9bbf1926..09054d5a 100644
--- a/mindspore-lite/python/api/model.py
+++ b/mindspore-lite/python/api/model.py
@@ -135,26 +135,13 @@ class Model(BaseModel):
                 options during build model. In the following scenarios, users may need to set the parameter.
                 For example, "/home/user/config.txt". Default: ``""``.
 
-                - Usage 1: Set mixed precision inference. The content and description of the configuration file are as
-                  follows:
+                Set mixed precision inference. The content and description of the configuration file are as follows:
 
-                  .. code-block::
-
-                      [execution_plan]
-                      [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
-                      [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)
-
-                - Usage 2: When GPU inference, set the configuration of TensorRT. The content and description of the
-                  configuration file are as follows:
-
-                  .. code-block::
+                .. code-block::
 
-                      [ms_cache]
-                      serialize_Path=[serialization model path](storage path of serialization model)
-                      [gpu_context]
-                      input_shape=input_Name: [input_dim] (Model input dimension, for dynamic shape)
-                      dynamic_Dims=[min_dim~max_dim] (dynamic dimension range of model input, for dynamic shape)
-                      opt_Dims=[opt_dim] (the optimal input dimension of the model, for dynamic shape)
+                    [execution_plan]
+                    [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
+                    [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)
 
             config_dict (dict, optional): When you set config in this dict, the priority is higher than the
                 configuration items in config_path.
@@ -224,21 +211,154 @@ class Model(BaseModel):
         model_type_ = _c_lite_wrapper.ModelType.kMindIR_Lite
         if model_type is ModelType.MINDIR:
             model_type_ = _c_lite_wrapper.ModelType.kMindIR
+
+        self._apply_config(config_path, config_dict)
+
+        if dec_key:
+            check_isinstance("dec_key", dec_key, bytes)
+            check_isinstance("dec_mode", dec_mode, str)
+            check_isinstance("dec_num_parallel", dec_num_parallel, int)
+            check_empty_string("dec_mode", dec_mode)
+            ret = self._model.build_from_file_with_decrypt(
+                self.model_path_, model_type_, context._context._inner_context,
+                dec_key, len(dec_key), dec_mode, dec_num_parallel)
+        else:
+            ret = self._model.build_from_file(
+                self.model_path_, model_type_, context._context._inner_context)
+        if not ret.IsOk():
+            raise RuntimeError(
+                f"build_from_file failed! Error is {ret.ToString()}")
+
+    @set_env
+    def build_from_buffer(
+        self,
+        model_bytes,
+        weight_bytes=None,
+        model_type=None,
+        context=None,
+        config_path="",
+        config_dict: dict = None,
+    ):
+        """
+        Load and build a model from buffer.
+
+        Args:
+            model_bytes (Bytes): Bytes of the mindir model when build from buffer.
+            weight_bytes (Bytes, optional): Bytes of the separate weight when build from buffer. Default: ``None``.
+            model_type (ModelType, optional): Define The type of input model file. Option is ``ModelType.MINDIR``.
+                Default: ``None``. For details, see
+                `ModelType <https://mindspore.cn/lite/api/en/master/mindspore_lite/mindspore_lite.ModelType.html>`_ .
+            context (Context, optional): Define the context used to transfer options during execution.
+                Default: ``None``. ``None`` means the Context with cpu target.
+            config_path (str, optional): Define the config file path. the config file is used to transfer user defined
+                options during build model. In the following scenarios, users may need to set the parameter.
+                For example, "/home/user/config.txt". Default: ``""``.
+
+                Set mixed precision inference. The content and description of the configuration
+                file are as follows:
+
+                .. code-block::
+
+                    [execution_plan]
+                    [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
+                    [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)
+
+
+            config_dict (dict, optional): When you set config in this dict, the priority is higher than the
+                configuration items in config_path. Default: ``None``.
+
+                Set rank table file for inference. The content of the configuration file is as follows:
+
+                .. code-block::
+
+                    [ascend_context]
+                    rank_table_file=[path_a](storage initial path of the rank table file)
+
+                When set
+
+                .. code-block::
+
+                    config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}}
+
+                The path_b from the config_dict will be used to compile the model.
+
+        Raises:
+            TypeError: `model_bytes` is not a Bytes.
+            TypeError: `weight_bytes` is neither a Bytes nor ``None``.
+            TypeError: `model_type` is not a ModelType.
+            TypeError: `context` is neither a Context nor ``None``.
+            TypeError: `config_path` is not a str.
+            RuntimeError: Length of `model_bytes` is 0.
+            RuntimeError: Value of `model_type` is is not ``ModelType.MINDIR``.
+            RuntimeError: `config_path` does not exist.
+            RuntimeError: Failed to load the configuration file from `config_path`.
+            RuntimeError: Failed to load and build the model from the buffer.
+
+        Examples:
+            >>> # Testcase 1: build from buffer with a single file mindir model.
+            >>> import mindspore_lite as mslite
+            >>> with open("mobilenetv2.mindir", "rb") as f:
+            >>>     model_bytes = f.read()
+            >>> model = mslite.Model()
+            >>> model.build_from_buffer(model_bytes, None, mslite.ModelType.MINDIR)
+            >>> print(model)
+            model_path: None.
+            >>> # Testcase 2: build from buffer with a separated weight model.
+            >>> import mindspore_lite as mslite
+            >>> with open("sd1.5_unet.onnx_graph.mindir", "rb") as f:
+            >>>     model_bytes = f.read()
+            >>> with open("sd1.5_unet.onnx_variables/data_0", "rb") as f:
+            >>>     weight_bytes = f.read()
+            >>> model = mslite.Model()
+            >>> context = mslite.Context()
+            >>> context.target = ["ascend"]
+            >>> model.build_from_buffer(model_bytes, weight_bytes, mslite.ModelType.MINDIR, context)
+            >>> print(model)
+            model_path: None.
+        """
+        check_isinstance("model_bytes", model_bytes, bytes)
+        if len(model_bytes) == 0:
+            raise RuntimeError("build_from_buffer failed, model_bytes is empty.")
+
+        check_isinstance("weight_bytes", weight_bytes, bytes, enable_none=True)
+        check_isinstance("model_type", model_type, ModelType)
+        if model_type != ModelType.MINDIR:
+            raise RuntimeError("build_from_buffer failed, model_type should be MINDIR")
+
+        model_type_ = _c_lite_wrapper.ModelType.kMindIR
+
+        if context is None:
+            context = Context()
+        check_isinstance("context", context, Context)
+        check_isinstance("config_path", config_path, str)
+
+        self.provider = context.ascend.provider
+
+        self.model_path_ = None
+
+        self._apply_config(config_path, config_dict)
+
+        ret = self._model.build_from_buff(model_bytes, weight_bytes, model_type_, context._context._inner_context)
+        if not ret.IsOk():
+            raise RuntimeError(f"build_from_buffer failed! Error is {ret.ToString()}")
+
+    def _apply_config(self, config_path, config_dict):
+        """
+        apply config for build
+        """
         if config_path:
             if not os.path.exists(config_path):
-                raise RuntimeError(
-                    "build_from_file failed, config_path does not exist!")
+                raise RuntimeError("build_from_file failed, config_path does not exist!")
             ret = self._model.load_config(config_path)
             if not ret.IsOk():
-                raise RuntimeError(
-                    f"load configuration failed! Error is {ret.ToString()}")
+                raise RuntimeError(f"load configuration failed! Error is {ret.ToString()}")
             parse_res = _parse_update_weight_config_name(config_path)
             if parse_res is not None and len(parse_res) >= 2:
                 update_names, self.lora_name_map = parse_res[0], parse_res[1]
                 if config_dict is None:
                     config_dict = {"ascend_context": {"variable_weights_list": update_names}}
                 else:
-                    config_dict['ascend_context']["variable_weights_list"] = update_names
+                    config_dict["ascend_context"]["variable_weights_list"] = update_names
 
         if config_dict:
             check_isinstance("config_dict", config_dict, dict)
@@ -253,21 +373,6 @@ class Model(BaseModel):
                 if not ret.IsOk():
                     raise RuntimeError(f"update configuration failed! Error is {ret.ToString()}.")
 
-        if dec_key:
-            check_isinstance("dec_key", dec_key, bytes)
-            check_isinstance("dec_mode", dec_mode, str)
-            check_isinstance("dec_num_parallel", dec_num_parallel, int)
-            check_empty_string("dec_mode", dec_mode)
-            ret = self._model.build_from_file_with_decrypt(
-                self.model_path_, model_type_, context._context._inner_context,
-                dec_key, len(dec_key), dec_mode, dec_num_parallel)
-        else:
-            ret = self._model.build_from_file(
-                self.model_path_, model_type_, context._context._inner_context)
-        if not ret.IsOk():
-            raise RuntimeError(
-                f"build_from_file failed! Error is {ret.ToString()}")
-
     def get_outputs(self):
         """
         Obtains all output information Tensors of the model.
diff --git a/mindspore-lite/python/src/model_pybind.cc b/mindspore-lite/python/src/model_pybind.cc
index 08b137d5..63cf6a94 100644
--- a/mindspore-lite/python/src/model_pybind.cc
+++ b/mindspore-lite/python/src/model_pybind.cc
@@ -153,6 +153,42 @@ Status PyModelBuild(Model *model, const std::string &model_path, ModelType model
   return kSuccess;
 }
 
+Status PyModelBuildFromBuffer(Model *model, py::bytes model_bytes, py::object weight_bytes, ModelType model_type,
+                              const std::shared_ptr<Context> &model_context) {
+  if (model_bytes.is_none()) {
+    MS_LOG(ERROR) << "model_bytes is None.";
+    return kLiteError;
+  }
+  void *model_ptr = nullptr;
+  ssize_t model_size = 0;
+  PYBIND11_BYTES_AS_STRING_AND_SIZE(model_bytes.ptr(), reinterpret_cast<char **>(&model_ptr), &model_size);
+  if (model_ptr == nullptr) {
+    MS_LOG(ERROR) << "model_ptr is nullptr.";
+    return kLiteError;
+  }
+  if (model_size == 0) {
+    MS_LOG(ERROR) << "model_size is 0.";
+    return kLiteError;
+  }
+  void *weight_ptr = nullptr;
+  ssize_t weight_size = 0;
+  if (!weight_bytes.is_none() && !py::isinstance<py::bytes>(weight_bytes)) {
+    MS_LOG(ERROR) << "weight_bytes should be bytes or None.";
+    return kLiteError;
+  }
+  if (!weight_bytes.is_none()) {
+    PYBIND11_BYTES_AS_STRING_AND_SIZE(weight_bytes.ptr(), reinterpret_cast<char **>(&weight_ptr), &weight_size);
+    if (weight_ptr == nullptr) {
+      MS_LOG(ERROR) << "weight_ptr is nullptr.";
+      return kLiteError;
+    }
+  }
+
+  py::gil_scoped_release release;
+  auto ret = model->Build(model_ptr, model_size, weight_ptr, weight_size, model_type, model_context);
+  return ret;
+}
+
 std::vector<MSTensorPtr> PyExecGetInputs(ModelExecutor *executor) {
   if (executor == nullptr) {
     MS_LOG(ERROR) << "ModelExecutor object cannot be nullptr!";
@@ -252,9 +288,7 @@ void ModelPyBind(const py::module &m) {
 
   (void)py::class_<Model, std::shared_ptr<Model>>(m, "ModelBind")
     .def(py::init<>())
-    .def("build_from_buff",
-         py::overload_cast<const void *, size_t, ModelType, const std::shared_ptr<Context> &>(&Model::Build),
-         py::call_guard<py::gil_scoped_release>())
+    .def("build_from_buff", PyModelBuildFromBuffer)
     .def("build_from_file",
          py::overload_cast<const std::string &, ModelType, const std::shared_ptr<Context> &>(&Model::Build),
          py::call_guard<py::gil_scoped_release>())
diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model.cc b/mindspore-lite/src/extendrt/cxx_api/model/model.cc
index b3f1ebce..ebd882d1 100644
--- a/mindspore-lite/src/extendrt/cxx_api/model/model.cc
+++ b/mindspore-lite/src/extendrt/cxx_api/model/model.cc
@@ -92,6 +92,25 @@ Status Model::Build(const void *model_data, size_t data_size, ModelType model_ty
   }
 }
 
+Status Model::Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+                    ModelType model_type, const std::shared_ptr<Context> &model_context) {
+  if (impl_ == nullptr) {
+    MS_LOG(ERROR) << "Model implement is null.";
+    return kLiteNullptr;
+  }
+  try {
+    Status ret = impl_->Build(model_data, data_size, weight_data, weight_size, model_type, model_context);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "impl_->Build failed! ret = " << ret;
+      return ret;
+    }
+    return kSuccess;
+  } catch (const std::exception &exe) {
+    MS_LOG(ERROR) << "Catch exception: " << exe.what();
+    return kCoreFailed;
+  }
+}
+
 Status Model::Build(const std::vector<char> &model_path, ModelType model_type,
                     const std::shared_ptr<Context> &model_context) {
   if (impl_ == nullptr) {
diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc
index c126365c..ad985911 100644
--- a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc
+++ b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc
@@ -134,6 +134,14 @@ Status PrimitivePyToC(const FuncGraphPtr &func_graph) {
   }
   return kSuccess;
 }
+
+std::string WeightBufferParamsDisplayStr(const void *weight_data, size_t weight_size) {
+  std::stringstream ss;
+  ss << (weight_data == nullptr ? " weight_data is nullptr." : " weight_data is not nullptr.")
+     << " weight_size: " << weight_size;
+  return ss.str();
+}
+
 }  // namespace
 
 void ModelImpl::SetMsContext() {
@@ -190,7 +198,51 @@ ConverterPlugin::ConverterFunc ConverterPlugin::GetConverterFuncInner() {
 
 ModelImpl::ModelImpl() : graph_(nullptr), session_(nullptr), context_(nullptr) {}
 
-FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t model_size, ModelType model_type,
+FuncGraphPtr ModelImpl::DispatchLoadGraph(const void *model_buff, size_t model_size, const void *weight_data,
+                                          size_t weight_size, const std::string &model_path) {
+  std::string weight_path = "./";
+  auto mindir_path = GetConfig(lite::kConfigModelFileSection, lite::kConfigMindIRPathKey);
+  std::string base_path = "";
+  if (!mindir_path.empty()) {
+    base_path = mindir_path;
+  } else {
+    // user does not set mindir_path, convert from model_path
+    base_path = model_path;
+  }
+  FuncGraphPtr func_graph;
+  std::string user_info_string;
+  bool build_from_file = weight_data == nullptr && weight_size == 0 && !base_path.empty();
+  bool build_from_buffer_model = weight_data == nullptr && weight_size == 0 && base_path.empty();
+  bool build_from_buffer_model_weight = weight_data != nullptr && weight_size != 0 && base_path.empty();
+  std::unique_lock<std::mutex> l(g_load_mindir_lock);
+  MindIRLoader mindir_loader(true, nullptr, 0, kDecModeAesGcm, false);
+  bool ret = false;
+  if (build_from_file) {
+    if (base_path.find("/") != std::string::npos) {
+      weight_path = base_path.substr(0, base_path.rfind("/"));
+    }
+    MS_LOG(INFO) << "model will build from file.";
+    ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_path, &func_graph, &user_info_string);
+  } else if (build_from_buffer_model || build_from_buffer_model_weight) {
+    MS_LOG(INFO) << "model will build from buffer.";
+    ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_data, weight_size, &func_graph, &user_info_string);
+  } else {
+    MS_LOG(ERROR) << "cannot determine how to build model."
+                  << " got:" << WeightBufferParamsDisplayStr(weight_data, weight_size) << " model_path: \""
+                  << model_path << "\"";
+  }
+  if (!ret || func_graph == nullptr) {
+    MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model.";
+    return nullptr;
+  }
+  if (!user_info_string.empty()) {
+    SetModelInfo(lite::KModelUserInfo, user_info_string);
+  }
+  return func_graph;
+}
+
+FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t model_size, const void *weight_data,
+                                              size_t weight_size, ModelType model_type,
                                               const std::shared_ptr<Context> &model_context,
                                               const std::string &model_path) {
   if (model_type != kMindIR) {
@@ -203,18 +255,7 @@ FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t mod
     MS_LOG(ERROR) << "UpdateSharingWorkspaceConfig failed!";
     return nullptr;
   }
-  auto mindir_path = GetConfig(lite::kConfigModelFileSection, lite::kConfigMindIRPathKey);
-  std::string weight_path = "./";
-  std::string base_path = "";
-  if (!mindir_path.empty()) {
-    base_path = mindir_path;
-  } else {
-    // user does not set mindir_path, convert from model_path
-    base_path = model_path;
-  }
-  if (base_path.find("/") != std::string::npos) {
-    weight_path = base_path.substr(0, base_path.rfind("/"));
-  }
+
   auto dump_path = GetConfig(lite::kAscendContextSection, lite::kDumpPathKey);
   if (!dump_path.empty()) {
     auto dir_pos = model_path.find_last_of('/');
@@ -224,20 +265,12 @@ FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t mod
     (void)UpdateConfig(lite::kAscendContextSection,
                        std::pair<std::string, std::string>(lite::kDumpModelNameKey, model_name));
   }
-  FuncGraphPtr func_graph;
-  std::string user_info_string;
-  {
-    std::unique_lock<std::mutex> l(g_load_mindir_lock);
-    MindIRLoader mindir_loader(true, nullptr, 0, kDecModeAesGcm, false);
-    auto ret = mindir_loader.LoadMindIR(model_buff, model_size, weight_path, &func_graph, &user_info_string);
-    if (!ret || func_graph == nullptr) {
-      MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model.";
-      return nullptr;
-    }
-    if (!user_info_string.empty()) {
-      SetModelInfo(lite::KModelUserInfo, user_info_string);
-    }
+  FuncGraphPtr func_graph = DispatchLoadGraph(model_buff, model_size, weight_data, weight_size, model_path);
+  if (func_graph == nullptr) {
+    MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model.";
+    return nullptr;
   }
+
   if (func_graph->get_attr(lite::kDynamicDimsKey) != nullptr) {
     auto dynamic_dims = GetValue<std::string>(func_graph->get_attr(lite::kDynamicDimsKey));
     SetModelInfo(lite::kDynamicDimsKey, dynamic_dims);
@@ -411,14 +444,23 @@ void ModelImpl::UpdateProvider() {
     }
   }
 }
-
-Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, ModelType model_type,
+Status ModelImpl::CheckBuildFromBuffer(ModelType model_type, const void *weight_data, size_t weight_size) {
+  if (model_type != kMindIR && (weight_data != nullptr || weight_size != 0)) {
+    MS_LOG(ERROR) << "Build from weight buffer is not support model_type:" << model_type
+                  << ". got: " << WeightBufferParamsDisplayStr(weight_data, weight_size);
+    return kLiteParamInvalid;
+  }
+  return kSuccess;
+}
+Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, const void *weight_data,
+                                    size_t weight_size, ModelType model_type,
                                     const std::shared_ptr<Context> &model_context, const std::string &model_path) {
   MS_CHECK_TRUE_MSG(model_buff != nullptr, kLiteError, "The input model buffer is nullptr!");
   MS_CHECK_TRUE_MSG(model_size != 0, kLiteError, "The input model buffer size is 0!");
-  if (model_context == nullptr) {
-    MS_LOG(ERROR) << "Invalid context pointers!";
-    return kLiteError;
+  MS_CHECK_TRUE_MSG(model_context != nullptr, kLiteError, "Invalid context pointers!");
+  auto ret = CheckBuildFromBuffer(model_type, weight_data, weight_size);
+  if (ret != kSuccess) {
+    return ret;
   }
   std::lock_guard<std::recursive_mutex> lock(mutex_);
   if (session_) {
@@ -457,7 +499,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M
     MS_LOG(ERROR) << "Create session failed!";
     return kLiteError;
   }
-  Status ret;
+
   if (model_type == kMindIR_Lite) {
     ret = session_->CompileGraph(model_buff, model_size, &graph_id_);
     if (ret != kSuccess) {
@@ -475,7 +517,8 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M
   }
 
   if (model_type != kOM) {
-    func_graph = LoadGraphByBufferImpl(model_buff, model_size, model_type, model_context, model_path);
+    func_graph =
+      LoadGraphByBufferImpl(model_buff, model_size, weight_data, weight_size, model_type, model_context, model_path);
     if (func_graph == nullptr) {
       MS_LOG(ERROR) << "Failed to load MindIR model, please check the validity of the model.";
       return kLiteError;
@@ -648,7 +691,12 @@ Status ModelImpl::Build(const FuncGraphPtr &func_graph, const std::shared_ptr<Co
 
 Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type,
                         const std::shared_ptr<Context> &model_context) {
-  return BuildByBufferImpl(model_data, data_size, model_type, model_context);
+  return BuildByBufferImpl(model_data, data_size, nullptr, 0, model_type, model_context);
+}
+
+Status ModelImpl::Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+                        ModelType model_type, const std::shared_ptr<Context> &model_context) {
+  return BuildByBufferImpl(model_data, data_size, weight_data, weight_size, model_type, model_context);
 }
 
 Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type,
@@ -668,7 +716,7 @@ Status ModelImpl::Build(const std::string &model_path, ModelType model_type,
     MS_LOG(ERROR) << "Failed to read buffer from model file.";
     return kLiteError;
   }
-  return BuildByBufferImpl(buffer.Data(), buffer.DataSize(), model_type, model_context, model_path);
+  return BuildByBufferImpl(buffer.Data(), buffer.DataSize(), nullptr, 0, model_type, model_context, model_path);
 }
 
 Status ModelImpl::ConvertGraphOnline(const FuncGraphPtr &func_graph, const std::shared_ptr<Context> &model_context) {
diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h
index bd3fa763..c79d03bd 100644
--- a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h
+++ b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.h
@@ -71,6 +71,20 @@ class ModelImpl {
   Status Build(const void *model_data, size_t data_size, ModelType model_type,
                const std::shared_ptr<Context> &model_context);
 
+  /// \brief Build a model from model buffer so that it can run on a device.
+  ///
+  /// \param[in] model_data Define the buffer read from a model file.
+  /// \param[in] data_size Define bytes number of model buffer.
+  /// \param[in] weight_data Define the buffer read from a weight file.
+  /// \param[in] weight_size Define bytes number of weight buffer.
+  /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite.
+  /// Only ModelType::kMindIR is valid for Lite.
+  /// \param[in] model_context Define the context used to store options during execution.
+  ///
+  /// \return Status.
+  Status Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+               ModelType model_type, const std::shared_ptr<Context> &model_context);
+
   /// \brief Build a model from a encrypted weight file and a graph buffer so that it can run on a device.
   ///
   /// \param[in] model_data Define the buffer of the loaded mindir_graph
@@ -225,13 +239,19 @@ class ModelImpl {
   ///
   /// \param[in] model_data Define the buffer read from a model file.
   /// \param[in] data_size Define bytes number of model buffer.
+  /// \param[in] weight_data Define the buffer read from a weight file.
+  /// \param[in] weight_size Define bytes number of weight buffer.
   /// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kMindIR_Lite.
-  /// Only ModelType::kMindIR is valid for Lite. \param[in] model_context Define the context used to store options
-  /// during execution. \param[in] model_path Define the model_path, this param is used for net and weight divided case.
+  /// Only ModelType::kMindIR is valid for Lite.
+  /// \param[in] model_context Define the context used to store options
+  /// during execution.
+  /// \param[in] model_path Define the model_path, this param is used for net and weight divided case.
   ///
   /// \return value of config as string type.
-  Status BuildByBufferImpl(const void *model_data, size_t data_size, ModelType model_type,
-                           const std::shared_ptr<Context> &model_context, const std::string &model_path = "");
+  Status BuildByBufferImpl(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+                           ModelType model_type, const std::shared_ptr<Context> &model_context,
+                           const std::string &model_path = "");
+
   /// \brief Model build by buffer implementation for encrypted file, unified model build flow.
   ///
   /// \param[in] model_data Define the buffer read from a model file.
@@ -246,7 +266,8 @@ class ModelImpl {
                            const std::shared_ptr<Context> &model_context, const std::string &model_path,
                            const CryptoInfo &cryptoInfo);
 
-  FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t data_size, ModelType model_type,
+  FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t data_size, const void *weight_data,
+                                     size_t weight_size, ModelType model_type,
                                      const std::shared_ptr<Context> &model_context, const std::string &model_path);
 
   FuncGraphPtr LoadGraphByBufferImpl(const void *model_data, size_t model_size, ModelType model_type,
@@ -272,6 +293,9 @@ class ModelImpl {
 
   Status UpdateSharingWorkspaceConfig(const void *model_buff, size_t model_size, const std::string &model_path);
   void UpdateProvider();
+  FuncGraphPtr DispatchLoadGraph(const void *model_buff, size_t model_size, const void *weight_data, size_t weight_size,
+                                 const std::string &model_path);
+  Status CheckBuildFromBuffer(ModelType model_type, const void *weight_data, size_t weight_size);
 
   friend class Model;
   friend class Serialization;
diff --git a/mindspore-lite/src/litert/cxx_api/model/model.cc b/mindspore-lite/src/litert/cxx_api/model/model.cc
index ea5aed46..7a4dd8a2 100644
--- a/mindspore-lite/src/litert/cxx_api/model/model.cc
+++ b/mindspore-lite/src/litert/cxx_api/model/model.cc
@@ -159,6 +159,12 @@ Status Model::Build(const void *model_data, size_t data_size, ModelType model_ty
   return kSuccess;
 }
 
+Status Model::Build(const void *model_data, size_t data_size, const void *weight_data, size_t weight_size,
+                    ModelType model_type, const std::shared_ptr<Context> &model_context) {
+  MS_LOG(ERROR) << "Build with weight buffer is only support for mindspore_lite's ascend backend.";
+  return kLiteError;
+}
+
 Status Model::Build(const std::vector<char> &model_path, ModelType model_type,
                     const std::shared_ptr<Context> &model_context, const Key &dec_key,
                     const std::vector<char> &dec_mode, const std::vector<char> &cropto_lib_path) {
diff --git a/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py b/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py
new file mode 100644
index 00000000..2ffaabed
--- /dev/null
+++ b/mindspore-lite/test/st/python/python_api/test_model_build_buffer.py
@@ -0,0 +1,291 @@
+# Copyright 2025 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Test load from buffer
+"""
+
+from typing import Tuple, List, Dict
+from dataclasses import dataclass, replace
+from pathlib import Path
+import subprocess
+import pytest
+import mindspore_lite as mslite
+import numpy as np
+from utils import expect_error
+
+
+@dataclass
+class ModelArgs:
+    model_path: str
+    weight_path: str = None
+    model_type: str = mslite.ModelType.MINDIR
+    device: str = "ascend"
+    use_ge: bool = False
+    config: dict = None
+    device_id: int = None
+    inputs: Tuple[np.ndarray] = None
+
+
+def _prepare_build_context(context, model_build_args: ModelArgs):
+    context.target = [model_build_args.device]
+    if model_build_args.device == "ascend":
+        if model_build_args.use_ge:
+            context.ascend.provider = "ge"
+        if model_build_args.device_id is not None:
+            context.ascend.device_id = model_build_args.device_id
+
+
+def build_model_from_file(model_build_args: ModelArgs):
+    """
+    build model
+    """
+    context = mslite.Context()
+    model = mslite.Model()
+
+    _prepare_build_context(context, model_build_args)
+
+    model.build_from_file(
+        model_build_args.model_path,
+        model_build_args.model_type,
+        context,
+        config_dict=model_build_args.config,
+    )
+    return model
+
+
+def build_model_from_buffer(model_build_args: ModelArgs):
+    """
+    build model
+    """
+    context = mslite.Context()
+    model = mslite.Model()
+
+    _prepare_build_context(context, model_build_args)
+
+    with open(model_build_args.model_path, "rb") as f:
+        model_bytes = f.read()
+
+    weight_bytes = None
+    if model_build_args.weight_path is not None:
+        with open(model_build_args.weight_path, "rb") as f:
+            weight_bytes = f.read()
+
+    model.build_from_buffer(
+        model_bytes,
+        weight_bytes,
+        model_build_args.model_type,
+        context,
+        config_dict=model_build_args.config,
+    )
+    return model
+
+
+def _fill_model_build_args(obj, output_dir, device_id):
+    """
+    prepare model build args
+    """
+    model_path = obj.model_path.format(output_dir=output_dir)
+    weight_path = obj.weight_path.format(output_dir=output_dir) if obj.weight_path is not None else None
+    return replace(obj, model_path=model_path, weight_path=weight_path, device_id=device_id)
+
+
+@pytest.fixture(scope="module", autouse=True)
+def module_setup_and_teardown_fixture(so_path, mindir_dir, output_dir):
+    """
+    module setup
+    convert bert model
+    """
+    # setup
+    # convert bert model
+    fmk = "ONNX"
+    model_path = Path(mindir_dir) / "bert_model.onnx"
+    acl_output_path = Path(output_dir) / "bert_model.onnx"
+    acl_optimize = "ascend_oriented"
+    acl_cmd = [
+        Path(so_path) / "tools/converter/converter/converter_lite",
+        f"--optimize={acl_optimize}",
+        f"--modelFile={model_path}",
+        f"--outputFile={acl_output_path}",
+        f"--fmk={fmk}",
+    ]
+    subprocess.run(acl_cmd, check=True)
+
+    cpu_output_path = Path(output_dir) / "bert_model.onnx.cpu"
+    cpu_optimize = "general"
+    cpu_cmd = [
+        Path(so_path) / "tools/converter/converter/converter_lite",
+        f"--optimize={cpu_optimize}",
+        f"--modelFile={model_path}",
+        f"--outputFile={cpu_output_path}",
+        f"--fmk={fmk}",
+    ]
+    subprocess.run(cpu_cmd, check=True)
+    empty_mindir = Path(output_dir) / "emptyfile"
+    empty_mindir.unlink(missing_ok=True)
+    empty_mindir.touch()
+    yield
+
+
+@pytest.mark.parametrize(
+    "args",
+    (
+        ModelArgs(
+            "{output_dir}/sd1.5_unet.onnx_graph.mindir",
+            "{output_dir}/sd1.5_unet.onnx_variables/data_0",
+            inputs=(
+                np.ones((2, 4, 64, 64)).astype(np.float32),
+                np.ones((1,)).astype(np.float32),
+                np.ones((2, 77, 768)).astype(np.float32),
+            ),
+        ),
+        ModelArgs(
+            "{output_dir}/bert_model.onnx.mindir",
+            None,
+            inputs=(
+                np.ones((1, 128)).astype(np.int32),
+                np.ones((1, 128)).astype(np.int32),
+                np.ones((1, 128)).astype(np.int32),
+            ),
+        ),
+        ModelArgs(
+            "{output_dir}/bert_model.onnx.mindir",
+            "{output_dir}/sd1.5_unet.onnx_variables/data_0",
+            inputs=(
+                np.ones((1, 128)).astype(np.int32),
+                np.ones((1, 128)).astype(np.int32),
+                np.ones((1, 128)).astype(np.int32),
+            ),
+        ),
+    ),
+)
+def test_build_from_buffer_correct(args: ModelArgs, output_dir: str, device_id: List[int]):
+    """
+    test model build form buffer
+    """
+    model_build_args = _fill_model_build_args(args, output_dir, device_id[0])
+
+    assert model_build_args.inputs is not None
+
+    model_from_file = build_model_from_file(model_build_args)
+    model_from_buffer = build_model_from_buffer(model_build_args)
+
+    model_input = [mslite.Tensor(tensor=i, device=f"ascend:{device_id[0]}") for i in model_build_args.inputs]
+
+    output_file = model_from_file.predict(model_input)
+    output_buffer = model_from_buffer.predict(model_input)
+
+    for of, ob in zip(output_file, output_buffer):
+        np.testing.assert_allclose(of.get_data_to_numpy(), ob.get_data_to_numpy())
+
+
+@pytest.mark.parametrize(
+    "args,error_type,msg",
+    (
+        (
+            ModelArgs(
+                "{output_dir}/sd1.5_unet.onnx_graph.mindir",
+                None,
+            ),
+            RuntimeError,
+            "build_from_buffer failed! Error is Common error code.",
+        ),
+        (
+            ModelArgs(
+                "{output_dir}/sd1.5_unet.onnx_graph.mindir",
+                "{output_dir}/emptyfile",
+            ),
+            RuntimeError,
+            "build_from_buffer failed! Error is Common error code.",
+        ),
+        (
+            ModelArgs(
+                "{output_dir}/emptyfile",
+                None,
+            ),
+            RuntimeError,
+            "build_from_buffer failed, model_bytes is empty.",
+        ),
+    ),
+)
+def test_build_from_buffer_lack_weight(
+    args: ModelArgs, error_type: Exception, msg: str, output_dir: str, device_id: List[int]
+):
+    """
+    test model build form buffer. lack weight
+    """
+    model_build_args = _fill_model_build_args(args, output_dir, device_id[0])
+
+    with expect_error(error_type) as exec_info:
+        build_model_from_buffer(model_build_args)
+    assert msg in str(exec_info.value)
+
+
+@pytest.mark.parametrize(
+    "build_args,error_type,msg",
+    (
+        # model_bytes
+        ({"model_bytes": None}, TypeError, "model_bytes must be bytes"),
+        ({"model_bytes": bytes()}, RuntimeError, "build_from_buffer failed, model_bytes is empty."),
+        # weight_bytes
+        ({"model_bytes": b"0", "weight_bytes": str()}, TypeError, "weight_bytes must be bytes"),
+        # model_type
+        (
+            {"model_bytes": b"0"},
+            TypeError,
+            "model_type must be ModelType",
+        ),
+        (
+            {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR_LITE},
+            RuntimeError,
+            "build_from_buffer failed, model_type should be MINDIR",
+        ),
+        # context
+        (
+            {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "context": str()},
+            TypeError,
+            "context must be Context",
+        ),
+        # config_path
+        (
+            {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "config_path": 1},
+            TypeError,
+            "config_path must be str",
+        ),
+        # config_dict
+        (
+            {"model_bytes": b"0", "model_type": mslite.ModelType.MINDIR, "config_dict": 1},
+            TypeError,
+            "config_dict must be dict",
+        ),
+    ),
+)
+def test_build_from_buffer_arg_type(
+    build_args: Dict, error_type: Exception, msg: str, output_dir: str, device_id: List[int]
+):
+    """
+    test model build form buffer. check args type.
+    """
+    args = ModelArgs(str())
+    model_build_args = _fill_model_build_args(args, output_dir, device_id[0])
+
+    with expect_error(error_type) as exec_info:
+        context = mslite.Context()
+        model = mslite.Model()
+
+        _prepare_build_context(context, model_build_args)
+
+        model.build_from_buffer(**build_args)
+
+    assert msg in str(exec_info.value)
-- 
Gitee