From f8875e978307b4b9fe8f307ccf2ffedc64010ed8 Mon Sep 17 00:00:00 2001 From: Bellatan Date: Wed, 22 Oct 2025 10:19:05 +0800 Subject: [PATCH] add hiai delegate support offline npu infer. Co-authored-by: Bellatan Co-authored-by: laoyu --- mindspore-lite/include/model.h | 2 + .../common/ops/populate/custom_populate.cc | 60 ++-- mindspore-lite/src/common/prim_util.cc | 5 +- .../src/litert/delegate/delegate_utils.cc | 21 ++ .../src/litert/delegate/delegate_utils.h | 7 + .../src/litert/delegate/npu/npu_delegate.cc | 200 +++++++++++- .../src/litert/delegate/npu/npu_delegate.h | 9 + .../delegate/npu/offline_model_kernel.cc | 284 ++++++++++++++++++ .../delegate/npu/offline_model_kernel.h | 64 ++++ mindspore-lite/src/litert/infer_manager.cc | 4 + .../src/litert/kernel/cpu/base/custom_base.cc | 42 +++ .../src/litert/kernel/cpu/base/custom_base.h | 43 +++ .../src/litert/kernel/cpu/nnacl_c/op_base.h | 1 + mindspore-lite/src/litert/kernel_registry.cc | 7 +- mindspore-lite/src/litert/lite_session.cc | 3 + mindspore-lite/src/litert/scheduler.cc | 35 ++- 16 files changed, 761 insertions(+), 26 deletions(-) create mode 100644 mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc create mode 100644 mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h create mode 100644 mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc create mode 100644 mindspore-lite/src/litert/kernel/cpu/base/custom_base.h diff --git a/mindspore-lite/include/model.h b/mindspore-lite/include/model.h index a72b19c6..098b0c79 100644 --- a/mindspore-lite/include/model.h +++ b/mindspore-lite/include/model.h @@ -42,6 +42,7 @@ struct MS_API LiteGraph { std::vector output_indices_; int quant_type_; int device_type_ = -1; + Node() = default; }; struct SubGraph { std::string name_; @@ -49,6 +50,7 @@ struct MS_API LiteGraph { std::vector output_indices_; std::vector node_indices_; std::vector tensor_indices_; + SubGraph() = default; }; std::string name_; std::string version_; diff --git a/mindspore-lite/src/common/ops/populate/custom_populate.cc b/mindspore-lite/src/common/ops/populate/custom_populate.cc index 6bb3b944..85689387 100644 --- a/mindspore-lite/src/common/ops/populate/custom_populate.cc +++ b/mindspore-lite/src/common/ops/populate/custom_populate.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#include +#include #include #include #include "src/common/ops/populate/populate_register.h" @@ -27,6 +29,7 @@ #include "nnacl_c/scatter_nd_parameter.h" #include "nnacl_c/conv3d_parameter.h" #include "nnacl_c/grid_sampler_parameter.h" +#include "nnacl_c/op_base.h" using mindspore::schema::PrimitiveType_Custom; @@ -175,6 +178,36 @@ OpParameter *CreateGridSamplerParameter(const schema::Custom *value) { return reinterpret_cast(param); } +OpParameter *CreateNpuOfflineModelParameter(const void *prim) { + auto *param = static_cast(malloc(sizeof(CustomParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "Malloc NpuOfflineModel Parameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomParameter)); + param->op_parameter_.type_ = PrimType_Inner_ThirdPartyModel; + // The offline model only uses the attr_data [0] field, and other fields do not need to be assigned values. 
+ param->attr_data[0] = static_cast(const_cast(prim)); + return reinterpret_cast(param); +} + +OpParameter *AllocOpParameter(std::string type) { + static std::unordered_map str_to_prim_type_map = { + {"ShapeFusion", PrimType_Inner_ShapeFusion}, + {"ReduceConcatFusion", PrimType_Inner_ReduceConcatFusion}, + {"EncoderLayer", PrimType_Inner_EncoderLayer}, + {"DecoderLayer", PrimType_Inner_DecoderLayer}, + {"UsePastEmbedding", PrimType_Inner_UsePastEmbedding}, + {"FSEDecode", PrimType_Inner_FseDecode}, + {"CastGatherReduceFusion", PrimType_Inner_CastGatherReduceFusion}, + }; + auto it = str_to_prim_type_map.find(type); + if (it != str_to_prim_type_map.end()) { + return CreateParam(str_to_prim_type_map[type]); + } + return nullptr; +} + OpParameter *PopulateCustomParameter(const void *prim) { MS_CHECK_TRUE_RET(prim != nullptr, nullptr); auto primitive = static_cast(prim); @@ -185,9 +218,7 @@ OpParameter *PopulateCustomParameter(const void *prim) { } MS_CHECK_TRUE_RET(value->type() != nullptr, nullptr); std::string type = value->type()->c_str(); - if (type == "ShapeFusion") { - return CreateParam(PrimType_Inner_ShapeFusion); - } else if (type == "GraphKernel") { + if (type == "GraphKernel") { auto *param = static_cast(malloc(sizeof(CustomParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc CustomParameter failed."; @@ -200,20 +231,8 @@ OpParameter *PopulateCustomParameter(const void *prim) { return reinterpret_cast(param); } else if (type == "SplitReduceConcatFusion") { return PopulateSplitReduceConcatFusionParam(value); - } else if (type == "ReduceConcatFusion") { - return CreateParam(PrimType_Inner_ReduceConcatFusion); - } else if (type == "EncoderLayer") { - return CreateParam(PrimType_Inner_EncoderLayer); - } else if (type == "DecoderLayer") { - return CreateParam(PrimType_Inner_DecoderLayer); - } else if (type == "UsePastEmbedding") { - return CreateParam(PrimType_Inner_UsePastEmbedding); - } else if (type == "FSEDecode") { - return CreateParam(PrimType_Inner_FseDecode); } else if (type == "CustomGRU") { return CreateCustomGruParameter(); - } else if (type == "CastGatherReduceFusion") { - return CreateParam(PrimType_Inner_CastGatherReduceFusion); } else if (type == "MaskedFill") { return CreateCustomMaskedFillParameter(); } else if (type == "TensorScatterMax") { @@ -226,10 +245,17 @@ OpParameter *PopulateCustomParameter(const void *prim) { return CreateGridSamplerParameter(value); } else if (type.compare(0, 10, "Custom_FT_") == 0) { return CreateParam(PrimType_Custom); + } else if (type == "ThirdPartyModel") { + return CreateNpuOfflineModelParameter(prim); } else { - MS_LOG(WARNING) << "Unsupported custom type: " << type; + auto param = AllocOpParameter(type); + if (param == nullptr) { + MS_LOG(WARNING) << "Unsupported custom type: " << type; + return nullptr; + } else { + return param; + } } - return nullptr; } REG_POPULATE(PrimType_Custom, PopulateCustomParameter, SCHEMA_CUR) diff --git a/mindspore-lite/src/common/prim_util.cc b/mindspore-lite/src/common/prim_util.cc index d640815d..ce1eb867 100644 --- a/mindspore-lite/src/common/prim_util.cc +++ b/mindspore-lite/src/common/prim_util.cc @@ -36,7 +36,8 @@ static const char *const kInnerOpNames[C20NUM] = {"Inner_ToFormat", "I "Inner_CustomGru", "Inner_CastGatherReduceFusion", "Inner_ReduceConcatFusion", "Inner_AclCustomOp", "Inner_CustomMaskedFill", "Inner_CustomTensorScatterMax", - "Inner_CustomIsInf"}; + "Inner_CustomIsInf", "Inner_Conv3D", + "Inner_GridSampler", "Inner_ThirdPartyModel"}; int GetPrimitiveType(const void 
*primitive, int schema_version) { if (primitive == nullptr) { return -1; @@ -56,6 +57,8 @@ const char *PrimitiveCurVersionTypeName(int type) { return schema::EnumNamePrimitiveType(static_cast(type)); } else if (type >= static_cast(schema::PrimitiveType_MAX)) { if (type >= PrimType_InnerOpMin && type < PrimType_InnerOpMax) { + MS_LOG(INFO) << "Current real type index:" << type << ", expected type index:" << (type - PrimType_InnerOpMin) + << "."; return kInnerOpNames[type - PrimType_InnerOpMin]; } } diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.cc b/mindspore-lite/src/litert/delegate/delegate_utils.cc index c9aeeb11..0b95c6c0 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.cc +++ b/mindspore-lite/src/litert/delegate/delegate_utils.cc @@ -81,4 +81,25 @@ void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size) { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input) { return std::find(inputs.begin(), inputs.end(), input) != inputs.end(); } + +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype) { + static const std::unordered_map ms_dtype_to_hiai_dtype_map = { + {DataType::kNumberTypeUInt8, hiai::HIAI_DataType::HIAI_DATATYPE_UINT8}, + {DataType::kNumberTypeInt8, hiai::HIAI_DataType::HIAI_DATATYPE_INT8}, + {DataType::kNumberTypeInt16, hiai::HIAI_DataType::HIAI_DATATYPE_INT16}, + {DataType::kNumberTypeInt32, hiai::HIAI_DataType::HIAI_DATATYPE_INT32}, + {DataType::kNumberTypeUInt32, hiai::HIAI_DataType::HIAI_DATATYPE_UINT32}, + {DataType::kNumberTypeInt64, hiai::HIAI_DataType::HIAI_DATATYPE_INT64}, + {DataType::kNumberTypeFloat16, hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT16}, + {DataType::kNumberTypeFloat32, hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT32}, + {DataType::kNumberTypeFloat64, hiai::HIAI_DataType::HIAI_DATATYPE_DOUBLE}, + }; + auto it = ms_dtype_to_hiai_dtype_map.find(ms_dtype); + if (it != ms_dtype_to_hiai_dtype_map.end()) { + return it->second; + } + return hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT32; +} +#endif } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.h b/mindspore-lite/src/litert/delegate/delegate_utils.h index 5843699c..228daf75 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.h +++ b/mindspore-lite/src/litert/delegate/delegate_utils.h @@ -20,6 +20,9 @@ #include "src/common/log_adapter.h" #include "include/errorcode.h" #include "nnacl_c/op_base.h" +#ifdef SUPPORT_NPU +#include "include/HiAiModelManagerService.h" +#endif namespace mindspore::lite { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input); @@ -32,6 +35,10 @@ int MaskDataNHWC2NCHWBinary(int mask); void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size); +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype); +#endif + template void AssistDataNHWC2NCHW(void *raw_data, size_t unit_size) { MS_ASSERT(raw_data != nullptr); diff --git a/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc b/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc index 92e8f26f..2db0113a 100644 --- a/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc +++ b/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc @@ -60,10 +60,15 @@ #include "src/litert/delegate/npu/pass/npu_transform_pass.h" #include "src/litert/delegate/npu/pass/npu_insert_transform_pass.h" #include "src/litert/delegate/npu/pass/npu_fusion_pass.h" +#include "src/litert/delegate/npu/offline_model_kernel.h" using 
mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +namespace { +constexpr int32_t kNum2 = 2; +} // namespace + namespace mindspore::lite { NPUDelegate::~NPUDelegate() { if (npu_manager_ != nullptr) { @@ -76,6 +81,7 @@ NPUDelegate::~NPUDelegate() { delete pass_manager_; pass_manager_ = nullptr; } + FreeLiteGraph(&lite_graph_); } Status NPUDelegate::AddPasses() { @@ -197,11 +203,195 @@ Status NPUDelegate::Init() { return mindspore::kSuccess; } -Status NPUDelegate::Build(DelegateModel *model) { +void NPUDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { + std::vector node_list; + node_list.reserve(lite_graph.all_nodes_.size()); + MS_LOG(INFO) << "HIAIDelegate ShallowCopyLiteGraph start."; + // copy node + for (auto node : lite_graph.all_nodes_) { + auto new_node = new (std::nothrow) LiteGraph::Node(*node); + if (new_node == nullptr) { + MS_LOG(ERROR) << "New LiteGraph node failed. Origin node:" << node->name_; + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + node_list.emplace_back(new_node); + } + // copy subgraph + std::vector subgraph_list; + for (auto subgraph : lite_graph.sub_graphs_) { + auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph(*subgraph); + if (new_subgraph == nullptr) { + MS_LOG(ERROR) << "New LiteGraph::Subgraph failed. Origin graph:" << subgraph->name_; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + subgraph_list.emplace_back(new_subgraph); + } + // check tensor + for (auto tensor : lite_graph.all_tensors_) { + bool ret = CheckTensorSupported(static_cast(tensor)); + if (!ret) { + MS_LOG(ERROR) << "Tensor supported check failed."; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + } + + lite_graph_ = new (std::nothrow) lite::LiteGraph(); + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "New LiteGraph failed."; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + + lite_graph_->name_ = lite_graph.name_; + lite_graph_->version_ = lite_graph.version_; + lite_graph_->input_indices_ = lite_graph.input_indices_; + lite_graph_->output_indices_ = lite_graph.output_indices_; + lite_graph_->all_tensors_ = lite_graph.all_tensors_; + lite_graph_->all_nodes_ = node_list; + lite_graph_->sub_graphs_ = subgraph_list; + MS_LOG(INFO) << "NPUDelegate ShallowCopyLiteGraph success. 
all_tensors_ size " << lite_graph_->all_tensors_.size() + << " all_nodes_ size " << lite_graph_->all_nodes_.size() << " sub_graphs_ size " + << lite_graph_->sub_graphs_.size() << " sub_graphs_[0] input_indices_ size " + << lite_graph_->sub_graphs_[0]->input_indices_.size() << " sub_graphs_[0] output_indices_ size " + << lite_graph_->sub_graphs_[0]->output_indices_.size(); +} + +void NPUDelegate::FreeLiteGraph(lite::LiteGraph **liteGraph) { + if (liteGraph != nullptr && *liteGraph != nullptr) { + MS_LOG(INFO) << "start to free LiteGraph."; + auto graph = *liteGraph; + MS_LOG(INFO) << "Destroying nodes."; + + for (size_t idx = 0; idx < graph->all_nodes_.size(); idx++) { + if (graph->all_nodes_[idx] != nullptr) { + delete graph->all_nodes_[idx]; + graph->all_nodes_[idx] = nullptr; + } + } + MS_LOG(INFO) << "Destroying subgraphs."; + + for (size_t idx = 0; idx < graph->sub_graphs_.size(); idx++) { + if (graph->sub_graphs_[idx] != nullptr) { + delete graph->sub_graphs_[idx]; + graph->sub_graphs_[idx] = nullptr; + } + } + delete graph; + *liteGraph = nullptr; + } else { + MS_LOG(WARNING) << "npu_lite_graph is nullptr, no need to free."; + } +} + +bool NPUDelegate::IsCustomModel() const { + // check if there is only one Cutsom kernel in LiteModel. + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "Current lite graph is null."; + return false; + } + if (lite_graph_->all_nodes_.size() != 1) { + MS_LOG(ERROR) << "Current node num in lite graph is:" << lite_graph_->all_nodes_.size() << "."; + return false; + } + auto node = lite_graph_->all_nodes_[0]; + if (node == nullptr) { + MS_LOG(ERROR) << "Current node is null in lite graph."; + return false; + } + if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) { + MS_LOG(ERROR) << "Current node type is:" << node->node_type_ << ", expected type is PrimitiveType_Custom."; + return false; + } + return true; +} + +bool NPUDelegate::CheckTensorSupported(const schema::Tensor *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "primitive is nullptr, which type is Tensor."; + return false; + } + + int32_t data_type = primitive->dataType(); + if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) { + MS_LOG(ERROR) << "invalid data type. 
" << data_type; + return false; + } + + if (primitive->dims() == nullptr) { + MS_LOG(ERROR) << "Dims of tensor is nullptr"; + return false; + } + + if (data_type == kObjectTypeTensorType) { + MS_LOG(ERROR) << "Not support TensorList."; + return false; + } + + if (primitive->data() == nullptr || primitive->data()->size() <= 0) { + MS_LOG(DEBUG) << "No valid data converted."; + return true; + } + return true; +} + +Status NPUDelegate::buildOfflineModel(DelegateModel *model) { + MS_LOG(INFO) << "enable npu offline model infer."; + + // Get Node Tensor + auto node = lite_graph_->all_nodes_[0]; + MS_CHECK_TRUE_RET(node != nullptr, kLiteError); + auto input_num = node->input_indices_.size(); + + // at least one input and one OM model buffer(as the last constant input) + MS_CHECK_TRUE_RET(input_num >= kNum2, kLiteError); + MS_CHECK_TRUE_RET(lite_graph_->all_tensors_.size() >= kNum2, kLiteError); + auto input_tensor = lite_graph_->all_tensors_[node->input_indices_[0]]; + MS_CHECK_TRUE_RET(input_tensor != nullptr, kLiteError); + auto model_tensor = lite_graph_->all_tensors_[node->input_indices_[input_num - 1]]; + MS_CHECK_TRUE_RET(model_tensor != nullptr, kLiteError); + MS_CHECK_TRUE_RET(model_tensor->data() != nullptr, kLiteError); + uint8_t *model_buffer = const_cast(model_tensor->data()->data()); + size_t model_size = model_tensor->data()->size(); + MS_LOG(DEBUG) << "Model input size:" << model->inputs().size() << ", output size:" << model->outputs().size() << "."; + // create offlineModelKernel + auto offline_model_kernel = + new (std::nothrow) OfflineModelKernel(model->inputs(), model->outputs(), model_buffer, model_size); + if (offline_model_kernel == nullptr) { + MS_LOG(ERROR) << "new OfflineModelKernel failed."; + return mindspore::kLiteError; + } + (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), offline_model_kernel); + MS_LOG(INFO) << "Replace kernel in NPUDelegate success."; + return mindspore::kSuccess; +} + +Status NPUDelegate::buildOnlineModel(DelegateModel *model) { KernelIter from; KernelIter end; std::vector npu_ops; int graph_index = 0; + + MS_LOG(INFO) << "enable npu online model infer."; for (auto iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) { kernel::Kernel *kernel = *iter; auto npu_op = GetOP(kernel, model->GetPrimitive(kernel)); @@ -243,6 +433,14 @@ Status NPUDelegate::Build(DelegateModel *model) { return mindspore::kSuccess; } +Status NPUDelegate::Build(DelegateModel *model) { + if (IsCustomModel()) { + return buildOfflineModel(model); + } else { + return buildOnlineModel(model); + } +} + NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) { if (primitive == nullptr) { MS_LOG(ERROR) << "primitive is NULL!"; diff --git a/mindspore-lite/src/litert/delegate/npu/npu_delegate.h b/mindspore-lite/src/litert/delegate/npu/npu_delegate.h index 17b43a34..79038567 100644 --- a/mindspore-lite/src/litert/delegate/npu/npu_delegate.h +++ b/mindspore-lite/src/litert/delegate/npu/npu_delegate.h @@ -21,10 +21,12 @@ #include #include #include "include/api/delegate.h" +#include "include/model.h" #include "src/litert/delegate/npu/npu_manager.h" #include "src/litert/delegate/npu/pass/npu_pass_manager.h" #include "src/litert/delegate/npu/op/npu_op.h" #include "src/litert/inner_context.h" +#include "src/litert/delegate/npu/offline_model_kernel.h" namespace mindspore::lite { class NPUDelegate : public Delegate { @@ -39,6 +41,12 @@ class NPUDelegate : public Delegate { Status Init() override; Status 
Build(DelegateModel *model) override; + void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); + void FreeLiteGraph(lite::LiteGraph **liteGraph); + bool IsCustomModel() const; + bool CheckTensorSupported(const schema::Tensor *primitive); + Status buildOfflineModel(DelegateModel *model); + Status buildOnlineModel(DelegateModel *model); protected: NPUOp *GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive); @@ -48,6 +56,7 @@ class NPUDelegate : public Delegate { Status AddPasses(); + LiteGraph *lite_graph_ = nullptr; NPUManager *npu_manager_ = nullptr; NPUPassManager *pass_manager_ = nullptr; std::map op_func_lists_; diff --git a/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc new file mode 100644 index 00000000..b7bf1941 --- /dev/null +++ b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc @@ -0,0 +1,284 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "src/litert/delegate/npu/offline_model_kernel.h" +#include "litert/cxx_api/tensor/tensor_impl.h" +#include "include/api/data_type.h" + +namespace { +constexpr int32_t kHiaiFrequencyType = 3; // HIGH +constexpr int32_t kHiaiDeviceType = 0; // NPU +const char kHiaiModelName[] = "Third_Party_Model"; +} // namespace + +namespace mindspore { +OfflineModelKernel::~OfflineModelKernel() { + model_manager_client_ = nullptr; + model_builder_ = nullptr; + for (auto t : offline_model_inputs_tensors_) { + t.reset(); + } + + for (auto t : offline_model_outputs_tensors_) { + t.reset(); + } +} + +int OfflineModelKernel::Prepare() { + model_manager_client_ = std::make_shared(); + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelMngerClient failed."; + return lite::RET_ERROR; + } + model_builder_ = std::make_shared(model_manager_client_); + if (model_builder_ == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelBuilder failed."; + return lite::RET_ERROR; + } + auto client_ret = model_manager_client_->Init(nullptr); // sync mode + if (client_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Init modelBuilder failed."; + return lite::RET_ERROR; + } + // Build Model + int build_ret = BuildHiaiModel(offline_model_buffer_, offline_model_size_); + if (build_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Build offline model buffer failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel build offline model buffer success."; + return lite::RET_OK; +} + +int OfflineModelKernel::BuildHiaiModel(uint8_t *model_data, size_t model_data_length) { + MS_LOG(INFO) << "OfflineModelKernel Build Function start."; + MS_CHECK_TRUE_RET(model_data_length != 0, kLiteError); + if (model_data == nullptr) { + MS_LOG(ERROR) << "Current model_data is invalid, please check model file."; + return lite::RET_ERROR; + } + void *offline_model_data = model_data; + if (kHiaiFrequencyType == -1 || kHiaiDeviceType == -1) { + MS_LOG(ERROR) << "Create model description failed. 
Current kHiaiFrequencyType is :" << kHiaiFrequencyType + << ", kHiaiDeviceType:" << kHiaiDeviceType << "."; + return lite::RET_ERROR; + } + std::vector> model_descs; + std::unordered_map, hiai::MemBuffer *> builder_buffer_map; + MS_LOG(INFO) << "Create model description: version [" << model_manager_client_->GetVersion() + << "], kHiaiFrequencyType is " << kHiaiFrequencyType << ", kHiaiDeviceType is " << kHiaiDeviceType + << "."; + std::shared_ptr model_desc = + std::make_shared(kHiaiModelName, kHiaiFrequencyType, 0, 1, kHiaiDeviceType); + if (model_desc == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelDescription failed."; + return lite::RET_ERROR; + } + model_descs.push_back(model_desc); + auto model_buffer = model_builder_->InputMemBufferCreate(offline_model_data, model_data_length); + if (model_buffer == nullptr) { + MS_LOG(ERROR) << "Hiai Model Builder input memory buffer create failed, model data size:" << model_data_length; + return lite::RET_ERROR; + } + builder_buffer_map.insert({model_builder_, model_buffer}); + model_desc->SetModelBuffer(model_buffer->GetMemBufferData(), model_buffer->GetMemBufferSize()); + MS_LOG(INFO) << "Hiai Model Builder set offline model buffer success."; + + if (!model_descs.empty()) { + auto load_ret = model_manager_client_->Load(model_descs); + if (load_ret != hiai::AI_SUCCESS) { + for (auto it : builder_buffer_map) { + it.first->MemBufferDestroy(it.second); + } + builder_buffer_map.clear(); + MS_LOG(ERROR) << "Hiai Client load offline model failed and clear offline model buffer."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "Hiai Client load offline model success."; + model_descs.clear(); + } + // Init OfflineModel IO tensor + if (InitHiaiIOTensors() != lite::RET_OK) { + MS_LOG(ERROR) << "OfflineModelKernel InitHiaiIOTensors failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel init offline model tensor and load model success."; + return lite::RET_OK; +} + +int OfflineModelKernel::InitHiaiIOTensors() { + std::vector input_dimension; + std::vector output_dimension; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai Client is nullptr."; + return lite::RET_ERROR; + } + auto get_io_dim_ret = model_manager_client_->GetModelIOTensorDim(kHiaiModelName, input_dimension, output_dimension); + if (get_io_dim_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "Get offline model input and output tensor dims failed." << get_io_dim_ret; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "Input NCHW :" << input_dimension[0].GetNumber() << " " << input_dimension[0].GetChannel() << " " + << input_dimension[0].GetHeight() << " " << input_dimension[0].GetWidth(); + MS_LOG(DEBUG) << "Output NCHW :" << output_dimension[0].GetNumber() << " " << output_dimension[0].GetChannel() << " " + << output_dimension[0].GetHeight() << " " << output_dimension[0].GetWidth(); + + MS_LOG(DEBUG) << "Init input ai_tensors."; + auto in_tensor_ret = InitHiaiTensorWithMSTensor(input_dimension, inputs_, offline_model_inputs_tensors_); + if (in_tensor_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Update offline model input tensor vector failed. " << in_tensor_ret; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "Init output ai_tensors."; + auto out_tensor_ret = InitHiaiTensorWithMSTensor(output_dimension, outputs_, offline_model_outputs_tensors_); + if (out_tensor_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Update offline model output tensor vector failed. 
" << out_tensor_ret; + return lite::RET_ERROR; + } + return lite::RET_OK; +} + +int OfflineModelKernel::InitHiaiTensorWithMSTensor( + const std::vector &dimension, const std::vector &ms_tensors, + std::vector> &offline_model_tensors) { + if (dimension.empty()) { + MS_LOG(ERROR) << "Offline model tensor dimension is empty."; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << " dimension size:" << dimension.size(); + for (int i = 0; i < dimension.size(); i++) { + std::shared_ptr ai_tensor = std::make_shared(); + if (ai_tensor == nullptr) { + MS_LOG(ERROR) << "Alloc AiTensor failed."; + return lite::RET_ERROR; + } + if (ai_tensor->Init(&dimension[i], lite::MSDataTypeToHIAIDataType(ms_tensors[i].DataType())) != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "AiTensor init failed."; + return lite::RET_ERROR; + } + offline_model_tensors.push_back(ai_tensor); + } + return lite::RET_OK; +} + +int OfflineModelKernel::Execute() { + // Get MS INPUT Tensors + MS_LOG(INFO) << "Before OfflineModelKernel execute, MSTensorData need to be copy to AiTensorData. Inputs_ size: " + << inputs_.size() << ", outputs_ size: " << outputs_.size(); + auto ms_hiai_ret = CopyMSTensorsDataToHiaiTensorsData(); + if (ms_hiai_ret != lite::RET_OK) { + MS_LOG(ERROR) << "CopyMSTensorsDataToHiaiTensorsData failed."; + return lite::RET_ERROR; + } + auto execute_ret = ExecuteHiaiModel(); + if (execute_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "OfflineModelKernel ExecuteHiaiModel failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel ExecuteHiaiModel success."; + auto hiai_ms_ret = CopyHiaiTensorsDataToMSTensorsData(); + if (hiai_ms_ret != lite::RET_OK) { + MS_LOG(ERROR) << "ConvertAiTensorToMSTensor failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel ExecuteHiaiModel done, and CopyHiaiTensorsDataToMSTensorsData success."; + return lite::RET_OK; +} + +int OfflineModelKernel::ExecuteHiaiModel() { + hiai::AiContext context; + std::string key = "model_name"; + std::string value = kHiaiModelName; + context.AddPara(key, value); + int32_t stamp; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai client is nullptr."; + return lite::RET_ERROR; + } + int ret = + model_manager_client_->Process(context, offline_model_inputs_tensors_, offline_model_outputs_tensors_, 3000, stamp); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "OfflineModelKernel Predict failed by Hiai client using Process function."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel Predict model success, ret:" << ret << " stamp:" << stamp; + return lite::RET_OK; +} + +int OfflineModelKernel::CopyMSTensorsDataToHiaiTensorsData() { + MS_LOG(INFO) << "ConvertMSTensorToAiTensor ms_input tensor num:" << inputs_.size() + << " ai_input tensor num:" << offline_model_inputs_tensors_.size(); + if (offline_model_inputs_tensors_.size() != inputs_.size()) { + MS_LOG(ERROR) << "ms_input and ai_input have different size. ms_input tensor num:" << inputs_.size() + << " ai_input tensor num:" << offline_model_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < offline_model_inputs_tensors_.size(); i++) { + if (offline_model_inputs_tensors_.at(i)->GetSize() != inputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_input and ai_input have different dataSize. 
ms_input tensor dataSize " + << inputs_.at(i).DataSize() + << " ai_input tensor num:" << offline_model_inputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = inputs_.at(i).MutableData(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ms_input at [" << i + << "], tensor name:" << inputs_.at(i).Name() << " buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = offline_model_inputs_tensors_.at(i)->GetBuffer(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ai_input at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + std::memcpy(dest_buffer, src_buffer, inputs_.at(i).DataSize()); + } + MS_LOG(INFO) << "ConvertMSTensorToAiTensor success."; + return lite::RET_OK; +} + +int OfflineModelKernel::CopyHiaiTensorsDataToMSTensorsData() { + MS_LOG(INFO) << "ConvertAiTensorToMSTensor ms_output tensor num:" << outputs_.size() + << " ai_output tensor num:" << offline_model_outputs_tensors_.size(); + if (offline_model_outputs_tensors_.size() != outputs_.size()) { + MS_LOG(ERROR) << "ms_output and ai_output have different size. ms_output tensor num:" << outputs_.size() + << " ai_output tensor num:" << offline_model_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < offline_model_outputs_tensors_.size(); i++) { + if (offline_model_outputs_tensors_.at(i)->GetSize() != outputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_output and ai_output have different dataSize. ms_output tensor dataSize " + << outputs_.at(i).DataSize() + << " ai_output tensor num:" << offline_model_outputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = offline_model_outputs_tensors_.at(i)->GetBuffer(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ai_output at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = outputs_.at(i).MutableData(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ms_output at [" << i + << "], tensor name:" << outputs_.at(i).Name() << " buffer is null."; + return lite::RET_ERROR; + } + std::memcpy(dest_buffer, src_buffer, offline_model_outputs_tensors_.at(i)->GetSize()); + } + MS_LOG(INFO) << "ConvertAiTensorToMSTensor success."; + return lite::RET_OK; +} +} // namespace mindspore diff --git a/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h new file mode 100644 index 00000000..d108cb8a --- /dev/null +++ b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h @@ -0,0 +1,64 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef LITE_OFFLINE_MODEL_KERNEL_H +#define LITE_OFFLINE_MODEL_KERNEL_H +#include +#include +#include +#include +#include "include/api/kernel.h" +#include "src/common/log_adapter.h" +#include "src/litert/inner_context.h" +#include "include/errorcode.h" +#include "include/HiAiModelManagerService.h" +#include "src/litert/delegate/delegate_utils.h" + +namespace mindspore { +class OfflineModelKernel : public kernel::Kernel { + /** + * We decide to make the whole model into one kernel. + * */ + public: + OfflineModelKernel(const std::vector &inputs, const std::vector &outputs, + uint8_t *offline_model_buffer, size_t offline_model_size) + : kernel::Kernel(inputs, outputs, nullptr, nullptr), + offline_model_buffer_(offline_model_buffer), + offline_model_size_(offline_model_size) {} + int Prepare() override; + int ReSize() override { return kSuccess; } + int Execute() override; + ~OfflineModelKernel(); + + private: + int BuildHiaiModel(uint8_t *modelData, size_t modelDataLength); + int InitHiaiIOTensors(); + int InitHiaiTensorWithMSTensor(const std::vector &input_dimension, + const std::vector &ms_tensors, + std::vector> &offline_model_tensors); + + int ExecuteHiaiModel(); + int CopyMSTensorsDataToHiaiTensorsData(); + int CopyHiaiTensorsDataToMSTensorsData(); + + std::shared_ptr model_manager_client_ = nullptr; + std::shared_ptr model_builder_ = nullptr; + uint8_t *offline_model_buffer_; + size_t offline_model_size_; + std::vector> offline_model_inputs_tensors_; + std::vector> offline_model_outputs_tensors_; +}; +} // namespace mindspore +#endif // LITE_OFFLINE_MODEL_KERNEL_H diff --git a/mindspore-lite/src/litert/infer_manager.cc b/mindspore-lite/src/litert/infer_manager.cc index 6d7e7c20..5aceef86 100644 --- a/mindspore-lite/src/litert/infer_manager.cc +++ b/mindspore-lite/src/litert/infer_manager.cc @@ -189,6 +189,10 @@ int KernelInferShape(const std::vector &inputs, const std::vecto MS_LOG(ERROR) << "No input!"; return RET_ERROR; } + if (parameter->type_ == static_cast(PrimType_Inner_ThirdPartyModel)) { + MS_LOG(INFO) << "No need infer shape for PrimType_Inner_ThirdPartyModel."; + return RET_OK; + } std::vector in_tensors; std::vector out_tensors; int ret = GenerateInTensorC(inputs, &in_tensors, allocator); diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc new file mode 100644 index 00000000..d5dcb57b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/cpu/base/custom_base.h" +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "nnacl_c/op_base.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore::kernel { +int CustomBaseCPUKernel::Prepare() { return RET_OK; } + +int CustomBaseCPUKernel::ReSize() { return RET_OK; } + +int CustomBaseCPUKernel::Run() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeUInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h new file mode 100644 index 00000000..25262321 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h @@ -0,0 +1,43 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ + +#include +#include "src/litert/lite_kernel.h" +#include "nnacl_c/custom_parameter.h" + +namespace mindspore::kernel { +class CustomBaseCPUKernel : public LiteKernel { + public: + CustomBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) { + custom_param_ = reinterpret_cast(op_parameter_); + } + ~CustomBaseCPUKernel() override = default; + + int Prepare() override; + int ReSize() override; + int Run() override; + + private: + CustomParameter *custom_param_ = nullptr; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ diff --git a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h index bbe93692..f4482a5f 100644 --- a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h +++ b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h @@ -571,6 +571,7 @@ enum PrimType { PrimType_Inner_CustomIsInf = 10016, PrimType_Inner_Conv3D = 10017, PrimType_Inner_GridSampler = 10018, + PrimType_Inner_ThirdPartyModel = 10019, PrimType_InnerOpMax, PrimType_InnerOpMin = PrimType_Inner_ToFormat }; diff --git a/mindspore-lite/src/litert/kernel_registry.cc b/mindspore-lite/src/litert/kernel_registry.cc index e0dcb63e..2d6230a1 100644 --- a/mindspore-lite/src/litert/kernel_registry.cc +++ b/mindspore-lite/src/litert/kernel_registry.cc @@ -178,6 +178,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con registry::KernelDesc desc{static_cast(key.data_type), key.type, key.kernel_arch, key.provider}; auto creator = registry::RegisterKernel::GetCreator(static_cast(primitive), &desc); if (creator == nullptr) { + MS_LOG(INFO) << "Not support to get Creator."; return RET_NOT_SUPPORT; } @@ -205,6 +206,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con } } #endif + MS_LOG(ERROR) << "Common base kernel registry failed."; return RET_ERROR; } @@ -257,6 +259,7 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const if (ret == RET_OK) { (*kernel)->set_context(ctx); } + MS_LOG(INFO) << "Get kernel " << (ret == RET_OK ? "succeed." 
: "failed."); return ret; } #endif @@ -271,10 +274,12 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const kernel_exec->set_desc(modify_key); kernel_exec->set_context(ctx); *kernel = kernel_exec; + MS_LOG(INFO) << "Get Lite Kernel succeed for type:" << PrimitiveCurVersionTypeName(key.type) + << " by type index:" << key.type << "."; return RET_OK; } } - MS_LOG(WARNING) << "common cpu kernel registry failed"; + MS_LOG(ERROR) << "common cpu kernel registry for lite_kernel failed."; return RET_ERROR; } } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/lite_session.cc b/mindspore-lite/src/litert/lite_session.cc index f6c834d5..4b4da451 100644 --- a/mindspore-lite/src/litert/lite_session.cc +++ b/mindspore-lite/src/litert/lite_session.cc @@ -645,6 +645,7 @@ int LiteSession::CompileGraph(Model *model) { this->context_->set_infer_checker(InferCheckerAll); } is_running_.store(false); + MS_LOG(INFO) << "CompileGraph for current model success."; return RET_OK; } @@ -782,6 +783,7 @@ int LiteSession::PrepareKernels(const Model *model) { MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; return ret; } + MS_LOG(INFO) << "Set Allocator For Delegate Kernels success."; } if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) { @@ -1094,6 +1096,7 @@ int LiteSession::InitDelegate() { } if (ret != RET_OK) { + MS_LOG(ERROR) << "Create Delegate failed. ret info: " << ret; return ret; } if (delegate_ != nullptr) { diff --git a/mindspore-lite/src/litert/scheduler.cc b/mindspore-lite/src/litert/scheduler.cc index f5f4a709..117945e2 100644 --- a/mindspore-lite/src/litert/scheduler.cc +++ b/mindspore-lite/src/litert/scheduler.cc @@ -58,6 +58,9 @@ #if defined(MSLITE_ENABLE_CLOUD_INFERENCE) && defined(ENABLE_MINDRT) #include "thread/parallel_thread_pool_manager.h" #endif +#ifdef SUPPORT_NPU +#include "src/litert/delegate/npu/npu_delegate.h" +#endif using AbstractBaseModel = mindspore::infer::AbstractBaseModel; @@ -255,6 +258,13 @@ int Scheduler::CheckCpuValid(const std::vector *dst_kernel if (context_->IsDeviceTypeEnabled(DT_CPU)) { return RET_OK; } + // Custom model + if (dst_kernels->size() == 1 && (*dst_kernels)[0]->name() == "Custom" && + (*dst_kernels)[0]->desc().arch == kernel::KERNEL_ARCH::kDelegate) { + MS_LOG(INFO) << "kernel type is " << (*dst_kernels)[0]->name() << " and device type is " + << (*dst_kernels)[0]->desc().arch << "."; + return RET_OK; + } for (auto kernel : *dst_kernels) { if (kernel->desc().arch == kernel::KERNEL_ARCH::kCPU) { MS_LOG(ERROR) << "kernel: " << kernel->name() << " only support in CPU."; @@ -406,6 +416,13 @@ int Scheduler::Schedule(std::vector *dst_kernels) { return ret; } + for (auto kernel : *dst_kernels) { + MS_LOG(DEBUG) << "kernel: [" << kernel->name() << "] " + << "TypeId(" << kernel->desc().data_type << "); " + << "OpType(" << PrimitiveCurVersionTypeName(kernel->desc().type) << "); " + << "format(" << kernel->desc().format << "); " + << "arch(" << kernel->desc().arch << ")"; + } ret = CheckCpuValid(dst_kernels); if (ret != RET_OK) { MS_LOG(ERROR) << "kernels invalid in set devices."; @@ -501,6 +518,11 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker MS_LOG(ERROR) << "New delegate model failed."; return RET_NULL_PTR; } + +#ifdef SUPPORT_NPU + auto delegate = static_cast(delegate_.get()); + delegate->ShallowCopyLiteGraph(this->src_model_->graph_); +#endif auto ret = delegate_->Build(model); if (ret != mindspore::kSuccess) { delete model; @@ -1001,8 
+1023,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: MS_CHECK_TRUE_MSG(op_parameter != nullptr, RET_ERROR, "op parameter is nullptr."); auto op_type = op_parameter->type_; if (!KernelRegistry::GetInstance()->SupportKernel(desc)) { - MS_LOG(INFO) << "unsupported op_type: " << PrimitiveCurVersionTypeName(op_type) - << ", data_type: " << desc.data_type; + MS_LOG(INFO) << "Unsupported op_type index: " << op_type << ", op_type: " << PrimitiveCurVersionTypeName(op_type) + << ", data_type: " << desc.data_type << "."; return RET_NOT_SUPPORT; } kernel::KernelKey cpu_desc = desc; @@ -1041,8 +1063,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, cpu_desc, op_parameter, kernel); if (ret == RET_OK) { - MS_LOG(DEBUG) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type - << ") op success: " << PrimitiveCurVersionTypeName(op_type); + MS_LOG(INFO) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type + << ") op success: " << PrimitiveCurVersionTypeName(op_type); if (is_train_session_) { ret = (*kernel)->Prepare(); RestoreTensorData(&restored_origin_tensors); @@ -1288,8 +1310,9 @@ kernel::KernelExec *Scheduler::FindBackendKernel(const std::vector &in if (status == RET_OK) { return kernel; } else { - MS_LOG(DEBUG) << "Get fp16 op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(desc.type) << " " - << node->name_; + MS_LOG(INFO) << "Get op failed, scheduler to CPU by node: " << node->name_ << ", prefer_data_type is " + << (prefer_data_type == kNumberTypeFloat16 ? "fp16" : "unknown") << ", desc.type index:" << desc.type + << ", desc.type:" << PrimitiveCurVersionTypeName(desc.type) << "."; if (status == RET_ERROR) { op_parameters_.erase(node->output_indices_.at(0)); auto ret = InferNodeShape(node); -- Gitee
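
Usage sketch (illustrative only, not part of the patch): one way the offline NPU path added here could be exercised from the unified C++ API. The model file name "third_party_model.ms", the kMindIR ModelType constant, and the zero-filled inputs are assumptions for the example; what the patch itself implies is only that the converted .ms model holds a single Custom node of type "ThirdPartyModel" whose last constant input carries the HiAI OM buffer, and that the Kirin NPU device is enabled so NPUDelegate::Build() takes the buildOfflineModel() branch and replaces the graph with an OfflineModelKernel.

// Hypothetical end-to-end caller of the offline NPU inference path.
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>
#include "include/api/context.h"
#include "include/api/model.h"
#include "include/api/status.h"
#include "include/api/types.h"

int main() {
  auto context = std::make_shared<mindspore::Context>();
  auto &devices = context->MutableDeviceInfo();
  // Enable the Kirin NPU so the NPU delegate is created; keep CPU as a fallback device.
  auto npu_device = std::make_shared<mindspore::KirinNPUDeviceInfo>();
  npu_device->SetFrequency(3);  // high frequency
  devices.push_back(npu_device);
  devices.push_back(std::make_shared<mindspore::CPUDeviceInfo>());

  mindspore::Model model;
  // kMindIR is used for illustration; the exact ModelType for .ms files depends on the release in use.
  auto status = model.Build("third_party_model.ms", mindspore::kMindIR, context);
  if (status != mindspore::kSuccess) {
    std::cerr << "Build failed: " << status.ToString() << std::endl;
    return -1;
  }

  // Fill each graph input; zeros here only to keep the sketch self-contained.
  auto inputs = model.GetInputs();
  for (auto &tensor : inputs) {
    auto *data = tensor.MutableData();
    if (data != nullptr) {
      std::memset(data, 0, tensor.DataSize());
    }
  }

  // Predict() routes through OfflineModelKernel::Execute(): MS tensor data is copied into
  // HiAI AiTensors, AiModelMngerClient::Process runs the OM model, and outputs are copied back.
  std::vector<mindspore::MSTensor> outputs;
  status = model.Predict(inputs, &outputs);
  if (status != mindspore::kSuccess) {
    std::cerr << "Predict failed: " << status.ToString() << std::endl;
    return -1;
  }
  std::cout << "Offline NPU inference produced " << outputs.size() << " outputs." << std::endl;
  return 0;
}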