From f8875e978307b4b9fe8f307ccf2ffedc64010ed8 Mon Sep 17 00:00:00 2001 From: Bellatan Date: Wed, 22 Oct 2025 10:19:05 +0800 Subject: [PATCH] add hiai delegate support offline npu infer. Co-authored-by: Bellatan Co-authored-by: laoyu --- mindspore-lite/include/model.h | 2 + .../common/ops/populate/custom_populate.cc | 60 ++-- mindspore-lite/src/common/prim_util.cc | 5 +- .../src/litert/delegate/delegate_utils.cc | 21 ++ .../src/litert/delegate/delegate_utils.h | 7 + .../src/litert/delegate/npu/npu_delegate.cc | 200 +++++++++++- .../src/litert/delegate/npu/npu_delegate.h | 9 + .../delegate/npu/offline_model_kernel.cc | 284 ++++++++++++++++++ .../delegate/npu/offline_model_kernel.h | 64 ++++ mindspore-lite/src/litert/infer_manager.cc | 4 + .../src/litert/kernel/cpu/base/custom_base.cc | 42 +++ .../src/litert/kernel/cpu/base/custom_base.h | 43 +++ .../src/litert/kernel/cpu/nnacl_c/op_base.h | 1 + mindspore-lite/src/litert/kernel_registry.cc | 7 +- mindspore-lite/src/litert/lite_session.cc | 3 + mindspore-lite/src/litert/scheduler.cc | 35 ++- 16 files changed, 761 insertions(+), 26 deletions(-) create mode 100644 mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc create mode 100644 mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h create mode 100644 mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc create mode 100644 mindspore-lite/src/litert/kernel/cpu/base/custom_base.h diff --git a/mindspore-lite/include/model.h b/mindspore-lite/include/model.h index a72b19c6..098b0c79 100644 --- a/mindspore-lite/include/model.h +++ b/mindspore-lite/include/model.h @@ -42,6 +42,7 @@ struct MS_API LiteGraph { std::vector output_indices_; int quant_type_; int device_type_ = -1; + Node() = default; }; struct SubGraph { std::string name_; @@ -49,6 +50,7 @@ struct MS_API LiteGraph { std::vector output_indices_; std::vector node_indices_; std::vector tensor_indices_; + SubGraph() = default; }; std::string name_; std::string version_; diff --git a/mindspore-lite/src/common/ops/populate/custom_populate.cc b/mindspore-lite/src/common/ops/populate/custom_populate.cc index 6bb3b944..85689387 100644 --- a/mindspore-lite/src/common/ops/populate/custom_populate.cc +++ b/mindspore-lite/src/common/ops/populate/custom_populate.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#include +#include #include #include #include "src/common/ops/populate/populate_register.h" @@ -27,6 +29,7 @@ #include "nnacl_c/scatter_nd_parameter.h" #include "nnacl_c/conv3d_parameter.h" #include "nnacl_c/grid_sampler_parameter.h" +#include "nnacl_c/op_base.h" using mindspore::schema::PrimitiveType_Custom; @@ -175,6 +178,36 @@ OpParameter *CreateGridSamplerParameter(const schema::Custom *value) { return reinterpret_cast(param); } +OpParameter *CreateNpuOfflineModelParameter(const void *prim) { + auto *param = static_cast(malloc(sizeof(CustomParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "Malloc NpuOfflineModel Parameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomParameter)); + param->op_parameter_.type_ = PrimType_Inner_ThirdPartyModel; + // The offline model only uses the attr_data [0] field, and other fields do not need to be assigned values. 
+ param->attr_data[0] = static_cast(const_cast(prim)); + return reinterpret_cast(param); +} + +OpParameter *AllocOpParameter(std::string type) { + static std::unordered_map str_to_prim_type_map = { + {"ShapeFusion", PrimType_Inner_ShapeFusion}, + {"ReduceConcatFusion", PrimType_Inner_ReduceConcatFusion}, + {"EncoderLayer", PrimType_Inner_EncoderLayer}, + {"DecoderLayer", PrimType_Inner_DecoderLayer}, + {"UsePastEmbedding", PrimType_Inner_UsePastEmbedding}, + {"FSEDecode", PrimType_Inner_FseDecode}, + {"CastGatherReduceFusion", PrimType_Inner_CastGatherReduceFusion}, + }; + auto it = str_to_prim_type_map.find(type); + if (it != str_to_prim_type_map.end()) { + return CreateParam(str_to_prim_type_map[type]); + } + return nullptr; +} + OpParameter *PopulateCustomParameter(const void *prim) { MS_CHECK_TRUE_RET(prim != nullptr, nullptr); auto primitive = static_cast(prim); @@ -185,9 +218,7 @@ OpParameter *PopulateCustomParameter(const void *prim) { } MS_CHECK_TRUE_RET(value->type() != nullptr, nullptr); std::string type = value->type()->c_str(); - if (type == "ShapeFusion") { - return CreateParam(PrimType_Inner_ShapeFusion); - } else if (type == "GraphKernel") { + if (type == "GraphKernel") { auto *param = static_cast(malloc(sizeof(CustomParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc CustomParameter failed."; @@ -200,20 +231,8 @@ OpParameter *PopulateCustomParameter(const void *prim) { return reinterpret_cast(param); } else if (type == "SplitReduceConcatFusion") { return PopulateSplitReduceConcatFusionParam(value); - } else if (type == "ReduceConcatFusion") { - return CreateParam(PrimType_Inner_ReduceConcatFusion); - } else if (type == "EncoderLayer") { - return CreateParam(PrimType_Inner_EncoderLayer); - } else if (type == "DecoderLayer") { - return CreateParam(PrimType_Inner_DecoderLayer); - } else if (type == "UsePastEmbedding") { - return CreateParam(PrimType_Inner_UsePastEmbedding); - } else if (type == "FSEDecode") { - return CreateParam(PrimType_Inner_FseDecode); } else if (type == "CustomGRU") { return CreateCustomGruParameter(); - } else if (type == "CastGatherReduceFusion") { - return CreateParam(PrimType_Inner_CastGatherReduceFusion); } else if (type == "MaskedFill") { return CreateCustomMaskedFillParameter(); } else if (type == "TensorScatterMax") { @@ -226,10 +245,17 @@ OpParameter *PopulateCustomParameter(const void *prim) { return CreateGridSamplerParameter(value); } else if (type.compare(0, 10, "Custom_FT_") == 0) { return CreateParam(PrimType_Custom); + } else if (type == "ThirdPartyModel") { + return CreateNpuOfflineModelParameter(prim); } else { - MS_LOG(WARNING) << "Unsupported custom type: " << type; + auto param = AllocOpParameter(type); + if (param == nullptr) { + MS_LOG(WARNING) << "Unsupported custom type: " << type; + return nullptr; + } else { + return param; + } } - return nullptr; } REG_POPULATE(PrimType_Custom, PopulateCustomParameter, SCHEMA_CUR) diff --git a/mindspore-lite/src/common/prim_util.cc b/mindspore-lite/src/common/prim_util.cc index d640815d..ce1eb867 100644 --- a/mindspore-lite/src/common/prim_util.cc +++ b/mindspore-lite/src/common/prim_util.cc @@ -36,7 +36,8 @@ static const char *const kInnerOpNames[C20NUM] = {"Inner_ToFormat", "I "Inner_CustomGru", "Inner_CastGatherReduceFusion", "Inner_ReduceConcatFusion", "Inner_AclCustomOp", "Inner_CustomMaskedFill", "Inner_CustomTensorScatterMax", - "Inner_CustomIsInf"}; + "Inner_CustomIsInf", "Inner_Conv3D", + "Inner_GridSampler", "Inner_ThirdPartyModel"}; int GetPrimitiveType(const void 
*primitive, int schema_version) { if (primitive == nullptr) { return -1; @@ -56,6 +57,8 @@ const char *PrimitiveCurVersionTypeName(int type) { return schema::EnumNamePrimitiveType(static_cast(type)); } else if (type >= static_cast(schema::PrimitiveType_MAX)) { if (type >= PrimType_InnerOpMin && type < PrimType_InnerOpMax) { + MS_LOG(INFO) << "Current real type index:" << type << ", expected type index:" << (type - PrimType_InnerOpMin) + << "."; return kInnerOpNames[type - PrimType_InnerOpMin]; } } diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.cc b/mindspore-lite/src/litert/delegate/delegate_utils.cc index c9aeeb11..0b95c6c0 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.cc +++ b/mindspore-lite/src/litert/delegate/delegate_utils.cc @@ -81,4 +81,25 @@ void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size) { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input) { return std::find(inputs.begin(), inputs.end(), input) != inputs.end(); } + +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype) { + static const std::unordered_map ms_dtype_to_hiai_dtype_map = { + {DataType::kNumberTypeUInt8, hiai::HIAI_DataType::HIAI_DATATYPE_UINT8}, + {DataType::kNumberTypeInt8, hiai::HIAI_DataType::HIAI_DATATYPE_INT8}, + {DataType::kNumberTypeInt16, hiai::HIAI_DataType::HIAI_DATATYPE_INT16}, + {DataType::kNumberTypeInt32, hiai::HIAI_DataType::HIAI_DATATYPE_INT32}, + {DataType::kNumberTypeUInt32, hiai::HIAI_DataType::HIAI_DATATYPE_UINT32}, + {DataType::kNumberTypeInt64, hiai::HIAI_DataType::HIAI_DATATYPE_INT64}, + {DataType::kNumberTypeFloat16, hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT16}, + {DataType::kNumberTypeFloat32, hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT32}, + {DataType::kNumberTypeFloat64, hiai::HIAI_DataType::HIAI_DATATYPE_DOUBLE}, + }; + auto it = ms_dtype_to_hiai_dtype_map.find(ms_dtype); + if (it != ms_dtype_to_hiai_dtype_map.end()) { + return it->second; + } + return hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT32; +} +#endif } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.h b/mindspore-lite/src/litert/delegate/delegate_utils.h index 5843699c..228daf75 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.h +++ b/mindspore-lite/src/litert/delegate/delegate_utils.h @@ -20,6 +20,9 @@ #include "src/common/log_adapter.h" #include "include/errorcode.h" #include "nnacl_c/op_base.h" +#ifdef SUPPORT_NPU +#include "include/HiAiModelManagerService.h" +#endif namespace mindspore::lite { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input); @@ -32,6 +35,10 @@ int MaskDataNHWC2NCHWBinary(int mask); void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size); +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype); +#endif + template void AssistDataNHWC2NCHW(void *raw_data, size_t unit_size) { MS_ASSERT(raw_data != nullptr); diff --git a/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc b/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc index 92e8f26f..2db0113a 100644 --- a/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc +++ b/mindspore-lite/src/litert/delegate/npu/npu_delegate.cc @@ -60,10 +60,15 @@ #include "src/litert/delegate/npu/pass/npu_transform_pass.h" #include "src/litert/delegate/npu/pass/npu_insert_transform_pass.h" #include "src/litert/delegate/npu/pass/npu_fusion_pass.h" +#include "src/litert/delegate/npu/offline_model_kernel.h" using 
mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +namespace { +constexpr int32_t kNum2 = 2; +} // namespace + namespace mindspore::lite { NPUDelegate::~NPUDelegate() { if (npu_manager_ != nullptr) { @@ -76,6 +81,7 @@ NPUDelegate::~NPUDelegate() { delete pass_manager_; pass_manager_ = nullptr; } + FreeLiteGraph(&lite_graph_); } Status NPUDelegate::AddPasses() { @@ -197,11 +203,195 @@ Status NPUDelegate::Init() { return mindspore::kSuccess; } -Status NPUDelegate::Build(DelegateModel *model) { +void NPUDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { + std::vector node_list; + node_list.reserve(lite_graph.all_nodes_.size()); + MS_LOG(INFO) << "HIAIDelegate ShallowCopyLiteGraph start."; + // copy node + for (auto node : lite_graph.all_nodes_) { + auto new_node = new (std::nothrow) LiteGraph::Node(*node); + if (new_node == nullptr) { + MS_LOG(ERROR) << "New LiteGraph node failed. Origin node:" << node->name_; + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + node_list.emplace_back(new_node); + } + // copy subgraph + std::vector subgraph_list; + for (auto subgraph : lite_graph.sub_graphs_) { + auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph(*subgraph); + if (new_subgraph == nullptr) { + MS_LOG(ERROR) << "New LiteGraph::Subgraph failed. Origin graph:" << subgraph->name_; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + subgraph_list.emplace_back(new_subgraph); + } + // check tensor + for (auto tensor : lite_graph.all_tensors_) { + bool ret = CheckTensorSupported(static_cast(tensor)); + if (!ret) { + MS_LOG(ERROR) << "Tensor supported check failed."; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + } + + lite_graph_ = new (std::nothrow) lite::LiteGraph(); + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "New LiteGraph failed."; + for (auto cur_subgraph : subgraph_list) { + delete cur_subgraph; + } + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + + lite_graph_->name_ = lite_graph.name_; + lite_graph_->version_ = lite_graph.version_; + lite_graph_->input_indices_ = lite_graph.input_indices_; + lite_graph_->output_indices_ = lite_graph.output_indices_; + lite_graph_->all_tensors_ = lite_graph.all_tensors_; + lite_graph_->all_nodes_ = node_list; + lite_graph_->sub_graphs_ = subgraph_list; + MS_LOG(INFO) << "NPUDelegate ShallowCopyLiteGraph success. 
all_tensors_ size " << lite_graph_->all_tensors_.size() + << " all_nodes_ size " << lite_graph_->all_nodes_.size() << " sub_graphs_ size " + << lite_graph_->sub_graphs_.size() << " sub_graphs_[0] input_indices_ size " + << lite_graph_->sub_graphs_[0]->input_indices_.size() << " sub_graphs_[0] output_indices_ size " + << lite_graph_->sub_graphs_[0]->output_indices_.size(); +} + +void NPUDelegate::FreeLiteGraph(lite::LiteGraph **liteGraph) { + if (liteGraph != nullptr && *liteGraph != nullptr) { + MS_LOG(INFO) << "start to free LiteGraph."; + auto graph = *liteGraph; + MS_LOG(INFO) << "Destroying nodes."; + + for (size_t idx = 0; idx < graph->all_nodes_.size(); idx++) { + if (graph->all_nodes_[idx] != nullptr) { + delete graph->all_nodes_[idx]; + graph->all_nodes_[idx] = nullptr; + } + } + MS_LOG(INFO) << "Destroying subgraphs."; + + for (size_t idx = 0; idx < graph->sub_graphs_.size(); idx++) { + if (graph->sub_graphs_[idx] != nullptr) { + delete graph->sub_graphs_[idx]; + graph->sub_graphs_[idx] = nullptr; + } + } + delete graph; + *liteGraph = nullptr; + } else { + MS_LOG(WARNING) << "npu_lite_graph is nullptr, no need to free."; + } +} + +bool NPUDelegate::IsCustomModel() const { + // check if there is only one Cutsom kernel in LiteModel. + if (lite_graph_ == nullptr) { + MS_LOG(ERROR) << "Current lite graph is null."; + return false; + } + if (lite_graph_->all_nodes_.size() != 1) { + MS_LOG(ERROR) << "Current node num in lite graph is:" << lite_graph_->all_nodes_.size() << "."; + return false; + } + auto node = lite_graph_->all_nodes_[0]; + if (node == nullptr) { + MS_LOG(ERROR) << "Current node is null in lite graph."; + return false; + } + if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) { + MS_LOG(ERROR) << "Current node type is:" << node->node_type_ << ", expected type is PrimitiveType_Custom."; + return false; + } + return true; +} + +bool NPUDelegate::CheckTensorSupported(const schema::Tensor *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "primitive is nullptr, which type is Tensor."; + return false; + } + + int32_t data_type = primitive->dataType(); + if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) { + MS_LOG(ERROR) << "invalid data type. 
" << data_type; + return false; + } + + if (primitive->dims() == nullptr) { + MS_LOG(ERROR) << "Dims of tensor is nullptr"; + return false; + } + + if (data_type == kObjectTypeTensorType) { + MS_LOG(ERROR) << "Not support TensorList."; + return false; + } + + if (primitive->data() == nullptr || primitive->data()->size() <= 0) { + MS_LOG(DEBUG) << "No valid data converted."; + return true; + } + return true; +} + +Status NPUDelegate::buildOfflineModel(DelegateModel *model) { + MS_LOG(INFO) << "enable npu offline model infer."; + + // Get Node Tensor + auto node = lite_graph_->all_nodes_[0]; + MS_CHECK_TRUE_RET(node != nullptr, kLiteError); + auto input_num = node->input_indices_.size(); + + // at least one input and one OM model buffer(as the last constant input) + MS_CHECK_TRUE_RET(input_num >= kNum2, kLiteError); + MS_CHECK_TRUE_RET(lite_graph_->all_tensors_.size() >= kNum2, kLiteError); + auto input_tensor = lite_graph_->all_tensors_[node->input_indices_[0]]; + MS_CHECK_TRUE_RET(input_tensor != nullptr, kLiteError); + auto model_tensor = lite_graph_->all_tensors_[node->input_indices_[input_num - 1]]; + MS_CHECK_TRUE_RET(model_tensor != nullptr, kLiteError); + MS_CHECK_TRUE_RET(model_tensor->data() != nullptr, kLiteError); + uint8_t *model_buffer = const_cast(model_tensor->data()->data()); + size_t model_size = model_tensor->data()->size(); + MS_LOG(DEBUG) << "Model input size:" << model->inputs().size() << ", output size:" << model->outputs().size() << "."; + // create offlineModelKernel + auto offline_model_kernel = + new (std::nothrow) OfflineModelKernel(model->inputs(), model->outputs(), model_buffer, model_size); + if (offline_model_kernel == nullptr) { + MS_LOG(ERROR) << "new OfflineModelKernel failed."; + return mindspore::kLiteError; + } + (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), offline_model_kernel); + MS_LOG(INFO) << "Replace kernel in NPUDelegate success."; + return mindspore::kSuccess; +} + +Status NPUDelegate::buildOnlineModel(DelegateModel *model) { KernelIter from; KernelIter end; std::vector npu_ops; int graph_index = 0; + + MS_LOG(INFO) << "enable npu online model infer."; for (auto iter = model->BeginKernelIterator(); iter != model->EndKernelIterator(); iter++) { kernel::Kernel *kernel = *iter; auto npu_op = GetOP(kernel, model->GetPrimitive(kernel)); @@ -243,6 +433,14 @@ Status NPUDelegate::Build(DelegateModel *model) { return mindspore::kSuccess; } +Status NPUDelegate::Build(DelegateModel *model) { + if (IsCustomModel()) { + return buildOfflineModel(model); + } else { + return buildOnlineModel(model); + } +} + NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) { if (primitive == nullptr) { MS_LOG(ERROR) << "primitive is NULL!"; diff --git a/mindspore-lite/src/litert/delegate/npu/npu_delegate.h b/mindspore-lite/src/litert/delegate/npu/npu_delegate.h index 17b43a34..79038567 100644 --- a/mindspore-lite/src/litert/delegate/npu/npu_delegate.h +++ b/mindspore-lite/src/litert/delegate/npu/npu_delegate.h @@ -21,10 +21,12 @@ #include #include #include "include/api/delegate.h" +#include "include/model.h" #include "src/litert/delegate/npu/npu_manager.h" #include "src/litert/delegate/npu/pass/npu_pass_manager.h" #include "src/litert/delegate/npu/op/npu_op.h" #include "src/litert/inner_context.h" +#include "src/litert/delegate/npu/offline_model_kernel.h" namespace mindspore::lite { class NPUDelegate : public Delegate { @@ -39,6 +41,12 @@ class NPUDelegate : public Delegate { Status Init() override; Status 
Build(DelegateModel *model) override; + void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); + void FreeLiteGraph(lite::LiteGraph **liteGraph); + bool IsCustomModel() const; + bool CheckTensorSupported(const schema::Tensor *primitive); + Status buildOfflineModel(DelegateModel *model); + Status buildOnlineModel(DelegateModel *model); protected: NPUOp *GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive); @@ -48,6 +56,7 @@ class NPUDelegate : public Delegate { Status AddPasses(); + LiteGraph *lite_graph_ = nullptr; NPUManager *npu_manager_ = nullptr; NPUPassManager *pass_manager_ = nullptr; std::map op_func_lists_; diff --git a/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc new file mode 100644 index 00000000..b7bf1941 --- /dev/null +++ b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.cc @@ -0,0 +1,284 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "src/litert/delegate/npu/offline_model_kernel.h" +#include "litert/cxx_api/tensor/tensor_impl.h" +#include "include/api/data_type.h" + +namespace { +constexpr int32_t kHiaiFrequencyType = 3; // HIGH +constexpr int32_t kHiaiDeviceType = 0; // NPU +const char kHiaiModelName[] = "Third_Party_Model"; +} // namespace + +namespace mindspore { +OfflineModelKernel::~OfflineModelKernel() { + model_manager_client_ = nullptr; + model_builder_ = nullptr; + for (auto t : offline_model_inputs_tensors_) { + t.reset(); + } + + for (auto t : offline_model_outputs_tensors_) { + t.reset(); + } +} + +int OfflineModelKernel::Prepare() { + model_manager_client_ = std::make_shared(); + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelMngerClient failed."; + return lite::RET_ERROR; + } + model_builder_ = std::make_shared(model_manager_client_); + if (model_builder_ == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelBuilder failed."; + return lite::RET_ERROR; + } + auto client_ret = model_manager_client_->Init(nullptr); // sync mode + if (client_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Init modelBuilder failed."; + return lite::RET_ERROR; + } + // Build Model + int build_ret = BuildHiaiModel(offline_model_buffer_, offline_model_size_); + if (build_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Build offline model buffer failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel build offline model buffer success."; + return lite::RET_OK; +} + +int OfflineModelKernel::BuildHiaiModel(uint8_t *model_data, size_t model_data_length) { + MS_LOG(INFO) << "OfflineModelKernel Build Function start."; + MS_CHECK_TRUE_RET(model_data_length != 0, kLiteError); + if (model_data == nullptr) { + MS_LOG(ERROR) << "Current model_data is invalid, please check model file."; + return lite::RET_ERROR; + } + void *offline_model_data = model_data; + if (kHiaiFrequencyType == -1 || kHiaiDeviceType == -1) { + MS_LOG(ERROR) << "Create model description failed. 
Current kHiaiFrequencyType is :" << kHiaiFrequencyType + << ", kHiaiDeviceType:" << kHiaiDeviceType << "."; + return lite::RET_ERROR; + } + std::vector> model_descs; + std::unordered_map, hiai::MemBuffer *> builder_buffer_map; + MS_LOG(INFO) << "Create model description: version [" << model_manager_client_->GetVersion() + << "], kHiaiFrequencyType is " << kHiaiFrequencyType << ", kHiaiDeviceType is " << kHiaiDeviceType + << "."; + std::shared_ptr model_desc = + std::make_shared(kHiaiModelName, kHiaiFrequencyType, 0, 1, kHiaiDeviceType); + if (model_desc == nullptr) { + MS_LOG(ERROR) << "Alloc AiModelDescription failed."; + return lite::RET_ERROR; + } + model_descs.push_back(model_desc); + auto model_buffer = model_builder_->InputMemBufferCreate(offline_model_data, model_data_length); + if (model_buffer == nullptr) { + MS_LOG(ERROR) << "Hiai Model Builder input memory buffer create failed, model data size:" << model_data_length; + return lite::RET_ERROR; + } + builder_buffer_map.insert({model_builder_, model_buffer}); + model_desc->SetModelBuffer(model_buffer->GetMemBufferData(), model_buffer->GetMemBufferSize()); + MS_LOG(INFO) << "Hiai Model Builder set offline model buffer success."; + + if (!model_descs.empty()) { + auto load_ret = model_manager_client_->Load(model_descs); + if (load_ret != hiai::AI_SUCCESS) { + for (auto it : builder_buffer_map) { + it.first->MemBufferDestroy(it.second); + } + builder_buffer_map.clear(); + MS_LOG(ERROR) << "Hiai Client load offline model failed and clear offline model buffer."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "Hiai Client load offline model success."; + model_descs.clear(); + } + // Init OfflineModel IO tensor + if (InitHiaiIOTensors() != lite::RET_OK) { + MS_LOG(ERROR) << "OfflineModelKernel InitHiaiIOTensors failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel init offline model tensor and load model success."; + return lite::RET_OK; +} + +int OfflineModelKernel::InitHiaiIOTensors() { + std::vector input_dimension; + std::vector output_dimension; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai Client is nullptr."; + return lite::RET_ERROR; + } + auto get_io_dim_ret = model_manager_client_->GetModelIOTensorDim(kHiaiModelName, input_dimension, output_dimension); + if (get_io_dim_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "Get offline model input and output tensor dims failed." << get_io_dim_ret; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "Input NCHW :" << input_dimension[0].GetNumber() << " " << input_dimension[0].GetChannel() << " " + << input_dimension[0].GetHeight() << " " << input_dimension[0].GetWidth(); + MS_LOG(DEBUG) << "Output NCHW :" << output_dimension[0].GetNumber() << " " << output_dimension[0].GetChannel() << " " + << output_dimension[0].GetHeight() << " " << output_dimension[0].GetWidth(); + + MS_LOG(DEBUG) << "Init input ai_tensors."; + auto in_tensor_ret = InitHiaiTensorWithMSTensor(input_dimension, inputs_, offline_model_inputs_tensors_); + if (in_tensor_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Update offline model input tensor vector failed. " << in_tensor_ret; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "Init output ai_tensors."; + auto out_tensor_ret = InitHiaiTensorWithMSTensor(output_dimension, outputs_, offline_model_outputs_tensors_); + if (out_tensor_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Update offline model output tensor vector failed. 
" << out_tensor_ret; + return lite::RET_ERROR; + } + return lite::RET_OK; +} + +int OfflineModelKernel::InitHiaiTensorWithMSTensor( + const std::vector &dimension, const std::vector &ms_tensors, + std::vector> &offline_model_tensors) { + if (dimension.empty()) { + MS_LOG(ERROR) << "Offline model tensor dimension is empty."; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << " dimension size:" << dimension.size(); + for (int i = 0; i < dimension.size(); i++) { + std::shared_ptr ai_tensor = std::make_shared(); + if (ai_tensor == nullptr) { + MS_LOG(ERROR) << "Alloc AiTensor failed."; + return lite::RET_ERROR; + } + if (ai_tensor->Init(&dimension[i], lite::MSDataTypeToHIAIDataType(ms_tensors[i].DataType())) != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "AiTensor init failed."; + return lite::RET_ERROR; + } + offline_model_tensors.push_back(ai_tensor); + } + return lite::RET_OK; +} + +int OfflineModelKernel::Execute() { + // Get MS INPUT Tensors + MS_LOG(INFO) << "Before OfflineModelKernel execute, MSTensorData need to be copy to AiTensorData. Inputs_ size: " + << inputs_.size() << ", outputs_ size: " << outputs_.size(); + auto ms_hiai_ret = CopyMSTensorsDataToHiaiTensorsData(); + if (ms_hiai_ret != lite::RET_OK) { + MS_LOG(ERROR) << "CopyMSTensorsDataToHiaiTensorsData failed."; + return lite::RET_ERROR; + } + auto execute_ret = ExecuteHiaiModel(); + if (execute_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "OfflineModelKernel ExecuteHiaiModel failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel ExecuteHiaiModel success."; + auto hiai_ms_ret = CopyHiaiTensorsDataToMSTensorsData(); + if (hiai_ms_ret != lite::RET_OK) { + MS_LOG(ERROR) << "ConvertAiTensorToMSTensor failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel ExecuteHiaiModel done, and CopyHiaiTensorsDataToMSTensorsData success."; + return lite::RET_OK; +} + +int OfflineModelKernel::ExecuteHiaiModel() { + hiai::AiContext context; + std::string key = "model_name"; + std::string value = kHiaiModelName; + context.AddPara(key, value); + int32_t stamp; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai client is nullptr."; + return lite::RET_ERROR; + } + int ret = + model_manager_client_->Process(context, offline_model_inputs_tensors_, offline_model_outputs_tensors_, 3000, stamp); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "OfflineModelKernel Predict failed by Hiai client using Process function."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "OfflineModelKernel Predict model success, ret:" << ret << " stamp:" << stamp; + return lite::RET_OK; +} + +int OfflineModelKernel::CopyMSTensorsDataToHiaiTensorsData() { + MS_LOG(INFO) << "ConvertMSTensorToAiTensor ms_input tensor num:" << inputs_.size() + << " ai_input tensor num:" << offline_model_inputs_tensors_.size(); + if (offline_model_inputs_tensors_.size() != inputs_.size()) { + MS_LOG(ERROR) << "ms_input and ai_input have different size. ms_input tensor num:" << inputs_.size() + << " ai_input tensor num:" << offline_model_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < offline_model_inputs_tensors_.size(); i++) { + if (offline_model_inputs_tensors_.at(i)->GetSize() != inputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_input and ai_input have different dataSize. 
ms_input tensor dataSize " + << inputs_.at(i).DataSize() + << " ai_input tensor num:" << offline_model_inputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = inputs_.at(i).MutableData(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ms_input at [" << i + << "], tensor name:" << inputs_.at(i).Name() << " buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = offline_model_inputs_tensors_.at(i)->GetBuffer(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ai_input at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + std::memcpy(dest_buffer, src_buffer, inputs_.at(i).DataSize()); + } + MS_LOG(INFO) << "ConvertMSTensorToAiTensor success."; + return lite::RET_OK; +} + +int OfflineModelKernel::CopyHiaiTensorsDataToMSTensorsData() { + MS_LOG(INFO) << "ConvertAiTensorToMSTensor ms_output tensor num:" << outputs_.size() + << " ai_output tensor num:" << offline_model_outputs_tensors_.size(); + if (offline_model_outputs_tensors_.size() != outputs_.size()) { + MS_LOG(ERROR) << "ms_output and ai_output have different size. ms_output tensor num:" << outputs_.size() + << " ai_output tensor num:" << offline_model_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < offline_model_outputs_tensors_.size(); i++) { + if (offline_model_outputs_tensors_.at(i)->GetSize() != outputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_output and ai_output have different dataSize. ms_output tensor dataSize " + << outputs_.at(i).DataSize() + << " ai_output tensor num:" << offline_model_outputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = offline_model_outputs_tensors_.at(i)->GetBuffer(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ai_output at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = outputs_.at(i).MutableData(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << kHiaiModelName << ", the ms_output at [" << i + << "], tensor name:" << outputs_.at(i).Name() << " buffer is null."; + return lite::RET_ERROR; + } + std::memcpy(dest_buffer, src_buffer, offline_model_outputs_tensors_.at(i)->GetSize()); + } + MS_LOG(INFO) << "ConvertAiTensorToMSTensor success."; + return lite::RET_OK; +} +} // namespace mindspore diff --git a/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h new file mode 100644 index 00000000..d108cb8a --- /dev/null +++ b/mindspore-lite/src/litert/delegate/npu/offline_model_kernel.h @@ -0,0 +1,64 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef LITE_OFFLINE_MODEL_KERNEL_H +#define LITE_OFFLINE_MODEL_KERNEL_H +#include +#include +#include +#include +#include "include/api/kernel.h" +#include "src/common/log_adapter.h" +#include "src/litert/inner_context.h" +#include "include/errorcode.h" +#include "include/HiAiModelManagerService.h" +#include "src/litert/delegate/delegate_utils.h" + +namespace mindspore { +class OfflineModelKernel : public kernel::Kernel { + /** + * We decide to make the whole model into one kernel. + * */ + public: + OfflineModelKernel(const std::vector &inputs, const std::vector &outputs, + uint8_t *offline_model_buffer, size_t offline_model_size) + : kernel::Kernel(inputs, outputs, nullptr, nullptr), + offline_model_buffer_(offline_model_buffer), + offline_model_size_(offline_model_size) {} + int Prepare() override; + int ReSize() override { return kSuccess; } + int Execute() override; + ~OfflineModelKernel(); + + private: + int BuildHiaiModel(uint8_t *modelData, size_t modelDataLength); + int InitHiaiIOTensors(); + int InitHiaiTensorWithMSTensor(const std::vector &input_dimension, + const std::vector &ms_tensors, + std::vector> &offline_model_tensors); + + int ExecuteHiaiModel(); + int CopyMSTensorsDataToHiaiTensorsData(); + int CopyHiaiTensorsDataToMSTensorsData(); + + std::shared_ptr model_manager_client_ = nullptr; + std::shared_ptr model_builder_ = nullptr; + uint8_t *offline_model_buffer_; + size_t offline_model_size_; + std::vector> offline_model_inputs_tensors_; + std::vector> offline_model_outputs_tensors_; +}; +} // namespace mindspore +#endif // LITE_OFFLINE_MODEL_KERNEL_H diff --git a/mindspore-lite/src/litert/infer_manager.cc b/mindspore-lite/src/litert/infer_manager.cc index 6d7e7c20..5aceef86 100644 --- a/mindspore-lite/src/litert/infer_manager.cc +++ b/mindspore-lite/src/litert/infer_manager.cc @@ -189,6 +189,10 @@ int KernelInferShape(const std::vector &inputs, const std::vecto MS_LOG(ERROR) << "No input!"; return RET_ERROR; } + if (parameter->type_ == static_cast(PrimType_Inner_ThirdPartyModel)) { + MS_LOG(INFO) << "No need infer shape for PrimType_Inner_ThirdPartyModel."; + return RET_OK; + } std::vector in_tensors; std::vector out_tensors; int ret = GenerateInTensorC(inputs, &in_tensors, allocator); diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc new file mode 100644 index 00000000..d5dcb57b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/cpu/base/custom_base.h" +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "nnacl_c/op_base.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore::kernel { +int CustomBaseCPUKernel::Prepare() { return RET_OK; } + +int CustomBaseCPUKernel::ReSize() { return RET_OK; } + +int CustomBaseCPUKernel::Run() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeUInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h new file mode 100644 index 00000000..25262321 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h @@ -0,0 +1,43 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ + +#include +#include "src/litert/lite_kernel.h" +#include "nnacl_c/custom_parameter.h" + +namespace mindspore::kernel { +class CustomBaseCPUKernel : public LiteKernel { + public: + CustomBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) { + custom_param_ = reinterpret_cast(op_parameter_); + } + ~CustomBaseCPUKernel() override = default; + + int Prepare() override; + int ReSize() override; + int Run() override; + + private: + CustomParameter *custom_param_ = nullptr; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ diff --git a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h index bbe93692..f4482a5f 100644 --- a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h +++ b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h @@ -571,6 +571,7 @@ enum PrimType { PrimType_Inner_CustomIsInf = 10016, PrimType_Inner_Conv3D = 10017, PrimType_Inner_GridSampler = 10018, + PrimType_Inner_ThirdPartyModel = 10019, PrimType_InnerOpMax, PrimType_InnerOpMin = PrimType_Inner_ToFormat }; diff --git a/mindspore-lite/src/litert/kernel_registry.cc b/mindspore-lite/src/litert/kernel_registry.cc index e0dcb63e..2d6230a1 100644 --- a/mindspore-lite/src/litert/kernel_registry.cc +++ b/mindspore-lite/src/litert/kernel_registry.cc @@ -178,6 +178,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con registry::KernelDesc desc{static_cast(key.data_type), key.type, key.kernel_arch, key.provider}; auto creator = registry::RegisterKernel::GetCreator(static_cast(primitive), &desc); if (creator == nullptr) { + MS_LOG(INFO) << "Not support to get Creator."; return RET_NOT_SUPPORT; } @@ -205,6 +206,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con } } #endif + MS_LOG(ERROR) << "Common base kernel registry failed."; return RET_ERROR; } @@ -257,6 +259,7 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const if (ret == RET_OK) { (*kernel)->set_context(ctx); } + MS_LOG(INFO) << "Get kernel " << (ret == RET_OK ? "succeed." 
: "failed."); return ret; } #endif @@ -271,10 +274,12 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const kernel_exec->set_desc(modify_key); kernel_exec->set_context(ctx); *kernel = kernel_exec; + MS_LOG(INFO) << "Get Lite Kernel succeed for type:" << PrimitiveCurVersionTypeName(key.type) + << " by type index:" << key.type << "."; return RET_OK; } } - MS_LOG(WARNING) << "common cpu kernel registry failed"; + MS_LOG(ERROR) << "common cpu kernel registry for lite_kernel failed."; return RET_ERROR; } } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/lite_session.cc b/mindspore-lite/src/litert/lite_session.cc index f6c834d5..4b4da451 100644 --- a/mindspore-lite/src/litert/lite_session.cc +++ b/mindspore-lite/src/litert/lite_session.cc @@ -645,6 +645,7 @@ int LiteSession::CompileGraph(Model *model) { this->context_->set_infer_checker(InferCheckerAll); } is_running_.store(false); + MS_LOG(INFO) << "CompileGraph for current model success."; return RET_OK; } @@ -782,6 +783,7 @@ int LiteSession::PrepareKernels(const Model *model) { MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; return ret; } + MS_LOG(INFO) << "Set Allocator For Delegate Kernels success."; } if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) { @@ -1094,6 +1096,7 @@ int LiteSession::InitDelegate() { } if (ret != RET_OK) { + MS_LOG(ERROR) << "Create Delegate failed. ret info: " << ret; return ret; } if (delegate_ != nullptr) { diff --git a/mindspore-lite/src/litert/scheduler.cc b/mindspore-lite/src/litert/scheduler.cc index f5f4a709..117945e2 100644 --- a/mindspore-lite/src/litert/scheduler.cc +++ b/mindspore-lite/src/litert/scheduler.cc @@ -58,6 +58,9 @@ #if defined(MSLITE_ENABLE_CLOUD_INFERENCE) && defined(ENABLE_MINDRT) #include "thread/parallel_thread_pool_manager.h" #endif +#ifdef SUPPORT_NPU +#include "src/litert/delegate/npu/npu_delegate.h" +#endif using AbstractBaseModel = mindspore::infer::AbstractBaseModel; @@ -255,6 +258,13 @@ int Scheduler::CheckCpuValid(const std::vector *dst_kernel if (context_->IsDeviceTypeEnabled(DT_CPU)) { return RET_OK; } + // Custom model + if (dst_kernels->size() == 1 && (*dst_kernels)[0]->name() == "Custom" && + (*dst_kernels)[0]->desc().arch == kernel::KERNEL_ARCH::kDelegate) { + MS_LOG(INFO) << "kernel type is " << (*dst_kernels)[0]->name() << " and device type is " + << (*dst_kernels)[0]->desc().arch << "."; + return RET_OK; + } for (auto kernel : *dst_kernels) { if (kernel->desc().arch == kernel::KERNEL_ARCH::kCPU) { MS_LOG(ERROR) << "kernel: " << kernel->name() << " only support in CPU."; @@ -406,6 +416,13 @@ int Scheduler::Schedule(std::vector *dst_kernels) { return ret; } + for (auto kernel : *dst_kernels) { + MS_LOG(DEBUG) << "kernel: [" << kernel->name() << "] " + << "TypeId(" << kernel->desc().data_type << "); " + << "OpType(" << PrimitiveCurVersionTypeName(kernel->desc().type) << "); " + << "format(" << kernel->desc().format << "); " + << "arch(" << kernel->desc().arch << ")"; + } ret = CheckCpuValid(dst_kernels); if (ret != RET_OK) { MS_LOG(ERROR) << "kernels invalid in set devices."; @@ -501,6 +518,11 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker MS_LOG(ERROR) << "New delegate model failed."; return RET_NULL_PTR; } + +#ifdef SUPPORT_NPU + auto delegate = static_cast(delegate_.get()); + delegate->ShallowCopyLiteGraph(this->src_model_->graph_); +#endif auto ret = delegate_->Build(model); if (ret != mindspore::kSuccess) { delete model; @@ -1001,8 
+1023,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: MS_CHECK_TRUE_MSG(op_parameter != nullptr, RET_ERROR, "op parameter is nullptr."); auto op_type = op_parameter->type_; if (!KernelRegistry::GetInstance()->SupportKernel(desc)) { - MS_LOG(INFO) << "unsupported op_type: " << PrimitiveCurVersionTypeName(op_type) - << ", data_type: " << desc.data_type; + MS_LOG(INFO) << "Unsupported op_type index: " << op_type << ", op_type: " << PrimitiveCurVersionTypeName(op_type) + << ", data_type: " << desc.data_type << "."; return RET_NOT_SUPPORT; } kernel::KernelKey cpu_desc = desc; @@ -1041,8 +1063,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, cpu_desc, op_parameter, kernel); if (ret == RET_OK) { - MS_LOG(DEBUG) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type - << ") op success: " << PrimitiveCurVersionTypeName(op_type); + MS_LOG(INFO) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type + << ") op success: " << PrimitiveCurVersionTypeName(op_type); if (is_train_session_) { ret = (*kernel)->Prepare(); RestoreTensorData(&restored_origin_tensors); @@ -1288,8 +1310,9 @@ kernel::KernelExec *Scheduler::FindBackendKernel(const std::vector &in if (status == RET_OK) { return kernel; } else { - MS_LOG(DEBUG) << "Get fp16 op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(desc.type) << " " - << node->name_; + MS_LOG(INFO) << "Get op failed, scheduler to CPU by node: " << node->name_ << ", prefer_data_type is " + << (prefer_data_type == kNumberTypeFloat16 ? "fp16" : "unknown") << ", desc.type index:" << desc.type + << ", desc.type:" << PrimitiveCurVersionTypeName(desc.type) << "."; if (status == RET_ERROR) { op_parameters_.erase(node->output_indices_.at(0)); auto ret = InferNodeShape(node); -- Gitee
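
Usage sketch (illustrative only, not part of the patch): one way the offline NPU path added here could be exercised from the unified C++ API. The model file name "third_party_model.ms", the kMindIR ModelType constant, and the zero-filled inputs are assumptions for the example; what the patch itself implies is only that the converted .ms model holds a single Custom node of type "ThirdPartyModel" whose last constant input carries the HiAI OM buffer, and that the Kirin NPU device is enabled so NPUDelegate::Build() takes the buildOfflineModel() branch and replaces the graph with an OfflineModelKernel.

// Hypothetical end-to-end caller of the offline NPU inference path.
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>
#include "include/api/context.h"
#include "include/api/model.h"
#include "include/api/status.h"
#include "include/api/types.h"

int main() {
  auto context = std::make_shared<mindspore::Context>();
  auto &devices = context->MutableDeviceInfo();
  // Enable the Kirin NPU so the NPU delegate is created; keep CPU as a fallback device.
  auto npu_device = std::make_shared<mindspore::KirinNPUDeviceInfo>();
  npu_device->SetFrequency(3);  // high frequency
  devices.push_back(npu_device);
  devices.push_back(std::make_shared<mindspore::CPUDeviceInfo>());

  mindspore::Model model;
  // kMindIR is used for illustration; the exact ModelType for .ms files depends on the release in use.
  auto status = model.Build("third_party_model.ms", mindspore::kMindIR, context);
  if (status != mindspore::kSuccess) {
    std::cerr << "Build failed: " << status.ToString() << std::endl;
    return -1;
  }

  // Fill each graph input; zeros here only to keep the sketch self-contained.
  auto inputs = model.GetInputs();
  for (auto &tensor : inputs) {
    auto *data = tensor.MutableData();
    if (data != nullptr) {
      std::memset(data, 0, tensor.DataSize());
    }
  }

  // Predict() routes through OfflineModelKernel::Execute(): MS tensor data is copied into
  // HiAI AiTensors, AiModelMngerClient::Process runs the OM model, and outputs are copied back.
  std::vector<mindspore::MSTensor> outputs;
  status = model.Predict(inputs, &outputs);
  if (status != mindspore::kSuccess) {
    std::cerr << "Predict failed: " << status.ToString() << std::endl;
    return -1;
  }
  std::cout << "Offline NPU inference produced " << outputs.size() << " outputs." << std::endl;
  return 0;
}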