diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index 153c6272bbb9605f9e1163c5555141c947970958..bfb01a280ad7a874dcc6e54f73d42e74546fa9b1 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -353,6 +353,10 @@ if(PLATFORM_ARM64) COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(EXISTS "${DDK_LIB_PATH}/libhiai_model_compatible.so") + install(FILES ${DDK_LIB_PATH}/libhiai_model_compatible.so + DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() if(EXISTS "${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so") install(FILES ${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) @@ -595,6 +599,10 @@ if(PLATFORM_ARM64) COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${TEST_CASE_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(EXISTS "${DDK_LIB_PATH}/libhiai_model_compatible.so") + install(FILES ${DDK_LIB_PATH}/libhiai_model_compatible.so + DESTINATION ${TEST_CASE_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() if(EXISTS "${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so") install(FILES ${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so DESTINATION ${TEST_CASE_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) @@ -609,6 +617,10 @@ elseif(PLATFORM_ARM32) COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(EXISTS "${DDK_LIB_PATH}/libhiai_model_compatible.so") + install(FILES ${DDK_LIB_PATH}/libhiai_model_compatible.so + DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() if(EXISTS "${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so") install(FILES ${DDK_LIB_PATH}/libhiai_hcl_model_runtime.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h index 708cb84e5380eb2514a6d40a81138924eb7eeb9e..50d6787054b53ea460fe5a0847fd1e45f30185b4 100644 --- a/include/c_api/types_c.h +++ b/include/c_api/types_c.h @@ -39,6 +39,7 @@ typedef enum MSDeviceType { kMSDeviceTypeGPU, kMSDeviceTypeKirinNPU, // add new type here + kMSDeviceTypeHIAI, kMSDeviceTypeInvalid = 100, } MSDeviceType; diff --git a/include/cxx_api/context.h b/include/cxx_api/context.h index 425fc87c66a0f37c1f0f47579b17dab7b10123da..c73d9fcafc018e9eff053cc7dd5d72140e4f74b3 100644 --- a/include/cxx_api/context.h +++ b/include/cxx_api/context.h @@ -39,6 +39,7 @@ enum DeviceType { kDSP, kCustomDevice, kAllDevice, + kHIAI, // add new type here kInvalidDeviceType = 100, }; @@ -385,6 +386,67 @@ void GPUDeviceInfo::SetPrecisionMode(const std::string &precision_mode) { } std::string GPUDeviceInfo::GetPrecisionMode() const { return CharToString(GetPrecisionModeChar()); } +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the NPU. This option is only valid +/// for MindSpore Lite. +class MS_API HIAIDeviceInfo : public DeviceInfoContext { + public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. + enum DeviceType GetDeviceType() const override { return DeviceType::kHIAI; }; + + /// \brief Set device id. + /// + /// \param[in] device_id The device id. 
+ void SetDeviceID(uint32_t device_id); + + /// \brief Get the device id. + /// + /// \return The device id. + uint32_t GetDeviceID() const; + + /// \brief Set performance mode. + /// + /// \param[in] performance_mode The performance mode. + void SetPerformanceMode(int performance_mode); + + /// \brief Get performance mode. + /// + /// \return The priority. + int GetPerformanceMode() const; + + /// \brief Set priority. + /// + /// \param[in] priority The priority. + void SetPriority(int priority); + + /// \brief Get priority. + /// + /// \return The priority. + int GetPriority() const; + + /// \brief Set enables to perform the float16 inference + /// + /// \param[in] is_fp16 Enable float16 inference or not. + void SetEnableFP16(bool is_fp16); + + /// \brief Get enables to perform the float16 inference + /// + /// \return Whether enable float16 inference. + bool GetEnableFP16() const; + + /// \brief Set the high performance frequency. + /// + /// \param[in] frequency Can be set to 1 (low power consumption), 2 (balanced), 3 (high performance), + /// 4 (extreme performance), default as 3. + void SetFrequency(int frequency); + + /// \brief Get the high frequency. + /// + /// \return Hiai NNRT frequency + int GetFrequency() const; +}; + /// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend. This option is /// invalid for MindSpore Lite. class MS_API AscendDeviceInfo : public DeviceInfoContext { diff --git a/mindspore-lite/include/lite_types.h b/mindspore-lite/include/lite_types.h index cd8cc19b36ac66ae553037b87885db731812bc6f..3c57dfa4eb99022d592981009a07cdce1e5606dc 100644 --- a/mindspore-lite/include/lite_types.h +++ b/mindspore-lite/include/lite_types.h @@ -43,6 +43,7 @@ typedef enum { DT_ASCEND, /**< ASCEND device type */ DT_DSP, /**< DSP device type */ DT_CUSTOM, /**< EXTEND device type */ + DT_HIAI, /**< HIAI device type */ DT_END /**< NO device type */ } DeviceType; diff --git a/mindspore-lite/include/model.h b/mindspore-lite/include/model.h index 9010d6b8b3d5bc3e9f732691dcae8027e50201eb..98c8116230e4e7c0cb8b20c268ca659ababc9c45 100644 --- a/mindspore-lite/include/model.h +++ b/mindspore-lite/include/model.h @@ -42,6 +42,18 @@ struct MS_API LiteGraph { std::vector output_indices_; int quant_type_; int device_type_ = -1; + Node() = default; + + Node(const Node &new_node) + : name_(new_node.name_), + op_type_(new_node.op_type_), + node_type_(new_node.node_type_), + primitive_(new_node.primitive_), + base_operator_(new_node.base_operator_), + input_indices_(new_node.input_indices_), + output_indices_(new_node.output_indices_), + quant_type_(new_node.quant_type_), + device_type_(new_node.device_type_) {} }; struct SubGraph { std::string name_; @@ -49,6 +61,14 @@ struct MS_API LiteGraph { std::vector output_indices_; std::vector node_indices_; std::vector tensor_indices_; + SubGraph() = default; + + SubGraph(const SubGraph &new_subgraph) + : name_(new_subgraph.name_), + input_indices_(new_subgraph.input_indices_), + output_indices_(new_subgraph.output_indices_), + node_indices_(new_subgraph.node_indices_), + tensor_indices_(new_subgraph.tensor_indices_) {} }; std::string name_; std::string version_; diff --git a/mindspore-lite/src/CMakeLists.txt b/mindspore-lite/src/CMakeLists.txt index 8336e6e78d982eed55658efca6c10180ac2fc91f..899216f3c8f25af10677e9409fcfd6789d5dd9d5 100644 --- a/mindspore-lite/src/CMakeLists.txt +++ b/mindspore-lite/src/CMakeLists.txt @@ -471,6 +471,9 @@ if(SUPPORT_NPU) add_subdirectory(litert/delegate/npu) 
target_link_libraries(mindspore-lite npu_kernel_mid) target_link_libraries(mindspore-lite_static npu_kernel_mid) + add_subdirectory(litert/delegate/hiai) + target_link_libraries(mindspore-lite hiai_kernel_mid) + target_link_libraries(mindspore-lite_static hiai_kernel_mid) endif() if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_HIMIX diff --git a/mindspore-lite/src/common/context_util.cc b/mindspore-lite/src/common/context_util.cc index 4a2e89abbae72c8a35459b0f233f7b3e7225d0d5..c50ccabd616df7474b71dc324cebb7ab71fc9104 100644 --- a/mindspore-lite/src/common/context_util.cc +++ b/mindspore-lite/src/common/context_util.cc @@ -73,6 +73,17 @@ std::shared_ptr NPUDeviceInfoFromNPUDeviceContext return npu_info; } +std::shared_ptr HIAIDeviceInfoFromHIAIDeviceContext( + const lite::DeviceContext &hiai_context) { + if (hiai_context.device_type_ != DT_HIAI) { + MS_LOG(ERROR) << "Function input parameter is not HIAI context."; + return nullptr; + } + auto hiai_info = std::make_shared(); + MS_CHECK_TRUE_RET(hiai_info != nullptr, nullptr); + return hiai_info; +} + std::vector GetBatchSize(const std::string &batch_size) { std::vector res; std::vector batch_size_vec = StrSplit(batch_size, ","); @@ -152,9 +163,10 @@ mindspore::Context *MSContextFromContext(const std::shared_ptr &co auto &device_infos = ms_context->MutableDeviceInfo(); std::map(const lite::DeviceContext &)>> transfer_funcs = { - {DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, - {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, - {DT_DSP, DSPDeviceInfoFromDSPDeviceContext}, {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; + {DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, + {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, + {DT_DSP, DSPDeviceInfoFromDSPDeviceContext}, {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}, + {DT_HIAI, HIAIDeviceInfoFromHIAIDeviceContext}}; for (auto &device_context : context->device_list_) { auto device_type = device_context.device_type_; if (transfer_funcs.find(device_type) == transfer_funcs.end()) { diff --git a/mindspore-lite/src/common/ops/populate/custom_populate.cc b/mindspore-lite/src/common/ops/populate/custom_populate.cc index 84ea9ace12eeef3774c0db97b260d5d176817916..78397303955b8bb2bfd153d4f61fbcc4fbbe892f 100644 --- a/mindspore-lite/src/common/ops/populate/custom_populate.cc +++ b/mindspore-lite/src/common/ops/populate/custom_populate.cc @@ -24,6 +24,7 @@ #include "nnacl_c/scatter_nd_parameter.h" #include "nnacl_c/conv3d_parameter.h" #include "nnacl_c/grid_sampler_parameter.h" +#include "nnacl_c/op_base.h" using mindspore::schema::PrimitiveType_Custom; @@ -172,6 +173,19 @@ OpParameter *CreateGridSamplerParameter(const schema::Custom *value) { return reinterpret_cast(param); } +OpParameter *CreateThirdPartyModelParameter(const void *prim) { + auto *param = static_cast(malloc(sizeof(CustomParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "Malloc ThirdPartyModel Parameter failed."; + return nullptr; + } + memset(param, 0, sizeof(CustomParameter)); + param->op_parameter_.type_ = PrimType_Inner_ThirdPartyModel; + // Just use the attr_data pointer to save the prim directly, the inner value is parsed as necessary. 
+ param->attr_data[0] = static_cast(const_cast(prim)); + return reinterpret_cast(param); +} + OpParameter *PopulateCustomParameter(const void *prim) { MS_CHECK_TRUE_RET(prim != nullptr, nullptr); auto primitive = static_cast(prim); @@ -221,6 +235,8 @@ OpParameter *PopulateCustomParameter(const void *prim) { return CreateCustomConv3DParameter(value); } else if (type == "GridSampler") { return CreateGridSamplerParameter(value); + } else if (type == "ThirdPartyModel") { + return CreateThirdPartyModelParameter(prim); } else { MS_LOG(WARNING) << "Unsupported custom type: " << type; } diff --git a/mindspore-lite/src/common/prim_util.cc b/mindspore-lite/src/common/prim_util.cc index d640815d9f52d94d4087f3d7b415b8c1d63e01bb..ce1eb8677a04f8088dfaa332d714c391c3e1de9a 100644 --- a/mindspore-lite/src/common/prim_util.cc +++ b/mindspore-lite/src/common/prim_util.cc @@ -36,7 +36,8 @@ static const char *const kInnerOpNames[C20NUM] = {"Inner_ToFormat", "I "Inner_CustomGru", "Inner_CastGatherReduceFusion", "Inner_ReduceConcatFusion", "Inner_AclCustomOp", "Inner_CustomMaskedFill", "Inner_CustomTensorScatterMax", - "Inner_CustomIsInf"}; + "Inner_CustomIsInf", "Inner_Conv3D", + "Inner_GridSampler", "Inner_ThirdPartyModel"}; int GetPrimitiveType(const void *primitive, int schema_version) { if (primitive == nullptr) { return -1; @@ -56,6 +57,8 @@ const char *PrimitiveCurVersionTypeName(int type) { return schema::EnumNamePrimitiveType(static_cast(type)); } else if (type >= static_cast(schema::PrimitiveType_MAX)) { if (type >= PrimType_InnerOpMin && type < PrimType_InnerOpMax) { + MS_LOG(INFO) << "Current real type index:" << type << ", expected type index:" << (type - PrimType_InnerOpMin) + << "."; return kInnerOpNames[type - PrimType_InnerOpMin]; } } diff --git a/mindspore-lite/src/litert/c_api/context_c.cc b/mindspore-lite/src/litert/c_api/context_c.cc index dc494a3db71fb6b1b409db2e72d7e4b2ad7731f2..34571ec642af98300c998691b289729b3f0abec3 100644 --- a/mindspore-lite/src/litert/c_api/context_c.cc +++ b/mindspore-lite/src/litert/c_api/context_c.cc @@ -143,6 +143,8 @@ MSDeviceInfoHandle MSDeviceInfoCreate(MSDeviceType device_type) { impl = new (std::nothrow) mindspore::GPUDeviceInfo(); } else if (kMSDeviceTypeKirinNPU == device_type) { impl = new (std::nothrow) mindspore::KirinNPUDeviceInfo(); + } else if (kMSDeviceTypeHIAI == device_type) { + impl = new (std::nothrow) mindspore::HIAIDeviceInfo(); } else { MS_LOG(ERROR) << "device_type is invalid."; impl = nullptr; @@ -279,6 +281,9 @@ void MSDeviceInfoSetFrequency(MSDeviceInfoHandle device_info, int frequency) { if (static_cast(impl_device->GetDeviceType()) == kMSDeviceTypeKirinNPU) { auto impl = static_cast(device_info); impl->SetFrequency(frequency); + } else if (static_cast(impl_device->GetDeviceType()) == kMSDeviceTypeHIAI) { + auto impl = static_cast(device_info); + impl->SetFrequency(frequency); } else { MS_LOG(ERROR) << "Unsupported Feature."; } diff --git a/mindspore-lite/src/litert/cxx_api/context.cc b/mindspore-lite/src/litert/cxx_api/context.cc index 6f6cc7f894d29b969417bb8658fbb3526f4fea5e..8720cd2d6678715dec495a37d3214055671430ed 100644 --- a/mindspore-lite/src/litert/cxx_api/context.cc +++ b/mindspore-lite/src/litert/cxx_api/context.cc @@ -49,6 +49,10 @@ constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dyn constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size"; constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize"; constexpr auto 
kModelOptionAscendRankID = "mindspore.option.ascend.rank_id"; +constexpr auto kModelOptionHIAIDeviceID = "mindspore.option.hiai.device_id"; +constexpr auto kModelOptionHIAIPerformanceMode = "mindspore.option.hiai.performance_mode"; +constexpr auto kModelOptionHIAIPriority = "mindspore.option.hiai.priority"; +constexpr auto kModelOptionHIAIEnableFP16 = "mindspore.option.hiai.enable_fp16"; #ifdef USE_GLOG extern "C" { extern void mindspore_log_init(); @@ -463,6 +467,86 @@ std::vector GPUDeviceInfo::GetPrecisionModeChar() const { return ret; } +void HIAIDeviceInfo::SetDeviceID(uint32_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionHIAIDeviceID] = device_id; +} + +uint32_t HIAIDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionHIAIDeviceID); +} + +void HIAIDeviceInfo::SetPerformanceMode(int performance_mode) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionHIAIPerformanceMode] = performance_mode; +} + +int HIAIDeviceInfo::GetPerformanceMode() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionHIAIPerformanceMode); +} + +void HIAIDeviceInfo::SetPriority(int priority) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionHIAIPriority] = priority; +} + +int HIAIDeviceInfo::GetPriority() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionHIAIPriority); +} + +void HIAIDeviceInfo::SetEnableFP16(bool is_fp16) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionHIAIEnableFP16] = is_fp16; +} + +bool HIAIDeviceInfo::GetEnableFP16() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return false; + } + return GetValue(data_, kModelOptionHIAIEnableFP16); +} + +void HIAIDeviceInfo::SetFrequency(int frequency) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionKirinNpuFrequency] = frequency; +} + +int HIAIDeviceInfo::GetFrequency() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionKirinNpuFrequency); +} + void AscendDeviceInfo::SetDeviceID(uint32_t device_id) { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; diff --git a/mindspore-lite/src/litert/cxx_api/converters.cc b/mindspore-lite/src/litert/cxx_api/converters.cc index 01ca00e104141be6e4916875010697066434a043..a935244601c7544e94896604c93f4caac76a0822 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.cc +++ b/mindspore-lite/src/litert/cxx_api/converters.cc @@ -95,6 +95,17 @@ Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context, return kSuccess; } +Status ContextUtils::AddHIAIDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, + int priority, bool enable_fp16) { + lite::DeviceInfo device_info = {0}; + device_info.hiai_device_info_.device_id_ = device_id; + device_info.hiai_device_info_.performance_mode_ = performance_mode; + device_info.hiai_device_info_.priority_ = priority; + device_info.hiai_device_info_.enable_fp16_ = enable_fp16; + inner_context->device_list_.push_back({lite::DT_HIAI, device_info}); + return kSuccess; +} + void 
ContextUtils::ResetContextDefaultParam(Context *context) { if (context->GetInterOpParallelNum() == 0) { context->SetInterOpParallelNum(kDefaultInterOpParallelNum); @@ -174,6 +185,10 @@ std::shared_ptr ContextUtils::Convert(Context *context) { ret = AddDspDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kCustomDevice) { ret = AddCustomDevice(inner_context.get(), device); + } else if (device->GetDeviceType() == kHIAI) { + auto hiai_device_info = device->Cast(); + ret = AddHIAIDevice(inner_context.get(), hiai_device_info->GetDeviceID(), hiai_device_info->GetPerformanceMode(), + hiai_device_info->GetPriority(), hiai_device_info->GetEnableFP16()); } if (ret != kSuccess) { MS_LOG(ERROR) << "Add device failed!"; diff --git a/mindspore-lite/src/litert/cxx_api/converters.h b/mindspore-lite/src/litert/cxx_api/converters.h index ccae58a16e00b831b1d6232062905c4e565262d7..aaf461f38eb348900e27b56df2d2fd4662645548 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.h +++ b/mindspore-lite/src/litert/cxx_api/converters.h @@ -47,6 +47,8 @@ class MS_API ContextUtils { static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); static Status AddDspDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); static Status AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device); + static Status AddHIAIDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority, + bool enable_fp16); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; } diff --git a/mindspore-lite/src/litert/cxx_api/kernel_executor/CMakeLists.txt b/mindspore-lite/src/litert/cxx_api/kernel_executor/CMakeLists.txt index 60e6437b96523def0b04ef404815d568b6956f12..03158511d27e5411219d4d7b1a4fd878993decf9 100644 --- a/mindspore-lite/src/litert/cxx_api/kernel_executor/CMakeLists.txt +++ b/mindspore-lite/src/litert/cxx_api/kernel_executor/CMakeLists.txt @@ -38,6 +38,10 @@ if(${MSLITE_ENABLE_NPU}) ${CMAKE_CURRENT_SOURCE_DIR}/custom_om_infer.cc ) list(APPEND KERNEL_EXECUTOR_SRC ${CUSTOM_OM_CXX}) + set(HIAI_CUSTOM_OM_CXX + ${CMAKE_CURRENT_SOURCE_DIR}/../../delegate/hiai/hiai_model_kernel.cc + ) + list(APPEND KERNEL_EXECUTOR_SRC ${HIAI_CUSTOM_OM_CXX}) endif() add_library(kernel_executor SHARED ${KERNEL_EXECUTOR_SRC}) diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.cc b/mindspore-lite/src/litert/delegate/delegate_utils.cc index c9aeeb11ca562ff2aa9bb38cef25a0f015c2ef1f..8adda723653b0cb2de32f1f20bb781a84435f2a5 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.cc +++ b/mindspore-lite/src/litert/delegate/delegate_utils.cc @@ -81,4 +81,17 @@ void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size) { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input) { return std::find(inputs.begin(), inputs.end(), input) != inputs.end(); } + +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype) { + if (ms_dtype==DataType::kNumberTypeInt8) { + return hiai::HIAI_DataType::HIAI_DATATYPE_INT8; + }else if (ms_dtype==DataType::kNumberTypeInt32) { + return hiai::HIAI_DataType::HIAI_DATATYPE_INT32; + }else if (ms_dtype==DataType::kNumberTypeFloat16) { + return hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT16; + } + return hiai::HIAI_DataType::HIAI_DATATYPE_FLOAT32; +} +#endif } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/delegate/delegate_utils.h 
b/mindspore-lite/src/litert/delegate/delegate_utils.h index bd85665ea4871af5f7eac9bccdd0111d9d7ba171..bbb03d958199946b83d83d142b7d730e8d99fa57 100644 --- a/mindspore-lite/src/litert/delegate/delegate_utils.h +++ b/mindspore-lite/src/litert/delegate/delegate_utils.h @@ -20,6 +20,9 @@ #include "src/common/log_adapter.h" #include "include/errorcode.h" #include "nnacl_c/op_base.h" +#ifdef SUPPORT_NPU +#include "include/HiAiModelManagerService.h" +#endif namespace mindspore::lite { bool IsSubGraphInputTensor(const std::vector &inputs, mindspore::MSTensor input); @@ -32,6 +35,10 @@ int MaskDataNHWC2NCHWBinary(int mask); void BinaryMaskData2Bool(int src_mask, bool *dst_mask, size_t mask_size); +#ifdef SUPPORT_NPU +hiai::HIAI_DataType MSDataTypeToHIAIDataType(DataType ms_dtype); +#endif + template void AssistDataNHWC2NCHW(void *raw_data, size_t unit_size) { MS_ASSERT(raw_data != nullptr); diff --git a/mindspore-lite/src/litert/delegate/hiai/CMakeLists.txt b/mindspore-lite/src/litert/delegate/hiai/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6899bfc594624626be11c3b190e2b43651bb21b --- /dev/null +++ b/mindspore-lite/src/litert/delegate/hiai/CMakeLists.txt @@ -0,0 +1,35 @@ +include_directories(${DDK_PATH}) +file(GLOB_RECURSE HIAI_NPU_RUNTIME_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/*.cc) + +set(LIBCXX_LIB_DIR "${ANDROID_NDK}/sources/cxx-stl/llvm-libc++/libs/arm64-v8a") +link_directories(${LIBCXX_LIB_DIR}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DANDROID_STL=c++_shared") +add_library(c++_shared SHARED IMPORTED) + +set_target_properties(c++_shared PROPERTIES IMPORTED_LOCATION ${LIBCXX_LIB_DIR}/libc++_shared.so) +add_library(hiai SHARED IMPORTED) +set_target_properties(hiai PROPERTIES IMPORTED_LOCATION ${DDK_LIB_PATH}/libhiai.so) +add_library(hiai_ir SHARED IMPORTED) +set_target_properties(hiai_ir PROPERTIES IMPORTED_LOCATION ${DDK_LIB_PATH}/libhiai_ir.so) +add_library(hiai_ir_build SHARED IMPORTED) +set_target_properties(hiai_ir_build PROPERTIES IMPORTED_LOCATION ${DDK_LIB_PATH}/libhiai_ir_build.so) +if(EXISTS "${DDK_LIB_PATH}/libhiai_model_compatible.so") + add_library(hiai_model_compatible SHARED IMPORTED) + set_target_properties(hiai_model_compatible PROPERTIES IMPORTED_LOCATION + ${DDK_LIB_PATH}/libhiai_model_compatible.so) +else() + message(WARNING "hiai_model_compatible not found: ${DDK_LIB_PATH}/libhiai_model_compatible.so") +endif() +add_library(hiai_kernel_mid OBJECT ${HIAI_NPU_RUNTIME_SRC}) +add_dependencies(hiai_kernel_mid fbs_src) +target_link_libraries( + hiai_kernel_mid + hiai + hiai_ir + hiai_ir_build + c++_shared +) +if(TARGET hiai_model_compatible) + target_link_libraries(hiai_kernel_mid hiai_model_compatible) +endif() \ No newline at end of file diff --git a/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.cc b/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.cc new file mode 100644 index 0000000000000000000000000000000000000000..689218e25bbaa3e8fa5ca283472626d8d9c00ee4 --- /dev/null +++ b/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.cc @@ -0,0 +1,309 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "include/model.h" +#include "src/common/log_adapter.h" +#include "src/common/file_utils.h" +#include "schema/model_generated.h" +#include "schema/ops_generated.h" +#include "flatbuffers/flatbuffers.h" +#include "litert/tensor_category.h" +#include "src/litert/delegate/hiai/hiai_delegate.h" +#include "src/litert/delegate/hiai/hiai_model_kernel.h" + +namespace { +constexpr int32_t kNum2 = 2; +static constexpr int kBitNum1 = 1; +static constexpr int kBitNum8 = 8; +static constexpr int kBitNum16 = 16; +} // namespace + +namespace mindspore { +namespace lite { +Status HIAIDelegate::Init() { + is_support_npu_ = IsSupportNPU(); + if (is_support_npu_) { + MS_LOG(WARNING) + << "Current platform support NPU, set frequency to 3, device type to NPU. Original frequency value is " + << frequency_type_ << " device type is CPU by default."; + device_type_ = 0; + frequency_type_ = 3; + } + return kSuccess; +} + +bool HIAIDelegate::NeedBitUpackCheck(const schema::Tensor &src_tensor) { + if (src_tensor.enableHuffmanCode()) { + return true; + } + bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && + src_tensor.quantParams()->Get(0) != nullptr; + if (need_bit_unpack) { + auto num_bits = src_tensor.quantParams()->Get(0)->numBits(); + need_bit_unpack = ((num_bits >= kBitNum1 && num_bits < kBitNum8) || (num_bits > kBitNum8 && num_bits < kBitNum16)); + } + + return need_bit_unpack; +} + +int HIAIDelegate::DecompressTensor(const schema::Tensor &src_tensor) { + if (src_tensor.weightQuantCompressType() == schema::WeightQuantCompressType_FSE || + src_tensor.weightQuantCompressType() == schema::WeightQuantCompressType_INDEXING || + src_tensor.weightQuantCompressType() == schema::WeightQuantCompressType_SPARSE) { + return RET_NOT_SUPPORT; + } + if (!NeedBitUpackCheck(src_tensor)) { + return RET_NO_CHANGE; + } + MS_LOG(ERROR) << "DecompressTensor Error."; + return RET_ERROR; +} + +Status HIAIDelegate::CheckTensorSupported(const schema::Tensor *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "primitive is nullptr, which type is Tensor."; + return mindspore::kLiteSuccessExit; + } + + int32_t data_type = primitive->dataType(); + if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) { + MS_LOG(ERROR) << "invalid data type. 
" << data_type; + return mindspore::kLiteSuccessExit; + } + + if (primitive->dims() == nullptr) { + MS_LOG(DEBUG) << "Dims of tensor is nullptr"; + return mindspore::kLiteSuccessExit; + } + + if (data_type == kObjectTypeTensorType) { + MS_LOG(ERROR) << "Not support TensorList."; + return mindspore::kLiteNotSupport; + } + + if (primitive->data() == nullptr || primitive->data()->size() <= 0) { + MS_LOG(DEBUG) << "No valid data converted."; + return mindspore::kSuccess; + } else { + auto ret = DecompressTensor(*primitive); + if (ret == RET_NO_CHANGE) { + } else { + MS_LOG(ERROR) << "Not support Decompress Tensor."; + return mindspore::kLiteNotSupport; + } + } + return mindspore::kSuccess; +} + +int HIAIDelegate::CompareVersion(const std::string &version1, const std::string &version2) { + std::istringstream iss1(version1); + std::istringstream iss2(version2); + std::string string1; + std::string string2; + while (!iss1.eof() || !iss2.eof()) { + getline(iss1, string1, '.'); + getline(iss2, string2, '.'); + if (stoi(string1) > stoi(string2)) return 1; + if (stoi(string1) < stoi(string2)) return -1; + string1 = string2 = "0"; + } + return lite::RET_OK; +} + +bool HIAIDelegate::CheckDDKVerGreatEqual(const std::string &spec_version) { + auto client = std::make_shared(); + if (client->GetVersion() != nullptr) { + std::string version = client->GetVersion(); + int ret = CompareVersion(version, spec_version); + if (ret < lite::RET_OK) { + MS_LOG(WARNING) << "DDK Version " << version << " less than " << spec_version; + return false; + } + } else { + MS_LOG(WARNING) << "Get DDK Version failed!"; + return false; + } + return true; +} + +bool HIAIDelegate::IsSupportNPU() { + // Avoid multiple checks + if (CheckDDKVerGreatEqual("100.320.011.019")) { + is_support_npu_ = true; + MS_LOG(INFO) << "Current device supports NPU."; + } else { + is_support_npu_ = false; + MS_LOG(WARNING) << "Current device does not support NPU."; + } + return is_support_npu_; +} + +void HIAIDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { + std::vector node_list; + node_list.reserve(lite_graph.all_nodes_.size()); + MS_LOG(INFO) << "HIAIDelegate ShallowCopyLiteGraph start."; + // copy node + for (auto node : lite_graph.all_nodes_) { + auto new_node = new (std::nothrow) LiteGraph::Node(*node); + if (new_node == nullptr) { + MS_LOG(ERROR) << "New LiteGraph node failed. Origin node:" << node->name_; + for (auto cur_node : node_list) { + delete cur_node; + } + return; + } + node_list.emplace_back(new_node); + } + // copy subgraph + std::vector subgraph_list; + for (auto subgraph : lite_graph.sub_graphs_) { + auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph(*subgraph); + if (new_subgraph == nullptr) { + MS_LOG(ERROR) << "New LiteGraph::Subgraph failed. 
Origin graph:" << subgraph->name_;
+      for (auto cur_subgraph : subgraph_list) {
+        delete cur_subgraph;
+      }
+      return;
+    }
+    subgraph_list.emplace_back(new_subgraph);
+  }
+  for (auto tensor : lite_graph.all_tensors_) {
+    Status ret = CheckTensorSupported(static_cast<const schema::Tensor *>(tensor));
+    if (ret == kLiteError) {
+      MS_LOG(ERROR) << "Tensor supported check failed.";
+      return;
+    }
+  }
+
+  lite_graph_ = new (std::nothrow) lite::LiteGraph();
+  if (lite_graph_ == nullptr) {
+    MS_LOG(ERROR) << "New LiteGraph failed.";
+    delete lite_graph_;
+    return;
+  }
+
+  lite_graph_->name_ = lite_graph.name_;
+  lite_graph_->version_ = lite_graph.version_;
+  lite_graph_->input_indices_ = lite_graph.input_indices_;
+  lite_graph_->output_indices_ = lite_graph.output_indices_;
+  lite_graph_->all_tensors_ = lite_graph.all_tensors_;
+  lite_graph_->all_nodes_ = node_list;
+  lite_graph_->sub_graphs_ = subgraph_list;
+  MS_LOG(INFO) << "HIAIDelegate ShallowCopyLiteGraph success. all_tensors_ size " << lite_graph_->all_tensors_.size()
+               << " all_nodes_ size " << lite_graph_->all_nodes_.size() << " sub_graphs_ size "
+               << lite_graph_->sub_graphs_.size() << " sub_graphs_[0] input_indices_ size "
+               << lite_graph_->sub_graphs_[0]->input_indices_.size() << " sub_graphs_[0] output_indices_ size "
+               << lite_graph_->sub_graphs_[0]->output_indices_.size();
+}
+
+bool HIAIDelegate::IsCustomModel() const {
+  // Check if there is only one Custom kernel in the LiteModel.
+  if (lite_graph_ == nullptr) {
+    MS_LOG(ERROR) << "Current lite graph is null.";
+    return false;
+  }
+  if (lite_graph_->all_nodes_.size() != 1) {
+    MS_LOG(ERROR) << "Current node num in lite graph is:" << lite_graph_->all_nodes_.size() << ".";
+    return false;
+  }
+  auto node = lite_graph_->all_nodes_[0];
+  if (node == nullptr) {
+    MS_LOG(ERROR) << "Current node is null in lite graph.";
+    return false;
+  }
+  if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) {
+    MS_LOG(ERROR) << "Current node type is:" << node->node_type_ << ", expected type is PrimitiveType_Custom.";
+    return false;
+  }
+  return true;
+}
+
+Status HIAIDelegate::Build(DelegateModel<schema::Primitive> *model) {
+  MS_LOG(INFO) << "Choose to build offline inference model.";
+  if (!IsCustomModel()) {
+    MS_LOG(ERROR) << "Not a third-party model.";
+    return kLiteError;
+  }
+  // Get Node Tensor
+  auto node = lite_graph_->all_nodes_[0];
+  MS_CHECK_TRUE_RET(node != nullptr, kLiteError);
+  auto input_num = node->input_indices_.size();  // inputs of the node
+  // at least one input and one OM model buffer(as the last constant input)
+  MS_CHECK_TRUE_RET(input_num >= kNum2, kLiteError);
+  MS_CHECK_TRUE_RET(lite_graph_->all_tensors_.size() >= kNum2, kLiteError);
+  auto input_tensor = lite_graph_->all_tensors_[node->input_indices_[0]];
+  MS_CHECK_TRUE_RET(input_tensor != nullptr, kLiteError);
+  auto model_tensor = lite_graph_->all_tensors_[node->input_indices_[input_num - 1]];
+  MS_CHECK_TRUE_RET(model_tensor != nullptr, kLiteError);
+  MS_CHECK_TRUE_RET(model_tensor->data() != nullptr, kLiteError);
+  model_buffer_ = const_cast<uint8_t *>(model_tensor->data()->data());
+  model_size_ = model_tensor->data()->size();
+  MS_LOG(DEBUG) << "Model input size:" << model->inputs().size() << ", output size:" << model->outputs().size() << ".";
+  auto hiai_model_kernel = new (std::nothrow)
+    HIAIModelKernel(model->inputs(), model->outputs(), model_buffer_, model_size_, frequency_type_, device_type_);
+  if (hiai_model_kernel == nullptr) {
+    MS_LOG(ERROR) << "New HIAIModelKernel failed.";
+    return kLiteError;
+  }
+  (void)model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), hiai_model_kernel);
+  MS_LOG(INFO) << "Replace kernel in HIAIDelegate success.";
+  return kSuccess;
+}
+
+void HIAIDelegate::FreeLiteGraph(lite::LiteGraph **liteGraph) {
+  if (liteGraph != nullptr && *liteGraph != nullptr) {
+    MS_LOG(INFO) << "Start to free LiteGraph.";
+    auto graph = *liteGraph;
+    graph->name_.clear();
+    graph->input_indices_.clear();
+    graph->output_indices_.clear();
+    MS_LOG(INFO) << "Destroying nodes.";
+
+    for (size_t idx = 0; idx < graph->all_nodes_.size(); idx++) {
+      if (graph->all_nodes_[idx] != nullptr) {
+        delete graph->all_nodes_[idx];
+        graph->all_nodes_[idx] = nullptr;
+      }
+    }
+    MS_LOG(INFO) << "Destroying subgraphs.";
+
+    for (size_t idx = 0; idx < graph->sub_graphs_.size(); idx++) {
+      if (graph->sub_graphs_[idx] != nullptr) {
+        delete graph->sub_graphs_[idx];
+        graph->sub_graphs_[idx] = nullptr;
+      }
+    }
+    delete graph;
+    *liteGraph = nullptr;
+  } else {
+    MS_LOG(WARNING) << "lite_graph is nullptr, no need to free.";
+  }
+}
+
+HIAIDelegate::~HIAIDelegate() {
+  MS_LOG(INFO) << "Delete HIAIDelegate.";
+  if (lite_graph_ != nullptr) {
+    delete lite_graph_;
+    lite_graph_ = nullptr;
+  }
+}
+}  // namespace lite
+}  // namespace mindspore
+
diff --git a/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.h b/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb4510b91e982ec90f6696d2e5508e39989a1e56
--- /dev/null
+++ b/mindspore-lite/src/litert/delegate/hiai/hiai_delegate.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_HIAI_NNR_DELEGATE_H +#define MINDSPORE_HIAI_NNR_DELEGATE_H + +#include +#include +#include +#include "include/cxx_api/delegate.h" +#include "include/model.h" +#include "src/litert/inner_context.h" +#include "schema/model_generated.h" +#include "include/HiAiModelManagerService.h" +#include "src/litert/delegate/hiai/hiai_model_kernel.h" + +namespace mindspore { +namespace lite { +class HIAIDelegate : public Delegate { + public: + HIAIDelegate() = default; + explicit HIAIDelegate(const HIAIDeviceInfo &hiai_device_info) : hiai_device_info_(hiai_device_info) {} + ~HIAIDelegate() override; + Status Init() override; + Status Build(DelegateModel *model) override; + void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); + void FreeLiteGraph(lite::LiteGraph **liteGraph); + + private: + bool IsSupportNPU(); + bool CheckDDKVerGreatEqual(const std::string &spec_version); + int CompareVersion(const std::string &version1, const std::string &version2); + bool IsCustomModel() const; + Status CheckTensorSupported(const schema::Tensor *primitive); + bool NeedBitUpackCheck(const schema::Tensor &src_tensor); + int DecompressTensor(const schema::Tensor &src_tensor); + int32_t frequency_type_ = 1; // hiai::AiModelDescription_Frequency_LOW + int32_t device_type_ = 3; // CPU + uint8_t *model_buffer_ = nullptr; + size_t model_size_ = -1; + HIAIDeviceInfo hiai_device_info_; + LiteGraph *lite_graph_ = nullptr; + std::string cache_dir_{}; + bool is_support_npu_ = false; + std::vector in_tensors_; + std::vector out_tensors_; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_HIAI_NNR_DELEGATE_H diff --git a/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.cc b/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..74d3655342f2a2a905b8c5cf6746518c5253fe4d --- /dev/null +++ b/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.cc @@ -0,0 +1,285 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include "src/litert/delegate/hiai/hiai_model_kernel.h" +#include "litert/cxx_api/tensor/tensor_impl.h" +#include "include/cxx_api/data_type.h" + +namespace mindspore { +int HIAIModelKernel::Prepare() { + model_manager_client_ = std::make_shared(); + model_builder = std::make_shared(model_manager_client_); + auto client_ret = model_manager_client_->Init(nullptr); // sync mode + if (client_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Init modelBuilder failed."; + return lite::RET_ERROR; + } + // Build Model + int build_ret = Build(hiai_model_buffer_, hiai_model_size_); + if (build_ret != lite::RET_OK) { + MS_LOG(ERROR) << "Build OMModelBuffer failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "HIAIModel Kernel build OM model buffer success."; + return lite::RET_OK; +} + +int HIAIModelKernel::Build(uint8_t *model_data, size_t model_data_length) { + MS_LOG(INFO) << "HIAIModelKernel Build Function start."; + MS_CHECK_TRUE_RET(model_data_length != 0, kLiteError); + if (model_data == nullptr) { + MS_LOG(ERROR) << "Current model_data is invalid, please check model file."; + return lite::RET_ERROR; + } + void *hiai_modelData = model_data; + if (frequency_type_ == -1 || device_type_ == -1) { + MS_LOG(ERROR) << "Create model description failed. Current frequency_type_ is :" << frequency_type_ + << ", device_type_:" << device_type_ << "."; + return lite::RET_ERROR; + } + std::vector> model_descs; + std::unordered_map, hiai::MemBuffer *> builder_buffer_map; + MS_LOG(INFO) << "Create model description: version [" << model_manager_client_->GetVersion() + << "], frequency_type_ is " << frequency_type_ << ", device_type_ is " << device_type_ << "."; + std::shared_ptr model_desc = + std::make_shared(model_name_, frequency_type_, 0, 1, device_type_); + model_descs.push_back(model_desc); + auto model_buffer = model_builder->InputMemBufferCreate(hiai_modelData, model_data_length); + if (model_buffer == nullptr) { + MS_LOG(ERROR) << "Hiai Model Builder input memory buffer create failed, model data size:" << model_data_length; + return lite::RET_ERROR; + } + builder_buffer_map.insert({model_builder, model_buffer}); + model_desc->SetModelBuffer(model_buffer->GetMemBufferData(), model_buffer->GetMemBufferSize()); + MS_LOG(INFO) << "Hiai Model Builder set model buffer success."; + + if (!model_descs.empty()) { + auto load_ret = model_manager_client_->Load(model_descs); + if (load_ret != hiai::AI_SUCCESS) { + for (auto it : builder_buffer_map) { + it.first->MemBufferDestroy(it.second); + } + builder_buffer_map.clear(); + MS_LOG(ERROR) << "Hiai Client load model failed and clear Hiai model buffer."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "Hiai Client load model success."; + model_descs.clear(); + } + // Init Hiai IO tensor + if (InitHiaiIOTensors() != lite::RET_OK) { + MS_LOG(ERROR) << "Hiai kernel InitHiaiIOTensors failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "Hiai NNRT kernel init Hiai IO tensor and load model success."; + return lite::RET_OK; +} + +int HIAIModelKernel::InitHiaiIOTensors() { + std::vector input_dimension; + std::vector output_dimension; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai Client is nullptr."; + return lite::RET_ERROR; + } + auto get_io_dim_ret = model_manager_client_->GetModelIOTensorDim(model_name_, input_dimension, output_dimension); + if (get_io_dim_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "Get hiai input and output tensor dims failed." 
<< get_io_dim_ret;
+    return lite::RET_ERROR;
+  }
+  MS_LOG(DEBUG) << "Input NCHW :" << input_dimension[0].GetNumber() << " " << input_dimension[0].GetChannel() << " "
+                << input_dimension[0].GetHeight() << " " << input_dimension[0].GetWidth();
+  MS_LOG(DEBUG) << "Output NCHW :" << output_dimension[0].GetNumber() << " " << output_dimension[0].GetChannel() << " "
+                << output_dimension[0].GetHeight() << " " << output_dimension[0].GetWidth();
+  auto in_tensor_ret = UpdateInputTensorVec(input_dimension);
+  if (in_tensor_ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Update input tensor vector failed. " << in_tensor_ret;
+    return lite::RET_ERROR;
+  }
+  auto out_tensor_ret = UpdateOutputTensorVec(output_dimension);
+  if (out_tensor_ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Update output tensor vector failed. " << out_tensor_ret;
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+int HIAIModelKernel::UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension) {
+  if (input_dimension.empty()) {
+    MS_LOG(ERROR) << "Hiai input tensor dimension is empty.";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(DEBUG) << "Hiai input_dimension size:" << input_dimension.size();
+  for (size_t i = 0; i < input_dimension.size(); i++) {
+    std::shared_ptr<hiai::AiTensor> input = std::make_shared<hiai::AiTensor>();
+    if (input->Init(&input_dimension[i], lite::MSDataTypeToHIAIDataType(inputs_[i].DataType())) != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "Input AiTensor init failed.";
+      return lite::RET_ERROR;
+    }
+    hiai_inputs_tensors_.push_back(input);
+  }
+  if (hiai_inputs_tensors_.empty()) {
+    MS_LOG(ERROR) << "Hiai input tensor is empty.";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "After UpdateInputTensorVec hiai_inputs_tensors_ size:" << hiai_inputs_tensors_.size();
+  return lite::RET_OK;
+}
+
+int HIAIModelKernel::UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension) {
+  if (output_dimension.empty()) {
+    MS_LOG(ERROR) << "Hiai output tensor dimension is empty.";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(DEBUG) << "Hiai output_dimension size:" << output_dimension.size();
+  for (size_t i = 0; i < output_dimension.size(); i++) {
+    std::shared_ptr<hiai::AiTensor> output = std::make_shared<hiai::AiTensor>();
+    int ret = output->Init(&output_dimension[i], lite::MSDataTypeToHIAIDataType(outputs_[i].DataType()));
+    if (ret != hiai::AI_SUCCESS) {
+      return lite::RET_ERROR;
+    }
+    hiai_outputs_tensors_.push_back(output);
+  }
+  if (hiai_outputs_tensors_.empty()) {
+    MS_LOG(ERROR) << "Hiai output tensor is empty.";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "After UpdateOutputTensorVec hiai_outputs_tensors_ size:" << hiai_outputs_tensors_.size();
+  return lite::RET_OK;
+}
+
+int HIAIModelKernel::Execute() {
+  // Get MS INPUT Tensors
+  MS_LOG(INFO) << "Before HIAI Kernel execute, MSTensor need to be converted to HiaiTensor. 
Inputs_ size: " + << inputs_.size() << ", outputs_ size: " << outputs_.size(); + for (auto &tensor : inputs_) { + MS_LOG(DEBUG) << "Input Tensor:" << tensor.Name() << " shape:" << tensor.Shape() << " size:" << tensor.DataSize(); + } + for (auto &tensor : outputs_) { + MS_LOG(DEBUG) << "Output Tensor:" << tensor.Name() << " shape:" << tensor.Shape() << " size:" << tensor.DataSize(); + } + auto ms_hiai_ret = ConvertMSTensorToHiaiTensor(); + if (ms_hiai_ret != lite::RET_OK) { + MS_LOG(ERROR) << "ConvertMSTensorToHiaiTensor failed."; + return lite::RET_ERROR; + } + auto predict_ret = Predict(); + if (predict_ret != hiai::AI_SUCCESS) { + MS_LOG(ERROR) << "HIAIModelKernel Predict failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "HIAIModelKernel predict model success."; + auto hiai_ms_ret = ConvertHiaiTensorToMSTensor(); + if (hiai_ms_ret != lite::RET_OK) { + MS_LOG(ERROR) << "ConvertHiaiTensorToMSTensor failed."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "HIAI Kernel predict done, and convert HiaiTensor to MSTensor success."; + return lite::RET_OK; +} + +int HIAIModelKernel::Predict() { + hiai::AiContext context; + std::string key = "model_name"; + std::string value = model_name_; + context.AddPara(key, value); + int32_t iStamp; + if (model_manager_client_ == nullptr) { + MS_LOG(ERROR) << "Hiai client is nullptr."; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "hiai input tensor size:" << hiai_inputs_tensors_.size() + << " output tensor num:" << hiai_outputs_tensors_.size() + << " output tensor buffer:" << hiai_outputs_tensors_[0]->GetBuffer() + << " datasize:" << hiai_outputs_tensors_[0]->GetSize(); + int ret = model_manager_client_->Process(context, hiai_inputs_tensors_, hiai_outputs_tensors_, 3000, iStamp); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "HIAIModelKernel Predict failed by Hiai client using Process function."; + return lite::RET_ERROR; + } + MS_LOG(INFO) << "HIAIModelKernel Predict model success, ret:" << ret << " stamp:" << iStamp; + return lite::RET_OK; +} + +int HIAIModelKernel::ConvertMSTensorToHiaiTensor() { + MS_LOG(INFO) << "ConvertMSTensorToHiaiTensor ms_input tensor num:" << inputs_.size() + << " hiai_input tensor num:" << hiai_inputs_tensors_.size(); + if (hiai_inputs_tensors_.size() != inputs_.size()) { + MS_LOG(ERROR) << "ms_input and hiai_input have different size. ms_input tensor num:" << inputs_.size() + << " hiai_input tensor num:" << hiai_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < hiai_inputs_tensors_.size(); i++) { + if (hiai_inputs_tensors_.at(i)->GetSize() != inputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_input and hiai_input have different dataSize. 
ms_input tensor dataSize " + << inputs_.at(i).DataSize() << " hiai_input tensor num:" << hiai_inputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = inputs_.at(i).MutableData(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << model_name_ << ", the ms_input at [" << i << "], tensor name:" << inputs_.at(i).Name() + << " buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = hiai_inputs_tensors_.at(i)->GetBuffer(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << model_name_ << ", the hiai_input at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "at [" << i << "] hiai_input data size:" << hiai_inputs_tensors_.at(i)->GetSize() + << ", buffer:" << dest_buffer << "\n ms_input data size:" << inputs_.at(i).DataSize() + << ", buffer:" << src_buffer; + std::memcpy(dest_buffer, src_buffer, inputs_.at(i).DataSize()); + } + MS_LOG(INFO) << "ConvertMSTensorToHiaiTensor success."; + return lite::RET_OK; +} + +int HIAIModelKernel::ConvertHiaiTensorToMSTensor() { + MS_LOG(INFO) << "ConvertHiaiTensorToMSTensor ms_output tensor num:" << outputs_.size() + << " hiai_output tensor num:" << hiai_outputs_tensors_.size(); + if (hiai_outputs_tensors_.size() != outputs_.size()) { + MS_LOG(ERROR) << "ms_output and hiai_output have different size. ms_output tensor num:" << outputs_.size() + << " hiai_output tensor num:" << hiai_outputs_tensors_.size(); + return lite::RET_ERROR; + } + for (size_t i = 0; i < hiai_outputs_tensors_.size(); i++) { + if (hiai_outputs_tensors_.at(i)->GetSize() != outputs_.at(i).DataSize()) { + MS_LOG(ERROR) << "ms_output and hiai_output have different dataSize. ms_output tensor dataSize " + << outputs_.at(i).DataSize() + << " hiai_output tensor num:" << hiai_outputs_tensors_.at(i)->GetSize(); + return lite::RET_ERROR; + } + auto src_buffer = hiai_outputs_tensors_.at(i)->GetBuffer(); + if (src_buffer == nullptr) { + MS_LOG(ERROR) << "For " << model_name_ << ", the hiai_output at [" << i << "], buffer is null."; + return lite::RET_ERROR; + } + auto dest_buffer = outputs_.at(i).MutableData(); + if (dest_buffer == nullptr) { + MS_LOG(ERROR) << "For " << model_name_ << ", the ms_output at [" << i + << "], tensor name:" << outputs_.at(i).Name() << " buffer is null."; + return lite::RET_ERROR; + } + MS_LOG(DEBUG) << "at [" << i << "] hiai_output data size:" << hiai_outputs_tensors_.at(i)->GetSize() + << ", buffer:" << src_buffer << " ms_output data size:" << outputs_.at(i).DataSize() + << ", buffer:" << dest_buffer; + std::memcpy(dest_buffer, src_buffer, hiai_outputs_tensors_.at(i)->GetSize()); + } + MS_LOG(INFO) << "ConvertHiaiTensorToMSTensor success."; + return lite::RET_OK; +} +} // namespace mindspore diff --git a/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.h b/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..f85d0d6f4f499f830a3d79691c0fcddca9a29cc6 --- /dev/null +++ b/mindspore-lite/src/litert/delegate/hiai/hiai_model_kernel.h @@ -0,0 +1,79 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LITE_HIAI_MODEL_KERNEL_H
+#define LITE_HIAI_MODEL_KERNEL_H
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+#include "include/cxx_api/kernel.h"
+#include "src/common/log_adapter.h"
+#include "src/litert/inner_context.h"
+#include "src/litert/delegate/delegate_utils.h"
+#include "include/errorcode.h"
+#include "include/HiAiModelManagerService.h"
+
+namespace mindspore {
+class HIAIModelKernel : public kernel::Kernel {
+  /**
+   * The whole third-party model is wrapped into a single kernel.
+   */
+ public:
+  HIAIModelKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs, uint8_t *hiai_model_buffer,
+                  size_t hiai_model_size, int32_t frequency_type, int32_t device_type)
+      : kernel::Kernel(inputs, outputs, nullptr, nullptr),
+        hiai_model_buffer_(hiai_model_buffer),
+        hiai_model_size_(hiai_model_size),
+        frequency_type_(frequency_type),
+        device_type_(device_type) {}
+  int Prepare() override;
+  int ReSize() override { return lite::RET_OK; }
+  int Execute() override;
+  ~HIAIModelKernel() {
+    model_manager_client_ = nullptr;
+    model_builder = nullptr;
+    for (auto t : hiai_inputs_tensors_) {
+      t.reset();
+    }
+    hiai_inputs_tensors_.clear();
+    for (auto t : hiai_outputs_tensors_) {
+      t.reset();
+    }
+    hiai_outputs_tensors_.clear();
+  }
+
+ private:
+  int Build(uint8_t *modelData, size_t modelDataLength);
+  int InitHiaiIOTensors();
+  int UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension);
+  int UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension);
+  int Predict();
+  int ConvertMSTensorToHiaiTensor();
+  int ConvertHiaiTensorToMSTensor();
+  std::shared_ptr<hiai::AiModelMngerClient> model_manager_client_ = nullptr;
+  std::shared_ptr<hiai::AiModelBuilder> model_builder = nullptr;
+  std::string model_name_ = "Third_Party_Model";
+  uint8_t *hiai_model_buffer_;
+  size_t hiai_model_size_;
+  int32_t frequency_type_ = -1;
+  int32_t device_type_ = -1;
+  std::vector<std::shared_ptr<hiai::AiTensor>> hiai_inputs_tensors_;
+  std::vector<std::shared_ptr<hiai::AiTensor>> hiai_outputs_tensors_;
+};
+}  // namespace mindspore
+
+#endif  // LITE_HIAI_MODEL_KERNEL_H
+
diff --git a/mindspore-lite/src/litert/infer_manager.cc b/mindspore-lite/src/litert/infer_manager.cc
index 6d7e7c2038f5c4d011068a6650eac2556c8c369c..53c235075d7892eebac74c620846714d60443202 100644
--- a/mindspore-lite/src/litert/infer_manager.cc
+++ b/mindspore-lite/src/litert/infer_manager.cc
@@ -189,6 +189,10 @@ int KernelInferShape(const std::vector &inputs, const std::vecto
     MS_LOG(ERROR) << "No input!";
     return RET_ERROR;
   }
+  if (parameter->type_ == static_cast<int>(PrimType::PrimType_Inner_ThirdPartyModel)) {
+    MS_LOG(INFO) << "No need infer shape for PrimType_Inner_ThirdPartyModel.";
+    return RET_OK;
+  }
   std::vector<TensorC *> in_tensors;
   std::vector<TensorC *> out_tensors;
   int ret = GenerateInTensorC(inputs, &in_tensors, allocator);
diff --git a/mindspore-lite/src/litert/inner_context.cc b/mindspore-lite/src/litert/inner_context.cc
index 91fbbd18e84f0570189d94ae132194a57dc255f9..542f06a9bdb361f067e987825aaab48d716163c5 100644
--- a/mindspore-lite/src/litert/inner_context.cc
+++ b/mindspore-lite/src/litert/inner_context.cc
@@ -123,7 +123,9 @@ int InnerContext::Init() {
   }
 #endif
   }
-
+  if (IsDeviceTypeEnabled(DT_HIAI)) {
+    MS_LOG(INFO) << "HIAI enabled.";
+  }
   if
(CreateThreadPool(false)) { MS_LOG(ERROR) << "CreateThreadPool failed."; return RET_ERROR; diff --git a/mindspore-lite/src/litert/inner_context.h b/mindspore-lite/src/litert/inner_context.h index 19f61ad84e955845fe9bae72204f79ec6710227e..25256e98a6e126eb829554e0e6d119ce3644ed59 100644 --- a/mindspore-lite/src/litert/inner_context.h +++ b/mindspore-lite/src/litert/inner_context.h @@ -75,6 +75,14 @@ typedef struct CustomDeviceInfo { std::shared_ptr user_defined_device_info_; } CustomDeviceInfo; +typedef struct HIAIDeviceInfo { + size_t device_id_ = 0; + int priority_ = 0; + int performance_mode_ = 0; + bool enable_fp16_ = false; + int frequency_ = 3; /**< npu frequency inference, low 1, medium 2, high 3, extreme 4, other values will be set to 3 */ +} HIAIDeviceInfo; + struct DeviceInfo { CpuDeviceInfo cpu_device_info_; GpuDeviceInfo gpu_device_info_; @@ -82,6 +90,7 @@ struct DeviceInfo { AscendDeviceInfo ascend_device_info_; DspDeviceInfo dsp_device_info_; CustomDeviceInfo custom_device_info_; + HIAIDeviceInfo hiai_device_info_; }; struct DeviceContext { diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc new file mode 100644 index 0000000000000000000000000000000000000000..4917b823af81ca3fce2c052c7553dcb80b373804 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/cpu/base/custom_base.h" +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "nnacl_c/op_base.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore::kernel { +int CustomBaseCPUKernel::Prepare() { return RET_OK; } + +int CustomBaseCPUKernel::ReSize() { return RET_OK; } + +int CustomBaseCPUKernel::Run() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeUInt8, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimType_Inner_ThirdPartyModel, LiteKernelCreator) +} // namespace mindspore::kernel \ No newline at end of file diff --git a/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h new file mode 100644 index 0000000000000000000000000000000000000000..711fc6546553b88698d146d39fd7b4be197ead61 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/base/custom_base.h @@ -0,0 +1,43 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ + +#include +#include "src/litert/lite_kernel.h" +#include "nnacl_c/custom_parameter.h" + +namespace mindspore::kernel { +class CustomBaseCPUKernel : public LiteKernel { + public: + CustomBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : LiteKernel(parameter, inputs, outputs, ctx) { + custom_param_ = reinterpret_cast(op_parameter_); + } + ~CustomBaseCPUKernel() override = default; + + int Prepare() override; + int ReSize() override; + int Run() override; + + private: + CustomParameter *custom_param_ = nullptr; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ \ No newline at end of file diff --git a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/custom_third_party.h b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/custom_third_party.h new file mode 100644 index 0000000000000000000000000000000000000000..b7e7ac642af9d7737327edd68ad5ef2d2378e1e6 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/custom_third_party.h @@ -0,0 +1,26 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_NNACL_C_CUSTOM_THIRDPARTY_H_ +#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_NNACL_C_CUSTOM_THIRDPARTY_H_ + +#include "nnacl_c/op_base.h" + +typedef struct CustomThirdPartyParameter { + // Primitive parameter + OpParameter op_parameter_; +} CustomThirdPartyParameter; + +#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_NNACL_C_CUSTOM_THIRDPARTY_H_ \ No newline at end of file diff --git a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h index bbe93692e643e892afc7ef74900311d32e6b5ff8..f4482a5f68d5bb7c9daf14174013f78f09dc0161 100644 --- a/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h +++ b/mindspore-lite/src/litert/kernel/cpu/nnacl_c/op_base.h @@ -571,6 +571,7 @@ enum PrimType { PrimType_Inner_CustomIsInf = 10016, PrimType_Inner_Conv3D = 10017, PrimType_Inner_GridSampler = 10018, + PrimType_Inner_ThirdPartyModel = 10019, PrimType_InnerOpMax, PrimType_InnerOpMin = PrimType_Inner_ToFormat }; diff --git a/mindspore-lite/src/litert/kernel_registry.cc b/mindspore-lite/src/litert/kernel_registry.cc index e0dcb63e688d3eb9f10240d42d99e4c60714e4dd..2d6230a137199463061f57146b50f7d3056d321f 100644 --- a/mindspore-lite/src/litert/kernel_registry.cc +++ b/mindspore-lite/src/litert/kernel_registry.cc @@ -178,6 +178,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con registry::KernelDesc desc{static_cast(key.data_type), key.type, key.kernel_arch, key.provider}; auto creator = registry::RegisterKernel::GetCreator(static_cast(primitive), &desc); if (creator == nullptr) { + MS_LOG(INFO) << "No custom kernel creator registered, not supported."; return RET_NOT_SUPPORT; } @@ -205,6 +206,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con } } #endif + MS_LOG(ERROR) << "Common base kernel registry failed."; return RET_ERROR; } @@ -257,6 +259,7 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const if (ret == RET_OK) { (*kernel)->set_context(ctx); } + MS_LOG(INFO) << "Get kernel " << (ret == RET_OK ? "succeeded."
: "failed."); return ret; } #endif @@ -271,10 +274,12 @@ int KernelRegistry::GetKernelExec(const std::vector &in_tensors, const kernel_exec->set_desc(modify_key); kernel_exec->set_context(ctx); *kernel = kernel_exec; + MS_LOG(INFO) << "Get Lite Kernel succeed for type:" << PrimitiveCurVersionTypeName(key.type) + << " by type index:" << key.type << "."; return RET_OK; } } - MS_LOG(WARNING) << "common cpu kernel registry failed"; + MS_LOG(ERROR) << "common cpu kernel registry for lite_kernel failed."; return RET_ERROR; } } // namespace mindspore::lite diff --git a/mindspore-lite/src/litert/lite_session.cc b/mindspore-lite/src/litert/lite_session.cc index f6c834d5871730c10685c912b47bced1e3011ab9..88020ee29b1b39966c3ae532f64bf4088a70f181 100644 --- a/mindspore-lite/src/litert/lite_session.cc +++ b/mindspore-lite/src/litert/lite_session.cc @@ -46,6 +46,7 @@ #endif #ifdef SUPPORT_NPU #include "src/litert/delegate/npu/npu_delegate.h" +#include "src/litert/delegate/hiai/hiai_delegate.h" #endif #ifdef GPU_OPENCL #include "src/litert/kernel/opencl/opencl_subgraph.h" @@ -645,6 +646,7 @@ int LiteSession::CompileGraph(Model *model) { this->context_->set_infer_checker(InferCheckerAll); } is_running_.store(false); + MS_LOG(INFO) << "CompileGraph for current model success."; return RET_OK; } @@ -782,6 +784,7 @@ int LiteSession::PrepareKernels(const Model *model) { MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; return ret; } + MS_LOG(INFO) << "Set Allocator For Delegate Kernels success."; } if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) { @@ -1039,6 +1042,25 @@ int LiteSession::CreateNPUDelegate() { return RET_OK; } +int LiteSession::CreateHIAIDelegate() { +#ifdef SUPPORT_NPU + auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(), + [](const DeviceContext &device) { return device.device_type_ == lite::DT_HIAI; }); + if (iter == context_->device_list_.end()) { + MS_LOG(ERROR) << "Found non device info"; + return RET_ERROR; + } + delegate_ = std::make_shared(iter->device_info_.hiai_device_info_); + if (delegate_ == nullptr) { + MS_LOG(ERROR) << "New HIAI delegate failed"; + return RET_ERROR; + } + delegate_device_type_ = DT_HIAI; + this->context_->delegate = delegate_; +#endif + return RET_OK; +} + int LiteSession::CreateNNAPIDelegate() { #ifdef SUPPORT_NNAPI bool enable_fp16 = @@ -1090,10 +1112,13 @@ int LiteSession::InitDelegate() { ret = CreateNPUDelegate(); } else if (context_->IsDeviceTypeEnabled(DT_GPU)) { ret = CreateTensorRTDelegate(); + }else if (context_->IsDeviceTypeEnabled(DT_HIAI)) { + ret = CreateHIAIDelegate(); } } if (ret != RET_OK) { + MS_LOG(INFO) << "Create Delegate failed. 
ret info: " << ret; return ret; } if (delegate_ != nullptr) { diff --git a/mindspore-lite/src/litert/lite_session.h b/mindspore-lite/src/litert/lite_session.h index 78bc3da382f68b9edf46c74eb31e2b47bc82dd50..d50dae8fe77c14b24f4bab4821e02582b27fcfd0 100644 --- a/mindspore-lite/src/litert/lite_session.h +++ b/mindspore-lite/src/litert/lite_session.h @@ -182,6 +182,7 @@ class MS_API LiteSession { int CreateNNAPIDelegate(); int CreateCoreMLDelegate(); int InitDelegate(); + int CreateHIAIDelegate(); int InitGPURuntime(); int InitDSPRuntime(); int InitSharedThreadPool(); diff --git a/mindspore-lite/src/litert/scheduler.cc b/mindspore-lite/src/litert/scheduler.cc index f5f4a709caa61e747f96eee754365a614a95c244..def1ee4087a053a86679482519201985018ff995 100644 --- a/mindspore-lite/src/litert/scheduler.cc +++ b/mindspore-lite/src/litert/scheduler.cc @@ -58,6 +58,9 @@ #if defined(MSLITE_ENABLE_CLOUD_INFERENCE) && defined(ENABLE_MINDRT) #include "thread/parallel_thread_pool_manager.h" #endif +#ifdef SUPPORT_NPU +#include "src/litert/delegate/hiai/hiai_delegate.h" +#endif using AbstractBaseModel = mindspore::infer::AbstractBaseModel; @@ -255,6 +258,13 @@ int Scheduler::CheckCpuValid(const std::vector *dst_kernel if (context_->IsDeviceTypeEnabled(DT_CPU)) { return RET_OK; } + // Custom model + if (dst_kernels->size() == 1 && (*dst_kernels)[0]->name() == "Custom" && + (*dst_kernels)[0]->desc().arch == kernel::KERNEL_ARCH::kDelegate) { + MS_LOG(INFO) << "kernel type is " << (*dst_kernels)[0]->name() << " and device type is " + << (*dst_kernels)[0]->desc().arch << "."; + return RET_OK; + } for (auto kernel : *dst_kernels) { if (kernel->desc().arch == kernel::KERNEL_ARCH::kCPU) { MS_LOG(ERROR) << "kernel: " << kernel->name() << " only support in CPU."; @@ -405,7 +415,13 @@ int Scheduler::Schedule(std::vector *dst_kernels) { MS_LOG(ERROR) << "Repalce delegate kernels failed."; return ret; } - + for (auto kernel : *dst_kernels) { + MS_LOG(DEBUG) << "kernel: [" << kernel->name() << "] " + << "TypeId(" << kernel->desc().data_type << "); " + << "OpType(" << PrimitiveCurVersionTypeName(kernel->desc().type) << "); " + << "format(" << kernel->desc().format << "); " + << "arch(" << kernel->desc().arch << ")"; + } ret = CheckCpuValid(dst_kernels); if (ret != RET_OK) { MS_LOG(ERROR) << "kernels invalid in set devices."; @@ -501,6 +517,13 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker MS_LOG(ERROR) << "New delegate model failed."; return RET_NULL_PTR; } + + #ifdef SUPPORT_NPU + if (context_->IsDeviceTypeEnabled(DT_HIAI)) { + auto delegate = static_cast(delegate_.get()); + delegate->ShallowCopyLiteGraph(this->src_model_->graph_); + } + #endif auto ret = delegate_->Build(model); if (ret != mindspore::kSuccess) { delete model; @@ -1001,8 +1024,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: MS_CHECK_TRUE_MSG(op_parameter != nullptr, RET_ERROR, "op parameter is nullptr."); auto op_type = op_parameter->type_; if (!KernelRegistry::GetInstance()->SupportKernel(desc)) { - MS_LOG(INFO) << "unsupported op_type: " << PrimitiveCurVersionTypeName(op_type) - << ", data_type: " << desc.data_type; + MS_LOG(INFO) << "Unsupported op_type index: " << op_type << ", op_type: " << PrimitiveCurVersionTypeName(op_type) + << ", data_type: " << desc.data_type << "."; return RET_NOT_SUPPORT; } kernel::KernelKey cpu_desc = desc; @@ -1041,8 +1064,8 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, 
ms_context_, cpu_desc, op_parameter, kernel); if (ret == RET_OK) { - MS_LOG(DEBUG) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type - << ") op success: " << PrimitiveCurVersionTypeName(op_type); + MS_LOG(INFO) << "Get TypeId(expect = " << kernel_data_type << ", real = " << cpu_desc.data_type + << ") op success: " << PrimitiveCurVersionTypeName(op_type); if (is_train_session_) { ret = (*kernel)->Prepare(); RestoreTensorData(&restored_origin_tensors); @@ -1288,8 +1311,9 @@ kernel::KernelExec *Scheduler::FindBackendKernel(const std::vector &in if (status == RET_OK) { return kernel; } else { - MS_LOG(DEBUG) << "Get fp16 op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(desc.type) << " " - << node->name_; + MS_LOG(INFO) << "Get op failed, schedule to CPU for node: " << node->name_ << ", prefer_data_type is " + << (prefer_data_type == kNumberTypeFloat16 ? "fp16" : "unknown") << ", desc.type index:" << desc.type + << ", desc.type:" << PrimitiveCurVersionTypeName(desc.type) << "."; if (status == RET_ERROR) { op_parameters_.erase(node->output_indices_.at(0)); auto ret = InferNodeShape(node); diff --git a/mindspore-lite/tools/benchmark/benchmark_base.cc b/mindspore-lite/tools/benchmark/benchmark_base.cc index 333e77ff31063f4e76cc72f142e53131c403dbd3..7edc83670ea6e403438bccd1f3b556dab602fa9d 100644 --- a/mindspore-lite/tools/benchmark/benchmark_base.cc +++ b/mindspore-lite/tools/benchmark/benchmark_base.cc @@ -405,7 +405,7 @@ int BenchmarkBase::CheckThreadNumValid() { int BenchmarkBase::CheckDeviceTypeValid() { if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU" && flags_->device_ != "Ascend" && - flags_->device_ != "DSP") { + flags_->device_ != "DSP" && flags_->device_ != "HIAI") { MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; std::cerr << "Device type:" << flags_->device_ << " is not supported." 
<< std::endl; return RET_ERROR; diff --git a/mindspore-lite/tools/benchmark/benchmark_c_api.cc b/mindspore-lite/tools/benchmark/benchmark_c_api.cc index d66c65f6feab84e983c5658565d123abf71969b9..dfdb7ab52cf42b7a8244d3d0a741a9d12d786f87 100644 --- a/mindspore-lite/tools/benchmark/benchmark_c_api.cc +++ b/mindspore-lite/tools/benchmark/benchmark_c_api.cc @@ -125,6 +125,15 @@ int BenchmarkCApi::InitContext() { MSDeviceInfoSetFrequency(npu_device_info, kFrequencyDefault); MSContextAddDeviceInfo(context_, npu_device_info); } + if (flags_->device_ == "HIAI") { + MSDeviceInfoHandle hiai_device_info = MSDeviceInfoCreate(kMSDeviceTypeHIAI); + if (hiai_device_info == nullptr) { + BENCHMARK_LOG_ERROR("MSDeviceInfoCreate for hiai_device_info failed."); + return RET_ERROR; + } + MSDeviceInfoSetFrequency(hiai_device_info, kFrequencyDefault); + MSContextAddDeviceInfo(context_, hiai_device_info); + } MSDeviceInfoHandle cpu_device_info = MSDeviceInfoCreate(kMSDeviceTypeCPU); MSDeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_); MSContextAddDeviceInfo(context_, cpu_device_info); diff --git a/mindspore-lite/tools/benchmark/benchmark_unified_api.cc b/mindspore-lite/tools/benchmark/benchmark_unified_api.cc index 722d7f74c38944f40828082104bf4c47436b1efa..c67b548c5d4c83062d8d61abc6db7a5e52c7239c 100644 --- a/mindspore-lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore-lite/tools/benchmark/benchmark_unified_api.cc @@ -530,6 +530,18 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr InitMSContextForAscend(context, &device_list); } + if (flags_->device_ == "HIAI" || flags_->device_ == "Auto") { + std::shared_ptr hiai_device_info = std::make_shared(); + if (hiai_device_info == nullptr) { + MS_LOG(ERROR) << "Create hiai_device_info failed."; + return RET_ERROR; + } + hiai_device_info->SetEnableFP16(flags_->enable_fp16_); + hiai_device_info->SetFrequency(kFrequencyDefault); + device_list.push_back(hiai_device_info); + } + + if (flags_->device_ == "DSP" || flags_->device_ == "Auto") { std::shared_ptr dsp_device_info = std::make_shared(); device_list.push_back(dsp_device_info); @@ -1323,8 +1335,8 @@ int BenchmarkUnifiedApi::CompileGraph(mindspore::ModelType model_type, const std } memset(dec_key.key, 0, kEncMaxLen); if (ret != kSuccess) { - MS_LOG(ERROR) << "ms_model_.Build failed while running ", model_name.c_str(); - std::cout << "ms_model_.Build failed while running ", model_name.c_str(); + MS_LOG(ERROR) << "ms_model_.Build failed while running, model path:" << model_name.c_str(); + std::cout << "ms_model_.Build failed while running, model path:" << model_name.c_str() << std::endl; return RET_ERROR; } return RET_OK; diff --git a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/context.cc b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/context.cc index 7ec050099331c90f2b773be719fc2ffeab3b7427..75e8a0fd8dabe756e855fa996ec765ddf39f50de 100644 --- a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/context.cc +++ b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/context.cc @@ -161,6 +161,16 @@ int KirinNPUDeviceInfo::GetFrequency() const { return GetAnyValueI32(data_->params, kModelOptionKirinNpuFrequency); } +void HIAIDeviceInfo::SetFrequency(int frequency) { + MS_EXCEPTION_IF_NULL(data_); + SetAnyValue(&data_->params[kModelOptionKirinNpuFrequency], frequency); +} + +int HIAIDeviceInfo::GetFrequency() const { + MS_EXCEPTION_IF_NULL(data_); + return GetAnyValueI32(data_->params, kModelOptionKirinNpuFrequency); +} + void 
GPUDeviceInfo::SetDeviceID(uint32_t device_id) { MS_EXCEPTION_IF_NULL(data_); SetAnyValue(&data_->params[kModelOptionGPUDeviceID], device_id);