diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index b494fdf54878f0b8896325e4be0953a3ce032489..7a2f817a0db6100ca9720893c0817f8523ab223c 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -27,6 +27,8 @@ mindspore-lite/mindspore-lite/src/litert/kernel/opencl/kernel/fullconnection.cc: mindspore-lite/mindspore-lite/src/litert/scheduler.cc:mindspore::lite::Scheduler::FindBackendKernel mindspore-lite/mindspore-lite/src/litert/thread_pool.c:GetArch mindspore-lite/mindspore-lite/src/train/train_loop.cc:mindspore::lite::TrainLoop::Train +mindspore-lite/mindspore-lite/src/common/utils.h:mindspore::lite::DataTypeSize +mindspore-lite/mindspore-lite/src/litert/scheduler.cc:mindspore::lite::Scheduler::FindProviderKernel # minddata mindspore-lite/mindspore-lite/minddata/dataset/engine/datasetops/data_queue_op.cc:mindspore::dataset::DataQueueOp::SendDataToAscend diff --git a/include/cxx_api/context.h b/include/cxx_api/context.h index d6b1e723ff98a50c08e4709841f79afd5cf28193..425fc87c66a0f37c1f0f47579b17dab7b10123da 100644 --- a/include/cxx_api/context.h +++ b/include/cxx_api/context.h @@ -36,6 +36,7 @@ enum DeviceType { kAscend, kAscend910, kAscend310, + kDSP, kCustomDevice, kAllDevice, // add new type here @@ -598,5 +599,26 @@ void AscendDeviceInfo::SetBufferOptimizeMode(const std::string &buffer_optimize_ SetBufferOptimizeMode(StringToChar(buffer_optimize_mode)); } std::string AscendDeviceInfo::GetBufferOptimizeMode() const { return CharToString(GetBufferOptimizeModeChar()); } + +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the dsp. This option is +/// invalid for MindSpore Lite. +class MS_API DSPDeviceInfo : public DeviceInfoContext { + public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. 
+ enum DeviceType GetDeviceType() const override { return DeviceType::kDSP; }; + + /// \brief Set device id. + /// + /// \param[in] device_id The device id. + void SetDeviceID(uint32_t device_id); + + /// \brief Get the device id. + /// + /// \return The device id. + uint32_t GetDeviceID() const; +}; + } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_CONTEXT_H diff --git a/mindspore-lite/CMakeLists.txt b/mindspore-lite/CMakeLists.txt index c5f73f0b956b7f1b554d651af632493d11739a1b..da76df772ac9ca10331f3e10d0328f03c1ef6e76 100644 --- a/mindspore-lite/CMakeLists.txt +++ b/mindspore-lite/CMakeLists.txt @@ -346,9 +346,13 @@ elseif(TOOLCHAIN_NAME STREQUAL "ohos") elseif(TOOLCHAIN_NAME STREQUAL "cortex-a15") if((${MSLITE_REGISTRY_DEVICE} STREQUAL "ft78")) set(TARGET_FT78 on) + add_compile_definitions(SUPPORT_FT78) elseif((${MSLITE_REGISTRY_DEVICE} STREQUAL "ft04")) set(TARGET_FT04 on) + add_compile_definitions(SUPPORT_FT04) endif() + set(MSLITE_ENABLE_DSP ON) + add_compile_definitions(ENABLE_DSP) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 diff --git a/mindspore-lite/include/lite_types.h b/mindspore-lite/include/lite_types.h index 017e98a86c87087f48ac2f9e1dbeafe2d9704c49..cd8cc19b36ac66ae553037b87885db731812bc6f 100644 --- a/mindspore-lite/include/lite_types.h +++ b/mindspore-lite/include/lite_types.h @@ -41,6 +41,7 @@ typedef enum { DT_GPU, /**< GPU device type */ DT_NPU, /**< NPU device type */ DT_ASCEND, /**< ASCEND device type */ + DT_DSP, /**< DSP device type */ DT_CUSTOM, /**< EXTEND device type */ DT_END /**< NO device type */ } DeviceType; diff --git a/mindspore-lite/src/CMakeLists.txt b/mindspore-lite/src/CMakeLists.txt index ec45bcf459e0ebeeed2e2b509cc5b49e97f728c6..8336e6e78d982eed55658efca6c10180ac2fc91f 100644 --- a/mindspore-lite/src/CMakeLists.txt +++ b/mindspore-lite/src/CMakeLists.txt @@ -446,6 +446,27 @@ if(MSLITE_GPU_BACKEND STREQUAL opencl) target_link_libraries(mindspore-lite_static 
opencl_kernel_mid) endif() +if(${MSLITE_REGISTRY_DEVICE} STREQUAL "ft04" OR ${MSLITE_REGISTRY_DEVICE} STREQUAL "ft78") + if(NOT DEFINED ENV{DSP_SDK_PATH}) + message(FATAL_ERROR "DSP SDK path not found. Please set DSP_SDK_PATH environment variable.") + endif() + message(STATUS "DSP SDK path: $ENV{DSP_SDK_PATH}") + set(HTHREAD_LIB_PATH "$ENV{DSP_SDK_PATH}/hthread/lib/${MSLITE_REGISTRY_DEVICE}") + if(NOT EXISTS "${HTHREAD_LIB_PATH}") + message(FATAL_ERROR "HTHREAD library directory not found at: ${HTHREAD_LIB_PATH}") + endif() + find_library(HTHREAD_LIB + NAMES libhthread_host.a hthread_host + PATHS "${HTHREAD_LIB_PATH}" + NO_DEFAULT_PATH + REQUIRED) + add_subdirectory(litert/kernel/dsp) + target_link_libraries(mindspore-lite dsp_kernel_mid) + target_link_libraries(mindspore-lite_static dsp_kernel_mid) + target_link_libraries(mindspore-lite ${HTHREAD_LIB}) + target_link_libraries(mindspore-lite_static ${HTHREAD_LIB}) +endif() + if(SUPPORT_NPU) add_subdirectory(litert/delegate/npu) target_link_libraries(mindspore-lite npu_kernel_mid) diff --git a/mindspore-lite/src/common/context_util.cc b/mindspore-lite/src/common/context_util.cc index bc2e40e1326a94c9a179f6dceb77c9e0339ea264..4a2e89abbae72c8a35459b0f233f7b3e7225d0d5 100644 --- a/mindspore-lite/src/common/context_util.cc +++ b/mindspore-lite/src/common/context_util.cc @@ -108,6 +108,17 @@ std::shared_ptr AscendDeviceInfoFromAscendDeviceCon return ascend_info; } +std::shared_ptr DSPDeviceInfoFromDSPDeviceContext(const lite::DeviceContext &dsp_context) { + if (dsp_context.device_type_ != DT_DSP) { + MS_LOG(ERROR) << "Function input parameter is not dsp context."; + return nullptr; + } + auto dsp_info = std::make_shared(); + MS_CHECK_TRUE_RET(dsp_info != nullptr, nullptr); + PassBasicProperties(dsp_info, dsp_context); + return dsp_info; +} + std::shared_ptr CustomDeviceInfoFromCustomDeviceContext( const lite::DeviceContext &inner_context) { if (inner_context.device_type_ != DT_CUSTOM) { @@ -140,11 +151,10 @@ 
mindspore::Context *MSContextFromContext(const std::shared_ptr &co } auto &device_infos = ms_context->MutableDeviceInfo(); std::map(const lite::DeviceContext &)>> - transfer_funcs = {{DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, - {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, - {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, - {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, - {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; + transfer_funcs = { + {DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, + {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, + {DT_DSP, DSPDeviceInfoFromDSPDeviceContext}, {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; for (auto &device_context : context->device_list_) { auto device_type = device_context.device_type_; if (transfer_funcs.find(device_type) == transfer_funcs.end()) { diff --git a/mindspore-lite/src/common/utils.h b/mindspore-lite/src/common/utils.h index 49dd63482e9057033754bc31a44de5b1dbc88a9f..87b2a11175a843a2afdbe8d3ae06014b6acf4733 100644 --- a/mindspore-lite/src/common/utils.h +++ b/mindspore-lite/src/common/utils.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -342,6 +343,10 @@ inline size_t DataTypeSize(TypeId type) { return 0; case kMetaTypeTypeType: return sizeof(int); + case kNumberTypeComplex64: + return sizeof(std::complex); + case kNumberTypeComplex128: + return sizeof(std::complex); default: MS_LOG(ERROR) << "Not support the type: " << type; return 0; diff --git a/mindspore-lite/src/executor/kernel_exec.h b/mindspore-lite/src/executor/kernel_exec.h index 72a298656fc40bbc174d41d0b20a5fe0b4be511c..c009d63228590132687032f968216ca2afd8c9e3 100644 --- a/mindspore-lite/src/executor/kernel_exec.h +++ b/mindspore-lite/src/executor/kernel_exec.h @@ -49,7 +49,18 @@ using mindspore::infer::Abstractkernel; using mindspore::lite::KernelCallBack; namespace mindspore::kernel { -enum 
KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kDelegate, kACL, kKernelArch_MIN = kCPU, kKernelArch_MAX = kAPU }; +enum KERNEL_ARCH { + kCPU, + kGPU, + kDSP, + kAPU, + kNPU, + kCustom, + kDelegate, + kACL, + kKernelArch_MIN = kCPU, + kKernelArch_MAX = kAPU +}; static const char *const kBuiltin = "Builtin"; struct KernelKey { @@ -78,7 +89,8 @@ enum SubGraphType { kEntranceSubGraph, kExitSubGraph, kStackSubGraph, - kAclSubGraph + kAclSubGraph, + kDspSubGraph }; class KernelExec { diff --git a/mindspore-lite/src/litert/cxx_api/context.cc b/mindspore-lite/src/litert/cxx_api/context.cc index 6d844982c5bc677e466b892837806d3646b803bf..6f6cc7f894d29b969417bb8658fbb3526f4fea5e 100644 --- a/mindspore-lite/src/litert/cxx_api/context.cc +++ b/mindspore-lite/src/litert/cxx_api/context.cc @@ -683,4 +683,20 @@ std::vector AscendDeviceInfo::GetBufferOptimizeModeChar() const { const std::string &ref = GetValue(data_, kModelOptionAscendBufferOptimize); return StringToChar(ref); } + +void DSPDeviceInfo::SetDeviceID(uint32_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionDeviceID] = device_id; +} + +uint32_t DSPDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionDeviceID); +} } // namespace mindspore diff --git a/mindspore-lite/src/litert/cxx_api/converters.cc b/mindspore-lite/src/litert/cxx_api/converters.cc index 8daf0242090d51e82e693269de7a7c41bf26add3..01ca00e104141be6e4916875010697066434a043 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.cc +++ b/mindspore-lite/src/litert/cxx_api/converters.cc @@ -78,6 +78,15 @@ Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceIn return kSuccess; } +Status ContextUtils::AddDspDevice(lite::InnerContext *inner_context, DeviceInfoContext *device) { + lite::DeviceInfo device_info; + auto dsp_context = device->Cast(); + 
device_info.dsp_device_info_ = {dsp_context->GetDeviceID()}; + inner_context->device_list_.push_back( + {lite::DT_DSP, device_info, dsp_context->GetProvider(), dsp_context->GetProviderDevice()}); + return kSuccess; +} + Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device) { lite::DeviceInfo device_info; @@ -161,6 +170,8 @@ std::shared_ptr ContextUtils::Convert(Context *context) { ret = AddNpuDevice(npu_context->GetEnableFP16(), npu_context->GetFrequency(), inner_context.get()); } else if (device->GetDeviceType() == kAscend) { ret = AddAscendDevice(inner_context.get(), device.get()); + } else if (device->GetDeviceType() == kDSP) { + ret = AddDspDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kCustomDevice) { ret = AddCustomDevice(inner_context.get(), device); } diff --git a/mindspore-lite/src/litert/cxx_api/converters.h b/mindspore-lite/src/litert/cxx_api/converters.h index 68ae65f7d0455d95e8422816bc54e16d3dc25f3b..ccae58a16e00b831b1d6232062905c4e565262d7 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.h +++ b/mindspore-lite/src/litert/cxx_api/converters.h @@ -45,6 +45,7 @@ class MS_API ContextUtils { lite::InnerContext *inner_context); static Status AddNpuDevice(bool enable_fp16, int frequency, lite::InnerContext *inner_context); static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); + static Status AddDspDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); static Status AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; diff --git a/mindspore-lite/src/litert/inner_context.h b/mindspore-lite/src/litert/inner_context.h index e5f02fb4fc1c28e697b67ff97210ed05feb21745..19f61ad84e955845fe9bae72204f79ec6710227e 100644 --- a/mindspore-lite/src/litert/inner_context.h +++ 
b/mindspore-lite/src/litert/inner_context.h @@ -66,6 +66,10 @@ typedef struct AscendDeviceInfo { std::string image_size_; } AscendDeviceInfo; +typedef struct DspDeviceInfo { + uint32_t device_id_ = 0; +} DspDeviceInfo; + /// \brief CustomDeviceInfo defined for user defined device configuration information. typedef struct CustomDeviceInfo { std::shared_ptr user_defined_device_info_; @@ -76,6 +80,7 @@ struct DeviceInfo { GpuDeviceInfo gpu_device_info_; NpuDeviceInfo npu_device_info_; AscendDeviceInfo ascend_device_info_; + DspDeviceInfo dsp_device_info_; CustomDeviceInfo custom_device_info_; }; diff --git a/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt b/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa2e9c4e479d40f8d79cf46939c59dd00cff305f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB_RECURSE COMMON_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_allocator.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_kernel.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_runtime_wrapper.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_runtime.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_subgraph.cc + ) + +file(GLOB_RECURSE DSP_KERNEL_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/${MSLITE_REGISTRY_DEVICE}/*.cc + ) + +add_library(dsp_kernel_mid OBJECT ${DSP_KERNEL_SRC} ${COMMON_SRC}) +add_dependencies(dsp_kernel_mid fbs_src) +target_include_directories(dsp_kernel_mid PRIVATE $ENV{DSP_SDK_PATH}) diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..568599337f17415c459bdfa9937f8992899b7850 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc @@ -0,0 +1,245 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include +#include "include/errorcode.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "hthread/include/hthread_host.h" + +namespace mindspore::lite::dsp { +DSPAllocator::DSPAllocator(DSPRuntime *dsp_runtime) : dsp_runtime_(dsp_runtime) { + device_id_ = dsp_runtime->GetDeviceID(); +} + +DSPAllocator::~DSPAllocator() { Clear(); } + +void DSPAllocator::Lock() { + if (lock_flag_) { + lock.lock(); + } +} + +void DSPAllocator::UnLock() { + if (lock_flag_) { + lock.unlock(); + } +} + +void *DSPAllocator::MinimumFit(MemType mem_type, size_t size) { + auto iter = free_list_.lower_bound(size); + while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { + auto mem_buf = iter->second; + bool is_match = mem_buf->mem_type_ == mem_type; + if (is_match) { + free_list_.erase(iter); + allocated_list_[mem_buf->host_ptr_] = mem_buf; + mem_buf->ref_count_ = 0; + MS_LOG(DEBUG) << "Find Mem from free list. 
size: " << mem_buf->size_ + << ", type: " << static_cast(mem_buf->mem_type_); + return mem_buf->host_ptr_; + } + ++iter; + } + return nullptr; +} + +void *DSPAllocator::_Malloc(MemType mem_type, size_t size) { + if (size > dsp_runtime_->GetMaxAllocSize()) { + MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; + return nullptr; + } + Lock(); + void *host_ptr = MinimumFit(mem_type, size); + UNLOCK_AND_RETURN_NULL(host_ptr != nullptr, host_ptr); + + MemBuf *mem_buf = new (std::nothrow) MemBuf; + if (mem_buf == nullptr) { + UnLock(); + return nullptr; + } + mem_buf->device_ptr_ = 0; + auto ret = HostTlsfMalloc(device_id_, core_id_, static_cast(mem_type), size, &mem_buf->device_ptr_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "HostTlsfMalloc failed, size: " << size << ", type: " << static_cast(mem_type); + delete mem_buf; + UnLock(); + return nullptr; + } + host_ptr = reinterpret_cast(GetViraddr(mem_buf->device_ptr_, size)); + mem_buf->ref_count_ = 0; + mem_buf->size_ = size; + mem_buf->host_ptr_ = host_ptr; + mem_buf->mem_type_ = mem_type; + allocated_list_[host_ptr] = mem_buf; + // Count the size only after the device allocation has succeeded. + total_size_ += size; + UnLock(); + + MS_LOG(DEBUG) << "Malloc a new buffer. 
memory type: " << static_cast(mem_buf->mem_type_) + << ", size: " << std::dec << mem_buf->size_; + return host_ptr; +} + +void DSPAllocator::Free(void *buf) { + if (buf == nullptr) { + return; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + mem_buf->ref_count_ = 0; + allocated_list_.erase(iter); + free_list_.insert(std::make_pair(mem_buf->size_, mem_buf)); + UnLock(); + return; + } + UnLock(); + MS_LOG(WARNING) << "Host ptr has freed"; +} + +int DSPAllocator::RefCount(void *buf) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + int ref_count = std::atomic_load(&mem_buf->ref_count_); + UnLock(); + return ref_count; + } + UnLock(); + return -1; +} + +int DSPAllocator::SetRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + std::atomic_store(&mem_buf->ref_count_, ref_count); + UnLock(); + return ref_count; + } + UnLock(); + return -1; +} + +int DSPAllocator::IncRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto membuf = iter->second; + auto ref = std::atomic_fetch_add(&membuf->ref_count_, ref_count); + UnLock(); + return (ref + ref_count); + } + UnLock(); + return -1; +} + +int DSPAllocator::DecRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + auto ref = std::atomic_fetch_sub(&mem_buf->ref_count_, ref_count); + UnLock(); + return (ref - ref_count); + } + UnLock(); + return -1; +} + +size_t DSPAllocator::TotalSize() { + Lock(); + size_t total_size = 0; + for 
(auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { + total_size += it->second->size_; + } + for (auto it = free_list_.begin(); it != free_list_.end(); it++) { + total_size += it->second->size_; + } + UnLock(); + return total_size; +} + +uint64_t DSPAllocator::GetDeviceMemPtr(void *buffer) { + // Guard allocated_list_ like every other accessor of this class. + Lock(); + auto it = allocated_list_.find(buffer); + if (it != allocated_list_.end()) { + auto device_ptr = it->second->device_ptr_; + UnLock(); + return device_ptr; + } + UnLock(); + MS_LOG(ERROR) << "Can not found device ptr!"; + return 0; +} + +template +void DSPAllocator::ClearMemList(T *list) { + for (auto it = list->begin(); it != list->end(); it++) { + if (it->second->host_ptr_ != nullptr) { + MS_LOG(DEBUG) << "ReleaseViraddr host ptr."; + ReleaseViraddr(reinterpret_cast(it->second->host_ptr_), it->second->device_ptr_, it->second->size_); + it->second->host_ptr_ = nullptr; + } + if (it->second->device_ptr_ != 0) { + MS_LOG(DEBUG) << "HostTlsfFree device ptr."; + HostTlsfFree(device_id_, core_id_, static_cast(it->second->mem_type_), &it->second->device_ptr_); + } + delete it->second; + } + list->clear(); +} + +void DSPAllocator::Clear() { + Lock(); + ClearMemList>(&allocated_list_); + ClearMemList>(&free_list_); + UnLock(); +} + +MemType DSPAllocator::GetMemType(void *host_ptr) { + MemType mem_type{MemType::DDR}; + Lock(); + auto it = allocated_list_.find(host_ptr); + if (it == allocated_list_.end()) { + UnLock(); + MS_LOG(ERROR) << "Can not found buffer!"; + return mem_type; + } + MemBuf *mem_buf = it->second; + if (mem_buf == nullptr) { + UnLock(); + MS_LOG(ERROR) << "MemBuf is nullptr for host_ptr!"; + return mem_type; + } + mem_type = mem_buf->mem_type_; + UnLock(); + return mem_type; +} +} // namespace mindspore::lite::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..3cb2bae2b79f7c67a7729a8fdeb429dc67f19821 --- /dev/null +++ 
b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h @@ -0,0 +1,92 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "src/litert/inner_allocator.h" + +namespace mindspore::lite::dsp { +enum class MemType : char { SMC, DDR, DDR36BIT, L2 }; +#define UNLOCK_AND_RETURN_NULL(condition, ptr) \ + do { \ + if (condition) { \ + UnLock(); \ + return (ptr); \ + } \ + } while (0) + +class DSPRuntime; +class DSPAllocator : public mindspore::Allocator { + public: + explicit DSPAllocator(DSPRuntime *dsp_runtime); + ~DSPAllocator() override; + + using Allocator::Malloc; + void *Malloc(size_t size, MemType type) { return _Malloc(type, size); } + void *Malloc(size_t size) override { return _Malloc(MemType::DDR, size); } + + void Free(void *ptr) override; + int RefCount(void *ptr) override; + int SetRefCount(void *ptr, int ref_count) override; + int DecRefCount(void *ptr, int ref_count) override; + int IncRefCount(void *ptr, int ref_count) override; + size_t TotalSize(); + + void Clear(); + MemType GetMemType(void *host_ptr); + uint64_t GetDeviceMemPtr(void *buffer); + void *Prepare(void *ptr) override { return ptr; } + + private: + void Lock(); + void UnLock(); + void *MinimumFit(MemType mem_type, size_t 
size); + void *_Malloc(MemType mem_type, size_t size = 0); + template + void ClearMemList(T *list); + + private: + DSPRuntime *dsp_runtime_{nullptr}; + int32_t device_id_{0}; + int32_t core_id_{0}; + std::mutex lock; + struct MemBuf { + std::atomic_int ref_count_ = 0; + size_t size_{0}; + uint64_t device_ptr_{0}; + void *host_ptr_{nullptr}; + MemType mem_type_{MemType::DDR}; + }; + + // buf, membuf> + std::unordered_map allocated_list_; + std::multimap free_list_; + uint64_t total_size_{0}; + // 6 is empirical value + int shift_factor_ = 6; + bool lock_flag_ = true; +}; +} // namespace mindspore::lite::dsp + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..d8894645ef066f1c04ac88540534b4a6f4f1c2b8 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc @@ -0,0 +1,96 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/infer_manager.h" +#include "src/litert/kernel/dsp/dsp_kernel.h" +#include "src/litert/weight_decoder.h" +#include "src/common/file_utils.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +bool DSPKernel::MallocDataDone() { + for (auto &out_tensor : out_tensors_) { + if (out_tensor->data() == nullptr) { + return false; + } + auto allocator = out_tensor->allocator(); + if (allocator == nullptr) { + return false; + } + auto buffer = + reinterpret_cast(allocator.get())->GetDeviceMemPtr(out_tensor->data()); + if (buffer == 0) { + return false; + } + } + return true; +} + +int DSPKernel::PreProcess() { + if (MallocDataDone()) { + return RET_OK; + } + int ret = ReSize(); + if (ret != RET_OK) { + return ret; + } + for (size_t i = 0; i < out_tensors_.size(); ++i) { + auto *output = out_tensors_.at(i); + CHECK_NULL_RETURN(output); + CHECK_NULL_RETURN(output->allocator()); + ret = output->MallocData(); + MS_LOG(DEBUG) << "MallocData for output " << i << ", data: " << output->data(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "MallocData failed"; + return ret; + } + output->ResetRefCount(); + } + return RET_OK; +} + +int DSPKernel::InferShape() { + if (InferShapeDone()) { + return RET_OK; + } + auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_); + if (ret != RET_OK) { + MS_LOG(WARNING) << "InferShape failed, type: " + << schema::EnumNamePrimitiveType(static_cast(type())); + return ret; + } + return RET_OK; +} + +int DSPKernel::ReSize() { + if (InferShapeDone()) { + return RET_OK; + } + auto ret = InferShape(); + if (ret != RET_OK) { + return ret; + } + + ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ReSize failed for kernel prepare!"; + return ret; + } + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h new file mode 100644 index 
0000000000000000000000000000000000000000..08d907877d9393b09fba3fa366652c4ace244baa --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h @@ -0,0 +1,109 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ + +#include +#include +#include +#include +#include +#include +#include "src/litert/lite_kernel.h" +#include "src/executor/kernel_exec.h" +#include "include/errorcode.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "src/litert/tensor_category.h" +#include "nnacl_c/resize_parameter.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +constexpr int INPUT_TENSOR_SIZE_1 = 1; +constexpr int INPUT_TENSOR_SIZE_2 = 2; +constexpr int INPUT_TENSOR_SIZE_3 = 3; +constexpr int INPUT_TENSOR_SIZE_4 = 4; +constexpr int INPUT_TENSOR_SIZE_5 = 5; +constexpr int INPUT_TENSOR_SIZE_6 = 6; +constexpr int INPUT_TENSOR_SIZE_16 = 16; +constexpr int OUTPUT_TENSOR_SIZE_1 = 1; +constexpr int OUTPUT_TENSOR_SIZE_2 = 2; +constexpr int OUTPUT_TENSOR_SIZE_3 = 3; +constexpr int OUTPUT_TENSOR_SIZE_4 = 4; + +class DSPKernel : public LiteKernel { + public: + DSPKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : 
LiteKernel(parameter, inputs, outputs, ctx) { + dsp_runtime_ = dsp_runtime_wrapper_.GetInstance(); + } + ~DSPKernel() override = default; + + int Prepare() override { return RET_OK; } + int PreProcess() override; + int ReSize() override; + int Run() override { return RET_ERROR; } + + bool MallocDataDone(); + virtual int CheckSpecs() { return RET_OK; } + lite::dsp::MemType GetMemType() { return out_mem_type_; } + void SetMemType(lite::dsp::MemType mem_type) { out_mem_type_ = mem_type; } + void SetKernelArg(const std::vector &kernel_args = {}) { kernel_args_ = kernel_args; } + int InferShape() override; + + protected: + lite::dsp::DSPRuntime *dsp_runtime_; + std::vector kernel_args_; + lite::dsp::MemType out_mem_type_{lite::dsp::MemType::DDR}; + + private: + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper_; +}; + +template +kernel::LiteKernel *DSPKernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc) { + auto *kernel = new (std::nothrow) T(reinterpret_cast(opParameter), inputs, outputs, ctx); + if (kernel == nullptr) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "is nullptr."; + return nullptr; + } + auto shape = outputs.front()->shape(); + if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't infer shape yet!"; + return kernel; + } + if (std::find(shape.begin(), shape.end(), 0) != shape.end()) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't support output shape has zero."; + delete kernel; + return nullptr; + } + auto ret = kernel->CheckSpecs(); + if (ret != mindspore::lite::RET_OK) { + MS_LOG(WARNING) << "Check " << opParameter->name_ << " specification failed!"; + delete kernel; + return nullptr; + } + return kernel; +} +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ diff --git 
a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc new file mode 100644 index 0000000000000000000000000000000000000000..8221a6c8522205ff1b2b559138d5c402c8d6906e --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc @@ -0,0 +1,157 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/file_utils.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "hthread/include/hthread_host.h" + +namespace mindspore::lite::dsp { +static std::mutex g_mtx; +static std::mutex g_init_mtx; + +InitState DSPRuntime::init_state_ = UnInit; +DSPRuntime *DSPRuntime::dsp_runtime_instance_ = nullptr; +size_t DSPRuntime::instance_count_ = 0; + +DSPRuntime *DSPRuntime::GetInstance() { + std::unique_lock lck(g_mtx); + static DSPRuntime dsp_runtime; + if (instance_count_ == 0) { + dsp_runtime_instance_ = &dsp_runtime; + } + instance_count_++; + return dsp_runtime_instance_; +} + +void DSPRuntime::DeleteInstance() { + std::unique_lock lck(g_mtx); + if (instance_count_ == 0) { + MS_LOG(ERROR) << "No DSPRuntime instance could delete!"; + return; + } + instance_count_--; + if (instance_count_ == 0) { + dsp_runtime_instance_->Uninit(); + } +} + +// Init will get devices 
info, load dsp ops library. +int DSPRuntime::Init() { + std::unique_lock lck(g_init_mtx); + if (init_state_ == InitSuccess) { + return RET_OK; + } else if (init_state_ == InitFailed) { + return RET_ERROR; + } + init_state_ = InitFailed; + + if (IsPrintDebug()) { + MT_INFO_LOG = 1; + } + GetHthreadVersion(); + auto device_status = DeviceOpen(device_id_); + if (device_status < 0) { + MS_LOG(ERROR) << "Open DSP Device failed!"; + return RET_ERROR; + } + std::string library_path = "/usr/lib/dsp_lib.dat"; + std::ifstream ifs(library_path); + if (!ifs.good()) { + MS_LOG(ERROR) << "DSP Lib: " << library_path << " is not exist."; + return RET_ERROR; + } + if (ImportLib(library_path.data()) != RET_OK) { + MS_LOG(ERROR) << "Load DSP OPS Library failed!"; + return RET_ERROR; + } + + allocator_ = std::make_shared(this); + if (allocator_ == nullptr) { + MS_LOG(ERROR) << "DSP allocator failed!"; + return RET_ERROR; + } + init_state_ = InitSuccess; + MS_LOG(INFO) << "DSPRuntime init done!"; + return RET_OK; +} + +int DSPRuntime::Uninit() { + std::unique_lock lck(g_init_mtx); + if (init_state_ != InitSuccess) { + return RET_OK; + } + auto ret = DeviceClose(device_id_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Close DSP Device failed!"; + return RET_ERROR; + } + allocator_ = nullptr; + init_state_ = UnInit; + MS_LOG(INFO) << "DSPRuntime uninit done!"; + return RET_OK; +} + +DSPRuntime::~DSPRuntime() { Uninit(); } + +int DSPRuntime::RunKernel(const std::string &kernel_name, const std::vector &kernel_args, + const int core_mask) { + int ret = -1; + int thread_id = -1; + ret = LaunchGroup(device_id_, core_mask, &thread_id, const_cast(kernel_name.c_str()), kernel_args.size(), + const_cast(kernel_args.data())); + if (ret != RET_OK) { + MS_LOG(ERROR) << "LaunchGroup failed! kernel name: " << kernel_name; + return ret; + } + ret = WaitGroup(thread_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "WaitGroup failed! 
kernel name: " << kernel_name; + return ret; + } + ret = DestroyGroup(thread_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "DestroyGroup failed! kernel name: " << kernel_name; + return ret; + } + return ret; +} + +uint64_t DSPRuntime::GetMaxAllocSize() { return GetSysMemorySize(); } + +int DSPRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size) const { + auto ret = HostMemCopy(dst, reinterpret_cast(src), size, 1); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyDeviceMemToHost failed!"; + } + return ret; +} + +int DSPRuntime::CopyHostMemToDevice(void *dst, const void *src, size_t size) const { + auto ret = HostMemCopy(const_cast(src), reinterpret_cast(dst), size, 0); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyHostMemToDevice failed!"; + } + return ret; +} +} // namespace mindspore::lite::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..db14c2cb20deb2afe6c9fcad58d6879a91359e7b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h @@ -0,0 +1,82 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "ir/dtype/type_id.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime_wrapper.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" + +namespace mindspore::lite::dsp { +enum InitState { UnInit = 0, InitSuccess = 1, InitFailed = 2 }; + +class DSPRuntimeInnerWrapper; +class DSPRuntimeWrapper; +class DSPRuntime { + public: + friend DSPRuntimeInnerWrapper; + friend DSPRuntimeWrapper; + ~DSPRuntime(); + DSPRuntime(const DSPRuntime &) = delete; + DSPRuntime &operator=(const DSPRuntime &) = delete; + + int Init(); + int Uninit(); + + std::shared_ptr GetAllocator() { return allocator_; } + uint64_t GetMaxAllocSize(); + int32_t GetDeviceID() { return device_id_; } + + int RunKernel(const std::string &kernel_name, const std::vector &kernel_args, const int core_mask); + + int CopyDeviceMemToHost(void *dst, const void *src, size_t size) const; + int CopyHostMemToDevice(void *dst, const void *src, size_t size) const; + + private: + static DSPRuntime *GetInstance(); + static void DeleteInstance(); + DSPRuntime() = default; + + private: + static InitState init_state_; + static size_t instance_count_; + static DSPRuntime *dsp_runtime_instance_; + int32_t device_id_{0}; + std::shared_ptr allocator_{nullptr}; +}; + +class DSPRuntimeInnerWrapper { + public: + DSPRuntimeInnerWrapper() { dsp_runtime_ = DSPRuntime::GetInstance(); } + ~DSPRuntimeInnerWrapper() { DSPRuntime::DeleteInstance(); } + DSPRuntimeInnerWrapper(const DSPRuntimeInnerWrapper &) = delete; + DSPRuntimeInnerWrapper &operator=(const DSPRuntimeInnerWrapper &) = delete; + DSPRuntime *GetInstance() { return dsp_runtime_; } + + private: + DSPRuntime *dsp_runtime_{nullptr}; +}; +} // namespace mindspore::lite::dsp +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ diff 
--git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc new file mode 100644 index 0000000000000000000000000000000000000000..ab0997a27d12b7d5e15fc6721e3c78afac545cf6 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_runtime_wrapper.h" +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/file_utils.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" + +namespace mindspore::registry::dsp { +std::shared_ptr DSPRuntimeWrapper::GetAllocator() { + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper; + lite::dsp::DSPRuntime *dsp_runtime = dsp_runtime_wrapper.GetInstance(); + return dsp_runtime->GetAllocator(); +} +} // namespace mindspore::registry::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..fb5bc80c2f5b7229c1da214b0d0aafd91b399be8 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h @@ -0,0 +1,40 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "include/api/allocator.h" +#include "include/api/status.h" +#include "include/api/dual_abi_helper.h" + +namespace mindspore::registry::dsp { +class MS_API DSPRuntimeWrapper { + public: + DSPRuntimeWrapper() = default; + ~DSPRuntimeWrapper() = default; + + std::shared_ptr GetAllocator(); +}; +} // namespace mindspore::registry::dsp +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5dbc807cff4fbb0341734689f22405146c81385 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc @@ -0,0 +1,131 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include +#include +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/utils.h" + +namespace mindspore::kernel { +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::lite::dsp::MemType; + +DspSubGraph::~DspSubGraph() { UnInit(); } + +void DspSubGraph::GetInOutNodes() { + this->in_nodes_.clear(); + this->out_nodes_.clear(); + auto in_tensors = this->in_tensors(); + auto out_tensors = this->out_tensors(); + for (auto *node : nodes_) { + for (auto *tensor : node->in_tensors()) { + if (std::find(in_tensors.begin(), in_tensors.end(), tensor) != in_tensors.end()) { + in_nodes_.emplace_back(node); + break; + } + } + for (auto *tensor : node->out_tensors()) { + if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) { + out_nodes_.emplace_back(node); + break; + } + } + } +} + +int DspSubGraph::Prepare() { + for (const auto tensor : in_tensors()) { + MS_ASSERT(tensor); + tensor->set_allocator(allocator_); + } + for (const auto tensor : out_tensors()) { + MS_ASSERT(tensor); + tensor->set_allocator(allocator_); + } + for (auto node : this->nodes_) { + if (node == nullptr) { + MS_LOG(ERROR) << "node in Subgraph is nullptr"; + return mindspore::lite::RET_NULL_PTR; + } + for (const auto tensor : node->out_tensors()) { + CHECK_NULL_RETURN(tensor); + MS_CHECK_TRUE_RET(tensor->data() == nullptr, RET_ERROR); + tensor->set_allocator(allocator_); + } + } + return RET_OK; +} + +void DspSubGraph::UnInit() { + for (const auto &op : nodes_) { + delete op; + } + nodes_.clear(); + delete this->executor_; +} + +int DspSubGraph::ReSize() { + for (auto kernel : nodes_) { + if (kernel == nullptr) { + MS_LOG(ERROR) << "input kernel is nullptr!"; + return RET_ERROR; + } + if (kernel->subgraph_type() != kernel::kNotSubGraph) { + 
MS_LOG(ERROR) << "all nodes in should be kernel"; + return RET_ERROR; + } + std::vector outputs = kernel->out_tensors(); + for (auto &output : outputs) { + output->FreeData(); + output->set_shape({-1}); + } + } + for (auto kernel : nodes_) { + auto ret = kernel->ReSize(); + if (ret != RET_OK) { + MS_LOG(WARNING) << "ReSize " << kernel->name() << "failed!, ret:" << ret; + return ret; + } + } + return RET_OK; +} + +int DspSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) { + MS_ASSERT(allocator_ != nullptr); + for (auto &tensor : in_tensors()) { + MS_ASSERT(tensor); + if (tensor->data() == nullptr) { + MS_LOG(ERROR) << "Dsp subgraph input tensor data is null"; + return RET_ERROR; + } + } + for (auto *kernel : nodes_) { + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Execute(before, after); + if (ret != RET_OK) { + MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); + return ret; + } + } + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h new file mode 100644 index 0000000000000000000000000000000000000000..7935c0f7dcf45ecc13b60447f97f665dd82ebc39 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h @@ -0,0 +1,62 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ + +#include +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" +#include "src/executor/sub_graph_kernel.h" + +namespace mindspore::kernel { +class DspSubGraph : public SubGraphKernel { + public: + DspSubGraph(const std::vector &inKernels, const std::vector &outKernels, + const std::vector &nodes, MSKernel *kernel) + : SubGraphKernel(inKernels, outKernels, nodes, kernel) { + dsp_runtime_ = dsp_runtime_wrapper_.GetInstance(); + allocator_ = dsp_runtime_->GetAllocator(); + subgraph_type_ = kDspSubGraph; + if (nodes.front()->desc().data_type == kNumberTypeFloat16) { + desc_.data_type = kNumberTypeFloat16; + } else { + desc_.data_type = kNumberTypeFloat32; + } + desc_.arch = kernel::KERNEL_ARCH::kDSP; + static std::atomic_int index = 0; + this->set_name("DspSubGraph" + std::to_string(index++)); + } + ~DspSubGraph() override; + + int Prepare() override; + int ReSize() override; + int Execute() override { return Execute(nullptr, nullptr); } + int Execute(const KernelCallBack &before, const KernelCallBack &after) override; + + private: + void UnInit(); + void GetInOutNodes(); + + private: + std::shared_ptr allocator_{nullptr}; + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper_; + lite::dsp::DSPRuntime *dsp_runtime_{nullptr}; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc new file mode 100644 index 0000000000000000000000000000000000000000..c32870d0aa1804b58b0c11e9e001781975b6c7df --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/add.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_AddFusion; + +namespace mindspore::kernel { +int AddDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int AddDSPKernel::Prepare() { return RET_OK; } + +int AddDSPKernel::AddRunFp32() { + kernel_name_ = "fp_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunFp16() { + kernel_name_ = "hp_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt16() { + kernel_name_ = "i16_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt32() { + kernel_name_ = "i32_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex64() { + kernel_name_ = "c64_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + 
+int AddDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = AddRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = AddRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = AddRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = AddRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = AddRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_AddFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/add.h b/mindspore-lite/src/litert/kernel/dsp/ft04/add.h new file mode 100644 index 0000000000000000000000000000000000000000..7005afd572c7fed443c356082cb68aec56779959 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/add.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class AddDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~AddDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int AddRunFp32(); + int AddRunFp16(); + int AddRunInt16(); + int AddRunInt32(); + int AddRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e4bccfe326542d4b549932325eabe087c3b99f4 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/div.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DivFusion; + +namespace mindspore::kernel { +int DivDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int DivDSPKernel::Prepare() { return RET_OK; } + +int DivDSPKernel::DivRunFp32() { + kernel_name_ = "fp_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunFp16() { + kernel_name_ = "hp_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt16() { + kernel_name_ = "i16_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt32() { + kernel_name_ = "i32_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex64() { + kernel_name_ = "c64_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = DivRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = DivRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = DivRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = DivRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = DivRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_DivFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/div.h b/mindspore-lite/src/litert/kernel/dsp/ft04/div.h new file mode 100644 index 0000000000000000000000000000000000000000..1b6d1487d46bda2a8940bf6a9ac5e33dc8f3ff8b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/div.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class DivDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~DivDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int DivRunFp32(); + int DivRunFp16(); + int DivRunInt16(); + int DivRunInt32(); + int DivRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed72a21a97a622d91cf180adcd116453c1723405 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc @@ -0,0 +1,126 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/dsp/ft04/exp.h" +#include +#include +#include +#include +#include "src/litert/kernel/cpu/nnacl_c/exp_parameter.h" +#include "src/litert/kernel_registry.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_ExpFusion; + +namespace mindspore::kernel { +int ExpDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_1) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + return RET_OK; +} + +int ExpDSPKernel::Prepare() { + auto exp_param = reinterpret_cast(this->op_parameter_); + scale_ = static_cast(exp_param->scale_); + float log_base = (exp_param->base_ == -1) ? 1 : logf(exp_param->base_); + in_scale_ = exp_param->scale_ * log_base; + if (exp_param->shift_ == 0) { + out_scale_ = 1; + } else { + if (log_base == 1) { + out_scale_ = expf(exp_param->shift_); + } else { + out_scale_ = powf(exp_param->base_, exp_param->shift_); + } + } + return RET_OK; +} + +int ExpDSPKernel::ExpRunFp32() { + kernel_name_ = "fp_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunFp16() { + kernel_name_ = "hp_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt16() { + kernel_name_ = "i16_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt32() { + kernel_name_ = "i32_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex64() { + kernel_name_ = "c64_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + uint64_t in_scale_hex = 0, out_scale_hex = 0; + memcpy(&in_scale_hex, &in_scale_, sizeof(float)); + memcpy(&out_scale_hex, &out_scale_, sizeof(float)); + SetKernelArg({x_device_ptr, out_device_ptr, length, in_scale_hex, out_scale_hex, static_cast(scale_)}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = ExpRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = ExpRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = ExpRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = ExpRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = ExpRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_ExpFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h new file mode 100644 index 0000000000000000000000000000000000000000..6cb7e5ec39e91ef6920ae3abcc09e3c578e186ae --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h @@ -0,0 +1,48 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class ExpDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~ExpDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int ExpRunFp32(); + int ExpRunFp16(); + int ExpRunInt16(); + int ExpRunInt32(); + int ExpRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + float in_scale_; + float out_scale_; + int scale_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6f754dac6c3ca6a54aa737692459d815e9e8c8f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/mul.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_MulFusion; + +namespace mindspore::kernel { +int MulDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int MulDSPKernel::Prepare() { return RET_OK; } + +int MulDSPKernel::MulRunFp32() { + kernel_name_ = "fp_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunFp16() { + kernel_name_ = "hp_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt16() { + kernel_name_ = "i16_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt32() { + kernel_name_ = "i32_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex64() { + kernel_name_ = "c64_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = MulRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = MulRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = MulRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = MulRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = MulRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_MulFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h new file mode 100644 index 0000000000000000000000000000000000000000..ac1d33b50351fe4904901a8247db47935a784956 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class MulDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~MulDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int MulRunFp32(); + int MulRunFp16(); + int MulRunInt16(); + int MulRunInt32(); + int MulRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc new file mode 100644 index 0000000000000000000000000000000000000000..8cdcc4045b6ececf501e300d90b9cf035a421280 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/sub.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SubFusion; + +namespace mindspore::kernel { +int SubDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int SubDSPKernel::Prepare() { return RET_OK; } + +int SubDSPKernel::SubRunFp32() { + kernel_name_ = "fp_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunFp16() { + kernel_name_ = "hp_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt16() { + kernel_name_ = "i16_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt32() { + kernel_name_ = "i32_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex64() { + kernel_name_ = "c64_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = SubRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = SubRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = SubRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = SubRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = SubRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_SubFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h new file mode 100644 index 0000000000000000000000000000000000000000..39a9be9e8a9345e8c1e08f2c28346a710ae2ab9c --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class SubDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~SubDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int SubRunFp32(); + int SubRunFp16(); + int SubRunInt16(); + int SubRunInt32(); + int SubRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc new file mode 100644 index 0000000000000000000000000000000000000000..d38ad27df2a16c995e1ad058d97f2219f6ceace3 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/add.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_AddFusion; + +namespace mindspore::kernel { +int AddDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int AddDSPKernel::Prepare() { return RET_OK; } + +int AddDSPKernel::AddRunFp32() { + kernel_name_ = "fp_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunFp64() { + kernel_name_ = "dp_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt8() { + kernel_name_ = "i8_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt16() { + kernel_name_ = "i16_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt32() { + kernel_name_ = "i32_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex64() { + kernel_name_ = "c64_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex128() { + kernel_name_ = "c128_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = AddRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = AddRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = AddRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = AddRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = AddRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = AddRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = AddRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_AddFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/add.h b/mindspore-lite/src/litert/kernel/dsp/ft78/add.h new file mode 100644 index 0000000000000000000000000000000000000000..73f4fffe4b92e35f67b3e4b7edbb56311505013f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/add.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class AddDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~AddDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int AddRunFp32(); + int AddRunFp64(); + int AddRunInt8(); + int AddRunInt16(); + int AddRunInt32(); + int AddRunComplex64(); + int AddRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc new file mode 100644 index 0000000000000000000000000000000000000000..a4fe1a80abbfbcafe7338f7ccde60150d3c3b6be --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc @@ -0,0 +1,142 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/div.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DivFusion; + +namespace mindspore::kernel { +int DivDSPKernel::CheckSpecs() { + if (in_tensors_.empty() || in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "Invalid input tensors size: " << in_tensors_.size(); + return RET_ERROR; + } + auto x_tensor = in_tensors_.front(); + auto y_tensor = in_tensors_.back(); + if (x_tensor->shape() != y_tensor->shape() && x_tensor->ElementsNum() != 1 && y_tensor->ElementsNum() != 1) { + MS_LOG(WARNING) << "Input shapes must be equal or one must be scalar"; + return RET_ERROR; + } + return RET_OK; +} + +int DivDSPKernel::Prepare() { + if (in_tensors_.empty() || in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + return RET_ERROR; + } + auto x_num = in_tensors_[0]->ElementsNum(); + auto y_num = in_tensors_[1]->ElementsNum(); + if (x_num == 1 || y_num == 1) { + optimize_ = true; + first_scalar_ = (x_num == 1); + } + return RET_OK; +} + +int DivDSPKernel::DivRunFp32() { + kernel_name_ = "fp_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunFp64() { + kernel_name_ = "dp_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt8() { + kernel_name_ = "i8_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt16() { + kernel_name_ = "i16_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt32() { + kernel_name_ = "i32_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, 
kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex64() { + kernel_name_ = "c64_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex128() { + kernel_name_ = "c128_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length, optimize_, first_scalar_}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = DivRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = DivRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = DivRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = DivRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = DivRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = DivRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = DivRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_DivFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/div.h b/mindspore-lite/src/litert/kernel/dsp/ft78/div.h new file mode 100644 index 0000000000000000000000000000000000000000..83271f17713329077f61c054b14f9c66173b66fd --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/div.h @@ -0,0 +1,49 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class DivDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~DivDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int DivRunFp32(); + int DivRunFp64(); + int DivRunInt8(); + int DivRunInt16(); + int DivRunInt32(); + int DivRunComplex64(); + int DivRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + bool optimize_{false}; + bool first_scalar_{false}; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc new file mode 100644 index 0000000000000000000000000000000000000000..413ff66ed7a7599edc2c514b19e95bede00adf3e --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc @@ -0,0 +1,135 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/dsp/ft78/exp.h" +#include +#include +#include +#include +#include "src/litert/kernel/cpu/nnacl_c/exp_parameter.h" +#include "src/litert/kernel_registry.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_ExpFusion; + +namespace mindspore::kernel { +int ExpDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_1) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + return RET_OK; +} + +int ExpDSPKernel::Prepare() { + auto exp_param = reinterpret_cast(this->op_parameter_); + scale_ = static_cast(exp_param->scale_); + float log_base = (exp_param->base_ == -1) ? 1 : logf(exp_param->base_); + in_scale_ = exp_param->scale_ * log_base; + if (exp_param->shift_ == 0) { + out_scale_ = 1; + } else { + if (log_base == 1) { + out_scale_ = expf(exp_param->shift_); + } else { + out_scale_ = powf(exp_param->base_, exp_param->shift_); + } + } + return RET_OK; +} + +int ExpDSPKernel::ExpRunFp32() { + kernel_name_ = "fp_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunFp64() { + kernel_name_ = "dp_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt8() { + kernel_name_ = "i8_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt32() { + kernel_name_ = "i32_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex64() { + kernel_name_ = "c64_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex128() { + kernel_name_ = "c128_exp_s"; + core_mask_ = 
0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + uint64_t in_scale_hex = 0, out_scale_hex = 0; + memcpy(&in_scale_hex, &in_scale_, sizeof(float)); + memcpy(&out_scale_hex, &out_scale_, sizeof(float)); + SetKernelArg({x_device_ptr, out_device_ptr, length, in_scale_hex, out_scale_hex, static_cast(scale_)}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = ExpRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = ExpRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = ExpRunInt8(); + } else if (data_type == kNumberTypeInt32) { + ret = ExpRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = ExpRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = ExpRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_ExpFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h new file mode 100644 index 0000000000000000000000000000000000000000..dae095c90af40888ca71c6335ce7ebe3586ec683 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h @@ -0,0 +1,50 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class ExpDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~ExpDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int ExpRunFp32(); + int ExpRunFp64(); + int ExpRunInt8(); + int ExpRunInt16(); + int ExpRunInt32(); + int ExpRunComplex64(); + int ExpRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + float in_scale_; + float out_scale_; + int scale_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..83e1f11426562aab44f069eb2d3013ba493d9152 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/mul.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_MulFusion; + +namespace mindspore::kernel { +int MulDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int MulDSPKernel::Prepare() { return RET_OK; } + +int MulDSPKernel::MulRunFp32() { + kernel_name_ = "fp_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunFp64() { + kernel_name_ = "dp_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt8() { + kernel_name_ = "i8_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt16() { + kernel_name_ = "i16_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt32() { + kernel_name_ = "i32_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex64() { + kernel_name_ = "c64_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex128() { + kernel_name_ = "c128_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = MulRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = MulRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = MulRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = MulRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = MulRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = MulRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = MulRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_MulFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h new file mode 100644 index 0000000000000000000000000000000000000000..7e0de812fcb7b6b560429c11012b71810c7bf96c --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class MulDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~MulDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int MulRunFp32(); + int MulRunFp64(); + int MulRunInt8(); + int MulRunInt16(); + int MulRunInt32(); + int MulRunComplex64(); + int MulRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc new file mode 100644 index 0000000000000000000000000000000000000000..efb15ec7bb5e56e6b6874b13dbeb64439aaf7ea0 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/sub.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SubFusion; + +namespace mindspore::kernel { +int SubDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int SubDSPKernel::Prepare() { return RET_OK; } + +int SubDSPKernel::SubRunFp32() { + kernel_name_ = "fp_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunFp64() { + kernel_name_ = "dp_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt8() { + kernel_name_ = "i8_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt16() { + kernel_name_ = "i16_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt32() { + kernel_name_ = "i32_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex64() { + kernel_name_ = "c64_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex128() { + kernel_name_ = "c128_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = SubRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = SubRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = SubRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = SubRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = SubRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = SubRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = SubRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_SubFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h new file mode 100644 index 0000000000000000000000000000000000000000..66431eca6cb95d3132eb386c9f1925af514bb426 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class SubDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~SubDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int SubRunFp32(); + int SubRunFp64(); + int SubRunInt8(); + int SubRunInt16(); + int SubRunInt32(); + int SubRunComplex64(); + int SubRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel_exec_util.cc b/mindspore-lite/src/litert/kernel_exec_util.cc index b4a885ee44081760103e7ea5a5ad09ec6d25fda9..a9211fced98048fbf991a5104394f3120347609a 100644 --- a/mindspore-lite/src/litert/kernel_exec_util.cc +++ b/mindspore-lite/src/litert/kernel_exec_util.cc @@ -25,6 +25,9 @@ #include "src/litert/kernel/opencl/opencl_subgraph.h" #include "src/litert/kernel/gpu/opencl/opencl_runtime.h" #endif +#if ENABLE_DSP +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#endif #include "src/control_flow/control_subgraph_creator.h" #include "src/litert/kernel/cpu/base/partial_fusion.h" @@ -418,6 +421,11 @@ SubGraphKernel *KernelExecUtil::CreateSubGraphKernel(const std::vector &in_tensors, con if (kernel_exec != nullptr) { constexpr auto kArchCPU = "CPU"; constexpr auto kArchGPU = "GPU"; + constexpr auto kArchDSP = "DSP"; kernel::KernelKey tmp_key = key; if (desc.arch == kArchCPU) { tmp_key.arch = kernel::kCPU; } else if (desc.arch == kArchGPU) { tmp_key.arch = kernel::kGPU; + } else if (desc.arch == kArchDSP) { + tmp_key.arch = kernel::kDSP; } else { tmp_key.arch = kernel::kCustom; } diff --git a/mindspore-lite/src/litert/lite_session.cc b/mindspore-lite/src/litert/lite_session.cc index 
5fc41f44fb7a9bf130469130ed846a6d5bd289ee..f6c834d5871730c10685c912b47bced1e3011ab9 100644 --- a/mindspore-lite/src/litert/lite_session.cc +++ b/mindspore-lite/src/litert/lite_session.cc @@ -1153,6 +1153,13 @@ int LiteSession::Init(const std::shared_ptr &context) { return ret; } + ret = InitDSPRuntime(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init DSP runtime failed."; + is_running_.store(false); + return ret; + } + is_running_.store(false); return RET_OK; } @@ -1218,6 +1225,10 @@ LiteSession::~LiteSession() { #ifdef GPU_OPENCL delete opencl_runtime_wrapper_; opencl_runtime_wrapper_ = nullptr; +#endif +#ifdef ENABLE_DSP + delete dsp_runtime_wrapper_; + dsp_runtime_wrapper_ = nullptr; #endif delete ms_context_; ms_context_ = nullptr; @@ -1739,6 +1750,26 @@ int LiteSession::RuntimeAllocatorSetData() { return RET_OK; } +int LiteSession::InitDSPRuntime() { +#ifdef ENABLE_DSP + if (this->context_->IsDeviceTypeEnabled(DT_DSP)) { + dsp_runtime_wrapper_ = new (std::nothrow) dsp::DSPRuntimeInnerWrapper(); + if (dsp_runtime_wrapper_ == nullptr) { + MS_LOG(ERROR) << "create DSPRuntimeInnerWrapper failed"; + return RET_ERROR; + } + auto dsp_runtime = dsp_runtime_wrapper_->GetInstance(); + if (dsp_runtime->Init() != RET_OK) { + this->context_->device_list_ = {{DT_CPU, {false, MID_CPU}}}; + MS_LOG(WARNING) << "Init DSP runtime failed, change to CPU mode."; + } else { + MS_LOG(INFO) << "Init DSP runtime success."; + } + } +#endif + return RET_OK; +} + int LiteSession::InitGPURuntime() { if (context_->IsDeviceTypeEnabled(DT_CPU)) { CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_; diff --git a/mindspore-lite/src/litert/lite_session.h b/mindspore-lite/src/litert/lite_session.h index 233302ae06de1c4313f08d00b1b0b75847c282ca..78bc3da382f68b9edf46c74eb31e2b47bc82dd50 100644 --- a/mindspore-lite/src/litert/lite_session.h +++ b/mindspore-lite/src/litert/lite_session.h @@ -36,6 +36,9 @@ #if GPU_OPENCL #include 
"src/litert/kernel/gpu/opencl/opencl_runtime.h" #endif +#ifdef ENABLE_DSP +#include "src/litert/kernel/dsp/dsp_runtime.h" +#endif #include "src/litert/scheduler_cb.h" #include "src/executor/sub_graph_kernel.h" @@ -180,6 +183,7 @@ class MS_API LiteSession { int CreateCoreMLDelegate(); int InitDelegate(); int InitGPURuntime(); + int InitDSPRuntime(); int InitSharedThreadPool(); int ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor); @@ -234,6 +238,9 @@ class MS_API LiteSession { #if GPU_OPENCL opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr}; #endif +#ifdef ENABLE_DSP + dsp::DSPRuntimeInnerWrapper *dsp_runtime_wrapper_{nullptr}; +#endif // In the dynamic shape scene, the flag is to indicate when to do shape-infer for kernel. If true, the shape-infer // will not be called when calling 'Resize', but be done along with running. And we will decide whether to call diff --git a/mindspore-lite/src/litert/scheduler.cc b/mindspore-lite/src/litert/scheduler.cc index 6ce3d4a277e3e0f0b720cbd26e1371d479c3e25c..f5f4a709caa61e747f96eee754365a614a95c244 100644 --- a/mindspore-lite/src/litert/scheduler.cc +++ b/mindspore-lite/src/litert/scheduler.cc @@ -1093,6 +1093,42 @@ int Scheduler::FindGpuKernel(const std::vector &in_tensors, const std: } #endif +#ifdef ENABLE_DSP +int Scheduler::FindDspKernel(const std::vector &in_tensors, const std::vector &out_tensors, + OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, + TypeId prefer_data_type) { + MS_ASSERT(op_parameter != nullptr); + MS_ASSERT(kernel != nullptr); + if (!context_->IsDeviceTypeEnabled(DT_DSP)) { + return RET_NOT_SUPPORT; + } + + // support more data type like int32 + kernel::KernelKey dsp_desc{kernel::KERNEL_ARCH::kDSP, desc.data_type, NHWC, desc.type}; + // weight dequant + auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32, src_model_->graph_.version_, + context_->float_mode); + if (ret != RET_OK) { + 
MS_LOG(DEBUG) << "Dequant input tensors failed: " << ret; + return RET_NOT_SUPPORT; + } + // we don't need to restore tensor for copy data + ret = CopyConstTensorData(in_tensors, op_parameter->type_); + if (ret != RET_OK) { + MS_LOG(DEBUG) << "CopyConstTensorsData failed: " << ret; + return RET_NOT_SUPPORT; + } + ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, dsp_desc, + op_parameter, kernel); + if (ret == RET_OK) { + MS_LOG(DEBUG) << "Get dsp_desc op success: " << PrimitiveCurVersionTypeName(dsp_desc.type); + } else { + MS_LOG(DEBUG) << "Get dsp_desc op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(dsp_desc.type); + } + return ret; +} +#endif + int Scheduler::FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const LiteGraph::Node *node, TypeId data_type, kernel::KernelExec **kernel) { #ifndef CUSTOM_KERNEL_REGISTRY_CLIP @@ -1102,6 +1138,15 @@ int Scheduler::FindProviderKernel(const std::vector &in_tensors, const if (prim_type == schema::PrimitiveType_Custom) { for (auto &&device : context_->device_list_) { if (!device.provider_.empty() && !device.provider_device_.empty()) { + if (device.provider_device_ == "DSP") { + kernel::KernelKey desc{kernel::KERNEL_ARCH::kDSP, data_type, NHWC, prim_type, + device.provider_device_, device.provider_}; + ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, desc, + nullptr, kernel, node->primitive_); + if (ret == RET_OK && *kernel != nullptr) { + return ret; + } + } kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, NHWC, prim_type, device.provider_device_, device.provider_}; ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, desc, @@ -1188,6 +1233,30 @@ kernel::KernelExec *Scheduler::FindBackendKernel(const std::vector &in op_parameter->is_train_session_ = is_train_session_; kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, 
data_type, NHWC, op_parameter->type_}; +#ifdef ENABLE_DSP + bool dsp_priority = DeviceTypePriority(context_, DT_DSP, DT_CPU); + bool use_dsp_kernel = node->device_type_ == DT_DSP || node->device_type_ == kDefaultDeviceType; + if (dsp_priority && use_dsp_kernel) { + status = FindDspKernel(in_tensors, out_tensors, op_parameter, desc, &kernel, prefer_data_type); + if (status == RET_OK) { + return kernel; + } else { + MS_LOG(DEBUG) << "Get dsp op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(desc.type) << " " + << node->name_; + if (status == RET_ERROR) { + op_parameters_.erase(node->output_indices_.at(0)); + auto ret = InferNodeShape(node); + if (ret == RET_INFER_INVALID || ret == RET_OK) { + op_parameter = op_parameters_[node->output_indices_.at(0)]; + } else { + MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + return nullptr; + } + } + } + } +#endif + #ifdef GPU_OPENCL bool gpu_priority = DeviceTypePriority(context_, DT_GPU, DT_CPU); bool use_gpu_kernel = node->device_type_ == DT_GPU || node->device_type_ == kDefaultDeviceType; @@ -1283,6 +1352,8 @@ kernel::SubGraphType GetKernelSubGraphType(const kernel::KernelExec *kernel, con } } else if (desc.arch == kernel::KERNEL_ARCH::kCustom) { return kernel::kCustomSubGraph; + } else if (desc.arch == kernel::KERNEL_ARCH::kDSP) { + return kernel::kDspSubGraph; } return kernel::kNotSubGraph; } @@ -1598,6 +1669,8 @@ bool KernelFitCurrentSubGraph(const kernel::SubGraphType subgraph_type, const ke } return KernelFitCurrentSubGraphCPUFp32(desc.data_type); } + case kernel::SubGraphType::kDspSubGraph: + return kernel.desc().arch == kernel::KERNEL_ARCH::kDSP; default: return false; } @@ -1687,8 +1760,9 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector &in_ten if (dtype == kObjectTypeTensorType) { return TensorListDataType(tensor); } - std::unordered_set type_set = {kNumberTypeFloat32, kNumberTypeFloat16, kNumberTypeInt8, kNumberTypeInt32, - kNumberTypeBool, kNumberTypeUInt8, 
kObjectTypeString}; + std::unordered_set type_set = {kNumberTypeFloat32, kNumberTypeFloat16, kNumberTypeInt8, + kNumberTypeInt32, kNumberTypeBool, kNumberTypeUInt8, + kObjectTypeString, kNumberTypeComplex64, kNumberTypeComplex128}; if (type_set.find(dtype) != type_set.end()) { return dtype; } diff --git a/mindspore-lite/src/litert/scheduler.h b/mindspore-lite/src/litert/scheduler.h index c114c75707cd810aeac6909d7409e83ac05fd2fd..3927377ecb9a7506e531546d7718598410e3e030 100644 --- a/mindspore-lite/src/litert/scheduler.h +++ b/mindspore-lite/src/litert/scheduler.h @@ -89,6 +89,13 @@ class Scheduler { OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, TypeId prefer_data_type); #endif + +#ifdef ENABLE_DSP + int FindDspKernel(const std::vector &in_tensors, const std::vector &out_tensors, + OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, + TypeId prefer_data_type); +#endif + int FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const LiteGraph::Node *node, TypeId data_type, kernel::KernelExec **kernel); diff --git a/mindspore-lite/test/CMakeLists.txt b/mindspore-lite/test/CMakeLists.txt index 88489c5c8dd5d914c9fbef850bab1e2b3782db40..3a1f5cab2e6a5060f7e91643d52d06b83f19063b 100644 --- a/mindspore-lite/test/CMakeLists.txt +++ b/mindspore-lite/test/CMakeLists.txt @@ -74,6 +74,13 @@ if(MSLITE_GPU_BACKEND STREQUAL opencl) endif() endif() +if(MSLITE_ENABLE_DSP) + file(GLOB_RECURSE TEST_GPU_UT_SRC + ${TEST_DIR}/ut/src/runtime/kernel/dsp/*.cc + ) + list(APPEND TEST_UT_SRC ${TEST_GPU_UT_SRC}) +endif() + if(MSLITE_GPU_BACKEND STREQUAL cuda) set(CUDA_PATH $ENV{CUDA_HOME}) include_directories(${CUDA_PATH}/include) diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..1fd7f720fcea01c0afd9845b5129a358301832cf --- 
/dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc @@ -0,0 +1,1443 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "ut/src/runtime/kernel/dsp/dsp_test.h" +#include "include/api/context.h" +#include "include/api/data_type.h" +#include "include/api/model.h" +#include "nnacl_c/arithmetic_parameter.h" +#include "schema/inner/model_generated.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "src/litert/kernel_registry.h" +#include "ut/src/runtime/kernel/opencl/common.h" + +namespace mindspore::lite::dsp::test { + +constexpr int kTestArraySize = 10000; // 100 * 100 +constexpr int kTestArraySize2 = 20000; // 100 * 100 * 2 + +class TestDSP_Arithmetic : public DSPCommonTest {}; + +namespace { +OpParameter *CreateParameter(schema::PrimitiveType type, const std::vector &input0_shape, + const std::vector &input1_shape, + schema::ActivationType act_type = schema::ActivationType_NO_ACTIVATION) { + auto *param = opencl::test::CreateParameter(type); + int input0_size = std::accumulate(input0_shape.begin(), input0_shape.end(), 1, std::multiplies<>()); + int input1_size = std::accumulate(input1_shape.begin(), input1_shape.end(), 1, std::multiplies<>()); + if (input0_size != input1_size) { + param->broadcasting_ = true; + } + param->activation_type_ = act_type; + return reinterpret_cast(param); +} +} // namespace + 
+TEST_F(TestDSP_Arithmetic, Add_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector 
input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = 
CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, 
mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = 
new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + 
auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + 
outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + 
std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx 
= new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, 
schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, 
reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, 
ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 0; + correct[i + 1] = 4; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2); + ASSERT_EQ(0, 
CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto 
t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Cplx64) { + InitDSPRuntime(); + std::vector 
inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 2; + correct[i + 1] = 0; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +#ifdef SUPPORT_FT78 +TEST_F(TestDSP_Arithmetic, Add_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + 
std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int 
num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new 
lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + 
input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, 
lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new 
lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + 
outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + 
std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, 
ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 0; + correct[i + 1] = 4; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = 
{kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast<OpParameter *>(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector<double> correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast<double *>(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int8) { + InitDSPRuntime(); + std::vector<lite::Tensor *> inputs_; + std::vector<lite::Tensor *> outputs_; + + std::vector<int> input0_shape = {100, 100}; + std::vector<int> input1_shape = {100, 100}; + std::vector<int> output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast<int8_t *>(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast<int8_t *>(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast<int8_t *>(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto
kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + 
ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 2; + correct[i + 1] = 0; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} +#endif + +} // namespace mindspore::lite::dsp::test diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h new file mode 100644 index 0000000000000000000000000000000000000000..88419f42d7e853af569ac4d207993293a3f96258 --- /dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h @@ -0,0 +1,55 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ +#define MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ + +#include <memory> +#include <vector> +#include "schema/inner/model_generated.h" +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "common/common_test.h" +#include "nnacl_c/arithmetic_parameter.h" + +namespace mindspore::lite::dsp::test { + +class DSPCommonTest : public CommonTest { + public: + void InitDSPRuntime() { + dsp_runtime_wrapper_ = new (std::nothrow) dsp::DSPRuntimeInnerWrapper(); + if (dsp_runtime_wrapper_ == nullptr) { + MS_LOG(ERROR) << "create DSPRuntimeInnerWrapper failed."; + } + auto dsp_runtime = dsp_runtime_wrapper_->GetInstance(); + if (dsp_runtime->Init() != RET_OK) { + MS_LOG(ERROR) << "Init DSP runtime failed."; + } + allocator_ = dsp_runtime->GetAllocator(); + } + + void UninitDSPRuntime() { + delete dsp_runtime_wrapper_; + dsp_runtime_wrapper_ = nullptr; + } + + protected: + dsp::DSPRuntimeInnerWrapper *dsp_runtime_wrapper_{nullptr}; + std::shared_ptr<dsp::DSPAllocator> allocator_; +}; +} // namespace mindspore::lite::dsp::test + +#endif // MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..62bd15ad5aac4d5f7d090fddae8027696c7341a7 --- /dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc @@ -0,0 +1,317 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <algorithm> +#include <memory> +#include <vector> +#include "ut/src/runtime/kernel/dsp/dsp_test.h" +#include "include/api/context.h" +#include "include/api/data_type.h" +#include "include/api/model.h" +#include "nnacl_c/exp_parameter.h" +#include "schema/inner/model_generated.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "src/litert/kernel_registry.h" + +namespace mindspore::lite::dsp::test { + +// Compile-time constants used in place of variable-length arrays +constexpr int kTestArraySize = 10000; // 100 * 100 +constexpr int kTestArraySize2 = 20000; // 100 * 100 * 2 + +class TestDSP_Exp : public DSPCommonTest {}; + +TEST_F(TestDSP_Exp, Exp_Fp32) { + InitDSPRuntime(); + std::vector<lite::Tensor *> inputs_; + std::vector<lite::Tensor *> outputs_; + + std::vector<int> input_shape = {100, 100}; + std::vector<int> output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeFloat32, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast<float *>(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast<float *>(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, 
schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeInt32, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + 
UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeComplex64, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i = i + 2) { + correct[i] = 1.4686939; + correct[i + 1] = 2.2873552; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +#ifdef SUPPORT_FT78 +TEST_F(TestDSP_Exp, Exp_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = 
{100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeFloat64, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeInt8, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + 
output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeComplex128, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = 
KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i = i + 2) { + correct[i] = 1.4686939; + correct[i + 1] = 2.2873552; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} +#endif + +} // namespace mindspore::lite::dsp::test diff --git a/scripts/build/build_lite.sh b/scripts/build/build_lite.sh index e978b56507559a2da823078719abef8e5579a34a..7e248e1821ad109d1e7855604b2e44760328c7f0 100755 --- a/scripts/build/build_lite.sh +++ b/scripts/build/build_lite.sh @@ -34,6 +34,13 @@ check_Hi35xx() { fi } +check_dsp_sdk() { + if [[ "X${DSP_SDK_PATH}" == "X" ]]; then + echo "error: to compile the runtime package of DSP, you need to set DSP_SDK_PATH to declare the path of DSP sdk." + exit 1 + fi +} + get_version() { VERSION_STR=$(cat ${BASEPATH}/version.txt) } @@ -391,6 +398,7 @@ build_lite() { TOOLCHAIN_NAME=${MSLITE_MICRO_PLATFORM} elif [[ ("${MSLITE_REGISTRY_DEVICE}" == "ft04" || "${MSLITE_REGISTRY_DEVICE}" == "ft78") && "${local_lite_platform}" == "arm32" ]]; then TOOLCHAIN_NAME="cortex-a15" + check_dsp_sdk fi machine=`uname -m`