diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index b494fdf54878f0b8896325e4be0953a3ce032489..7a2f817a0db6100ca9720893c0817f8523ab223c 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -27,6 +27,8 @@ mindspore-lite/mindspore-lite/src/litert/kernel/opencl/kernel/fullconnection.cc: mindspore-lite/mindspore-lite/src/litert/scheduler.cc:mindspore::lite::Scheduler::FindBackendKernel mindspore-lite/mindspore-lite/src/litert/thread_pool.c:GetArch mindspore-lite/mindspore-lite/src/train/train_loop.cc:mindspore::lite::TrainLoop::Train +mindspore-lite/mindspore-lite/src/common/utils.h:mindspore::lite::DataTypeSize +mindspore-lite/mindspore-lite/src/litert/scheduler.cc:mindspore::lite::Scheduler::FindProviderKernel # minddata mindspore-lite/mindspore-lite/minddata/dataset/engine/datasetops/data_queue_op.cc:mindspore::dataset::DataQueueOp::SendDataToAscend diff --git a/include/cxx_api/context.h b/include/cxx_api/context.h index d6b1e723ff98a50c08e4709841f79afd5cf28193..425fc87c66a0f37c1f0f47579b17dab7b10123da 100644 --- a/include/cxx_api/context.h +++ b/include/cxx_api/context.h @@ -36,6 +36,7 @@ enum DeviceType { kAscend, kAscend910, kAscend310, + kDSP, kCustomDevice, kAllDevice, // add new type here @@ -598,5 +599,26 @@ void AscendDeviceInfo::SetBufferOptimizeMode(const std::string &buffer_optimize_ SetBufferOptimizeMode(StringToChar(buffer_optimize_mode)); } std::string AscendDeviceInfo::GetBufferOptimizeMode() const { return CharToString(GetBufferOptimizeModeChar()); } + +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the dsp. This option is +/// invalid for MindSpore Lite. +class MS_API DSPDeviceInfo : public DeviceInfoContext { + public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. 
+ enum DeviceType GetDeviceType() const override { return DeviceType::kDSP; }; + + /// \brief Set device id. + /// + /// \param[in] device_id The device id. + void SetDeviceID(uint32_t device_id); + + /// \brief Get the device id. + /// + /// \return The device id. + uint32_t GetDeviceID() const; +}; + } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_CONTEXT_H diff --git a/mindspore-lite/CMakeLists.txt b/mindspore-lite/CMakeLists.txt index c5f73f0b956b7f1b554d651af632493d11739a1b..da76df772ac9ca10331f3e10d0328f03c1ef6e76 100644 --- a/mindspore-lite/CMakeLists.txt +++ b/mindspore-lite/CMakeLists.txt @@ -346,9 +346,13 @@ elseif(TOOLCHAIN_NAME STREQUAL "ohos") elseif(TOOLCHAIN_NAME STREQUAL "cortex-a15") if((${MSLITE_REGISTRY_DEVICE} STREQUAL "ft78")) set(TARGET_FT78 on) + add_compile_definitions(SUPPORT_FT78) elseif((${MSLITE_REGISTRY_DEVICE} STREQUAL "ft04")) set(TARGET_FT04 on) + add_compile_definitions(SUPPORT_FT04) endif() + set(MSLITE_ENABLE_DSP ON) + add_compile_definitions(ENABLE_DSP) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 diff --git a/mindspore-lite/include/lite_types.h b/mindspore-lite/include/lite_types.h index 017e98a86c87087f48ac2f9e1dbeafe2d9704c49..cd8cc19b36ac66ae553037b87885db731812bc6f 100644 --- a/mindspore-lite/include/lite_types.h +++ b/mindspore-lite/include/lite_types.h @@ -41,6 +41,7 @@ typedef enum { DT_GPU, /**< GPU device type */ DT_NPU, /**< NPU device type */ DT_ASCEND, /**< ASCEND device type */ + DT_DSP, /**< DSP device type */ DT_CUSTOM, /**< EXTEND device type */ DT_END /**< NO device type */ } DeviceType; diff --git a/mindspore-lite/src/CMakeLists.txt b/mindspore-lite/src/CMakeLists.txt index ec45bcf459e0ebeeed2e2b509cc5b49e97f728c6..8336e6e78d982eed55658efca6c10180ac2fc91f 100644 --- a/mindspore-lite/src/CMakeLists.txt +++ b/mindspore-lite/src/CMakeLists.txt @@ -446,6 +446,27 @@ if(MSLITE_GPU_BACKEND STREQUAL opencl) target_link_libraries(mindspore-lite_static 
opencl_kernel_mid) endif() +if(${MSLITE_REGISTRY_DEVICE} STREQUAL "ft04" OR ${MSLITE_REGISTRY_DEVICE} STREQUAL "ft78") + if(NOT DEFINED ENV{DSP_SDK_PATH}) + message(FATAL_ERROR "DSP SDK path not found. Please set DSP_SDK_PATH environment variable.") + endif() + message(STATUS "DSP SDK path: $ENV{DSP_SDK_PATH}") + set(HTHREAD_LIB_PATH "$ENV{DSP_SDK_PATH}/hthread/lib/${MSLITE_REGISTRY_DEVICE}") + if(NOT EXISTS "${HTHREAD_LIB_PATH}") + message(FATAL_ERROR "HTHREAD library directory not found at: ${HTHREAD_LIB_PATH}") + endif() + find_library(HTHREAD_LIB + NAMES libhthread_host.a hthread_host + PATHS "${HTHREAD_LIB_PATH}" + NO_DEFAULT_PATH + REQUIRED) + add_subdirectory(litert/kernel/dsp) + target_link_libraries(mindspore-lite dsp_kernel_mid) + target_link_libraries(mindspore-lite_static dsp_kernel_mid) + target_link_libraries(mindspore-lite ${HTHREAD_LIB}) + target_link_libraries(mindspore-lite_static ${HTHREAD_LIB}) +endif() + if(SUPPORT_NPU) add_subdirectory(litert/delegate/npu) target_link_libraries(mindspore-lite npu_kernel_mid) diff --git a/mindspore-lite/src/common/context_util.cc b/mindspore-lite/src/common/context_util.cc index bc2e40e1326a94c9a179f6dceb77c9e0339ea264..4a2e89abbae72c8a35459b0f233f7b3e7225d0d5 100644 --- a/mindspore-lite/src/common/context_util.cc +++ b/mindspore-lite/src/common/context_util.cc @@ -108,6 +108,17 @@ std::shared_ptr AscendDeviceInfoFromAscendDeviceCon return ascend_info; } +std::shared_ptr DSPDeviceInfoFromDSPDeviceContext(const lite::DeviceContext &dsp_context) { + if (dsp_context.device_type_ != DT_DSP) { + MS_LOG(ERROR) << "Function input parameter is not dsp context."; + return nullptr; + } + auto dsp_info = std::make_shared(); + MS_CHECK_TRUE_RET(dsp_info != nullptr, nullptr); + PassBasicProperties(dsp_info, dsp_context); + return dsp_info; +} + std::shared_ptr CustomDeviceInfoFromCustomDeviceContext( const lite::DeviceContext &inner_context) { if (inner_context.device_type_ != DT_CUSTOM) { @@ -140,11 +151,10 @@ 
mindspore::Context *MSContextFromContext(const std::shared_ptr &co } auto &device_infos = ms_context->MutableDeviceInfo(); std::map(const lite::DeviceContext &)>> - transfer_funcs = {{DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, - {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, - {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, - {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, - {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; + transfer_funcs = { + {DT_CPU, CPUDeviceInfoFromCPUDeviceContext}, {DT_GPU, GPUDeviceInfoFromGPUDeviceContext}, + {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, + {DT_DSP, DSPDeviceInfoFromDSPDeviceContext}, {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; for (auto &device_context : context->device_list_) { auto device_type = device_context.device_type_; if (transfer_funcs.find(device_type) == transfer_funcs.end()) { diff --git a/mindspore-lite/src/common/utils.h b/mindspore-lite/src/common/utils.h index 49dd63482e9057033754bc31a44de5b1dbc88a9f..87b2a11175a843a2afdbe8d3ae06014b6acf4733 100644 --- a/mindspore-lite/src/common/utils.h +++ b/mindspore-lite/src/common/utils.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -342,6 +343,10 @@ inline size_t DataTypeSize(TypeId type) { return 0; case kMetaTypeTypeType: return sizeof(int); + case kNumberTypeComplex64: + return sizeof(std::complex); + case kNumberTypeComplex128: + return sizeof(std::complex); default: MS_LOG(ERROR) << "Not support the type: " << type; return 0; diff --git a/mindspore-lite/src/executor/kernel_exec.h b/mindspore-lite/src/executor/kernel_exec.h index 72a298656fc40bbc174d41d0b20a5fe0b4be511c..c009d63228590132687032f968216ca2afd8c9e3 100644 --- a/mindspore-lite/src/executor/kernel_exec.h +++ b/mindspore-lite/src/executor/kernel_exec.h @@ -49,7 +49,18 @@ using mindspore::infer::Abstractkernel; using mindspore::lite::KernelCallBack; namespace mindspore::kernel { -enum 
KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kDelegate, kACL, kKernelArch_MIN = kCPU, kKernelArch_MAX = kAPU }; +enum KERNEL_ARCH { + kCPU, + kGPU, + kDSP, + kAPU, + kNPU, + kCustom, + kDelegate, + kACL, + kKernelArch_MIN = kCPU, + kKernelArch_MAX = kAPU +}; static const char *const kBuiltin = "Builtin"; struct KernelKey { @@ -78,7 +89,8 @@ enum SubGraphType { kEntranceSubGraph, kExitSubGraph, kStackSubGraph, - kAclSubGraph + kAclSubGraph, + kDspSubGraph }; class KernelExec { diff --git a/mindspore-lite/src/litert/cxx_api/context.cc b/mindspore-lite/src/litert/cxx_api/context.cc index 6d844982c5bc677e466b892837806d3646b803bf..6f6cc7f894d29b969417bb8658fbb3526f4fea5e 100644 --- a/mindspore-lite/src/litert/cxx_api/context.cc +++ b/mindspore-lite/src/litert/cxx_api/context.cc @@ -683,4 +683,20 @@ std::vector AscendDeviceInfo::GetBufferOptimizeModeChar() const { const std::string &ref = GetValue(data_, kModelOptionAscendBufferOptimize); return StringToChar(ref); } + +void DSPDeviceInfo::SetDeviceID(uint32_t device_id) { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return; + } + data_->params[kModelOptionDeviceID] = device_id; +} + +uint32_t DSPDeviceInfo::GetDeviceID() const { + if (data_ == nullptr) { + MS_LOG(ERROR) << "Invalid context."; + return 0; + } + return GetValue(data_, kModelOptionDeviceID); +} } // namespace mindspore diff --git a/mindspore-lite/src/litert/cxx_api/converters.cc b/mindspore-lite/src/litert/cxx_api/converters.cc index 8daf0242090d51e82e693269de7a7c41bf26add3..01ca00e104141be6e4916875010697066434a043 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.cc +++ b/mindspore-lite/src/litert/cxx_api/converters.cc @@ -78,6 +78,15 @@ Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceIn return kSuccess; } +Status ContextUtils::AddDspDevice(lite::InnerContext *inner_context, DeviceInfoContext *device) { + lite::DeviceInfo device_info; + auto dsp_context = device->Cast(); + 
device_info.dsp_device_info_ = {dsp_context->GetDeviceID()}; + inner_context->device_list_.push_back( + {lite::DT_DSP, device_info, dsp_context->GetProvider(), dsp_context->GetProviderDevice()}); + return kSuccess; +} + Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device) { lite::DeviceInfo device_info; @@ -161,6 +170,8 @@ std::shared_ptr ContextUtils::Convert(Context *context) { ret = AddNpuDevice(npu_context->GetEnableFP16(), npu_context->GetFrequency(), inner_context.get()); } else if (device->GetDeviceType() == kAscend) { ret = AddAscendDevice(inner_context.get(), device.get()); + } else if (device->GetDeviceType() == kDSP) { + ret = AddDspDevice(inner_context.get(), device.get()); } else if (device->GetDeviceType() == kCustomDevice) { ret = AddCustomDevice(inner_context.get(), device); } diff --git a/mindspore-lite/src/litert/cxx_api/converters.h b/mindspore-lite/src/litert/cxx_api/converters.h index 68ae65f7d0455d95e8422816bc54e16d3dc25f3b..ccae58a16e00b831b1d6232062905c4e565262d7 100644 --- a/mindspore-lite/src/litert/cxx_api/converters.h +++ b/mindspore-lite/src/litert/cxx_api/converters.h @@ -45,6 +45,7 @@ class MS_API ContextUtils { lite::InnerContext *inner_context); static Status AddNpuDevice(bool enable_fp16, int frequency, lite::InnerContext *inner_context); static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); + static Status AddDspDevice(lite::InnerContext *inner_context, DeviceInfoContext *device); static Status AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr &device); static bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; diff --git a/mindspore-lite/src/litert/inner_context.h b/mindspore-lite/src/litert/inner_context.h index e5f02fb4fc1c28e697b67ff97210ed05feb21745..19f61ad84e955845fe9bae72204f79ec6710227e 100644 --- a/mindspore-lite/src/litert/inner_context.h +++ 
b/mindspore-lite/src/litert/inner_context.h @@ -66,6 +66,10 @@ typedef struct AscendDeviceInfo { std::string image_size_; } AscendDeviceInfo; +typedef struct DspDeviceInfo { + uint32_t device_id_ = 0; +} DspDeviceInfo; + /// \brief CustomDeviceInfo defined for user defined device configuration information. typedef struct CustomDeviceInfo { std::shared_ptr user_defined_device_info_; @@ -76,6 +80,7 @@ struct DeviceInfo { GpuDeviceInfo gpu_device_info_; NpuDeviceInfo npu_device_info_; AscendDeviceInfo ascend_device_info_; + DspDeviceInfo dsp_device_info_; CustomDeviceInfo custom_device_info_; }; diff --git a/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt b/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa2e9c4e479d40f8d79cf46939c59dd00cff305f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB_RECURSE COMMON_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_allocator.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_kernel.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_runtime_wrapper.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_runtime.cc + ${CMAKE_CURRENT_SOURCE_DIR}/dsp_subgraph.cc + ) + +file(GLOB_RECURSE DSP_KERNEL_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/${MSLITE_REGISTRY_DEVICE}/*.cc + ) + +add_library(dsp_kernel_mid OBJECT ${DSP_KERNEL_SRC} ${COMMON_SRC}) +add_dependencies(dsp_kernel_mid fbs_src) +target_include_directories(dsp_kernel_mid PRIVATE $ENV{DSP_SDK_PATH}) diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..568599337f17415c459bdfa9937f8992899b7850 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.cc @@ -0,0 +1,245 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include +#include "include/errorcode.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "hthread/include/hthread_host.h" + +namespace mindspore::lite::dsp { +DSPAllocator::DSPAllocator(DSPRuntime *dsp_runtime) : dsp_runtime_(dsp_runtime) { + device_id_ = dsp_runtime->GetDeviceID(); +} + +DSPAllocator::~DSPAllocator() { Clear(); } + +void DSPAllocator::Lock() { + if (lock_flag_) { + lock.lock(); + } +} + +void DSPAllocator::UnLock() { + if (lock_flag_) { + lock.unlock(); + } +} + +void *DSPAllocator::MinimumFit(MemType mem_type, size_t size) { + auto iter = free_list_.lower_bound(size); + while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { + auto mem_buf = iter->second; + bool is_match = mem_buf->mem_type_ == mem_type; + if (is_match) { + free_list_.erase(iter); + allocated_list_[mem_buf->host_ptr_] = mem_buf; + mem_buf->ref_count_ = 0; + MS_LOG(DEBUG) << "Find Mem from free list. 
size: " << mem_buf->size_ + << ", type: " << static_cast(mem_buf->mem_type_); + return mem_buf->host_ptr_; + } + ++iter; + } + return nullptr; +} + +void *DSPAllocator::_Malloc(MemType mem_type, size_t size) { + if (size > dsp_runtime_->GetMaxAllocSize()) { + MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; + return nullptr; + } + Lock(); + void *host_ptr = MinimumFit(mem_type, size); + UNLOCK_AND_RETURN_NULL(host_ptr != nullptr, host_ptr); + + MemBuf *mem_buf = new (std::nothrow) MemBuf; + if (mem_buf == nullptr) { + UnLock(); + return nullptr; + } + mem_buf->device_ptr_ = 0; + auto ret = HostTlsfMalloc(device_id_, core_id_, static_cast(mem_type), size, &mem_buf->device_ptr_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "HostTlsfMalloc failed, size: " << size << ", type: " << static_cast(mem_type); + delete mem_buf; + UnLock(); + return nullptr; + } + host_ptr = reinterpret_cast(GetViraddr(mem_buf->device_ptr_, size)); + mem_buf->ref_count_ = 0; + mem_buf->size_ = size; + mem_buf->host_ptr_ = host_ptr; + mem_buf->mem_type_ = mem_type; + allocated_list_[host_ptr] = mem_buf; + // Count the size only after the device allocation has succeeded. + total_size_ += size; + UnLock(); + + MS_LOG(DEBUG) << "Malloc a new buffer. 
memory type: " << static_cast(mem_buf->mem_type_) + << ", size: " << std::dec << mem_buf->size_; + return host_ptr; +} + +void DSPAllocator::Free(void *buf) { + if (buf == nullptr) { + return; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + mem_buf->ref_count_ = 0; + allocated_list_.erase(iter); + free_list_.insert(std::make_pair(mem_buf->size_, mem_buf)); + UnLock(); + return; + } + UnLock(); + MS_LOG(WARNING) << "Host ptr has freed"; +} + +int DSPAllocator::RefCount(void *buf) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + int ref_count = std::atomic_load(&mem_buf->ref_count_); + UnLock(); + return ref_count; + } + UnLock(); + return -1; +} + +int DSPAllocator::SetRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + std::atomic_store(&mem_buf->ref_count_, ref_count); + UnLock(); + return ref_count; + } + UnLock(); + return -1; +} + +int DSPAllocator::IncRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto membuf = iter->second; + auto ref = std::atomic_fetch_add(&membuf->ref_count_, ref_count); + UnLock(); + return (ref + ref_count); + } + UnLock(); + return -1; +} + +int DSPAllocator::DecRefCount(void *buf, int ref_count) { + if (buf == nullptr) { + return -1; + } + Lock(); + auto iter = allocated_list_.find(buf); + if (iter != allocated_list_.end()) { + auto mem_buf = iter->second; + auto ref = std::atomic_fetch_sub(&mem_buf->ref_count_, ref_count); + UnLock(); + return (ref - ref_count); + } + UnLock(); + return -1; +} + +size_t DSPAllocator::TotalSize() { + Lock(); + size_t total_size = 0; + for 
(auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { + total_size += it->second->size_; + } + for (auto it = free_list_.begin(); it != free_list_.end(); it++) { + total_size += it->second->size_; + } + UnLock(); + return total_size; +} + +uint64_t DSPAllocator::GetDeviceMemPtr(void *buffer) { + // Guard allocated_list_ like every other accessor of this class. + Lock(); + auto it = allocated_list_.find(buffer); + if (it != allocated_list_.end()) { + auto device_ptr = it->second->device_ptr_; + UnLock(); + return device_ptr; + } + UnLock(); + MS_LOG(ERROR) << "Can not found device ptr!"; + return 0; +} + +template +void DSPAllocator::ClearMemList(T *list) { + for (auto it = list->begin(); it != list->end(); it++) { + if (it->second->host_ptr_ != nullptr) { + MS_LOG(DEBUG) << "ReleaseViraddr host ptr."; + ReleaseViraddr(reinterpret_cast(it->second->host_ptr_), it->second->device_ptr_, it->second->size_); + it->second->host_ptr_ = nullptr; + } + if (it->second->device_ptr_ != 0) { + MS_LOG(DEBUG) << "HostTlsfFree device ptr."; + HostTlsfFree(device_id_, core_id_, static_cast(it->second->mem_type_), &it->second->device_ptr_); + } + delete it->second; + } + list->clear(); +} + +void DSPAllocator::Clear() { + Lock(); + ClearMemList>(&allocated_list_); + ClearMemList>(&free_list_); + UnLock(); +} + +MemType DSPAllocator::GetMemType(void *host_ptr) { + MemType mem_type{MemType::DDR}; + Lock(); + auto it = allocated_list_.find(host_ptr); + if (it == allocated_list_.end()) { + UnLock(); + MS_LOG(ERROR) << "Can not found buffer!"; + return mem_type; + } + MemBuf *mem_buf = it->second; + if (mem_buf == nullptr) { + UnLock(); + MS_LOG(ERROR) << "MemBuf is nullptr for host_ptr!"; + return mem_type; + } + mem_type = mem_buf->mem_type_; + UnLock(); + return mem_type; +} +} // namespace mindspore::lite::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..3cb2bae2b79f7c67a7729a8fdeb429dc67f19821 --- /dev/null +++ 
b/mindspore-lite/src/litert/kernel/dsp/dsp_allocator.h @@ -0,0 +1,92 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "src/litert/inner_allocator.h" + +namespace mindspore::lite::dsp { +enum class MemType : char { SMC, DDR, DDR36BIT, L2 }; +#define UNLOCK_AND_RETURN_NULL(condition, ptr) \ + do { \ + if (condition) { \ + UnLock(); \ + return (ptr); \ + } \ + } while (0) + +class DSPRuntime; +class DSPAllocator : public mindspore::Allocator { + public: + explicit DSPAllocator(DSPRuntime *dsp_runtime); + ~DSPAllocator() override; + + using Allocator::Malloc; + void *Malloc(size_t size, MemType type) { return _Malloc(type, size); } + void *Malloc(size_t size) override { return _Malloc(MemType::DDR, size); } + + void Free(void *ptr) override; + int RefCount(void *ptr) override; + int SetRefCount(void *ptr, int ref_count) override; + int DecRefCount(void *ptr, int ref_count) override; + int IncRefCount(void *ptr, int ref_count) override; + size_t TotalSize(); + + void Clear(); + MemType GetMemType(void *host_ptr); + uint64_t GetDeviceMemPtr(void *buffer); + void *Prepare(void *ptr) override { return ptr; } + + private: + void Lock(); + void UnLock(); + void *MinimumFit(MemType mem_type, size_t 
size); + void *_Malloc(MemType mem_type, size_t size = 0); + template + void ClearMemList(T *list); + + private: + DSPRuntime *dsp_runtime_{nullptr}; + int32_t device_id_{0}; + int32_t core_id_{0}; + std::mutex lock; + struct MemBuf { + std::atomic_int ref_count_ = 0; + size_t size_{0}; + uint64_t device_ptr_{0}; + void *host_ptr_{nullptr}; + MemType mem_type_{MemType::DDR}; + }; + + // buf, membuf> + std::unordered_map allocated_list_; + std::multimap free_list_; + uint64_t total_size_{0}; + // 6 is empirical value + int shift_factor_ = 6; + bool lock_flag_ = true; +}; +} // namespace mindspore::lite::dsp + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_ALLOCATOR_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..d8894645ef066f1c04ac88540534b4a6f4f1c2b8 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.cc @@ -0,0 +1,96 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/infer_manager.h" +#include "src/litert/kernel/dsp/dsp_kernel.h" +#include "src/litert/weight_decoder.h" +#include "src/common/file_utils.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +bool DSPKernel::MallocDataDone() { + for (auto &out_tensor : out_tensors_) { + if (out_tensor->data() == nullptr) { + return false; + } + auto allocator = out_tensor->allocator(); + if (allocator == nullptr) { + return false; + } + auto buffer = + reinterpret_cast(allocator.get())->GetDeviceMemPtr(out_tensor->data()); + if (buffer == 0) { + return false; + } + } + return true; +} + +int DSPKernel::PreProcess() { + if (MallocDataDone()) { + return RET_OK; + } + int ret = ReSize(); + if (ret != RET_OK) { + return ret; + } + for (size_t i = 0; i < out_tensors_.size(); ++i) { + auto *output = out_tensors_.at(i); + CHECK_NULL_RETURN(output); + CHECK_NULL_RETURN(output->allocator()); + ret = output->MallocData(); + MS_LOG(DEBUG) << "MallocData for output " << i << ", data: " << output->data(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "MallocData failed"; + return ret; + } + output->ResetRefCount(); + } + return RET_OK; +} + +int DSPKernel::InferShape() { + if (InferShapeDone()) { + return RET_OK; + } + auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_); + if (ret != RET_OK) { + MS_LOG(WARNING) << "InferShape failed, type: " + << schema::EnumNamePrimitiveType(static_cast(type())); + return ret; + } + return RET_OK; +} + +int DSPKernel::ReSize() { + if (InferShapeDone()) { + return RET_OK; + } + auto ret = InferShape(); + if (ret != RET_OK) { + return ret; + } + + ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ReSize failed for kernel prepare!"; + return ret; + } + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h new file mode 100644 index 
0000000000000000000000000000000000000000..08d907877d9393b09fba3fa366652c4ace244baa --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_kernel.h @@ -0,0 +1,109 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ + +#include +#include +#include +#include +#include +#include +#include "src/litert/lite_kernel.h" +#include "src/executor/kernel_exec.h" +#include "include/errorcode.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "src/litert/tensor_category.h" +#include "nnacl_c/resize_parameter.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel { +constexpr int INPUT_TENSOR_SIZE_1 = 1; +constexpr int INPUT_TENSOR_SIZE_2 = 2; +constexpr int INPUT_TENSOR_SIZE_3 = 3; +constexpr int INPUT_TENSOR_SIZE_4 = 4; +constexpr int INPUT_TENSOR_SIZE_5 = 5; +constexpr int INPUT_TENSOR_SIZE_6 = 6; +constexpr int INPUT_TENSOR_SIZE_16 = 16; +constexpr int OUTPUT_TENSOR_SIZE_1 = 1; +constexpr int OUTPUT_TENSOR_SIZE_2 = 2; +constexpr int OUTPUT_TENSOR_SIZE_3 = 3; +constexpr int OUTPUT_TENSOR_SIZE_4 = 4; + +class DSPKernel : public LiteKernel { + public: + DSPKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : 
LiteKernel(parameter, inputs, outputs, ctx) { + dsp_runtime_ = dsp_runtime_wrapper_.GetInstance(); + } + ~DSPKernel() override = default; + + int Prepare() override { return RET_OK; } + int PreProcess() override; + int ReSize() override; + int Run() override { return RET_ERROR; } + + bool MallocDataDone(); + virtual int CheckSpecs() { return RET_OK; } + lite::dsp::MemType GetMemType() { return out_mem_type_; } + void SetMemType(lite::dsp::MemType mem_type) { out_mem_type_ = mem_type; } + void SetKernelArg(const std::vector &kernel_args = {}) { kernel_args_ = kernel_args; } + int InferShape() override; + + protected: + lite::dsp::DSPRuntime *dsp_runtime_; + std::vector kernel_args_; + lite::dsp::MemType out_mem_type_{lite::dsp::MemType::DDR}; + + private: + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper_; +}; + +template +kernel::LiteKernel *DSPKernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc) { + auto *kernel = new (std::nothrow) T(reinterpret_cast(opParameter), inputs, outputs, ctx); + if (kernel == nullptr) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "is nullptr."; + return nullptr; + } + auto shape = outputs.front()->shape(); + if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't infer shape yet!"; + return kernel; + } + if (std::find(shape.begin(), shape.end(), 0) != shape.end()) { + MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't support output shape has zero."; + delete kernel; + return nullptr; + } + auto ret = kernel->CheckSpecs(); + if (ret != mindspore::lite::RET_OK) { + MS_LOG(WARNING) << "Check " << opParameter->name_ << " specification failed!"; + delete kernel; + return nullptr; + } + return kernel; +} +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_KERNEL_H_ diff --git 
a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc new file mode 100644 index 0000000000000000000000000000000000000000..8221a6c8522205ff1b2b559138d5c402c8d6906e --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.cc @@ -0,0 +1,157 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/file_utils.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "hthread/include/hthread_host.h" + +namespace mindspore::lite::dsp { +static std::mutex g_mtx; +static std::mutex g_init_mtx; + +InitState DSPRuntime::init_state_ = UnInit; +DSPRuntime *DSPRuntime::dsp_runtime_instance_ = nullptr; +size_t DSPRuntime::instance_count_ = 0; + +DSPRuntime *DSPRuntime::GetInstance() { + std::unique_lock lck(g_mtx); + static DSPRuntime dsp_runtime; + if (instance_count_ == 0) { + dsp_runtime_instance_ = &dsp_runtime; + } + instance_count_++; + return dsp_runtime_instance_; +} + +void DSPRuntime::DeleteInstance() { + std::unique_lock lck(g_mtx); + if (instance_count_ == 0) { + MS_LOG(ERROR) << "No DSPRuntime instance could delete!"; + return; + } + instance_count_--; + if (instance_count_ == 0) { + dsp_runtime_instance_->Uninit(); + } +} + +// Init will get devices 
info, load dsp ops library. +int DSPRuntime::Init() { + std::unique_lock lck(g_init_mtx); + if (init_state_ == InitSuccess) { + return RET_OK; + } else if (init_state_ == InitFailed) { + return RET_ERROR; + } + init_state_ = InitFailed; + + if (IsPrintDebug()) { + MT_INFO_LOG = 1; + } + GetHthreadVersion(); + auto device_status = DeviceOpen(device_id_); + if (device_status < 0) { + MS_LOG(ERROR) << "Open DSP Device failed!"; + return RET_ERROR; + } + std::string library_path = "/usr/lib/dsp_lib.dat"; + std::ifstream ifs(library_path); + if (!ifs.good()) { + MS_LOG(ERROR) << "DSP Lib: " << library_path << " is not exist."; + return RET_ERROR; + } + if (ImportLib(library_path.data()) != RET_OK) { + MS_LOG(ERROR) << "Load DSP OPS Library failed!"; + return RET_ERROR; + } + + allocator_ = std::make_shared(this); + if (allocator_ == nullptr) { + MS_LOG(ERROR) << "DSP allocator failed!"; + return RET_ERROR; + } + init_state_ = InitSuccess; + MS_LOG(INFO) << "DSPRuntime init done!"; + return RET_OK; +} + +int DSPRuntime::Uninit() { + std::unique_lock lck(g_init_mtx); + if (init_state_ != InitSuccess) { + return RET_OK; + } + auto ret = DeviceClose(device_id_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Close DSP Device failed!"; + return RET_ERROR; + } + allocator_ = nullptr; + init_state_ = UnInit; + MS_LOG(INFO) << "DSPRuntime uninit done!"; + return RET_OK; +} + +DSPRuntime::~DSPRuntime() { Uninit(); } + +int DSPRuntime::RunKernel(const std::string &kernel_name, const std::vector &kernel_args, + const int core_mask) { + int ret = -1; + int thread_id = -1; + ret = LaunchGroup(device_id_, core_mask, &thread_id, const_cast(kernel_name.c_str()), kernel_args.size(), + const_cast(kernel_args.data())); + if (ret != RET_OK) { + MS_LOG(ERROR) << "LaunchGroup failed! kernel name: " << kernel_name; + return ret; + } + ret = WaitGroup(thread_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "WaitGroup failed! 
kernel name: " << kernel_name; + return ret; + } + ret = DestroyGroup(thread_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "DestroyGroup failed! kernel name: " << kernel_name; + return ret; + } + return ret; +} + +uint64_t DSPRuntime::GetMaxAllocSize() { return GetSysMemorySize(); } + +int DSPRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size) const { + auto ret = HostMemCopy(dst, reinterpret_cast(src), size, 1); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyDeviceMemToHost failed!"; + } + return ret; +} + +int DSPRuntime::CopyHostMemToDevice(void *dst, const void *src, size_t size) const { + auto ret = HostMemCopy(const_cast(src), reinterpret_cast(dst), size, 0); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyHostMemToDevice failed!"; + } + return ret; +} +} // namespace mindspore::lite::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..db14c2cb20deb2afe6c9fcad58d6879a91359e7b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime.h @@ -0,0 +1,82 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "ir/dtype/type_id.h" +#include "src/common/log_adapter.h" +#include "src/litert/kernel/dsp/dsp_runtime_wrapper.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" + +namespace mindspore::lite::dsp { +enum InitState { UnInit = 0, InitSuccess = 1, InitFailed = 2 }; + +class DSPRuntimeInnerWrapper; +class DSPRuntimeWrapper; +class DSPRuntime { + public: + friend DSPRuntimeInnerWrapper; + friend DSPRuntimeWrapper; + ~DSPRuntime(); + DSPRuntime(const DSPRuntime &) = delete; + DSPRuntime &operator=(const DSPRuntime &) = delete; + + int Init(); + int Uninit(); + + std::shared_ptr GetAllocator() { return allocator_; } + uint64_t GetMaxAllocSize(); + int32_t GetDeviceID() { return device_id_; } + + int RunKernel(const std::string &kernel_name, const std::vector &kernel_args, const int core_mask); + + int CopyDeviceMemToHost(void *dst, const void *src, size_t size) const; + int CopyHostMemToDevice(void *dst, const void *src, size_t size) const; + + private: + static DSPRuntime *GetInstance(); + static void DeleteInstance(); + DSPRuntime() = default; + + private: + static InitState init_state_; + static size_t instance_count_; + static DSPRuntime *dsp_runtime_instance_; + int32_t device_id_{0}; + std::shared_ptr allocator_{nullptr}; +}; + +class DSPRuntimeInnerWrapper { + public: + DSPRuntimeInnerWrapper() { dsp_runtime_ = DSPRuntime::GetInstance(); } + ~DSPRuntimeInnerWrapper() { DSPRuntime::DeleteInstance(); } + DSPRuntimeInnerWrapper(const DSPRuntimeInnerWrapper &) = delete; + DSPRuntimeInnerWrapper &operator=(const DSPRuntimeInnerWrapper &) = delete; + DSPRuntime *GetInstance() { return dsp_runtime_; } + + private: + DSPRuntime *dsp_runtime_{nullptr}; +}; +} // namespace mindspore::lite::dsp +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_H_ diff 
--git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc new file mode 100644 index 0000000000000000000000000000000000000000..ab0997a27d12b7d5e15fc6721e3c78afac545cf6 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_runtime_wrapper.h" +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/file_utils.h" +#include "src/litert/kernel/dsp/dsp_allocator.h" +#include "src/litert/kernel/dsp/dsp_runtime.h" + +namespace mindspore::registry::dsp { +std::shared_ptr DSPRuntimeWrapper::GetAllocator() { + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper; + lite::dsp::DSPRuntime *dsp_runtime = dsp_runtime_wrapper.GetInstance(); + return dsp_runtime->GetAllocator(); +} +} // namespace mindspore::registry::dsp diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..fb5bc80c2f5b7229c1da214b0d0aafd91b399be8 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_runtime_wrapper.h @@ -0,0 +1,40 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "include/api/allocator.h" +#include "include/api/status.h" +#include "include/api/dual_abi_helper.h" + +namespace mindspore::registry::dsp { +class MS_API DSPRuntimeWrapper { + public: + DSPRuntimeWrapper() = default; + ~DSPRuntimeWrapper() = default; + + std::shared_ptr GetAllocator(); +}; +} // namespace mindspore::registry::dsp +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_RUNTIME_WRAPPER_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5dbc807cff4fbb0341734689f22405146c81385 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.cc @@ -0,0 +1,131 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include +#include +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/common/utils.h" + +namespace mindspore::kernel { +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::lite::dsp::MemType; + +DspSubGraph::~DspSubGraph() { UnInit(); } + +void DspSubGraph::GetInOutNodes() { + this->in_nodes_.clear(); + this->out_nodes_.clear(); + auto in_tensors = this->in_tensors(); + auto out_tensors = this->out_tensors(); + for (auto *node : nodes_) { + for (auto *tensor : node->in_tensors()) { + if (std::find(in_tensors.begin(), in_tensors.end(), tensor) != in_tensors.end()) { + in_nodes_.emplace_back(node); + break; + } + } + for (auto *tensor : node->out_tensors()) { + if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) { + out_nodes_.emplace_back(node); + break; + } + } + } +} + +int DspSubGraph::Prepare() { + for (const auto tensor : in_tensors()) { + MS_ASSERT(tensor); + tensor->set_allocator(allocator_); + } + for (const auto tensor : out_tensors()) { + MS_ASSERT(tensor); + tensor->set_allocator(allocator_); + } + for (auto node : this->nodes_) { + if (node == nullptr) { + MS_LOG(ERROR) << "node in Subgraph is nullptr"; + return mindspore::lite::RET_NULL_PTR; + } + for (const auto tensor : node->out_tensors()) { + CHECK_NULL_RETURN(tensor); + MS_CHECK_TRUE_RET(tensor->data() == nullptr, RET_ERROR); + tensor->set_allocator(allocator_); + } + } + return RET_OK; +} + +void DspSubGraph::UnInit() { + for (const auto &op : nodes_) { + delete op; + } + nodes_.clear(); + delete this->executor_; +} + +int DspSubGraph::ReSize() { + for (auto kernel : nodes_) { + if (kernel == nullptr) { + MS_LOG(ERROR) << "input kernel is nullptr!"; + return RET_ERROR; + } + if (kernel->subgraph_type() != kernel::kNotSubGraph) { + 
MS_LOG(ERROR) << "all nodes in should be kernel"; + return RET_ERROR; + } + std::vector outputs = kernel->out_tensors(); + for (auto &output : outputs) { + output->FreeData(); + output->set_shape({-1}); + } + } + for (auto kernel : nodes_) { + auto ret = kernel->ReSize(); + if (ret != RET_OK) { + MS_LOG(WARNING) << "ReSize " << kernel->name() << "failed!, ret:" << ret; + return ret; + } + } + return RET_OK; +} + +int DspSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) { + MS_ASSERT(allocator_ != nullptr); + for (auto &tensor : in_tensors()) { + MS_ASSERT(tensor); + if (tensor->data() == nullptr) { + MS_LOG(ERROR) << "Dsp subgraph input tensor data is null"; + return RET_ERROR; + } + } + for (auto *kernel : nodes_) { + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Execute(before, after); + if (ret != RET_OK) { + MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); + return ret; + } + } + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h new file mode 100644 index 0000000000000000000000000000000000000000..7935c0f7dcf45ecc13b60447f97f665dd82ebc39 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/dsp_subgraph.h @@ -0,0 +1,62 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ + +#include +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" +#include "src/executor/sub_graph_kernel.h" + +namespace mindspore::kernel { +class DspSubGraph : public SubGraphKernel { + public: + DspSubGraph(const std::vector &inKernels, const std::vector &outKernels, + const std::vector &nodes, MSKernel *kernel) + : SubGraphKernel(inKernels, outKernels, nodes, kernel) { + dsp_runtime_ = dsp_runtime_wrapper_.GetInstance(); + allocator_ = dsp_runtime_->GetAllocator(); + subgraph_type_ = kDspSubGraph; + if (nodes.front()->desc().data_type == kNumberTypeFloat16) { + desc_.data_type = kNumberTypeFloat16; + } else { + desc_.data_type = kNumberTypeFloat32; + } + desc_.arch = kernel::KERNEL_ARCH::kDSP; + static std::atomic_int index = 0; + this->set_name("DspSubGraph" + std::to_string(index++)); + } + ~DspSubGraph() override; + + int Prepare() override; + int ReSize() override; + int Execute() override { return Execute(nullptr, nullptr); } + int Execute(const KernelCallBack &before, const KernelCallBack &after) override; + + private: + void UnInit(); + void GetInOutNodes(); + + private: + std::shared_ptr allocator_{nullptr}; + lite::dsp::DSPRuntimeInnerWrapper dsp_runtime_wrapper_; + lite::dsp::DSPRuntime *dsp_runtime_{nullptr}; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_DSP_SUBGRAPH_H_ diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc new file mode 100644 index 0000000000000000000000000000000000000000..c32870d0aa1804b58b0c11e9e001781975b6c7df --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/add.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/add.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_AddFusion; + +namespace mindspore::kernel { +int AddDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int AddDSPKernel::Prepare() { return RET_OK; } + +int AddDSPKernel::AddRunFp32() { + kernel_name_ = "fp_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunFp16() { + kernel_name_ = "hp_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt16() { + kernel_name_ = "i16_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt32() { + kernel_name_ = "i32_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex64() { + kernel_name_ = "c64_add_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + 
+int AddDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = AddRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = AddRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = AddRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = AddRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = AddRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_AddFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/add.h b/mindspore-lite/src/litert/kernel/dsp/ft04/add.h new file mode 100644 index 0000000000000000000000000000000000000000..7005afd572c7fed443c356082cb68aec56779959 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/add.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class AddDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~AddDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int AddRunFp32(); + int AddRunFp16(); + int AddRunInt16(); + int AddRunInt32(); + int AddRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e4bccfe326542d4b549932325eabe087c3b99f4 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/div.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/div.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DivFusion; + +namespace mindspore::kernel { +int DivDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int DivDSPKernel::Prepare() { return RET_OK; } + +int DivDSPKernel::DivRunFp32() { + kernel_name_ = "fp_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunFp16() { + kernel_name_ = "hp_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt16() { + kernel_name_ = "i16_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt32() { + kernel_name_ = "i32_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex64() { + kernel_name_ = "c64_div_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = DivRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = DivRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = DivRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = DivRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = DivRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_DivFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/div.h b/mindspore-lite/src/litert/kernel/dsp/ft04/div.h new file mode 100644 index 0000000000000000000000000000000000000000..1b6d1487d46bda2a8940bf6a9ac5e33dc8f3ff8b --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/div.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class DivDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~DivDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int DivRunFp32(); + int DivRunFp16(); + int DivRunInt16(); + int DivRunInt32(); + int DivRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed72a21a97a622d91cf180adcd116453c1723405 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.cc @@ -0,0 +1,126 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/dsp/ft04/exp.h" +#include +#include +#include +#include +#include "src/litert/kernel/cpu/nnacl_c/exp_parameter.h" +#include "src/litert/kernel_registry.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_ExpFusion; + +namespace mindspore::kernel { +int ExpDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_1) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + return RET_OK; +} + +int ExpDSPKernel::Prepare() { + auto exp_param = reinterpret_cast(this->op_parameter_); + scale_ = static_cast(exp_param->scale_); + float log_base = (exp_param->base_ == -1) ? 1 : logf(exp_param->base_); + in_scale_ = exp_param->scale_ * log_base; + if (exp_param->shift_ == 0) { + out_scale_ = 1; + } else { + if (log_base == 1) { + out_scale_ = expf(exp_param->shift_); + } else { + out_scale_ = powf(exp_param->base_, exp_param->shift_); + } + } + return RET_OK; +} + +int ExpDSPKernel::ExpRunFp32() { + kernel_name_ = "fp_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunFp16() { + kernel_name_ = "hp_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt16() { + kernel_name_ = "i16_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt32() { + kernel_name_ = "i32_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex64() { + kernel_name_ = "c64_exp_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + uint64_t in_scale_hex = 0, out_scale_hex = 0; + memcpy(&in_scale_hex, &in_scale_, sizeof(float)); + memcpy(&out_scale_hex, &out_scale_, sizeof(float)); + SetKernelArg({x_device_ptr, out_device_ptr, length, in_scale_hex, out_scale_hex, static_cast(scale_)}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = ExpRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = ExpRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = ExpRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = ExpRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = ExpRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_ExpFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h new file mode 100644 index 0000000000000000000000000000000000000000..6cb7e5ec39e91ef6920ae3abcc09e3c578e186ae --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/exp.h @@ -0,0 +1,48 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class ExpDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~ExpDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int ExpRunFp32(); + int ExpRunFp16(); + int ExpRunInt16(); + int ExpRunInt32(); + int ExpRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + float in_scale_; + float out_scale_; + int scale_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6f754dac6c3ca6a54aa737692459d815e9e8c8f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/mul.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_MulFusion; + +namespace mindspore::kernel { +int MulDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int MulDSPKernel::Prepare() { return RET_OK; } + +int MulDSPKernel::MulRunFp32() { + kernel_name_ = "fp_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunFp16() { + kernel_name_ = "hp_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt16() { + kernel_name_ = "i16_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt32() { + kernel_name_ = "i32_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex64() { + kernel_name_ = "c64_mul_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = MulRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = MulRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = MulRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = MulRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = MulRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_MulFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h new file mode 100644 index 0000000000000000000000000000000000000000..ac1d33b50351fe4904901a8247db47935a784956 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/mul.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class MulDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~MulDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int MulRunFp32(); + int MulRunFp16(); + int MulRunInt16(); + int MulRunInt32(); + int MulRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc new file mode 100644 index 0000000000000000000000000000000000000000..8cdcc4045b6ececf501e300d90b9cf035a421280 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.cc @@ -0,0 +1,112 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft04/sub.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SubFusion; + +namespace mindspore::kernel { +int SubDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int SubDSPKernel::Prepare() { return RET_OK; } + +int SubDSPKernel::SubRunFp32() { + kernel_name_ = "fp_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunFp16() { + kernel_name_ = "hp_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt16() { + kernel_name_ = "i16_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt32() { + kernel_name_ = "i32_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex64() { + kernel_name_ = "c64_sub_s"; + core_mask_ = 0xf; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = SubRunFp32(); + } else if (data_type == kNumberTypeFloat16) { + ret = SubRunFp16(); + } else if (data_type == kNumberTypeInt16) { + ret = SubRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = SubRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = SubRunComplex64(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_SubFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h new file mode 100644 index 0000000000000000000000000000000000000000..39a9be9e8a9345e8c1e08f2c28346a710ae2ab9c --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/sub.h @@ -0,0 +1,45 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class SubDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~SubDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int SubRunFp32(); + int SubRunFp16(); + int SubRunInt16(); + int SubRunInt32(); + int SubRunComplex64(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc new file mode 100644 index 0000000000000000000000000000000000000000..d38ad27df2a16c995e1ad058d97f2219f6ceace3 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/add.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/add.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_AddFusion; + +namespace mindspore::kernel { +int AddDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int AddDSPKernel::Prepare() { return RET_OK; } + +int AddDSPKernel::AddRunFp32() { + kernel_name_ = "fp_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunFp64() { + kernel_name_ = "dp_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt8() { + kernel_name_ = "i8_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt16() { + kernel_name_ = "i16_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunInt32() { + kernel_name_ = "i32_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex64() { + kernel_name_ = "c64_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::AddRunComplex128() { + kernel_name_ = "c128_add_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int AddDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = AddRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = AddRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = AddRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = AddRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = AddRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = AddRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = AddRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_AddFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_AddFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_AddFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/add.h b/mindspore-lite/src/litert/kernel/dsp/ft78/add.h new file mode 100644 index 0000000000000000000000000000000000000000..73f4fffe4b92e35f67b3e4b7edbb56311505013f --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/add.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_ADD_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class AddDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~AddDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int AddRunFp32(); + int AddRunFp64(); + int AddRunInt8(); + int AddRunInt16(); + int AddRunInt32(); + int AddRunComplex64(); + int AddRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc new file mode 100644 index 0000000000000000000000000000000000000000..a4fe1a80abbfbcafe7338f7ccde60150d3c3b6be --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/div.cc @@ -0,0 +1,142 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/div.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DivFusion; + +namespace mindspore::kernel { +int DivDSPKernel::CheckSpecs() { + if (in_tensors_.empty() || in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "Invalid input tensors size: " << in_tensors_.size(); + return RET_ERROR; + } + auto x_tensor = in_tensors_.front(); + auto y_tensor = in_tensors_.back(); + if (x_tensor->shape() != y_tensor->shape() && x_tensor->ElementsNum() != 1 && y_tensor->ElementsNum() != 1) { + MS_LOG(WARNING) << "Input shapes must be equal or one must be scalar"; + return RET_ERROR; + } + return RET_OK; +} + +int DivDSPKernel::Prepare() { + if (in_tensors_.empty() || in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + return RET_ERROR; + } + auto x_num = in_tensors_[0]->ElementsNum(); + auto y_num = in_tensors_[1]->ElementsNum(); + if (x_num == 1 || y_num == 1) { + optimize_ = true; + first_scalar_ = (x_num == 1); + } + return RET_OK; +} + +int DivDSPKernel::DivRunFp32() { + kernel_name_ = "fp_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunFp64() { + kernel_name_ = "dp_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt8() { + kernel_name_ = "i8_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt16() { + kernel_name_ = "i16_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunInt32() { + kernel_name_ = "i32_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, 
kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex64() { + kernel_name_ = "c64_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::DivRunComplex128() { + kernel_name_ = "c128_div_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int DivDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length, optimize_, first_scalar_}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = DivRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = DivRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = DivRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = DivRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = DivRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = DivRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = DivRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_DivFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_DivFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_DivFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/div.h b/mindspore-lite/src/litert/kernel/dsp/ft78/div.h new file mode 100644 index 0000000000000000000000000000000000000000..83271f17713329077f61c054b14f9c66173b66fd --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/div.h @@ -0,0 +1,49 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_DIV_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class DivDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~DivDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int DivRunFp32(); + int DivRunFp64(); + int DivRunInt8(); + int DivRunInt16(); + int DivRunInt32(); + int DivRunComplex64(); + int DivRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + bool optimize_{false}; + bool first_scalar_{false}; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc new file mode 100644 index 0000000000000000000000000000000000000000..413ff66ed7a7599edc2c514b19e95bede00adf3e --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.cc @@ -0,0 +1,135 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/litert/kernel/dsp/ft78/exp.h" +#include +#include +#include +#include +#include "src/litert/kernel/cpu/nnacl_c/exp_parameter.h" +#include "src/litert/kernel_registry.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_ExpFusion; + +namespace mindspore::kernel { +int ExpDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_1) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + return RET_OK; +} + +int ExpDSPKernel::Prepare() { + auto exp_param = reinterpret_cast(this->op_parameter_); + scale_ = static_cast(exp_param->scale_); + float log_base = (exp_param->base_ == -1) ? 1 : logf(exp_param->base_); + in_scale_ = exp_param->scale_ * log_base; + if (exp_param->shift_ == 0) { + out_scale_ = 1; + } else { + if (log_base == 1) { + out_scale_ = expf(exp_param->shift_); + } else { + out_scale_ = powf(exp_param->base_, exp_param->shift_); + } + } + return RET_OK; +} + +int ExpDSPKernel::ExpRunFp32() { + kernel_name_ = "fp_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunFp64() { + kernel_name_ = "dp_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt8() { + kernel_name_ = "i8_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunInt32() { + kernel_name_ = "i32_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex64() { + kernel_name_ = "c64_exp_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::ExpRunComplex128() { + kernel_name_ = "c128_exp_s"; + core_mask_ = 
0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int ExpDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! "; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + uint64_t in_scale_hex = 0, out_scale_hex = 0; + memcpy(&in_scale_hex, &in_scale_, sizeof(float)); + memcpy(&out_scale_hex, &out_scale_, sizeof(float)); + SetKernelArg({x_device_ptr, out_device_ptr, length, in_scale_hex, out_scale_hex, static_cast(scale_)}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = ExpRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = ExpRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = ExpRunInt8(); + } else if (data_type == kNumberTypeInt32) { + ret = ExpRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = ExpRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = ExpRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_ExpFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_ExpFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_ExpFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h new file mode 100644 index 0000000000000000000000000000000000000000..dae095c90af40888ca71c6335ce7ebe3586ec683 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/exp.h @@ -0,0 +1,50 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_EXP_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class ExpDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~ExpDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int ExpRunFp32(); + int ExpRunFp64(); + int ExpRunInt8(); + int ExpRunInt16(); + int ExpRunInt32(); + int ExpRunComplex64(); + int ExpRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; + float in_scale_; + float out_scale_; + int scale_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..83e1f11426562aab44f069eb2d3013ba493d9152 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/mul.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_MulFusion; + +namespace mindspore::kernel { +int MulDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int MulDSPKernel::Prepare() { return RET_OK; } + +int MulDSPKernel::MulRunFp32() { + kernel_name_ = "fp_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunFp64() { + kernel_name_ = "dp_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt8() { + kernel_name_ = "i8_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt16() { + kernel_name_ = "i16_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunInt32() { + kernel_name_ = "i32_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex64() { + kernel_name_ = "c64_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::MulRunComplex128() { + kernel_name_ = "c128_mul_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int MulDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = MulRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = MulRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = MulRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = MulRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = MulRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = MulRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = MulRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_MulFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_MulFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_MulFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h new file mode 100644 index 0000000000000000000000000000000000000000..7e0de812fcb7b6b560429c11012b71810c7bf96c --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/mul.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_MUL_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class MulDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~MulDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int MulRunFp32(); + int MulRunFp64(); + int MulRunInt8(); + int MulRunInt16(); + int MulRunInt32(); + int MulRunComplex64(); + int MulRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc new file mode 100644 index 0000000000000000000000000000000000000000..efb15ec7bb5e56e6b6874b13dbeb64439aaf7ea0 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.cc @@ -0,0 +1,130 @@ + +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/ft78/sub.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SubFusion; + +namespace mindspore::kernel { +int SubDSPKernel::CheckSpecs() { + if (in_tensors_.size() != INPUT_TENSOR_SIZE_2) { + MS_LOG(WARNING) << "in size: " << in_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.front()->shape() != in_tensors_.back()->shape()) { + MS_LOG(WARNING) << "input shape must be equal"; + return RET_ERROR; + } + return RET_OK; +} + +int SubDSPKernel::Prepare() { return RET_OK; } + +int SubDSPKernel::SubRunFp32() { + kernel_name_ = "fp_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunFp64() { + kernel_name_ = "dp_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt8() { + kernel_name_ = "i8_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt16() { + kernel_name_ = "i16_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunInt32() { + kernel_name_ = "i32_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex64() { + kernel_name_ = "c64_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::SubRunComplex128() { + kernel_name_ = "c128_sub_s"; + core_mask_ = 0xff; + return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_); +} + +int SubDSPKernel::Run() { + int ret = -1; + MS_LOG(DEBUG) << this->name() << " Running! 
"; + uint64_t length = in_tensors_[0]->ElementsNum(); + auto allocator = dsp_runtime_->GetAllocator(); + uint64_t x_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[0]->data()); + uint64_t y_device_ptr = allocator->GetDeviceMemPtr(in_tensors_[1]->data()); + uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_tensors_[0]->data()); + SetKernelArg({x_device_ptr, y_device_ptr, out_device_ptr, length}); + auto data_type = in_tensors_[0]->data_type(); + if (data_type == kNumberTypeFloat32) { + ret = SubRunFp32(); + } else if (data_type == kNumberTypeFloat64) { + ret = SubRunFp64(); + } else if (data_type == kNumberTypeInt8) { + ret = SubRunInt8(); + } else if (data_type == kNumberTypeInt16) { + ret = SubRunInt16(); + } else if (data_type == kNumberTypeInt32) { + ret = SubRunInt32(); + } else if (data_type == kNumberTypeComplex64) { + ret = SubRunComplex64(); + } else if (data_type == kNumberTypeComplex128) { + ret = SubRunComplex128(); + } else { + MS_LOG(ERROR) << "unsupported data type: " << static_cast(data_type); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << this->name() << " Run failed! "; + return ret; + } + MS_LOG(DEBUG) << this->name() << " Run success! 
"; + return RET_OK; +} + +REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_SubFusion, DSPKernelCreator) +REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_SubFusion, DSPKernelCreator); +REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_SubFusion, DSPKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h new file mode 100644 index 0000000000000000000000000000000000000000..66431eca6cb95d3132eb386c9f1925af514bb426 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft78/sub.h @@ -0,0 +1,47 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_SUB_H_ + +#include +#include +#include "src/litert/kernel/dsp/dsp_kernel.h" + +namespace mindspore::kernel { +class SubDSPKernel : public DSPKernel { + public: + using DSPKernel::DSPKernel; + + ~SubDSPKernel() override = default; + int Prepare() override; + int CheckSpecs() override; + int Run() override; + + int SubRunFp32(); + int SubRunFp64(); + int SubRunInt8(); + int SubRunInt16(); + int SubRunInt32(); + int SubRunComplex64(); + int SubRunComplex128(); + + private: + std::string kernel_name_; + uint64_t core_mask_; +}; +} // namespace mindspore::kernel +#endif diff --git a/mindspore-lite/src/litert/kernel_exec_util.cc b/mindspore-lite/src/litert/kernel_exec_util.cc index b4a885ee44081760103e7ea5a5ad09ec6d25fda9..a9211fced98048fbf991a5104394f3120347609a 100644 --- a/mindspore-lite/src/litert/kernel_exec_util.cc +++ b/mindspore-lite/src/litert/kernel_exec_util.cc @@ -25,6 +25,9 @@ #include "src/litert/kernel/opencl/opencl_subgraph.h" #include "src/litert/kernel/gpu/opencl/opencl_runtime.h" #endif +#if ENABLE_DSP +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#endif #include "src/control_flow/control_subgraph_creator.h" #include "src/litert/kernel/cpu/base/partial_fusion.h" @@ -418,6 +421,11 @@ SubGraphKernel *KernelExecUtil::CreateSubGraphKernel(const std::vector &in_tensors, con if (kernel_exec != nullptr) { constexpr auto kArchCPU = "CPU"; constexpr auto kArchGPU = "GPU"; + constexpr auto kArchDSP = "DSP"; kernel::KernelKey tmp_key = key; if (desc.arch == kArchCPU) { tmp_key.arch = kernel::kCPU; } else if (desc.arch == kArchGPU) { tmp_key.arch = kernel::kGPU; + } else if (desc.arch == kArchDSP) { + tmp_key.arch = kernel::kDSP; } else { tmp_key.arch = kernel::kCustom; } diff --git a/mindspore-lite/src/litert/lite_session.cc b/mindspore-lite/src/litert/lite_session.cc index 
5fc41f44fb7a9bf130469130ed846a6d5bd289ee..f6c834d5871730c10685c912b47bced1e3011ab9 100644 --- a/mindspore-lite/src/litert/lite_session.cc +++ b/mindspore-lite/src/litert/lite_session.cc @@ -1153,6 +1153,13 @@ int LiteSession::Init(const std::shared_ptr &context) { return ret; } + ret = InitDSPRuntime(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init DSP runtime failed."; + is_running_.store(false); + return ret; + } + is_running_.store(false); return RET_OK; } @@ -1218,6 +1225,10 @@ LiteSession::~LiteSession() { #ifdef GPU_OPENCL delete opencl_runtime_wrapper_; opencl_runtime_wrapper_ = nullptr; +#endif +#ifdef ENABLE_DSP + delete dsp_runtime_wrapper_; + dsp_runtime_wrapper_ = nullptr; #endif delete ms_context_; ms_context_ = nullptr; @@ -1739,6 +1750,26 @@ int LiteSession::RuntimeAllocatorSetData() { return RET_OK; } +int LiteSession::InitDSPRuntime() { +#ifdef ENABLE_DSP + if (this->context_->IsDeviceTypeEnabled(DT_DSP)) { + dsp_runtime_wrapper_ = new (std::nothrow) dsp::DSPRuntimeInnerWrapper(); + if (dsp_runtime_wrapper_ == nullptr) { + MS_LOG(ERROR) << "create DSPRuntimeInnerWrapper failed"; + return RET_ERROR; + } + auto dsp_runtime = dsp_runtime_wrapper_->GetInstance(); + if (dsp_runtime->Init() != RET_OK) { + this->context_->device_list_ = {{DT_CPU, {false, MID_CPU}}}; + MS_LOG(WARNING) << "Init DSP runtime failed, change to CPU mode."; + } else { + MS_LOG(INFO) << "Init DSP runtime success."; + } + } +#endif + return RET_OK; +} + int LiteSession::InitGPURuntime() { if (context_->IsDeviceTypeEnabled(DT_CPU)) { CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_; diff --git a/mindspore-lite/src/litert/lite_session.h b/mindspore-lite/src/litert/lite_session.h index 233302ae06de1c4313f08d00b1b0b75847c282ca..78bc3da382f68b9edf46c74eb31e2b47bc82dd50 100644 --- a/mindspore-lite/src/litert/lite_session.h +++ b/mindspore-lite/src/litert/lite_session.h @@ -36,6 +36,9 @@ #if GPU_OPENCL #include 
"src/litert/kernel/gpu/opencl/opencl_runtime.h" #endif +#ifdef ENABLE_DSP +#include "src/litert/kernel/dsp/dsp_runtime.h" +#endif #include "src/litert/scheduler_cb.h" #include "src/executor/sub_graph_kernel.h" @@ -180,6 +183,7 @@ class MS_API LiteSession { int CreateCoreMLDelegate(); int InitDelegate(); int InitGPURuntime(); + int InitDSPRuntime(); int InitSharedThreadPool(); int ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor); @@ -234,6 +238,9 @@ class MS_API LiteSession { #if GPU_OPENCL opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr}; #endif +#ifdef ENABLE_DSP + dsp::DSPRuntimeInnerWrapper *dsp_runtime_wrapper_{nullptr}; +#endif // In the dynamic shape scene, the flag is to indicate when to do shape-infer for kernel. If true, the shape-infer // will not be called when calling 'Resize', but be done along with running. And we will decide whether to call diff --git a/mindspore-lite/src/litert/scheduler.cc b/mindspore-lite/src/litert/scheduler.cc index 6ce3d4a277e3e0f0b720cbd26e1371d479c3e25c..f5f4a709caa61e747f96eee754365a614a95c244 100644 --- a/mindspore-lite/src/litert/scheduler.cc +++ b/mindspore-lite/src/litert/scheduler.cc @@ -1093,6 +1093,42 @@ int Scheduler::FindGpuKernel(const std::vector &in_tensors, const std: } #endif +#ifdef ENABLE_DSP +int Scheduler::FindDspKernel(const std::vector &in_tensors, const std::vector &out_tensors, + OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, + TypeId prefer_data_type) { + MS_ASSERT(op_parameter != nullptr); + MS_ASSERT(kernel != nullptr); + if (!context_->IsDeviceTypeEnabled(DT_DSP)) { + return RET_NOT_SUPPORT; + } + + // support more data type like int32 + kernel::KernelKey dsp_desc{kernel::KERNEL_ARCH::kDSP, desc.data_type, NHWC, desc.type}; + // weight dequant + auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32, src_model_->graph_.version_, + context_->float_mode); + if (ret != RET_OK) { + 
MS_LOG(DEBUG) << "Dequant input tensors failed: " << ret; + return RET_NOT_SUPPORT; + } + // we don't need to restore tensor for copy data + ret = CopyConstTensorData(in_tensors, op_parameter->type_); + if (ret != RET_OK) { + MS_LOG(DEBUG) << "CopyConstTensorsData failed: " << ret; + return RET_NOT_SUPPORT; + } + ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, dsp_desc, + op_parameter, kernel); + if (ret == RET_OK) { + MS_LOG(DEBUG) << "Get dsp_desc op success: " << PrimitiveCurVersionTypeName(dsp_desc.type); + } else { + MS_LOG(DEBUG) << "Get dsp_desc op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(dsp_desc.type); + } + return ret; +} +#endif + int Scheduler::FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const LiteGraph::Node *node, TypeId data_type, kernel::KernelExec **kernel) { #ifndef CUSTOM_KERNEL_REGISTRY_CLIP @@ -1102,6 +1138,15 @@ int Scheduler::FindProviderKernel(const std::vector &in_tensors, const if (prim_type == schema::PrimitiveType_Custom) { for (auto &&device : context_->device_list_) { if (!device.provider_.empty() && !device.provider_device_.empty()) { + if (device.provider_device_ == "DSP") { + kernel::KernelKey desc{kernel::KERNEL_ARCH::kDSP, data_type, NHWC, prim_type, + device.provider_device_, device.provider_}; + ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, desc, + nullptr, kernel, node->primitive_); + if (ret == RET_OK && *kernel != nullptr) { + return ret; + } + } kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, NHWC, prim_type, device.provider_device_, device.provider_}; ret = KernelRegistry::GetInstance()->GetKernelExec(in_tensors, out_tensors, context_, ms_context_, desc, @@ -1188,6 +1233,30 @@ kernel::KernelExec *Scheduler::FindBackendKernel(const std::vector &in op_parameter->is_train_session_ = is_train_session_; kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, 
data_type, NHWC, op_parameter->type_}; +#ifdef ENABLE_DSP + bool dsp_priority = DeviceTypePriority(context_, DT_DSP, DT_CPU); + bool use_dsp_kernel = node->device_type_ == DT_DSP || node->device_type_ == kDefaultDeviceType; + if (dsp_priority && use_dsp_kernel) { + status = FindDspKernel(in_tensors, out_tensors, op_parameter, desc, &kernel, prefer_data_type); + if (status == RET_OK) { + return kernel; + } else { + MS_LOG(DEBUG) << "Get dsp op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(desc.type) << " " + << node->name_; + if (status == RET_ERROR) { + op_parameters_.erase(node->output_indices_.at(0)); + auto ret = InferNodeShape(node); + if (ret == RET_INFER_INVALID || ret == RET_OK) { + op_parameter = op_parameters_[node->output_indices_.at(0)]; + } else { + MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + return nullptr; + } + } + } + } +#endif + #ifdef GPU_OPENCL bool gpu_priority = DeviceTypePriority(context_, DT_GPU, DT_CPU); bool use_gpu_kernel = node->device_type_ == DT_GPU || node->device_type_ == kDefaultDeviceType; @@ -1283,6 +1352,8 @@ kernel::SubGraphType GetKernelSubGraphType(const kernel::KernelExec *kernel, con } } else if (desc.arch == kernel::KERNEL_ARCH::kCustom) { return kernel::kCustomSubGraph; + } else if (desc.arch == kernel::KERNEL_ARCH::kDSP) { + return kernel::kDspSubGraph; } return kernel::kNotSubGraph; } @@ -1598,6 +1669,8 @@ bool KernelFitCurrentSubGraph(const kernel::SubGraphType subgraph_type, const ke } return KernelFitCurrentSubGraphCPUFp32(desc.data_type); } + case kernel::SubGraphType::kDspSubGraph: + return kernel.desc().arch == kernel::KERNEL_ARCH::kDSP; default: return false; } @@ -1687,8 +1760,9 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector &in_ten if (dtype == kObjectTypeTensorType) { return TensorListDataType(tensor); } - std::unordered_set type_set = {kNumberTypeFloat32, kNumberTypeFloat16, kNumberTypeInt8, kNumberTypeInt32, - kNumberTypeBool, kNumberTypeUInt8, 
kObjectTypeString}; + std::unordered_set type_set = {kNumberTypeFloat32, kNumberTypeFloat16, kNumberTypeInt8, + kNumberTypeInt32, kNumberTypeBool, kNumberTypeUInt8, + kObjectTypeString, kNumberTypeComplex64, kNumberTypeComplex128}; if (type_set.find(dtype) != type_set.end()) { return dtype; } diff --git a/mindspore-lite/src/litert/scheduler.h b/mindspore-lite/src/litert/scheduler.h index c114c75707cd810aeac6909d7409e83ac05fd2fd..3927377ecb9a7506e531546d7718598410e3e030 100644 --- a/mindspore-lite/src/litert/scheduler.h +++ b/mindspore-lite/src/litert/scheduler.h @@ -89,6 +89,13 @@ class Scheduler { OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, TypeId prefer_data_type); #endif + +#ifdef ENABLE_DSP + int FindDspKernel(const std::vector &in_tensors, const std::vector &out_tensors, + OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::KernelExec **kernel, + TypeId prefer_data_type); +#endif + int FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const LiteGraph::Node *node, TypeId data_type, kernel::KernelExec **kernel); diff --git a/mindspore-lite/test/CMakeLists.txt b/mindspore-lite/test/CMakeLists.txt index 88489c5c8dd5d914c9fbef850bab1e2b3782db40..3a1f5cab2e6a5060f7e91643d52d06b83f19063b 100644 --- a/mindspore-lite/test/CMakeLists.txt +++ b/mindspore-lite/test/CMakeLists.txt @@ -74,6 +74,13 @@ if(MSLITE_GPU_BACKEND STREQUAL opencl) endif() endif() +if(MSLITE_ENABLE_DSP) + file(GLOB_RECURSE TEST_GPU_UT_SRC + ${TEST_DIR}/ut/src/runtime/kernel/dsp/*.cc + ) + list(APPEND TEST_UT_SRC ${TEST_GPU_UT_SRC}) +endif() + if(MSLITE_GPU_BACKEND STREQUAL cuda) set(CUDA_PATH $ENV{CUDA_HOME}) include_directories(${CUDA_PATH}/include) diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..1fd7f720fcea01c0afd9845b5129a358301832cf --- 
/dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/arithmetic_tests.cc @@ -0,0 +1,1443 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "ut/src/runtime/kernel/dsp/dsp_test.h" +#include "include/api/context.h" +#include "include/api/data_type.h" +#include "include/api/model.h" +#include "nnacl_c/arithmetic_parameter.h" +#include "schema/inner/model_generated.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "src/litert/kernel_registry.h" +#include "ut/src/runtime/kernel/opencl/common.h" + +namespace mindspore::lite::dsp::test { + +constexpr int kTestArraySize = 10000; // 100 * 100 +constexpr int kTestArraySize2 = 20000; // 100 * 100 * 2 + +class TestDSP_Arithmetic : public DSPCommonTest {}; + +namespace { +OpParameter *CreateParameter(schema::PrimitiveType type, const std::vector &input0_shape, + const std::vector &input1_shape, + schema::ActivationType act_type = schema::ActivationType_NO_ACTIVATION) { + auto *param = opencl::test::CreateParameter(type); + int input0_size = std::accumulate(input0_shape.begin(), input0_shape.end(), 1, std::multiplies<>()); + int input1_size = std::accumulate(input1_shape.begin(), input1_shape.end(), 1, std::multiplies<>()); + if (input0_size != input1_size) { + param->broadcasting_ = true; + } + param->activation_type_ = act_type; + return reinterpret_cast(param); +} +} // namespace + 
+TEST_F(TestDSP_Arithmetic, Add_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector 
input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = 
CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, 
mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = 
new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + 
auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + 
outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + 
std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx 
= new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, 
schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, 
reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, 
ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 0; + correct[i + 1] = 4; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Fp32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2); + ASSERT_EQ(0, 
CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int16) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt16, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt16, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt16, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto 
t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt32, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt32, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Cplx64) { + InitDSPRuntime(); + std::vector 
inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 2; + correct[i + 1] = 0; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +#ifdef SUPPORT_FT78 +TEST_F(TestDSP_Arithmetic, Add_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + 
std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int 
num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Add_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_AddFusion, input0_shape, input1_shape); + + auto input0 = new 
lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_AddFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 3); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + 
input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, 
lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Sub_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_SubFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new 
lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_SubFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2, 1); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + 
outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + 
std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Mul_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_MulFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, 
ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_MulFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 0; + correct[i + 1] = 4; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeFloat64, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeFloat64, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = 
{kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast<OpParameter *>(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector<double> correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast<double *>(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Int8) { + InitDSPRuntime(); + std::vector<lite::Tensor *> inputs_; + std::vector<lite::Tensor *> outputs_; + + std::vector<int> input0_shape = {100, 100}; + std::vector<int> input1_shape = {100, 100}; + std::vector<int> output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeInt8, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeInt8, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast<int8_t *>(input0->MutableData()), num, 2); + std::fill_n(reinterpret_cast<int8_t *>(input1->MutableData()), num, 1); + std::fill_n(reinterpret_cast<int8_t *>(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto
kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Arithmetic, Div_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input0_shape = {100, 100}; + std::vector input1_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input0_shape[0] * input0_shape[1]; + + auto *param = CreateParameter(schema::PrimitiveType_DivFusion, input0_shape, input1_shape); + + auto input0 = new lite::Tensor(kNumberTypeComplex128, input0_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input0->MallocData(allocator_); + inputs_.push_back(input0); + + auto input1 = new lite::Tensor(kNumberTypeComplex128, input1_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input1->MallocData(allocator_); + inputs_.push_back(input1); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input0->MutableData()), num * 2, 2); + std::fill_n(reinterpret_cast(input1->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_DivFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + 
ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i += 2) { + correct[i] = 2; + correct[i + 1] = 0; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} +#endif + +} // namespace mindspore::lite::dsp::test diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h new file mode 100644 index 0000000000000000000000000000000000000000..88419f42d7e853af569ac4d207993293a3f96258 --- /dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h @@ -0,0 +1,55 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ +#define MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ + +#include <memory> +#include <vector> +#include "schema/inner/model_generated.h" +#include "src/litert/kernel_registry.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "common/common_test.h" +#include "nnacl_c/arithmetic_parameter.h" + +namespace mindspore::lite::dsp::test { + +class DSPCommonTest : public CommonTest { + public: + void InitDSPRuntime() { + dsp_runtime_wrapper_ = new (std::nothrow) dsp::DSPRuntimeInnerWrapper(); + if (dsp_runtime_wrapper_ == nullptr) { + MS_LOG(ERROR) << "create DSPRuntimeInnerWrapper failed."; + } + auto dsp_runtime = dsp_runtime_wrapper_->GetInstance(); + if (dsp_runtime->Init() != RET_OK) { + MS_LOG(ERROR) << "Init DSP runtime failed."; + } + allocator_ = dsp_runtime->GetAllocator(); + } + + void UninitDSPRuntime() { + delete dsp_runtime_wrapper_; + dsp_runtime_wrapper_ = nullptr; + } + + protected: + dsp::DSPRuntimeInnerWrapper *dsp_runtime_wrapper_{nullptr}; + std::shared_ptr<dsp::DSPAllocator> allocator_; +}; +} // namespace mindspore::lite::dsp::test + +#endif // MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_DSP_DSP_TEST_H_ diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc new file mode 100644 index 0000000000000000000000000000000000000000..62bd15ad5aac4d5f7d090fddae8027696c7341a7 --- /dev/null +++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/exp_tests.cc @@ -0,0 +1,317 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <algorithm> +#include <memory> +#include <vector> +#include "ut/src/runtime/kernel/dsp/dsp_test.h" +#include "include/api/context.h" +#include "include/api/data_type.h" +#include "include/api/model.h" +#include "nnacl_c/exp_parameter.h" +#include "schema/inner/model_generated.h" +#include "src/litert/kernel/dsp/dsp_subgraph.h" +#include "src/litert/kernel_registry.h" + +namespace mindspore::lite::dsp::test { + +// Compile-time constants used in place of variable-length arrays +constexpr int kTestArraySize = 10000; // 100 * 100 +constexpr int kTestArraySize2 = 20000; // 100 * 100 * 2 + +class TestDSP_Exp : public DSPCommonTest {}; + +TEST_F(TestDSP_Exp, Exp_Fp32) { + InitDSPRuntime(); + std::vector<lite::Tensor *> inputs_; + std::vector<lite::Tensor *> outputs_; + + std::vector<int> input_shape = {100, 100}; + std::vector<int> output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeFloat32, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast<float *>(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast<float *>(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, 
schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Int32) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeInt32, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat32, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(num, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + 
UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Cplx64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeComplex64, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeComplex64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i = i + 2) { + correct[i] = 1.4686939; + correct[i + 1] = 2.2873552; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +#ifdef SUPPORT_FT78 +TEST_F(TestDSP_Exp, Exp_Fp64) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = 
{100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeFloat64, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeFloat64, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2.7182798); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Int8) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeInt8, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeInt8, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + 
output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize, 2); + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum())); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} + +TEST_F(TestDSP_Exp, Exp_Cplx128) { + InitDSPRuntime(); + std::vector inputs_; + std::vector outputs_; + + std::vector input_shape = {100, 100}; + std::vector output_shape = {100, 100}; + int num = input_shape[0] * input_shape[1]; + + auto param = new ExpParameter(); + param->base_ = -1; + param->scale_ = 1; + param->shift_ = 0; + auto input = new lite::Tensor(kNumberTypeComplex128, input_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + input->MallocData(allocator_); + inputs_.push_back(input); + + auto output = new lite::Tensor(kNumberTypeComplex128, output_shape, mindspore::NHWC, lite::Category::CONST_TENSOR); + output->MallocData(allocator_); + outputs_.push_back(output); + + std::fill_n(reinterpret_cast(input->MutableData()), num * 2, 1); + std::fill_n(reinterpret_cast(output->MutableData()), num * 2, 0); + + auto ctx = new lite::InnerContext; + ASSERT_EQ(lite::RET_OK, ctx->Init()); + + kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_ExpFusion}; + auto creator = 
KernelRegistry::GetInstance()->GetCreator(key); + auto kernel = creator(inputs_, outputs_, reinterpret_cast(param), ctx, key); + auto ret = kernel->Prepare(); + EXPECT_EQ(0, ret); + ret = kernel->Run(); + EXPECT_EQ(0, ret); + + std::vector correct(kTestArraySize2); + for (int i = 0; i < kTestArraySize2; i = i + 2) { + correct[i] = 1.4686939; + correct[i + 1] = 2.2873552; + } + ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct.data(), + outputs_[0]->ElementsNum() * 2)); + UninitDSPRuntime(); + delete ctx; + for (auto t : inputs_) delete t; + for (auto t : outputs_) delete t; + delete kernel; +} +#endif + +} // namespace mindspore::lite::dsp::test diff --git a/scripts/build/build_lite.sh b/scripts/build/build_lite.sh index e978b56507559a2da823078719abef8e5579a34a..7e248e1821ad109d1e7855604b2e44760328c7f0 100755 --- a/scripts/build/build_lite.sh +++ b/scripts/build/build_lite.sh @@ -34,6 +34,13 @@ check_Hi35xx() { fi } +check_dsp_sdk() { + if [[ "X${DSP_SDK_PATH}" == "X" ]]; then + echo "error: to compile the runtime package of DSP, you need to set DSP_SDK_PATH to declare the path of DSP sdk." + exit 1 + fi +} + get_version() { VERSION_STR=$(cat ${BASEPATH}/version.txt) } @@ -391,6 +398,7 @@ build_lite() { TOOLCHAIN_NAME=${MSLITE_MICRO_PLATFORM} elif [[ ("${MSLITE_REGISTRY_DEVICE}" == "ft04" || "${MSLITE_REGISTRY_DEVICE}" == "ft78") && "${local_lite_platform}" == "arm32" ]]; then TOOLCHAIN_NAME="cortex-a15" + check_dsp_sdk fi machine=`uname -m`