diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fd5481c883bd32578815da0490acd428f0769e1a
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.cc
@@ -0,0 +1,178 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/litert/kernel/dsp/ft04/broadcastto.h"
+#include <cstring>
+#include <string>
+#include <vector>
+#include "src/litert/kernel/cpu/nnacl_c/broadcast_to_parameter.h"
+#include "src/litert/kernel_registry.h"
+
+using mindspore::kernel::KERNEL_ARCH::kDSP;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_BroadcastTo;
+
+namespace mindspore::kernel {
+namespace {
+constexpr size_t kInputTensorSize = 1;
+constexpr size_t kOutputTensorSize = 1;
+}  // namespace
+
+int BroadcastToDSPKernel::Prepare() { return RET_OK; }
+
+// Validates tensor counts, ranks, and the presence of the op parameter.
+int BroadcastToDSPKernel::CheckSpecs() {
+  if (in_tensors_.size() != kInputTensorSize) {
+    MS_LOG(WARNING) << "BroadcastTo expects one input, got: " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  if (out_tensors_.size() != kOutputTensorSize) {
+    MS_LOG(WARNING) << "BroadcastTo expects one output, got: " << out_tensors_.size();
+    return RET_ERROR;
+  }
+  const auto input_rank = in_tensors_[0]->shape().size();
+  const auto output_rank = out_tensors_[0]->shape().size();
+  if (input_rank == 0 || output_rank == 0) {
+    MS_LOG(WARNING) << "BroadcastTo requires non-empty input/output shapes.";
+    return RET_ERROR;
+  }
+  if (input_rank > MAX_SHAPE_SIZE || output_rank > MAX_SHAPE_SIZE) {
+    MS_LOG(WARNING) << "BroadcastTo rank exceeds limit, input: " << input_rank << ", output: " << output_rank;
+    return RET_ERROR;
+  }
+  auto *param = reinterpret_cast<BroadcastToParameter *>(op_parameter_);
+  if (param == nullptr) {
+    MS_LOG(WARNING) << "BroadcastTo parameter is null.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int BroadcastToDSPKernel::BroadcastToRunFp32() {
+  kernel_name_ = "fp_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunFp16() {
+  kernel_name_ = "hp_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunInt16() {
+  kernel_name_ = "i16_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunInt32() {
+  kernel_name_ = "i32_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunComplex64() {
+  kernel_name_ = "c64_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+// Stages shape metadata into device-visible buffers, dispatches the dtype-specific
+// DSP kernel, and releases the staging buffers regardless of outcome.
+int BroadcastToDSPKernel::Run() {
+  int ret = RET_ERROR;
+  auto allocator = dsp_runtime_->GetAllocator();
+
+  auto *input_tensor = in_tensors_[0];
+  auto *output_tensor = out_tensors_[0];
+
+  const auto &in_shape_vec = input_tensor->shape();
+  const auto &out_shape_vec = output_tensor->shape();
+  const size_t input_shape_size = in_shape_vec.size();
+  const size_t output_shape_size = out_shape_vec.size();
+
+  int32_t in_shape_host[MAX_SHAPE_SIZE] = {0};
+  for (size_t i = 0; i < input_shape_size; ++i) {
+    in_shape_host[i] = static_cast<int32_t>(in_shape_vec[i]);
+  }
+
+  int32_t out_shape_host[MAX_SHAPE_SIZE] = {0};
+  for (size_t i = 0; i < output_shape_size; ++i) {
+    out_shape_host[i] = static_cast<int32_t>(out_shape_vec[i]);
+  }
+
+  void *in_shape_buf = allocator->Malloc(sizeof(int32_t) * input_shape_size);
+  void *out_shape_buf = allocator->Malloc(sizeof(int32_t) * output_shape_size);
+
+  if (in_shape_buf == nullptr || out_shape_buf == nullptr) {
+    allocator->Free(in_shape_buf);
+    allocator->Free(out_shape_buf);
+    MS_LOG(ERROR) << "BroadcastTo alloc shape buffer failed.";
+    return RET_ERROR;
+  }
+
+  std::memcpy(in_shape_buf, in_shape_host, sizeof(int32_t) * input_shape_size);
+  std::memcpy(out_shape_buf, out_shape_host, sizeof(int32_t) * output_shape_size);
+
+  uint64_t in_shape_dev = allocator->GetDeviceMemPtr(in_shape_buf);
+  uint64_t out_shape_dev = allocator->GetDeviceMemPtr(out_shape_buf);
+
+  uint64_t input_dev = allocator->GetDeviceMemPtr(input_tensor->data());
+  uint64_t output_dev = allocator->GetDeviceMemPtr(output_tensor->data());
+
+  auto data_type = input_tensor->data_type();
+  size_t data_size = lite::DataTypeSize(data_type);
+  if (data_size == 0) {
+    allocator->Free(in_shape_buf);
+    allocator->Free(out_shape_buf);
+    MS_LOG(ERROR) << "BroadcastTo unsupported dtype: " << static_cast<int>(data_type);
+    return RET_ERROR;
+  }
+
+  SetKernelArg({input_dev, output_dev, in_shape_dev, static_cast<uint64_t>(input_shape_size), out_shape_dev,
+                static_cast<uint64_t>(output_shape_size), static_cast<uint64_t>(data_size)});
+
+  if (data_type == kNumberTypeFloat32) {
+    ret = BroadcastToRunFp32();
+  } else if (data_type == kNumberTypeFloat16) {
+    ret = BroadcastToRunFp16();
+  } else if (data_type == kNumberTypeInt16) {
+    ret = BroadcastToRunInt16();
+  } else if (data_type == kNumberTypeInt32) {
+    ret = BroadcastToRunInt32();
+  } else if (data_type == kNumberTypeComplex64) {
+    ret = BroadcastToRunComplex64();
+  } else {
+    MS_LOG(ERROR) << "BroadcastTo unsupported dtype: " << static_cast<int>(data_type);
+  }
+
+  allocator->Free(in_shape_buf);
+  allocator->Free(out_shape_buf);
+
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << this->name() << " Run failed!";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+
+}  // namespace mindspore::kernel
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.h b/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.h
new file mode 100644
index 0000000000000000000000000000000000000000..b3bdb9300281c2b2062b9bd7f17a787c922b56c2
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft04/broadcastto.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
+
+#include <string>
+#include "src/litert/kernel/dsp/dsp_kernel.h"
+
+namespace mindspore::kernel {
+// FT04 DSP implementation of the BroadcastTo operator.
+class BroadcastToDSPKernel : public DSPKernel {
+ public:
+  using DSPKernel::DSPKernel;
+
+  ~BroadcastToDSPKernel() override = default;
+
+  int Prepare() override;
+  int CheckSpecs() override;
+  int Run() override;
+
+  int BroadcastToRunFp32();
+  int BroadcastToRunFp16();
+  int BroadcastToRunInt16();
+  int BroadcastToRunInt32();
+  int BroadcastToRunComplex64();
+
+ private:
+  std::string kernel_name_;
+  uint64_t core_mask_{0};
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ac9cfc29a13ea12421de0880b2c3a67083aa1de6
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.cc
@@ -0,0 +1,202 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/litert/kernel/dsp/ft78/broadcastto.h"
+#include <cstring>
+#include <string>
+#include <vector>
+#include "src/litert/kernel/cpu/nnacl_c/broadcast_to_parameter.h"
+#include "src/litert/kernel_registry.h"
+
+using mindspore::kernel::KERNEL_ARCH::kDSP;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_BroadcastTo;
+
+namespace mindspore::kernel {
+namespace {
+constexpr size_t kInputTensorSize = 1;
+constexpr size_t kOutputTensorSize = 1;
+}  // namespace
+
+int BroadcastToDSPKernel::Prepare() { return RET_OK; }
+
+// Validates tensor counts, ranks, and the presence of the op parameter.
+int BroadcastToDSPKernel::CheckSpecs() {
+  if (in_tensors_.size() != kInputTensorSize) {
+    MS_LOG(WARNING) << "BroadcastTo expects one input, got: " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  if (out_tensors_.size() != kOutputTensorSize) {
+    MS_LOG(WARNING) << "BroadcastTo expects one output, got: " << out_tensors_.size();
+    return RET_ERROR;
+  }
+  const auto input_rank = in_tensors_[0]->shape().size();
+  const auto output_rank = out_tensors_[0]->shape().size();
+  if (input_rank == 0 || output_rank == 0) {
+    MS_LOG(WARNING) << "BroadcastTo requires non-empty input/output shapes.";
+    return RET_ERROR;
+  }
+  if (input_rank > MAX_SHAPE_SIZE || output_rank > MAX_SHAPE_SIZE) {
+    MS_LOG(WARNING) << "BroadcastTo rank exceeds limit, input: " << input_rank << ", output: " << output_rank;
+    return RET_ERROR;
+  }
+  auto *param = reinterpret_cast<BroadcastToParameter *>(op_parameter_);
+  if (param == nullptr) {
+    MS_LOG(WARNING) << "BroadcastTo parameter is null.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int BroadcastToDSPKernel::BroadcastToRunFp32() {
+  kernel_name_ = "fp_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunFp64() {
+  kernel_name_ = "dp_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunInt8() {
+  kernel_name_ = "i8_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunInt16() {
+  kernel_name_ = "i16_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunInt32() {
+  kernel_name_ = "i32_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunComplex64() {
+  kernel_name_ = "c64_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+int BroadcastToDSPKernel::BroadcastToRunComplex128() {
+  kernel_name_ = "c128_broadcastto_s";
+  core_mask_ = 0xf;
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+// Stages shape metadata (plus a rank pair for the FT78 kernel ABI) into
+// device-visible buffers, dispatches the dtype-specific DSP kernel, and
+// releases the staging buffers regardless of outcome.
+int BroadcastToDSPKernel::Run() {
+  int ret = RET_ERROR;
+  auto allocator = dsp_runtime_->GetAllocator();
+
+  auto *input_tensor = in_tensors_[0];
+  auto *output_tensor = out_tensors_[0];
+
+  const auto &in_shape_vec = input_tensor->shape();
+  const auto &out_shape_vec = output_tensor->shape();
+  const size_t input_shape_size = in_shape_vec.size();
+  const size_t output_shape_size = out_shape_vec.size();
+
+  int32_t in_shape_host[MAX_SHAPE_SIZE] = {0};
+  for (size_t i = 0; i < input_shape_size; ++i) {
+    in_shape_host[i] = static_cast<int32_t>(in_shape_vec[i]);
+  }
+
+  int32_t out_shape_host[MAX_SHAPE_SIZE] = {0};
+  for (size_t i = 0; i < output_shape_size; ++i) {
+    out_shape_host[i] = static_cast<int32_t>(out_shape_vec[i]);
+  }
+
+  void *in_shape_buf = allocator->Malloc(sizeof(int32_t) * input_shape_size);
+  void *out_shape_buf = allocator->Malloc(sizeof(int32_t) * output_shape_size);
+  void *int_addr_buf = allocator->Malloc(sizeof(int32_t) * 2);
+  int32_t int_addr_tmp[2] = {static_cast<int32_t>(input_shape_size), static_cast<int32_t>(output_shape_size)};
+
+  if (in_shape_buf == nullptr || out_shape_buf == nullptr || int_addr_buf == nullptr) {
+    allocator->Free(in_shape_buf);
+    allocator->Free(out_shape_buf);
+    allocator->Free(int_addr_buf);
+    MS_LOG(ERROR) << "BroadcastTo alloc shape buffer failed.";
+    return RET_ERROR;
+  }
+
+  std::memcpy(in_shape_buf, in_shape_host, sizeof(int32_t) * input_shape_size);
+  std::memcpy(out_shape_buf, out_shape_host, sizeof(int32_t) * output_shape_size);
+  std::memcpy(int_addr_buf, int_addr_tmp, sizeof(int_addr_tmp));
+
+  uint64_t in_shape_dev = allocator->GetDeviceMemPtr(in_shape_buf);
+  uint64_t out_shape_dev = allocator->GetDeviceMemPtr(out_shape_buf);
+  uint64_t int_addr_dev = allocator->GetDeviceMemPtr(int_addr_buf);
+
+  uint64_t input_dev = allocator->GetDeviceMemPtr(input_tensor->data());
+  uint64_t output_dev = allocator->GetDeviceMemPtr(output_tensor->data());
+
+  auto data_type = input_tensor->data_type();
+  size_t data_size = lite::DataTypeSize(data_type);
+  if (data_size == 0) {
+    allocator->Free(in_shape_buf);
+    allocator->Free(out_shape_buf);
+    allocator->Free(int_addr_buf);
+    MS_LOG(ERROR) << "BroadcastTo unsupported dtype: " << static_cast<int>(data_type);
+    return RET_ERROR;
+  }
+
+  SetKernelArg({input_dev, output_dev, in_shape_dev, out_shape_dev, int_addr_dev, static_cast<uint64_t>(data_size)});
+
+  if (data_type == kNumberTypeFloat32) {
+    ret = BroadcastToRunFp32();
+  } else if (data_type == kNumberTypeFloat64) {
+    ret = BroadcastToRunFp64();
+  } else if (data_type == kNumberTypeInt8) {
+    ret = BroadcastToRunInt8();
+  } else if (data_type == kNumberTypeInt16) {
+    ret = BroadcastToRunInt16();
+  } else if (data_type == kNumberTypeInt32) {
+    ret = BroadcastToRunInt32();
+  } else if (data_type == kNumberTypeComplex64) {
+    ret = BroadcastToRunComplex64();
+  } else if (data_type == kNumberTypeComplex128) {
+    ret = BroadcastToRunComplex128();
+  } else {
+    MS_LOG(ERROR) << "BroadcastTo unsupported dtype: " << static_cast<int>(data_type);
+  }
+
+  allocator->Free(in_shape_buf);
+  allocator->Free(out_shape_buf);
+  allocator->Free(int_addr_buf);
+
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << this->name() << " Run failed!";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeFloat64, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeInt8, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeInt16, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeInt32, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeComplex64, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeComplex128, PrimitiveType_BroadcastTo, DSPKernelCreator<BroadcastToDSPKernel>)
+
+}  // namespace mindspore::kernel
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.h b/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.h
new file mode 100644
index 0000000000000000000000000000000000000000..e117de7c75c566b085f0d53f123344b4cd8ad380
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft78/broadcastto.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
+
+#include <string>
+#include "src/litert/kernel/dsp/dsp_kernel.h"
+
+namespace mindspore::kernel {
+// FT78 DSP implementation of the BroadcastTo operator.
+class BroadcastToDSPKernel : public DSPKernel {
+ public:
+  using DSPKernel::DSPKernel;
+
+  ~BroadcastToDSPKernel() override = default;
+
+  int Prepare() override;
+  int CheckSpecs() override;
+  int Run() override;
+
+  int BroadcastToRunFp32();
+  int BroadcastToRunFp64();
+  int BroadcastToRunInt8();
+  int BroadcastToRunInt16();
+  int BroadcastToRunInt32();
+  int BroadcastToRunComplex64();
+  int BroadcastToRunComplex128();
+
+ private:
+  std::string kernel_name_;
+  uint64_t core_mask_{0};
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_KERNEL_BROADCASTTO_H_
diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/broadcastto_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/broadcastto_tests.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cdc726a896c1a685d5baf1e1ddbd2e29ffd1b28c
--- /dev/null
+++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/broadcastto_tests.cc
@@ -0,0 +1,518 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "ut/src/runtime/kernel/dsp/dsp_test.h"
+#include "include/api/context.h"
+#include "include/api/data_type.h"
+#include "schema/inner/model_generated.h"
+#include "src/litert/kernel_registry.h"
+#include "src/litert/kernel/cpu/nnacl_c/broadcast_to_parameter.h"
+
+namespace mindspore::lite::dsp::test {
+namespace {
+// Use the same shape sets as main.c to ensure interface compatibility.
+constexpr std::array<int, 2> kCase0In = {1, 100};
+constexpr std::array<int, 2> kCase0Out = {100, 100};
+constexpr std::array<int, 2> kCase1In = {1, 10};  // NOTE: kept for parity with main.c even though dims differ.
+constexpr std::array<int, 2> kCase1Out = {10, 100};
+
+int64_t Accumulate(const std::vector<int> &shape) {
+  int64_t total = 1;
+  for (int dim : shape) {
+    total *= dim;
+  }
+  return total;
+}
+
+// Reference broadcast implementation matching main.c (broadcastto_c).
+template <typename T>
+void BroadcastToRef(const T *input, T *output, const std::vector<int> &input_shape,
+                    const std::vector<int> &output_shape, bool is_complex) {
+  size_t input_rank = input_shape.size();
+  size_t output_rank = output_shape.size();
+  std::vector<int> ext_in_shape(output_rank, 1);
+  size_t shape_gap = output_rank - input_rank;
+  for (size_t i = 0; i < input_rank; ++i) {
+    ext_in_shape[shape_gap + i] = input_shape[i];
+  }
+
+  // stride calculation
+  std::vector<int> out_stride(output_rank, 1);
+  std::vector<int> in_stride(output_rank, 1);
+  for (int i = static_cast<int>(output_rank) - 2; i >= 0; --i) {
+    out_stride[i] = out_stride[i + 1] * output_shape[i + 1];
+  }
+  in_stride[output_rank - 1] = (ext_in_shape[output_rank - 1] == 1) ? 0 : 1;
+  for (int i = static_cast<int>(output_rank) - 2; i >= 0; --i) {
+    in_stride[i] = (ext_in_shape[i] == 1) ? 0 : in_stride[i + 1] * ext_in_shape[i + 1];
+  }
+
+  int64_t out_elems = Accumulate(output_shape);
+  int factor = is_complex ? 2 : 1;
+  std::fill(output, output + out_elems * factor, static_cast<T>(0));
+
+  for (int64_t idx = 0; idx < out_elems; ++idx) {
+    int tmp = static_cast<int>(idx);
+    int in_offset = 0;
+    for (size_t axis = 0; axis < output_rank; ++axis) {
+      int pos = tmp / out_stride[axis];
+      in_offset += pos * in_stride[axis];
+      tmp %= out_stride[axis];
+    }
+    if (!is_complex) {
+      output[idx] = input[in_offset];
+    } else {
+      output[2 * idx] = input[2 * in_offset];
+      output[2 * idx + 1] = input[2 * in_offset + 1];
+    }
+  }
+}
+
+BroadcastToParameter *CreateParam(const std::vector<int> &out_shape) {
+  auto *param = reinterpret_cast<BroadcastToParameter *>(malloc(sizeof(BroadcastToParameter)));
+  if (param == nullptr) {
+    return nullptr;
+  }
+  std::memset(param, 0, sizeof(BroadcastToParameter));
+  param->op_parameter_.type_ = static_cast<int>(schema::PrimitiveType_BroadcastTo);
+  param->shape_size_ = out_shape.size();
+  for (size_t i = 0; i < out_shape.size(); ++i) {
+    param->shape_[i] = out_shape[i];
+  }
+  return param;
+}
+
+std::vector<int> ToVec(const std::array<int, 2> &shape) { return {shape[0], shape[1]}; }
+}  // namespace
+
+class TestDSP_BroadcastTo : public DSPCommonTest {};
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Fp32) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeFloat32, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeFloat32, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  for (int i = 0; i < in_elems; ++i) {
+    reinterpret_cast<float *>(input->MutableData())[i] = static_cast<float>(i + 1);
+  }
+
+  std::vector<float> expected(out_elems, 0.f);
+  BroadcastToRef(reinterpret_cast<float *>(input->MutableData()), expected.data(), in_shape, out_shape, false);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_data = reinterpret_cast<float *>(output->MutableData());
+  int cmp = CompareOutputData(out_data, expected.data(), out_elems, 1e-5);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Int16) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeInt16, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeInt16, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  for (int i = 0; i < in_elems; ++i) {
+    reinterpret_cast<int16_t *>(input->MutableData())[i] = static_cast<int16_t>((i + 1) % 32760);
+  }
+
+  std::vector<int16_t> expected(out_elems, 0);
+  BroadcastToRef(reinterpret_cast<int16_t *>(input->MutableData()), expected.data(), in_shape, out_shape, false);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt16, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_data = reinterpret_cast<int16_t *>(output->MutableData());
+  int cmp = CompareOutputData(out_data, expected.data(), out_elems, 0.0f);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Int32) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeInt32, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeInt32, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  for (int i = 0; i < in_elems; ++i) {
+    reinterpret_cast<int32_t *>(input->MutableData())[i] = i + 1;
+  }
+
+  std::vector<int32_t> expected(out_elems, 0);
+  BroadcastToRef(reinterpret_cast<int32_t *>(input->MutableData()), expected.data(), in_shape, out_shape, false);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt32, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_data = reinterpret_cast<int32_t *>(output->MutableData());
+  int cmp = CompareOutputData(out_data, expected.data(), out_elems, 0.0f);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Complex64) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeComplex64, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeComplex64, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  auto *in_cplx = reinterpret_cast<float *>(input->MutableData());
+  for (int i = 0; i < in_elems; ++i) {
+    in_cplx[2 * i] = 0.5f * static_cast<float>(i + 1);
+    in_cplx[2 * i + 1] = 0.3f * static_cast<float>(i + 1);
+  }
+
+  std::vector<float> expected(out_elems * 2, 0.f);
+  BroadcastToRef(reinterpret_cast<float *>(input->MutableData()), expected.data(), in_shape, out_shape, true);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex64, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_cplx = reinterpret_cast<float *>(output->MutableData());
+  int cmp = CompareOutputData(out_cplx, expected.data(), out_elems * 2, 1e-5);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+#ifdef SUPPORT_FT04
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Fp16) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeFloat16, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeFloat16, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  auto *in_half = reinterpret_cast<uint16_t *>(input->MutableData());
+  for (int i = 0; i < in_elems; ++i) {
+    in_half[i] = fp32_to_fp16(static_cast<float>(i + 1) * 0.1f);
+  }
+
+  std::vector<uint16_t> expected_half(out_elems, 0);
+  BroadcastToRef(reinterpret_cast<uint16_t *>(input->MutableData()), expected_half.data(), in_shape, out_shape, false);
+  std::vector<float> expected(out_elems, 0.f);
+  for (int i = 0; i < out_elems; ++i) {
+    expected[i] = fp16_to_fp32(expected_half[i]);
+  }
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat16, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_half = reinterpret_cast<uint16_t *>(output->MutableData());
+  std::vector<float> out_fp(out_elems, 0.f);
+  for (int i = 0; i < out_elems; ++i) {
+    out_fp[i] = fp16_to_fp32(out_half[i]);
+  }
+  int cmp = CompareOutputData(out_fp.data(), expected.data(), out_elems, 1e-3f);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+#endif  // SUPPORT_FT04
+
+#ifdef SUPPORT_FT78
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Fp64) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeFloat64, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeFloat64, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  for (int i = 0; i < in_elems; ++i) {
+    reinterpret_cast<double *>(input->MutableData())[i] = static_cast<double>(i + 1);
+  }
+
+  std::vector<double> expected(out_elems, 0.0);
+  BroadcastToRef(reinterpret_cast<double *>(input->MutableData()), expected.data(), in_shape, out_shape, false);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat64, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_data = reinterpret_cast<double *>(output->MutableData());
+  int cmp = CompareOutputData(out_data, expected.data(), out_elems, 1e-9);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Int8) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeInt8, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeInt8, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  for (int i = 0; i < in_elems; ++i) {
+    reinterpret_cast<int8_t *>(input->MutableData())[i] = static_cast<int8_t>((i % 120) - 60);
+  }
+
+  std::vector<int8_t> expected(out_elems, 0);
+  BroadcastToRef(reinterpret_cast<int8_t *>(input->MutableData()), expected.data(), in_shape, out_shape, false);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeInt8, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_data = reinterpret_cast<int8_t *>(output->MutableData());
+  int cmp = CompareOutputData(out_data, expected.data(), out_elems, 0.0f);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+
+TEST_F(TestDSP_BroadcastTo, BroadcastTo_Complex128) {
+  InitDSPRuntime();
+  auto in_shape = ToVec(kCase0In);
+  auto out_shape = ToVec(kCase0Out);
+  int64_t in_elems = Accumulate(in_shape);
+  int64_t out_elems = Accumulate(out_shape);
+
+  std::vector<lite::Tensor *> inputs;
+  std::vector<lite::Tensor *> outputs;
+  auto *input = new lite::Tensor(kNumberTypeComplex128, in_shape, mindspore::NHWC, lite::Category::VAR);
+  input->MallocData(allocator_);
+  inputs.push_back(input);
+
+  auto *output = new lite::Tensor(kNumberTypeComplex128, out_shape, mindspore::NHWC, lite::Category::VAR);
+  output->MallocData(allocator_);
+  outputs.push_back(output);
+
+  auto *in_cplx = reinterpret_cast<double *>(input->MutableData());
+  for (int i = 0; i < in_elems; ++i) {
+    in_cplx[2 * i] = 0.5 * static_cast<double>(i + 1);
+    in_cplx[2 * i + 1] = 0.3 * static_cast<double>(i + 1);
+  }
+
+  std::vector<double> expected(out_elems * 2, 0.0);
+  BroadcastToRef(reinterpret_cast<double *>(input->MutableData()), expected.data(), in_shape, out_shape, true);
+
+  auto *ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto *param = CreateParam(out_shape);
+  ASSERT_NE(param, nullptr);
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeComplex128, NHWC, schema::PrimitiveType_BroadcastTo};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  ASSERT_NE(creator, nullptr);
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx, key);
+  ASSERT_NE(kernel, nullptr);
+
+  EXPECT_EQ(kernel->Prepare(), lite::RET_OK);
+  EXPECT_EQ(kernel->Run(), lite::RET_OK);
+
+  auto *out_cplx = reinterpret_cast<double *>(output->MutableData());
+  int cmp = CompareOutputData(out_cplx, expected.data(), out_elems * 2, 1e-9);
+  ASSERT_EQ(0, cmp);
+
+  UninitDSPRuntime();
+  delete ctx;
+  delete kernel;
+  delete input;
+  delete output;
+}
+#endif  // SUPPORT_FT78
+}  // namespace mindspore::lite::dsp::test
diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h
index 88419f42d7e853af569ac4d207993293a3f96258..450e0d6c8c5a67f6b0d9675569fd25acd1494ba7 100644
--- a/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h
+++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/dsp_test.h
@@ -46,6 +46,98 @@ class DSPCommonTest : public CommonTest {
     dsp_runtime_wrapper_ = nullptr;
   }
 
+  // Local IEEE754 half <-> float converters to avoid any linkage/impl mismatch in tests.
+ float fp16_to_fp32(uint16_t h) { + uint32_t sign = (static_cast(h) & 0x8000u) << 16; + uint32_t exp = (static_cast(h) & 0x7C00u) >> 10; + uint32_t mant = static_cast(h & 0x03FFu); + uint32_t f; + if (exp == 0) { + if (mant == 0) { + f = sign; // zero + } else { + // subnormal -> normalize + exp = 1; + while ((mant & 0x0400u) == 0) { + mant <<= 1; + --exp; + } + mant &= 0x03FFu; + uint32_t fexp = (exp + (127 - 15)) << 23; + f = sign | fexp | (mant << 13); + } + } else if (exp == 0x1Fu) { // Inf/NaN + f = sign | 0x7F800000u | (mant << 13); + } else { + uint32_t fexp = (exp + (127 - 15)) << 23; + f = sign | fexp | (mant << 13); + } + float out; + std::memcpy(&out, &f, sizeof(out)); + return out; + } + + uint16_t fp32_to_fp16(float val) { + uint32_t fbits; + std::memcpy(&fbits, &val, sizeof(fbits)); + uint32_t sign = (fbits >> 16) & 0x8000u; + uint32_t fexp = (fbits >> 23) & 0xFFu; + uint32_t fmant = fbits & 0x007FFFFFu; + + // NaN/Inf handling + if (fexp == 0xFFu) { + if (fmant != 0) { + // NaN: keep a quiet NaN in half + return static_cast(sign | 0x7C00u | 0x0001u); + } + // Inf + return static_cast(sign | 0x7C00u); + } + + // Rebias exponent for half + int32_t hexp = static_cast(fexp) - 127 + 15; + + if (hexp <= 0) { + // Subnormal or underflow to zero in half + if (hexp < -10) { + return static_cast(sign); // Underflow to zero + } + // Make implicit leading 1 explicit + uint32_t mant = fmant | 0x00800000u; + // Shift to align to half subnormal mantissa (10 bits) + int shift = 1 - hexp; // shift in [1..10] + // Compute mantissa with round-to-nearest-even + uint32_t mant_rounded = mant >> (shift + 13); + uint32_t round_bit = (mant >> (shift + 12)) & 1u; + uint32_t sticky = (mant & ((1u << (shift + 12)) - 1u)) != 0u; + mant_rounded += (round_bit & (sticky | (mant_rounded & 1u))); + return static_cast(sign | static_cast(mant_rounded)); + } + + if (hexp >= 0x1F) { + // Overflow to half inf + return static_cast(sign | 0x7C00u); + } + + // Normal case: build exponent 
and mantissa with round-to-nearest-even + uint16_t hexp_field = static_cast(hexp) << 10; + uint32_t mant = fmant; + uint32_t mant_rounded = mant >> 13; + uint32_t round_bit = (mant >> 12) & 1u; + uint32_t sticky = (mant & 0xFFFu) != 0u; + mant_rounded += (round_bit & (sticky | (mant_rounded & 1u))); + if (mant_rounded == 0x400u) { + // Mantissa overflow after rounding; bump exponent, zero mantissa + mant_rounded = 0; + hexp_field = static_cast(hexp_field + 0x0400u); + if (hexp_field >= 0x7C00u) { + // Exponent overflow -> inf + return static_cast(sign | 0x7C00u); + } + } + return static_cast(sign | hexp_field | static_cast(mant_rounded)); + } + protected: dsp::DSPRuntimeInnerWrapper *dsp_runtime_wrapper_{nullptr}; std::shared_ptr allocator_;