diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.cc b/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.cc new file mode 100644 index 0000000000000000000000000000000000000000..1434ad836c42e9428042b82f0d2ccb7216a64826 --- /dev/null +++ b/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.cc @@ -0,0 +1,106 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/dsp/ft04/linspace.h" +#include +#include +#include +#include +#include "src/litert/kernel_registry.h" +#include "schema/inner/model_generated.h" + +using mindspore::kernel::KERNEL_ARCH::kDSP; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_LinSpace; + +namespace mindspore::kernel { + +// Inputs are expected to be fp32 scalars (start, end). No fp16-to-fp32 conversion is needed here. 
+
+// Validates the tensor counts for the ft04 LinSpace kernel: exactly three inputs and one output.
+// Returns RET_OK when both counts match, RET_ERROR (after logging the offending count) otherwise.
+int LinspaceDSPKernel::CheckSpecs() {
+  if (out_tensors_.size() != 1) {
+    MS_LOG(WARNING) << "Linspace out size: " << out_tensors_.size();
+    return RET_ERROR;
+  }
+  if (in_tensors_.size() != 3) {
+    MS_LOG(WARNING) << "Linspace in size: " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Nothing to prepare ahead of Run(); always returns RET_OK.
+int LinspaceDSPKernel::Prepare() { return RET_OK; }
+
+// Launches the DSP kernel named "fp_linspace_s" with the arguments already stored in kernel_args_.
+// Returns whatever the DSP runtime's RunKernel() returns.
+int LinspaceDSPKernel::LinspaceRunFp32() {
+  std::string kernel_name = "fp_linspace_s";
+  uint64_t core_mask = 0xF;  // all 4 cores
+  return dsp_runtime_->RunKernel(kernel_name, kernel_args_, core_mask);
+}
+
+// Launches the DSP kernel named "hp_linspace_s" with the arguments already stored in kernel_args_.
+// Returns whatever the DSP runtime's RunKernel() returns.
+int LinspaceDSPKernel::LinspaceRunFp16() {
+  std::string kernel_name = "hp_linspace_s";
+  uint64_t core_mask = 0xF;  // all 4 cores
+  return dsp_runtime_->RunKernel(kernel_name, kernel_args_, core_mask);
+}
+
+// Reads the fp32 start/end scalars from inputs 0 and 1, packs
+// {output device pointer, start bits, end bits, output element count} as kernel arguments and
+// dispatches to the fp32 or fp16 launch helper according to the output tensor dtype.
+// Returns RET_OK on success and RET_ERROR on any validation or launch failure.
+int LinspaceDSPKernel::Run() {
+  // Both scalars are dereferenced as float below, so both dtypes are validated (not only start).
+  auto start_dt = in_tensors_[0]->data_type();
+  auto end_dt = in_tensors_[1]->data_type();
+  if (start_dt != kNumberTypeFloat32 || end_dt != kNumberTypeFloat32) {
+    MS_LOG(ERROR) << "Linspace expects fp32 start/end inputs, got dtype: " << static_cast<int>(start_dt)
+                  << " and " << static_cast<int>(end_dt);
+    return RET_ERROR;
+  }
+
+  // Reject unsupported outputs before any kernel argument is packed.
+  auto out_dt = out_tensors_[0]->data_type();
+  if (out_dt != kNumberTypeFloat32 && out_dt != kNumberTypeFloat16) {
+    MS_LOG(ERROR) << "Linspace unsupported output dtype: " << static_cast<int>(out_dt);
+    return RET_ERROR;
+  }
+
+  auto *start_data = reinterpret_cast<float *>(in_tensors_[0]->data());
+  auto *end_data = reinterpret_cast<float *>(in_tensors_[1]->data());
+  auto *out_data = out_tensors_[0]->data();
+  if (start_data == nullptr || end_data == nullptr || out_data == nullptr) {
+    MS_LOG(ERROR) << this->name() << " Linspace got null tensor data.";
+    return RET_ERROR;
+  }
+
+  auto allocator = dsp_runtime_->GetAllocator();
+  if (allocator == nullptr) {
+    MS_LOG(ERROR) << this->name() << " Linspace got null DSP allocator.";
+    return RET_ERROR;
+  }
+  uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_data);
+  uint64_t length = static_cast<uint64_t>(out_tensors_[0]->ElementsNum());
+
+  // Pack args: output_ptr, start(float bits), end(float bits), length.
+  // Only the low sizeof(float) bytes of each 64-bit slot are written; the rest stay zero.
+  const float start_v = *start_data;
+  const float end_v = *end_data;
+  uint64_t start_hex = 0;
+  std::memcpy(&start_hex, &start_v, sizeof(float));
+  uint64_t end_hex = 0;
+  std::memcpy(&end_hex, &end_v, sizeof(float));
+  SetKernelArg({out_device_ptr, start_hex, end_hex, length});
+
+  const int ret = (out_dt == kNumberTypeFloat32) ? LinspaceRunFp32() : LinspaceRunFp16();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << this->name() << " Linspace Run failed!";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_LinSpace, DSPKernelCreator<LinspaceDSPKernel>)
+REG_KERNEL(kDSP, kNumberTypeFloat16, PrimitiveType_LinSpace, DSPKernelCreator<LinspaceDSPKernel>)
+
+}  // namespace mindspore::kernel
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.h b/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.h
new file mode 100644
index 0000000000000000000000000000000000000000..571f91d5f97c5d115a3e1e8078d1c75ca6391367
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft04/linspace.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT04_LINSPACE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT04_LINSPACE_H_
+
+#include <cstdint>
+#include <string>
+#include "src/litert/kernel/dsp/dsp_kernel.h"
+
+namespace mindspore::kernel {
+// DSP (ft04) kernel for the LinSpace primitive.
+// Constructors are inherited unchanged from DSPKernel.
+class LinspaceDSPKernel : public DSPKernel {
+ public:
+  using DSPKernel::DSPKernel;
+
+  ~LinspaceDSPKernel() override = default;
+  // Lifecycle hooks overridden from DSPKernel; each returns a lite status code.
+  int Prepare() override;
+  int CheckSpecs() override;
+  int Run() override;
+
+  // Launch helpers for the fp32 and fp16 output variants; each returns a lite status code.
+  int LinspaceRunFp32();
+  int LinspaceRunFp16();
+
+ private:
+  // NOTE(review): the ft04 linspace.cc launch helpers use local name/mask values rather than
+  // these members; they mirror the ft78 variant's members. Confirm whether they are still needed.
+  std::string kernel_name_;
+  uint64_t core_mask_{};
+};
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT04_LINSPACE_H_
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.cc b/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5d40d540b4fa91e79f90980755798dc3d97982d2
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.cc
@@ -0,0 +1,101 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/litert/kernel/dsp/ft78/linspace.h"
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include "src/litert/kernel_registry.h"
+
+using mindspore::kernel::KERNEL_ARCH::kDSP;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_LinSpace;
+
+namespace mindspore::kernel {
+// Validates the tensor counts for the ft78 LinSpace kernel: exactly three inputs and one output.
+// Returns RET_OK when both counts match, RET_ERROR (after logging the offending count) otherwise.
+int LinspaceDSPKernel::CheckSpecs() {
+  if (out_tensors_.size() != 1) {
+    MS_LOG(WARNING) << "Linspace out size: " << out_tensors_.size();
+    return RET_ERROR;
+  }
+  if (in_tensors_.size() != 3) {
+    MS_LOG(WARNING) << "Linspace in size: " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Nothing to prepare ahead of Run(); always returns RET_OK.
+int LinspaceDSPKernel::Prepare() { return RET_OK; }
+
+// Launches the DSP kernel named "fp_linspace_s" with the arguments already stored in kernel_args_.
+// Records the kernel name and core mask in the members before the launch and returns the
+// DSP runtime's RunKernel() result.
+int LinspaceDSPKernel::LinspaceRunFp32() {
+  kernel_name_ = "fp_linspace_s";
+  core_mask_ = 0xff;  // low eight bits set; presumably one bit per DSP core - confirm
+  return dsp_runtime_->RunKernel(kernel_name_, kernel_args_, core_mask_);
+}
+
+// Validates the inputs (fp32 start/end, int32 positive num matching the output element count,
+// fp32 output), packs {output device pointer, start bits, end bits, length} as kernel arguments
+// and launches the fp32 kernel. Returns RET_OK on success, RET_ERROR otherwise.
+int LinspaceDSPKernel::Run() {
+  if (in_tensors_[0]->data_type() != kNumberTypeFloat32 || in_tensors_[1]->data_type() != kNumberTypeFloat32) {
+    MS_LOG(ERROR) << "Linspace ft78 requires fp32 start/end inputs, got start dtype "
+                  << static_cast<int>(in_tensors_[0]->data_type()) << " end dtype "
+                  << static_cast<int>(in_tensors_[1]->data_type());
+    return RET_ERROR;
+  }
+  if (in_tensors_[2]->data_type() != kNumberTypeInt32) {
+    MS_LOG(ERROR) << "Linspace expects int32 count input, got dtype " << static_cast<int>(in_tensors_[2]->data_type());
+    return RET_ERROR;
+  }
+
+  // Every buffer below is dereferenced or handed to the allocator, so none may be null.
+  auto *start_data = reinterpret_cast<float *>(in_tensors_[0]->data());
+  auto *end_data = reinterpret_cast<float *>(in_tensors_[1]->data());
+  auto *num_data = reinterpret_cast<int32_t *>(in_tensors_[2]->data());
+  auto *out_data = out_tensors_[0]->data();
+  if (start_data == nullptr || end_data == nullptr || num_data == nullptr || out_data == nullptr) {
+    MS_LOG(ERROR) << this->name() << " Linspace got null tensor data.";
+    return RET_ERROR;
+  }
+
+  int32_t elements = *num_data;
+  if (elements <= 0) {
+    MS_LOG(ERROR) << "Linspace expects positive num, got " << elements;
+    return RET_ERROR;
+  }
+
+  uint64_t length = static_cast<uint64_t>(out_tensors_[0]->ElementsNum());
+  if (length != static_cast<uint64_t>(elements)) {
+    MS_LOG(ERROR) << "Linspace output length " << length << " mismatch with requested num " << elements;
+    return RET_ERROR;
+  }
+
+  if (out_tensors_[0]->data_type() != kNumberTypeFloat32) {
+    MS_LOG(ERROR) << "Linspace ft78 only supports fp32 output, got dtype "
+                  << static_cast<int>(out_tensors_[0]->data_type());
+    return RET_ERROR;
+  }
+
+  float start_v = *start_data;
+  float end_v = *end_data;
+
+  auto allocator = dsp_runtime_->GetAllocator();
+  if (allocator == nullptr) {
+    MS_LOG(ERROR) << this->name() << " Linspace got null DSP allocator.";
+    return RET_ERROR;
+  }
+  uint64_t out_device_ptr = allocator->GetDeviceMemPtr(out_data);
+
+  // Only the low sizeof(float) bytes of each 64-bit slot are written; the rest stay zero.
+  uint64_t start_hex = 0;
+  std::memcpy(&start_hex, &start_v, sizeof(float));
+  uint64_t end_hex = 0;
+  std::memcpy(&end_hex, &end_v, sizeof(float));
+  SetKernelArg({out_device_ptr, start_hex, end_hex, length});
+
+  auto ret = LinspaceRunFp32();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << this->name() << " Linspace Run failed!";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kDSP, kNumberTypeFloat32, PrimitiveType_LinSpace, DSPKernelCreator<LinspaceDSPKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.h b/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.h
new file mode 100644
index 0000000000000000000000000000000000000000..9b3a12330c76dfd85344aeb9923eee6db7f85e66
--- /dev/null
+++ b/mindspore-lite/src/litert/kernel/dsp/ft78/linspace.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT78_LINSPACE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT78_LINSPACE_H_
+
+#include <string>
+#include <vector>
+#include "src/litert/kernel/dsp/dsp_kernel.h"
+
+namespace mindspore::kernel {
+// DSP (ft78) kernel for the LinSpace primitive.
+class LinspaceDSPKernel : public DSPKernel {
+ public:
+  // Forwards all arguments unchanged to the DSPKernel base constructor.
+  LinspaceDSPKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : DSPKernel(parameter, inputs, outputs, ctx) {}
+  ~LinspaceDSPKernel() override = default;
+
+  // Lifecycle hooks overridden from DSPKernel; each returns a lite status code.
+  int CheckSpecs() override;
+  int Prepare() override;
+  int Run() override;
+
+ private:
+  // Launches the fp32 DSP kernel with the arguments previously stored by Run().
+  int LinspaceRunFp32();
+
+  // Kernel name and core mask used for the most recent launch (set by LinspaceRunFp32()).
+  std::string kernel_name_;
+  uint64_t core_mask_{0};
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_DSP_FT78_LINSPACE_H_
diff --git a/mindspore-lite/test/ut/src/runtime/kernel/dsp/linspace_tests.cc b/mindspore-lite/test/ut/src/runtime/kernel/dsp/linspace_tests.cc
new file mode 100644
index 0000000000000000000000000000000000000000..95944aab5c5d15ad223fbbbf40911933742ab97e
--- /dev/null
+++ b/mindspore-lite/test/ut/src/runtime/kernel/dsp/linspace_tests.cc
@@ -0,0 +1,249 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+#include "ut/src/runtime/kernel/dsp/dsp_test.h"
+#include "include/api/context.h"
+#include "include/api/data_type.h"
+#include "include/api/model.h"
+#include "schema/inner/model_generated.h"
+#include "src/litert/kernel/dsp/dsp_subgraph.h"
+#include "src/litert/kernel_registry.h"
+#include "src/litert/kernel/cpu/nnacl_c/op_base.h"
+
+namespace mindspore::lite::dsp::test {
+
+class TestDSP_Linspace : public DSPCommonTest {};
+
+// Runs the registered fp32 DSP LinSpace kernel for 100 points over [0, 99] and compares the
+// output against a host-side reference computed as start + step * i.
+TEST_F(TestDSP_Linspace, Linspace_Fp32) {
+  InitDSPRuntime();
+  std::vector<lite::Tensor *> inputs_;
+  std::vector<lite::Tensor *> outputs_;
+
+  const int num = 100;
+  const float start = 0.0f;
+  const float end = 99.0f;  // inclusive
+
+  // inputs: start (fp32), end (fp32), num (i32)
+  auto in_start = new lite::Tensor(kNumberTypeFloat32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_start->MallocData(allocator_);
+  *reinterpret_cast<float *>(in_start->MutableData()) = start;
+  inputs_.push_back(in_start);
+
+  auto in_end = new lite::Tensor(kNumberTypeFloat32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_end->MallocData(allocator_);
+  *reinterpret_cast<float *>(in_end->MutableData()) = end;
+  inputs_.push_back(in_end);
+
+  auto in_num = new lite::Tensor(kNumberTypeInt32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_num->MallocData(allocator_);
+  *reinterpret_cast<int32_t *>(in_num->MutableData()) = num;
+  inputs_.push_back(in_num);
+
+  // output: fp32 length=num
+  auto output = new lite::Tensor(kNumberTypeFloat32, {num}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  output->MallocData(allocator_);
+  outputs_.push_back(output);
+
+  // Zero the output so stale memory cannot accidentally match the reference.
+  std::fill_n(reinterpret_cast<float *>(output->MutableData()), num, 0.0f);
+
+  auto ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat32, NHWC, schema::PrimitiveType_LinSpace};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  // Fail the test cleanly instead of calling through a null creator when no kernel is registered.
+  ASSERT_TRUE(creator != nullptr);
+  // Minimal OpParameter for kernel construction
+  // NOTE(review): who releases param after the kernel is constructed is not visible here - confirm.
+  auto *param = new OpParameter();
+  param->type_ = static_cast<int>(schema::PrimitiveType_LinSpace);
+  auto kernel = creator(inputs_, outputs_, param, ctx, key);
+  ASSERT_TRUE(kernel != nullptr);
+  auto ret = kernel->Prepare();
+  EXPECT_EQ(0, ret);
+  ret = kernel->Run();
+  EXPECT_EQ(0, ret);
+
+  // expected: evenly spaced from start to end inclusive
+  std::vector<float> correct(num);
+  if (num == 1) {
+    correct[0] = start;
+  } else {
+    const float step = (end - start) / static_cast<float>(num - 1);
+    for (int i = 0; i < num; ++i) correct[i] = start + step * static_cast<float>(i);
+  }
+
+  ASSERT_EQ(0, CompareOutputData(reinterpret_cast<float *>(outputs_[0]->MutableData()), correct.data(),
+                                 outputs_[0]->ElementsNum()));
+
+  UninitDSPRuntime();
+  delete ctx;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+  delete kernel;
+}
+
+// local helpers for fp16 conversion
+// float16 carries the raw 16 bits of an IEEE 754 binary16 value (1 sign, 5 exponent, 10 fraction).
+typedef int16_t float16;
+
+// Decodes raw binary16 bits into the float with the same value.
+// Zeros, subnormals, normals, infinities and NaNs are handled; every binary16 value is exactly
+// representable as a float, so the conversion is lossless.
+static inline float fp16_to_fp32(float16 h) {
+  // Work on the unsigned bit pattern so a set sign bit never triggers sign extension.
+  const uint32_t bits = static_cast<uint16_t>(h);
+  const uint32_t sign = (bits & 0x8000U) << 16;
+  const uint32_t exp = (bits & 0x7C00U) >> 10;
+  uint32_t frac = bits & 0x03FFU;
+  uint32_t f_exp = 0;
+  uint32_t f_frac = 0;
+  if (exp == 0) {
+    if (frac != 0) {
+      // Subnormal half: value = frac * 2^-24. Shift left until the implicit leading one reaches
+      // bit 10 (0x0400), then drop that bit. Stopping at bit 9 would leave the leading one inside
+      // the stored fraction and decode to 1.5x the true value.
+      uint32_t shift = 0;
+      while ((frac & 0x0400U) == 0) {
+        frac <<= 1;
+        ++shift;
+      }
+      f_exp = 127 - 15 + 1 - shift;
+      f_frac = (frac & 0x03FFU) << 13;
+    }
+  } else if (exp == 0x1F) {
+    // Infinity (frac == 0) or NaN (frac != 0); keep the payload bits.
+    f_exp = 255;
+    f_frac = frac << 13;
+  } else {
+    // Normal number: re-bias the exponent from 15 to 127.
+    f_exp = exp - 15 + 127;
+    f_frac = frac << 13;
+  }
+  uint32_t f_bits = sign | (f_exp << 23) | f_frac;
+  float result;
+  std::memcpy(&result, &f_bits, sizeof(result));
+  return result;
+}
+
+// Encodes a float as raw binary16 bits using round-to-nearest, ties-to-even.
+// Magnitudes too small for a half subnormal become signed zero, overflow becomes signed infinity,
+// and NaN inputs become a quiet NaN (0x7E00) carrying the input sign.
+[[maybe_unused]] static inline float16 fp32_to_fp16(float v) {
+  uint32_t bits;
+  std::memcpy(&bits, &v, sizeof(bits));
+  uint32_t sign = (bits >> 31) & 0x1;
+  int32_t exponent = ((bits >> 23) & 0xFF) - 127 + 15;
+  uint32_t mantissa = bits & 0x007FFFFF;
+  float16 result;
+  if (exponent <= 0) {
+    if (exponent < -10) {
+      // Below half of the smallest half subnormal: flush to signed zero.
+      result = static_cast<float16>(sign << 15);
+    } else {
+      // Half subnormal: restore the implicit one and shift it into the 10-bit fraction.
+      mantissa |= 0x00800000;
+      int shift = 14 - exponent;
+      uint32_t mantissa_shifted = mantissa >> shift;
+      uint32_t remainder = mantissa & ((1U << shift) - 1);
+      if (remainder > (1U << (shift - 1)) || (remainder == (1U << (shift - 1)) && (mantissa_shifted & 1))) {
+        mantissa_shifted++;
+      }
+      // Rounding can carry into bit 10 (0x0400), which is exactly the encoding of the smallest
+      // normal half (exponent field 1, fraction 0). The value must not be masked with 0x3FF,
+      // otherwise that carry is dropped and the result collapses to zero.
+      result = static_cast<float16>((sign << 15) | mantissa_shifted);
+    }
+  } else if (exponent == 0xFF - 127 + 15) {
+    // Source exponent field is all ones: infinity or NaN.
+    result =
+      (mantissa == 0) ? static_cast<float16>((sign << 15) | 0x7C00) : static_cast<float16>((sign << 15) | 0x7E00);
+  } else if (exponent > 30) {
+    // Too large for binary16: signed infinity.
+    result = static_cast<float16>((sign << 15) | 0x7C00);
+  } else {
+    uint32_t mantissa_rounded = mantissa >> 13;
+    uint32_t remainder = mantissa & 0x1FFF;
+    if (remainder > 0x1000 || (remainder == 0x1000 && (mantissa_rounded & 1))) {
+      mantissa_rounded++;
+      if (mantissa_rounded == 0x400) {
+        // Fraction overflowed: bump the exponent, possibly into infinity.
+        mantissa_rounded = 0;
+        exponent++;
+        if (exponent > 30) {
+          return static_cast<float16>((sign << 15) | 0x7C00);
+        }
+      }
+    }
+    result = static_cast<float16>((sign << 15) | (static_cast<uint32_t>(exponent) << 10) | (mantissa_rounded & 0x3FF));
+  }
+  return result;
+}
+
+#ifdef SUPPORT_FT04
+// Runs the registered fp16 DSP LinSpace kernel for 257 points over [-1, 1] and compares the
+// output, decoded to fp32, against a host reference rounded through fp16 (tolerance 1e-4).
+TEST_F(TestDSP_Linspace, Linspace_Fp16) {
+  InitDSPRuntime();
+  std::vector<lite::Tensor *> inputs_;
+  std::vector<lite::Tensor *> outputs_;
+
+  const int num = 257;
+  const float start = -1.0f;
+  const float end = 1.0f;  // inclusive
+
+  // inputs: start (fp32), end (fp32), num (i32)
+  auto in_start = new lite::Tensor(kNumberTypeFloat32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_start->MallocData(allocator_);
+  *reinterpret_cast<float *>(in_start->MutableData()) = start;
+  inputs_.push_back(in_start);
+
+  auto in_end = new lite::Tensor(kNumberTypeFloat32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_end->MallocData(allocator_);
+  *reinterpret_cast<float *>(in_end->MutableData()) = end;
+  inputs_.push_back(in_end);
+
+  auto in_num = new lite::Tensor(kNumberTypeInt32, {1}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  in_num->MallocData(allocator_);
+  *reinterpret_cast<int32_t *>(in_num->MutableData()) = num;
+  inputs_.push_back(in_num);
+
+  // output: fp16 length=num
+  auto output = new lite::Tensor(kNumberTypeFloat16, {num}, mindspore::NHWC, lite::Category::CONST_TENSOR);
+  output->MallocData(allocator_);
+  outputs_.push_back(output);
+
+  // Zero the output so stale memory cannot accidentally match the reference.
+  std::memset(output->MutableData(), 0, static_cast<size_t>(num) * sizeof(uint16_t));
+
+  auto ctx = new lite::InnerContext;
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+
+  kernel::KernelKey key = {kernel::KERNEL_ARCH::kDSP, kNumberTypeFloat16, NHWC, schema::PrimitiveType_LinSpace};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  // Fail the test cleanly instead of calling through a null creator when no kernel is registered.
+  ASSERT_TRUE(creator != nullptr);
+  // NOTE(review): who releases param after the kernel is constructed is not visible here - confirm.
+  auto *param = new OpParameter();
+  param->type_ = static_cast<int>(schema::PrimitiveType_LinSpace);
+  auto kernel = creator(inputs_, outputs_, param, ctx, key);
+  ASSERT_TRUE(kernel != nullptr);
+  auto ret = kernel->Prepare();
+  EXPECT_EQ(0, ret);
+  ret = kernel->Run();
+  EXPECT_EQ(0, ret);
+
+  // expected: quantized to fp16 grid (via fp32->fp16->fp32)
+  std::vector<float> correct(num);
+  if (num == 1) {
+    correct[0] = fp16_to_fp32(fp32_to_fp16(start));
+  } else {
+    const float step = (end - start) / static_cast<float>(num - 1);
+    for (int i = 0; i < num; ++i) {
+      float v = start + step * static_cast<float>(i);
+      correct[i] = fp16_to_fp32(fp32_to_fp16(v));
+    }
+  }
+
+  // Decode the raw fp16 output so it can be compared in fp32.
+  auto out_fp16 = reinterpret_cast<uint16_t *>(outputs_[0]->MutableData());
+  std::vector<float> actual(num);
+  for (int i = 0; i < num; ++i) actual[i] = fp16_to_fp32(static_cast<float16>(out_fp16[i]));
+
+  ASSERT_EQ(0, CompareOutputData(actual.data(), correct.data(), outputs_[0]->ElementsNum(), 1e-4));
+
+  UninitDSPRuntime();
+  delete ctx;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+  delete kernel;
+}
+#endif
+
+}  // namespace mindspore::lite::dsp::test