From 3b5c1620dbf408fe12f0d0019fa6cb943838695d Mon Sep 17 00:00:00 2001
From: sunsuodong
Date: Wed, 16 Feb 2022 05:00:08 -0800
Subject: [PATCH] format gn file and support asm file

Signed-off-by: sunsuodong
---
 .../kernel_compiler/cpu/nnacl/BUILD.gn        | 200 +++++++++++++++---
 mindspore/core/mindrt/BUILD.gn                |   2 +-
 mindspore/lite/BUILD.gn                       | 177 ++++++++++++----
 mindspore/lite/bundle.json                    |   2 +-
 .../kernel/arm/fp16/arithmetic_fp16.cc        |   2 +-
 .../runtime/kernel/arm/fp16/arithmetic_fp16.h |   2 +-
 .../fp16/convolution_depthwise_3x3_fp16.cc    |   4 +-
 .../arm/fp16/convolution_depthwise_3x3_fp16.h |   2 +-
 .../arm/fp16/convolution_depthwise_fp16.cc    |   4 +-
 .../arm/fp16/convolution_depthwise_fp16.h     |   2 +-
 .../convolution_depthwise_slidewindow_fp16.cc |   4 +-
 .../convolution_depthwise_slidewindow_fp16.h  |   2 +-
 .../arm/fp16/deconvolution_depthwise_fp16.cc  |   4 +-
 .../arm/fp16/deconvolution_depthwise_fp16.h   |   2 +-
 .../src/runtime/kernel/arm/fp16/stack_fp16.cc |   4 +-
 .../src/runtime/kernel/arm/fp16/stack_fp16.h  |   2 +-
 mindspore/lite/tools/benchmark/BUILD.gn       |  53 +++++
 17 files changed, 388 insertions(+), 80 deletions(-)
 create mode 100644 mindspore/lite/tools/benchmark/BUILD.gn

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
index 7bb3769d0b..0637531d21 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
@@ -16,10 +16,6 @@ import("//build/ohos.gni")
 
 ohos_source_set("nnacl_o") {
   sources = [
-    "common_func.c",
-    "kernel.c",
-    "nnacl_common.c",
-    "nnacl_utils.c",
     "base/arithmetic_base.c",
     "base/batch_to_space_base.c",
     "base/broadcast_to.c",
@@ -39,10 +35,11 @@ ohos_source_set("nnacl_o") {
     "base/transpose_base.c",
     "base/unsorted_segment_sum_base.c",
     "base/unstack_base.c",
+    "common_func.c",
     "fp32/activation_fp32.c",
     "fp32/adam_fp32.c",
-    "fp32/adder_fp32.c",
     "fp32/add_fp32.c",
+    "fp32/adder_fp32.c",
     "fp32/arg_min_max_fp32.c",
     "fp32/arithmetic_compare_fp32.c",
     "fp32/arithmetic_fp32.c",
@@ -122,23 +119,9 @@ ohos_source_set("nnacl_o") {
     "fp32_grad/softmax_grad.c",
     "fp32_grad/strided_slice_grad.c",
     "fp32_sparse/matmul_sparse_x1_fp32.c",
-    "infer/control/tensor_array_infer.c",
-    "infer/control/tensor_array_read_infer.c",
-    "infer/control/tensor_array_write_infer.c",
-    "infer/control/tensorlist_fromtensor_infer.c",
-    "infer/control/tensorlist_getitem_infer.c",
-    "infer/control/tensorlist_reserve_infer.c",
-    "infer/control/tensorlist_setitem_infer.c",
-    "infer/control/tensorlist_stack_infer.c",
-    "infer/string/custom_extract_features_infer.c",
-    "infer/string/custom_normalize_infer.c",
-    "infer/string/custom_predict_infer.c",
-    "infer/string/hashtable_lookup_infer.c",
-    "infer/string/lsh_projection_infer.c",
-    "infer/string/skip_gram_infer.c",
     "infer/adam_infer.c",
-    "infer/addn_infer.c",
     "infer/add_sub_grad_infer.c",
+    "infer/addn_infer.c",
     "infer/affine_infer.c",
     "infer/all_gather.c",
     "infer/apply_momentum_infer.c",
@@ -160,6 +143,14 @@ ohos_source_set("nnacl_o") {
     "infer/common_infer.c",
     "infer/concat_infer.c",
     "infer/constant_of_shape_infer.c",
+    "infer/control/tensor_array_infer.c",
+    "infer/control/tensor_array_read_infer.c",
+    "infer/control/tensor_array_write_infer.c",
+    "infer/control/tensorlist_fromtensor_infer.c",
+    "infer/control/tensorlist_getitem_infer.c",
+    "infer/control/tensorlist_reserve_infer.c",
+    "infer/control/tensorlist_setitem_infer.c",
+    "infer/control/tensorlist_stack_infer.c",
     "infer/conv2d_grad_filter_infer.c",
"infer/conv2d_grad_input_infer.c", "infer/conv2d_infer.c", @@ -240,6 +231,12 @@ ohos_source_set("nnacl_o") { "infer/stack_infer.c", "infer/strided_slice_grad_infer.c", "infer/strided_slice_infer.c", + "infer/string/custom_extract_features_infer.c", + "infer/string/custom_normalize_infer.c", + "infer/string/custom_predict_infer.c", + "infer/string/hashtable_lookup_infer.c", + "infer/string/lsh_projection_infer.c", + "infer/string/skip_gram_infer.c", "infer/tile_infer.c", "infer/topk_infer.c", "infer/transpose_infer.c", @@ -253,8 +250,8 @@ ohos_source_set("nnacl_o") { "int8/arg_min_max_int8.c", "int8/arithmetic_int8.c", "int8/arithmetic_self_int8.c", - "int8/batchnorm_int8.c", "int8/batch_to_space_int8.c", + "int8/batchnorm_int8.c", "int8/common_func_int8.c", "int8/concat_int8.c", "int8/conv1x1_int8.c", @@ -266,8 +263,8 @@ ohos_source_set("nnacl_o") { "int8/depth_to_space_int8.c", "int8/div_int8.c", "int8/fixed_point.c", - "int8/gather_int8.c", "int8/gatherNd_int8.c", + "int8/gather_int8.c", "int8/hswish_int8.c", "int8/l2_norm_int8.c", "int8/layer_norm_int8.c", @@ -296,11 +293,164 @@ ohos_source_set("nnacl_o") { "int8/topk_int8.c", "int8/transpose_int8.c", "int8/unsqueeze_int8.c", + "kernel.c", + "nnacl_common.c", + "nnacl_utils.c", ] - include_dirs = [ - "../", - ] + if (target_cpu == "arm") { + sources += [ + "assembly/arm32/ConvDw3x3Int8BorderPixel.S", + "assembly/arm32/ConvDwFp32Border.S", + "assembly/arm32/ConvDwFp32Center.S", + "assembly/arm32/ConvDwFp32Row.S", + "assembly/arm32/ConvDwInt8Center.S", + "assembly/arm32/ConvDwInt8PostAlign4.S", + "assembly/arm32/ConvDwInt8PostAlign4PerChannel.S", + "assembly/arm32/ConvDwInt8Row.S", + "assembly/arm32/DeconvDwFp32Center.S", + "assembly/arm32/DeconvDwInt8Center.S", + "assembly/arm32/DeconvDwInt8Post.S", + "assembly/arm32/IndirectGemmInt16to32_8x4.S", + "assembly/arm32/IndirectGemmInt8_2x4.S", + "assembly/arm32/MatVecMulFp32.S", + "assembly/arm32/MatmulFp32.S", + "assembly/arm32/MatmulFp32Opt.S", + "assembly/arm32/MatmulFp32Opt12x4.S", + "assembly/arm32/MatmulInt8.S", + "assembly/arm32/MatmulInt8Opt.S", + "assembly/arm32/MatmulWinogradFp32.S", + "assembly/arm32/PostFuncBiasReluC4.S", + "assembly/arm32/PostFuncBiasReluC8.S", + "assembly/arm32/PreSum4x16Int8Peroc.S", + "assembly/arm32/PreSum4x16Int8Pert.S", + "assembly/arm32/TiledC4MatmulFp32.S", + "assembly/arm32/WinogradTransLeft.S", + "assembly/arm32/WinogradTransRight.S", + ] + } else if (target_cpu == "arm64") { + sources += [ + "assembly/arm64/AdderFp32.S", + "assembly/arm64/ConvDw3x3Fp32Corner.S", + "assembly/arm64/ConvDw3x3Fp32Horizontal.S", + "assembly/arm64/ConvDw3x3Fp32Stride1.S", + "assembly/arm64/ConvDw3x3Fp32Stride2.S", + "assembly/arm64/ConvDw3x3Fp32Vertical.S", + "assembly/arm64/ConvDw3x3Int8.S", + "assembly/arm64/ConvDw3x3Int8Corner.S", + "assembly/arm64/ConvDw3x3Int8Horizontal.S", + "assembly/arm64/ConvDw3x3Int8Stride2.S", + "assembly/arm64/ConvDw3x3Int8Vertical.S", + "assembly/arm64/ConvDw3x3Line.S", + "assembly/arm64/ConvDwFp32Border.S", + "assembly/arm64/ConvDwFp32Center.S", + "assembly/arm64/ConvDwFp32Indirect3x3.S", + "assembly/arm64/ConvDwFp32Indirect5x5.S", + "assembly/arm64/ConvDwFp32Row.S", + "assembly/arm64/ConvDwInt8Center.S", + "assembly/arm64/ConvDwInt8PostAlign4.S", + "assembly/arm64/ConvDwInt8PostAlign4PerChannel.S", + "assembly/arm64/ConvDwInt8Row.S", + "assembly/arm64/ConvFp32Center.S", + "assembly/arm64/DeconvDwFp32Border.S", + "assembly/arm64/DeconvDwFp32Center.S", + "assembly/arm64/DeconvDwInt8Center.S", + "assembly/arm64/DeconvDwInt8Post.S", + 
"assembly/arm64/IndirectGemmInt16to32_8x4.S", + "assembly/arm64/MatVecMulFp32.S", + "assembly/arm64/MatmulFp32.S", + "assembly/arm64/MatmulFp32Opt.S", + "assembly/arm64/MatmulFp32OptRow12.S", + "assembly/arm64/MatmulFp32OptRow4.S", + "assembly/arm64/MatmulFp32OptRow8.S", + "assembly/arm64/MatmulInt8.S", + "assembly/arm64/MatmulInt8Opt.S", + "assembly/arm64/MatmulR4Int8.S", + "assembly/arm64/MatmulWinogradFp32.S", + "assembly/arm64/PostFuncBiasReluC4.S", + "assembly/arm64/PostFuncBiasReluC8.S", + "assembly/arm64/PostFuncInt8C4Neon64.S", + "assembly/arm64/PreSum4x16Int8Peroc.S", + "assembly/arm64/PreSum4x16Int8Pert.S", + "assembly/arm64/SPMM8x8Fp32.S", + "assembly/arm64/TiledC4MatmulFp32.S", + "assembly/arm64/WinogradTransLeft.S", + "assembly/arm64/WinogradTransRight.S", + "fp16/activation_fp16.c", + "fp16/arg_min_max_fp16.c", + "fp16/arithmetic_fp16.c", + "fp16/arithmetic_self_fp16.c", + "fp16/batchnorm_fp16.c", + "fp16/common_func_fp16.c", + "fp16/conv_depthwise_fp16.c", + "fp16/conv_fp16.c", + "fp16/crop_fp16.c", + "fp16/deconv_fp16.c", + "fp16/deconv_winograd_fp16.c", + "fp16/exp_fp16.c", + "fp16/fill_fp16.c", + "fp16/gru_fp16.c", + "fp16/instance_norm_fp16.c", + "fp16/layer_norm_fp16.c", + "fp16/log_softmax_fp16.c", + "fp16/lstm_fp16.c", + "fp16/matmul_fp16.c", + "fp16/matrix_fp16.c", + "fp16/one_hot_fp16.c", + "fp16/pack_fp16.c", + "fp16/pad_fp16.c", + "fp16/pooling_fp16.c", + "fp16/power_fp16.c", + "fp16/prelu_fp16.c", + "fp16/quant_dtype_cast_fp16.c", + "fp16/ragged_range_fp16.c", + "fp16/reduce_fp16.c", + "fp16/resize_fp16.c", + "fp16/scale_fp16.c", + "fp16/softmax_fp16.c", + "fp16/sparse_to_dense_fp16.c", + "fp16/splice_fp16.c", + "fp16/topk_fp16.c", + "fp16/transpose_fp16.c", + "fp16/unique_fp16.c", + "fp16/where_fp16.c", + "fp16/winograd_transform_fp16.c", + "fp16/winograd_utils_fp16.c", + "fp16_grad/activation_grad.c", + "fp16_grad/arithmetic_grad.c", + "fp16_grad/arithmetic_self_grad.c", + "fp16_grad/batch_norm.c", + "fp16_grad/convolution_grad_filter.c", + "fp16_grad/convolution_grad_input.c", + "fp16_grad/dropout_grad.c", + "fp16_grad/gemm_fp16.c", + "fp16_grad/layernorm_grad.c", + "fp16_grad/pack_fp16_ext.c", + "fp16_grad/pooling_grad.c", + "fp16_grad/resize_grad.c", + "fp16_grad/strided_slice_grad.c", + "fp16_grad/unsorted_segment_sum.c", + ] + } + + include_dirs = [ "../" ] + + defines = [] + + if (target_cpu == "arm") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM32", + "ENABLE_NEON", + ] + } else if (target_cpu == "arm64") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM64", + "ENABLE_NEON", + "ENABLE_FP16", + ] + } part_name = "mindspore" } diff --git a/mindspore/core/mindrt/BUILD.gn b/mindspore/core/mindrt/BUILD.gn index db4c0b53e4..a2d81c4ff4 100644 --- a/mindspore/core/mindrt/BUILD.gn +++ b/mindspore/core/mindrt/BUILD.gn @@ -35,7 +35,7 @@ ohos_source_set("mindrt_o") { "src/", "../../lite/", "../../lite/src/", - "../../core/" + "../../core/", ] part_name = "mindspore" diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn index 841304b25e..45d16454a1 100644 --- a/mindspore/lite/BUILD.gn +++ b/mindspore/lite/BUILD.gn @@ -15,11 +15,20 @@ import("//build/ohos.gni") +ohos_group("mindspore") { + deps = [ + ":mindspore_lib", + #"tools/benchmark:benchmark_bin", + ] +} + +config("mindspore_lib_config") { + include_dirs = [ "../../include/" ] +} + ohos_shared_library("mindspore_lib") { sources = [ - "src/delegate/tensorrt/distribution/distribution_base.cc", - "tools/converter/quantizer/fse_decoder.cc", - "tools/converter/quantizer/fse_bit_stream.cc", + 
"../core/utils/status.cc", "src/c_api/context_c.cc", "src/c_api/model_c.cc", "src/c_api/tensor_c.cc", @@ -42,6 +51,7 @@ ohos_shared_library("mindspore_lib") { "src/control_flow/entrance_subgraph_kernel.cc", "src/control_flow/exit_subgraph_kernel.cc", "src/control_flow/identity_kernel.cc", + "src/cpu_info.cc", "src/cxx_api/cell.cc", "src/cxx_api/context.cc", "src/cxx_api/converters.cc", @@ -53,7 +63,20 @@ ohos_shared_library("mindspore_lib") { "src/cxx_api/tensor/tensor_impl.cc", "src/cxx_api/tensor_utils.cc", "src/cxx_api/types.cc", - "../core/utils/status.cc", + "src/delegate/tensorrt/distribution/distribution_base.cc", + "src/errorcode.cc", + "src/executor.cc", + "src/huffman_decode.cc", + "src/inner_context.cc", + "src/inner_kernel.cc", + "src/kernel_registry.cc", + "src/lite_kernel.cc", + "src/lite_kernel_util.cc", + "src/lite_mindrt.cc", + "src/lite_model.cc", + "src/lite_session.cc", + "src/mindrt_executor.cc", + "src/ms_tensor.cc", "src/ops/compat/attr_transfer_common.cc", "src/ops/compat/v0/broadcast_to_compat_v0.cc", "src/ops/compat/v0/cast_compat_v0.cc", @@ -77,8 +100,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/activation_grad_populate.cc", "src/ops/populate/activation_populate.cc", "src/ops/populate/adam_populate.cc", - "src/ops/populate/adder_populate.cc", "src/ops/populate/add_populate.cc", + "src/ops/populate/adder_populate.cc", "src/ops/populate/affine_populate.cc", "src/ops/populate/all_gather.cc", "src/ops/populate/argmax_populate.cc", @@ -120,8 +143,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/eltwise_populate.cc", "src/ops/populate/embedding_lookup_populate.cc", "src/ops/populate/erf_populate.cc", - "src/ops/populate/expand_dims_populate.cc", "src/ops/populate/exp_populate.cc", + "src/ops/populate/expand_dims_populate.cc", "src/ops/populate/fill_populate.cc", "src/ops/populate/flatten_populate.cc", "src/ops/populate/full_connection_populate.cc", @@ -147,12 +170,12 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/nonzero_populate.cc", "src/ops/populate/one_hot_populate.cc", "src/ops/populate/oneslike_populate.cc", + "src/ops/populate/p_relu_populate.cc", "src/ops/populate/pad_populate.cc", "src/ops/populate/partial_populate.cc", "src/ops/populate/pooling_populate.cc", "src/ops/populate/populate_register.cc", "src/ops/populate/power_populate.cc", - "src/ops/populate/p_relu_populate.cc", "src/ops/populate/prior_box_populate.cc", "src/ops/populate/quant_dtype_cast_populate.cc", "src/ops/populate/ragged_range_populate.cc", @@ -204,8 +227,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/v0/activation_grad_populate_v0.cc", "src/ops/populate/v0/activation_populate_v0.cc", "src/ops/populate/v0/adam_populate_v0.cc", - "src/ops/populate/v0/addn_populate_v0.cc", "src/ops/populate/v0/add_populate_v0.cc", + "src/ops/populate/v0/addn_populate_v0.cc", "src/ops/populate/v0/argmax_populate_v0.cc", "src/ops/populate/v0/argmin_populate_v0.cc", "src/ops/populate/v0/arithmetic_populate_v0.cc", @@ -241,8 +264,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/v0/div_populate_v0.cc", "src/ops/populate/v0/eltwise_populate_v0.cc", "src/ops/populate/v0/embedding_lookup_populate_v0.cc", - "src/ops/populate/v0/expand_dims_populate_v0.cc", "src/ops/populate/v0/exp_populate_v0.cc", + "src/ops/populate/v0/expand_dims_populate_v0.cc", "src/ops/populate/v0/fill_populate_v0.cc", "src/ops/populate/v0/flatten_populate_v0.cc", "src/ops/populate/v0/full_connection_populate_v0.cc", @@ -262,11 +285,11 @@ ohos_shared_library("mindspore_lib") { 
"src/ops/populate/v0/non_max_suppression_populate_v0.cc", "src/ops/populate/v0/one_hot_populate_v0.cc", "src/ops/populate/v0/oneslike_populate_v0.cc", + "src/ops/populate/v0/p_relu_populate_v0.cc", "src/ops/populate/v0/pad_populate_v0.cc", "src/ops/populate/v0/partial_populate_v0.cc", "src/ops/populate/v0/pooling_populate_v0.cc", "src/ops/populate/v0/power_populate_v0.cc", - "src/ops/populate/v0/p_relu_populate_v0.cc", "src/ops/populate/v0/prior_box_populate_v0.cc", "src/ops/populate/v0/quant_dtype_cast_populate_v0.cc", "src/ops/populate/v0/range_populate_v0.cc", @@ -356,8 +379,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/arithmetic_compare_fp32.cc", "src/runtime/kernel/arm/fp32/arithmetic_fp32.cc", "src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc", - "src/runtime/kernel/arm/fp32/batchnorm_fp32.cc", "src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc", + "src/runtime/kernel/arm/fp32/batchnorm_fp32.cc", "src/runtime/kernel/arm/fp32/bias_fp32.cc", "src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc", "src/runtime/kernel/arm/fp32/cast_fp32.cc", @@ -385,8 +408,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/fill_fp32.cc", "src/runtime/kernel/arm/fp32/fullconnection_fp32.cc", "src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc", - "src/runtime/kernel/arm/fp32/gather_fp32.cc", "src/runtime/kernel/arm/fp32/gatherNd_fp32.cc", + "src/runtime/kernel/arm/fp32/gather_fp32.cc", "src/runtime/kernel/arm/fp32/glu_fp32.cc", "src/runtime/kernel/arm/fp32/group_convolution_fp32.cc", "src/runtime/kernel/arm/fp32/gru_fp32.cc", @@ -397,8 +420,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc", "src/runtime/kernel/arm/fp32/log_softmax_fp32.cc", "src/runtime/kernel/arm/fp32/lstm_fp32.cc", - "src/runtime/kernel/arm/fp32/matmul_fp32_base.cc", "src/runtime/kernel/arm/fp32/matmul_fp32.cc", + "src/runtime/kernel/arm/fp32/matmul_fp32_base.cc", "src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc", "src/runtime/kernel/arm/fp32/nonzero_fp32.cc", "src/runtime/kernel/arm/fp32/pad_fp32.cc", @@ -423,7 +446,6 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/softmax_fp32.cc", "src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc", "src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc", - "src/runtime/kernel/arm/fp32_sparse/matmul_sparse_fp32.cc", "src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc", "src/runtime/kernel/arm/fp32/splice_fp32.cc", "src/runtime/kernel/arm/fp32/topk_fp32.cc", @@ -433,13 +455,14 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/unstack_fp32.cc", "src/runtime/kernel/arm/fp32/where_fp32.cc", "src/runtime/kernel/arm/fp32/zeroslike_fp32.cc", + "src/runtime/kernel/arm/fp32_sparse/matmul_sparse_fp32.cc", "src/runtime/kernel/arm/int8/activation_int8.cc", "src/runtime/kernel/arm/int8/add_int8.cc", "src/runtime/kernel/arm/int8/argminmax_int8.cc", "src/runtime/kernel/arm/int8/arithmetic_int8.cc", "src/runtime/kernel/arm/int8/arithmetic_self_int8.cc", - "src/runtime/kernel/arm/int8/batchnorm_int8.cc", "src/runtime/kernel/arm/int8/batch_to_space_int8.cc", + "src/runtime/kernel/arm/int8/batchnorm_int8.cc", "src/runtime/kernel/arm/int8/bias_add_int8.cc", "src/runtime/kernel/arm/int8/concat_int8.cc", "src/runtime/kernel/arm/int8/convolution_1x1_int8.cc", @@ -456,8 +479,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/int8/detection_post_process_int8.cc", "src/runtime/kernel/arm/int8/div_int8.cc", 
"src/runtime/kernel/arm/int8/fullconnection_int8.cc", - "src/runtime/kernel/arm/int8/gather_int8.cc", "src/runtime/kernel/arm/int8/gatherNd_int8.cc", + "src/runtime/kernel/arm/int8/gather_int8.cc", "src/runtime/kernel/arm/int8/group_convolution_int8.cc", "src/runtime/kernel/arm/int8/hswish_int8.cc", "src/runtime/kernel/arm/int8/l2_norm_int8.cc", @@ -495,28 +518,16 @@ ohos_shared_library("mindspore_lib") { "src/runtime/runtime_allocator.cc", "src/runtime/runtime_convert.cc", "src/runtime/runtime_pass.cc", - "src/cpu_info.cc", - "src/errorcode.cc", - "src/executor.cc", - "src/huffman_decode.cc", - "src/inner_context.cc", - "src/inner_kernel.cc", - "src/kernel_registry.cc", - "src/lite_kernel.cc", - "src/lite_kernel_util.cc", - "src/lite_mindrt.cc", - "src/lite_model.cc", - "src/lite_session.cc", - "src/mindrt_executor.cc", - "src/ms_tensor.cc", "src/scheduler.cc", "src/schema_tensor_wrapper.cc", "src/sub_graph_kernel.cc", "src/sub_graph_split.cc", - "src/tensor_category.cc", "src/tensor.cc", + "src/tensor_category.cc", "src/tensorlist.cc", "src/weight_decoder.cc", + "tools/converter/quantizer/fse_bit_stream.cc", + "tools/converter/quantizer/fse_decoder.cc", ] include_dirs = [ @@ -532,7 +543,23 @@ ohos_shared_library("mindspore_lib") { "../../mindspore/core/mindrt/include/", ] - defines = [] + defines = [ "ENABLE_MINDRT" ] + + if (target_cpu == "arm") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM32", + "ENABLE_NEON", + ] + } else if (target_cpu == "arm64") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM64", + "ENABLE_NEON", + "ENABLE_FP16", + ] + } + cflags = [] cflags_c = [] cflags_cc = [] @@ -541,15 +568,93 @@ ohos_shared_library("mindspore_lib") { configs = [] deps = [ + ":third_party", "../ccsrc/backend/kernel_compiler/cpu/nnacl/:nnacl_o", "../core/mindrt/:mindrt_o", - ":third_party" ] - output_name = "libmindspore-lite.huawei" # 可选,模块输出名 - output_extension = "so" # 可选,模块名后缀 + if (target_cpu == "arm64") { + deps += [ ":arm_fp16_cc_o" ] + } + + output_name = "libmindspore-lite.huawei" + output_extension = "so" + + public_configs = [ ":mindspore_lib_config" ] + part_name = "mindspore" +} + +ohos_source_set("arm_fp16_cc_o") { + sources = [ + "src/runtime/kernel/arm/fp16/activation_fp16.cc", + "src/runtime/kernel/arm/fp16/addn_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc", + "src/runtime/kernel/arm/fp16/batchnorm_fp16.cc", + "src/runtime/kernel/arm/fp16/biasadd_fp16.cc", + "src/runtime/kernel/arm/fp16/cast_fp16.cc", + "src/runtime/kernel/arm/fp16/common_fp16.cc", + "src/runtime/kernel/arm/fp16/concat_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc", + "src/runtime/kernel/arm/fp16/crop_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc", + "src/runtime/kernel/arm/fp16/depth_to_space_fp16.cc", + "src/runtime/kernel/arm/fp16/exp_fp16.cc", + "src/runtime/kernel/arm/fp16/fill_fp16.cc", + 
"src/runtime/kernel/arm/fp16/fullconnection_fp16.cc", + "src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc", + "src/runtime/kernel/arm/fp16/gather_fp16.cc", + "src/runtime/kernel/arm/fp16/group_convolution_fp16.cc", + "src/runtime/kernel/arm/fp16/gru_fp16.cc", + "src/runtime/kernel/arm/fp16/instance_norm_fp16.cc", + "src/runtime/kernel/arm/fp16/layer_norm_fp16.cc", + "src/runtime/kernel/arm/fp16/layout_transform_fp16.cc", + "src/runtime/kernel/arm/fp16/log_softmax_fp16.cc", + "src/runtime/kernel/arm/fp16/lstm_fp16.cc", + "src/runtime/kernel/arm/fp16/matmul_base_fp16.cc", + "src/runtime/kernel/arm/fp16/matmul_fp16.cc", + "src/runtime/kernel/arm/fp16/pad_fp16.cc", + "src/runtime/kernel/arm/fp16/pooling_fp16.cc", + "src/runtime/kernel/arm/fp16/power_fp16.cc", + "src/runtime/kernel/arm/fp16/prelu_fp16.cc", + "src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc", + "src/runtime/kernel/arm/fp16/ragged_range_fp16.cc", + "src/runtime/kernel/arm/fp16/reduce_fp16.cc", + "src/runtime/kernel/arm/fp16/resize_fp16.cc", + "src/runtime/kernel/arm/fp16/scale_fp16.cc", + "src/runtime/kernel/arm/fp16/slice_fp16.cc", + "src/runtime/kernel/arm/fp16/softmax_fp16.cc", + "src/runtime/kernel/arm/fp16/stack_fp16.cc", + "src/runtime/kernel/arm/fp16/transpose_fp16.cc", + "src/runtime/kernel/arm/fp16/where_fp16.cc", + "src/runtime/kernel/arm/fp16_grad/activation_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_self_grad.cc", + "src/runtime/kernel/arm/fp16_grad/bias_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_filter.cc", + "src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_input.cc", + "src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/neg_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/strided_slice_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/unsorted_segment_sum_fp16.cc", + ] + + include_dirs = [ "../ccsrc/backend/kernel_compiler/cpu/" ] - part_name = "mindspore" # 必选,所属部件名称 + part_name = "mindspore" } action("third_party") { diff --git a/mindspore/lite/bundle.json b/mindspore/lite/bundle.json index 0ea17236db..891e22639d 100644 --- a/mindspore/lite/bundle.json +++ b/mindspore/lite/bundle.json @@ -22,7 +22,7 @@ "third_party": [] }, "build": { - "sub_component": [ "//third_party/mindspore/mindspore/lite:mindspore_lib" ], + "sub_component": [ "//third_party/mindspore/mindspore/lite:mindspore" ], "inner_kits": [], "test": [ "//third_party/mindspore/mindspore/lite/test:mindspore_test" ] } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index bbdfca301e..84eeda7eca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -152,7 +152,7 @@ void ArithmeticFP16CPUKernel::TileConstTensor(const void *in_data, void *out_dat in_shape, in_strides, out_strides, multiple); } -int ArithmeticFP16CPUKernel::Execute(const void *input0, const void *input1, void *output, int size, bool is_opt) { +int ArithmeticFP16CPUKernel::DoExecute(const void *input0, const void *input1, void *output, int size, bool is_opt) { int ret = RET_OK; if (is_opt) { 
     CHECK_NULL_RETURN(arithmetic_opt_func_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
index a0c746cce8..24bdf23e5a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
@@ -49,7 +49,7 @@ class ArithmeticFP16CPUKernel : public ArithmeticCPUKernel {
   int ConstTensorBroadCast() override;
   void TileConstTensor(const void *in_data, void *out_data, size_t ndim, const int *in_shape,
                        const int *in_strides, const int *out_strides, const int *multiple) override;
-  int Execute(const void *input0, const void *input1, void *output, int size, bool is_opt) override;
+  int DoExecute(const void *input0, const void *input1, void *output, int size, bool is_opt) override;
   void FreeFp16Buffer();
   ArithmeticFuncFp16 arithmetic_func_ = nullptr;
   ArithmeticOptFuncFp16 arithmetic_opt_func_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
index 4846164516..b9f55fa533 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
@@ -94,7 +94,7 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwise3x3Fp16CPUKernel::DoExecute(int task_id) {
   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
   auto buffer = buffer_ + C12NUM * c8 * units * task_id;
@@ -108,7 +108,7 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
 
 int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw = reinterpret_cast<ConvolutionDepthwise3x3Fp16CPUKernel *>(cdata);
-  auto ret = conv_dw->Execute(task_id);
+  auto ret = conv_dw->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwise3x3Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
index 0b77b16cc0..125b96930b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
@@ -36,7 +36,7 @@ class ConvolutionDepthwise3x3Fp16CPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
index b0fa1ef2c6..856e32f5a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@@ -90,7 +90,7 @@ int ConvolutionDepthwiseFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwiseFp16CPUKernel::DoExecute(int task_id) {
   auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data());
   auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data());
   MS_ASSERT(input_ptr != nullptr);
@@ -106,7 +106,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
 
 static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseFp16CPUKernel *>(cdata);
-  auto ret = conv_dw_fp16->Execute(task_id);
+  auto ret = conv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
index cdefc1456b..f1bbf9fc5d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
@@ -44,7 +44,7 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index 2066dc66a7..634bf83e29 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -128,7 +128,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwiseSWFp16CPUKernel::DoExecute(int task_id) {
   ConvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
                reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -136,7 +136,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
 
 static int ConvDwSWFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseSWFp16CPUKernel *>(cdata);
-  auto ret = conv_dw_fp16->Execute(task_id);
+  auto ret = conv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseSWFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index f732061a11..8657ad83ec 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -46,7 +46,7 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
 
   int InitPackedInputOutput();
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
index 9edf2ce18c..630d1360ae 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
@@ -151,7 +151,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
+int DeconvolutionDepthwiseFp16CPUKernel::DoExecute(int task_id) {
   DeconvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
                  reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -159,7 +159,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
 
 static int DeconvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto deconv_dw_fp16 = reinterpret_cast<DeconvolutionDepthwiseFp16CPUKernel *>(cdata);
-  auto ret = deconv_dw_fp16->Execute(task_id);
+  auto ret = deconv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvolutionDepthwiseFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
index 5f4e7cfc17..c3769730ad 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
@@ -48,7 +48,7 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
 
   int InitPackedInputOutput();
   int InitSlideParam();
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   int MallocWeightBiasData() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index 7bacce7070..d927a6e9a1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -83,7 +83,7 @@ int StackFp16CPUKernel::Prepare() {
   return ReSize();
 }
 
-int StackFp16CPUKernel::Execute(int task_id) {
+int StackFp16CPUKernel::DoExecute(int task_id) {
   auto inputs = buffers_.data();
   void *output_data = reinterpret_cast<void *>(out_buffer_);
   auto step = UP_DIV(outer_size_, num_threads_);
@@ -99,7 +99,7 @@ int StackFp16CPUKernel::Execute(int task_id) {
 
 static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto stack = reinterpret_cast<StackFp16CPUKernel *>(cdata);
-  if (stack->Execute(task_id) != RET_OK) {
+  if (stack->DoExecute(task_id) != RET_OK) {
     return RET_ERROR;
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
index 2aa3d8278a..37a5eb1901 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
@@ -29,7 +29,7 @@ class StackFp16CPUKernel : public StackBaseCPUKernel {
   ~StackFp16CPUKernel() override = default;
   int Prepare() override;
   int Run() override;
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void InitMallocFlags();
diff --git a/mindspore/lite/tools/benchmark/BUILD.gn b/mindspore/lite/tools/benchmark/BUILD.gn
new file mode 100644
index 0000000000..44d643bec0
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/BUILD.gn
@@ -0,0 +1,53 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import("//build/ohos.gni")
+
+ohos_executable("benchmark_bin") {
+  sources = [
+    "benchmark_base.cc",
+    "benchmark_c_api.cc",
+    "benchmark_unified_api.cc",
+    "benchmark.cc",
+    "main.cc",
+    "run_benchmark.cc",
+    "../common/flag_parser.cc",
+    "../common/string_util.cc",
+    "../../src/common/file_utils.cc",
+    "../../src/common/utils.cc",
+    "../../../ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_common.c",
+  ]
+
+  include_dirs = [
+    "../../",
+    "../../../core/",
+    "../../../../",
+    "../../../../third_party/",
+    "../../../../third_party/flatbuffers-v2.0.0/include/",
+    "../../../ccsrc/backend/kernel_compiler/cpu/"
+  ]
+
+  defines = [ "BENCHMARK_CLIP_JSON" ]
+
+  cflags = [ "-frtti" ]
+
+  deps = [
+    "../../:mindspore_lib",
+  ]
+
+  output_name = "benchmark_bin"
+  install_enable = true
+  part_name = "mindspore"
+}
-- 
Gitee