From 3b5c1620dbf408fe12f0d0019fa6cb943838695d Mon Sep 17 00:00:00 2001
From: sunsuodong
Date: Wed, 16 Feb 2022 05:00:08 -0800
Subject: [PATCH] format gn file and support asm file

Signed-off-by: sunsuodong
---
 .../kernel_compiler/cpu/nnacl/BUILD.gn        | 200 +++++++++++++++---
 mindspore/core/mindrt/BUILD.gn                |   2 +-
 mindspore/lite/BUILD.gn                       | 177 ++++++++++++----
 mindspore/lite/bundle.json                    |   2 +-
 .../kernel/arm/fp16/arithmetic_fp16.cc        |   2 +-
 .../runtime/kernel/arm/fp16/arithmetic_fp16.h |   2 +-
 .../fp16/convolution_depthwise_3x3_fp16.cc    |   4 +-
 .../arm/fp16/convolution_depthwise_3x3_fp16.h |   2 +-
 .../arm/fp16/convolution_depthwise_fp16.cc    |   4 +-
 .../arm/fp16/convolution_depthwise_fp16.h     |   2 +-
 .../convolution_depthwise_slidewindow_fp16.cc |   4 +-
 .../convolution_depthwise_slidewindow_fp16.h  |   2 +-
 .../arm/fp16/deconvolution_depthwise_fp16.cc  |   4 +-
 .../arm/fp16/deconvolution_depthwise_fp16.h   |   2 +-
 .../src/runtime/kernel/arm/fp16/stack_fp16.cc |   4 +-
 .../src/runtime/kernel/arm/fp16/stack_fp16.h  |   2 +-
 mindspore/lite/tools/benchmark/BUILD.gn       |  53 +++++
 17 files changed, 388 insertions(+), 80 deletions(-)
 create mode 100644 mindspore/lite/tools/benchmark/BUILD.gn

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
index 7bb3769d0b..0637531d21 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/BUILD.gn
@@ -16,10 +16,6 @@ import("//build/ohos.gni")
 
 ohos_source_set("nnacl_o") {
   sources = [
-    "common_func.c",
-    "kernel.c",
-    "nnacl_common.c",
-    "nnacl_utils.c",
     "base/arithmetic_base.c",
     "base/batch_to_space_base.c",
     "base/broadcast_to.c",
@@ -39,10 +35,11 @@ ohos_source_set("nnacl_o") {
     "base/transpose_base.c",
     "base/unsorted_segment_sum_base.c",
     "base/unstack_base.c",
+    "common_func.c",
     "fp32/activation_fp32.c",
     "fp32/adam_fp32.c",
-    "fp32/adder_fp32.c",
     "fp32/add_fp32.c",
+    "fp32/adder_fp32.c",
     "fp32/arg_min_max_fp32.c",
     "fp32/arithmetic_compare_fp32.c",
     "fp32/arithmetic_fp32.c",
@@ -122,23 +119,9 @@ ohos_source_set("nnacl_o") {
     "fp32_grad/softmax_grad.c",
     "fp32_grad/strided_slice_grad.c",
     "fp32_sparse/matmul_sparse_x1_fp32.c",
-    "infer/control/tensor_array_infer.c",
-    "infer/control/tensor_array_read_infer.c",
-    "infer/control/tensor_array_write_infer.c",
-    "infer/control/tensorlist_fromtensor_infer.c",
-    "infer/control/tensorlist_getitem_infer.c",
-    "infer/control/tensorlist_reserve_infer.c",
-    "infer/control/tensorlist_setitem_infer.c",
-    "infer/control/tensorlist_stack_infer.c",
-    "infer/string/custom_extract_features_infer.c",
-    "infer/string/custom_normalize_infer.c",
-    "infer/string/custom_predict_infer.c",
-    "infer/string/hashtable_lookup_infer.c",
-    "infer/string/lsh_projection_infer.c",
-    "infer/string/skip_gram_infer.c",
     "infer/adam_infer.c",
-    "infer/addn_infer.c",
     "infer/add_sub_grad_infer.c",
+    "infer/addn_infer.c",
     "infer/affine_infer.c",
     "infer/all_gather.c",
     "infer/apply_momentum_infer.c",
@@ -160,6 +143,14 @@ ohos_source_set("nnacl_o") {
     "infer/common_infer.c",
     "infer/concat_infer.c",
     "infer/constant_of_shape_infer.c",
+    "infer/control/tensor_array_infer.c",
+    "infer/control/tensor_array_read_infer.c",
+    "infer/control/tensor_array_write_infer.c",
+    "infer/control/tensorlist_fromtensor_infer.c",
+    "infer/control/tensorlist_getitem_infer.c",
+    "infer/control/tensorlist_reserve_infer.c",
+    "infer/control/tensorlist_setitem_infer.c",
+    "infer/control/tensorlist_stack_infer.c",
     "infer/conv2d_grad_filter_infer.c",
"infer/conv2d_grad_input_infer.c", "infer/conv2d_infer.c", @@ -240,6 +231,12 @@ ohos_source_set("nnacl_o") { "infer/stack_infer.c", "infer/strided_slice_grad_infer.c", "infer/strided_slice_infer.c", + "infer/string/custom_extract_features_infer.c", + "infer/string/custom_normalize_infer.c", + "infer/string/custom_predict_infer.c", + "infer/string/hashtable_lookup_infer.c", + "infer/string/lsh_projection_infer.c", + "infer/string/skip_gram_infer.c", "infer/tile_infer.c", "infer/topk_infer.c", "infer/transpose_infer.c", @@ -253,8 +250,8 @@ ohos_source_set("nnacl_o") { "int8/arg_min_max_int8.c", "int8/arithmetic_int8.c", "int8/arithmetic_self_int8.c", - "int8/batchnorm_int8.c", "int8/batch_to_space_int8.c", + "int8/batchnorm_int8.c", "int8/common_func_int8.c", "int8/concat_int8.c", "int8/conv1x1_int8.c", @@ -266,8 +263,8 @@ ohos_source_set("nnacl_o") { "int8/depth_to_space_int8.c", "int8/div_int8.c", "int8/fixed_point.c", - "int8/gather_int8.c", "int8/gatherNd_int8.c", + "int8/gather_int8.c", "int8/hswish_int8.c", "int8/l2_norm_int8.c", "int8/layer_norm_int8.c", @@ -296,11 +293,164 @@ ohos_source_set("nnacl_o") { "int8/topk_int8.c", "int8/transpose_int8.c", "int8/unsqueeze_int8.c", + "kernel.c", + "nnacl_common.c", + "nnacl_utils.c", ] - include_dirs = [ - "../", - ] + if (target_cpu == "arm") { + sources += [ + "assembly/arm32/ConvDw3x3Int8BorderPixel.S", + "assembly/arm32/ConvDwFp32Border.S", + "assembly/arm32/ConvDwFp32Center.S", + "assembly/arm32/ConvDwFp32Row.S", + "assembly/arm32/ConvDwInt8Center.S", + "assembly/arm32/ConvDwInt8PostAlign4.S", + "assembly/arm32/ConvDwInt8PostAlign4PerChannel.S", + "assembly/arm32/ConvDwInt8Row.S", + "assembly/arm32/DeconvDwFp32Center.S", + "assembly/arm32/DeconvDwInt8Center.S", + "assembly/arm32/DeconvDwInt8Post.S", + "assembly/arm32/IndirectGemmInt16to32_8x4.S", + "assembly/arm32/IndirectGemmInt8_2x4.S", + "assembly/arm32/MatVecMulFp32.S", + "assembly/arm32/MatmulFp32.S", + "assembly/arm32/MatmulFp32Opt.S", + "assembly/arm32/MatmulFp32Opt12x4.S", + "assembly/arm32/MatmulInt8.S", + "assembly/arm32/MatmulInt8Opt.S", + "assembly/arm32/MatmulWinogradFp32.S", + "assembly/arm32/PostFuncBiasReluC4.S", + "assembly/arm32/PostFuncBiasReluC8.S", + "assembly/arm32/PreSum4x16Int8Peroc.S", + "assembly/arm32/PreSum4x16Int8Pert.S", + "assembly/arm32/TiledC4MatmulFp32.S", + "assembly/arm32/WinogradTransLeft.S", + "assembly/arm32/WinogradTransRight.S", + ] + } else if (target_cpu == "arm64") { + sources += [ + "assembly/arm64/AdderFp32.S", + "assembly/arm64/ConvDw3x3Fp32Corner.S", + "assembly/arm64/ConvDw3x3Fp32Horizontal.S", + "assembly/arm64/ConvDw3x3Fp32Stride1.S", + "assembly/arm64/ConvDw3x3Fp32Stride2.S", + "assembly/arm64/ConvDw3x3Fp32Vertical.S", + "assembly/arm64/ConvDw3x3Int8.S", + "assembly/arm64/ConvDw3x3Int8Corner.S", + "assembly/arm64/ConvDw3x3Int8Horizontal.S", + "assembly/arm64/ConvDw3x3Int8Stride2.S", + "assembly/arm64/ConvDw3x3Int8Vertical.S", + "assembly/arm64/ConvDw3x3Line.S", + "assembly/arm64/ConvDwFp32Border.S", + "assembly/arm64/ConvDwFp32Center.S", + "assembly/arm64/ConvDwFp32Indirect3x3.S", + "assembly/arm64/ConvDwFp32Indirect5x5.S", + "assembly/arm64/ConvDwFp32Row.S", + "assembly/arm64/ConvDwInt8Center.S", + "assembly/arm64/ConvDwInt8PostAlign4.S", + "assembly/arm64/ConvDwInt8PostAlign4PerChannel.S", + "assembly/arm64/ConvDwInt8Row.S", + "assembly/arm64/ConvFp32Center.S", + "assembly/arm64/DeconvDwFp32Border.S", + "assembly/arm64/DeconvDwFp32Center.S", + "assembly/arm64/DeconvDwInt8Center.S", + "assembly/arm64/DeconvDwInt8Post.S", + 
"assembly/arm64/IndirectGemmInt16to32_8x4.S", + "assembly/arm64/MatVecMulFp32.S", + "assembly/arm64/MatmulFp32.S", + "assembly/arm64/MatmulFp32Opt.S", + "assembly/arm64/MatmulFp32OptRow12.S", + "assembly/arm64/MatmulFp32OptRow4.S", + "assembly/arm64/MatmulFp32OptRow8.S", + "assembly/arm64/MatmulInt8.S", + "assembly/arm64/MatmulInt8Opt.S", + "assembly/arm64/MatmulR4Int8.S", + "assembly/arm64/MatmulWinogradFp32.S", + "assembly/arm64/PostFuncBiasReluC4.S", + "assembly/arm64/PostFuncBiasReluC8.S", + "assembly/arm64/PostFuncInt8C4Neon64.S", + "assembly/arm64/PreSum4x16Int8Peroc.S", + "assembly/arm64/PreSum4x16Int8Pert.S", + "assembly/arm64/SPMM8x8Fp32.S", + "assembly/arm64/TiledC4MatmulFp32.S", + "assembly/arm64/WinogradTransLeft.S", + "assembly/arm64/WinogradTransRight.S", + "fp16/activation_fp16.c", + "fp16/arg_min_max_fp16.c", + "fp16/arithmetic_fp16.c", + "fp16/arithmetic_self_fp16.c", + "fp16/batchnorm_fp16.c", + "fp16/common_func_fp16.c", + "fp16/conv_depthwise_fp16.c", + "fp16/conv_fp16.c", + "fp16/crop_fp16.c", + "fp16/deconv_fp16.c", + "fp16/deconv_winograd_fp16.c", + "fp16/exp_fp16.c", + "fp16/fill_fp16.c", + "fp16/gru_fp16.c", + "fp16/instance_norm_fp16.c", + "fp16/layer_norm_fp16.c", + "fp16/log_softmax_fp16.c", + "fp16/lstm_fp16.c", + "fp16/matmul_fp16.c", + "fp16/matrix_fp16.c", + "fp16/one_hot_fp16.c", + "fp16/pack_fp16.c", + "fp16/pad_fp16.c", + "fp16/pooling_fp16.c", + "fp16/power_fp16.c", + "fp16/prelu_fp16.c", + "fp16/quant_dtype_cast_fp16.c", + "fp16/ragged_range_fp16.c", + "fp16/reduce_fp16.c", + "fp16/resize_fp16.c", + "fp16/scale_fp16.c", + "fp16/softmax_fp16.c", + "fp16/sparse_to_dense_fp16.c", + "fp16/splice_fp16.c", + "fp16/topk_fp16.c", + "fp16/transpose_fp16.c", + "fp16/unique_fp16.c", + "fp16/where_fp16.c", + "fp16/winograd_transform_fp16.c", + "fp16/winograd_utils_fp16.c", + "fp16_grad/activation_grad.c", + "fp16_grad/arithmetic_grad.c", + "fp16_grad/arithmetic_self_grad.c", + "fp16_grad/batch_norm.c", + "fp16_grad/convolution_grad_filter.c", + "fp16_grad/convolution_grad_input.c", + "fp16_grad/dropout_grad.c", + "fp16_grad/gemm_fp16.c", + "fp16_grad/layernorm_grad.c", + "fp16_grad/pack_fp16_ext.c", + "fp16_grad/pooling_grad.c", + "fp16_grad/resize_grad.c", + "fp16_grad/strided_slice_grad.c", + "fp16_grad/unsorted_segment_sum.c", + ] + } + + include_dirs = [ "../" ] + + defines = [] + + if (target_cpu == "arm") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM32", + "ENABLE_NEON", + ] + } else if (target_cpu == "arm64") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM64", + "ENABLE_NEON", + "ENABLE_FP16", + ] + } part_name = "mindspore" } diff --git a/mindspore/core/mindrt/BUILD.gn b/mindspore/core/mindrt/BUILD.gn index db4c0b53e4..a2d81c4ff4 100644 --- a/mindspore/core/mindrt/BUILD.gn +++ b/mindspore/core/mindrt/BUILD.gn @@ -35,7 +35,7 @@ ohos_source_set("mindrt_o") { "src/", "../../lite/", "../../lite/src/", - "../../core/" + "../../core/", ] part_name = "mindspore" diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn index 841304b25e..45d16454a1 100644 --- a/mindspore/lite/BUILD.gn +++ b/mindspore/lite/BUILD.gn @@ -15,11 +15,20 @@ import("//build/ohos.gni") +ohos_group("mindspore") { + deps = [ + ":mindspore_lib", + #"tools/benchmark:benchmark_bin", + ] +} + +config("mindspore_lib_config") { + include_dirs = [ "../../include/" ] +} + ohos_shared_library("mindspore_lib") { sources = [ - "src/delegate/tensorrt/distribution/distribution_base.cc", - "tools/converter/quantizer/fse_decoder.cc", - "tools/converter/quantizer/fse_bit_stream.cc", + 
"../core/utils/status.cc", "src/c_api/context_c.cc", "src/c_api/model_c.cc", "src/c_api/tensor_c.cc", @@ -42,6 +51,7 @@ ohos_shared_library("mindspore_lib") { "src/control_flow/entrance_subgraph_kernel.cc", "src/control_flow/exit_subgraph_kernel.cc", "src/control_flow/identity_kernel.cc", + "src/cpu_info.cc", "src/cxx_api/cell.cc", "src/cxx_api/context.cc", "src/cxx_api/converters.cc", @@ -53,7 +63,20 @@ ohos_shared_library("mindspore_lib") { "src/cxx_api/tensor/tensor_impl.cc", "src/cxx_api/tensor_utils.cc", "src/cxx_api/types.cc", - "../core/utils/status.cc", + "src/delegate/tensorrt/distribution/distribution_base.cc", + "src/errorcode.cc", + "src/executor.cc", + "src/huffman_decode.cc", + "src/inner_context.cc", + "src/inner_kernel.cc", + "src/kernel_registry.cc", + "src/lite_kernel.cc", + "src/lite_kernel_util.cc", + "src/lite_mindrt.cc", + "src/lite_model.cc", + "src/lite_session.cc", + "src/mindrt_executor.cc", + "src/ms_tensor.cc", "src/ops/compat/attr_transfer_common.cc", "src/ops/compat/v0/broadcast_to_compat_v0.cc", "src/ops/compat/v0/cast_compat_v0.cc", @@ -77,8 +100,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/activation_grad_populate.cc", "src/ops/populate/activation_populate.cc", "src/ops/populate/adam_populate.cc", - "src/ops/populate/adder_populate.cc", "src/ops/populate/add_populate.cc", + "src/ops/populate/adder_populate.cc", "src/ops/populate/affine_populate.cc", "src/ops/populate/all_gather.cc", "src/ops/populate/argmax_populate.cc", @@ -120,8 +143,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/eltwise_populate.cc", "src/ops/populate/embedding_lookup_populate.cc", "src/ops/populate/erf_populate.cc", - "src/ops/populate/expand_dims_populate.cc", "src/ops/populate/exp_populate.cc", + "src/ops/populate/expand_dims_populate.cc", "src/ops/populate/fill_populate.cc", "src/ops/populate/flatten_populate.cc", "src/ops/populate/full_connection_populate.cc", @@ -147,12 +170,12 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/nonzero_populate.cc", "src/ops/populate/one_hot_populate.cc", "src/ops/populate/oneslike_populate.cc", + "src/ops/populate/p_relu_populate.cc", "src/ops/populate/pad_populate.cc", "src/ops/populate/partial_populate.cc", "src/ops/populate/pooling_populate.cc", "src/ops/populate/populate_register.cc", "src/ops/populate/power_populate.cc", - "src/ops/populate/p_relu_populate.cc", "src/ops/populate/prior_box_populate.cc", "src/ops/populate/quant_dtype_cast_populate.cc", "src/ops/populate/ragged_range_populate.cc", @@ -204,8 +227,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/v0/activation_grad_populate_v0.cc", "src/ops/populate/v0/activation_populate_v0.cc", "src/ops/populate/v0/adam_populate_v0.cc", - "src/ops/populate/v0/addn_populate_v0.cc", "src/ops/populate/v0/add_populate_v0.cc", + "src/ops/populate/v0/addn_populate_v0.cc", "src/ops/populate/v0/argmax_populate_v0.cc", "src/ops/populate/v0/argmin_populate_v0.cc", "src/ops/populate/v0/arithmetic_populate_v0.cc", @@ -241,8 +264,8 @@ ohos_shared_library("mindspore_lib") { "src/ops/populate/v0/div_populate_v0.cc", "src/ops/populate/v0/eltwise_populate_v0.cc", "src/ops/populate/v0/embedding_lookup_populate_v0.cc", - "src/ops/populate/v0/expand_dims_populate_v0.cc", "src/ops/populate/v0/exp_populate_v0.cc", + "src/ops/populate/v0/expand_dims_populate_v0.cc", "src/ops/populate/v0/fill_populate_v0.cc", "src/ops/populate/v0/flatten_populate_v0.cc", "src/ops/populate/v0/full_connection_populate_v0.cc", @@ -262,11 +285,11 @@ ohos_shared_library("mindspore_lib") { 
"src/ops/populate/v0/non_max_suppression_populate_v0.cc", "src/ops/populate/v0/one_hot_populate_v0.cc", "src/ops/populate/v0/oneslike_populate_v0.cc", + "src/ops/populate/v0/p_relu_populate_v0.cc", "src/ops/populate/v0/pad_populate_v0.cc", "src/ops/populate/v0/partial_populate_v0.cc", "src/ops/populate/v0/pooling_populate_v0.cc", "src/ops/populate/v0/power_populate_v0.cc", - "src/ops/populate/v0/p_relu_populate_v0.cc", "src/ops/populate/v0/prior_box_populate_v0.cc", "src/ops/populate/v0/quant_dtype_cast_populate_v0.cc", "src/ops/populate/v0/range_populate_v0.cc", @@ -356,8 +379,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/arithmetic_compare_fp32.cc", "src/runtime/kernel/arm/fp32/arithmetic_fp32.cc", "src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc", - "src/runtime/kernel/arm/fp32/batchnorm_fp32.cc", "src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc", + "src/runtime/kernel/arm/fp32/batchnorm_fp32.cc", "src/runtime/kernel/arm/fp32/bias_fp32.cc", "src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc", "src/runtime/kernel/arm/fp32/cast_fp32.cc", @@ -385,8 +408,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/fill_fp32.cc", "src/runtime/kernel/arm/fp32/fullconnection_fp32.cc", "src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc", - "src/runtime/kernel/arm/fp32/gather_fp32.cc", "src/runtime/kernel/arm/fp32/gatherNd_fp32.cc", + "src/runtime/kernel/arm/fp32/gather_fp32.cc", "src/runtime/kernel/arm/fp32/glu_fp32.cc", "src/runtime/kernel/arm/fp32/group_convolution_fp32.cc", "src/runtime/kernel/arm/fp32/gru_fp32.cc", @@ -397,8 +420,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc", "src/runtime/kernel/arm/fp32/log_softmax_fp32.cc", "src/runtime/kernel/arm/fp32/lstm_fp32.cc", - "src/runtime/kernel/arm/fp32/matmul_fp32_base.cc", "src/runtime/kernel/arm/fp32/matmul_fp32.cc", + "src/runtime/kernel/arm/fp32/matmul_fp32_base.cc", "src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc", "src/runtime/kernel/arm/fp32/nonzero_fp32.cc", "src/runtime/kernel/arm/fp32/pad_fp32.cc", @@ -423,7 +446,6 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/softmax_fp32.cc", "src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc", "src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc", - "src/runtime/kernel/arm/fp32_sparse/matmul_sparse_fp32.cc", "src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc", "src/runtime/kernel/arm/fp32/splice_fp32.cc", "src/runtime/kernel/arm/fp32/topk_fp32.cc", @@ -433,13 +455,14 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/fp32/unstack_fp32.cc", "src/runtime/kernel/arm/fp32/where_fp32.cc", "src/runtime/kernel/arm/fp32/zeroslike_fp32.cc", + "src/runtime/kernel/arm/fp32_sparse/matmul_sparse_fp32.cc", "src/runtime/kernel/arm/int8/activation_int8.cc", "src/runtime/kernel/arm/int8/add_int8.cc", "src/runtime/kernel/arm/int8/argminmax_int8.cc", "src/runtime/kernel/arm/int8/arithmetic_int8.cc", "src/runtime/kernel/arm/int8/arithmetic_self_int8.cc", - "src/runtime/kernel/arm/int8/batchnorm_int8.cc", "src/runtime/kernel/arm/int8/batch_to_space_int8.cc", + "src/runtime/kernel/arm/int8/batchnorm_int8.cc", "src/runtime/kernel/arm/int8/bias_add_int8.cc", "src/runtime/kernel/arm/int8/concat_int8.cc", "src/runtime/kernel/arm/int8/convolution_1x1_int8.cc", @@ -456,8 +479,8 @@ ohos_shared_library("mindspore_lib") { "src/runtime/kernel/arm/int8/detection_post_process_int8.cc", "src/runtime/kernel/arm/int8/div_int8.cc", 
"src/runtime/kernel/arm/int8/fullconnection_int8.cc", - "src/runtime/kernel/arm/int8/gather_int8.cc", "src/runtime/kernel/arm/int8/gatherNd_int8.cc", + "src/runtime/kernel/arm/int8/gather_int8.cc", "src/runtime/kernel/arm/int8/group_convolution_int8.cc", "src/runtime/kernel/arm/int8/hswish_int8.cc", "src/runtime/kernel/arm/int8/l2_norm_int8.cc", @@ -495,28 +518,16 @@ ohos_shared_library("mindspore_lib") { "src/runtime/runtime_allocator.cc", "src/runtime/runtime_convert.cc", "src/runtime/runtime_pass.cc", - "src/cpu_info.cc", - "src/errorcode.cc", - "src/executor.cc", - "src/huffman_decode.cc", - "src/inner_context.cc", - "src/inner_kernel.cc", - "src/kernel_registry.cc", - "src/lite_kernel.cc", - "src/lite_kernel_util.cc", - "src/lite_mindrt.cc", - "src/lite_model.cc", - "src/lite_session.cc", - "src/mindrt_executor.cc", - "src/ms_tensor.cc", "src/scheduler.cc", "src/schema_tensor_wrapper.cc", "src/sub_graph_kernel.cc", "src/sub_graph_split.cc", - "src/tensor_category.cc", "src/tensor.cc", + "src/tensor_category.cc", "src/tensorlist.cc", "src/weight_decoder.cc", + "tools/converter/quantizer/fse_bit_stream.cc", + "tools/converter/quantizer/fse_decoder.cc", ] include_dirs = [ @@ -532,7 +543,23 @@ ohos_shared_library("mindspore_lib") { "../../mindspore/core/mindrt/include/", ] - defines = [] + defines = [ "ENABLE_MINDRT" ] + + if (target_cpu == "arm") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM32", + "ENABLE_NEON", + ] + } else if (target_cpu == "arm64") { + defines += [ + "ENABLE_ARM", + "ENABLE_ARM64", + "ENABLE_NEON", + "ENABLE_FP16", + ] + } + cflags = [] cflags_c = [] cflags_cc = [] @@ -541,15 +568,93 @@ ohos_shared_library("mindspore_lib") { configs = [] deps = [ + ":third_party", "../ccsrc/backend/kernel_compiler/cpu/nnacl/:nnacl_o", "../core/mindrt/:mindrt_o", - ":third_party" ] - output_name = "libmindspore-lite.huawei" # 可选,模块输出名 - output_extension = "so" # 可选,模块名后缀 + if (target_cpu == "arm64") { + deps += [ ":arm_fp16_cc_o" ] + } + + output_name = "libmindspore-lite.huawei" + output_extension = "so" + + public_configs = [ ":mindspore_lib_config" ] + part_name = "mindspore" +} + +ohos_source_set("arm_fp16_cc_o") { + sources = [ + "src/runtime/kernel/arm/fp16/activation_fp16.cc", + "src/runtime/kernel/arm/fp16/addn_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_fp16.cc", + "src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc", + "src/runtime/kernel/arm/fp16/batchnorm_fp16.cc", + "src/runtime/kernel/arm/fp16/biasadd_fp16.cc", + "src/runtime/kernel/arm/fp16/cast_fp16.cc", + "src/runtime/kernel/arm/fp16/common_fp16.cc", + "src/runtime/kernel/arm/fp16/concat_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_fp16.cc", + "src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc", + "src/runtime/kernel/arm/fp16/crop_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_fp16.cc", + "src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc", + "src/runtime/kernel/arm/fp16/depth_to_space_fp16.cc", + "src/runtime/kernel/arm/fp16/exp_fp16.cc", + "src/runtime/kernel/arm/fp16/fill_fp16.cc", + 
"src/runtime/kernel/arm/fp16/fullconnection_fp16.cc", + "src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc", + "src/runtime/kernel/arm/fp16/gather_fp16.cc", + "src/runtime/kernel/arm/fp16/group_convolution_fp16.cc", + "src/runtime/kernel/arm/fp16/gru_fp16.cc", + "src/runtime/kernel/arm/fp16/instance_norm_fp16.cc", + "src/runtime/kernel/arm/fp16/layer_norm_fp16.cc", + "src/runtime/kernel/arm/fp16/layout_transform_fp16.cc", + "src/runtime/kernel/arm/fp16/log_softmax_fp16.cc", + "src/runtime/kernel/arm/fp16/lstm_fp16.cc", + "src/runtime/kernel/arm/fp16/matmul_base_fp16.cc", + "src/runtime/kernel/arm/fp16/matmul_fp16.cc", + "src/runtime/kernel/arm/fp16/pad_fp16.cc", + "src/runtime/kernel/arm/fp16/pooling_fp16.cc", + "src/runtime/kernel/arm/fp16/power_fp16.cc", + "src/runtime/kernel/arm/fp16/prelu_fp16.cc", + "src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc", + "src/runtime/kernel/arm/fp16/ragged_range_fp16.cc", + "src/runtime/kernel/arm/fp16/reduce_fp16.cc", + "src/runtime/kernel/arm/fp16/resize_fp16.cc", + "src/runtime/kernel/arm/fp16/scale_fp16.cc", + "src/runtime/kernel/arm/fp16/slice_fp16.cc", + "src/runtime/kernel/arm/fp16/softmax_fp16.cc", + "src/runtime/kernel/arm/fp16/stack_fp16.cc", + "src/runtime/kernel/arm/fp16/transpose_fp16.cc", + "src/runtime/kernel/arm/fp16/where_fp16.cc", + "src/runtime/kernel/arm/fp16_grad/activation_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_self_grad.cc", + "src/runtime/kernel/arm/fp16_grad/bias_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_filter.cc", + "src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_input.cc", + "src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/neg_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/strided_slice_fp16_grad.cc", + "src/runtime/kernel/arm/fp16_grad/unsorted_segment_sum_fp16.cc", + ] + + include_dirs = [ "../ccsrc/backend/kernel_compiler/cpu/" ] - part_name = "mindspore" # 必选,所属部件名称 + part_name = "mindspore" } action("third_party") { diff --git a/mindspore/lite/bundle.json b/mindspore/lite/bundle.json index 0ea17236db..891e22639d 100644 --- a/mindspore/lite/bundle.json +++ b/mindspore/lite/bundle.json @@ -22,7 +22,7 @@ "third_party": [] }, "build": { - "sub_component": [ "//third_party/mindspore/mindspore/lite:mindspore_lib" ], + "sub_component": [ "//third_party/mindspore/mindspore/lite:mindspore" ], "inner_kits": [], "test": [ "//third_party/mindspore/mindspore/lite/test:mindspore_test" ] } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index bbdfca301e..84eeda7eca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -152,7 +152,7 @@ void ArithmeticFP16CPUKernel::TileConstTensor(const void *in_data, void *out_dat in_shape, in_strides, out_strides, multiple); } -int ArithmeticFP16CPUKernel::Execute(const void *input0, const void *input1, void *output, int size, bool is_opt) { +int ArithmeticFP16CPUKernel::DoExecute(const void *input0, const void *input1, void *output, int size, bool is_opt) { int ret = RET_OK; if (is_opt) { 
     CHECK_NULL_RETURN(arithmetic_opt_func_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
index a0c746cce8..24bdf23e5a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h
@@ -49,7 +49,7 @@ class ArithmeticFP16CPUKernel : public ArithmeticCPUKernel {
   int ConstTensorBroadCast() override;
   void TileConstTensor(const void *in_data, void *out_data, size_t ndim, const int *in_shape,
                        const int *in_strides, const int *out_strides, const int *multiple) override;
-  int Execute(const void *input0, const void *input1, void *output, int size, bool is_opt) override;
+  int DoExecute(const void *input0, const void *input1, void *output, int size, bool is_opt) override;
   void FreeFp16Buffer();
   ArithmeticFuncFp16 arithmetic_func_ = nullptr;
   ArithmeticOptFuncFp16 arithmetic_opt_func_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
index 4846164516..b9f55fa533 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
@@ -94,7 +94,7 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwise3x3Fp16CPUKernel::DoExecute(int task_id) {
   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
   auto buffer = buffer_ + C12NUM * c8 * units * task_id;
@@ -108,7 +108,7 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
 
 int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw = reinterpret_cast<ConvolutionDepthwise3x3Fp16CPUKernel *>(cdata);
-  auto ret = conv_dw->Execute(task_id);
+  auto ret = conv_dw->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwise3x3Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
index 0b77b16cc0..125b96930b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
@@ -36,7 +36,7 @@ class ConvolutionDepthwise3x3Fp16CPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
index b0fa1ef2c6..856e32f5a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@@ -90,7 +90,7 @@ int ConvolutionDepthwiseFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwiseFp16CPUKernel::DoExecute(int task_id) {
   auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data());
   auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data());
   MS_ASSERT(input_ptr != nullptr);
@@ -106,7 +106,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
 
 static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseFp16CPUKernel *>(cdata);
-  auto ret = conv_dw_fp16->Execute(task_id);
+  auto ret = conv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
index cdefc1456b..f1bbf9fc5d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
@@ -44,7 +44,7 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index 2066dc66a7..634bf83e29 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -128,7 +128,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
+int ConvolutionDepthwiseSWFp16CPUKernel::DoExecute(int task_id) {
   ConvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
                reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -136,7 +136,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
 
 static int ConvDwSWFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw_fp16 = reinterpret_cast<ConvolutionDepthwiseSWFp16CPUKernel *>(cdata);
-  auto ret = conv_dw_fp16->Execute(task_id);
+  auto ret = conv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseSWFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index f732061a11..8657ad83ec 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -46,7 +46,7 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
 
   int InitPackedInputOutput();
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void PackWeight() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
index 9edf2ce18c..630d1360ae 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
@@ -151,7 +151,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
+int DeconvolutionDepthwiseFp16CPUKernel::DoExecute(int task_id) {
   DeconvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
                  reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -159,7 +159,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
 
 static int DeconvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto deconv_dw_fp16 = reinterpret_cast<DeconvolutionDepthwiseFp16CPUKernel *>(cdata);
-  auto ret = deconv_dw_fp16->Execute(task_id);
+  auto ret = deconv_dw_fp16->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvolutionDepthwiseFp16Run error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
index 5f4e7cfc17..c3769730ad 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
@@ -48,7 +48,7 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
 
   int InitPackedInputOutput();
   int InitSlideParam();
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   int MallocWeightBiasData() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index 7bacce7070..d927a6e9a1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -83,7 +83,7 @@ int StackFp16CPUKernel::Prepare() {
   return ReSize();
 }
 
-int StackFp16CPUKernel::Execute(int task_id) {
+int StackFp16CPUKernel::DoExecute(int task_id) {
   auto inputs = buffers_.data();
   void *output_data = reinterpret_cast<void *>(out_buffer_);
   auto step = UP_DIV(outer_size_, num_threads_);
@@ -99,7 +99,7 @@ int StackFp16CPUKernel::Execute(int task_id) {
 
 static int StackRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto stack = reinterpret_cast<StackFp16CPUKernel *>(cdata);
-  if (stack->Execute(task_id) != RET_OK) {
+  if (stack->DoExecute(task_id) != RET_OK) {
     return RET_ERROR;
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
index 2aa3d8278a..37a5eb1901 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
@@ -29,7 +29,7 @@ class StackFp16CPUKernel : public StackBaseCPUKernel {
   ~StackFp16CPUKernel() override = default;
   int Prepare() override;
   int Run() override;
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void InitMallocFlags();
diff --git a/mindspore/lite/tools/benchmark/BUILD.gn b/mindspore/lite/tools/benchmark/BUILD.gn
new file mode 100644
index 0000000000..44d643bec0
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/BUILD.gn
@@ -0,0 +1,53 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import("//build/ohos.gni")
+
+ohos_executable("benchmark_bin") {
+  sources = [
+    "benchmark_base.cc",
+    "benchmark_c_api.cc",
+    "benchmark_unified_api.cc",
+    "benchmark.cc",
+    "main.cc",
+    "run_benchmark.cc",
+    "../common/flag_parser.cc",
+    "../common/string_util.cc",
+    "../../src/common/file_utils.cc",
+    "../../src/common/utils.cc",
+    "../../../ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_common.c",
+  ]
+
+  include_dirs = [
+    "../../",
+    "../../../core/",
+    "../../../../",
+    "../../../../third_party/",
+    "../../../../third_party/flatbuffers-v2.0.0/include/",
+    "../../../ccsrc/backend/kernel_compiler/cpu/"
+  ]
+
+  defines = [ "BENCHMARK_CLIP_JSON" ]
+
+  cflags = [ "-frtti" ]
+
+  deps = [
+    "../../:mindspore_lib",
+  ]
+
+  output_name = "benchmark_bin"
+  install_enable = true
+  part_name = "mindspore"
+}
-- 
Gitee