From 44f4b61a341d19603de42546023f03dc40d8449d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?=
Date: Wed, 16 Jul 2025 06:39:43 +0000
Subject: [PATCH 1/6] =?UTF-8?q?=E3=80=90oec-ascend=E3=80=91AscendC?=
 =?UTF-8?q?=E7=AE=97=E5=AD=90=E4=BB=BF=E7=9C=9F=E7=94=A8=E4=BE=8B=E9=80=82?=
 =?UTF-8?q?=E9=85=8D=E7=BB=84=E5=90=88=E5=8C=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../oec/resource/KernelDev/AddKernelInvocationNeo/run.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh
index 7125d0e0..097440d2 100755
--- a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh
+++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh
@@ -83,7 +83,7 @@ echo "Current compile soc version is ${SOC_VERSION}"
 source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash
 if [ "${RUN_MODE}" = "sim" ]; then
     # in case of running op in simulator, use stub .so instead
-    export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH
     if [ ! $CAMODEL_LOG_PATH ]; then
         export CAMODEL_LOG_PATH=$(pwd)/sim_log
     fi
@@ -92,7 +92,7 @@ if [ "${RUN_MODE}" = "sim" ]; then
     fi
     mkdir -p $CAMODEL_LOG_PATH
 elif [ "${RUN_MODE}" = "cpu" ]; then
-    export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH
 fi
 mkdir -p "$OUTPUT_DIR"
 cd "$OUTPUT_DIR"
@@ -115,7 +115,7 @@ rm -rf input output
 mkdir -p input output
 python3 "${CURRENT_DIR}/scripts/gen_data.py"
 (
-    export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:$LD_LIBRARY_PATH
     if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then
         if [ "${RUN_MODE}" = "npu" ]; then
             msprof op --application=./ascendc_kernels_bbit
--
Gitee

From 9329589a288dfa4242559cf33b7dca7c73059bd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?=
Date: Wed, 16 Jul 2025 06:54:32 +0000
Subject: [PATCH 2/6] =?UTF-8?q?Revert=20"=E4=B8=8D=E5=AF=B9=E5=A4=96?=
 =?UTF-8?q?=E5=91=88=E7=8E=B0oec-ascend=E5=B7=A5=E5=85=B7"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 76eab288914f89076bb76a748145d536ed884d53.
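
The revert re-adds the oec test framework, in which every ascend_test_*.py file under oec/resource registers its checks by instantiating oec.TestCase. A minimal sketch of that registration pattern, based on the constructor restored by this patch, follows; the name "EXAMPLE_CASE", the cmd path and the timeout value are illustrative placeholders only, not part of the change.

    # encoding: utf-8
    import oec

    oec.TestCase(
        group=("应用开发", "算子加速库"),   # (module, sub-module) pair used to group results in the report
        name="EXAMPLE_CASE",                 # must be unique across all registered test cases
        cmd=f"./run.sh {oec.Context.output_dir}/tmp/example",  # by default runs in this file's directory
        expect=[0],                          # return codes counted as success (the default)
        timeout=600,                         # hypothetical wall-clock limit in seconds
    )

By default a case also fails when words such as "failed" or "error" appear in the command output (the TestCase exclude patterns), so wrapper scripts are expected to keep their logs clean on success.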
--- oec-ascend/MANIFEST.in | 5 + oec-ascend/README.md | 85 ++ oec-ascend/oec/BaseTest.py | 185 ++++ oec-ascend/oec/BaseTestCase.py | 189 ++++ oec-ascend/oec/BaseTypes.py | 16 + oec-ascend/oec/TestContext.py | 199 ++++ oec-ascend/oec/TestInterface.py | 34 + oec-ascend/oec/TestReport.py | 111 +++ oec-ascend/oec/TestUtils.py | 85 ++ oec-ascend/oec/__init__.py | 4 + oec-ascend/oec/main.py | 189 ++++ .../aclnn/aclnnAdd/CMakeLists.txt | 40 + .../aclnn/aclnnAdd/ascend_test_nn.py | 8 + .../ApplicationDev/aclnn/aclnnAdd/run.sh | 11 + .../aclnn/aclnnAdd/testcase.cpp | 175 ++++ .../aclnn/aclnnAddlayerNorm/CMakeLists.txt | 40 + .../aclnn/aclnnAddlayerNorm/ascend_test_nn.py | 11 + .../aclnn/aclnnAddlayerNorm/run.sh | 11 + .../aclnn/aclnnAddlayerNorm/testcase.cpp | 129 +++ .../aclnn/aclnnAllGatherMatmul/CMakeLists.txt | 49 + .../aclnnAllGatherMatmul/ascend_test_nn.py | 12 + .../aclnn/aclnnAllGatherMatmul/run.sh | 13 + .../aclnn/aclnnAllGatherMatmul/testcase.cpp | 207 ++++ .../aclnnFlashAttentionScore/CMakeLists.txt | 40 + .../ascend_test_nn.py | 11 + .../aclnn/aclnnFlashAttentionScore/run.sh | 11 + .../aclnnFlashAttentionScore/testcase.cpp | 206 ++++ .../aclnn/aclnnMatmul/CMakeLists.txt | 40 + .../aclnn/aclnnMatmul/ascend_test_nn.py | 11 + .../ApplicationDev/aclnn/aclnnMatmul/run.sh | 11 + .../aclnn/aclnnMatmul/testcase.cpp | 138 +++ .../aclnn/aclnnSilu/CMakeLists.txt | 40 + .../aclnn/aclnnSilu/ascend_test_nn.py | 11 + .../ApplicationDev/aclnn/aclnnSilu/run.sh | 11 + .../aclnn/aclnnSilu/testcase.cpp | 127 +++ .../aclnn/aclnnSoftmax/CMakeLists.txt | 40 + .../aclnn/aclnnSoftmax/ascend_test_nn.py | 11 + .../ApplicationDev/aclnn/aclnnSoftmax/run.sh | 11 + .../aclnn/aclnnSoftmax/testcase.cpp | 129 +++ .../ApplicationDev/atb/CMakeLists.txt | 46 + .../atb/aclnn/aclnn_gelu_operation.cpp | 179 ++++ .../atb/aclnn/aclnn_gelu_operation.h | 32 + .../atb/aclnn/aclnn_operation_base.cpp | 116 +++ .../atb/aclnn/aclnn_operation_base.h | 57 ++ .../ApplicationDev/atb/ascend_test_atb.py | 7 + .../ApplicationDev/atb/atb/atb_graph_op.cpp | 59 ++ .../ApplicationDev/atb/atb/atb_graph_op.h | 15 + .../oec/resource/ApplicationDev/atb/main.cpp | 57 ++ .../ApplicationDev/atb/memory/memory_env.h | 12 + .../atb/memory/memory_utils.cpp | 63 ++ .../ApplicationDev/atb/memory/memory_utils.h | 38 + .../ApplicationDev/atb/memory/memorypool.cpp | 104 ++ .../ApplicationDev/atb/memory/memorypool.h | 38 + .../ApplicationDev/atb/model/model.cpp | 263 +++++ .../resource/ApplicationDev/atb/model/model.h | 122 +++ .../oec/resource/ApplicationDev/atb/run.sh | 33 + .../resource/ApplicationDev/atb/utils/log.cpp | 28 + .../resource/ApplicationDev/atb/utils/log.h | 94 ++ .../ApplicationDev/atb/utils/utils.cpp | 70 ++ .../resource/ApplicationDev/atb/utils/utils.h | 39 + .../base_function_test/ascend_test_pyacl.py | 16 + .../base_function_test/test_acl_device.py | 133 +++ .../base_function_test/test_acl_event.py | 328 +++++++ .../base_function_test/utils.py | 40 + .../media/jpeg_vpc_test/CMakeLists.txt | 9 + .../jpeg_vpc_test/ascend_test_jpeg_vpc.py | 21 + .../media/jpeg_vpc_test/inc/dvpp_process.h | 164 ++++ .../media/jpeg_vpc_test/inc/sample_process.h | 63 ++ .../media/jpeg_vpc_test/inc/utils.h | 128 +++ .../ApplicationDev/media/jpeg_vpc_test/run.sh | 11 + .../media/jpeg_vpc_test/src/CMakeLists.txt | 49 + .../media/jpeg_vpc_test/src/dvpp_process.cpp | 895 ++++++++++++++++++ .../media/jpeg_vpc_test/src/main.cpp | 68 ++ .../jpeg_vpc_test/src/sample_process.cpp | 218 +++++ .../media/jpeg_vpc_test/src/utils.cpp | 422 +++++++++ 
.../media/vdec_test/CMakeLists.txt | 9 + .../media/vdec_test/ascend_test_vdec.py | 8 + .../media/vdec_test/inc/dvpp_process.h | 100 ++ .../media/vdec_test/inc/sample_process.h | 77 ++ .../media/vdec_test/inc/utils.h | 133 +++ .../media/vdec_test/inc/vdec_process.h | 91 ++ .../ApplicationDev/media/vdec_test/run.sh | 10 + .../media/vdec_test/src/CMakeLists.txt | 52 + .../media/vdec_test/src/dvpp_process.cpp | 232 +++++ .../media/vdec_test/src/main.cpp | 43 + .../media/vdec_test/src/sample_process.cpp | 270 ++++++ .../media/vdec_test/src/utils.cpp | 355 +++++++ .../media/vdec_test/src/vdec_process.cpp | 314 ++++++ .../test_acl_op/ascend_test_pyacl.py | 8 + .../test_acl_op/test_acl_op.py | 389 ++++++++ .../pyacl_test_case/test_acl_op/utils.py | 40 + .../test_acl_vpc/ascend_test_pyacl.py | 8 + .../test_acl_vpc/test_acl_vpc.py | 310 ++++++ .../pyacl_test_case/test_acl_vpc/utils.py | 40 + .../resnet50_offline/ascend_test_resnet.py | 7 + .../resnet50_offline/resnet50.cpp | 327 +++++++ .../ApplicationDev/resnet50_offline/run.sh | 24 + .../resource/Environment/ascend_test_cann.py | 7 + .../Environment/ascend_test_dependency.py | 15 + .../resource/Environment/ascend_test_env.py | 146 +++ .../Environment/check_package_version.py | 393 ++++++++ .../oec/resource/Environment/dependency.sh | 91 ++ .../resource/Environment/get_cann_version.py | 12 + .../oec/resource/Environment/get_npu_info.py | 9 + .../Environment/install_and_check_cann.sh | 9 + .../Environment/install_cann_packages.sh | 83 ++ .../AddKernelInvocationNeo/CMakeLists.txt | 44 + .../AddKernelInvocationNeo/add_custom.cpp | 89 ++ .../ascend_test_kerneldev.py | 28 + .../cmake/cpu_lib.cmake | 9 + .../cmake/npu_lib.cmake | 11 + .../AddKernelInvocationNeo/data_utils.h | 203 ++++ .../KernelDev/AddKernelInvocationNeo/main.cpp | 82 ++ .../KernelDev/AddKernelInvocationNeo/run.sh | 132 +++ .../scripts/gen_data.py | 25 + .../scripts/verify_result.py | 53 ++ .../AddKernelmsOpGen/add_custom.json | 40 + .../AddKernelmsOpGen/ascend_test_kernel.py | 7 + .../KernelDev/AddKernelmsOpGen/run.sh | 5 + .../KernelDev/HelloWorld/CMakeLists.txt | 47 + .../HelloWorld/ascend_test_kernel.py | 13 + .../KernelDev/HelloWorld/hello_world.cpp | 10 + .../resource/KernelDev/HelloWorld/main.cpp | 24 + .../oec/resource/KernelDev/HelloWorld/run.sh | 44 + .../KernelDev/bisheng/QuickStartDemo.cce | 56 ++ .../KernelDev/bisheng/ascend_test_kernel.py | 7 + .../oec/resource/KernelDev/bisheng/build.sh | 18 + .../KernelDev/dsl_vabs/ascend_test_tbe.py | 8 + .../resource/KernelDev/dsl_vabs/dsl_vabs.py | 33 + .../KernelDev/tik_matmul/ascend_test_tbe.py | 8 + .../KernelDev/tik_matmul/tik_matmul.py | 174 ++++ .../ModelDev/AOETool/ascend_test_aoe.py | 10 + .../oec/resource/ModelDev/ATCTool/add.json | 25 + .../ModelDev/ATCTool/ascend_test_atc.py | 30 + .../oec/resource/ModelDev/ATCTool/conv2d.json | 43 + .../ModelDev/ATCTool/dynamic_shape.json | 29 + .../oec/resource/ModelDev/ATCTool/runatc.sh | 6 + .../ModelDev/HCCLTest/ascend_test_hccl.py | 33 + .../HCCLTest/check_package_version.py | 303 ++++++ oec-ascend/oec/resource/base_report.xlsx | Bin 0 -> 11560 bytes oec-ascend/oec/resource/test_sequence.py | 19 + oec-ascend/setup.py | 18 + 142 files changed, 11991 insertions(+) create mode 100644 oec-ascend/MANIFEST.in create mode 100644 oec-ascend/README.md create mode 100644 oec-ascend/oec/BaseTest.py create mode 100644 oec-ascend/oec/BaseTestCase.py create mode 100644 oec-ascend/oec/BaseTypes.py create mode 100644 oec-ascend/oec/TestContext.py create mode 100644 oec-ascend/oec/TestInterface.py 
create mode 100644 oec-ascend/oec/TestReport.py create mode 100644 oec-ascend/oec/TestUtils.py create mode 100644 oec-ascend/oec/__init__.py create mode 100644 oec-ascend/oec/main.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/ascend_test_nn.py create mode 100755 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/testcase.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/ascend_test_atb.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/main.cpp create mode 
100644 oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_env.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/model/model.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/model/model.h create mode 100755 oec-ascend/oec/resource/ApplicationDev/atb/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/utils/log.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/utils/log.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/base_function_test/ascend_test_pyacl.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_device.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_event.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/base_function_test/utils.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/ascend_test_jpeg_vpc.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/dvpp_process.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/sample_process.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/utils.h create mode 100755 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/dvpp_process.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/main.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/sample_process.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/utils.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/ascend_test_vdec.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/dvpp_process.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/sample_process.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/utils.h create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/vdec_process.h create mode 100755 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/run.sh create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/dvpp_process.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/main.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/sample_process.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/utils.cpp create mode 100644 oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/vdec_process.cpp create mode 
100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/ascend_test_pyacl.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/test_acl_op.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/utils.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/ascend_test_pyacl.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/test_acl_vpc.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/utils.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/resnet50_offline/ascend_test_resnet.py create mode 100644 oec-ascend/oec/resource/ApplicationDev/resnet50_offline/resnet50.cpp create mode 100755 oec-ascend/oec/resource/ApplicationDev/resnet50_offline/run.sh create mode 100644 oec-ascend/oec/resource/Environment/ascend_test_cann.py create mode 100644 oec-ascend/oec/resource/Environment/ascend_test_dependency.py create mode 100644 oec-ascend/oec/resource/Environment/ascend_test_env.py create mode 100644 oec-ascend/oec/resource/Environment/check_package_version.py create mode 100755 oec-ascend/oec/resource/Environment/dependency.sh create mode 100644 oec-ascend/oec/resource/Environment/get_cann_version.py create mode 100644 oec-ascend/oec/resource/Environment/get_npu_info.py create mode 100755 oec-ascend/oec/resource/Environment/install_and_check_cann.sh create mode 100755 oec-ascend/oec/resource/Environment/install_cann_packages.sh create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/add_custom.cpp create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/ascend_test_kerneldev.py create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/cpu_lib.cmake create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/npu_lib.cmake create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/data_utils.h create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/main.cpp create mode 100755 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/gen_data.py create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/verify_result.py create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/add_custom.json create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/ascend_test_kernel.py create mode 100644 oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/run.sh create mode 100644 oec-ascend/oec/resource/KernelDev/HelloWorld/CMakeLists.txt create mode 100644 oec-ascend/oec/resource/KernelDev/HelloWorld/ascend_test_kernel.py create mode 100644 oec-ascend/oec/resource/KernelDev/HelloWorld/hello_world.cpp create mode 100644 oec-ascend/oec/resource/KernelDev/HelloWorld/main.cpp create mode 100755 oec-ascend/oec/resource/KernelDev/HelloWorld/run.sh create mode 100644 oec-ascend/oec/resource/KernelDev/bisheng/QuickStartDemo.cce create mode 100644 oec-ascend/oec/resource/KernelDev/bisheng/ascend_test_kernel.py create mode 100755 oec-ascend/oec/resource/KernelDev/bisheng/build.sh create mode 100644 oec-ascend/oec/resource/KernelDev/dsl_vabs/ascend_test_tbe.py create mode 100644 oec-ascend/oec/resource/KernelDev/dsl_vabs/dsl_vabs.py create mode 100644 
oec-ascend/oec/resource/KernelDev/tik_matmul/ascend_test_tbe.py create mode 100644 oec-ascend/oec/resource/KernelDev/tik_matmul/tik_matmul.py create mode 100644 oec-ascend/oec/resource/ModelDev/AOETool/ascend_test_aoe.py create mode 100644 oec-ascend/oec/resource/ModelDev/ATCTool/add.json create mode 100644 oec-ascend/oec/resource/ModelDev/ATCTool/ascend_test_atc.py create mode 100644 oec-ascend/oec/resource/ModelDev/ATCTool/conv2d.json create mode 100644 oec-ascend/oec/resource/ModelDev/ATCTool/dynamic_shape.json create mode 100644 oec-ascend/oec/resource/ModelDev/ATCTool/runatc.sh create mode 100644 oec-ascend/oec/resource/ModelDev/HCCLTest/ascend_test_hccl.py create mode 100644 oec-ascend/oec/resource/ModelDev/HCCLTest/check_package_version.py create mode 100644 oec-ascend/oec/resource/base_report.xlsx create mode 100644 oec-ascend/oec/resource/test_sequence.py create mode 100644 oec-ascend/setup.py diff --git a/oec-ascend/MANIFEST.in b/oec-ascend/MANIFEST.in new file mode 100644 index 00000000..21b70bea --- /dev/null +++ b/oec-ascend/MANIFEST.in @@ -0,0 +1,5 @@ +# MANIFEST.in +recursive-include oec/resource * +recursive-include oec/data * +global-exclude __pycache__ +global-exclude *.pyc \ No newline at end of file diff --git a/oec-ascend/README.md b/oec-ascend/README.md new file mode 100644 index 00000000..f2871e62 --- /dev/null +++ b/oec-ascend/README.md @@ -0,0 +1,85 @@ +# oec-ascend + +## 介绍 + +oec-ascend (昇腾软件操作系统兼容性验证工具)包含一套基础测试用例,用于检查操作系统软件和昇腾软件之间的兼容性。 + +## 功能 + +- 运行环境检查 +- 应用开发检查 +- 算子开发检查 +- 模型开发检查 +- 集成测试验证 + +## 快速开始 + +### 安装 + +#### 通过预构建离线安装包安装 + +``` +wget https://ascend-cann-open.obs.cn-north-4.myhuaweicloud.com/cann-os/oec_ascend_compatibility-1.0-py3-none-any.whl +pip install oec_ascend_compatibility-1.0-py3-none-any.whl +``` + +#### 网络问题 + +如果wget遇到证书相关的错误,可以手动下载安装包上传安装,或者增加`--no-check-certificate`参数规避(**不建议绕过SSL**,请确认网络环境安全) + +下载链接:[oec_ascend_compatibility-1.0-py3-none-any.whl](https://ascend-cann-open.obs.cn-north-4.myhuaweicloud.com/cann-os/oec_ascend_compatibility-1.0-py3-none-any.whl) + +### 使用 + +#### 必要步骤 + +1. 准备一个安装有昇腾NPU硬件的环境。 +2. 访问 [昇腾社区资源中心](https://www.hiascend.com/developer/download/community) ,获取昇腾驱动固件和CANN软件安装包。 +3. 请参考 [昇腾社区文档](https://www.hiascend.com/document),安装昇腾驱动,固件软件。 +4. 请参考 [昇腾社区文档](https://www.hiascend.com/document),安装Ascend-cann-Toolkit,Ascend-cann-kernels,Ascend-cann-nnal等CANN软件。 +5. 安装后检查您的安装流程是否有遗漏,请务必确保CANN软件和驱动软件运行**所需要的依赖已经正确安装** + +#### 系统依赖 + +除了安装Ascend驱动固件,CANN软件包所必须安装的依赖外,您**还需要安装 cmake, g++** 用于支持部分测试用例在您的系统架构下自动构建。请参考您的操作系统的操作指南,安装这些第三方依赖 + +#### 安装ais_bench_net_test工具 + +ais_bench_net_test是用于HCCL相关用例测试的python工具,如果您不需要测试hccl可以不安装,相关用例将直接失败跳过 + +请参考[ais_bench_net_test工具安装与卸载文档](https://gitee.com/ascend/tools/tree/develop/ais-bench_workload/tool/net_test#%E5%B7%A5%E5%85%B7%E5%AE%89%E8%A3%85%E4%B8%8E%E5%8D%B8%E8%BD%BD) + +#### 准备测试资源 + +在**剩余空间充足**的硬盘上创建一个**空目录**,例如cann_test, +上传需要验证安装和卸载的toolkit,kernels,nnal的**run格式安装包**到该路径下,并给软件安装包**添加执行权限**,如果您不需要测试昇腾软件的安装卸载,可以不上传,相关用例将直接失败跳过 + +#### 运行 + +**注意**:请按照CANN软件安装指南中的说明配置当前环境安装的CANN的环境变量,如果不配置环境变量,默认使用usr/local/Ascend目录下的CANN软件包 + +```bash +oec-ascend +``` + +### 查看运行报告 + +工具会在运行目录下生成"output/<时间戳>"目录存放测试报告,日志和临时文件,请下载 output/<时间戳>/report.xlsx 文件到本地使用表格软件打开查看,时间戳为兼容性验证工具启动时的时间,可以在工具运行的打屏信息中找到报告的的生成路径。 + +其中: +**sheet1为整体功能模块通过率,sheet2为各个测试用例运行情况和测试内容存放路径** + +## 通过源码安装 + +``` +git clone https://gitee.com/ascend/tools.git +cd tools/oec-ascend/oec +# 下载解压资源包 +wget https://ascend-cann-open.obs.cn-north-4.myhuaweicloud.com/cann-os/data.tar +tar -xvf data.tar +#安装oec-ascend工具 +cd .. 
+pip install -e . +``` + +后续使用方式与快速开始章节中使用章节内容一致 diff --git a/oec-ascend/oec/BaseTest.py b/oec-ascend/oec/BaseTest.py new file mode 100644 index 00000000..5ea12473 --- /dev/null +++ b/oec-ascend/oec/BaseTest.py @@ -0,0 +1,185 @@ +# encoding: utf-8 +import os +import threading +import inspect +from typing import Tuple # 兼容python3.7 +from logging import getLogger +from oec.TestInterface import TestInterface +from oec.TestContext import TestContext +from oec.BaseTypes import State + +logger = getLogger("oec-ascend") + + +Context:TestContext = TestContext() + + +class BaseTest(TestInterface): + def __init__( + self, + group: Tuple[str, str], + name: str, + optional: bool = True, + auxiliary: bool = False, + cached: bool = True, + log_dir: str = "", + ): + self._context: TestContext = Context + self._name: str = name + self._group = group + self._optional: bool = optional + self._state: State = State.NOT_RUNNING + self._auxiliary: bool = auxiliary + self._cached: bool = cached + self._log_dir_path = log_dir if log_dir else self._context.get_log_dir() + self._lock = threading.Lock() + self._filename = None + self._lineno = None + for stack in inspect.stack()[1:]: + if stack.function != "__init__": + self._filename = stack.filename + self._lineno = stack.lineno + break + + if not name: + raise ValueError(self.message_with_path("name can not be empty.")) + self.context.add_test(self) + + @property + def group(self): + return self._group + + @property + def context(self): + return self._context + + def message_with_path(self, message): + return f"{message} File {self.get_origin_path()}:{self.get_origin_lineno()}" + + @property + def state(self): + return self._state + + def can_cached(self): + return self._cached + + def is_finished(self): + return self.state not in [State.NOT_RUNNING, State.RUNNING] + + def can_continue(self): + if self.is_passed(): + return True + + if self.is_failed() and self.is_optional(): + return True + + return False + + def is_failed(self): + if self.state in [State.FAIL, State.TIMEOUT, State.UNSUPPORTED]: + return True + return False + + def is_passed(self): + if self.state in [State.PASS, State.NOTHING_TO_DO, State.WARNING]: + return True + return False + + def set_reason(self, str): + raise NotImplementedError() + + def get_reason(self): + raise NotImplementedError() + + def get_log_dir_path(self): + if self._log_dir_path is None: + raise RuntimeError("log dir path is not set") + return self._log_dir_path + + def set_log_dir_path(self, path): + if not isinstance(path, str): + raise TypeError("The path must be a str") + self._log_dir_path = os.path.abspath(path) + + def get_origin_lineno(self): + return self._lineno + + def get_origin_path(self): + return self._filename + + def run(self): + self._lock.acquire() + if self.is_finished() and self.can_cached(): + logger.debug( + f"The test {self.name} has been completed, using cached results" + ) + return + self.set_state(State.NOT_RUNNING) + try: + self.execute_command() + except Exception as e: + self.set_state(State.FAIL) + self.set_reason(f"{e}") + if self.is_failed(): + logger.debug( + f"{self.name} is {self.state.value}, reason: {self.get_reason()}" + ) + self._lock.release() + + def execute_command(self): + raise NotImplementedError() + + def count(self): + return 1 + + def set_name(self, name): + if not isinstance(name, str): + raise TypeError("name must be a string") + self._name = name + + @property + def name(self): + return self._name + + def is_optional(self): + return self._optional + + def set_optional(self, 
optional: bool): + if not isinstance(optional, bool): + raise TypeError("optional must be bool type") + self._optional = True + + @property + def auxiliary(self): + return self._auxiliary + + def set_state_if_not_finished(self, state: State): + if not self.is_finished(): + self.set_state(state) + + def set_state(self, state: State): + if not isinstance(state, State): + raise TypeError("state must be of type State") + if self.auxiliary and state == State.FAIL: + state=State.WARNING + + self.context.distribution[self.state] -= 1 + self._state = state + self.context.distribution[state] += 1 + + def get_test_content(self): + return ( + self.get_relative_log_file_path() + if self.is_finished() + else "No information due to the previous error." + ) + + @property + def logger(self): + return logger + + def __str__(self): + return self.name + + def __repr__(self): + return str(self) diff --git a/oec-ascend/oec/BaseTestCase.py b/oec-ascend/oec/BaseTestCase.py new file mode 100644 index 00000000..0d79c1ab --- /dev/null +++ b/oec-ascend/oec/BaseTestCase.py @@ -0,0 +1,189 @@ +# encoding: utf-8 +import os +import subprocess +from logging import getLogger +import re +from typing import List # 兼容python3.7 +from oec.BaseTest import BaseTest +from oec.BaseTypes import State + +logger = getLogger("oec-ascend") + + +class TestCase(BaseTest): + def __init__( + self, + cmd: str = None, + include: List[str] = None, + exclude: List[str] =[r"\bfailed\b", r"\bFailed\b", r"\bFAILED\b", + r"\berror\b", r"\bERROR\b", r"\bError\b"], + expect: List[int] = [0], + unexpect: List[int] = None, + count=1, + cwd=None, + timeout=None, + *args, + **kwargs, + ): + super(TestCase, self).__init__(*args, **kwargs) + self._count = count + self._cmd = cmd + + self._include = include + self._exclude = exclude + self._expected_code = expect + self._unexpected_code = unexpect + self.__reason = None + self._log = "" + self._retrun_code = 0 + self._cwd = cwd + self._timeout = timeout + if isinstance(self._include, str): + self._include = [self._include] + if isinstance(self._exclude, str): + self._exclude = [self._exclude] + if isinstance(self._expected_code, int): + self._expected_code = [self._expected_code] + if isinstance(self._unexpected_code, int): + self._unexpected_code = [self._unexpected_code] + + logger.debug(f"test case{self.group[0]}.{self.group[1]}.{self.name} ") + + @property + def cwd(self): + return self._cwd + + def set_reason(self, reason: str): + if not isinstance(reason, str): + raise TypeError(f"reason must be a string") + self.__reason = reason + + def get_reason(self): + return self.__reason + + def get_include(self): + return self._include + + def get_exclude(self): + return self._exclude + + def get_expected_code(self): + return self._expected_code + + def get_unexpected_code(self): + return self._unexpected_code + + def get_relative_log_file_path(self): + return f"{self._context.relative_output}{os.sep}logs{os.sep}{self.name}.log" + + def get_log_file_path(self): + return os.path.join(self.get_log_dir_path(), f"{self.name}.log") + + def get_test_content(self): + return ( + self.get_relative_log_file_path() + if self.is_finished() + else "No information due to the previous error." 
+ ) + + def execute_command_with_cmd(self, cmd): + if self.state != State.NOT_RUNNING: + return + if cmd is None: + self.set_state(State.NOTHING_TO_DO) + return + self.set_state(State.RUNNING) + log = None + return_code = None + with open(self.get_log_file_path(), "w+") as f: + + process = subprocess.Popen( + cmd, + env=self.context.env, + cwd=os.path.dirname(self.get_origin_path()) if self.cwd is None else self.cwd, + shell=True, + stdout=f, + stderr=subprocess.STDOUT, + text=True, + ) + try: + process.wait(self._timeout) + except subprocess.TimeoutExpired: + self.set_state(State.TIMEOUT) + f.seek(0) + log = f.read(-1) + return_code = process.returncode + self._retrun_code = return_code + self._log = log + + self.check_result(log, return_code) + return log, return_code + + def execute_command(self): + self.execute_command_with_cmd(self.get_cmd()) + + def get_cmd(self): + return self._cmd + + def count(self): + return self._count + + def get_doc(self): + pass + + @property + def log(self): + return self._log + + @property + def return_code(self): + return self._return_code + + def check_result(self, log: str, return_code): + logger.debug( + f'\n>> {self.name}{"(optional)" if self.is_optional() else ""} -> return {return_code} :\n File "{self.get_origin_path()}" :\n{log}' + ) + if self.is_finished(): + return + if self.get_include() is not None: + for pattern in self.get_include(): + result = re.search(pattern, log) + if result is None: + self.set_state(State.FAIL) + self.set_reason( + f"'{pattern}' was not found in the output of {self.name}, {self.get_log_file_path()}" + ) + return + + if self.get_exclude() is not None: + for pattern in self.get_exclude(): + result = re.search(pattern, log) + if result is not None: + self.set_state(State.FAIL) + span = result.span() + + lineno = log.count("\n", 0, span[0]) + 1 + position = log.rfind("\n", 0, span[0]) + position = span[0] - position + self.set_reason( + f"Find '{pattern}' in the output of {self.name}, {self.get_log_file_path()}:{lineno}:{position}" + ) + return + + if self.get_expected_code() is not None: + if return_code not in self.get_expected_code(): + self.set_state(State.FAIL) + self.set_reason( + f"Then return code {return_code} of {self.name} does not match any of {self.get_expected_code()}, {self.get_log_file_path()}" + ) + return + + if self.get_unexpected_code() is not None: + if return_code in self.get_unexpected_code(): + self.set_state(State.FAIL) + self.set_reason( + f"Then return code {return_code} of {self.name} matches {self.get_expected_code()}, {self.get_log_file_path()}" + ) + return + + self.set_state(State.PASS) diff --git a/oec-ascend/oec/BaseTypes.py b/oec-ascend/oec/BaseTypes.py new file mode 100644 index 00000000..9fae51db --- /dev/null +++ b/oec-ascend/oec/BaseTypes.py @@ -0,0 +1,16 @@ +# encoding: utf-8 +from enum import Enum, unique + + +@unique +class State(Enum): + NOT_RUNNING = "not running" + RUNNING = "running" + + NOTHING_TO_DO = "nothing to do" + PASS = "passed" + WARNING = "warning" + + UNSUPPORTED = "unsupported" + TIMEOUT = "timeout" + FAIL = "failed" diff --git a/oec-ascend/oec/TestContext.py b/oec-ascend/oec/TestContext.py new file mode 100644 index 00000000..6f7e5737 --- /dev/null +++ b/oec-ascend/oec/TestContext.py @@ -0,0 +1,199 @@ +# encoding: utf-8 +import os +import random +from datetime import datetime +import threading +from importlib import import_module +from oec.BaseTypes import State +from logging import getLogger +import pandas as pd + +from oec.TestInterface import TestInterface + 
+logger = getLogger("oec-ascend") + + +def make_log_dir(log_dir): + logger.info(f"log dir is {log_dir}") + logger.info(f"create log path {log_dir}") + os.makedirs(log_dir) + return log_dir + + +class TestContext(object): + + def __init__(self): + self._all_tests = {} + self._data_path = "" + self._cann_path = "" + self._work_path = "" + self._output_dir:str = "" + self._relative_output:str = "" + self._defaut_log_dir:str = "" + self._used_tests = {} + self._test_order = [] + self._infomation = {} + self._states_distribution = {} + self._env = os.environ.copy() + self.finished = False + for state in State: + self._states_distribution.setdefault(state, 0) + + def set_env(self,env): + self._env = env + + @property + def env(self): + return self._env + + def set_output(self,output:str): + relative_output = ( + f'{datetime.now().strftime("%Y%m%d-%H-%M-%S")}-{random.randint(100,999)}' + ) + output_path = os.path.join(output, relative_output) + log_dir = os.path.join(output_path, "logs") + make_log_dir(log_dir) + self._output_dir = output_path + self._relative_output = relative_output + self._defaut_log_dir = log_dir + + def set_work_path(self,work_path:str): + self._work_path = work_path + + @property + def work_path(self): + return self._work_path + + def set_cann_path(self,cann_path:str): + self._cann_path = cann_path + + @property + def cann_path(self): + return self._cann_path + + def set_data_path(self,data_path:str): + self._data_path = data_path + + @property + def data_path(self): + return self._data_path + + def get_state_distribution_str(self): + success = self.distribution[State.PASS] + self.distribution[State.NOTHING_TO_DO] + failed = ( + self.distribution[State.FAIL] + + self.distribution[State.TIMEOUT] + + self.distribution[State.UNSUPPORTED] + ) + + total = len(self.get_used_tests()) + ran = total - self.distribution[State.NOT_RUNNING] - self.distribution[State.RUNNING] + if total == 0: + return "wait for start" + + return ( + f"total {total}, running {self.distribution[State.RUNNING]}, not running {self.distribution[State.NOT_RUNNING]}, " + f"passed {success}, warning {self.distribution[State.WARNING]}, failed {self.distribution[State.FAIL]}, " + f"timeout {self.distribution[State.TIMEOUT]}.\n" + f"Completion rate {round(ran/total*100,2)}%, pass rate { 0 if ran==0 else round(success/ran*100,2)}%" + ) + + @property + def relative_output(self): + return self._relative_output + + @property + def distribution(self): + return self._states_distribution + + @property + def infomation(self): + return self._infomation + + @property + def output_dir(self): + return self._output_dir + + def get_output_dir(self): + return self.output_dir + + def get_log_dir(self): + return self._defaut_log_dir + + def set_log_dir(self, path: str): + self._defaut_log_dir = path + + def add_test(self, test: TestInterface): + if test.name in self._all_tests: + t2 = self._all_tests[test.name] + raise RuntimeError( + f'"{test.name}" in {test.get_origin_path()}:{test.get_origin_lineno()}' + f" has been used in {t2.get_origin_path()}:{t2.get_origin_lineno()}" + ) + self._all_tests[test.name] = test + + @property + def test_order(self): + return self._test_order + + def set_test_order(self, path): + if not os.path.exists(path): + logger.fatal(f"Can not find the path: {path}") + exit(6500) + tests = self.get_tests() + path = os.path.join(path, "test_sequence.py") + + test_sequence = None + try: + test_sequence_module = import_module("test_sequence") + test_sequence = test_sequence_module.test_sequence + except 
Exception as e: + logger.fatal(f"Errors were found in test_sequence.py, error: {e}") + exit(7000) + + logger.debug("test_sequence is:") + logger.debug(test_sequence) + tmp_dict = {} + for group in test_sequence: + tmp_dict.setdefault(group, []) + used_test = {} + order_list = [] + for name, test in tests.items(): + if test.group in tmp_dict: + tmp_dict[test.group].append(test) + used_test[test.name] = test + + for group, t in tmp_dict.items(): + if not t: + continue + if test_sequence[group]: + order_list.append(t) + else: + for test in t: + order_list.append([test]) + logger.debug(f"test sequence detials:") + logger.debug(order_list) + self._test_order = order_list + self._used_tests = used_test + + def run_tests(self): + self.distribution[State.NOT_RUNNING] = len(self.get_used_tests()) + order_list = self.test_order + for item in order_list: + threads = [] + for test in item: + t = threading.Thread(target=test.run, name=test.name) + t.start() + threads.append(t) + for t in threads: + t.join() + for test in item: + if not test.can_continue(): + return State.FAIL + return State.PASS + + def get_used_tests(self): + return self._used_tests + + def get_tests(self): + return self._all_tests diff --git a/oec-ascend/oec/TestInterface.py b/oec-ascend/oec/TestInterface.py new file mode 100644 index 00000000..dfa95284 --- /dev/null +++ b/oec-ascend/oec/TestInterface.py @@ -0,0 +1,34 @@ +# encoding: utf-8 +from oec.BaseTypes import State + + +class TestInterface: + @property + def name(self) -> str: + raise NotImplementedError() + + @property + def group(self): + raise NotImplementedError() + + def get_origin_path(self) -> str: + raise NotImplementedError() + + def get_origin_lineno(self) -> int: + raise NotImplementedError() + + def set_log_dir_path(self, path: str): + raise NotImplementedError() + + def get_log_dir_path(self) -> str: + raise NotImplementedError() + + @property + def state(self) -> State: + raise NotImplementedError() + + def can_continue(self) -> bool: + raise NotImplementedError() + + def run(self): + raise NotImplementedError() diff --git a/oec-ascend/oec/TestReport.py b/oec-ascend/oec/TestReport.py new file mode 100644 index 00000000..5b64e6a7 --- /dev/null +++ b/oec-ascend/oec/TestReport.py @@ -0,0 +1,111 @@ +# encoding: utf-8 + +import pandas as pd +import os +import openpyxl +from oec.TestContext import TestContext +from oec.BaseTestCase import TestCase +from logging import getLogger + +logger = getLogger("oec-ascend") + + +# path = "resource/report0.xlsx" +def gen_report(path: str, context: TestContext): + log_dir = context.get_output_dir() + src_path = os.path.join(path, "base_report.xlsx") + path = os.path.join(log_dir, "report.xlsx") + + # copy(src_path,path) + df = pd.read_excel(src_path, header=None) + + excel = openpyxl.load_workbook(src_path) + sheet_name = excel.sheetnames[0] + sheet = excel[sheet_name] + for item in sheet.merged_cells: + top_col, top_row, bottom_col, bottom_row = item.bounds + df.iloc[top_row - 1 : bottom_row, top_col - 1 : bottom_col] = ( + item.start_cell.value + ) + + # 环境信息 + dfe = df.iloc[1:7, 3:] + dfe.set_index([dfe.columns[0]], inplace=True) + dfe.columns = pd.Index(["value"]) + info = context.infomation + + logger.debug(info) + for key in dfe.index: + dfe.loc[key, "value"] = info.get(key, "") + logger.debug(dfe) + # 测试结果 + dft = df.iloc[8:] + dft.columns = dft.iloc[0] + dft = dft.iloc[1:] + + # 初始化字典 + class info: + def __init__(self): + self.passed = 0 + self.tests:list[TestCase] = [] + + def add(self, test: TestCase): + 
self.tests.append(test) + self.passed += test.count() if test.is_passed() else 0 + + @property + def total(self): + return sum([test.count() for test in self.tests]) + + dic = {} + for i in range(len(dft)): + dic.setdefault((dft.iat[i, 0], dft.iat[i, 1]), info()) + + # 设置索引 + dft.set_index([dft.columns[0], dft.columns[1]], inplace=True) + dft.sort_index() + # 统计测试用例信息 + for _, test in context.get_used_tests().items(): + dic.setdefault(test.group, info()) + inf = dic[test.group] + inf.add(test) + + details = pd.DataFrame( + columns=["兼容性测试", "检测项", "用例编号", "测试内容", "结论"] + ) + # 写入表格 + for k in dic: + inf = dic[k] + if inf.total != 0: + dft.loc[k, "测试结果"] = f"{round(inf.passed/inf.total*100,2)}%" + dft.loc[k, "结论"] = "PASS" if inf.passed == inf.total else "FAILED" + for test in inf.tests: + g1, g2 = k + details.loc[len(details)] = [ + g1, + g2, + test.name, + test.get_test_content(), + test.state.value, + ] + + # 保存excel + logger.debug(details) + logger.debug(df) + for i in range(1, 7): + sheet.cell(i + 1, 5, df.iat[i, 4]) + + for i in range(9, len(df)): + for j in range(3, len(df.columns)): + sheet.cell(i + 1, j + 1, df.iat[i, j]) + sheet2 = excel[excel.sheetnames[1]] + + for i in range(len(details)): + for j in range(len(details.columns)): + sheet2.cell(i + 2, j + 1, details.iat[i, j]) + excel.save(path) + + +if __name__ == "__main__": + context = TestContext(".") + gen_report("resource", context) diff --git a/oec-ascend/oec/TestUtils.py b/oec-ascend/oec/TestUtils.py new file mode 100644 index 00000000..55b26c92 --- /dev/null +++ b/oec-ascend/oec/TestUtils.py @@ -0,0 +1,85 @@ +# encoding: utf-8 +import os +import re +from oec import BaseTest,TestCase,State + +def merge_env_variables(env_output, var_list): + """ + 解析env命令输出,提取指定环境变量并与当前环境合并 + + 参数: + env_output (str): env命令输出的文本 + var_list (list): 需要提取的环境变量名称列表 + + 返回: + dict: 合并后的环境变量字典,适用于subprocess模块 + """ + # 创建当前环境变量的副本 + merged_env = os.environ.copy() + + # 解析env命令的输出 + extracted_env = {} + for line in env_output.strip().splitlines(): + # 跳过空行和不符合格式的行 + if '=' not in line: + continue + + # 分割变量名和值(只分割第一个等号) + parts = line.split('=', 1) + var_name = parts[0] + var_value = parts[1] if len(parts) > 1 else '' + + # 如果变量在目标列表中,则记录 + if var_name in var_list: + extracted_env[var_name] = var_value + + # 合并到环境变量副本中 + merged_env.update(extracted_env) + return merged_env + +class SetEnvTestCase(TestCase): + def execute_command(self): + super().execute_command() + if not self.is_passed(): + return + cann_envname = [ + 'ASCEND_TOOLKIT_HOME', + 'ASCEND_HOME_PATH', + 'ASCEND_AICPU_PATH', + 'ASCEND_OPP_PATH', + 'TOOLCHAIN_HOME', + 'LD_LIBRARY_PATH', + 'PYTHONPATH', + 'PATH', + ] + env = merge_env_variables(self.log,cann_envname) + self.context.set_env(env) + self.logger.debug(self.context.env) + self.set_state(State.PASS) + + +class ResetEnvTestCase(BaseTest): + def execute_command(self): + self.context.env = os.environ.copy() + self.set_state(State.PASS) + +class NPUTestCase(TestCase): + """ + 从Context.infomation中获取和替换cmd中 包围的信息,其中key为需要获取和替换的键,注意左尖括号前需要有白字符 + """ + def replace_cmd_with_info(self, cmd): + # 正则表达式匹配:空白字符 + + # \s 匹配任何空白字符,[\w]+ 匹配单词字符(字母、数字、下划线) + pattern = re.compile(r'(\s+)<([^\n\r<>]+)>') + + def replacer(match): + whitespace = match.group(1) # 前面的空白字符 + key = match.group(2) # xxx 部分 + return f'{whitespace}{self.context.infomation.get(key, f"<{key}>")}' # 如果 key 不存在,保留原样 + + new_cmd = pattern.sub(replacer, cmd) + return new_cmd + + def execute_command(self): + cmd:str = self.replace_cmd_with_info(self.get_cmd()) + 
self.execute_command_with_cmd(cmd) \ No newline at end of file diff --git a/oec-ascend/oec/__init__.py b/oec-ascend/oec/__init__.py new file mode 100644 index 00000000..24dfa12a --- /dev/null +++ b/oec-ascend/oec/__init__.py @@ -0,0 +1,4 @@ +from oec.BaseTypes import * +from oec.BaseTestCase import TestCase +from oec.BaseTest import BaseTest,Context +from oec.TestUtils import * \ No newline at end of file diff --git a/oec-ascend/oec/main.py b/oec-ascend/oec/main.py new file mode 100644 index 00000000..87cd9192 --- /dev/null +++ b/oec-ascend/oec/main.py @@ -0,0 +1,189 @@ +#!python +# encoding: utf-8 +import argparse +import logging +import os +import random +import sys +import time +import threading +from datetime import datetime +from importlib import import_module +from logging import getLogger + +from oec.TestContext import TestContext +from oec.BaseTest import Context + +from oec.TestReport import gen_report + +logger = getLogger("oec-ascend") + + +def init_logger(level=logging.INFO): + class ErrorFilter(logging.Filter): + def filter(self, record): + return record.levelno < logging.ERROR + + logger.setLevel(logging.DEBUG) + stdout = logging.StreamHandler(sys.stdout) + stdout.setLevel(level) + stdout.addFilter(ErrorFilter()) + logger.addHandler(stdout) + + stderr = logging.StreamHandler(sys.stderr) + stderr.setFormatter( + logging.Formatter("[%(levelname)s][%(pathname)s:%(lineno)d] %(message)s") + ) + stderr.setLevel(logging.ERROR) + logger.addHandler(stderr) + + +def argparse_handler(): + parser = argparse.ArgumentParser( + prog="oec-ascend", + description="Ascend Operating System Compatibility Verification Tool", + ) + + parser.add_argument( + "-c", + "--cann", + default="/usr/local/Ascend", + help="The root path for installing CANN is by default `/usr/local/Ascend`.", + ) + + parser.add_argument( + "-d", + "--data", + default=f"./data", + help="The path to the data file that is necessary during the run", + ) + + parser.add_argument( + "-o", + "--output", + type=str, + default="./output", + help="Director to save results and log output", + ) + + parser.add_argument( + "--verbose", action="store_true", default=False, help="print verbose output" + ) + + args = parser.parse_args() + return args + + +def find_ascend_test_in_dir(path: str): + logger.info(f"test case director is '{path}' loading...") + sys.path.append(path) + for ( + prefix, + dirs, + files, + ) in os.walk(path): + for file in files: + if file[:11] != "ascend_test" or file[-3:] != ".py": + continue + root = os.path.relpath(prefix, path) + + module_name = file[:-3] + module_path = ( + module_name + if root == "." 
+ else ".".join(root.split(os.sep) + [module_name]) + ) + file_path = os.path.join(prefix, file) + logger.debug(f"import {module_path} path:{file_path}") + try: + module = import_module(module_path) + except Exception as e: + logger.error(f"import {module_path} error: {e}") + continue + + +def get_absolute_out_path(output): + output_path = os.path.abspath(output) + + return output_path + + + + + +def print_state(context: TestContext): + logger.info(context.get_state_distribution_str()) + dynamic = ["|", "/", "-", "\\"] + count = 0 + while not context.finished: + logger.info( + f"\033[2A\033[K{context.get_state_distribution_str()} {dynamic[count % len(dynamic)]}\033[K" + ) + count += 1 + time.sleep(0.2) + logger.info(f"\033[2A\033[K{context.get_state_distribution_str()}\033[K") + +def enable_ansi_windows(): + """在 Windows 上启用 ANSI 转义序列支持""" + if sys.platform == "win32": + import ctypes + kernel32 = ctypes.windll.kernel32 + kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7) # 启用 VT100 模式 +def main(): + # cmd_args = argparse_handler() + verbose = False + output_dir = "./output" + data_dir = os.path.dirname(__file__) + "/data" + cann_dir = "/usr/local/Ascend" + work_dir = os.path.realpath("./") + + init_logger(logging.DEBUG if verbose else logging.INFO) + + # 如果source了环境变量则提取组合包安装路径 + ascend_home_path = os.environ.get('ASCEND_HOME_PATH') + if ascend_home_path is not None: + cann_dir = os.path.realpath(f"{ascend_home_path}/../..") + logger.info(f"Ascend install path is {cann_dir}") + + output = os.path.abspath(output_dir) + data_path = os.path.realpath(data_dir) + if not os.path.exists(data_path): + logger.fatal(f"{data_path} is not existing, please create it first!") + exit(1000) + cann_path = os.path.realpath(cann_dir) + if not os.path.exists(cann_path): + logger.fatal(f"{cann_path} is not existing, please install CANN first!") + exit(2000) + Context.set_data_path(data_path) + Context.set_cann_path(cann_path) + Context.set_output(output) + Context.set_work_path(work_dir) + resource =f"{os.path.dirname(__file__)}/resource" + resource = os.path.realpath(resource) + + find_ascend_test_in_dir(resource) + + Context.set_test_order(resource) + logger.info( + f"Find {len(Context.get_tests())} test cases, using {len(Context.get_used_tests())} test cases." + ) + state_monitor = threading.Thread( + name="state_monitor", target=print_state, args=[Context] + ) + if not verbose: + Context.finished = False + enable_ansi_windows() + state_monitor.start() + result = Context.run_tests() + if not verbose: + Context.finished = True + state_monitor.join() + + logger.info(f"Complete!") + + gen_report(resource, Context) + logger.info(f"Generate an execution report with the path {Context.get_output_dir()}") + + +if __name__ == "__main__": + main() diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/ascend_test_nn.py new file mode 100644 index 00000000..e16b24b7 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/ascend_test_nn.py @@ -0,0 +1,8 @@ +# encoding: utf-8 +import oec + +oec.TestCase( + group=("应用开发", "算子加速库"), + name="ACLNN_ADD", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn" +) diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/run.sh new file mode 100755 index 00000000..009860d5 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnAdd" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/testcase.cpp new file mode 100644 index 00000000..95f728b7 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAdd/testcase.cpp @@ -0,0 +1,175 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_add.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. 
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector otherShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* otherDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* other = nullptr; + aclScalar* alpha = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector otherHostData = {1, 1, 1, 2, 2, 2, 3, 3}; + std::vector outHostData(8, 0); + float alphaValue = 1.2f; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建other aclTensor + ret = CreateAclTensor(otherHostData, otherShape, &otherDeviceAddr, aclDataType::ACL_FLOAT, &other); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建alpha aclScalar + alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT); + CHECK_RET(alpha != nullptr, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + + // aclnnAdd接口调用示例 + // 3. 调用CANN算子库API + // 调用aclnnAdd第一段接口 + ret = aclnnAddGetWorkspaceSize(self, other, alpha, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnAdd第二段接口 + ret = aclnnAdd(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAdd failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 
获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + + // aclnnInplaceAdd接口调用示例 + // 3. 调用CANN算子库API + LOG_PRINT("\ntest aclnnInplaceAdd\n"); + // 调用aclnnInplaceAdd第一段接口 + ret = aclnnInplaceAddGetWorkspaceSize(self, other, alpha, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnInplaceAddGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnInplaceAdd第二段接口 + ret = aclnnInplaceAdd(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnInplaceAdd failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), selfDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(other); + aclDestroyScalar(alpha); + aclDestroyTensor(out); + + // 7. 释放Device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(otherDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
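+# Note: the target_link_libraries block below links the CANN shared objects by
+# absolute path. An equivalent sketch (assuming ASCEND_PATH resolves as in the
+# if/else block further down) is to register the library directory once and
+# link by plain library name:
+#   target_link_directories(testcase PRIVATE ${ASCEND_PATH}/lib64)
+#   target_link_libraries(testcase PRIVATE ascendcl nnopbase opapi)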
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/ascend_test_nn.py new file mode 100644 index 00000000..1bd694b5 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/ascend_test_nn.py @@ -0,0 +1,11 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_ADDLAYERNORM", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn" + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/run.sh new file mode 100755 index 00000000..fe92c5db --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnAddlayerNorm" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/testcase.cpp new file mode 100644 index 00000000..6c1856e9 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAddlayerNorm/testcase.cpp @@ -0,0 +1,129 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_softmax.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. 
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + int64_t dim = 0; + // 调用aclnnSoftmax第一段接口 + ret = aclnnSoftmaxGetWorkspaceSize(self, dim, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmaxGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnSoftmax第二段接口 + ret = aclnnSoftmax(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmax failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 
释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 释放device 资源 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/CMakeLists.txt new file mode 100644 index 00000000..ee714c32 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) +if(DEFINED DEV_NUM) + target_compile_definitions(testcase PRIVATE DEV_NUM=${DEV_NUM}) +endif() +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) +# 设置链接的库文件路径 +find_package(Threads REQUIRED) +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so + ${ASCEND_PATH}/lib64/libhccl.so # 集合通信库文件 + pthread) # 多线程依赖的库文件 +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/ascend_test_nn.py new file mode 100644 index 00000000..5b7f911c --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/ascend_test_nn.py @@ -0,0 +1,12 @@ +#encoding: utf-8 +import oec + +oec.NPUTestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_ALLGATHERMATMUL", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn ", + exclude=[r"\berror\b", r"\bERROR\b", r"\bError\b"] + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/run.sh new file mode 100755 index 00000000..17bb5302 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/run.sh @@ -0,0 +1,13 @@ +output_path="$1/aclnnAllGatherMatmul" +device_num=$2 +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +#device_num 是环境上npu的数量,当前测试在A2上需要为2,4,8时才能运行成功 +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE -DDEV_NUM=${device_num} +make + +cd bin +./testcase +echo $? +exit $? 
\ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/testcase.cpp new file mode 100644 index 00000000..104f7821 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnAllGatherMatmul/testcase.cpp @@ -0,0 +1,207 @@ +#include +#include +#include +#include "aclnnop/aclnn_all_gather_matmul.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while(0) + +#ifndef DEV_NUM +#warning "DEVNUM is not defined, default is 8" +#define DEV_NUM 8 +#endif + +int64_t GetShapeSize(const std::vector &shape) +{ + int64_t shape_size = 1; + for (auto i : shape) { + shape_size *= i; + } + return shape_size; +} + +template +int CreateAclTensor(const std::vector &hostData, const std::vector &shape, void **deviceAddr, + aclDataType dataType, aclTensor **tensor) +{ + auto size = GetShapeSize(shape) * sizeof(T); + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMalloc failed. ret: %d\n", ret); return ret); + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMemcpy failed. ret: %d\n", ret); return ret); + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i +1] * strides[i + 1]; + } + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +struct Args { + int rankId; + HcclComm hcclComm; + aclrtStream stream; + }; + +int launchOneThread_AllGatherMm(Args &args) +{ + int ret = aclrtSetDevice(args.rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSetDevice failed. ret = %d \n", ret); return ret); + + char hcomName[128] = {0}; + ret = HcclGetCommName(args.hcclComm, hcomName); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] HcclGetCommName failed. 
ret: %d\n", ret); return -1); + LOG_PRINT("[INFO] rank = %d, hcomName = %s, stream = %p\n", args.rankId, hcomName, args.stream); + std::vector x1Shape = {128, 256}; + std::vector x2Shape = {256, 512}; + std::vector biasShape = {512}; + std::vector outShape = {128 * DEV_NUM, 512}; + std::vector gatherOutShape = {128 * DEV_NUM, 256}; + void *x1DeviceAddr = nullptr; + void *x2DeviceAddr = nullptr; + void *biasDeviceAddr = nullptr; + void *outDeviceAddr = nullptr; + void *gatherOutDeviceAddr = nullptr; + aclTensor *x1 = nullptr; + aclTensor *x2 = nullptr; + aclTensor *bias = nullptr; + aclTensor *out = nullptr; + aclTensor *gatherOut = nullptr; + + int64_t gatherIndex = 0; + int64_t commTurn = 0; + int64_t streamMode = 1; + uint64_t workspaceSize = 0; + aclOpExecutor *executor = nullptr; + void *workspaceAddr = nullptr; + + long long x1ShapeSize = GetShapeSize(x1Shape); + long long x2ShapeSize = GetShapeSize(x2Shape); + long long biasShapeSize = GetShapeSize(biasShape); + long long outShapeSize = GetShapeSize(outShape); + long long gatherOutShapeSize = GetShapeSize(gatherOutShape); + + std::vector x1HostData(x1ShapeSize, 0); + std::vector x2HostData(x2ShapeSize, 0); + std::vector biasHostData(biasShapeSize, 0); + std::vector outHostData(outShapeSize, 0); + std::vector gatherOutHostData(gatherOutShapeSize, 0); + + ret = CreateAclTensor(x1HostData, x1Shape, &x1DeviceAddr, aclDataType::ACL_FLOAT16, &x1); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(x2HostData, x2Shape, &x2DeviceAddr, aclDataType::ACL_FLOAT16, &x2); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT16, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(gatherOutHostData, gatherOutShape, &gatherOutDeviceAddr, + aclDataType::ACL_FLOAT16, &gatherOut); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 调用第一阶段接口 + ret = aclnnAllGatherMatmulGetWorkspaceSize( + x1, x2, bias, hcomName, gatherIndex, commTurn, streamMode, out, gatherOut, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, + LOG_PRINT("[ERROR] aclnnAllGatherMatmulGetWorkspaceSize failed. ret = %d \n", ret); return ret); + // 根据第一阶段接口计算出的workspaceSize申请device内存 + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMalloc workspace failed. ret = %d \n", ret); return ret); + } + // 调用第二阶段接口 + ret = aclnnAllGatherMatmul(workspaceAddr, workspaceSize, executor, args.stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclnnAllGatherMatmul failed. ret = %d \n", ret); return ret); + // (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStreamWithTimeout(args.stream, 10000); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSynchronizeStreamWithTimeout failed. 
ret = %d \n", ret); + return ret); + LOG_PRINT("[INFO] device_%d aclnnAllGatherMatmul execute successfully.\n", args.rankId); + // 释放device资源,需要根据具体API的接口定义修改 + if (x1 != nullptr) { + aclDestroyTensor(x1); + } + if (x2 != nullptr) { + aclDestroyTensor(x2); + } + if (bias != nullptr) { + aclDestroyTensor(bias); + } + if (out != nullptr) { + aclDestroyTensor(out); + } + if (gatherOut != nullptr) { + aclDestroyTensor(gatherOut); + } + if (x1DeviceAddr != nullptr) { + aclrtFree(x1DeviceAddr); + } + if (x2DeviceAddr != nullptr) { + aclrtFree(x2DeviceAddr); + } + if (biasDeviceAddr != nullptr) { + aclrtFree(biasDeviceAddr); + } + if (outDeviceAddr != nullptr) { + aclrtFree(outDeviceAddr); + } + if (gatherOutDeviceAddr != nullptr) { + aclrtFree(gatherOutDeviceAddr); + } + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + ret = aclrtDestroyStream(args.stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtDestroyStream failed. ret = %d \n", ret); return ret); + ret = aclrtResetDevice(args.rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtResetDevice failed. ret = %d \n", ret); return ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclInit failed. ret = %d \n", ret); return ret); + aclrtStream stream[DEV_NUM]; + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + ret = aclrtSetDevice(rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSetDevice failed. ret = %d \n", ret); return ret); + ret = aclrtCreateStream(&stream[rankId]); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtCreateStream failed. ret = %d \n", ret); return ret); + } + int32_t devices[DEV_NUM]; + for (int i = 0; i < DEV_NUM; i++) { + devices[i] = i; + } + // 初始化集合通信域 + HcclComm comms[DEV_NUM]; + ret = HcclCommInitAll(DEV_NUM, devices, comms); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] HcclCommInitAll failed. ret = %d \n", ret); return ret); + + Args args[DEV_NUM]; + // 启动多线程 + std::vector> threads(DEV_NUM); + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + args[rankId].rankId = rankId; + args[rankId].hcclComm = comms[rankId]; + args[rankId].stream = stream[rankId]; + threads[rankId].reset(new(std::nothrow) std::thread(&launchOneThread_AllGatherMm, std::ref(args[rankId]))); + } + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + threads[rankId]->join(); + } + for (int i = 0; i < DEV_NUM; i++) { + auto hcclRet = HcclCommDestroy(comms[i]); + CHECK_RET(hcclRet == HCCL_SUCCESS, LOG_PRINT("[ERROR] HcclCommDestory failed. ret = %d \n", ret); return -1); + } + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
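+# Note: CMAKE_CXX_FLAGS_DEBUG/CMAKE_CXX_FLAGS_RELEASE below only apply to the
+# matching configuration, and this file does not set CMAKE_BUILD_TYPE, so the
+# configuration can be chosen at configure time, e.g. (illustrative invocation,
+# not part of run.sh):
+#   cmake <path-to-this-directory> -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE -DCMAKE_BUILD_TYPE=Release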
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/ascend_test_nn.py new file mode 100644 index 00000000..599e3d48 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/ascend_test_nn.py @@ -0,0 +1,11 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_FLASHATTENTIONSCORE", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn", + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/run.sh new file mode 100755 index 00000000..5dfbf703 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnFlashAttention" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/testcase.cpp new file mode 100644 index 00000000..41e4074f --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnFlashAttentionScore/testcase.cpp @@ -0,0 +1,206 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_flash_attention_score.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +void PrintOutResult(std::vector &shape, void** deviceAddr) { + auto size = GetShapeSize(shape); + std::vector resultData(size, 0); + auto ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), + *deviceAddr, size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. 
ERROR: %d\n", ret); return); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("mean result[%ld] is: %f\n", i, resultData[i]); + } +} + +int Init(int32_t deviceId, aclrtContext* context, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateContext(context, deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateContext failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetCurrentContext(*context); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetCurrentContext failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/context/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtContext context; + aclrtStream stream; + auto ret = Init(deviceId, &context, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 
构造输入与输出,需要根据API的接口自定义构造 + std::vector qShape = {256, 1, 128}; + std::vector kShape = {256, 1, 128}; + std::vector vShape = {256, 1, 128}; + std::vector attenmaskShape = {256, 256}; + + std::vector attentionOutShape = {256, 1, 128}; + std::vector softmaxMaxShape = {1, 1, 256, 8}; + std::vector softmaxSumShape = {1, 1, 256, 8}; + + void* qDeviceAddr = nullptr; + void* kDeviceAddr = nullptr; + void* vDeviceAddr = nullptr; + void* attenmaskDeviceAddr = nullptr; + void* attentionOutDeviceAddr = nullptr; + void* softmaxMaxDeviceAddr = nullptr; + void* softmaxSumDeviceAddr = nullptr; + + aclTensor* q = nullptr; + aclTensor* k = nullptr; + aclTensor* v = nullptr; + aclTensor* pse = nullptr; + aclTensor* dropMask = nullptr; + aclTensor* padding = nullptr; + aclTensor* attenmask = nullptr; + aclTensor* attentionOut = nullptr; + aclTensor* softmaxMax = nullptr; + aclTensor* softmaxSum = nullptr; + aclTensor* softmaxOut = nullptr; + + std::vector qHostData(32768, 1); + std::vector kHostData(32768, 1); + std::vector vHostData(32768, 1); + std::vector attenmaskHostData(65536, 0); + std::vector attentionOutHostData(32768, 0); + std::vector softmaxMaxHostData(2048, 3.0); + std::vector softmaxSumHostData(2048, 3.0); + + ret = CreateAclTensor(qHostData, qShape, &qDeviceAddr, aclDataType::ACL_FLOAT16, &q); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(kHostData, kShape, &kDeviceAddr, aclDataType::ACL_FLOAT16, &k); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(vHostData, vShape, &vDeviceAddr, aclDataType::ACL_FLOAT16, &v); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(attenmaskHostData, attenmaskShape, &attenmaskDeviceAddr, aclDataType::ACL_UINT8, &attenmask); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(attentionOutHostData, attentionOutShape, &attentionOutDeviceAddr, aclDataType::ACL_FLOAT16, &attentionOut); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(softmaxMaxHostData, softmaxMaxShape, &softmaxMaxDeviceAddr, aclDataType::ACL_FLOAT, &softmaxMax); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(softmaxSumHostData, softmaxSumShape, &softmaxSumDeviceAddr, aclDataType::ACL_FLOAT, &softmaxSum); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + std::vector prefixOp = {0}; + aclIntArray *prefix = aclCreateIntArray(prefixOp.data(), 1); + double scaleValue = 0.088388; + double keepProb = 1; + int64_t preTokens = 65536; + int64_t nextTokens = 65536; + int64_t headNum = 1; + int64_t innerPrecise = 0; + int64_t sparseMod = 0; + + char layOut[5] = {'S', 'B', 'H', 0}; + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + + // 调用aclnnFlashAttentionScore第一段接口 + ret = aclnnFlashAttentionScoreGetWorkspaceSize( + q, k, v, pse, dropMask, padding, attenmask, prefix, scaleValue, + keepProb, preTokens, nextTokens, headNum, layOut, innerPrecise, + sparseMod, softmaxMax, softmaxSum, softmaxOut, attentionOut, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnFlashAttentionScoreGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. 
ERROR: %d\n", ret); return ret); + } + + // 调用aclnnFlashAttentionScore第二段接口 + ret = aclnnFlashAttentionScore(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnFlashAttentionScore failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + PrintOutResult(attentionOutShape, &attentionOutDeviceAddr); + PrintOutResult(softmaxMaxShape, &softmaxMaxDeviceAddr); + PrintOutResult(softmaxSumShape, &softmaxSumDeviceAddr); + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(q); + aclDestroyTensor(k); + aclDestroyTensor(v); + aclDestroyTensor(attenmask); + aclDestroyTensor(attentionOut); + aclDestroyTensor(softmaxMax); + aclDestroyTensor(softmaxSum); + + // 7. 释放device资源 + aclrtFree(qDeviceAddr); + aclrtFree(kDeviceAddr); + aclrtFree(vDeviceAddr); + aclrtFree(attenmaskDeviceAddr); + aclrtFree(attentionOutDeviceAddr); + aclrtFree(softmaxMaxDeviceAddr); + aclrtFree(softmaxSumDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtDestroyContext(context); + aclrtResetDevice(deviceId); + aclFinalize(); + + return 0; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/ascend_test_nn.py new file mode 100644 index 00000000..00b481a4 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/ascend_test_nn.py @@ -0,0 +1,11 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_MATMUL", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn" + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/run.sh new file mode 100755 index 00000000..7dcabc11 --- /dev/null +++ 
b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnMatmul" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/testcase.cpp new file mode 100644 index 00000000..0214ca93 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnMatmul/testcase.cpp @@ -0,0 +1,138 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_matmul.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 
构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {16, 32}; + std::vector mat2Shape = {32, 16}; + std::vector outShape = {16, 16}; + void* selfDeviceAddr = nullptr; + void* mat2DeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* mat2 = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData(512, 1); + std::vector mat2HostData(512, 1); + std::vector outHostData(256, 0); + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建mat2 aclTensor + ret = CreateAclTensor(mat2HostData, mat2Shape, &mat2DeviceAddr, aclDataType::ACL_FLOAT, &mat2); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + int8_t cubeMathType = 1; + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + // 调用aclnnMatmul第一段接口 + ret = aclnnMatmulGetWorkspaceSize(self, mat2, out, cubeMathType, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnMatmulGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnMatmul第二段接口 + ret = aclnnMatmul(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnMatmul failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(mat2); + aclDestroyTensor(out); + + // 7. 释放device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(mat2DeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
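+# Note: run.sh configures this project with -DCMAKE_SKIP_RPATH=TRUE, so the
+# built binary carries no RPATH and libascendcl/libnnopbase/libopapi must be
+# resolvable through the loader path at run time, for example (illustrative
+# only, using the default install path from this file):
+#   export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:$LD_LIBRARY_PATH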
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/ascend_test_nn.py new file mode 100644 index 00000000..e5c5c51c --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/ascend_test_nn.py @@ -0,0 +1,11 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_SILU", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn" + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/run.sh new file mode 100755 index 00000000..1d7cee37 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnSilu" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/testcase.cpp new file mode 100644 index 00000000..b3680a37 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSilu/testcase.cpp @@ -0,0 +1,127 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_silu.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shape_size = 1; + for (auto i : shape) { + shape_size *= i; + } + return shape_size; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. 
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化, 参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + // check根据自己的需要处理 + CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的API + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + // 调用aclnnSilu第一段接口 + ret = aclnnSiluGetWorkspaceSize(self, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSiluGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret;); + } + // 调用aclnnSilu第二段接口 + ret = aclnnSilu(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSilu failed. ERROR: %d\n", ret); return ret); + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, size * sizeof(float), + ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 
释放device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/CMakeLists.txt new file mode 100644 index 00000000..c129437b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/ascend_test_nn.py b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/ascend_test_nn.py new file mode 100644 index 00000000..682d6622 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/ascend_test_nn.py @@ -0,0 +1,11 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","算子加速库"), + + name = "ACLNN_SOFTMAX", + cmd=f"./run.sh {oec.Context.output_dir}/tmp/aclnn" + ) + + diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/run.sh b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/run.sh new file mode 100755 index 00000000..10a836d3 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/run.sh @@ -0,0 +1,11 @@ +output_path="$1/aclnnSoftmax" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +echo $? +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/testcase.cpp b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/testcase.cpp new file mode 100644 index 00000000..6c1856e9 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/aclnn/aclnnSoftmax/testcase.cpp @@ -0,0 +1,129 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_softmax.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) 
\ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + int64_t dim = 0; + // 调用aclnnSoftmax第一段接口 + ret = aclnnSoftmaxGetWorkspaceSize(self, dim, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmaxGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnSoftmax第二段接口 + ret = aclnnSoftmax(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmax failed. ERROR: %d\n", ret); return ret); + + // 4. 
(固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 释放device 资源 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/atb/CMakeLists.txt new file mode 100644 index 00000000..57ea00f4 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/CMakeLists.txt @@ -0,0 +1,46 @@ +cmake_minimum_required(VERSION 3.16) +project("test_model") +option(USE_CXX11_ABI "USE_CXX11_ABI" OFF) +set(CMAKE_BUILD_TYPE Debug) +if(USE_CXX11_ABI) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + +include_directories( + $ENV{ATB_HOME_PATH}/include + $ENV{ASCEND_HOME_PATH}/include + ${CMAKE_CURRENT_SOURCE_DIR}) + +link_directories( + $ENV{ATB_HOME_PATH}/lib + $ENV{ASCEND_HOME_PATH}/lib64 + ) + +set(TEST_MODEL_CXX + main.cpp + aclnn/aclnn_gelu_operation.cpp + aclnn/aclnn_operation_base.cpp + atb/atb_graph_op.cpp + utils/utils.cpp + utils/log.cpp + model/model.cpp + memory/memorypool.cpp + memory/memory_utils.cpp +) + +# 列出所有的头文件目录 +include_directories( + ${CMAKE_SOURCE_DIR}/aclnn + ${CMAKE_SOURCE_DIR}/atb + ${CMAKE_SOURCE_DIR}/model + ${CMAKE_SOURCE_DIR}/utils + ${CMAKE_SOURCE_DIR}/memory +) + +add_executable(test_model ${TEST_MODEL_CXX}) + +target_link_libraries(test_model PRIVATE atb ascendcl opapi nnopbase pthread) diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.cpp new file mode 100644 index 00000000..62c07b48 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.cpp @@ -0,0 +1,179 @@ +#include "aclnn_gelu_operation.h" +#include "acl/acl.h" +#include "aclnnop/aclnn_gelu.h" +#include "aclnnop/aclnn_gelu_v2.h" +#include "utils/log.h" +#include "utils/utils.h" + +const int DIM0 = 0; +const int DIM1 = 1; +const int DIM2 = 2; +const int DIM3 = 3; + +GeluOperation::GeluOperation(const std::string &name, AclnnGeluParam param) : AclnnBaseOperation(name), param_(param) +{} + +atb::Status GeluOperation::InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const +{ + LOG_INFO(opName_ + " InferShape start"); + outTensorDesc.at(0).format = inTensorDesc.at(0).format; + outTensorDesc.at(0).dtype = inTensorDesc.at(0).dtype; + outTensorDesc.at(0).shape.dimNum = inTensorDesc.at(0).shape.dimNum; + + if (inTensorDesc.at(0).shape.dimNum == DIM3) { + LOG_INFO("[input0 dimNum = 3] CHECK " + opName_ + " 
input shape: [input0] " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM0]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM1]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM2])); + outTensorDesc.at(0).shape.dims[DIM0] = inTensorDesc.at(0).shape.dims[DIM0]; + outTensorDesc.at(0).shape.dims[DIM1] = inTensorDesc.at(0).shape.dims[DIM1]; + outTensorDesc.at(0).shape.dims[DIM2] = inTensorDesc.at(0).shape.dims[DIM2]; + } else if (inTensorDesc.at(0).shape.dimNum == DIM2) { + LOG_INFO("[input0 dimNum = 2] CHECK " + opName_ + " input shape: [input0] " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM0]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM1])); + outTensorDesc.at(0).shape.dims[DIM0] = inTensorDesc.at(0).shape.dims[DIM0]; + outTensorDesc.at(0).shape.dims[DIM1] = inTensorDesc.at(0).shape.dims[DIM1]; + } else { + LOG_ERROR(opName_ + " invalid dimNum = " + std::to_string(inTensorDesc.at(0).shape.dimNum)); + } + + LOG_INFO(opName_ + " InferShape end"); + return atb::NO_ERROR; +} + +uint32_t GeluOperation::GetInputNum() const +{ + return 1; // gelu入参个数 +} + +uint32_t GeluOperation::GetOutputNum() const +{ + return 1; // gelu出参个数 +} + +// 重写父类方法, 创建输入输出tensor,并存入VariantPack +atb::Status GeluOperation::CreateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + LOG_INFO(opName_ + " CreateAclnnVariantPack start"); + + auto ret = CreateAclnnInTensor(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " CreateAclnnInTensor fail"); + return atb::ERROR_INVALID_PARAM; + } + + ret = CreateAclnnOutTensor(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " CreateAclNNOutTensorVariantPack fail"); + return atb::ERROR_INVALID_PARAM; + } + + LOG_INFO(opName_ + " CreateAclnnVariantPack end"); + return atb::NO_ERROR; +} + +atb::Status GeluOperation::CreateAclnnInTensor(const atb::VariantPack &variantPack) +{ + aclInTensors_.resize(GetInputNum()); + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.inTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + LOG_ERROR(opName_ + " InTensor aclCreateTensor index " + std::to_string(i) + " fail"); + return atb::ERROR_INTERNAL_ERROR; + } + aclInTensors_[i] = aclnnTensor; + } + return atb::NO_ERROR; +} + +atb::Status GeluOperation::CreateAclnnOutTensor(const atb::VariantPack &variantPack) +{ + aclOutTensors_.resize(GetOutputNum()); + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.outTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + LOG_ERROR(opName_ + " outTensor aclCreateTensor index " + std::to_string(i) + " fail"); + return atb::ERROR_INTERNAL_ERROR; + } + LOG_INFO(opName_ + " input[" + std::to_string(i) + "] CreateAclnnTensor start"); + aclOutTensors_[i] = aclnnTensor; + } + return atb::NO_ERROR; +} + +atb::SVector GetCopyTensorStride(atb::Dims &tensorDims) +{ + atb::SVector tmpStrides(tensorDims.dimNum, 1); + if (tensorDims.dimNum > 8) { // 8: tensor最大维度数量 + LOG_ERROR("tensor's dimNum is larger than 8, GetCopyTensorStride failed."); + return tmpStrides; + } + for (int64_t i = static_cast(tensorDims.dimNum) - 2; i >= 0; i--) { + tmpStrides[i] = (tensorDims.dims[i + 1] * tmpStrides[i + 1]); + } + return tmpStrides; +} + +std::shared_ptr GeluOperation::CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx) +{ + auto aclnnTensor = std::make_shared(); + aclnnTensor->tensorIdx = static_cast(tensorIdx); + aclnnTensor->needUpdateTensorDataPtr = true; + aclnnTensor->atbTensor = atbTensor; + 
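+    // GetCopyTensorStride (defined above) produces contiguous row-major strides from the
+    // tensor shape, i.e. strides[i] = dims[i+1] * dims[i+2] * ... * dims[dimNum-1];
+    // for example, a shape of [2, 3, 4] yields strides [12, 4, 1].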
aclnnTensor->strides = GetCopyTensorStride(atbTensor.desc.shape); + + // 创建Aclnn tensor + aclnnTensor->tensor = aclCreateTensor(atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.desc.dtype, + aclnnTensor->strides.data(), + 0, + atbTensor.desc.format, + atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.deviceData); + return aclnnTensor; +} + +// 重写父类方法, 创建workspace和aclexecutor +atb::Status GeluOperation::SetAclnnWorkspaceExecutor() +{ + // 调用aclnn接口获取workspace大小 + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor start"); + if (param_.geluApproximate == -1) { + auto ret = aclnnGeluGetWorkspaceSize(aclInTensors_.at(0)->tensor, // self + aclOutTensors_.at(0)->tensor, // out + &workspaceSize_, + &aclExecutor_); + CHECK_RET(ret, opName_ + " aclnnGeluGetWorkspaceSize failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor end, workspaceSize_: " + std::to_string(workspaceSize_)); + return ret; + } + auto ret = aclnnGeluV2GetWorkspaceSize(aclInTensors_.at(0)->tensor, // x + param_.geluApproximate, // approximate + aclOutTensors_.at(0)->tensor, // y + &workspaceSize_, + &aclExecutor_); + CHECK_RET(ret, opName_ + " aclnnGeluV2GetWorkspaceSize failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor end, workspaceSize_: " + std::to_string(workspaceSize_)); + return ret; +} + +// 重写父类方法, 执行aclnn算子 +atb::Status GeluOperation::ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) +{ + // 调用aclnn算子进行算子下发 + LOG_INFO(opName_ + " ExecuteAclnnOp start"); + if (param_.geluApproximate == -1) { + auto ret = aclnnGelu(workspace, workspaceSize_, aclExecutor_, stream); + CHECK_RET(ret, opName_ + " ExecuteAclnnOp failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " ExecuteAclnnOp end"); + return ret; + } + auto ret = aclnnGeluV2(workspace, workspaceSize_, aclExecutor_, stream); + CHECK_RET(ret, opName_ + " aclnnGeluV2 failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " ExecuteAclnnOp end"); + return ret; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.h b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.h new file mode 100644 index 00000000..1d100830 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_gelu_operation.h @@ -0,0 +1,32 @@ +#ifndef ACLNN_GELU_OPERATION_H +#define ACLNN_GELU_OPERATION_H + +#include "aclnn/aclnn_operation_base.h" + +struct AclnnGeluParam +{ + int64_t geluApproximate = -1; // gelu_v2计算的入参,指定高斯近似算法,0: "none", 1: "tanh" , -1: 不使用gelu_v2 +}; + +class GeluOperation : public AclnnBaseOperation +{ +public: + GeluOperation(const std::string &name, AclnnGeluParam param); + atb::Status InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const override; + uint32_t GetInputNum() const override; + uint32_t GetOutputNum() const override; + + atb::Status CreateAclnnVariantPack(const atb::VariantPack &variantPack) override; + atb::Status SetAclnnWorkspaceExecutor() override; + atb::Status ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) override; + +private: + atb::Status CreateAclnnInTensor(const atb::VariantPack &variantPack); + atb::Status CreateAclnnOutTensor(const atb::VariantPack &variantPack); + std::shared_ptr CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx); + + AclnnGeluParam param_; +}; + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.cpp 
b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.cpp new file mode 100644 index 00000000..c64fdb41 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.cpp @@ -0,0 +1,116 @@ +#include "aclnn/aclnn_operation_base.h" +#include "utils/log.h" + +AclnnBaseOperation::AclnnBaseOperation(const std::string &opName) : opName_(opName) +{} + +AclnnBaseOperation::~AclnnBaseOperation() +{ + aclExecutor_ = nullptr; +} + +std::string AclnnBaseOperation::GetName() const +{ + return opName_; +} + +atb::Status AclnnBaseOperation::Setup( + const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) +{ + LOG_INFO(opName_ + " setup start"); + + // 调用子类,创建输入输出tensor,并存入VariantPack + int ret = CreateAclnnVariantPack(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " call CreateAclnnVariantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_INVALID_PARAM; + } + + // 调用子类,获取Executor和Workspace + ret = SetAclnnWorkspaceExecutor(); + if (ret != 0) { + LOG_ERROR( + opName_ + " call CreateAclnnVaSetAclnnWorkspaceExecutorriantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_INVALID_PARAM; + } + // 返回计算出的workspaceSize + workspaceSize = workspaceSize_; + LOG_INFO(opName_ + " setup end"); + return ret; +} + +atb::Status AclnnBaseOperation::Execute( + const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize, atb::Context *context) +{ + LOG_INFO(opName_ + " execute start"); + if (!context) { + LOG_ERROR(opName_ + " execute fail, context param is null"); + return atb::ERROR_INVALID_PARAM; + } + + aclrtStream stream = context->GetExecuteStream(); + if (!stream) { + LOG_ERROR(opName_ + " execute fail, execute stream in context is null"); + return atb::ERROR_INVALID_PARAM; + } + + // 更新数据传入的地址 + int ret = UpdateAclnnVariantPack(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " call UpdateAclnnVariantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + + LOG_INFO("Input workspaceSize " + std::to_string(workspaceSize) + " localCache workspaceSize " + + std::to_string(workspaceSize_)); + ret = ExecuteAclnnOp(workspace, stream); // 调用aclnn接口 + if (ret != 0) { + LOG_ERROR(opName_ + " call ExecuteAclnnOp fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + LOG_INFO(opName_ + " execute start"); + + return ret; +} + +atb::Status AclnnBaseOperation::UpdateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + // 更新inTensor的device地址 + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + int ret = -1; + if (!aclInTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclInTensors_[i]->atbTensor = variantPack.inTensors.at(i); + ret = aclSetInputTensorAddr(aclExecutor_, + aclInTensors_[i]->tensorIdx, + aclInTensors_[i]->tensor, + aclInTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + LOG_ERROR( + "inTensor " + std::to_string(i) + " call UpdateAclTensorDataPtr fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + } + + // 更新outTensor的device地址 + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + int ret = -1; + if (!aclOutTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclOutTensors_[i]->atbTensor = variantPack.outTensors.at(i); + ret = aclSetOutputTensorAddr(aclExecutor_, + aclOutTensors_[i]->tensorIdx, + aclOutTensors_[i]->tensor, + aclOutTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + LOG_ERROR( + "outTensor " + std::to_string(i) + " call UpdateAclTensorDataPtr fail, error: " + 
std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + } + return atb::NO_ERROR; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.h b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.h new file mode 100644 index 00000000..6b384eb0 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/aclnn/aclnn_operation_base.h @@ -0,0 +1,57 @@ +#ifndef ACLNN_OPERATION_BASE_H +#define ACLNN_OPERATION_BASE_H + +#include +#include +#include +#include +#include +#include "atb/infer_op_params.h" + +// 对atb::tensor的一层封装 +struct AclnnTensor +{ +public: + atb::Tensor atbTensor; // + aclTensor *tensor = nullptr; + int tensorIdx = -1; // aclTensor在aclExecutor中的index + bool needUpdateTensorDataPtr = false; + atb::SVector strides = {}; +}; + +// 保持与atb的算子的统一接口调用 +class AclnnBaseOperation : public atb::Operation +{ +public: + explicit AclnnBaseOperation(const std::string &opName); + ~AclnnBaseOperation() override; + std::string GetName() const override; + + // 仿atb接口,获取workspace的大小 + atb::Status Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) override; + + // 仿atb接口,算子执行 + atb::Status Execute(const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize, + atb::Context *context) override; + + // 创建输入aclnntensor + virtual atb::Status CreateAclnnVariantPack(const atb::VariantPack &variantPack) = 0; + + // 计算workspace大小 + virtual atb::Status SetAclnnWorkspaceExecutor() = 0; + + // 执行Aclnn op + virtual atb::Status ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) = 0; + + // 更新aclnn输入和输出tensor的地址 + atb::Status UpdateAclnnVariantPack(const atb::VariantPack &variantPack); + + std::string opName_; + aclOpExecutor *aclExecutor_ = nullptr; + atb::SVector> aclInTensors_; + atb::SVector> aclOutTensors_; + uint64_t workspaceSize_; + int workspaceBlockId_ = -1; +}; + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/ascend_test_atb.py b/oec-ascend/oec/resource/ApplicationDev/atb/ascend_test_atb.py new file mode 100644 index 00000000..49322022 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/ascend_test_atb.py @@ -0,0 +1,7 @@ +import oec + +oec.TestCase( + group=("集成测试","ATB"), + name="ATB_MASH_UP_GRAPH", + cmd=f'bash run.sh {oec.Context.cann_path} {oec.Context.output_dir}/tmp' +) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.cpp new file mode 100644 index 00000000..6185920c --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.cpp @@ -0,0 +1,59 @@ +#include "atb/atb_graph_op.h" +#include "utils/utils.h" + +atb::Status CreateGraphOperation(atb::Operation **operation) +{ + // 构图流程 + // 图算子的输入a,b,c,d + // 计算公式:(a+b) + (c+d) + // 输入是4个参数,输出是1个参数,有3个add算子,中间产生的临时输出是2个 + atb::GraphParam opGraph; + + opGraph.inTensorNum = 4; + opGraph.outTensorNum = 1; + opGraph.internalTensorNum = 2; + opGraph.nodes.resize(3); + + enum InTensorId + { // 定义各TensorID + IN_TENSOR_A = 0, + IN_TENSOR_B, + IN_TENSOR_C, + IN_TENSOR_D, + ADD3_OUT, + ADD1_OUT, + ADD2_OUT + }; + + size_t nodeId = 0; + atb::Node &addNode = opGraph.nodes.at(nodeId++); + atb::Node &addNode2 = opGraph.nodes.at(nodeId++); + atb::Node &addNode3 = opGraph.nodes.at(nodeId++); + + atb::Operation *op = nullptr; + atb::infer::ElewiseParam addParam; + addParam.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD; + auto status = atb::CreateOperation(addParam, 
&addNode.operation); + CHECK_RET(status, "addParam CreateOperation failed. status: " + std::to_string(status)); + addNode.inTensorIds = {IN_TENSOR_A, IN_TENSOR_B}; + addNode.outTensorIds = {ADD1_OUT}; + + atb::infer::ElewiseParam addParam2; + addParam2.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD; + status = atb::CreateOperation(addParam2, &addNode2.operation); + CHECK_RET(status, "addParam2 CreateOperation failed. status: " + std::to_string(status)); + addNode2.inTensorIds = {IN_TENSOR_C, IN_TENSOR_D}; + addNode2.outTensorIds = {ADD2_OUT}; + + atb::infer::ElewiseParam addParam3; + addParam3.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD; + status = CreateOperation(addParam3, &addNode3.operation); + CHECK_RET(status, "addParam3 CreateOperation failed. status: " + std::to_string(status)); + addNode3.inTensorIds = {ADD1_OUT, ADD2_OUT}; + addNode3.outTensorIds = {ADD3_OUT}; + + status = atb::CreateOperation(opGraph, operation); + CHECK_RET(status, "GraphParam CreateOperation failed. status: " + std::to_string(status)); + + return atb::NO_ERROR; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.h b/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.h new file mode 100644 index 00000000..05ca62a4 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/atb/atb_graph_op.h @@ -0,0 +1,15 @@ +#ifndef ATB_GRAPH_OP_H +#define ATB_GRAPH_OP_H + +#include +#include +#include +#include +#include "atb/infer_op_params.h" + +// 在构造图参数时,有两个点需要重点关注。一是Tensor的ID,ATB图接口中把Tensor分为三种类型,输入、输出和中间tensor,顾名思义,输入输出tensor是整图的输入输出tensor, +// 中间tensor则是在整图内的Tensor。构图时的TensorID从小到大应保证//为输入tensor、输出tensor、中间tensor的顺序,且每一种Tensor的个数要与参数中设置的一致。 +// 二是要注意排布Node的顺序,用户需要根据计算图的拓扑结构把计算图变成一个有序队列,同时还要保证tensor与节点之间的关系和计算图保持一致。 +atb::Status CreateGraphOperation(atb::Operation **operation); + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/main.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/main.cpp new file mode 100644 index 00000000..3b40f337 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/main.cpp @@ -0,0 +1,57 @@ +#include "model/model.h" +#include "memory/memory_utils.h" +#include +#include "utils/utils.h" + +void ModelExecute(uint32_t deviceId, Model &model) +{ + // 初始化模型,创建需要的context,stream + model.InitResource(deviceId); + + // 创建模型图 + model.CreateModelGraph(); + + // 创建模型输入,并填入值 + model.CreateModelInput(); + + // 创建模型的输出大小 + model.CreateModelOutput(); + + // 模型执行 + model.Execute(); + + // 打印输出Tensor的值 + PrintOutTensorValue(model.modelOutTensors_.at(0)); + + // 资源释放 + model.FreeResource(); +} + +int main() +{ + // AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret, "aclInit failed. ret: " + std::to_string(ret)); + + // 创建内存池 + size_t poolSize = 104857600; // Alloceted memory 100 MiB. 
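+    // 104857600 bytes = 100 * 1024 * 1024, i.e. 100 MiB per device.
+    // CreateMemoryPool below pre-allocates one pool of this size on every visible
+    // device, so it runs before the per-device worker threads are spawned.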
+ GetMemoryManager().CreateMemoryPool(poolSize); + + // 创建模型图 + uint32_t deviceCount = 0; + CHECK_RET(aclrtGetDeviceCount(&deviceCount), "get devicecount fail"); + std::vector modelArray(deviceCount); + + // 分多个线程进行模型图的下发 + std::vector threadArray(deviceCount); + for (size_t i = 0; i < deviceCount; i++) { + Model &model = modelArray.at(i); + threadArray.at(i) = std::thread([i, &model]{ModelExecute(i, model);}); // 线程创建及函数绑定 + } + for (size_t i = 0; i < deviceCount; i++) { + threadArray.at(i).join(); // 等待子线程结束 + } + + aclFinalize(); + return 0; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_env.h b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_env.h new file mode 100644 index 00000000..6b4bafa8 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_env.h @@ -0,0 +1,12 @@ +#ifndef MEMORY_ENV_H +#define MEMORY_ENV_H + +#include + +struct MemoryBlock { + int64_t blockId; // 内存块索引 + size_t blockSize; // 内存块大小 + void *address = nullptr; // 物理内存地址 +}; + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.cpp new file mode 100644 index 00000000..65c90597 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.cpp @@ -0,0 +1,63 @@ +#include +#include "memory_utils.h" +#include "utils/log.h" +#include "utils/utils.h" + +// 全局MemoryManager实例 +static MemoryManager g_memoryManager; + +MemoryManager::MemoryManager() +{} + +void MemoryManager::CreateMemoryPool(size_t poolSize) +{ + uint32_t deviceCount = 0; + + // 获取全部Device的数量 + CHECK_RET(aclrtGetDeviceCount(&deviceCount), "get devicecount fail"); + for (size_t i = 0; i < deviceCount; i++) { + + // 指定操作的Device + aclrtSetDevice(i); + + // 创建内存池,poolSize参数指定预分配空间大小 + std::shared_ptr memoryPool = std::make_shared(poolSize); + memoryPools_.push_back(memoryPool); + LOG_INFO("create mempool for device " + std::to_string(i) + " success"); + } +} + +int32_t MemoryManager::GetDeviceId() +{ + int32_t deviceId = -1; + CHECK_RET(aclrtGetDevice(&deviceId), "get device ID fail"); + return deviceId; +} + +std::shared_ptr &MemoryManager::GetMemoryPool() +{ + // 获取当前操作的Device,返回对应的内存池 + size_t deviceId = static_cast(GetDeviceId()); + CHECK_RET(deviceId >= memoryPools_.size(), "Invalid device id " + deviceId); + return memoryPools_[deviceId]; +} + +void MemoryManager::AllocateBlock(uint32_t size, int &blockId) +{ + GetMemoryPool()->AllocateBlock(size, blockId); +} + +void MemoryManager::FreeBlock(int blockId) +{ + GetMemoryPool()->FreeBlock(blockId); +} + +void MemoryManager::GetBlockPtr(int blockId, void *&addr) +{ + GetMemoryPool()->GetBlockPtr(blockId, addr); +} + +MemoryManager &GetMemoryManager() +{ + return g_memoryManager; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.h b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.h new file mode 100644 index 00000000..d45a0a8f --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memory_utils.h @@ -0,0 +1,38 @@ +#ifndef MEMORY_UTILS_H +#define MEMORY_UTILS_H + +#include +#include +#include "memorypool.h" + +// 内存管理类,管理每个Device上的内存池 +class MemoryManager { +public: + MemoryManager(); + + // 在每个Device上创建对应的内存池 + void CreateMemoryPool(size_t poolSize); + + // 获取当前线程对应的Device + int32_t GetDeviceId(); + + // 获取当前线程对应设备上的内存池 + std::shared_ptr &GetMemoryPool(); + + // 分配内存块 + void AllocateBlock(uint32_t size, int &blockId); + + // 释放内存块 + void FreeBlock(int blockId); + + // 获取内存块的物理地址 
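+    // (typical call order, as used for the per-node workspaces in model.cpp:
+    //  AllocateBlock -> GetBlockPtr -> FreeBlock)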
+ void GetBlockPtr(int blockId, void *&addr); + +private: + std::vector> memoryPools_; +}; + +// 获取全局MemoryManager实例 +MemoryManager &GetMemoryManager(); + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.cpp new file mode 100644 index 00000000..adeab1c1 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.cpp @@ -0,0 +1,104 @@ +#include +#include +#include "memorypool.h" +#include "utils/log.h" +#include "utils/utils.h" + +constexpr size_t POOL_SIZE = 104857600; // 预分配内存大小 100 MiB,大小可按实际需求决定 + +MemoryPool::MemoryPool(size_t poolSize = POOL_SIZE) +{ + // 调用acl接口预分配内存 + CHECK_RET(aclrtMalloc(&baseMemPtr_, poolSize, ACL_MEM_MALLOC_HUGE_FIRST), + "malloc huge size memrory " + std::to_string(poolSize) + " bytes fail"); + curMemPtr_ = baseMemPtr_; + remainSize_ = poolSize; +} + +MemoryPool::~MemoryPool() +{ + // 销毁时通过acl接口释放内存 + if (baseMemPtr_ != nullptr) { + CHECK_RET(aclrtFree(baseMemPtr_), "free huge memory fail"); + } + LOG_INFO("release MemoryPool success"); +} + +uint64_t MemoryPool::GenerateBlocksId() +{ + return static_cast(id_.fetch_add(1, std::memory_order_relaxed)); +} + +void MemoryPool::AllocateBlock(uint32_t size, int &blockId) +{ + std::unique_lock lock(blockMutex_); + + size_t alignSize = ((size + 31) & ~31) + 32; // 分配的空间需要32字节对齐后再加32字节 + + // 寻找是否有足够大小的空闲内存块 + for (auto it = freeBlocks_.begin(); it != freeBlocks_.end(); it++) { + if (it->second.blockSize >= alignSize) { + blockId = it->second.blockId; + usedBlocks_.insert(*it); + freeBlocks_.erase(it); + LOG_INFO("find free block id " + std::to_string(blockId) + " to allocate"); + return; + } + } + + // 没有找到符合的内存块,需要从剩余的内存空间中创建新内存块 + if (remainSize_ > alignSize) { + blockId = GenerateBlocksId(); + uint64_t curMemPtrAlign = (reinterpret_cast(curMemPtr_) + 63) & ~63; // 内存地址需要64字节对齐 + remainSize_ -= (curMemPtrAlign - reinterpret_cast(curMemPtr_)); + curMemPtr_ = reinterpret_cast(curMemPtrAlign); + + MemoryBlock block = {blockId, alignSize, curMemPtr_}; + usedBlocks_.insert({blockId, block}); + remainSize_ -= alignSize; + curMemPtr_ = reinterpret_cast(curMemPtr_) + alignSize; + LOG_INFO("allocate block id " + std::to_string(blockId) + " for size " + std::to_string(alignSize)); + return; + } + + // 剩余的内存空间不足,无法分配内存块 + LOG_ERROR("allocate block fail"); +} + +void MemoryPool::FreeBlock(int blockId) +{ + std::unique_lock lock(blockMutex_); + + // 内存块索引合法性校验 + if (blockId < 0) { + LOG_INFO("skip over the invalid block id " + std::to_string(blockId)); + return; + } + + // 将需要释放的内存块标记为空闲 + auto it = usedBlocks_.find(blockId); + if (it != usedBlocks_.end()) { + freeBlocks_.insert(*it); + usedBlocks_.erase(it); + } else { + LOG_ERROR("Double free block id " + std::to_string(blockId)); + } +} + +void MemoryPool::GetBlockPtr(int blockId, void *&addr) +{ + std::unique_lock lock(blockMutex_); + + if (blockId < 0) { + LOG_INFO("Invalid block id " + std::to_string(blockId) + "to get ptr"); + return; + } + + // 寻找内存块,返回物理内存地址 + auto it = usedBlocks_.find(blockId); + if (it != usedBlocks_.end()) { + addr = it->second.address; + } else { + LOG_ERROR("Get block address error, block id " + std::to_string(blockId)); + } +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.h b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.h new file mode 100644 index 00000000..9ffcd662 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/memory/memorypool.h @@ -0,0 +1,38 @@ +#ifndef 
MEMORYPOOL_H +#define MEMORYPOOL_H + +#include +#include +#include +#include +#include "memory_env.h" + +// Device内存池 +class MemoryPool { +public: + explicit MemoryPool(size_t poolSize); + ~MemoryPool(); + + // 分配内存块 + void AllocateBlock(uint32_t size, int &blockId); + + // 释放内存块 + void FreeBlock(int blockId); + + // 获取内存块的物理地址 + void GetBlockPtr(int blockId, void *&addr); + +private: + // 生成内存块索引 + uint64_t GenerateBlocksId(); + + std::atomic id_ = 0; + std::mutex blockMutex_; + void *baseMemPtr_ = nullptr; + void *curMemPtr_ = nullptr; + int64_t remainSize_ = 0; + std::unordered_map freeBlocks_; + std::unordered_map usedBlocks_; +}; + +#endif \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/model/model.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/model/model.cpp new file mode 100644 index 00000000..508b9d15 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/model/model.cpp @@ -0,0 +1,263 @@ +#define USE_MEMPOOL + +#include "model/model.h" +#include "aclnn/aclnn_gelu_operation.h" +#include "utils/utils.h" +#include "atb/atb_graph_op.h" +#include "memory/memory_utils.h" + +void Model::InitResource(uint32_t deviceId) +{ + // 配置deviceId + deviceId_ = deviceId; + auto ret = aclrtSetDevice(deviceId_); + CHECK_RET(ret, "aclrtSetDevice failed. ret: " + std::to_string(ret)); + + // 创建context + ret = atb::CreateContext(&modeContext_); + CHECK_RET(ret, "ATB CreateContext failed. ret: " + std::to_string(ret)); + + // 创建stream + ret = aclrtCreateStream(&modelStream_); + CHECK_RET(ret, "aclrtCreateStream failed. ret: " + std::to_string(ret)); + + // 配置stream + modeContext_->SetExecuteStream(modelStream_); +} + +void Model::CreateModelGraph() +{ + LOG_INFO("CreateModelGraph start"); + // 这里以模型中有2个节点参与演示 + nodes_.resize(2); + for (size_t i = 0; i < nodes_.size(); i++) { + auto node = Node(); + nodes_[i] = node; + } + + modelInTensors_.resize(Mode_INPUT_SIZE); + modelOutTensors_.resize(Mode_OUTPUT_SIZE); + + internalTensors_.resize(1); + size_t nodeId = 0; + CreateGraphOpLayer(nodeId++); + + // step2:创建aclnn算子的Node + CreateAclnnOpLayer(nodeId); + LOG_INFO("CreateModelGraph end"); +} + +void Model::CreateGraphOpLayer(size_t nodeId) +{ + // 创建图算子的opreation + Node &graph_node = nodes_[nodeId]; + auto ret = CreateGraphOperation(&graph_node.operation_); + CHECK_RET(ret, "CreateGraphOperation failed"); + graph_node.inTensors_.resize(graph_node.operation_->GetInputNum()); + + // 设置图算子node节点的输入 + // 因为图算子的输入就是整个model的输入,因此这里直接从model的inTensors_赋值 + size_t layerInTensorId = 0; + graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_A); + graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_B); + graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_C); + graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_D); + + // 设置图算子node节点的输出,因为只有一个中间节点 + graph_node.outTensors_ = {&internalTensors_.at(0)}; + graph_node.outTensorTypes_ = {TensorType::INTERNAL_TENSOR}; +}; + +void Model::CreateAclnnOpLayer(size_t nodeId) +{ + // 创建aclnn算子的opreation + Node &aclnn_node = nodes_[nodeId]; + AclnnGeluParam AclnnGeluParam; + AclnnGeluParam.geluApproximate = -1; + aclnn_node.operation_ = new GeluOperation("Gelu", AclnnGeluParam); + aclnn_node.inTensors_.resize(aclnn_node.operation_->GetInputNum()); + + // 设置aclnn算子node节点的输入 + // 因为图算子的输出就是aclnn算子的输入, + size_t layerInTensorId = 0; + aclnn_node.inTensors_.at(layerInTensorId++) = &internalTensors_.at(0); + + // 设置aclnn算子node节点的输出,model的输出 + 
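+    // (GLUE_OUT is the single model output defined in OutTensorId in model.h;
+    //  NOT_INTERNAL_TENSOR tells BuildNodeVariantPack not to allocate it again,
+    //  since its device memory is created in CreateModelOutput)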
aclnn_node.outTensors_ = {&modelOutTensors_.at(GLUE_OUT)}; + aclnn_node.outTensorTypes_ = {TensorType::NOT_INTERNAL_TENSOR}; +} + +void Model::CreateModelInput() +{ + LOG_INFO("CreateModelInput start"); + atb::SVector intensorDescs; + intensorDescs.resize(Mode_INPUT_SIZE); + CreateInTensorDescs(intensorDescs); + CreateInTensors(modelInTensors_, intensorDescs); + LOG_INFO("CreateModelInput end"); +} + +void Model::CreateModelOutput() +{ + LOG_INFO("CreateModelOutput start"); + atb::SVector outtensorDescs; + outtensorDescs.resize(Mode_OUTPUT_SIZE); + + // 设置输入的input desc + atb::SVector inTensorDescs; + inTensorDescs.resize(Mode_INPUT_SIZE); + for (size_t i = 0; i < modelInTensors_.size(); ++i) { + inTensorDescs.at(i) = modelInTensors_.at(i).desc; + } + + // 调用infer shape,推导出模型的输出 + InferShape(inTensorDescs, outtensorDescs); + CreateOutTensors(modelOutTensors_, outtensorDescs); + LOG_INFO("CreateModelOutput end"); +} + +atb::Status Model::InferShape( + const atb::SVector &inTensorDescs, atb::SVector &outTensorDescs) +{ + // 输出的shape和输入是相同的。取第一个的输入即可 + outTensorDescs.at(0) = modelInTensors_.at(0).desc; + return atb::NO_ERROR; +} + +void Model::Execute() +{ + LOG_INFO(modelName_ + " Execute start"); + for (size_t nodeId = 0; nodeId < nodes_.size(); ++nodeId) { + BuildNodeVariantPack(nodeId); + atb::Status status = ExecuteNode(nodeId); + CHECK_RET(status, "ExecuteNode " + std::to_string(nodeId) + " failed. status: " + std::to_string(status)); + } + + WaitFinish(); + LOG_INFO(modelName_ + " Execute end"); +} + +void Model::BuildNodeVariantPack(int nodeId) +{ + LOG_INFO("buildNodeVariantPack nodes[" + std::to_string(nodeId) + "] start"); + + auto &node = nodes_.at(nodeId); + atb::SVector inTensorDescs; + node.variantPack_.inTensors.resize(node.operation_->GetInputNum()); + inTensorDescs.resize(node.operation_->GetInputNum()); + + // 获取node中operation_的输入tensor desc + for (size_t i = 0; i < node.inTensors_.size(); ++i) { + node.variantPack_.inTensors.at(i) = *node.inTensors_.at(i); + inTensorDescs.at(i) = node.inTensors_.at(i)->desc; + } + + atb::SVector outTensorDescs; + outTensorDescs.resize(node.operation_->GetOutputNum()); + + // 调用operation_的InferShape,推导出out tensor的desc + atb::Status st = node.operation_->InferShape(inTensorDescs, outTensorDescs); + + node.variantPack_.outTensors.resize(node.operation_->GetOutputNum()); + for (size_t i = 0; i < node.outTensors_.size(); ++i) { + node.variantPack_.outTensors.at(i) = *node.outTensors_.at(i); + if (node.outTensorTypes_.at(i) == TensorType::INTERNAL_TENSOR) { + // 创建输出tensor的空间 + CreateTensorFromDesc(node.variantPack_.outTensors.at(i), outTensorDescs.at(i)); + *node.outTensors_.at(i) = node.variantPack_.outTensors.at(i); + } + } + LOG_INFO("buildNodeVariantPack nodes[" + std::to_string(nodeId) + "] end"); +} + +atb::Status Model::ExecuteNode(int nodeId) +{ + auto &node = nodes_.at(nodeId); + + // 调用Setup接口 + uint64_t workspaceSize = 0; + atb::Status status = node.operation_->Setup(node.variantPack_, workspaceSize, modeContext_); + CHECK_RET(status, "Setup node " + std::to_string(nodeId) + " failed. 
status: " + std::to_string(status)); + + LOG_INFO("Get node[" + std::to_string(nodeId) + "] workspace size:" + std::to_string(workspaceSize)); + + // 分配workspace +#ifdef USE_MEMPOOL + CreateWorkspaceBuffer(nodeId, workspaceSize); +#else + if (workspaceSize != 0) { + status = aclrtMalloc(&node.workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(status, "alloc error!"); + } +#endif + + // 调用Execute接口 + LOG_INFO("Execute node[" + std::to_string(nodeId) + "] start"); + status = node.operation_->Execute(node.variantPack_, (uint8_t *)(node.workspace_), workspaceSize, modeContext_); + CHECK_RET(status, "Execute node " + std::to_string(nodeId) + " failed. status: " + std::to_string(status)); + LOG_INFO("Execute node[" + std::to_string(nodeId) + "] end"); + return atb::NO_ERROR; +} + +void Model::CreateWorkspaceBuffer(int nodeId, int workspaceSizeNeeded) +{ + auto &node = nodes_.at(nodeId); + if (workspaceSizeNeeded == 0) { + LOG_INFO("skip the workspacebuffer for size 0"); + return; + } + if (node.workspaceBlockId_ == -1 || node.workspaceSize_ == 0) { + node.workspaceSize_ = workspaceSizeNeeded; + GetMemoryManager().AllocateBlock(node.workspaceSize_, node.workspaceBlockId_); + } + if (node.workspaceSize_ < workspaceSizeNeeded) { + GetMemoryManager().FreeBlock(node.workspaceBlockId_); + GetMemoryManager().AllocateBlock(workspaceSizeNeeded, node.workspaceBlockId_); + node.workspaceSize_ = workspaceSizeNeeded; + } + + GetMemoryManager().GetBlockPtr(node.workspaceBlockId_, node.workspace_); +} + +void Model::FreeResource() +{ + LOG_INFO("FreeResource start"); + auto status = aclrtDestroyStream(modelStream_); // 销毁stream + CHECK_RET(status, "aclrtDestroyStream failed"); + + // 释放operation + for (auto &node : nodes_) { + atb::DestroyOperation(node.operation_); +#ifdef USE_MEMPOOL + GetMemoryManager().FreeBlock(node.workspaceBlockId_); +#endif + } + // 销毁context + status = atb::DestroyContext(modeContext_); + CHECK_RET(status, "aclrtDestroyStream failed"); + // 销毁输入tensor + for (size_t i = 0; i < modelInTensors_.size(); i++) { + aclrtFree(modelInTensors_.at(i).deviceData); + } + + // 销毁输出tensor + for (size_t i = 0; i < modelOutTensors_.size(); i++) { + aclrtFree(modelOutTensors_.at(i).deviceData); + } + + // 释放中间tensor + for (size_t i = 0; i < internalTensors_.size(); i++) { + aclrtFree(internalTensors_.at(i).deviceData); + } + + aclrtResetDevice(deviceId_); // 重置deviceId + LOG_INFO("FreeResource end"); +} + +void Model::WaitFinish() +{ + // step9:销毁创建的对象,释放内存 + // 流同步,作用是等待device侧任务计算完成 + auto ret = aclrtSynchronizeStream(modelStream_); + CHECK_RET(ret, "sync error!"); +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/model/model.h b/oec-ascend/oec/resource/ApplicationDev/atb/model/model.h new file mode 100644 index 00000000..f00419b4 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/model/model.h @@ -0,0 +1,122 @@ +#ifndef MODEL_H +#define MODEL_H + +#include +#include +#include +#include +#include +#include "atb/infer_op_params.h" +#include "utils/log.h" + +enum class TensorType +{ + INTERNAL_TENSOR = 0, + NOT_INTERNAL_TENSOR, +}; + +// 图节点,每个Node表示一个Operation或者GraphOperation +struct Node +{ + // Node对应的operation或者graphOperation。 + atb::Operation *operation_ = nullptr; + + // Node的输入tensors + atb::SVector inTensors_{}; + + // Node的输出tensors + atb::SVector outTensors_{}; + + // Node的输出是中间tensor类型 + atb::SVector outTensorTypes_{}; + + atb::VariantPack variantPack_{}; + + uint64_t workspaceSize_ = 0; + int workspaceBlockId_ = -1; + void *workspace_ = nullptr; +}; + 
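+// For each Node, Model::Execute builds its VariantPack from the tensor pointers
+// above, calls operation_->Setup to obtain the required workspace size, binds a
+// workspace block from the memory pool (workspaceBlockId_), and then calls
+// operation_->Execute on the model's stream.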
+// 所有的Node组成一个完整的图。 +class Model +{ +public: + // 描述该模型的输入 + enum InTensorId : int + { // 定义各TensorID + IN_TENSOR_A = 0, + IN_TENSOR_B, + IN_TENSOR_C, + IN_TENSOR_D, + Mode_INPUT_SIZE, + }; + + enum OutTensorId : int + { + GLUE_OUT = 0, + Mode_OUTPUT_SIZE, + }; + + explicit Model(std::string &&modelName = "") : modelName_(std::move(modelName)) + { + LOG_INFO("Create model: " + modelName_); + } + + // 模型初始化,设置模型的 + void InitResource(uint32_t deviceId); + + // 创建模型图 + void CreateModelGraph(); + + // 创建模型的输入tensors + void CreateModelInput(); + + // 创建模型的输入tensors + void CreateModelOutput(); + + // modle执行 + void Execute(); + + // stream流同步 + void WaitFinish(); + + // 资源释放 + void FreeResource(); + + // 模型的输入tensors + atb::SVector modelInTensors_; + + // 模型的输出tensors + atb::SVector modelOutTensors_; + +private: + // 创建图算子的opreation + void CreateGraphOpLayer(size_t nodeId); + + // 创建aclnn算子的opreation + void CreateAclnnOpLayer(size_t nodeId); + + // 构造对应nodeId的node的VariantPack + void BuildNodeVariantPack(int nodeId); + + // 下发nodeId对应的Operation + atb::Status ExecuteNode(int nodeId); + + // workspace创建函数 + void CreateWorkspaceBuffer(int nodeId, int workspaceSizeNeeded); + + // 模型图的shape推导函数 + atb::Status InferShape( + const atb::SVector &inTensorDescs, atb::SVector &outTensorDescs); + + std::string modelName_; + uint32_t deviceId_ = 1; + atb::Context *modeContext_ = nullptr; + aclrtStream modelStream_ = nullptr; + std::vector nodes_; + + // 模型的中间tensors,layer之间以internalTensors进行连接,这里要注意顺序 + std::vector internalTensors_; +}; + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/run.sh b/oec-ascend/oec/resource/ApplicationDev/atb/run.sh new file mode 100755 index 00000000..0d5ad5d7 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/run.sh @@ -0,0 +1,33 @@ +cann_install_path=$1 +output_path=$2 +src_path=$(pwd) +source "${cann_install_path}/ascend-toolkit/set_env.sh" +source "${cann_install_path}/nnal/atb/set_env.sh" + +function compile_model() { + mkdir -p "${output_path}/atb_mash_up_graph"; + cd "${output_path}/atb_mash_up_graph"; + CXX11_ABI=$(env | awk -F'[=]' '/ATB_HOME_PATH/ {last=$2} END{print last}' | grep -oP 'cxx_abi_(\d)' | grep -oP '\d') + CXX11_ABI=$(test "$CXX11_ABI" -eq 1 && echo "ON" || echo "OFF") + echo "USE_CXX11_ABI=${CXX11_ABI}" + cmake "${src_path}" -DUSE_CXX11_ABI="${CXX11_ABI}"; + if [ $? -ne 0 ]; then + echo "ERROR: generate makefile failed!" + exit 1 + fi + + cmake --build . -j; + if [ $? -ne 0 ]; then + echo "ERROR: compile test failed!" + exit 1 + else + echo "INFO: compile test succeed!" + fi + cd -; + +} + +compile_model +cd "${output_path}/atb_mash_up_graph" +./test_model +exit $? 
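The Model class above is driven in a fixed order (see ModelExecute in main.cpp): resources first, then graph construction, then input/output tensors, then execution. A minimal single-device sketch of that sequence is shown below; the model name "demo", device id 0 and the 100 MiB pool size are illustrative choices, and the build assumptions are those of the CMakeLists.txt in this patch.

#include "model/model.h"
#include "memory/memory_utils.h"
#include "utils/utils.h"

int main()
{
    CHECK_RET(aclInit(nullptr), "aclInit failed");
    GetMemoryManager().CreateMemoryPool(104857600);  // one 100 MiB pool per device

    Model model("demo");
    model.InitResource(0);        // aclrtSetDevice + ATB context + stream
    model.CreateModelGraph();     // graph add-op node + aclnn Gelu node
    model.CreateModelInput();     // 4 FP16 2x2 input tensors
    model.CreateModelOutput();    // output desc inferred from input 0
    model.Execute();              // Setup + Execute per node, then stream sync
    PrintOutTensorValue(model.modelOutTensors_.at(0));
    model.FreeResource();

    aclFinalize();
    return 0;
}

The shipped main.cpp generalizes this by creating one Model per visible device and running each in its own thread.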
diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.cpp new file mode 100644 index 00000000..7a7ba9be --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.cpp @@ -0,0 +1,28 @@ +#include "utils/log.h" + +const char *logLevelToString(LogLevel level) +{ + switch (level) + { + case LogLevel::DEBUG: + return "DEBUG"; + case LogLevel::INFO: + return "INFO"; + case LogLevel::WARNING: + return "WARNING"; + case LogLevel::ERROR: + return "ERROR"; + default: + return "UNKNOWN"; + } +} + +std::string getCurrentTime() +{ + auto now = std::chrono::system_clock::now(); + auto in_time_t = std::chrono::system_clock::to_time_t(now); + + std::stringstream ss; + ss << std::put_time(std::localtime(&in_time_t), "%Y-%m-%d %X"); + return ss.str(); +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.h b/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.h new file mode 100644 index 00000000..06f38c01 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/utils/log.h @@ -0,0 +1,94 @@ +#ifndef LOG_H +#define LOG_H + +#include +#include +#include +#include +#include +#include +#include + +// 定义日志级别 +enum class LogLevel +{ + DEBUG, + INFO, + WARNING, + ERROR +}; + +// 将日志级别转换为字符串 +const char *logLevelToString(LogLevel level); + +// 获取当前时间的字符串表示 +std::string getCurrentTime(); + +// 日志类 +class Logger +{ +public: + // 构造函数 + Logger(const std::string &filename, LogLevel minLevel = LogLevel::INFO) : minLogLevel(minLevel) + { + logFile.open(filename, std::ios::out | std::ios::app); + if (!logFile.is_open()) + { + std::cerr << "Failed to open log file: " << filename << std::endl; + } + } + + // 析构函数 + ~Logger() + { + if (logFile.is_open()) + { + logFile.close(); + } + } + + // 设置最小日志级别 + void setMinLogLevel(LogLevel level) + { + minLogLevel = level; + } + + // 打印日志 + template + void log(LogLevel level, const char *file, int line, const char *format, Args... args) + { + std::lock_guard lock(mutex); + if (level >= minLogLevel) + { + std::stringstream ss; + ss << "[" << getCurrentTime() << "] [" << logLevelToString(level) << "] [" << file << ":" << line << "] "; + (ss << ... << args); + + std::string logMessage = ss.str(); + std::cout << logMessage << std::endl; + if (logFile.is_open()) + { + logFile << logMessage << std::endl; + } + } + } + +private: + std::ofstream logFile; + LogLevel minLogLevel; + std::mutex mutex; +}; + +// 全局 logger 对象 +static Logger g_logger("app.log", LogLevel::DEBUG); + +// 辅助宏,用于处理可变参数列表 +#define LOG_HELPER(level, ...) g_logger.log(level, __FILE__, __LINE__, "%s", ##__VA_ARGS__) + +// 使用宏定义简化日志调用 +#define LOG_DEBUG(...) LOG_HELPER(LogLevel::DEBUG, ##__VA_ARGS__) +#define LOG_INFO(...) LOG_HELPER(LogLevel::INFO, ##__VA_ARGS__) +#define LOG_WARNING(...) LOG_HELPER(LogLevel::WARNING, ##__VA_ARGS__) +#define LOG_ERROR(...) 
LOG_HELPER(LogLevel::ERROR, ##__VA_ARGS__) + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.cpp b/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.cpp new file mode 100644 index 00000000..431aa84f --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.cpp @@ -0,0 +1,70 @@ +#include "utils/log.h" +#include "utils/utils.h" + +void CreateInTensorDescs(atb::SVector &intensorDescs) +{ + for (size_t i = 0; i < intensorDescs.size(); i++) + { + intensorDescs.at(i).dtype = ACL_FLOAT16; + intensorDescs.at(i).format = ACL_FORMAT_ND; + intensorDescs.at(i).shape.dimNum = 2; + intensorDescs.at(i).shape.dims[0] = 2; + intensorDescs.at(i).shape.dims[1] = 2; + } +} + +void CreateInTensors(atb::SVector &inTensors, atb::SVector &intensorDescs) +{ + for (size_t i = 0; i < inTensors.size(); i++) + { + inTensors.at(i).desc = intensorDescs.at(i); + inTensors.at(i).dataSize = atb::Utils::GetTensorSize(inTensors.at(i)); + std::vector hostData(atb::Utils::GetTensorNumel(inTensors.at(i)), 2); // 一段全2的hostBuffer + int ret = aclrtMalloc( + &inTensors.at(i).deviceData, inTensors.at(i).dataSize, ACL_MEM_MALLOC_HUGE_FIRST); // 分配NPU内存 + CHECK_RET(ret, "alloc error!"); + + ret = aclrtMemcpy(inTensors.at(i).deviceData, + inTensors.at(i).dataSize, + hostData.data(), + hostData.size() * sizeof(uint16_t), + ACL_MEMCPY_HOST_TO_DEVICE); // 拷贝CPU内存到NPU侧 + CHECK_RET(ret, "aclrtMemcpy error!"); + } +} + +void CreateOutTensors(atb::SVector &outTensors, atb::SVector &outtensorDescs) +{ + for (size_t i = 0; i < outTensors.size(); i++) + { + outTensors.at(i).desc = outtensorDescs.at(i); + outTensors.at(i).dataSize = atb::Utils::GetTensorSize(outTensors.at(i)); + int ret = aclrtMalloc(&outTensors.at(i).deviceData, outTensors.at(i).dataSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret, "aclrtMalloc error!"); + } +} + +void CreateTensorFromDesc(atb::Tensor &tensor, atb::TensorDesc &tensorDescs) +{ + tensor.desc = tensorDescs; + tensor.dataSize = atb::Utils::GetTensorSize(tensor); + int ret = aclrtMalloc(&tensor.deviceData, tensor.dataSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret, "aclrtMalloc error!"); +} + +void PrintOutTensorValue(atb::Tensor &outTensor) +{ + // 输出tensor拷贝回host侧并打印 + std::vector outBuffer(atb::Utils::GetTensorNumel(outTensor)); + int ret = aclrtMemcpy(outBuffer.data(), + outBuffer.size() * sizeof(uint16_t), + outTensor.deviceData, + outTensor.dataSize, + ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret, "copy error!"); + + for (size_t i = 0; i < outBuffer.size(); i = i + 1) + { + LOG_INFO("out[" + std::to_string(i) + "] = " + std::to_string((uint32_t)outBuffer.at(i))); + } +} diff --git a/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.h b/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.h new file mode 100644 index 00000000..411f5be1 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/atb/utils/utils.h @@ -0,0 +1,39 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include +#include +#include +#include "atb/infer_op_params.h" +#include "utils/log.h" + +#define CHECK_RET(cond, str) \ + do \ + { \ + if (cond) \ + { \ + LOG_ERROR(str); \ + exit(0); \ + } \ + } while (0) + +// 设置各个intensor的属性 +void CreateInTensorDescs(atb::SVector &intensorDescs); + +// 设置各个输入tensor并且为各个输入tensor分配内存空间,此处的输入tensor为手动设置,工程实现上可以使用torchTensor转换或者其他简单数据结构转换的方式 +void CreateInTensors(atb::SVector &inTensors, atb::SVector &intensorDescs); + +// 设置各个outtensor并且为outtensor分配内存空间,同输入tensor设置 +void CreateOutTensors(atb::SVector &outTensors, atb::SVector 
&outtensorDescs); + +void CreateTensorFromDesc(atb::Tensor &tensor, atb::TensorDesc &tensorDescs); + +// 输出打印 +void PrintOutTensorValue(atb::Tensor &outTensor); + +// 创建图算子 +atb::Status CreateGraphOperation(atb::Operation **operation); + +#endif diff --git a/oec-ascend/oec/resource/ApplicationDev/base_function_test/ascend_test_pyacl.py b/oec-ascend/oec/resource/ApplicationDev/base_function_test/ascend_test_pyacl.py new file mode 100644 index 00000000..cecd8963 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/base_function_test/ascend_test_pyacl.py @@ -0,0 +1,16 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","基础功能"), + name = "PYACL_DEVICE", + cmd = f"python3 ./test_acl_device.py", + exclude=['failed',"ERROR","Error", "FAIL"], + include="OK" + ) + +oec.TestCase( + group= ("应用开发","基础功能"), + name = "PYACL_EVENT", + cmd = f"python3 ./test_acl_event.py" + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_device.py b/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_device.py new file mode 100644 index 00000000..305ce20a --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_device.py @@ -0,0 +1,133 @@ +# encoding: utf-8 +# 版权所有 (C) 华为技术有限公司 2022-2023 +import unittest +import logging + + +import utils as util +import acl + +ACL_DEVICE = 0 +ACL_HOST = 1 +ACL_RT_OVERFLOW_MODE_SATURATION = 0 +ACL_RT_OVERFLOW_MODE_INFNAN = 1 +ACL_RT_OVERFLOW_MODEL_UNDEF=2 + + +class TestDevice(unittest.TestCase): + + @classmethod + def tearDownClass(cls): + #after all test + pass + + @classmethod + def setUpClass(cls): + # before all test + pass + + def setUp(self): + # before one test + pass + + def tearDown(self): + # after one test + pass + + def test_device_001_normal(self): + """ + test case for setting and restting device + 1. set device 0 + 2. get and check device id + 3. reset device 0 + """ + ret = acl.rt.set_device(0) + self.assertEqual(ret, 0) + d, ret = acl.rt.get_device() + self.assertEqual(d, 0) + self.assertEqual(ret, 0) + ret = acl.rt.reset_device(0) + self.assertEqual(ret, 0) + + + def test_device_007_get_device_utilization_rate(self): + """ + 获取device的cube, aicpu, vector core单元的使用率 + 1、获取环境上的npu数量 + 2、获取每个npu的使用率 + 3、检查获取到的使用率是否包含所有必要的字段 + """ + n, ret =acl.rt.get_device_count() + self.assertEqual(ret, 0) + tmp = { + 'cube_utilization': 0, + 'vector_utilization': 0, + 'aicpu_utilization': 0, + 'memory_utilization': 0, + 'utilization_extend': 0 + } + for i in range(n): + rst, ret = acl.rt.get_device_utilization_rate(i) + self.assertEqual(ret, 0) + for key in tmp: + self.assertIn(key, rst) + + def test_device_009_query_device_status(self): + """ + test device status + 1. get device count + 2. query status for each device + 3. check statis is ok for each device + """ + n, ret = acl.rt.get_device_count() + self.assertEqual(ret, 0) + for i in range(n): + status, ret = acl.rt.query_device_status(i) + self.assertEqual(ret, 0) + self.assertEqual(status, 0) + + def test_device_010_peek_at_last_error(self): + """ + test device peek at last error + 1. make a mistake to rasie error + 2. peek last error + 3. 
check error is not cleared + """ + ret = acl.rt.set_device(-1) + self.assertNotEqual(ret, 0) + + #测试捕获错误码 + ret = acl.rt.peek_at_last_error(0) + self.assertNotEqual(ret, 0) + + # 测试错误吗没有被清空 + ret = acl.rt.peek_at_last_error(0) + self.assertNotEqual(ret, 0) + + def test_device_011_synchronize_device_with_timeout(self): + """ + test synchronize device with timeout + """ + ret = acl.rt.set_device(ACL_DEVICE) + self.assertEqual(ret, 0) + + ret = acl.rt.synchronize_device_with_timeout(5) + self.assertEqual(ret, 0) + + def test_device_017_reset_device_force(self): + """ + test reset device force + """ + ret = acl.rt.set_device(ACL_DEVICE) + self.assertEqual(ret, 0) + ret = acl.rt.reset_device(ACL_DEVICE) + self.assertEqual(ret, 0) + ret = acl.rt.set_device(ACL_DEVICE) + self.assertEqual(ret, 0) + ret = acl.rt.reset_device_force(ACL_DEVICE) + self.assertEqual(ret, 0) + +if __name__ == "__main__": + suite = util.switch_cases(TestDevice, "all") + unittest.TextTestRunner(verbosity=2).run(suite) + \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_event.py b/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_event.py new file mode 100644 index 00000000..8e0eebdc --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/base_function_test/test_acl_event.py @@ -0,0 +1,328 @@ +# encoding: utf-8 +import unittest +import time +import threading +import acl +import utils as util + + + +ACL_EVENT_TIME_LINE = 0x0000008 +ACL_EVENT_RECORD_STATUS_NOT_READY = 0 +ACL_EVENT_RECORD_STATUS_COMPLETE = 1 + +g_callbackRunFlag = True + + +def launch_callback_fun_1(args_list): + for i in range(3): + print("lanuch_callback_fun 1") + print(args_list) + + +def launch_callback_fun_2(args_list): + for i in range(3): + print("lanuch_callback_fun 2") + print(args_list) + + +def callback_thr_func(args_list): + print("[callbacl_thr_func] args = ", args_list[0], args_list[1]) + timeout = args_list[1] + + print("[callback_thr_func] g_callbackRunFlag = ", g_callbackRunFlag, timeout) + + while g_callbackRunFlag is True: + print("[callback_thr_func] g_callbackRunFlag = ", g_callbackRunFlag) + ret = acl.rt.process_report(timeout) + print("[INFO] process_report ret = ", ret) + + print("[INFO] end") + + +class TestEvent(unittest.TestCase): + + def setUp(self): + + pass + + def tearDown(self): + + pass + + @classmethod + def tearDownClass(cls): + ret = acl.finalize() + assert ret == 0 + + @classmethod + def setUpClass(cls): + ret = acl.init() + assert ret == 0 + + def test_event_001_normal(self): + """ + test case for creating and destroying event + :return: + """ + ret = acl.rt.set_device(0) + self.assertEqual(ret, 0) + et, ret = acl.rt.create_event() + self.assertEqual(ret, 0) + ret = acl.rt.destroy_event(et) + self.assertEqual(ret, 0) + + def test_event_006_callback(self): + """ + test case for launching a callback function to do soming + 1.init resource : create_contest create_stream + 2.start a task by starting a thread + the thread triggers callback processing by calling process_report + 3.register the thread tor handle the callback function + 4.launch a callback function + 5.unresgistering a thread + 6.free reasources + :return: + """ + ret = acl.rt.set_device(0) + self.assertEqual(ret, 0) + stream, ret = acl.rt.create_stream() + self.assertEqual(ret, 0) + + timeout = 1000 + global g_callbackRunFlag + g_callbackRunFlag = True + args_list = [g_callbackRunFlag, timeout] + thr_id, ret = acl.util.start_thread(callback_thr_func,args_list) + self.assertEqual(ret,0) 
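+        # subscribe_report binds this stream to thr_id: callbacks launched on the
+        # stream are delivered to that thread and executed when it calls
+        # acl.rt.process_report, which callback_thr_func keeps polling until
+        # g_callbackRunFlag is cleared.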
+ + ret = acl.rt.subscribe_report(thr_id, stream) + self.assertEqual(ret, 0) + + ret = acl.rt.launch_callback(launch_callback_fun_1, ["zzq", "qzz"], 1, stream) + self.assertEqual(ret, 0) + ret = acl.rt.launch_callback(launch_callback_fun_2, ["zzq", "qzz"], 1, stream) + self.assertEqual(ret, 0) + + ret = acl.rt.synchronize_stream(stream) + self.assertEqual(ret, 0) + + ret = acl.rt.subscribe_report(thr_id, 0) + self.assertEqual(ret, 0) + + ret = acl.rt.launch_callback(launch_callback_fun_1, ["zzq", "qzz"], 1, 0) + self.assertEqual(ret, 0) + ret = acl.rt.launch_callback(launch_callback_fun_2, ["zzq", "qzz"], 1, 0) + self.assertEqual(ret, 0) + + ret = acl.rt.synchronize_stream(0) + self.assertEqual(ret, 0) + + g_callbackRunFlag = False + + ret =acl.rt.unsubscribe_report(thr_id, stream) + self.assertEqual(ret, 0) + + ret =acl.rt.unsubscribe_report(thr_id, 0) + self.assertEqual(ret, 0) + + ret =acl.util.stop_thread(thr_id) + self.assertEqual(ret, 0) + + ret = acl.rt.destroy_stream(stream) + self.assertEqual(ret, 0) + ret = acl.rt.reset_device(0) + self.assertEqual(ret, 0) + + def test_event_0008_multi_streams(self): + """ + test case for acl synchroniztion waiting interface with mulit-streams + 1. set device, create stream1, stream2, event1, event2 + 2. record event1 and event2 to the stream1 handle + 3. call stream_waitevent function to block current stream, waiting for the event finished + 4. query the lapsed time between the two event + 5. free resources + :return: + """ + device_id = 0 + + context, ret = acl.rt.create_context(device_id) + stream, ret = acl.rt.create_stream() + self.assertEqual(ret, 0) + stream_2, ret = acl.rt.create_stream() + self.assertEqual(ret, 0) + event_1, ret = acl.rt.create_event() + self.assertEqual(ret, 0) + event_2, ret = acl.rt.create_event() + self.assertEqual(ret, 0) + ret = acl.rt.record_event(event_1, stream) + self.assertEqual(ret, 0) + time.sleep(0.005) + ret = acl.rt.record_event(event_2, stream) + self.assertEqual(ret, 0) + ret = acl.rt.synchronize_event(event_1) + self.assertEqual(ret, 0) + ret = acl.rt.stream_wait_event(stream_2, event_1) + self.assertEqual(ret, 0) + status, ret = acl.rt.query_event_wait_status(event_1) + self.assertEqual(ret, 0) + ret = acl.rt.synchronize_event(event_2) + self.assertEqual(ret, 0) + status, ret = acl.rt.query_event_status(event_2) + self.assertEqual(ret, 0) + self.assertEqual(status, ACL_EVENT_RECORD_STATUS_COMPLETE) + + ret = acl.rt.synchronize_stream(stream) + self.assertEqual(ret, 0) + ms, ret = acl.rt.event_elapsed_time(event_1, event_2) + self.assertEqual(ret, 0) + self.assertLessEqual(ms, 10) + + ret = acl.rt.destroy_event(event_1) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_event(event_2) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_stream(stream) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_stream(stream_2) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_context(context) + self.assertEqual(ret, 0) + + def test_event_009_elapsed_time(self): + """ + test case for acl event elapsed_time + :return: + """ + device_id = 0 + context, ret =acl.rt.create_context(device_id) + self.assertEqual(ret, 0) + stream, ret =acl.rt.create_stream() + self.assertEqual(ret, 0) + event_1, ret =acl.rt.create_event_with_flag(ACL_EVENT_TIME_LINE) + self.assertEqual(ret, 0) + event_2, ret = acl.rt.create_event_with_flag(ACL_EVENT_TIME_LINE) + self.assertEqual(ret, 0) + ret = acl.rt.record_event(event_1, stream) + self.assertEqual(ret, 0) + # sleep 2s to simulate the computational task + time.sleep(2) + ret = 
acl.rt.record_event(event_2, stream) + self.assertEqual(ret, 0) + ret = acl.rt.synchronize_stream(stream) + self.assertEqual(ret, 0) + ms, ret = acl.rt.event_elapsed_time(event_1,event_2) + self.assertEqual(ret, 0) + print("[INFO] ms = {}".format(ms)) + + ret = acl.rt.destroy_event(event_1) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_event(event_2) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_stream(stream) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_context(context) + self.assertEqual(ret, 0) + + def launch_callback_with_pythreading(self, blocked, sleep_time, blocked_time): + """ + 1、创建回调处理线程 + 2、包装一个符合回调函数格式的sleep函数 + 3、调用launch_callback 组色参数设置为blocked + 4、验证阻塞时间是否为blocked_time + 5、关闭线程 + """ + ret = acl.rt.set_device(0) + self.assertEqual(ret, 0) + stm, ret = acl.rt.create_stream() + self.assertEqual(ret, 0) + thd_flag = True + + def process_report_loop(): + # 回调函数处理线程 + max_time = 15 # 最长运行15s + t = time.perf_counter() + while thd_flag and time.perf_counter() - t < max_time: + # 每200ms重新调用process_report + acl.rt.process_report(200) + + thd = threading.Thread(target=process_report_loop) + thd.start() + + ret = acl.rt.subscribe_report(thd.ident, stm) + self.assertEqual(0, ret) + + def sleep_cbk(t): + for i in t: + time.sleep(i) + + ret = acl.rt.synchronize_stream(stm) + self.assertEqual(ret, 0) + st = time.perf_counter() + # 调用回调函数阻塞stllep_time秒 + ret = acl.rt.launch_callback(sleep_cbk, [sleep_time], blocked, stm) + self.assertEqual(ret, 0) + ret = acl.rt.synchronize_stream(stm) + self.assertAlmostEqual(blocked_time, time.perf_counter() -st, delta=0.01) + self.assertEqual(ret, 0) + + # 将thd_flag设置为 false 关闭回调线程 + thd_flag = False + self.assertEqual(ret, 0) + + thd.join() + + ret = acl.rt.unsubscribe_report(thd.ident, stm) + + self.assertEqual(ret, 0) + ret = acl.rt.destroy_stream(stm) + self.assertEqual(ret, 0) + ret = acl.rt.reset_device(0) + self.assertEqual(ret, 0) + + def test_event_013_launch_callback_with_pythread_blocaked(self): + """ + 测试使用python线程库作为subscribe_report的回调线程,并验证 launch_callback blocked参数设置为1时回调函数是否能够阻塞stream + 阻塞stream场景下blocked_stream 和 stream 相等为2s + """ + self.launch_callback_with_pythreading(1, 2, 2) + + def test_event_017_ex_event(self): + """ + test case for acl event elapsed_time + :return: + """ + device_id = 0 + context, ret =acl.rt.create_context(device_id) + self.assertEqual(ret, 0) + stream, ret =acl.rt.create_stream() + self.assertEqual(ret, 0) + event_1, ret =acl.rt.create_event_ex_with_flag(ACL_EVENT_TIME_LINE) + self.assertEqual(ret, 0) + event_2, ret = acl.rt.create_event_ex_with_flag(ACL_EVENT_TIME_LINE) + self.assertEqual(ret, 0) + ret = acl.rt.record_event(event_1, stream) + self.assertEqual(ret, 0) + # sleep 2s to simulate the computational task + time.sleep(2) + ret = acl.rt.record_event(event_2, stream) + self.assertEqual(ret, 0) + ret = acl.rt.synchronize_stream(stream) + self.assertEqual(ret, 0) + ms, ret = acl.rt.event_elapsed_time(event_1,event_2) + self.assertEqual(ret, 0) + print("[INFO] ms = {}".format(ms)) + + ret = acl.rt.destroy_event(event_1) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_event(event_2) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_stream(stream) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_context(context) + self.assertEqual(ret, 0) + +if __name__ == "__main__": + suite = util.switch_cases(TestEvent, "all") + unittest.TextTestRunner(verbosity=2).run(suite) + \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/base_function_test/utils.py 
b/oec-ascend/oec/resource/ApplicationDev/base_function_test/utils.py new file mode 100644 index 00000000..54d57ce8 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/base_function_test/utils.py @@ -0,0 +1,40 @@ +import unittest +import numpy as np +import acl + +def get_class_methods(class_name): + method_list = [method.split("_") for method in dir(class_name) if method.startswith("test_")] + method_list = sorted(method_list, key=lambda x: x[2]) + methods = ["_".join(method) for method in method_list] + return methods + +def switch_cases(case_class, opt): + suite = unittest.TestSuite() + methods = get_class_methods(case_class) + + if opt == "all": + for method in methods: + suite.addTest(case_class(method)) + return suite + +def align_size(origin_size, alignment): + if not alignment: + return 0 + return ((origin_size + (alignment - 1)) // alignment) * alignment + +def get_align_size(align_dict, pixel_fotmat, defaule_vale=0, case_value=0): + for key in align_dict.keys(): + if pixel_fotmat in key: + return align_dict.get(key)(defaule_vale, case_value) + return defaule_vale + +def get_device_type(): + device_type = acl.get_soc_name()[len('Ascend'):] + if "P" in device_type: + device_type = device_type[0:4] + else: + device_type = device_type[0:3] + device_type = device_type == "910P" and "910" or device_type + if device_type not in ["310", "310P", "910"]: + raise Exception(f"device_type = {device_type} not in 310/310P/910, npu-smi not found!") + return device_type \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/CMakeLists.txt new file mode 100644 index 00000000..2e16c7ee --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +add_subdirectory("./src") diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/ascend_test_jpeg_vpc.py b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/ascend_test_jpeg_vpc.py new file mode 100644 index 00000000..8718907a --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/ascend_test_jpeg_vpc.py @@ -0,0 +1,21 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","媒体处理"), + name = "ACL_MEDIA_JPEGD_VPC_CROP_PASTE", + cmd = f"bash run.sh 2 {oec.Context.data_path} {oec.Context.output_dir}/tmp/dvpp_jpeg_vpc" + ) + + +oec.TestCase( + group= ("应用开发","媒体处理"), + name = "ACL_MEDIA_JPEGE", + cmd = f"bash run.sh 3 {oec.Context.data_path} {oec.Context.output_dir}/tmp/dvpp_jpeg_vpc" + ) + +oec.TestCase( + group= ("应用开发","媒体处理"), + name = "ACL_MEDIA_JPEG_YUV_VPC_RESIZE", + cmd = f"bash run.sh 4 {oec.Context.data_path} {oec.Context.output_dir}/tmp/dvpp_jpeg_vpc" + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/dvpp_process.h b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/dvpp_process.h new file mode 100644 index 00000000..ce5ebb7f --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/dvpp_process.h @@ -0,0 +1,164 @@ +/** +* @file dvpp_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +class DvppProcess { +public: + /** + * @brief Constructor + * @param [in] stream: stream + */ + explicit DvppProcess(aclrtStream &stream); + + /** + * @brief Destructor + */ + virtual ~DvppProcess(); + + /** + * @brief dvpp init + * @return result + */ + Result InitResource(); + + /** + * @brief init dvpp output para + * @param [in] modelInputWidth: model input width + * @param [in] modelInputHeight: model input height + * @return result + */ + Result InitDvppOutputPara(int modelInputWidth, int modelInputHeight); + + /** + * @brief set jpegd input + * @param [in] inDevBuffer: device buffer of input pic + * @param [in] inDevBufferSize: device buffer size of input pic + * @param [in] picDesc:picture description + */ + void SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc); + + /** + * @brief set jpege input + * @param [in] inDevBuffer: device buffer of input yuv file + * @param [in] inDevBufferSize: device input pic buffer size after align + * @param [in] inputWidth:width of pic after encode + * @param [in] inputHeight:height of pic after encode + */ + void SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight); + + /** + * @brief get dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: output size + */ + void GetDvppOutput(void **outputBuffer, int &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(); + + /** + * @brief set dvpp type after JpegD(vpcResize/vpcCrop/vpcCropAndPaste) + * @return result + */ + void SetDvppType(DvppType dvppType); + + /** + * @brief compute encode input pic desc size + * @return input pic desc size + */ + uint32_t ComputeEncodeInputSize(int inputWidth, int inputHeight); + + /** + * @brief process encode + * @return result + */ + Result ProcessJpegE(); + + /** + * @brief process 8k resize + * @return result + */ + Result Process8kResize(); + +private: + Result InitDecodeOutputDesc(); + Result ProcessDecode(); + void DestroyDecodeResource(); + + Result InitResizeInputDesc(); + Result Init8kResizeInputDesc(); + Result InitResizeOutputDesc(); + Result Init8kResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + Result InitCropInputDesc(); + Result InitCropOutputDesc(); + Result ProcessCrop(); + void DestroyCropResource(); + + Result InitCropAndPasteInputDesc(); + Result InitCropAndPasteOutputDesc(); + Result ProcessCropAndPaste(); + void DestroyCropAndPasteResource(); + + Result InitEncodeResource(); + void DestroyEncodeResource(); + + void DestroyResource(); + void DestroyDvppOutputPara(); + void DestroyDecodeOutBuff(); + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + + DvppType dvppType_; + acldvppRoiConfig *cropArea_; + acldvppRoiConfig *pasteArea_; + acldvppJpegeConfig *jpegeConfig_; + acldvppResizeConfig *resizeConfig_; + + void* decodeOutBufferDev_; // decode output buffer + acldvppPicDesc *decodeOutputDesc_; //decode output desc + + void* encodeOutBufferDev_; // encode output buffer + uint32_t encodeOutBufferSize_; // encode output buffer size + acldvppPicDesc *encodeInputDesc_; //encode input desc + + acldvppPicDesc *vpcInputDesc_; // vpc input desc + 
acldvppPicDesc *vpcOutputDesc_; // vpc output desc + + char *inDevBuffer_; // input pic dev buffer + uint32_t inDevBufferSizeD_; // input pic size for decode + uint32_t inDevBufferSizeE_; // input pic size for encode + uint32_t jpegDecodeOutputSize_; // jpeg decode output size + + uint32_t decodeOutputWidth_; // decode output width + uint32_t decodeOutputWidthStride_; // decode output width aligned + uint32_t decodeOutputHeight_; // decode output height + + void *vpcInBufferDev_; // vpc input buffer + void *vpcOutBufferDev_; // vpc output buffer + uint32_t vpcOutBufferSize_; // vpc output size + + uint32_t modelInputWidth_; // model input width + uint32_t modelInputHeight_; // model input height + + uint32_t jpegeInputWidth_; // encode input width + uint32_t jpegeInputHeight_; // encode input height +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/sample_process.h b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/sample_process.h new file mode 100644 index 00000000..14c42bf6 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/sample_process.h @@ -0,0 +1,63 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include "utils.h" +#include "acl/acl.h" + +class SampleProcess { +public: + /** + * @brief Constructor + */ + SampleProcess(); + + /** + * @brief Destructor + */ + virtual ~SampleProcess(); + + /** + * @brief init reousce + * @return result + */ + Result InitResource(); + + /** + * @brief decode, vpc and infer sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegdProcess(DvppType dvpptype); + + /** + * @brief encode sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegeProcess(DvppType dvpptype); + + /** + * @brief resize 8k sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result Resize8kProcess(DvppType dvpptype); + +private: + /** + * @brief destroy resource + */ + void DestroyResource(); + + int32_t deviceId_; + aclrtContext context_; + aclrtStream stream_; +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/utils.h b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/utils.h new file mode 100644 index 00000000..375374de --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/inc/utils.h @@ -0,0 +1,128 @@ +/** +* @file utils.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include "acl/acl.h" + +#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__) +#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__) +#define ERROR_LOG(fmt, ...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__) + +typedef enum Result { + SUCCESS = 0, + FAILED = 1 +} Result; + +typedef enum DvppType { + VPC_RESIZE = 0, + VPC_CROP = 1, + VPC_CROP_AND_PASTE = 2, + JPEG_ENCODE = 3, + VPC_8K_RESIZE = 4 +} DvppType; + +typedef struct PicDesc { + std::string picName; + uint32_t width; + uint32_t height; + uint32_t jpegDecodeSize; +} PicDesc; + +class RunStatus { +public: + static void SetDeviceStatus(bool isDevice) + { + isDevice_ = isDevice; + } + static bool GetDeviceStatus() + { + return isDevice_; + } +private: + RunStatus() = default; + ~RunStatus() = default; + static bool isDevice_; +}; + +class Utils { +public: + /** + * @brief create device buffer of pic + * @param [in] picDesc: pic desc + * @param [out] picDevBuffer: device memory of picture + * @param [out] devPicBufferSize: actual pic size + * @return device buffer of pic + */ + static Result GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize); + + /** + * @brief create buffer of bin file + * @param [in] fileName: file name + * @param [out] inputBuff: input data buffer + * @param [out] fileSize: actual file szie + * @return buffer of pic + */ + static Result ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize); + + /** + * @brief create device buffer of pic + * @param [in] picDesc: pic desc + * @param [in] PicBufferSize: aligned pic size + * @return device buffer of pic + */ + static void *GetPicDevBuffer(const PicDesc &picDesc, uint32_t &PicBufferSize); + + /** + * @brief pull model output data to file + * @param [in] modelOutput: model output dataset + * @param [in] fileName: file name + * @return result + */ + static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName); + + /** + * @brief save dvpp output data + * @param [in] fileName: file name + * @param [in] devPtr: dvpp output data device addr + * @param [in] dataSize: dvpp output data size + * @return result + */ + static Result SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize); + + /** + * @brief check file if exist + * @param [in] fileName: file to check + * @return result + */ + static Result CheckFile(const char *fileName); + + /** + * @brief save model output data to dst file + * @param [in] srcfileName: src file name + * @param [in] dstfileName: dst file name + * @return result + */ + static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName); + + /** + * @brief check fold, if not exist, create it + * @param [in] fileName: fold to check + * @return result + */ + static Result CheckAndCreateFolder(const char* foldName); + + /** + * @brief program waiting + * @param [in] wating time: seconds + * @return void + */ + static void SleepTime(unsigned int seconds); +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/run.sh b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/run.sh new file mode 100755 index 00000000..0b549787 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/run.sh @@ -0,0 +1,11 @@ +#! 
/bin/bash +src_path=$(pwd) +argv=$1 +data_path=$2 +output_path="$3/dvpp_vdec" + +mkdir -p "${output_path}" +cd "${output_path}" +cmake "${src_path}" +make +./main "${argv}" "${data_path}" "${output_path}" \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/CMakeLists.txt new file mode 100644 index 00000000..5a8815ae --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +# Compile options +add_compile_options(-std=c++11) + +add_definitions(-DENABLE_DVPP_INTERFACE) + +# Specify target generation path +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +set(INC_PATH $ENV{ASCEND_HOME_PATH}) +message(STATUS "env INC_PATH: ${INC_PATH}") +set(LIB_PATH "$ENV{ASCEND_HOME_PATH}/lib64") +message(STATUS "env LIB_PATH: ${LIB_PATH}") + +# Header path +include_directories( + ${INC_PATH}/runtime/include/ + ../inc/ +) + +# add host lib path +link_directories( + ${LIB_PATH} +) + +add_executable(main + utils.cpp + dvpp_process.cpp + sample_process.cpp + main.cpp) + +if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(main + libascendcl libacl_dvpp) +else () + target_link_libraries(main + ascendcl acl_dvpp stdc++) +endif () + +install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/dvpp_process.cpp b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/dvpp_process.cpp new file mode 100644 index 00000000..e255a6a5 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/dvpp_process.cpp @@ -0,0 +1,895 @@ +/** +* @file dvpp_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "dvpp_process.h" +#include +#include +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +DvppProcess::DvppProcess(aclrtStream &stream) + : stream_(stream), dvppChannelDesc_(nullptr), dvppType_(VPC_RESIZE), + cropArea_(nullptr), pasteArea_(nullptr), jpegeConfig_(nullptr), resizeConfig_(nullptr), + decodeOutBufferDev_(nullptr), decodeOutputDesc_(nullptr), encodeOutBufferDev_(nullptr), + encodeInputDesc_(nullptr), vpcInputDesc_(nullptr), vpcOutputDesc_(nullptr), inDevBuffer_(nullptr), + inDevBufferSizeD_(0), inDevBufferSizeE_(0), jpegDecodeOutputSize_(0), decodeOutputWidth_(0), + decodeOutputWidthStride_(0), decodeOutputHeight_(0), vpcInBufferDev_(nullptr), vpcOutBufferDev_(nullptr), + vpcOutBufferSize_(0), modelInputWidth_(0), modelInputHeight_(0), jpegeInputWidth_(0), jpegeInputHeight_(0) +{ +} + +DvppProcess::~DvppProcess() +{ + DestroyResource(); + DestroyDvppOutputPara(); +} + +uint32_t AlignSize(uint32_t origSize, uint32_t alignment) +{ + if (alignment == 0) { + return 0; + } + uint32_t alignmentH = alignment - 1; + return (origSize + alignmentH) / alignment * alignment; +} + +void DvppProcess::SetDvppType(DvppType dvppType) +{ + dvppType_ = dvppType; +} + +Result DvppProcess::InitResource() +{ + dvppChannelDesc_ = acldvppCreateChannelDesc(); + if (dvppChannelDesc_ == nullptr) { + ERROR_LOG("acldvppCreateChannelDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppCreateChannel(dvppChannelDesc_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppCreateChannel failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + INFO_LOG("dvpp init resource success"); + return SUCCESS; +} + +void DvppProcess::DestroyResource() +{ + if (dvppChannelDesc_ != nullptr) { + aclError aclRet = acldvppDestroyChannel(dvppChannelDesc_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(aclRet)); + } + + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeD_ = inDevBufferSize; + jpegDecodeOutputSize_ = picDesc.jpegDecodeSize; +} + +void DvppProcess::GetDvppOutput(void **outputBuffer, int &outputSize) +{ + *outputBuffer = vpcOutBufferDev_; + outputSize = vpcOutBufferSize_; + vpcOutBufferDev_ = nullptr; + vpcOutBufferSize_ = 0; +} + +Result DvppProcess::InitDvppOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + ERROR_LOG("init dvpp output para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + return SUCCESS; +} + +void DvppProcess::DestroyDvppOutputPara() +{ + if (vpcOutBufferDev_ != nullptr) { + (void)acldvppFree(vpcOutBufferDev_); + vpcOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitDecodeOutputDesc() +{ + aclError aclRet = acldvppMalloc(&decodeOutBufferDev_, jpegDecodeOutputSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc decodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + decodeOutputDesc_ = acldvppCreatePicDesc(); + if (decodeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc decodeOutputDesc_ failed"); + return FAILED; + } + + acldvppSetPicDescData(decodeOutputDesc_, decodeOutBufferDev_); + 
acldvppSetPicDescFormat(decodeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + acldvppSetPicDescSize(decodeOutputDesc_, jpegDecodeOutputSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessDecode() +{ + Result ret = InitDecodeOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitDecodeOutputDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppJpegDecodeAsync(dvppChannelDesc_, reinterpret_cast(inDevBuffer_), + inDevBufferSizeD_, decodeOutputDesc_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegDecodeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("decode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + // get yuv image width and height + decodeOutputWidth_ = acldvppGetPicDescWidth(decodeOutputDesc_); + decodeOutputHeight_ = acldvppGetPicDescHeight(decodeOutputDesc_); + decodeOutputWidthStride_ = acldvppGetPicDescWidthStride(decodeOutputDesc_); + + return SUCCESS; +} + +void DvppProcess::DestroyDecodeResource() +{ + if (decodeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(decodeOutputDesc_); + decodeOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitResizeInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + int widthAlignment = 16; + int heightAlignment = 2; + int sizeAlignment = 3; + int sizeNum = 2; + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = AlignSize(modelInputWidth_, widthAlignment); + int resizeOutHeightStride = AlignSize(modelInputHeight_, heightAlignment); + if (resizeOutWidthStride == 0 || resizeOutHeightStride == 0) { + ERROR_LOG("InitResizeOutputDesc AlignSize failed"); + return FAILED; + } + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, 
vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} +extern string data_path ,output_path; +Result DvppProcess::Init8kResizeInputDesc() +{ + uint32_t inWidthStride = 8192; // 8k picture width + uint32_t inHeightStride = 8192; // 8k picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t inBufferSize = inWidthStride * inWidthStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + std::string dvppImagePath = data_path + "/data/dvpp_vpc_8192x8192_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 8192, 8192} + // other yuv file + }; + vpcInBufferDev_ = Utils::GetPicDevBuffer(testPic[0], inBufferSize); + if (vpcInBufferDev_ == nullptr) { + ERROR_LOG("get picDevBuffer failed, file name = %s", testPic[0].picName.c_str()); + return FAILED; + } + (void)acldvppSetPicDescData(vpcInputDesc_, vpcInBufferDev_); // JpegD -> vpcResize + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeight(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, inBufferSize); + return SUCCESS; +} + +Result DvppProcess::Init8kResizeOutputDesc() +{ + uint32_t resizeOutWidthStride = 4000; // output picture width + uint32_t resizeOutHeightStride = 4000; // output picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + + +Result DvppProcess::ProcessResize() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + Result inputRet = SUCCESS; + Result outputRet = SUCCESS; + if (dvppType_ == VPC_RESIZE) { + inputRet = InitResizeInputDesc(); + outputRet = InitResizeOutputDesc(); + } else if (dvppType_ == VPC_8K_RESIZE) { + inputRet = Init8kResizeInputDesc(); + outputRet = 
Init8kResizeOutputDesc(); + } else { + ERROR_LOG("invalid dvppType_ %d", static_cast(dvppType_)); + return FAILED; + } + if ((inputRet != SUCCESS) || (outputRet != SUCCESS)) { + ERROR_LOG("init resize input or output description failed"); + return FAILED; + } + + // resize pic + aclError aclRet = acldvppVpcResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("resize aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } + + if (vpcInBufferDev_ != nullptr) { + (void)acldvppFree(vpcInBufferDev_); + vpcInBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitCropInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCrop + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropOutputDesc() +{ + int sizeAlignment = 3; + int sizeNum = 2; + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + vpcOutBufferSize_ = dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + 
(void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCrop() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 550; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 480; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + Result ret = InitCropInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropInputDesc failed"); + return FAILED; + } + + ret = InitCropOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropOutputDesc failed"); + return FAILED; + } + + // crop pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitCropAndPasteInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropAndPasteInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCropAndPaste + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + 
(void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropAndPasteOutputDesc() +{ + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + int sizeAlignment = 3; + int sizeNum = 2; + vpcOutBufferSize_ = + dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCropAndPaste() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 512; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 512; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + uint32_t pasteLeftOffset = 16; // must even + uint32_t pasteRightOffset = pasteLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t pasteTopOffset = 16; // must even + uint32_t pasteBottomOffset = pasteTopOffset + cropSizeHeight - oddNum; // must odd + pasteArea_ = acldvppCreateRoiConfig(pasteLeftOffset, pasteRightOffset, + pasteTopOffset, pasteBottomOffset); + if (pasteArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig pasteArea_ failed"); + return FAILED; + } + + Result ret = InitCropAndPasteInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteInputDesc failed"); + return FAILED; + } + + ret = InitCropAndPasteOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteOutputDesc failed"); + return FAILED; + } + + // crop and patse pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizePasteAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, pasteArea_, + resizeConfig_, stream_); + + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAndPasteAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop and paste aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropAndPasteResource() +{ + if (cropArea_ != 
nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (pasteArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(pasteArea_); + pasteArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeE_ = inDevBufferSize; + jpegeInputWidth_ = inputWidth; + jpegeInputHeight_ = inputHeight; +} + +uint32_t DvppProcess::ComputeEncodeInputSize(int inputWidth, int inputHeight) +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t encodeInWidthStride = AlignSize(inputWidth, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(inputHeight, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("ComputeEncodeInputSize AlignSize failed"); + return FAILED; + } + uint32_t encodeInBufferSize = + encodeInWidthStride * encodeInHeightStride * sizeAlignment / sizeNum; + return encodeInBufferSize; +} + +Result DvppProcess::InitEncodeResource() +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t encodeInWidthStride = AlignSize(jpegeInputWidth_, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(jpegeInputHeight_, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("InitEncodeInputDesc AlignSize failed"); + return FAILED; + } + encodeInputDesc_ = acldvppCreatePicDesc(); + if (encodeInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc encodeInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(encodeInputDesc_, reinterpret_cast(inDevBuffer_)); + (void)acldvppSetPicDescFormat(encodeInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(encodeInputDesc_, jpegeInputWidth_); + (void)acldvppSetPicDescHeight(encodeInputDesc_, jpegeInputHeight_); + (void)acldvppSetPicDescWidthStride(encodeInputDesc_, encodeInWidthStride); + (void)acldvppSetPicDescHeightStride(encodeInputDesc_, encodeInHeightStride); + (void)acldvppSetPicDescSize(encodeInputDesc_, inDevBufferSizeE_); + + jpegeConfig_ = acldvppCreateJpegeConfig(); + uint32_t encodeLevel = 100; // default optimal level (0-100) + (void)acldvppSetJpegeConfigLevel(jpegeConfig_, encodeLevel); + + aclError aclRet = acldvppJpegPredictEncSize(encodeInputDesc_, jpegeConfig_, &encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("predict encodeOutBufferSize_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = acldvppMalloc(&encodeOutBufferDev_, encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc encodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +Result DvppProcess::ProcessJpegE() +{ + std::string encodeOutFileName = output_path + "/result/jpege_output_"; + std::string dvppImagePath = data_path + "/data/wood_rabbit_1024_1068_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 1024, 1068} + // other yuv file + }; + + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); 
++index) { + INFO_LOG("start to jpege picture %s", testPic[index].picName.c_str()); + + uint32_t jpegInBufferSize; + jpegInBufferSize = ComputeEncodeInputSize(testPic[index].width, testPic[index].height); + + // get input data buffer + char *picDevBuffer = reinterpret_cast(Utils::GetPicDevBuffer(testPic[index], jpegInBufferSize)); + if (picDevBuffer == nullptr) { + ERROR_LOG("get picDevBuffer failed, index is %zu", index); + return FAILED; + } + + // set jpege input data + SetInput4JpegE(picDevBuffer, jpegInBufferSize, testPic[index].width, testPic[index].height); + picDevBuffer = nullptr; + + // init jpege resource + Result ret = InitEncodeResource(); + if (ret != SUCCESS) { + ERROR_LOG("init jpeg encode failed"); + DestroyEncodeResource(); + return FAILED; + } + + aclError aclRet = acldvppJpegEncodeAsync(dvppChannelDesc_, encodeInputDesc_, encodeOutBufferDev_, + &encodeOutBufferSize_, jpegeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegEncodeAsync failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("encode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + // save jpege result + encodeOutFileName = encodeOutFileName + std::to_string(index) + ".jpg"; + ret = Utils::SaveDvppOutputData(encodeOutFileName.c_str(), encodeOutBufferDev_, encodeOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyEncodeResource(); + return FAILED; + } + } + DestroyEncodeResource(); + return SUCCESS; +} + +void DvppProcess::DestroyEncodeResource() +{ + if (jpegeConfig_ != nullptr) { + (void)acldvppDestroyJpegeConfig(jpegeConfig_); + jpegeConfig_ = nullptr; + } + + if (encodeInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(encodeInputDesc_); + encodeInputDesc_ = nullptr; + } + + if (inDevBuffer_ != nullptr) { + (void)acldvppFree(inDevBuffer_); + inDevBuffer_ = nullptr; + } + + if (encodeOutBufferDev_ != nullptr) { + (void)acldvppFree(encodeOutBufferDev_); + encodeOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::Process8kResize() +{ + std::string vpcOutFileName = output_path + "/result/dvpp_vpc_4000x4000_nv12.yuv"; + Result ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + ret = Utils::SaveDvppOutputData(vpcOutFileName.c_str(), vpcOutBufferDev_, vpcOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + return SUCCESS; +} + +Result DvppProcess::Process() +{ + // pic decode + INFO_LOG("call JpegD"); + Result ret = ProcessDecode(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessDecode failed"); + DestroyDecodeOutBuff(); + DestroyDecodeResource(); + return FAILED; + } + + DestroyDecodeResource(); + + switch (dvppType_) { + case VPC_RESIZE: + INFO_LOG("call vpcResize"); + ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + break; + + case VPC_CROP: + INFO_LOG("call vpcCrop"); + ret = ProcessCrop(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCrop failed"); + DestroyCropResource(); + return FAILED; + } + DestroyCropResource(); + break; + + case VPC_CROP_AND_PASTE: + INFO_LOG("call vpcCropAndPaste"); + ret = ProcessCropAndPaste(); + if (ret != 
SUCCESS) { + ERROR_LOG("ProcessCropAndPaste failed"); + DestroyCropAndPasteResource(); + return FAILED; + } + DestroyCropAndPasteResource(); + break; + + default: + ERROR_LOG("unsupported type"); + DestroyDecodeOutBuff(); + break; + } + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} + +void DvppProcess::DestroyDecodeOutBuff() +{ + if (decodeOutBufferDev_ != nullptr) { + (void)acldvppFree(decodeOutBufferDev_); + decodeOutBufferDev_ = nullptr; + } +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/main.cpp b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/main.cpp new file mode 100644 index 00000000..8abbaec9 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/main.cpp @@ -0,0 +1,68 @@ +/** +* @file main.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include +#include +#include +#include "sample_process.h" +#include "utils.h" + +using namespace std; +string data_path ,output_path; +int main(int argc,const char *argv[]) +{ + INFO_LOG("./main param, param represents a vpc feature and must be set"); + if (argc != 4) { + ERROR_LOG("input param not be set"); + return FAILED; + } + data_path = string(argv[2]); + output_path = string(argv[3]); + string result_path_str = output_path + "/result"; + Result ret = Utils::CheckAndCreateFolder(result_path_str.c_str()); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error, dir = %s.", result_path_str.c_str()); + return FAILED; + } + + SampleProcess sampleProcess; + ret = sampleProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("sample init resource failed"); + return FAILED; + } + + DvppType dvppType = static_cast(atoi(argv[1])); + if (dvppType == JPEG_ENCODE) { + ret = sampleProcess.JpegeProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample jpege process failed"); + return FAILED; + } + return SUCCESS; + } + + if (dvppType == VPC_8K_RESIZE) { + ret = sampleProcess.Resize8kProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample resize 8k process failed"); + return FAILED; + } + return SUCCESS; + } + + ret = sampleProcess.JpegdProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample model process failed"); + return FAILED; + } + + INFO_LOG("execute sample success"); + return SUCCESS; +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/sample_process.cpp b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/sample_process.cpp new file mode 100644 index 00000000..d42b34a3 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/sample_process.cpp @@ -0,0 +1,218 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "sample_process.h" +#include +#include +#include "dvpp_process.h" +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +SampleProcess::SampleProcess() : deviceId_(0), context_(nullptr), stream_(nullptr) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + return SUCCESS; +} + +Result SampleProcess::JpegeProcess(DvppType dvpptype) +{ + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.ProcessJpegE(); + if (ret != SUCCESS) { + ERROR_LOG("process jpege failed"); + return FAILED; + } + return SUCCESS; +} + +Result SampleProcess::Resize8kProcess(DvppType dvpptype) +{ + INFO_LOG("dvpp process 8k resize begin"); + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.Process8kResize(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process 8k resize failed"); + return FAILED; + } + INFO_LOG("dvpp process 8k resize success"); + + return SUCCESS; +} +extern string data_path ,output_path; +// jpegd -> vpc -> model execute +Result SampleProcess::JpegdProcess(DvppType dvpptype) +{ + std::string dvppOutputfileName = output_path + "/result/dvpp_output_"; + + // dvpp init + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + int modelInputWidth = 224; + int modelInputHeight = 224; + std::string dvppImagePath1 = data_path + "/data/persian_cat_1024_1536_283.jpg"; + std::string dvppImagePath2 = data_path + "/data/wood_rabbit_1024_1061_330.jpg"; + // input image + PicDesc testPic[] = { + {dvppImagePath1.c_str(), 0, 0}, + {dvppImagePath2.c_str(), 0, 0}, + }; + INFO_LOG( "-------------------------------------------"); + for (size_t index = 0; index < 
sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to process picture:%s", testPic[index].picName.c_str()); + // 1.dvpp process + uint32_t devPicBufferSize; + char *picDevBuffer = nullptr; + // get input image data buffer + ret = Utils::GetPicDevBuffer4JpegD(testPic[index], picDevBuffer, devPicBufferSize); + if (ret != SUCCESS) { + ERROR_LOG("get pic device buffer failed, index is %zu", index); + return FAILED; + } + + dvppProcess.SetInput4JpegD(picDevBuffer, devPicBufferSize, testPic[index]); + + ret = dvppProcess.InitDvppOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + return FAILED; + } + + ret = dvppProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process failed"); + return FAILED; + } + + (void)acldvppFree(picDevBuffer); + picDevBuffer = nullptr; + + void *dvppOutputBuffer = nullptr; + int dvppOutputSize; + dvppProcess.GetDvppOutput(&dvppOutputBuffer, dvppOutputSize); + + std::string dvppOutputfileNameCur = dvppOutputfileName + std::to_string(index); + (void)Utils::SaveDvppOutputData(dvppOutputfileNameCur.c_str(), dvppOutputBuffer, dvppOutputSize); + + + (void)acldvppFree(dvppOutputBuffer); + + } + + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize acl"); +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/utils.cpp b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/utils.cpp new file mode 100644 index 00000000..25d74569 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/jpeg_vpc_test/src/utils.cpp @@ -0,0 +1,422 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "utils.h" +#include +#include +#include +#include +#include +#if defined(_MSC_VER) +#include +#else +#include +#include +#include +#endif +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +bool RunStatus::isDevice_ = false; + +Result Utils::ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize) +{ + std::ifstream binFile(fileName, std::ifstream::binary); + if (!binFile.is_open()) { + ERROR_LOG("open file %s failed", fileName.c_str()); + return FAILED; + } + + binFile.seekg(0, binFile.end); + auto binFileBufferLen = binFile.tellg(); + if (binFileBufferLen == 0) { + ERROR_LOG("binfile is empty, filename is %s", fileName.c_str()); + binFile.close(); + return FAILED; + } + binFile.seekg(0, binFile.beg); + + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, binFileBufferLen); + if (inputBuff == nullptr) { + ERROR_LOG("host malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } else { // app is running in device + aclRet = acldvppMalloc(&inputBuff, binFileBufferLen); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("device malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } + binFile.read(static_cast(inputBuff), binFileBufferLen); + binFile.close(); + fileSize = binFileBufferLen; + + return SUCCESS; +} + +Result Utils::GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return FAILED; + } + + uint32_t inputBuffSize = 0; + void *inputBuff = nullptr; + auto ret = ReadBinFile(picDesc.picName, inputBuff, inputBuffSize); + if (ret != SUCCESS) { + ERROR_LOG("read bin file failed, file name is %s", picDesc.picName.c_str()); + return FAILED; + } + + aclError aclRet = acldvppJpegGetImageInfoV2(inputBuff, inputBuffSize, &picDesc.width, &picDesc.height, + nullptr, nullptr); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg image info failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + aclRet = acldvppJpegPredictDecSize(inputBuff, inputBuffSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, + &picDesc.jpegDecodeSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg decode size failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + + void *inBufferDev = nullptr; + uint32_t inBufferSize = inputBuffSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = acldvppMalloc(&inBufferDev, inBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc inBufferSize failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + + // if app is running in host, need copy data from host to device + aclRet = aclrtMemcpy(inBufferDev, inBufferSize, inputBuff, inputBuffSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inBufferDev); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + inBufferDev = inputBuff; + } + devPicBufferSize = inBufferSize; + 
picDevBuffer = reinterpret_cast(inBufferDev); + + return SUCCESS; +} + +void *Utils::GetPicDevBuffer(const PicDesc &picDesc, uint32_t &picBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return nullptr; + } + + FILE *fp = fopen(picDesc.picName.c_str(), "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed", picDesc.picName.c_str()); + return nullptr; + } + + fseek(fp, 0, SEEK_END); + long fileLen = ftell(fp); + fseek(fp, 0, SEEK_SET); + + if (static_cast(fileLen) < picBufferSize) { + ERROR_LOG("need read %u bytes but file %s only %ld bytes", + picBufferSize, picDesc.picName.c_str(), fileLen); + fclose(fp); + return nullptr; + } + + void *inputDevBuff = nullptr; + aclError aclRet = acldvppMalloc(&inputDevBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc device data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + return nullptr; + } + + void *inputBuff = nullptr; + size_t readSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + (void)acldvppFree(inputDevBuff); + return nullptr; + } + + readSize = fread(inputBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)aclrtFreeHost(inputBuff); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + + // if app is running in host, need copy model output data from host to device + aclRet = aclrtMemcpy(inputDevBuff, picBufferSize, inputBuff, picBufferSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inputDevBuff); + (void)aclrtFreeHost(inputBuff); + fclose(fp); + return nullptr; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + readSize = fread(inputDevBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + } + + fclose(fp); + return inputDevBuff; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("model out dataset num can't be 0"); + } + for (size_t i = 0; i < outDatasetNum; ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer failed"); + return FAILED; + } + + void *dataBufferDev = aclGetDataBufferAddr(dataBuffer); + if (dataBufferDev == nullptr) { + ERROR_LOG("aclGetDataBufferAddr failed"); + return FAILED; + } + + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, bufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, bufferSize, dataBufferDev, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("aclrtMemcpy device to 
host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + } + } else { + dataPtr = dataBufferDev; + } + + uint32_t len = static_cast(bufferSize); + FILE *outputFile = fopen(fileName, "wb+"); + if (outputFile != nullptr) { + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + fclose(outputFile); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + } else { + ERROR_LOG("create output file %s failed, size is %u", fileName, len); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + } + return SUCCESS; +} + +Result Utils::SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize) +{ + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, dataSize, devPtr, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("dvpp output memcpy to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + return FAILED; + } + } else { + dataPtr = devPtr; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed.", fileName); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + size_t writeSize = fwrite(dataPtr, sizeof(char), dataSize, outFileFp); + if (writeSize != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes.", + dataSize, fileName, writeSize); + fclose(outFileFp); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + fflush(outFileFp); + fclose(outFileFp); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + while (i < 10) { + std::ifstream f (fileName); + if (f.good()) { + break; + } + SleepTime(1); // slepp 1s + INFO_LOG("check result, wait time %d second", i + 1); + i++; + } + // 10 is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout, expect file:%s", fileName); + return FAILED; + } + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist"); + return FAILED; + } + FILE *model_output = fopen(srcfileName, "rb"); + if (model_output == nullptr) { + ERROR_LOG("fopen out file %s failed.", srcfileName); + return FAILED; + } + + FILE *model_output_txt = fopen(dstfileName, "wb+"); + if (model_output_txt == nullptr) { + ERROR_LOG("fopen out file %s failed.", dstfileName); + fclose(model_output); + return FAILED; + } + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f,%d\n", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result : 
classType[%d], top1[%f], top5[%f]", classType, max, sum); + INFO_LOG("-------------------------------------------"); + return SUCCESS; +} + +Result Utils::CheckAndCreateFolder(const char* foldName) +{ + INFO_LOG("start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make directory successfully."); + } else { + INFO_LOG("make directory errorly."); + return FAILED; + } + } +#else + if (access(foldName , 0) == -1) { + int flag = mkdir(foldName , 0777); + if (flag == 0) { + INFO_LOG("make directory successfully."); + } else { + ERROR_LOG("make directory errorly."); + return FAILED; + } + } +#endif + INFO_LOG("check result success, fold exist"); + return SUCCESS; +} + +void Utils::SleepTime(unsigned int seconds) +{ +#if defined(_MSC_VER) + unsigned long secs = static_cast(seconds); + Sleep(secs * 1000); // sleep 1 second +#else + sleep(seconds); +#endif +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/CMakeLists.txt new file mode 100644 index 00000000..f4680624 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_VDEC_RESNET50) + +add_subdirectory("./src") diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/ascend_test_vdec.py b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/ascend_test_vdec.py new file mode 100644 index 00000000..18f1e137 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/ascend_test_vdec.py @@ -0,0 +1,8 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","媒体处理"), + name = "ACL_MEDIA_VDEC", + cmd = f"bash run.sh {oec.Context.data_path}/data {oec.Context.output_dir}/tmp" + ) diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/dvpp_process.h b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/dvpp_process.h new file mode 100644 index 00000000..be541993 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/dvpp_process.h @@ -0,0 +1,100 @@ +/** +* @file dvpp_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +/** + * DvppProcess + */ +class DvppProcess { +public: + /** + * @brief Constructor + * @param [in] stream: stream + */ + explicit DvppProcess(aclrtStream &stream); + + /** + * @brief Destructor + */ + virtual ~DvppProcess(); + + /** + * @brief dvpp global init + * @return result + */ + Result InitResource(); + + /** + * @brief init dvpp output para + * @param [in] modelInputWidth: model input width + * @param [in] modelInputHeight: model input height + * @return result + */ + Result InitOutputPara(int modelInputWidth, int modelInputHeight); + + /** + * @brief set dvpp input + * @param [in] inputWidth:width of pic + * @param [in] inputHeight:height of pic + * @param [in] format:format of pic + */ + void SetInput(int inputWidth, int inputHeight, acldvppPixelFormat format); + + /** + * @brief gett dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: output size + */ + void GetOutput(void **outputBuffer, uint32_t &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(void *buffer, uint32_t size); + + /** + * @brief destroy resource + */ + void DestroyResource(); + + void DestroyOutputPara(); + +private: + Result InitResizeInputDesc(); + Result InitResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + acldvppResizeConfig *resizeConfig_; + + acldvppPicDesc *resizeOutputDesc_; // resize output desc + acldvppPicDesc *resizeInputDesc_; // resize input desc + + void *resizeOutBufferDev_; // resize output buffer + void *picOutBufferDev_; + + uint32_t resizeInBufferSize_; // resize input size + uint32_t resizeOutBufferSize_; // resize output size + uint32_t inputWidth_; // input pic width + uint32_t inputHeight_; // input pic height + uint32_t modelInputWidth_; // model input width + uint32_t modelInputHeight_; // model input height + acldvppPixelFormat format_; // pic format +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/sample_process.h b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/sample_process.h new file mode 100644 index 00000000..4bd46eea --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/sample_process.h @@ -0,0 +1,77 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include +#include +#include "utils.h" +#include "dvpp_process.h" +#include "acl/acl.h" +#include "vdec_process.h" + +/** +* SampleProcess +*/ +class SampleProcess { +public: + /** + * @brief Constructor + */ + SampleProcess(const char *data, const char *outFolder); + + /** + * @brief Destructor + */ + virtual ~SampleProcess(); + + /** + * @brief init reousce + * @return result + */ + Result InitResource(); + + /** + * @brief vdec process + * @return result + */ + Result DoVdecProcess(); + + /** + * @brief model process + * @return result + */ + Result DoModelProcess(); + +private: + void DestroyResource(); + + int32_t deviceId_; + aclrtContext context_; + aclrtStream stream_; + std::string filePath; + std::thread thread_; + const char *outFolder_; + PicDesc picDesc_; + + /** + * 0:H265 main level + * 1:H264 baseline level + * 2:H264 main level + * 3:H264 high level + */ + acldvppStreamFormat enType_; + + /** + * 1:YUV420 semi-planner(nv12) + * 2:YVU420 semi-planner(nv21) + */ + acldvppPixelFormat format_; +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/utils.h b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/utils.h new file mode 100644 index 00000000..8de334e2 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/utils.h @@ -0,0 +1,133 @@ +/** +* @file utils.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__) +#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__) +#define ERROR_LOG(fmt, ...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__) + +typedef enum Result { + SUCCESS = 0, + FAILED = 1 +} Result; + +typedef struct PicDesc { + std::string picName; + int width; + int height; +} PicDesc; + +class RunStatus { +public: + static void SetDeviceStatus(bool isDevice) + { + isDevice_ = isDevice; + } + static bool GetDeviceStatus() + { + return isDevice_; + } +private: + RunStatus() = default; + ~RunStatus() = default; + static bool isDevice_; +}; + +class Utils { +public: + /** + * @brief write device memory to file + * @param [in] fileName: file name + * @param [in] buffer of input data + * @param [in] dataSize: size of data + * @return success or fail + */ + static bool WriteDeviceMemoryToFile(const char *fileName, void *dataDev, uint32_t dataSize); + + /** + * @brief read file to device memory + * @param [in] fileName: file name + * @param [out] buffer of input data + * @param [out] dataSize: size of data + * @return success or fail + */ + static bool ReadFileToDeviceMem(const char *fileName, void *&dataDev, uint32_t &dataSize); + + /** + * @brief pull model output data to file + * @param [in] modelOutput: model output dataset + * @param [in] fileName: file name + * @return result + */ + static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName); + + /** + * @brief save model output data to dst file + * @param [in] srcfileName: src file name + * @param [in] dstfileName: dst file name + * @return result + */ + static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName); + + /** + * @brief save dvpp output data + * @param [in] fileName: file name + * @param [in] devPtr: dvpp output data device addr + * @param [in] dataSize: dvpp output data size + * @return result + */ + static Result SaveDvppOutputData(const char *fileName, const void *devPtr, uint32_t dataSize); + + /** + * @brief check file if exist + * @param [in] fileName: file to check + * @return result + */ + static Result CheckFile(const char* fileName); + + /** + * @brief check fold, if not exist, create it + * @param [in] fileName: fold to check + * @return result + */ + static Result CheckAndCreateFolder(const char *foldName); + + /** + * @brief read file of a dir + * @param [in] fileName: folder + * @return fileList + */ + static std::vector ReadDir(const char *folder); + + /** + * @brief remove dir + * @param [in] fileName: folder + * @return fileList + */ + static void RemoveDir(const char* outFolder_); + + /** + * @brief program waiting + * @param [in] wating time: seconds + * @return void + */ + static void SleepTime(unsigned int seconds); +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/vdec_process.h b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/vdec_process.h new file mode 100644 index 00000000..9fc12b0e --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/inc/vdec_process.h @@ -0,0 +1,91 @@ +/** +* @file vdec_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +class VdecProcess { +public: + /** + * @brief Constructor + */ + VdecProcess(); + + /** + * @brief Destructor + */ + virtual ~VdecProcess(); + + /** + * @brief vdec global init + * @param [in] threadId: index of thread + * @param [in] enType: type of input stream + * @param [in] format: format of pic + * @return result + */ + Result InitResource(uint64_t threadId, acldvppStreamFormat enType, acldvppPixelFormat format); + + /** + * @brief set vdec input + * @param [in] inBufferDev: input buffer + * @param [in] inBufferSize: buffer size + * @param [in] inputWidth:width of pic + * @param [in] inputHeight:height of pic + */ + void SetInput(void *inBufferDev, uint32_t inBufferSize, int inputWidth, int inputHeight); + + /** + * @brief destroy StreamDesc + */ + void DestroyStreamDesc(); + + /** + * @brief destroy PicDesc + */ + void DestroyPicDesc(); + + /** + * @brief destroy resource + */ + void DestroyResource(); + + /** + * @brief vdec process + * @return result + */ + Result Process(); + + /** + * @brief vdec send eos frame + * @return result + */ + Result SendVdecEos(); + +private: + Result CreateStreamDesc(); + Result CreatePicDesc(size_t size); + + uint64_t threadId_; + + aclvdecChannelDesc *vdecChannelDesc_; + acldvppStreamDesc *streamInputDesc_; + acldvppPicDesc *picOutputDesc_; + void *picOutBufferDev_; + void *inBufferDev_; + uint32_t inBufferSize_; + uint32_t inputWidth_; + uint32_t inputHeight_; + acldvppStreamFormat enType_; + acldvppPixelFormat format_; +}; + diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/run.sh b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/run.sh new file mode 100755 index 00000000..9781227c --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/run.sh @@ -0,0 +1,10 @@ +#! /bin/bash +src_path=$(pwd) +data_path=$1 +output_path="$2/dvpp_vdec" + +mkdir -p "${output_path}" +cd "${output_path}" +cmake "${src_path}" +make +./main "${data_path}" "${output_path}/out_dir" \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/CMakeLists.txt b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/CMakeLists.txt new file mode 100644 index 00000000..13ce6e25 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
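+# This file builds the 'main' executable from the sources below and links ascendcl/acl_dvpp out of
+# $ENV{ASCEND_HOME_PATH}/lib64; it is pulled in through add_subdirectory(./src) from the top-level
+# vdec_test CMakeLists, as run.sh does. A typical manual build (the install path is an assumption,
+# adjust to the local CANN toolkit) would be:
+#   export ASCEND_HOME_PATH=/usr/local/Ascend/ascend-toolkit/latest
+#   cmake <path-to-vdec_test> && make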
+
+# CMake lowest version requirement
+cmake_minimum_required(VERSION 3.5.1)
+
+# project information
+project(ACL_VDEC_RESNET50)
+
+# Compile options
+add_compile_options(-std=c++11)
+
+add_definitions(-DENABLE_DVPP_INTERFACE)
+
+# Specify target generation path
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../")
+set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")
+
+set(INC_PATH $ENV{ASCEND_HOME_PATH})
+message(STATUS "env INC_PATH: ${INC_PATH}")
+set(LIB_PATH "$ENV{ASCEND_HOME_PATH}/lib64")
+message(STATUS "env LIB_PATH: ${LIB_PATH}")
+# Header path
+include_directories(
+    ${INC_PATH}/include/
+    ../inc/
+)
+
+# add host lib path
+link_directories(
+    ${LIB_PATH}
+)
+
+add_executable(main
+    utils.cpp
+    dvpp_process.cpp
+
+    vdec_process.cpp
+    sample_process.cpp
+    main.cpp)
+
+if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows")
+    target_link_libraries(main libascendcl libacl_dvpp)
+elseif (${CMAKE_CXX_COMPILER} MATCHES "android")
+    target_link_libraries(main
+        ascendcl acl_dvpp stdc++)
+else ()
+    target_link_libraries(main
+        ascendcl acl_dvpp pthread stdc++)
+endif ()
+
+install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/dvpp_process.cpp b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/dvpp_process.cpp
new file mode 100644
index 00000000..f97f4ded
--- /dev/null
+++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/dvpp_process.cpp
@@ -0,0 +1,232 @@
+/**
+* @file dvpp_process.cpp
+*
+* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/ +#include "dvpp_process.h" +using namespace std; + +DvppProcess::DvppProcess(aclrtStream& stream) : stream_(stream), + dvppChannelDesc_(nullptr), resizeConfig_(nullptr), + resizeInputDesc_(nullptr), resizeOutputDesc_(nullptr), + resizeOutBufferDev_(nullptr), picOutBufferDev_(nullptr), + resizeInBufferSize_(0),resizeOutBufferSize_(0), + inputWidth_(0), inputHeight_(0),modelInputWidth_(0), + modelInputHeight_(0), format_(PIXEL_FORMAT_YUV_400) +{ +} + +DvppProcess::~DvppProcess() +{ + DestroyResizeResource(); + DestroyOutputPara(); + DestroyResource(); +} + +Result DvppProcess::InitResource() +{ + // create vpc channel description + dvppChannelDesc_ = acldvppCreateChannelDesc(); + if (dvppChannelDesc_ == nullptr) { + ERROR_LOG("acldvppCreateChannelDesc failed"); + return FAILED; + } + + // create vpc channel + aclError ret = acldvppCreateChannel(dvppChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppCreateChannel failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // create vpc resize config + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + INFO_LOG("dvpp init dvpp resource success"); + return SUCCESS; +} + +void DvppProcess::SetInput(int inputWidth, int inputHeight, acldvppPixelFormat format) +{ + inputWidth_ = inputWidth; + inputHeight_ = inputHeight; + format_ = format; +} + +void DvppProcess::GetOutput(void **outputBuffer, uint32_t &outputSize) +{ + *outputBuffer = resizeOutBufferDev_; + outputSize = resizeOutBufferSize_; + resizeOutBufferDev_ = nullptr; + resizeOutBufferSize_ = 0; +} + +Result DvppProcess::InitOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + ERROR_LOG("InitInput para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + + // output buffer, adjust the value based on the actual model + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = (resizeOutWidth + 15) / 16 * 16; // 16-byte alignment + int resizeOutHeightStride = (resizeOutHeight + 1) / 2 * 2; // 2-byte alignment + resizeOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * 3 / 2; // yuv format size + aclError ret = acldvppMalloc(&resizeOutBufferDev_, resizeOutBufferSize_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc resizeOutBuffer failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyOutputPara() +{ + if (resizeOutBufferDev_ != nullptr) { + (void)acldvppFree(resizeOutBufferDev_); + resizeOutBufferDev_ = nullptr; + } + if (picOutBufferDev_ != nullptr) { + (void)acldvppFree(picOutBufferDev_); + picOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t jpegOutWidthStride = (inputWidth_ + 15) / 16 * 16; // 16-byte alignment + uint32_t jpegOutHeightStride = (inputHeight_ + 1) / 2 * 2; // 2-byte alignment + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * 3 / 2; // yuv format size + resizeInputDesc_ = acldvppCreatePicDesc(); + if (resizeInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc failed"); + return FAILED; + } + if (jpegOutBufferSize != resizeInBufferSize_) { + ERROR_LOG("jpegOutBufferSize [%u] != resizeInBufferSize_ [%u]", + jpegOutBufferSize, resizeInBufferSize_); + 
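+        // The buffer handed to Process() is a decoded VDEC frame; it must match the 16/2 stride-aligned
+        // YUV420SP size derived from the configured input width and height, otherwise the resize input
+        // picture description would not cover the data correctly, so the function bails out here.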
return FAILED; + } + + (void)acldvppSetPicDescData(resizeInputDesc_, picOutBufferDev_); + (void)acldvppSetPicDescFormat(resizeInputDesc_, format_); + (void)acldvppSetPicDescWidth(resizeInputDesc_, inputWidth_); + (void)acldvppSetPicDescHeight(resizeInputDesc_, inputHeight_); + (void)acldvppSetPicDescWidthStride(resizeInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(resizeInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(resizeInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + // adjust based on the actual model + int resizeOutputWidthStride = (modelInputWidth_+ 15) / 16 * 16; // 16-byte alignment + int resizeOutputHeightStride = (modelInputHeight_ + 1) / 2 * 2; // 2-byte alignment + resizeOutputDesc_ = acldvppCreatePicDesc(); + if (resizeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(resizeOutputDesc_, resizeOutBufferDev_); + (void)acldvppSetPicDescFormat(resizeOutputDesc_, format_); + (void)acldvppSetPicDescWidth(resizeOutputDesc_, modelInputWidth_); + (void)acldvppSetPicDescHeight(resizeOutputDesc_, modelInputHeight_); + (void)acldvppSetPicDescWidthStride(resizeOutputDesc_, resizeOutputWidthStride); + (void)acldvppSetPicDescHeightStride(resizeOutputDesc_, resizeOutputHeightStride); + (void)acldvppSetPicDescSize(resizeOutputDesc_, resizeOutBufferSize_); + + return SUCCESS; +} + +Result DvppProcess::ProcessResize() +{ + // resize pic size + aclError ret = acldvppVpcResizeAsync(dvppChannelDesc_, resizeInputDesc_, + resizeOutputDesc_, resizeConfig_, stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = aclrtSynchronizeStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtSynchronizeStream failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(resizeOutputDesc_); + resizeOutputDesc_ = nullptr; + } + if (resizeInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(resizeInputDesc_); + resizeInputDesc_ = nullptr; + } +} + +void DvppProcess::DestroyResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + if (dvppChannelDesc_ != nullptr) { + aclError ret = acldvppDestroyChannel(dvppChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(ret)); + } + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = nullptr; + } +} + +Result DvppProcess::Process(void *buffer, uint32_t size) +{ + picOutBufferDev_ = buffer; + resizeInBufferSize_ = size; + Result ret = InitResizeInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitResizeInputDesc failed"); + DestroyResizeResource(); + return FAILED; + } + + ret = InitResizeOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitResizeOutputDesc failed"); + DestroyResizeResource(); + return FAILED; + } + ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/main.cpp b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/main.cpp new file 
mode 100644
index 00000000..0a7112ac
--- /dev/null
+++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/main.cpp
@@ -0,0 +1,43 @@
+/**
+* @file main.cpp
+*
+* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+#include <iostream>
+#include "sample_process.h"
+#include "utils.h"
+using namespace std;
+const char* output_dir;
+int main(int argn, const char* argv[])
+{
+    if (argn != 3) {
+        ERROR_LOG("please provide data and output dir to continue!");
+        return FAILED;
+    }
+    output_dir = argv[2];
+    SampleProcess sampleProcess(argv[1], argv[2]);
+    Result ret = sampleProcess.InitResource();
+    if (ret != SUCCESS) {
+        ERROR_LOG("sample init resource failed");
+        return FAILED;
+    }
+
+    ret = sampleProcess.DoVdecProcess();
+    if (ret != SUCCESS) {
+        ERROR_LOG("sample vdec process failed");
+        return FAILED;
+    }
+
+    ret = sampleProcess.DoModelProcess();
+    if (ret != SUCCESS) {
+        ERROR_LOG("sample model process failed");
+        return FAILED;
+    }
+
+    INFO_LOG("execute sample success");
+    return SUCCESS;
+}
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/sample_process.cpp b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/sample_process.cpp
new file mode 100644
index 00000000..f98e0499
--- /dev/null
+++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/sample_process.cpp
@@ -0,0 +1,270 @@
+/**
+* @file sample_process.cpp
+*
+* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/ +#include "sample_process.h" +#include +#include +#include "utils.h" + +using namespace std; + +namespace { + // const std::string filePath= "../data/vdec_h265_1frame_rabbit_1280x720.h265"; + bool runFlag = true; +} + +SampleProcess::SampleProcess(const char *data, const char *outFolder) : deviceId_(0), context_(nullptr), stream_(nullptr), thread_(),filePath(string(data)+"/vdec_h265_1frame_rabbit_1280x720.h265"), + outFolder_(outFolder), picDesc_({}), enType_(H265_MAIN_LEVEL), format_(PIXEL_FORMAT_YUV_SEMIPLANAR_420) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} +void *ThreadFunc(aclrtContext sharedContext) +{ + if (sharedContext == nullptr) { + ERROR_LOG("sharedContext can not be nullptr"); + return ((void*)(-1)); + } + INFO_LOG("use shared context for this thread"); + aclError ret = aclrtSetCurrentContext(sharedContext); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtSetCurrentContext failed, errorCode = %d", static_cast(ret)); + return ((void*)(-1)); + } + + INFO_LOG("thread start "); + while (runFlag) { + // Notice: timeout 1000ms + (void)aclrtProcessReport(1000); + } + return (void*)0; +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + return SUCCESS; +} + +Result SampleProcess::DoVdecProcess() +{ + // create threadId + thread_ = std::thread(ThreadFunc, context_); + std::ostringstream oss; + oss << thread_.get_id(); + uint64_t tid = std::stoull(oss.str()); + INFO_LOG("create thread successfully, threadId = %lu", tid); + + Result ret = Utils::CheckAndCreateFolder(outFolder_); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error."); + return FAILED; + } + + // dvpp init + VdecProcess vdecProcess; + ret = vdecProcess.InitResource(tid, enType_, format_); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + vdecProcess.DestroyResource(); + return FAILED; + } + + const int inputWidth = 1280; + const int inputHeight = 720; + int rest_len = 10; + picDesc_.width = inputWidth; + picDesc_.height = inputHeight; + + uint64_t count = 0; + while (rest_len > 0) { + void *inBufferDev = nullptr; + uint32_t inBufferSize = 0; + + // read file to device memory + if 
(!Utils::ReadFileToDeviceMem(filePath.c_str(), inBufferDev, inBufferSize)) { + ERROR_LOG("read file %s to device mem failed.\n", filePath.c_str()); + vdecProcess.DestroyResource(); + return FAILED; + } + vdecProcess.SetInput(inBufferDev, inBufferSize, picDesc_.width, picDesc_.height); + + ret = vdecProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp ProcessVdec failed"); + vdecProcess.DestroyResource(); + return FAILED; + } + ++count; + rest_len = rest_len - 1; + INFO_LOG("success to execute aclvdecSendFrame, count = %lu", count); + } + ret = vdecProcess.SendVdecEos(); + if (ret != SUCCESS) { + ERROR_LOG("send vdec eos frame failed, errorCode = %d", static_cast(ret)); + vdecProcess.DestroyResource(); + return FAILED; + } + INFO_LOG("success to send vdec eos frame"); + + vdecProcess.DestroyResource(); + + return SUCCESS; +} + +Result SampleProcess::DoModelProcess() +{ + // model init + + Result ret = Utils::CheckAndCreateFolder("result"); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error."); + return FAILED; + } + + // dvpp init + DvppProcess dvppProcess(stream_); + ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + + int modelInputWidth = 1280; + int modelInputHeight = 720; + + INFO_LOG("modelInputWidth %d modelInputHeight %d",modelInputWidth,modelInputHeight); + std::vector fileList = Utils::ReadDir(outFolder_); + for (size_t frameId = 0; frameId < fileList.size(); frameId++) { + + void *dvppOutputBuffer = nullptr; + uint32_t dvppOutputSize; + // read image file to device memory + std::string fileNameSave = std::string(outFolder_) + "/" + fileList[frameId]; + INFO_LOG("read file %s \n", fileNameSave.c_str()); + if (!Utils::ReadFileToDeviceMem(fileNameSave.c_str(), dvppOutputBuffer, dvppOutputSize)) { + ERROR_LOG("read file %s to device mem failed.\n", fileNameSave.c_str()); + return FAILED; + } + dvppProcess.SetInput(picDesc_.width, picDesc_.height, format_); + ret = dvppProcess.InitOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + (void)acldvppFree(dvppOutputBuffer); + return FAILED; + } + + // dvpp process + ret = dvppProcess.Process(dvppOutputBuffer, dvppOutputSize); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + dvppProcess.GetOutput(&dvppOutputBuffer, dvppOutputSize); + + (void)acldvppFree(dvppOutputBuffer); + + dvppProcess.DestroyOutputPara(); + + } + + dvppProcess.DestroyResource(); + // Utils::RemoveDir(outFolder_); + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + runFlag = false; + if (thread_.joinable()) { + thread_.join(); + } + + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize 
acl"); +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/utils.cpp b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/utils.cpp new file mode 100644 index 00000000..6f61025b --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/utils.cpp @@ -0,0 +1,355 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "utils.h" +#include +#if defined(_MSC_VER) +#include +#include +#else +#include +#include +#endif + +bool RunStatus::isDevice_ = false; + +bool Utils::ReadFileToDeviceMem(const char *fileName, void *&dataDev, uint32_t &dataSize) +{ + // read data from file. + FILE *fp = fopen(fileName, "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed.", fileName); + return false; + } + + fseek(fp, 0, SEEK_END); + long fileLenLong = ftell(fp); + if (fileLenLong <= 0) { + ERROR_LOG("file %s len is invalid.", fileName); + fclose(fp); + return false; + } + fseek(fp, 0, SEEK_SET); + + auto fileLen = static_cast(fileLenLong); + dataSize = fileLen; + size_t readSize; + // Malloc input device memory + auto aclRet = acldvppMalloc(&dataDev, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl malloc dvpp data failed, dataSize = %u, errorCode = %d.", + dataSize, static_cast(aclRet)); + fclose(fp); + return false; + } + + if (!RunStatus::GetDeviceStatus()) { + void *dataHost = nullptr; + auto aclRet = aclrtMallocHost(&dataHost, fileLen); + if (dataHost == nullptr) { + ERROR_LOG("acl malloc host data buffer failed. dataSize = %u, errorCode = %d.", + fileLen, static_cast(aclRet)); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + + readSize = fread(dataHost, 1, fileLen, fp); + if (readSize < fileLen) { + ERROR_LOG("need read file %s %u bytes, but only %zu read.", fileName, fileLen, readSize); + (void)aclrtFreeHost(dataHost); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + + // copy input to device memory + aclRet = aclrtMemcpy(dataDev, dataSize, dataHost, fileLen, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl memcpy data to dev failed, fileLen = %u, errorCode = %d.", + fileLen, static_cast(aclRet)); + (void)aclrtFreeHost(dataHost); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + (void)aclrtFreeHost(dataHost); + } else { + readSize = fread(dataDev, 1, fileLen, fp); + if (readSize < fileLen) { + ERROR_LOG("need read file %s %u bytes, but only %zu read.", fileName, fileLen, readSize); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + } + + fclose(fp); + return true; +} + +bool Utils::WriteDeviceMemoryToFile(const char *fileName, void *dataDev, uint32_t dataSize) +{ + if (dataDev == nullptr) { + ERROR_LOG("dataDev is nullptr!"); + return false; + } + + // copy output to host memory + void *data = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&data, dataSize); + if (data == nullptr) { + ERROR_LOG("malloc host data buffer failed. 
dataSize = %u, errorCode = %d.", + dataSize, static_cast(aclRet)); + return false; + } + aclRet = aclrtMemcpy(data, dataSize, dataDev, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl memcpy data to host failed, dataSize=%u, ret=%d.", dataSize, aclRet); + (void)aclrtFreeHost(data); + return false; + } + } else { + data = dataDev; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed, error=%s.", fileName, strerror(errno)); + (void)aclrtFreeHost(data); + return false; + } + + bool ret = true; + size_t writeRet = fwrite(data, 1, dataSize, outFileFp); + if (writeRet != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes, error=%s.\n", + dataSize, fileName, writeRet, strerror(errno)); + ret = false; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(data); + } + fflush(outFileFp); + fclose(outFileFp); + return ret; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("aclmdlGetDatasetNumBuffers from model output failed, outDatasetNum = 0"); + return FAILED; + } + FILE *outputFile = fopen(fileName, "wb+"); + if (outputFile == nullptr) { + ERROR_LOG("create output file %s failed", fileName); + return FAILED; + } + for (size_t i = 0; i < outDatasetNum; ++i) { + // get model output data + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer from model output failed."); + continue; + } + void *data = aclGetDataBufferAddr(dataBuffer); + if (data == nullptr) { + ERROR_LOG("aclGetDataBufferAddr from dataBuffer failed."); + continue; + } + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + INFO_LOG("output[%zu] DataBuffer, buffer addr = %p, buffer size = %u", + i, data, bufferSize); + + void *dataPtr = nullptr; + aclError ret; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + ret = aclrtMallocHost(&dataPtr, bufferSize); + if (ret != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d.", static_cast(ret)); + fclose(outputFile); + return FAILED; + } + // if app is running in host, need copy model output data from device to host + ret = aclrtMemcpy(dataPtr, bufferSize, data, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_SUCCESS) { + (void)aclrtFreeHost(dataPtr); + ERROR_LOG("memcpy device to host failed, errorCode = %d.", static_cast(ret)); + } + INFO_LOG("memcopy output data from device to host buffer success."); + } else { + dataPtr = data; + } + uint32_t len = static_cast(bufferSize); + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + INFO_LOG("create output file success, filename=%s, size=%u", fileName, len); + + if (!(RunStatus::GetDeviceStatus())) { // app is running in device + (void)aclrtFreeHost(dataPtr); + } + } + fclose(outputFile); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + INFO_LOG("start check result file:%s", fileName); + while (i < 10) { + std::ifstream f(fileName); + if (f.good()) { + break; + } + SleepTime(1); // sleep 1s + INFO_LOG("check result, wait time [%ds]", i+1); + i++; + } + // 10 is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout,expect file:%s", fileName); + return FAILED; + } + INFO_LOG("check result success, file exist"); + return SUCCESS; +} + +Result 
Utils::CheckAndCreateFolder(const char *foldName) +{ + INFO_LOG( "start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make dir successfully."); + } else { + INFO_LOG("make dir errorly."); + return FAILED; + } + } +#else + if (access(foldName , 0) == -1) { + int flag=mkdir(foldName , 0777); + if (flag == 0) + { + INFO_LOG("make dir successfully."); + } else { + ERROR_LOG("make dir errorly."); + return FAILED; + } + } +#endif + INFO_LOG("check result success, fold exist"); + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist."); + return FAILED; + } + FILE *model_output; + model_output = fopen(srcfileName,"rb" ); + + FILE *model_output_txt; + model_output_txt = fopen(dstfileName, "wb+"); + INFO_LOG("open result file: [%s]", dstfileName); + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f, %d", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result:classType[%d], top1[%f], top5[%f]", classType,max,sum); + return SUCCESS; +} + +std::vector Utils::ReadDir(const char* folder) +{ + std::vector fileList; +#if defined(_MSC_VER) + std::string inputDirectory = folder; + inputDirectory = inputDirectory.append("*"); + + _finddata_t fileinfo; + long long handle = (long long)_findfirst(inputDirectory.c_str(), &fileinfo); + if (handle == -1) { + ERROR_LOG("_findfirst failed!"); + return fileList; + } + + do { + DWORD ret = GetFileAttributes((LPCSTR)fileinfo.name); + if (ret == FILE_ATTRIBUTE_DIRECTORY) { + continue; + } + fileList.push_back(fileinfo.name); + } while (!_findnext(handle, &fileinfo)); + + _findclose(handle); +#else + struct dirent *dirp; + DIR* dir = opendir(folder); + while ((dirp = readdir(dir)) != nullptr) { + if (dirp->d_type == DT_REG) { + fileList.push_back(dirp->d_name); + } + } + closedir(dir); +#endif + return fileList; +} + +void Utils::RemoveDir(const char* outFolder_) +{ +#if defined(_MSC_VER) + RemoveDirectory((LPCSTR)outFolder_); +#else + rmdir(outFolder_); +#endif +} + +void Utils::SleepTime(unsigned int seconds) +{ +#if defined(_MSC_VER) + unsigned long secs = static_cast(seconds); + Sleep(secs * 1000); // sleep 1 second +#else + sleep(seconds); +#endif +} diff --git a/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/vdec_process.cpp b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/vdec_process.cpp new file mode 100644 index 00000000..dff9f338 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/media/vdec_test/src/vdec_process.cpp @@ -0,0 +1,314 @@ +/** +* @file vdec_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "vdec_process.h" +#include + +using namespace std; + +VdecProcess::VdecProcess() + : vdecChannelDesc_(nullptr), streamInputDesc_(nullptr), + picOutputDesc_(nullptr), picOutBufferDev_(nullptr), + inBufferDev_(nullptr), inBufferSize_(0), inputWidth_(0), + inputHeight_(0), enType_(H265_MAIN_LEVEL), format_(PIXEL_FORMAT_YUV_400) +{ +} + +VdecProcess::~VdecProcess() +{ +} +extern const char *output_dir; +void callback(acldvppStreamDesc *input, acldvppPicDesc *output, void *userdata) +{ + uint64_t frameIndex = 0; + if (userdata != nullptr) { + // get frame index in callback process + frameIndex = *((uint64_t *)userdata); + INFO_LOG("start processing callback, frame index is %lu", frameIndex); + free(userdata); + userdata = nullptr; + } + // free input vdecInBufferDev and destroy stream desc + if (input != nullptr) { + void *vdecInBufferDev = acldvppGetStreamDescData(input); + if (vdecInBufferDev != nullptr) { + aclError ret = acldvppFree(vdecInBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free input stream desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyStreamDesc(input); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy input stream desc, errorCode = %d", static_cast(ret)); + } + } + + // if output is nullptr, acldvppGetPicDescData return nullptr + if (output != nullptr) { + void *vdecOutBufferDev = acldvppGetPicDescData(output); + // check whether decode success + int retCode = acldvppGetPicDescRetCode(output); + // decode fail, release resource and retuen + if (retCode != 0) { + ERROR_LOG("vdec decode frame failed, retCode = %d.", retCode); + if (vdecOutBufferDev != nullptr) { + aclError ret = acldvppFree(vdecOutBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free output pic desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyPicDesc(output); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy output pic desc, errorCode = %d", static_cast(ret)); + } + return; + } + + // decode success, process output pic + if (vdecOutBufferDev != nullptr) { + uint32_t size = acldvppGetPicDescSize(output); + std::string fileNameSave = std::string(output_dir) + "/image" + std::to_string(frameIndex); + if (!Utils::WriteDeviceMemoryToFile(fileNameSave.c_str(), vdecOutBufferDev, size)) { + ERROR_LOG("write file failed."); + } + + aclError ret = acldvppFree(vdecOutBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free output pic desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyPicDesc(output); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy output pic desc, errorCode = %d", static_cast(ret)); + } + } + + INFO_LOG("success to process vdec callback %lu.", frameIndex); +} + +Result VdecProcess::InitResource(uint64_t threadId, acldvppStreamFormat enType, acldvppPixelFormat format) +{ + threadId_ = threadId; + enType_ = enType; + format_ = format; + // create vdec channelDesc + vdecChannelDesc_ = aclvdecCreateChannelDesc(); + if (vdecChannelDesc_ == nullptr) { + ERROR_LOG("fail to create vdec channel desc"); + return FAILED; + } + + // channelId: 0-15 + aclError ret = aclvdecSetChannelDescChannelId(vdecChannelDesc_, 10); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec ChannelId, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret 
= aclvdecSetChannelDescThreadId(vdecChannelDesc_, threadId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to create threadId, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // callback func + ret = aclvdecSetChannelDescCallback(vdecChannelDesc_, callback); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec Callback, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret = aclvdecSetChannelDescEnType(vdecChannelDesc_, enType_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec EnType, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret = aclvdecSetChannelDescOutPicFormat(vdecChannelDesc_, format_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec OutPicFormat, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // create vdec channel + ret = aclvdecCreateChannel(vdecChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to create vdec channel, errorCode = %d", static_cast(ret)); + return FAILED; + } + + INFO_LOG("vdec init resource success"); + return SUCCESS; +} + +void VdecProcess::SetInput(void *inBufferDev, uint32_t inBufferSize, + int inputWidth, int inputHeight) +{ + inBufferDev_ = inBufferDev; + inBufferSize_ = inBufferSize; + inputWidth_ = inputWidth; + inputHeight_ = inputHeight; +} + +Result VdecProcess::CreateStreamDesc() +{ + // create input stream desc + streamInputDesc_ = acldvppCreateStreamDesc(); + if (streamInputDesc_ == nullptr) { + ERROR_LOG("fail to create input stream desc"); + return FAILED; + } + + aclError ret = acldvppSetStreamDescData(streamInputDesc_, inBufferDev_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set data for stream desc, errorCode = %d", static_cast(ret)); + return FAILED; + } + // set size for dvpp stream desc + ret = acldvppSetStreamDescSize(streamInputDesc_, inBufferSize_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set size for stream desc, errorCode = %d", static_cast(ret)); + return FAILED; + } + return SUCCESS; +} + +void VdecProcess::DestroyStreamDesc() +{ + if (inBufferDev_ != nullptr) { + (void)acldvppFree(inBufferDev_); + inBufferDev_ = nullptr; + } + if (streamInputDesc_ != nullptr) { + (void)acldvppDestroyStreamDesc(streamInputDesc_); + streamInputDesc_ = nullptr; + } +} + +Result VdecProcess::CreatePicDesc(size_t size) +{ + // Malloc output device memory + aclError ret = acldvppMalloc(&picOutBufferDev_, size); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtMalloc failed, ret=%d", ret); + return FAILED; + } + picOutputDesc_ = acldvppCreatePicDesc(); + if (picOutputDesc_ == nullptr) { + ERROR_LOG("fail to create output pic desc"); + return FAILED; + } + ret = acldvppSetPicDescData(picOutputDesc_, picOutBufferDev_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescData, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = acldvppSetPicDescSize(picOutputDesc_, size); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescSize, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = acldvppSetPicDescFormat(picOutputDesc_, format_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescHeight, errorCode = %d", static_cast(ret)); + return FAILED; + } + return SUCCESS; +} + +void VdecProcess::DestroyPicDesc() +{ + if (picOutBufferDev_ != nullptr) { + (void)acldvppFree(picOutBufferDev_); + picOutBufferDev_ = nullptr; + } + if (picOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(picOutputDesc_); + picOutputDesc_ = nullptr; + } +} + +Result VdecProcess::Process() +{ + // create stream desc + Result err = 
CreateStreamDesc(); + if (err != SUCCESS) { + DestroyStreamDesc(); + return FAILED; + } + // create pic desc + size_t DataSize = (inputWidth_ * inputHeight_ * 3) / 2; // yuv format size + err = CreatePicDesc(DataSize); + if (err != SUCCESS) { + DestroyStreamDesc(); + DestroyPicDesc(); + return FAILED; + } + + // set frame index, callback function can use it + static uint64_t index = 0; + uint64_t *frameIndex = (uint64_t *)malloc(sizeof(uint64_t)); + if (frameIndex != nullptr) { + *frameIndex = index++; + } + + // send vdec frame + aclError ret = aclvdecSendFrame(vdecChannelDesc_, streamInputDesc_, + picOutputDesc_, nullptr, static_cast(frameIndex)); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to send frame, ret=%u", ret); + DestroyStreamDesc(); + DestroyPicDesc(); + if (frameIndex != nullptr) { + free(frameIndex); + frameIndex = nullptr; + } + return FAILED; + } + return SUCCESS; +} + +Result VdecProcess::SendVdecEos() +{ + // create stream desc + acldvppStreamDesc *streamInputDesc = acldvppCreateStreamDesc(); + if (streamInputDesc == nullptr) { + ERROR_LOG("fail to create input stream desc"); + return FAILED; + } + aclError ret = acldvppSetStreamDescEos(streamInputDesc, 1); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set eos for stream desc, errorCode = %d", static_cast(ret)); + (void)acldvppDestroyStreamDesc(streamInputDesc); + return FAILED; + } + + // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned. + ret = aclvdecSendFrame(vdecChannelDesc_, streamInputDesc, nullptr, nullptr, nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to send eos frame, ret=%u", ret); + (void)acldvppDestroyStreamDesc(streamInputDesc); + return FAILED; + } + (void)acldvppDestroyStreamDesc(streamInputDesc); + + return SUCCESS; +} + +void VdecProcess::DestroyResource() +{ + if (vdecChannelDesc_ != nullptr) { + aclError ret = aclvdecDestroyChannel(vdecChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, , errorCode = %d", static_cast(ret)); + } + (void)aclvdecDestroyChannelDesc(vdecChannelDesc_); + vdecChannelDesc_ = nullptr; + } +} diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/ascend_test_pyacl.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/ascend_test_pyacl.py new file mode 100644 index 00000000..0d894ff5 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/ascend_test_pyacl.py @@ -0,0 +1,8 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","基础功能"), + name = "PYACL_OP", + cmd = f"mkdir -p '{oec.Context.output_dir}/tmp/pyacl_testcase' && python3 ./test_acl_op.py '{oec.Context.data_path}' '{oec.Context.output_dir}'" + ) diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/test_acl_op.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/test_acl_op.py new file mode 100644 index 00000000..25d9ce79 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/test_acl_op.py @@ -0,0 +1,389 @@ +# -*- coding:utf-8 -*- +# 版权所有 (c) 华为技术有限公司 2022-2023 +import unittest +from threading import Lock +import os +import shutil +import numpy as np +import acl +import utils as util +import sys +import subprocess +import json +#from constant import Const + +data_path = sys.argv[1] +print(f"data path is {data_path}") + +output_dir = sys.argv[2] +print(f"output dir is {output_dir}") + +#get soc version +soc_version = acl.get_soc_name() +print(f"soc version is 
{soc_version}") + +add_json=""" +[ + { + "op": "Add", + "input_desc": [ + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + }, + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + } + ], + "output_desc": [ + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + } + ] + } +] +""" +add_json = json.loads(add_json) + +with open(f"{output_dir}/tmp/pyacl_testcase/add.json", 'w', encoding='utf-8') as json_file: + json.dump(add_json, json_file, ensure_ascii=False, indent=4) + +#transfer op model +subprocess.run(f"atc --singleop={output_dir}/tmp/pyacl_testcase/add.json --output={output_dir}/tmp/pyacl_testcase --soc_version={soc_version}", shell=True, cwd=f"{output_dir}/tmp/pyacl_testcase") + + +acl_dtype = { + "float32": 0, + "float16": 1, + "int8": 2, + "int32": 3, + "uint8": 4, + "int16": 6, + "uint16": 7, + "uint32": 8, + "int64": 9, + "double": 11, + "bool": 12 +} + +ACL_FORMAT_UNDEFINED = -1 +ACL_FORMAT_NCHW = 0 +ACL_FORMAT_NHWC = 1 +ACL_FORMAT_ND = 2 +ACL_FORMAT_NC1HWC0 = 3 +ACL_FORMAT_FRACTAL_Z = 4 +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +NPY_BYTE = 1 +ACL_FORMAT_ND = 2 +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 +ACL_STEP_START = 0 +ACL_STEP_END = 1 +ACL_OP_DUMP_OP_AICORE_ARGS = 0x00000001 + +def op_select(in_num, in_desc, out_num, out_desc, op_attr, op_kernel_desc): + """ + operator selector + """ + # get input + tilling_args = [] + args_list = [] + for i in range(in_num): + tilling_args.append(str(acl.get_tensor_desc_dim_v2(in_desc[i], 0)[0])) + tilling_args.append(str(acl.get_tensor_desc_dim_v2(in_desc[i], 1)[0])) + tilling_args.append(tilling_type[str(acl.get_tensor_desc_type(in_desc[i]))]) + + # get output + for i in range(out_num): + tilling_args.append(str(acl.get_tensor_desc_dim_v2(out_desc[i], 0)[0])) + tilling_args.append(str(acl.get_tensor_desc_dim_v2(out_desc[i], 1)[0])) + tilling_args.append(tilling_type[str(acl.get_tensor_desc_type(out_desc[i]))]) + + #set args + tilling = '_'.join(tilling_args) + args = np.array(args_list, dtype=np.uint32).tobytes() + args_ptr = acl.util.bytes_to_ptr(args) + size = len(args) + ret = acl.op.set_kernel_args(op_kernel_desc, tilling_mode[tilling], 2, args_ptr, size) + assert ret == 0 + workspace_sizes = bytes() + workspace_sizes_ptr = acl.util.bytes_to_ptr(workspace_sizes) + ret = acl.op.set_kernel_workspaces_sizes(op_kernel_desc, 0, workspace_sizes_ptr) + assert ret == 0 + + +class AclOp(object): + def __init__(self, a, b): + self.in_list = [] + self.in_host_list = [] + self.in_desc_list = [] + self.in_dev_list = [] + self.out_dev_list = [] + self.host_list = [] + self.out_list = [] + self.out_desc_list = [] + self.data = [a, b] + self.type = a.dtype + self.shape = a.shape + self.spec_type = ACL_FORMAT_ND + # attr + self.attr = acl.op.create_attr() + assert self.attr != 0 + # stream + self.stream, ret = acl.rt.create_stream() + assert ret == 0 + + def __del__(self): + # free resource + for i in range(len(self.in_desc_list)): + ret = acl.destroy_data_buffer(self.in_list[i]) + assert ret == 0 + ret = acl.destroy_data_buffer(self.in_host_list[i]) + assert ret == 0 + acl.destroy_tensor_desc(self.in_desc_list[i]) + + for i in range(len(self.out_desc_list)): + ret = acl.destroy_data_buffer(self.out_list[i]) + assert ret == 0 + acl.destroy_tensor_desc(self.out_desc_list[i]) + + for i in range(len(self.in_dev_list)): + ret = acl.rt.free(self.in_dev_list[i]) + assert ret == 0 + + for i in range(len(self.out_dev_list)): + ret = acl.rt.free(self.out_dev_list[i]) 
+ assert ret == 0 + + for i in range(len(self.host_list)): + ret = acl.rt.free_host(self.host_list[i]) + assert ret == 0 + + acl.op.destroy_attr(self.attr) + + ret = acl.rt.destroy_stream(self.stream) + assert ret == 0 + + def tensor_desc_init(self, gen_dataset=True): + # create input output tensors + for data in self.data: + desc = acl.create_tensor_desc(acl_dtype[str(data.dtype)], list(data.shape), self.spec_type) + assert desc != 0 + self.in_desc_list.append(desc) + + size = acl.get_tensor_desc_size(desc) + bytes_data = data.tobytes() + data_ptr = acl.util.bytes_to_ptr(bytes_data) + host_data_buf = acl.create_data_buffer(data_ptr, size) + assert host_data_buf != 0 + self.in_host_list.append(host_data_buf) + + dev_ptr, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_HUGE_FIRST) + assert ret == 0 + ret = acl.rt.memcpy(dev_ptr, size, data_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE) + assert ret == 0 + self.in_dev_list.append(dev_ptr) + data_buf = acl.create_data_buffer(dev_ptr, size) + assert data_buf != 0 + self.in_list.append(data_buf) + + if gen_dataset: + out_desc = acl.create_tensor_desc(acl_dtype[str(self.type)], list(self.shape), self.spec_type) + assert out_desc != 0 + self.out_desc_list.append(out_desc) + self.gen_output_data_set() + + def gen_output_data_set(self): + size = acl.get_tensor_desc_size(self.out_desc_list[0]) + out_dev, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_HUGE_FIRST) + assert ret == 0 + out_data_buf = acl.create_data_buffer(out_dev, size) + assert out_data_buf != 0 + self.out_list.append(out_data_buf) + self.out_dev_list.append(out_dev) + + host_ptr, ret = acl.rt.malloc_host(size) + assert ret == 0 + self.host_list.append(host_ptr) + + def model_update_params(self, op_type): + ret = acl.op.update_params(op_type, self.in_desc_list, self.out_desc_list, self.attr) + assert ret == 0 + + def model_execute(self, op_type="Add"): + # model execute + ret = acl.op.execute_v2(op_type, self.in_desc_list, self.in_list, self.out_desc_list, + self.out_list, self.attr, self.stream) + print("ret:",ret) + unittest.TestCase().assertEqual(ret, 0) + ret = acl.rt.synchronize_stream(self.stream) + unittest.TestCase().assertEqual(ret, 0) + #device to host + size = acl.get_tensor_desc_size(self.out_desc_list[0]) + acl.rt.memcpy(self.host_list[0], size, self.out_dev_list[0], size, ACL_MEMCPY_DEVICE_TO_HOST) + bytes_out = acl.util.ptr_to_bytes(self.host_list[0], size) + data = np.frombuffer(bytes_out, dtype=np.byte) + return data + + def model_op_execute(self, op_type="Add"): + # model execute + ret = acl.op.execute(op_type, self.in_desc_list, self.in_list, self.out_desc_list, + self.out_list, self.attr, self.stream) + assert ret == 0 + ret = acl.rt.synchronize_stream(self.stream) + assert ret == 0 + + #device to host + size = acl.get_tensor_desc_size(self.out_desc_list[0]) + acl.rt.memcpy(self.host_list[0], size, self.out_dev_list[0], size, ACL_MEMCPY_DEVICE_TO_HOST) + bytes_out = acl.util.ptr_to_bytes(self.host_list[0], size) + data = np.frombuffer(bytes_out, dtype=np.byte) + return data + + def np_data_format(self, data, dtype): + b_arr = data.tobytes() + arr_2 = np.frombuffer(b_arr, dtype=dtype) + return arr_2 + + def tensor_desc(self): + size = acl.get_tensor_desc_element_count(self.in_desc_list[0]) + print("size = ", size) + acl.set_tensor_desc_name(self.in_desc_list[0], "abc") + print("desc name= ", acl.get_tensor_desc_name(self.in_desc_list[0])) + fmt = acl.get_tensor_desc_format(self.indesc_list[0]) + print("fmt = ", fmt) + + def op_attr(self): + attr = acl.op.create_attr() + assert attr != 0 
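+ # exercise the aclop attribute setters: scalar (bool/int/float/string) and
+ # list variants are applied to a temporary attr handle that is destroyed
+ # before this method returns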
+ + ret = acl.op.set_attr_bool(attr, "a", 0) + assert ret == 0 + ret = acl.op.set_attr_int(attr, "b", 1) + assert ret == 0 + ret = acl.op.set_attr_float(attr, "c", 2.0) + assert ret == 0 + ret = acl.op.set_attr_string(attr, "d", "123") + assert ret == 0 + data = [4, 5, 6] + ret = acl.op.set_attr_list_bool(attr, "e", data) + assert ret == 0 + data = [1.5, 2.14, 3.11] + ret = acl.op.set_attr_list_float(attr, "f", data) + assert ret == 0 + data = [10, 20, 30] + ret = acl.op.set_attr_list_int(attr, "g", data) + assert ret == 0 + ret = acl.op.set_attr_list_string(attr, "h", ["1", "2"]) + assert ret == 0 + data = [[10],[20, 30], [40, 50, 60]] + ret = acl.op.set_attr_list_list_int(attr, "i", data) + assert ret == 0 + acl.op.destroy_attr(attr) + return 0 + + def exe_with_dynamic_shape(self, op_type): + out_desc = acl.create_tensor_desc(acl_dtype[str(self.type)], [-1, -1], self.spec_type) + assert out_desc != 0 + self.out_desc_list.append(out_desc) + ret = acl.op.infer_shape(op_type, self.in_desc_list, self.in_host_list, + 1, self.out_desc_list, self.attr) + assert ret == 0 + + tensor_dims = [] + for i in range(len(self.out_desc_list)): + dim_nums = acl.get_tensor_desc_num_dims(self.out_desc_list[i]) + dim_size = [] + for j in range(dim_nums): + dim, ret = acl.get_tensor_desc_dim_v2(self.out_desc_list[i], j) + assert ret == 0 + if dim == -1: + dim_range, ret = acl.get_tensor_desc_dim_range(self.out_desc_list[i], j, 2) + assert ret == 0 + dim = dim_range[1] + dim_size.append(dim) + tensor_dims.append(dim_size) + print("[INFO] infer result: {}".format(tensor_dims)) + + self.shape = tensor_dims[0] + self.gen_output_data_set() + result = self.model_execute(op_type) + return result + + +g_callbackRunFlag = False + + +class TestOp(unittest.TestCase): + + def setUp(self) -> None: + pass + + def tearDown(self) -> None: + pass + + @classmethod + def tearDownClass(cls) -> None: + ret = acl.rt.reset_device(0) + if ret: + print("acl.rt.reset_device! ret:", ret) + raise AssertionError + ret = acl.finalize() + if ret: + print("acl.finalize failed! ret:", ret) + raise AssertionError + + @classmethod + def setUpClass(cls) -> None: + ret = acl.init() + if ret: + print("acl.init failed! ret:", ret) + raise AssertionError + ret = acl.op.set_model_dir(f"{output_dir}/tmp/pyacl_testcase") + if ret: + print("acl.op.set_model_dir failed! ret:", ret) + raise AssertionError + ret = acl.rt.set_device(0) + if ret: + print("acl.rt.set_device failed! 
ret:", ret) + raise AssertionError + + def test_op_015_load_op(self): + """ + test case for loading operator + """ + np_data = np.fromfile(f"{output_dir}/tmp/pyacl_testcase/0_Add_3_2_8_16_3_2_8_16_3_2_8_16.om", dtype="int8") + bytes_data = np_data.tobytes() + buffer = acl.util.bytes_to_ptr(bytes_data) + np_size = np_data.size + + ret = acl.op.load(buffer, np_size) + self.assertEqual(ret, 0) + + def test_op_017_normal_op_add(self): + """ + test case for operator add + """ + a = np.random.randint(100, size=(8, 16)).astype(np.int32) + b = np.random.randint(100, size=(8, 16)).astype(np.int32) + op_handle = AclOp(a, b) + op_handle.tensor_desc_init() + res = op_handle.model_execute() + data = op_handle.np_data_format(res, dtype=np.int32) + np_res = a + b + np_out = np.reshape(np_res, (np_res.size,)) + self.assertEqual((data == np_out).all(), True) + +if __name__ == "__main__": + #util.show_growth() + suite = util.switch_cases(TestOp, "all") + unittest.TextTestRunner(verbosity=2).run(suite) + #util.show_growth() + diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/utils.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/utils.py new file mode 100644 index 00000000..54d57ce8 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_op/utils.py @@ -0,0 +1,40 @@ +import unittest +import numpy as np +import acl + +def get_class_methods(class_name): + method_list = [method.split("_") for method in dir(class_name) if method.startswith("test_")] + method_list = sorted(method_list, key=lambda x: x[2]) + methods = ["_".join(method) for method in method_list] + return methods + +def switch_cases(case_class, opt): + suite = unittest.TestSuite() + methods = get_class_methods(case_class) + + if opt == "all": + for method in methods: + suite.addTest(case_class(method)) + return suite + +def align_size(origin_size, alignment): + if not alignment: + return 0 + return ((origin_size + (alignment - 1)) // alignment) * alignment + +def get_align_size(align_dict, pixel_fotmat, defaule_vale=0, case_value=0): + for key in align_dict.keys(): + if pixel_fotmat in key: + return align_dict.get(key)(defaule_vale, case_value) + return defaule_vale + +def get_device_type(): + device_type = acl.get_soc_name()[len('Ascend'):] + if "P" in device_type: + device_type = device_type[0:4] + else: + device_type = device_type[0:3] + device_type = device_type == "910P" and "910" or device_type + if device_type not in ["310", "310P", "910"]: + raise Exception(f"device_type = {device_type} not in 310/310P/910, npu-smi not found!") + return device_type \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/ascend_test_pyacl.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/ascend_test_pyacl.py new file mode 100644 index 00000000..7e144d0e --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/ascend_test_pyacl.py @@ -0,0 +1,8 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("应用开发","媒体处理"), + name = "PYACL_VPC", + cmd = f"python3 ./test_acl_vpc.py {oec.Context.data_path}" + ) diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/test_acl_vpc.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/test_acl_vpc.py new file mode 100644 index 00000000..5cb596a7 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/test_acl_vpc.py @@ -0,0 +1,310 @@ +# -*- coding:utf-8 -*- 
+import os +import unittest +import numpy as np +from decimal import Decimal, getcontext +import acl +import utils as util +from utils import align_size +from utils import get_align_size +import sys + +data_path = sys.argv[1] +print(f"data path is {data_path}") + +YUV400 = 0 +YUV420 = 1 +YUV422 = 3 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +HEIGHT_STRIDE = (1, 2) +ONE_PIXEL_OCCUPY_TWO_BYTE = 2 +ONE_PIXEL_OCCUPY_THREE_BYTE = 3 +ONE_PIXEL_OCCUPY_FOUR_BYTE = 4 + +WIDTH_STRIDE = { + (0, 1, 2, 3, 4, 5, 6, 1000, 1001):lambda x,y: align_size(y, 16), + (7, 8, 9, 10): lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_TWO_BYTE, + (11, 12, 13):lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_THREE_BYTE, + (14, 15, 16, 17):lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_FOUR_BYTE +} + +BUFFER_SIZE = { + (0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17):lambda x, y:x * y, + (1, 2):lambda x, y:(x * y) * 3 // 2, + (3, 4):lambda x, y:(x * y) * 2, + (6):lambda x, y:(x * y) * 3, +} + + +class AclVpc(object): + def __init__(self, in_batch_size=1, out_batch_size=1): + self.out_batch_size = out_batch_size + self.in_batch_size = in_batch_size + self.out_batch_pic_desc = None + self.in_batch_pic_desc = None + self.crop_area = None + self.paste_area = None + self.output_desc = None + self.input_desc = None + self.in_buffer_dev = None + self.out_buffer_dev = None + self.dvpp_channel_desc = None + self.resize_config = None + self.context, ret = acl.rt.create_context(0) + assert ret == 0 + acl.rt.set_context(self.context) + self.stream, ret = acl.rt.create_stream() + assert ret == 0 + self.dev_buffer = {} + self.corpList, self.pasteList = [], [] + + def __del__(self): + acl.rt.set_context(self.context) + self._free_pic_desc() + for i in range(len(self.corpList)): + ret = acl.media.dvpp_destroy_roi_config(self.corpList[i]) + assert ret == 0 + for i in range(len(self.pasteList)): + ret = acl.media.dvpp_destroy_roi_config(self.pasteList[i]) + assert ret == 0 + for key in self.dev_buffer.keys(): + if self.dev_buffer[key]: + ret = acl.media.dvpp_free(self.dev_buffer[key]) + assert ret == 0 + if self.resize_config: + ret = acl.media.dvpp_destroy_resize_config(self.resize_config) + assert ret == 0 + roi_conf = [self.crop_area, self.paste_area] + for i in range(len(roi_conf)): + if roi_conf[i]: + ret = acl.media.dvpp_destroy_roi_config(roi_conf[i]) + assert ret == 0 + buffer_dev = [self.in_buffer_dev, self.out_buffer_dev] + for i in range(len(buffer_dev)): + if buffer_dev[i]: + ret = acl.media.dvpp_free(buffer_dev[i]) + assert ret == 0 + if self.dvpp_channel_desc: + ret = acl.media.dvpp_destroy_channel(self.dvpp_channel_desc) + assert ret == 0 + ret = acl.media.dvpp_destroy_channel_desc(self.dvpp_channel_desc) + assert ret == 0 + ret = acl.rt.destroy_stream(self.stream) + assert ret == 0 + ret = acl.rt.destroy_context(self.context) + assert ret == 0 + print("vpc free resource") + + def _free_pic_desc(self): + desc = [self.output_desc, self.input_desc] + for i in range(len(desc)): + if desc[i]: + ret = acl.mdeia.dvpp_destroy_pic_desc(desc[i]) + assert ret == 0 + + batch_pic_desc = [self.out_batch_pic_desc, self.in_batch_pic_desc] + for i in range(len(batch_pic_desc)): + if batch_pic_desc[i]: + ret = acl.media.dvpp_destroy_batch_pic_desc(batch_pic_desc[i]) + assert ret == 0 + + def dvpp_set_pic_desc(self, desc, buffer, width, height, wstride, hstride, size, format=YUV420): + ret = acl.media.dvpp_set_pic_desc_data(desc, buffer) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_format(desc, format) + assert 
ret == 0 + ret = acl.media.dvpp_set_pic_desc_width(desc, width) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_height(desc, height) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_width_stride(desc, wstride) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_height_stride(desc, hstride) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_size(desc, size) + assert ret == 0 + + def vpc_init(self): + acl.rt.set_context(self.context) + + #create channel desc + self.dvpp_channel_desc = acl.media.dvpp_create_channel_desc() + assert self.dvpp_channel_desc != 0 + + #create channel desc + ret = acl.media.dvpp_create_channel(self.dvpp_channel_desc) + assert ret == 0 + + def get_picture_height_stride(self, format, height): + """ + get picture height stride + 1.YUV420 height stride 2 alignment + 2.other format height stride no aligment. + """ + if format in HEIGHT_STRIDE: + return int(((height + 1) // 2) * 2) + return int(height) + + def get_picture_width_stride(self, format, width): + """ + get picture width stride: + 1.width stride 16 alignment, + 2.width stride 16 alignment, one PIXEL occupy two byte, + 3.width stride 16 alignment, one PIXEL occupy three byte, + 4.width stride 16 alignment, one PIXEL occupy four byte, + """ + return get_align_size(WIDTH_STRIDE, format, 0, width) + + def get_picture_buffer_size(self, format, width_stride, height_stride, flag): + """ + get pictutre buffer size: + 1.YUV400 in 310P memory is width_stride * height_stride + 2.YUV400,YUV420 memory is width_stride * height_stride * 3 //2 + 3.YUV422SP,YUV440SP memory is width_stride * height_stride * 2 + 4.YUV4442SP memory is width_stride * height_stride * 3 + 5.other support format memory is width_stride * height_stride. + """ + if flag: + return width_stride * height_stride * 3 // 2 + return get_align_size(BUFFER_SIZE, + format, + width_stride, + height_stride) + + def set_picture_desc(self, desc, width, height, opt, i, format=YUV420, flag=True): + """"get picture info and set picture description""" + width_stride = self.get_picture_width_stride(format, width) + height_stride = self.get_picture_height_stride(format, height) + buffer_size = self.get_picture_buffer_size(format, + width_stride, + height_stride, + flag) + buffer_size = int(buffer_size) + dev, ret =acl.media.dvpp_malloc(buffer_size) + assert ret == 0 + ret = acl.rt.memset(dev, buffer_size, 0, buffer_size) + assert ret == 0 + key = opt + '_' + str(i) + self.dev_buffer[key] = dev + self.dvpp_set_pic_desc(desc, dev, width, + height, width_stride, height_stride, + buffer_size, format) + return buffer_size + + def get_pic_desc_data(self, pic_desc): + pic_data = acl.media.dvpp_get_pic_desc_data(pic_desc) + pic_data_size = acl.media.dvpp_get_pic_desc_size(pic_desc) + ret_code = acl.media.dvpp_get_pic_desc_ret_code(pic_desc) + assert ret_code == 0 + + # pic memcpy d2h + np_pic = np.zeros(pic_data_size, dtype=np.byte) + bytes_data = np_pic.tobytes() + np_pic_ptr = acl.util.bytes_to_ptr(bytes_data) + ret = acl.rt.memcpy(np_pic_ptr, pic_data_size, + pic_data, pic_data_size, ACL_MEMCPY_DEVICE_TO_HOST) + assert ret == 0 + return np_pic + + def async_vpc_batch_crop_resize_paste_synchronize(self, w, h, path): + self.out_batch_pic_desc = acl.media.dvpp_create_batch_pic_desc(self.out_batch_size) + self.in_batch_pic_desc = acl.media.dvpp_create_batch_pic_desc(self.in_batch_size) + # load data from file + np_yuv = np.fromfile(path, dtype=np.byte) + in_buffer_size = np_yuv.itemsize * np_yuv.size + bytes_data = np_yuv.tobytes() + bytes_yuv_ptr = 
acl.util.bytes_to_ptr(bytes_data) + roiList = [] + for i in range(self.in_batch_size): + input_desc = acl.media.dvpp_get_pic_desc(self.in_batch_pic_desc, i) + print(self.in_batch_pic_desc, input_desc, i) + assert input_desc != 0 + self.set_picture_desc(input_desc, w, h, "input", i) + #copy from host to device + key = "input" + '_' + str(i) + ret = acl.rt.memcpy(self.dev_buffer[key], in_buffer_size, bytes_yuv_ptr, + in_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE) + assert ret == 0 + roiList.append(self.out_batch_size // self.in_batch_size) + + for i in range(self.out_batch_size): + out_desc = acl.media.dvpp_get_pic_desc(self.out_batch_pic_desc, i) + assert out_desc != 0 + self.set_picture_desc(out_desc, w // 2, h // 2, "output", i) + if i % 2 == 0: + crop_area = acl.media.dvpp_create_roi_config(w // 2, w - 1, h // 2, h - 1) + paste_area = acl.media.dvpp_create_roi_config(w // 4, w // 2 - 1, + h // 4, h // 2 - 1 ) + else: + crop_area = acl.media.dvpp_create_roi_config(0, w // 2 -1, 0, h // 2 -1) + paste_area = acl.media.dvpp_create_roi_config(0, w // 4 - 1, 0, h // 4 -1) + self.corpList.append(crop_area) + self.pasteList.append(paste_area) + + total_num = 0 + for i in range(self.in_batch_size): + total_num += roiList[i] + if self.out_batch_size % self.in_batch_size != 0: + roiList[-1] = self.out_batch_size - total_num + roiList[-1] + + self.resize_config = acl.media.dvpp_create_resize_config() + ret = acl.media.dvpp_vpc_batch_crop_resize_paste_async(self.dvpp_channel_desc, self.in_batch_pic_desc, + roiList, self.out_batch_pic_desc, self.corpList, + self.pasteList, self.resize_config, self.stream) + print("ret:",ret) + assert ret == 0 + ret = acl.rt.synchronize_stream(self.stream) + assert ret == 0 + np_list = [] + for i in range(self.out_batch_size): + output_desc = acl.media.dvpp_get_pic_desc(self.out_batch_pic_desc, i) + np_output = self.get_pic_desc_data(output_desc) + np_list.append(np_output) + + return np_list + + +class TestVpc(unittest.TestCase): + @classmethod + def setUpClass(cls): + """called only once before all testcase""" + # init + ret = acl.init("") + assert ret == 0 + ret = acl.rt.set_device(0) + assert ret == 0 + + @classmethod + def tearDownClass(cls): + """ called only once after all testcase """ + ret = acl.rt.reset_device(0) + assert ret == 0 + ret = acl.finalize() + assert ret == 0 + + def setUp(self) -> None: + pass + + def tearDown(self) -> None: + pass + + + def test_vpc_019_batch_crop_resize_paste_1_batch_input(self): + """ + test case for vpc batch crop resize paste + """ + vpc_handle = AclVpc(1, 2) + vpc_handle.vpc_init() + # 512x368 -> 256x184(crop) -> 128x92(resize) -> 256x184(paste) + out = vpc_handle.async_vpc_batch_crop_resize_paste_synchronize(1024, 368, f"{data_path}/data/wood_rabbit_1024_1068_nv12.yuv") + print("out:", out) + device_type = util.get_device_type() + + + +if __name__ == "__main__": + #util.show_growth() + suite = util.switch_cases(TestVpc, "all") + unittest.TextTestRunner(verbosity=2).run(suite) + #util.show_growth() \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/utils.py b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/utils.py new file mode 100644 index 00000000..54d57ce8 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/pyacl_test_case/test_acl_vpc/utils.py @@ -0,0 +1,40 @@ +import unittest +import numpy as np +import acl + +def get_class_methods(class_name): + method_list = [method.split("_") for method in dir(class_name) if method.startswith("test_")] 
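+ # test method names follow test_<group>_<number>_<description>, so the third
+ # underscore-separated field (x[2]) is the zero-padded case number; sorting on
+ # it keeps the execution order stable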
+ method_list = sorted(method_list, key=lambda x: x[2]) + methods = ["_".join(method) for method in method_list] + return methods + +def switch_cases(case_class, opt): + suite = unittest.TestSuite() + methods = get_class_methods(case_class) + + if opt == "all": + for method in methods: + suite.addTest(case_class(method)) + return suite + +def align_size(origin_size, alignment): + if not alignment: + return 0 + return ((origin_size + (alignment - 1)) // alignment) * alignment + +def get_align_size(align_dict, pixel_fotmat, defaule_vale=0, case_value=0): + for key in align_dict.keys(): + if pixel_fotmat in key: + return align_dict.get(key)(defaule_vale, case_value) + return defaule_vale + +def get_device_type(): + device_type = acl.get_soc_name()[len('Ascend'):] + if "P" in device_type: + device_type = device_type[0:4] + else: + device_type = device_type[0:3] + device_type = device_type == "910P" and "910" or device_type + if device_type not in ["310", "310P", "910"]: + raise Exception(f"device_type = {device_type} not in 310/310P/910, npu-smi not found!") + return device_type \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/ascend_test_resnet.py b/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/ascend_test_resnet.py new file mode 100644 index 00000000..226cfb59 --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/ascend_test_resnet.py @@ -0,0 +1,7 @@ +import oec + +oec.NPUTestCase( + group=("集成测试","离线推理"), + name="OFFLINE_ACL_RESNET50", + cmd=f'bash run.sh {oec.Context.data_path} {oec.Context.output_dir} ' +) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/resnet50.cpp b/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/resnet50.cpp new file mode 100644 index 00000000..da022fde --- /dev/null +++ b/oec-ascend/oec/resource/ApplicationDev/resnet50_offline/resnet50.cpp @@ -0,0 +1,327 @@ +#include "acl/acl.h" +#include +#include // 添加此行以引入accumulate函数 +#include +#include +#include +#include +#include +#include +using namespace std; + +// ---------------------- 全局变量定义 ---------------------- +int32_t deviceId = 0; // 计算设备ID +uint32_t modelId = 0; // 模型ID +size_t pictureDataSize = 0; // 图片数据大小 +void* pictureHostData = nullptr; // 主机侧图片数据 +void* pictureDeviceData = nullptr; // 设备侧图片数据 +aclmdlDataset* inputDataSet = nullptr;// 输入数据集 +aclDataBuffer* inputDataBuffer = nullptr; +aclmdlDataset* outputDataSet = nullptr;// 输出数据集 +aclDataBuffer* outputDataBuffer = nullptr; +aclmdlDesc* modelDesc = nullptr; // 模型描述信息 +size_t outputDataSize = 0; // 输出数据大小 +void* outputDeviceData = nullptr; // 设备侧输出数据 +void* outputHostData = nullptr; // 主机侧输出数据 + +// ---------------------- 预期结果配置 ---------------------- +const unsigned int EXPECTED_TOP1_INDEX = 162; // 预期Top1类别索引(需根据模型数据集调整) +const double MIN_CONFIDENCE_THRESHOLD = 0.9; // 最小置信度阈值(建议≥0.9) + +// ---------------------- 函数声明 ---------------------- +void InitResource(); // 资源初始化 +void LoadModel(const char* modelPath); // 加载模型 +void LoadPicture(const char* picturePath); // 加载图片(主机+设备内存) +void Inference(); // 执行推理 +int PrintResultAndValidate(); // 打印结果并验证 +void UnloadModel(); // 卸载模型 +void UnloadPicture(); // 释放图片相关资源 +void DestroyResource(); // 释放全局资源 + +// ---------------------- 函数定义 ---------------------- +// 1. 
资源初始化(AscendCL初始化 + 指定计算设备) +void InitResource() { + aclError ret = aclInit(nullptr); // 初始化AscendCL,使用默认配置 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclInit failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtSetDevice(deviceId); // 指定计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtSetDevice failed, error code: " << ret << endl; + exit(1); + } +} + +// 2. 加载模型(.om文件) +void LoadModel(const char* modelPath) { + aclError ret = aclmdlLoadFromFile(modelPath, &modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to load model from " << modelPath << ", error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Model loaded successfully: " << modelPath << endl; +} + +// 3. 读取图片到主机内存 +void ReadPictureToHost(const char* picturePath) { + ifstream binFile(picturePath, ios::binary); + if (!binFile.is_open()) { + cerr << "[ERROR] Failed to open picture file: " << picturePath << endl; + exit(1); + } + // 获取文件大小并读取数据 + binFile.seekg(0, ios::end); + pictureDataSize = binFile.tellg(); + binFile.seekg(0, ios::beg); + + aclError ret = aclrtMallocHost(&pictureHostData, pictureDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMallocHost failed, error code: " << ret << endl; + exit(1); + } + binFile.read((char*)pictureHostData, pictureDataSize); + binFile.close(); + cout << "[INFO] Picture loaded to host memory: " << picturePath << endl; +} + +// 4. 复制数据到设备内存 +void CopyDataFromHostToDevice() { + aclError ret = aclrtMalloc(&pictureDeviceData, pictureDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMalloc failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtMemcpy(pictureDeviceData, pictureDataSize, pictureHostData, pictureDataSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMemcpy failed, error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Picture data copied to device memory" << endl; +} + +// 5. 加载图片(组合函数) +void LoadPicture(const char* picturePath) { + ReadPictureToHost(picturePath); + CopyDataFromHostToDevice(); +} + +// 6. 创建模型输入数据结构 +void CreateModelInput() { + inputDataSet = aclmdlCreateDataset(); + inputDataBuffer = aclCreateDataBuffer(pictureDeviceData, pictureDataSize); + aclError ret = aclmdlAddDatasetBuffer(inputDataSet, inputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model input, error code: " << ret << endl; + exit(1); + } +} + +// 7. 创建模型输出数据结构 +void CreateModelOutput() { + modelDesc = aclmdlCreateDesc(); + aclError ret = aclmdlGetDesc(modelDesc, modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to get model description, error code: " << ret << endl; + exit(1); + } + + outputDataSet = aclmdlCreateDataset(); + outputDataSize = aclmdlGetOutputSizeByIndex(modelDesc, 0); // 获取第一个输出的大小 + + ret = aclrtMalloc(&outputDeviceData, outputDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to allocate output memory, error code: " << ret << endl; + exit(1); + } + outputDataBuffer = aclCreateDataBuffer(outputDeviceData, outputDataSize); + ret = aclmdlAddDatasetBuffer(outputDataSet, outputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model output, error code: " << ret << endl; + exit(1); + } +} + +// 8. 
执行推理 +void Inference() { + CreateModelInput(); + CreateModelOutput(); + aclError ret = aclmdlExecute(modelId, inputDataSet, outputDataSet); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Inference failed, error code: " << ret << endl; + exit(1); + } +} + +// 9. 打印结果并验证 +int PrintResultAndValidate() { + // 复制输出数据到主机内存 + aclError ret = aclrtMallocHost(&outputHostData, outputDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to malloc host memory for output, error code: " << ret << endl; + return 1; + } + ret = aclrtMemcpy(outputHostData, outputDataSize, outputDeviceData, outputDataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to copy output data to host, error code: " << ret << endl; + return 1; + } + + // 解析输出数据(转换为float数组) + float* outFloatData = reinterpret_cast(outputHostData); + map> resultMap; // 按置信度降序排序 + for (unsigned int j = 0; j < outputDataSize / sizeof(float); ++j) { + resultMap[outFloatData[j]] = j; + } + + // 检查是否有推理结果 + if (resultMap.empty()) { + cerr << "[ERROR] No inference results found" << endl; + return 1; + } + + // 提取Top1结果 + auto top1 = resultMap.begin(); + unsigned int top1Index = top1->second; + double top1Score = top1->first; + double top1Confidence = exp(top1Score) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + + // 打印Top5结果 + cout << "\nTop 5 Inference Results:" << endl; + int cnt = 0; + for (auto it = resultMap.begin(); it != resultMap.end() && cnt < 5; ++it, ++cnt) { + double prob = exp(it->first) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + cout << "Top " << cnt + 1 << ": Index[" << it->second << "] Confidence[" << fixed << prob << "]" << endl; + } + + // 结果验证 + bool isSuccess = (top1Index == EXPECTED_TOP1_INDEX && top1Confidence >= MIN_CONFIDENCE_THRESHOLD); + if (isSuccess) { + cout << "\n[VALIDATION SUCCESS] Top1 matches expectations: Index[" << top1Index + << "] Confidence[" << fixed << top1Confidence << "]" << endl; + return 0; // 验证通过,返回0 + } else { + cerr << "\n[VALIDATION FAILED] Top1 does not match expectations:" << endl + << " Expected Index: " << EXPECTED_TOP1_INDEX << ", Confidence ≥ " << MIN_CONFIDENCE_THRESHOLD << endl + << " Actual Index: " << top1Index << ", Confidence: " << fixed << top1Confidence << endl; + return 1; // 验证失败,返回1 + } +} + +// 10. 卸载模型 +void UnloadModel() { + if (modelDesc != nullptr) { + aclmdlDestroyDesc(modelDesc); + modelDesc = nullptr; + } + aclError ret = aclmdlUnload(modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to unload model, error code: " << ret << endl; + } + cout << "[INFO] Model unloaded successfully" << endl; +} + +// 11. 
释放图片相关资源 +void UnloadPicture() { + if (pictureHostData != nullptr) { + aclrtFreeHost(pictureHostData); + pictureHostData = nullptr; + } + if (pictureDeviceData != nullptr) { + aclrtFree(pictureDeviceData); + pictureDeviceData = nullptr; + } + if (inputDataBuffer != nullptr) { + aclDestroyDataBuffer(inputDataBuffer); + inputDataBuffer = nullptr; + } + if (inputDataSet != nullptr) { + aclmdlDestroyDataset(inputDataSet); + inputDataSet = nullptr; + } + if (outputHostData != nullptr) { + aclrtFreeHost(outputHostData); + outputHostData = nullptr; + } + if (outputDeviceData != nullptr) { + aclrtFree(outputDeviceData); + outputDeviceData = nullptr; + } + if (outputDataBuffer != nullptr) { + aclDestroyDataBuffer(outputDataBuffer); + outputDataBuffer = nullptr; + } + if (outputDataSet != nullptr) { + aclmdlDestroyDataset(outputDataSet); + outputDataSet = nullptr; + } + cout << "[INFO] Picture resources unloaded successfully" << endl; +} + +// 12. 释放全局资源 +void DestroyResource() { + aclError ret = aclrtResetDevice(deviceId); // 重置计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtResetDevice failed, error code: " << ret << endl; + } + aclFinalize(); // 去初始化AscendCL + cout << "[INFO] Global resources released successfully" << endl; +} + +// ---------------------- 主函数 ---------------------- +int main(int argc, char* argv[]) { + // 检查命令行参数 + if (argc != 3) { + cerr << "[ERROR] Usage: " << argv[0] << " "<<" " << endl; + cerr << " Example: " << argv[0] << " /path/to/resources" <<" 1000" << endl; + cerr << " Model will be loaded from: /model/resnet50.om" << endl; + cerr << " Picture will be loaded from: /data/dog1_1024_683.bin" << endl; + return 1; + } + + // 构建模型和图片路径 + string basePath = argv[1]; + int test_times = atoi(argv[2]); + string modelPath = basePath + "/model/resnet50.om"; + string picturePath = basePath + "/data/dog1_1024_683.bin"; + + cout << "[INFO] Base path: " << basePath << endl; + cout << "[INFO] Model path: " << modelPath << endl; + cout << "[INFO] Picture path: " << picturePath << endl; + + // 1. 资源初始化 + InitResource(); + + // 2. 加载模型 + LoadModel(modelPath.c_str()); + + // 3. 加载测试图片 + LoadPicture(picturePath.c_str()); + + auto start = std::chrono::high_resolution_clock::now(); + for(int i =0; i < test_times; ++i){ + // 4. 执行推理 + Inference(); + } + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + double fps = static_cast(test_times) / duration.count() * 1000000; + + // 5. 
打印结果并验证 + int status = PrintResultAndValidate(); + + std::cout <<"\n" << "FPS: " << fps << "\n" < 1: + info["昇腾硬件"] = f"{info['NPU']} × {info['Count'] }" + else: + info["昇腾硬件"] = f"{info['NPU']}" + + self.logger.debug( + f"HDK NPU:{info['NPU']}, Count:{info['Count']}") + +class CANNNPUInfomationCase(TestCase): + + def check_result(self, log, return_code): + super(CANNNPUInfomationCase,self).check_result(log, return_code) + if self.is_failed(): + return + if log == "": + self.set_state(State.FAIL) + return + npu_count = log.split('\n') + if npu_count is None or len(npu_count) != 2: + self.set_state(State.FAIL) + return + npu,count = tuple(npu_count) + self.logger.debug(f"NPU:{npu}, Count:{count}") + info = self.context.infomation + info['NPU'] = npu + info['Count'] = int(count) + if info['Count'] > 1: + info["昇腾硬件"] = f'{npu} × {count}' + else: + info["昇腾硬件"] = f'{npu}' + self.set_state(State.PASS) + +class CANNVersionInfomationCase(TestCase): + + def check_result(self, log, return_code): + super(CANNVersionInfomationCase,self).check_result(log, return_code) + if self.is_failed(): + return + if log == "": + self.set_state(State.FAIL) + return + + self.logger.debug(f"CANN Version = {log}") + self.context.infomation['CANN Version'] = log + self.set_state(State.PASS) + +OSInfomationCase( + group=("运行环境","环境信息"), + name='READ_OS_INFOMATION') + +HDKInfomationCase( + group=("运行环境","环境信息"), + name='READ_DRIVER_INFOMATION', + cmd = 'npu-smi info') + +SetEnvTestCase( + group=("运行环境","CANN信息"), + name="READ_CANN_SET_ENV", + cmd=f"bash -c 'source {oec.Context.cann_path}/ascend-toolkit/set_env.sh && env'", + exclude=None, +) + +CANNVersionInfomationCase( + group=("运行环境","CANN信息"), + name='READ_CANN_VERSION_INFOMATION', + cmd = 'python3 get_cann_version.py' +) + +CANNNPUInfomationCase( + group=("运行环境","CANN信息"), + name='READ_CANN_NPU_INFOMATION', + cmd = 'python3 get_npu_info.py' +) \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/check_package_version.py b/oec-ascend/oec/resource/Environment/check_package_version.py new file mode 100644 index 00000000..228ec2ad --- /dev/null +++ b/oec-ascend/oec/resource/Environment/check_package_version.py @@ -0,0 +1,393 @@ +import sys +import subprocess +from packaging.version import parse as parse_version + +def get_python_version(): + """获取当前Python版本字符串""" + return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + +def get_installed_packages(): + """获取所有已安装的包及其版本 (使用pip list)""" + try: + result = subprocess.run( + [sys.executable, '-m', 'pip', 'list', '--format=freeze'], + capture_output=True, + text=True, + check=True + ) + packages = {} + for line in result.stdout.splitlines(): + if '==' in line: + name, version = line.split('==', 1) + packages[name.lower()] = version.strip() + return packages + except Exception as e: + print(f"错误: 无法获取已安装包列表 - {str(e)}") + print("请确保pip已安装并能正常工作") + sys.exit(1) + +def check_python_version(min_version=None, max_version=None): + """ + 检查Python版本是否在指定范围内 + + 参数: + min_version (str): 最小支持版本 (e.g., "3.8.0") + max_version (str): 最大支持版本 (e.g., "3.10.0") + + 返回: + tuple: (是否满足, 问题描述) + """ + current_ver = parse_version(get_python_version()) + problems = [] + + if min_version: + min_ver = parse_version(min_version) + if current_ver < min_ver: + problems.append(f"需要 ≥ {min_version}") + + if max_version: + max_ver = parse_version(max_version) + if current_ver > max_ver: + problems.append(f"需要 ≤ {max_version}") + + return (len(problems) == 0, problems) + +def check_package(pkg_info, 
installed_packages): + """ + 检查单个包是否满足要求 + + 参数: + pkg_info (dict): 包配置信息 + installed_packages (dict): 已安装包的字典 + + 返回: + tuple: (是否满足, 安装的版本, 问题描述) + """ + pypi_name = pkg_info["pypi_name"].lower() + installed_version = installed_packages.get(pypi_name) + + # 包未安装 + if not installed_version: + return (False, None, ["未安装"]) + + # 没有版本要求 + if "min_version" not in pkg_info and "max_version" not in pkg_info: + return (True, installed_version, []) + + # 检查版本要求 + problems = [] + try: + installed_ver = parse_version(installed_version) + + if "min_version" in pkg_info: + min_ver = parse_version(pkg_info["min_version"]) + if installed_ver < min_ver: + problems.append(f"需要 ≥ {pkg_info['min_version']}") + + if "max_version" in pkg_info: + max_ver = parse_version(pkg_info["max_version"]) + if installed_ver > max_ver: + problems.append(f"需要 ≤ {pkg_info['max_version']}") + except Exception as e: + problems.append(f"版本解析错误: {str(e)}") + + return (len(problems) == 0, installed_version, problems) + +def check_dependencies(requirements): + """ + 检查所有依赖项 + + 参数: + requirements (dict): 依赖配置字典 + + 返回: + tuple: (所有依赖是否满足, 包检查结果列表) + """ + # 获取已安装包列表 + installed_packages = get_installed_packages() + + print("=" * 70) + print("Python环境与包依赖检查") + print("=" * 70) + + all_ok = True + results = [] + + # 1. 检查Python版本 + py_req = requirements.get("python", {}) + if py_req: + min_py = py_req.get("min_version") + max_py = py_req.get("max_version") + py_ok, py_problems = check_python_version(min_py, max_py) + + version_range = [] + if min_py: version_range.append(f"≥ {min_py}") + if max_py: version_range.append(f"≤ {max_py}") + if min_py and max_py and min_py == max_py: + range_str = min_py + else: + range_str = " 且 ".join(version_range) if version_range else "任意版本" + + status = "✓" if py_ok else "✗" + problems = ", ".join(py_problems) if py_problems else "满足要求" + print(f"Python版本: {get_python_version()} | 要求: {range_str}") + print(f" {status} 状态: {problems}") + print("-" * 70) + + if not py_ok: + all_ok = False + + # 2. 
检查包依赖 + packages = requirements.get("packages", []) + if not packages: + print("未配置包依赖检查") + else: + print("\n包依赖检查:") + print("-" * 70) + + for pkg in packages: + # 获取包信息 + name = pkg["name"] + pypi_name = pkg["name"] + + # 确定当前Python版本适用的规则 + current_py = f"{sys.version_info.major}.{sys.version_info.minor}" + version_rules = pkg.get("version_rules", {}) + rule = version_rules.get(current_py, pkg.get("default", {})) + + # 检查包 + satisfied, version, problems = check_package( + {"pypi_name": pypi_name, **rule}, + installed_packages + ) + + # 确定显示的要求范围 + + range_parts = [] + if "min_version" in rule: + range_parts.append(f"≥ {rule['min_version']}") + if "max_version" in rule: + range_parts.append(f"≤ {rule['max_version']}") + if len(range_parts) == 2 and rule['min_version'] == rule['max_version']: + range_str = rule['min_version'] + else: + range_str = " 且 ".join(range_parts) if range_parts else "任意版本" + + # 确定状态 + if not satisfied: + status = "✗" + all_ok = False + else: + status = "✓" + + # 收集结果 + results.append({ + "display_name": name, + "pypi_name": pypi_name, + "status": status, + "installed": version or "未安装", + "required": range_str, + "problems": problems, + "rule": rule + }) + + # 打印结果 + print(f"{status} {name}") + print(f" 已安装: {version or '未安装'}") + print(f" 要求: {range_str}") + if problems: + print(f" 问题: {', '.join(problems)}") + print("-" * 70) + + print("=" * 70) + return all_ok, results + +def generate_install_commands(results, py_req=None): + """ + 生成安装命令 + + 参数: + results (list): 包检查结果列表 + py_req (dict): Python版本要求 + + 返回: + str: 安装命令字符串 + """ + commands = [] + + # Python版本要求 + if py_req: + min_py = py_req.get("min_version") + max_py = py_req.get("max_version") + if min_py or max_py: + commands.append("# 请确保使用正确的Python版本") + if min_py and max_py: + if max_py == min_py: + commands.append(f"# 推荐使用 Python {min_py}") + else: + commands.append(f"# 推荐使用 Python {min_py} 到 {max_py} 之间的版本") + elif min_py: + commands.append(f"# 需要 Python {min_py} 或更高版本") + elif max_py: + commands.append(f"# 需要 Python {max_py} 或更低版本") + + # 包安装命令 + commands.append("\n# 包安装命令:") + + for res in results: + pkg_name = res["pypi_name"] + rule = res["rule"] + + if "min_version" in rule and "max_version" in rule: + if rule['min_version']==rule['max_version']: + commands.append(f"pip install '{pkg_name}=={rule['min_version']}'") + else: + commands.append(f"pip install '{pkg_name}>={rule['min_version']},<={rule['max_version']}'") + elif "min_version" in rule: + commands.append(f"pip install '{pkg_name}>={rule['min_version']}'") + elif "max_version" in rule: + commands.append(f"pip install '{pkg_name}<={rule['max_version']}'") + else: + commands.append(f"pip install {pkg_name}") + + # 创建 requirements.txt 的建议 + commands.append("\n# 或者创建 requirements.txt 文件:") + commands.append("# 将以下内容保存到 requirements.txt 文件中:") + for res in results: + pkg_name = res["pypi_name"] + rule = res["rule"] + + if "min_version" in rule and "max_version" in rule: + if rule['min_version'] == rule['max_version']: + commands.append(f"{pkg_name}=={rule['min_version']}") + else: + commands.append(f"{pkg_name}>={rule['min_version']},<={rule['max_version']}") + elif "min_version" in rule: + commands.append(f"{pkg_name}>={rule['min_version']}") + elif "max_version" in rule: + commands.append(f"{pkg_name}<={rule['max_version']}") + else: + commands.append(pkg_name) + + commands.append("\n# 然后运行:") + commands.append("pip install -r requirements.txt") + + return "\n".join(commands) + +if __name__ == "__main__": + # ====== 依赖配置 ====== + # 配置说明: + # 
- "python": 可选的Python版本要求 + # - "packages": 包依赖列表 + # 每个包必须包含: + # - "name": PyPI上的包名 + # - "version_rules": (可选) 针对不同Python版本的规则 + # - "default": (可选) 默认规则 + # + # 规则格式: + # { + # "min_version": "最低版本", + # "max_version": "最高版本" + # } + + DEPENDENCY_CONFIG = { + # Python版本要求 + "python": { + "min_version": "3.7.5", # 最低支持Python 3.8 + "max_version": "3.11.4" # 最高支持Python 3.10 + }, + + "packages": [ + # 通用包 - 所有Python版本使用相同要求 + { + "name": "numpy", + "version_rules": { + # Python 3.7 使用这个要求 + "3.7": { + "min_version": "1.21.6", + "max_version": "1.21.6" + } + }, + "default": { + "min_version": "1.19.2", + "max_version": "1.24.0" + } + }, + { + "name": "decorator", + "default": { + "min_version": "4.4.0" + } + }, + { + "name": "sympy", + "default": { + "min_version": "1.5.1" + } + }, + + # 仅在某些Python版本有特殊要求 + { + "name": "cffi", + # 其他Python版本使用默认要求 + "default": { + "min_version": "1.12.3" + } + }, + { + "name": "protobuf", + # 其他Python版本使用默认要求 + "default": { + "min_version": "3.20", + "max_version": "3.20", + } + }, + + # 仅检查是否安装,不限制版本 + { + "name": "attrs" + }, + { + "name": "cython" + }, + { + "name": "pyyaml" + }, + { + "name": "pathlib2" + }, + { + "name": "scipy" + }, + { + "name": "requests" + }, + { + "name": "psutil" + }, + { + "name": "absl-py" + }, + + ] + } + # ==================== + + # 检查依赖 + all_ok, results = check_dependencies(DEPENDENCY_CONFIG) + + if all_ok: + print("\n所有依赖满足! 可以运行主程序。") + # 这里可以继续执行你的主程序 + # from main import main + # main() + else: + print("\n错误: 环境不满足要求!") + print("请根据以下提示解决问题:") + + # 生成安装建议 + py_req = DEPENDENCY_CONFIG.get("python", {}) + commands = generate_install_commands(results, py_req) + print("\n" + commands) + + sys.exit(1) # 非零退出码表示错误 \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/dependency.sh b/oec-ascend/oec/resource/Environment/dependency.sh new file mode 100755 index 00000000..bba0ae84 --- /dev/null +++ b/oec-ascend/oec/resource/Environment/dependency.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +# 检查glibc版本是否大于2.17 +check_glibc() { + local required_version="2.17" + local glibc_version + + # 尝试多种方式获取glibc版本 + if command -v ldd >/dev/null 2>&1; then + glibc_version=$(ldd --version 2>&1 | awk 'NR==1 {print $NF}') + elif [ -f /lib/x86_64-linux-gnu/libc.so.6 ]; then + glibc_version=$(/lib/x86_64-linux-gnu/libc.so.6 2>&1 | grep "GNU C Library" | awk '{print $NF}') + elif [ -f /lib64/libc.so.6 ]; then + glibc_version=$(/lib64/libc.so.6 2>&1 | grep "GNU C Library" | awk '{print $NF}') + else + echo "错误: 无法检测glibc版本 - 请手动安装glibc" + return 1 + fi + + # 验证版本格式 + if ! [[ $glibc_version =~ ^[0-9]+\.[0-9]+ ]]; then + echo "错误: 无法解析glibc版本: '$glibc_version'" + return 1 + fi + + # 版本比较 + if awk -v req="$required_version" -v curr="$glibc_version" 'BEGIN { + split(req, r, "."); split(curr, c, "."); + for (i=1; i<=3; i++) { + if (c[i]+0 < r[i]+0) exit 1; + if (c[i]+0 > r[i]+0) exit 0; + } + exit 0 + }'; then + echo "通过: glibc版本满足要求 ($glibc_version >= $required_version)" + return 0 + else + echo "错误: glibc版本过低 (当前: $glibc_version < 要求: $required_version)" + return 1 + fi +} + +# 检查命令是否存在 +check_command() { + if command -v "$1" >/dev/null 2>&1; then + echo "通过: $1 命令可用" + return 0 + else + echo "错误: $1 命令未找到" + return 1 + fi +} + +# 主检查函数 +main() { + local all_success=0 + # 所有需要检查的命令列表 + local commands=( + "gcc" "g++" "cmake" "make" "ifconfig" + "tar" "realpath" "arch" "grep" "sed" + ) + + echo "开始依赖检查..." + echo "==============================" + + # 检查glibc版本 + if ! 
check_glibc; then + all_success=1 + fi + + # 检查必需命令 + for cmd in "${commands[@]}"; do + if ! check_command "$cmd"; then + all_success=1 + fi + done + + echo "==============================" + + # 返回最终状态 + if [ $all_success -ne 0 ]; then + echo "依赖检查失败! 请解决以上问题后再运行程序" + exit 1 + else + echo "所有依赖检查通过! 可以安全运行程序" + exit 0 + fi +} + +# 执行主函数 +main \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/get_cann_version.py b/oec-ascend/oec/resource/Environment/get_cann_version.py new file mode 100644 index 00000000..29661901 --- /dev/null +++ b/oec-ascend/oec/resource/Environment/get_cann_version.py @@ -0,0 +1,12 @@ +import os + +ASCEND_HOME_PATH = os.environ.get("ASCEND_HOME_PATH") +if ASCEND_HOME_PATH is None: + raise ValueError("ASCEND_HOME_PATH is not set") +realpath = os.path.realpath(f"{ASCEND_HOME_PATH}/runtime") +realpath = os.path.dirname(realpath) +version = os.path.basename(realpath) +if version is None: + print(f"can not get cann version.ASCEND_HOME_PATH={ASCEND_HOME_PATH},cann path = {realpath}") + exit(1) +print(version, end='') \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/get_npu_info.py b/oec-ascend/oec/resource/Environment/get_npu_info.py new file mode 100644 index 00000000..9696dbd8 --- /dev/null +++ b/oec-ascend/oec/resource/Environment/get_npu_info.py @@ -0,0 +1,9 @@ +try: + import acl + print(acl.get_soc_name()) + Count,ret = acl.rt.get_device_count() + if ret !=0: + exit(1) + print(Count,end='') +except Exception as e: + exit(2) \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/install_and_check_cann.sh b/oec-ascend/oec/resource/Environment/install_and_check_cann.sh new file mode 100755 index 00000000..945030ac --- /dev/null +++ b/oec-ascend/oec/resource/Environment/install_and_check_cann.sh @@ -0,0 +1,9 @@ +if [[ -f /etc/Ascend/ascend_cann_install.info ]]; then + mv /etc/Ascend/ascend_cann_install.info /etc/Ascend/ascend_cann_install.info.bac +fi +bash install_cann_packages.sh "$1" "$2" +rst=$? +if [[ -f /etc/Ascend/ascend_cann_install.info.bac ]]; then + mv /etc/Ascend/ascend_cann_install.info.bac /etc/Ascend/ascend_cann_install.info +fi +exit $rst \ No newline at end of file diff --git a/oec-ascend/oec/resource/Environment/install_cann_packages.sh b/oec-ascend/oec/resource/Environment/install_cann_packages.sh new file mode 100755 index 00000000..7ce4c3da --- /dev/null +++ b/oec-ascend/oec/resource/Environment/install_cann_packages.sh @@ -0,0 +1,83 @@ +echo =============================================== +echo == CANN PACKAGES INSTALL UNINSTALL TEST == +echo =============================================== + +cd $1 +echo try to find Ascend-cann packages in $1 +install_path=$(realpath $2) +mkdir -p "$install_path" + +function install(){ + package=$1 + count=$(find . -type f -name "*$package*" | wc -l) + if [ "$count" -ne 1 ]; then + echo "ERROR: numer of $package is not equal to 1" + exit 1 + fi + echo =============================================== + echo INSTALL ./$package* + echo ">>>>>>>>>>>> ASCEND_HOME_PATH <<<<<<<<<<<<<<<<<" + env |grep ASCEND_HOME_PATH + echo =============================================== + echo ./*$package* --install --install-path="$install_path" --quiet + chmod +x *$package* + ./*$package* --install --quiet --install-path="$install_path" + rst=$? + if [[ $rst != 0 ]]; then + exit $rst + fi +} +function uninstall(){ + package=$1 + count=$(find . 
-type f -name "*$package*" | wc -l) + if [ "$count" -ne 1 ]; then + echo "ERROR: numer of $package is not equal to 1" + exit 1 + fi + echo ./*$package* --uninstall --install-path="$install_path" + chmod +x *$package* + ./*$package* --uninstall --install-path="$install_path" + rst=$? + if [[ $rst != 0 ]]; then + exit $rst + fi +} + +install cann-toolkit +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +source ${install_path}/ascend-toolkit/set_env.sh +install cann-kernels +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +install cann-nnal +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +uninstall cann-nnal +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +uninstall cann-kernels +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +uninstall cann-toolkit +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi + +code=0 +if [[ -d Ascend/ascend-toolkit ]]; then + code=1 +fi +rm -rf Ascend +exit $code diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/CMakeLists.txt b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/CMakeLists.txt new file mode 100644 index 00000000..1e4d6de9 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +# ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. +# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/add_custom.cpp) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $,$>:host_intf_pub>> + $:ascendcl>> + ascendc_kernels_${RUN_MODE} +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/add_custom.cpp b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/add_custom.cpp new file mode 100644 index 00000000..896b5c81 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/add_custom.cpp @@ -0,0 +1,89 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
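+ *
+ * Implements a tiled, double-buffered element-wise Add kernel (half precision)
+ * split across 8 AI cores; see the KernelAdd class below.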
+ */ +#include "kernel_operator.h" + +constexpr int32_t TOTAL_LENGTH = 8 * 2048; // total length of data +constexpr int32_t USE_CORE_NUM = 8; // num of core used +constexpr int32_t BLOCK_LENGTH = TOTAL_LENGTH / USE_CORE_NUM; // length computed of each core +constexpr int32_t TILE_NUM = 8; // split data into 8 tiles for each core +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue +constexpr int32_t TILE_LENGTH = BLOCK_LENGTH / TILE_NUM / BUFFER_NUM; // separate to 2 parts, due to double buffer + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ half *)x + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + yGm.SetGlobalBuffer((__gm__ half *)y + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + zGm.SetGlobalBuffer((__gm__ half *)z + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = TILE_NUM * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[progress * TILE_LENGTH], TILE_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * TILE_LENGTH], zLocal, TILE_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z) +{ + KernelAdd op; + op.Init(x, y, z); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z) +{ + add_custom<<>>(x, y, z); +} +#endif diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/ascend_test_kerneldev.py b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/ascend_test_kerneldev.py new file mode 100644 index 00000000..38e9c21b --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/ascend_test_kerneldev.py @@ -0,0 +1,28 @@ +import oec + +oec.NPUTestCase( + group= ("算子","算子开发"), + name = "KERNEL_DEV_ADD_NPU", + cmd=f"bash run.sh -r npu -v -o '{oec.Context.output_dir}/tmp/AddKernelInvocationNeo_npu'", + include="test pass", + exclude=[r"\bfailed\b", r"\bFailed\b", r"\bFAILED\b", + r"\bERROR\b", r"\bError\b"] + ) + +oec.NPUTestCase( + group= ("算子","算子开发"), + name = "KERNEL_DEV_ADD_SIM", + cmd=f"bash run.sh -r sim -v -o 
'{oec.Context.output_dir}/tmp/AddKernelInvocationNeo_sim'", + include="test pass", + exclude=[r"\bfailed\b", r"\bFailed\b", r"\bFAILED\b", + r"\bERROR\b", r"\bError\b"] + ) + +oec.TestCase( + group= ("算子","算子开发"), + name = "KERNEL_DEV_ADD_CPU", + cmd=f"bash run.sh -r cpu -v Ascend910B3 -o '{oec.Context.output_dir}/tmp/AddKernelInvocationNeo_cpu'", + include="test pass", + exclude=[r"\bfailed\b", r"\bFailed\b", r"\bFAILED\b", + r"\bERROR\b", r"\bError\b"] + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/cpu_lib.cmake b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/cpu_lib.cmake new file mode 100644 index 00000000..5362c8b5 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels_${RUN_MODE} PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/npu_lib.cmake b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/npu_lib.cmake new file mode 100644 index 00000000..f92b095d --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/cmake/npu_lib.cmake @@ -0,0 +1,11 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/data_utils.h b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/data_utils.h new file mode 100644 index 00000000..09d90637 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/data_utils.h @@ -0,0 +1,203 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/main.cpp b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/main.cpp new file mode 100644 index 00000000..d3d8fea3 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/main.cpp @@ -0,0 +1,82 @@ +/** + * @file main.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void add_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 8; + size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *z = (uint8_t *)AscendC::GmAlloc(outputByteSize); + + ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, y, inputByteSize); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(add_custom, blockDim, x, y, z); // use this macro for cpu debug + + WriteFile("./output/output_z.bin", z, outputByteSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)z); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *yHost, *zHost; + uint8_t *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + add_custom_do(blockDim, stream, xDevice, yDevice, zDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh new file mode 100755 index 00000000..7125d0e0 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/run.sh @@ -0,0 +1,132 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" + + +SHORT=r:,v:,i:,b:,p:,o:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:,output:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" +export OUTPUT_DIR=$CURRENT_DIR +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | 
--install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + -o | --output) + export OUTPUT_DIR="$2" + export CAMODEL_LOG_PATH="${OUTPUT_DIR}/sim_log" + INSTALL_PREFIX="${OUTPUT_DIR}/out" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 +fi + +# VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +# if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then +# echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" +# exit -1 +# fi +echo $ASCEND_INSTALL_PATH +echo $ASCEND_HOME_PATH +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + if [ ! $CAMODEL_LOG_PATH ]; then + export CAMODEL_LOG_PATH=$(pwd)/sim_log + fi + if [ -d "$CAMODEL_LOG_PATH" ]; then + rm -rf $CAMODEL_LOG_PATH + fi + mkdir -p $CAMODEL_LOG_PATH +elif [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi +mkdir -p "$OUTPUT_DIR" +cd "$OUTPUT_DIR" +set -e +rm -rf build out +mkdir -p build +echo ${_ASCEND_INSTALL_PATH} +cmake "$CURRENT_DIR" -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 "${CURRENT_DIR}/scripts/gen_data.py" +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 "${CURRENT_DIR}/scripts/verify_result.py" "output/output_z.bin" "output/golden.bin" diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/gen_data.py b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/gen_data.py new file mode 100644 index 00000000..ea8ce828 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/gen_data.py 
@@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/verify_result.py b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/verify_result.py new file mode 100644 index 00000000..2dd46f80 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelInvocationNeo/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/add_custom.json b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/add_custom.json new file mode 100644 index 00000000..dce1ed85 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/add_custom.json @@ -0,0 +1,40 @@ +[ + { + "op": "AddCustom", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ] + } +] \ No newline at end of file diff 
--git a/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/ascend_test_kernel.py b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/ascend_test_kernel.py new file mode 100644 index 00000000..843388c8 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/ascend_test_kernel.py @@ -0,0 +1,7 @@ +import oec +oec.TestCase( + group= ("算子","算子开发"), + name = "KERNEL_DEV_ADD_MSOPGEN", + cmd=f"bash run.sh '{oec.Context.output_dir}/tmp/AddKernelmsOpGen'", + include="successfully created" + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/run.sh b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/run.sh new file mode 100644 index 00000000..fea0e080 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/AddKernelmsOpGen/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash +output_path=$1 +msopgen gen -i add_custom.json -c ai_core-Ascend910B3 -lan cpp -out "$output_path" +cd "$output_path" +bash build.sh \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/HelloWorld/CMakeLists.txt b/oec-ascend/oec/resource/KernelDev/HelloWorld/CMakeLists.txt new file mode 100644 index 00000000..d4c47d84 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/HelloWorld/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.16.0) + +# project information +project(Ascend_C) +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +if(DEFINED ENV{USER} AND "$ENV{USER}" STREQUAL "root") + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for root user") +else() + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "$ENV{HOME}/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for other user") +endif() + +if(DEFINED ASCEND_CANN_PACKAGE_PATH) +elseif(DEFINED ENV{ASCEND_HOME_PATH}) + set(ASCEND_CANN_PACKAGE_PATH "$ENV{ASCEND_HOME_PATH}" CACHE PATH "ASCEND CANN package installation directory" FORCE) +else() + set(ASCEND_CANN_PACKAGE_PATH "${DEFAULT_ASCEND_CANN_PACKAGE_PATH}" CACHE PATH "ASCEND CANN package installation directory") +endif() + +set(RUN_MODE "npu" CACHE STRING "run mode: npu") +set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()") + +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed." 
${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake)
+endif()
+
+include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
+
+# ascendc_library use to add kernel file to generate ascendc library
+ascendc_library(kernels STATIC
+    hello_world.cpp
+)
+
+add_executable(main main.cpp)
+
+target_link_libraries(main PRIVATE
+    kernels
+)
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/KernelDev/HelloWorld/ascend_test_kernel.py b/oec-ascend/oec/resource/KernelDev/HelloWorld/ascend_test_kernel.py
new file mode 100644
index 00000000..2ea65d45
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/HelloWorld/ascend_test_kernel.py
@@ -0,0 +1,13 @@
+import oec
+
+oec.NPUTestCase(
+    group= ("算子","算子开发"),
+    name = "KERNEL_DEV_HELLO_WORLD",
+    cmd=f"bash run.sh <NPU> {oec.Context.output_dir}/tmp/KernelDev dev"
+    )
+
+oec.TestCase(
+    group= ("算子","算子编译"),
+    name = "KERNEL_BUILD_HELLO_WORLD",
+    cmd=f"bash run.sh Ascend910B3 {oec.Context.output_dir}/tmp/KernelBuild build"
+    )
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/KernelDev/HelloWorld/hello_world.cpp b/oec-ascend/oec/resource/KernelDev/HelloWorld/hello_world.cpp
new file mode 100644
index 00000000..1d758710
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/HelloWorld/hello_world.cpp
@@ -0,0 +1,10 @@
+#include "kernel_operator.h"
+extern "C" __global__ __aicore__ void hello_world()
+{
+    AscendC::printf("Hello World!!!\n");
+}
+
+void hello_world_do(uint32_t blockDim, void* stream)
+{
+    hello_world<<<blockDim, nullptr, stream>>>();
+}
diff --git a/oec-ascend/oec/resource/KernelDev/HelloWorld/main.cpp b/oec-ascend/oec/resource/KernelDev/HelloWorld/main.cpp
new file mode 100644
index 00000000..4cb6424f
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/HelloWorld/main.cpp
@@ -0,0 +1,24 @@
+#include "acl/acl.h"
+extern void hello_world_do(uint32_t coreDim, void* stream);
+
+int32_t main(int argc, char const *argv[])
+{
+    // AscendCL初始化
+    aclInit(nullptr);
+    // 运行管理资源申请
+    int32_t deviceId = 0;
+    aclrtSetDevice(deviceId);
+    aclrtStream stream = nullptr;
+    aclrtCreateStream(&stream);
+
+    // 设置参与运算的核数为8(核数可根据实际需求设置)
+    constexpr uint32_t blockDim = 8;
+    // 用内核调用符<<<>>>调用核函数,hello_world_do中封装了<<<>>>调用
+    hello_world_do(blockDim, stream);
+    aclrtSynchronizeStream(stream);
+    // 资源释放和AscendCL去初始化
+    aclrtDestroyStream(stream);
+    aclrtResetDevice(deviceId);
+    aclFinalize();
+    return 0;
+}
diff --git a/oec-ascend/oec/resource/KernelDev/HelloWorld/run.sh b/oec-ascend/oec/resource/KernelDev/HelloWorld/run.sh
new file mode 100755
index 00000000..81b0fc0e
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/HelloWorld/run.sh
@@ -0,0 +1,44 @@
+SOC_VERSION=$1
+output=$2
+type=$3
+function make_run(){
+    mkdir -p "$output/HelloWorld/build"
+
+    cmake -B "$output/HelloWorld/build" \
+        -DSOC_VERSION=${SOC_VERSION} \
+        -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_PATH} \
+        -DCMAKE_INSTALL_PREFIX="$output/HelloWorld/out"
+    if [ $? -ne 0 ]; then
+        echo "cmake hello world failed"
+        return 1
+    fi
+    cd "$output/HelloWorld"
+    cmake --build build -j
+    if [ $? -ne 0 ]; then
+        echo "build hello world failed"
+        return 2
+    fi
+    cmake --install build
+    if [ $? -ne 0 ]; then
+        echo "install hello world failed"
+        return 3
+    fi
+    if [[ $type == "build" ]];then
+        return 0 # 算子编译场景下无需执行用例,算子开发需要执行用例
+    fi
+    check_msg="Hello World"
+    file_path=output_msg.txt
+
+    ./build/main | tee $file_path
+    count=$(grep -c "$check_msg" $file_path)
+
+    if [ $count -ne 8 ]; then
+        echo "Error, Expected 8 occurrences of $check_msg, but found $count occurrences."
+        return 3
+    fi
+
+}
+
+
+make_run
+exit $?
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/KernelDev/bisheng/QuickStartDemo.cce b/oec-ascend/oec/resource/KernelDev/bisheng/QuickStartDemo.cce
new file mode 100644
index 00000000..8b5f9bca
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/bisheng/QuickStartDemo.cce
@@ -0,0 +1,56 @@
+// 文件名QuickStartDemo.cce
+#include "acl/acl.h"
+#include <cstdint>
+#include <cstdio>
+
+#ifdef ASCENDC_CPU_DEBUG
+#define __aicore__
+#else
+#define __aicore__ [aicore]
+#endif
+
+#define BLOCKS 4
+#define CACHELINE_SZ 64
+
+// Define a kernel
+__global__ __aicore__ void foo(__gm__ uint8_t *Out, int Stride) {
+  Out[block_idx * Stride] = block_idx;
+}
+
+int main(int argc, char *argv[]) {
+  aclInit(nullptr);
+  aclrtSetDevice(0);
+  aclrtStream stream;
+  aclrtCreateStream(&stream);
+
+  uint8_t ExpectedValue[] = {0, 1, 2, 3};
+  uint8_t *OutputValue = nullptr;
+  aclrtMalloc((void **)&OutputValue, BLOCKS * CACHELINE_SZ, ACL_MEM_MALLOC_HUGE_FIRST);
+
+  uint8_t InitValue[BLOCKS] = {0};
+  aclrtMemcpyAsync((void *)OutputValue, sizeof(InitValue), InitValue,
+                   sizeof(InitValue), ACL_MEMCPY_HOST_TO_DEVICE, stream);
+  aclrtSynchronizeStream(stream);
+
+  // Invoke a kernel
+  foo<<<BLOCKS, nullptr, stream>>>(OutputValue, CACHELINE_SZ);
+
+  uint8_t *OutHost = nullptr;
+  aclrtMallocHost((void **)&OutHost, BLOCKS * CACHELINE_SZ);
+  aclrtMemcpyAsync(OutHost, BLOCKS * CACHELINE_SZ, OutputValue,
+                   BLOCKS * CACHELINE_SZ, ACL_MEMCPY_DEVICE_TO_HOST, stream);
+  aclrtSynchronizeStream(stream);
+
+  for (int I = 0; I < sizeof(ExpectedValue) / sizeof(uint8_t); I++) {
+    printf("i%d\t Expect: 0x%04x\t\t\t\tResult: 0x%04x\n", I, ExpectedValue[I],
+           OutHost[I * CACHELINE_SZ]);
+  }
+
+  aclrtFreeHost(OutHost);
+  aclrtFree(OutputValue);
+
+  aclrtDestroyStream(stream);
+  aclrtResetDevice(0);
+  aclFinalize();
+  return 0;
+}
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/KernelDev/bisheng/ascend_test_kernel.py b/oec-ascend/oec/resource/KernelDev/bisheng/ascend_test_kernel.py
new file mode 100644
index 00000000..2a0716c1
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/bisheng/ascend_test_kernel.py
@@ -0,0 +1,7 @@
+import oec
+
+oec.TestCase(
+    group= ("算子","算子编译"),
+    name = "KERNEL_BUILD_BISHENG_DEMO",
+    cmd=f"bash build.sh Ascend910B3 {oec.Context.output_dir}/tmp/KernelDev/bisheng"
+    )
\ No newline at end of file
diff --git a/oec-ascend/oec/resource/KernelDev/bisheng/build.sh b/oec-ascend/oec/resource/KernelDev/bisheng/build.sh
new file mode 100755
index 00000000..2b213603
--- /dev/null
+++ b/oec-ascend/oec/resource/KernelDev/bisheng/build.sh
@@ -0,0 +1,18 @@
+RT_INC=${ASCEND_HOME_PATH}/runtime/include
+RT_LIB=${ASCEND_HOME_PATH}/runtime/lib64
+NPU=$1
+output=$2
+mkdir -p "$output"
+inputpath=$(pwd)
+cd "$output"
+# 功能:Host & Device代码混合编译,生成可执行文件,仅需链接libruntime.so
+# 编译选项--cce-soc-version和--cce-soc-core-type指的是编译AscendXXXYY上的Vector核程序
+bisheng -O2 --cce-soc-version=$NPU --cce-soc-core-type=VecCore -I$RT_INC -L$RT_LIB -lascendcl -lruntime "$inputpath/QuickStartDemo.cce" -o "QuickStartDemo"
+rst=$?
+if [[ $rst != 0 ]]; then
+  exit $rst
+fi
+if [[ ! 
-f QuickStartDemo ]]; then + exit 1 +fi + diff --git a/oec-ascend/oec/resource/KernelDev/dsl_vabs/ascend_test_tbe.py b/oec-ascend/oec/resource/KernelDev/dsl_vabs/ascend_test_tbe.py new file mode 100644 index 00000000..2ee881f7 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/dsl_vabs/ascend_test_tbe.py @@ -0,0 +1,8 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("算子","算子编译"), + name = "KERNEL_BUILD_DSL_VABS", + cmd=f"python3 dsl_vabs.py {oec.Context.output_dir}/tmp/dsl_vabs" + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/dsl_vabs/dsl_vabs.py b/oec-ascend/oec/resource/KernelDev/dsl_vabs/dsl_vabs.py new file mode 100644 index 00000000..307400d9 --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/dsl_vabs/dsl_vabs.py @@ -0,0 +1,33 @@ +from tbe import tvm +from tbe import dsl +import argparse +parser = argparse.ArgumentParser( + prog="dsl-test", + ) + +parser.add_argument( + "output_dir", + default=".", + help="The path where the kernel mate data is saved, which is the current directory by default", +) +args = parser.parse_args() + + +shape = (28,28) +dtype = "float16" +# 定义输入占位符 +data = tvm.placeholder(shape, name="data", dtype=dtype) +with tvm.target.cce(): + # 描述算子计算过程 + res = dsl.vabs(data) + # 生成schedule对象 + sch = dsl.auto_schedule(res) +# 定义build配置参数 +config = {"print_ir" : True, + "need_build" : True, + "name" : "abs_28_28_float16", + "tensor_list" : [data,res], + "kernel_meta_parent_dir": args.output_dir + } +# build算子 +dsl.build(sch, config) \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/tik_matmul/ascend_test_tbe.py b/oec-ascend/oec/resource/KernelDev/tik_matmul/ascend_test_tbe.py new file mode 100644 index 00000000..bed788bb --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/tik_matmul/ascend_test_tbe.py @@ -0,0 +1,8 @@ +#encoding: utf-8 +import oec + +oec.TestCase( + group= ("算子","算子编译"), + name = "KERNEL_BUILD_TIK_MATMUL", + cmd=f"python3 tik_matmul.py {oec.Context.output_dir}/tmp/tik_matmul" + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/KernelDev/tik_matmul/tik_matmul.py b/oec-ascend/oec/resource/KernelDev/tik_matmul/tik_matmul.py new file mode 100644 index 00000000..765bf03a --- /dev/null +++ b/oec-ascend/oec/resource/KernelDev/tik_matmul/tik_matmul.py @@ -0,0 +1,174 @@ +from tbe import tik +import argparse +parser = argparse.ArgumentParser( + prog="tik-test", +) +parser.add_argument( + "output_dir", + default=".", + help="The path where the kernel mate data is saved, which is the current directory by default", +) +args = parser.parse_args() + +DTYPE_SIZE = { + 'int8': 1, + 'float16': 2, + 'float32': 4, +} + +def MK_TO_K1MK0(tik_instance, mk_input_tensor, k1mk0_tensor, dtype, k1, m, k0): + """change data format mk to k1mk0""" + src_ub = tik_instance.Tensor(dtype, (k1, m, k0), name="src_ub", scope=tik.scope_ubuf) + + # data_move(m,k) --> (k1,m,k0) + with tik_instance.for_range(0, k1) as i: + tik_instance.data_move(src_ub[i * m * k0:], mk_input_tensor[i * k0:], 0, m, k0 * DTYPE_SIZE[dtype] // 32, + (k1 - 1) * k0 * DTYPE_SIZE[dtype] // 32, 0) + # data_move out + tik_instance.data_move(k1mk0_tensor, src_ub, 0, 1, k1 * m * k0 * DTYPE_SIZE[dtype] // 32, 0, 0) + + +def KN_TO_K1NK0(tik_instance, kn_input_tensor, k1nk0_tensor, dtype, k1, n, k0): + """change data format kn to k1nk0""" + with tik_instance.for_range(0, k1) as index: + k1nk0_ub = tik_instance.Tensor(dtype, (n, k0), tik.scope_ubuf, "k1nk0_ub") + src_ub = tik_instance.Tensor(dtype, (k0, n), tik.scope_ubuf, "src_ub") + 
burst_len = k0 * n * DTYPE_SIZE[dtype] // 32 + tik_instance.data_move(src_ub, kn_input_tensor[index * k0 * n], 0, 1, burst_len, 0, 0) + dst_list = [k1nk0_ub[16 * i] for i in range(16)] + src_list = [src_ub[n * i] for i in range(16)] + rep_times = n // k0 + dst_rep_stride = k0 + src_rep_stride = 1 + tik_instance.vec_trans_scatter(False, False, dst_list, src_list, rep_times, dst_rep_stride, src_rep_stride) + tik_instance.data_move(k1nk0_tensor[index * k0 * n], k1nk0_ub, 0, 1, burst_len, 0, 0) + +def N1MN0_TO_MN(tik_instance, mn_output_tensor, n1mn0_tensor, dtype, n1, m, n0): + """change data format n1mn0 to mn""" + src_ub = tik_instance.Tensor(dtype, (m, n1 * n0), name="src_ub", scope=tik.scope_ubuf) + + # data_move (n1,m,n0) --> (m,n) + with tik_instance.for_range(0, n1) as i: + tik_instance.data_move(src_ub[i * n0:], n1mn0_tensor[i * m * n0:], 0, m, + n0 * DTYPE_SIZE[dtype] // 32, 0, (n1 - 1) * n0 * DTYPE_SIZE[dtype] // 32) + # data_move out + tik_instance.data_move(mn_output_tensor, src_ub, 0, 1, m * n1 * n0 * DTYPE_SIZE[dtype] // 32, 0, 0) + + +def matmul_tik_compute(params, kernel_name): + """ + matmul tik compute + @param params: matmul data + @param kernel_name: kernel name + @return: tik instance + """ + tik_instance = tik.Tik() + if not isinstance(params, dict): + params = params.__dict__ + m_size, k_size, n_size = params['M'], params['K'], params['N'] + data_type = params["data_type"] + m_tiling_size = int(params["m_tiling_size"]) + n_tiling_size = int(params["n_tiling_size"]) + k_tiling_size = int(params['k_tiling_size']) + + m_cycle_times = params["m_cycle_times"] + n_cycle_times = params["n_cycle_times"] + k_cycle_times = params["k_cycle_times"] + + # Determine the output type + if data_type == "float16": + C_loc_out_type = "float32" + K0 = 16 + else: + C_loc_out_type = "int32" + K0 = 32 + block_size = 16 + + n_thread_num = params['n_thread_num'] + m_thread_num = params['m_thread_num'] + k_thread_num = params['k_thread_num'] + + mk_gm_input = tik_instance.Tensor(data_type, (m_size, k_size), name="mk_input_gm", scope=tik.scope_gm) + kn_gm_input = tik_instance.Tensor(data_type, (k_size, n_size), name="kn_input_gm", scope=tik.scope_gm) + k1mk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, m_size, K0), name="k1mk0_workspace", + scope=tik.scope_gm, is_workspace=True) + k1nk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, n_size, K0), name="k1nk0_workspace", + scope=tik.scope_gm, is_workspace=True) + + mn_gm_output = tik_instance.Tensor(C_loc_out_type, (m_size, n_size), tik.scope_gm, "mn_output_gm") + nmk0_workspace = tik_instance.Tensor(C_loc_out_type, (n_size // block_size, m_size, block_size), + name="nmk0_workspace", scope=tik.scope_gm, is_workspace=True) + + MK_TO_K1MK0(tik_instance, mk_gm_input, k1mk0_workspace, data_type, k_size // K0, m_size, K0) + KN_TO_K1NK0(tik_instance, kn_gm_input, k1nk0_workspace, data_type, k_size // K0, n_size, K0) + + # Tiling is realized through the for_range() loop. + with tik_instance.for_range(0, 2, block_num=1) as core_id: + with tik_instance.for_range(0, n_cycle_times // 2, thread_num=n_thread_num) as n_idx: + with tik_instance.for_range(0, m_cycle_times, thread_num=m_thread_num) as m_idx: + dst_l0c = tik_instance.Tensor(C_loc_out_type, [n_tiling_size // 16, m_tiling_size, 16], name='dst_l0c', + scope=tik.scope_cbuf_out) + with tik_instance.for_range(0, k_cycle_times, + thread_num=k_thread_num) as k_idx: + # Calculation result data transfer. 
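+                    # Stage the current A tile from the K1MK0 workspace in GM into L1 (scope_cbuf):
+                    # the source stride (m_size - m_tiling_size) skips the M rows that belong to other M tiles,
+                    # so each k_idx iteration feeds one k_tiling_size x m_tiling_size fractal block to the cube unit.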
+ inputa_l1 = tik_instance.Tensor(params['data_type'], [k_tiling_size // K0, m_tiling_size, K0], + name="A_tiling_l1", scope=tik.scope_cbuf) + tik_instance.data_move(inputa_l1, + k1mk0_workspace[k_idx * k_tiling_size // K0, m_idx * m_tiling_size, :], + 0, k_tiling_size // K0, m_tiling_size, m_size - m_tiling_size, 0) + inputb_l1 = tik_instance.Tensor(params["data_type"], [k_tiling_size // K0, n_tiling_size, K0], + name="B_tiling_l1", scope=tik.scope_cbuf) + if n_size - n_tiling_size > 65535: + with tik_instance.for_range(0, k_tiling_size // K0) \ + as dma_k_idx: + tik_instance.data_move(inputb_l1[dma_k_idx, :, :], + k1nk0_workspace[k_idx * k_tiling_size // K0 + dma_k_idx, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], + 0, 1, n_tiling_size, 0, 0) + else: + tik_instance.data_move(inputb_l1, k1nk0_workspace[k_idx * k_tiling_size // K0, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], 0, + k_tiling_size // K0, n_tiling_size, n_size - n_tiling_size, 0) + # Call matmul API to matrix multiplication calculation. + with tik_instance.if_scope(k_idx == 0): + tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size, + init_l1out=True) + with tik_instance.else_scope(): + tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size, + init_l1out=False) + tik_instance.fixpipe(nmk0_workspace[n_tiling_size // 16 * (core_id * n_cycle_times // 2 + n_idx), + m_idx * m_tiling_size, :], dst_l0c, n_tiling_size // 16, m_tiling_size * 16 * + DTYPE_SIZE[C_loc_out_type] // 32, + (m_size - m_tiling_size) * 16 * DTYPE_SIZE[C_loc_out_type] // 32, 0) + + N1MN0_TO_MN(tik_instance, mn_gm_output, nmk0_workspace, C_loc_out_type, n_size // K0, m_size, K0) + + tik_instance.BuildCCE(kernel_name=kernel_name, + inputs=[mk_gm_input, kn_gm_input], outputs=[mn_gm_output], + output_files_path=args.output_dir, + config={'l2_mode': 1}) + return tik_instance + +def test_matmul_tik(): + shape_a = [16, 64] + shape_b = [64, 1024] + # 输入参数和tiling信息 + params = { + 'M': shape_a[0], + 'K': shape_a[1], + 'N': shape_b[1], + 'data_type': "float16", + 'm_tiling_size': 16, + 'm_cycle_times': 1, + 'm_thread_num': 1, + 'n_tiling_size': 64, + 'n_cycle_times': 16, + 'n_thread_num': 1, + 'k_tiling_size': 32, + 'k_cycle_times': 2, + 'k_thread_num': 2, + } + tik_instance = matmul_tik_compute(params, "simple_matmul") + +if __name__ == "__main__": + test_matmul_tik() \ No newline at end of file diff --git a/oec-ascend/oec/resource/ModelDev/AOETool/ascend_test_aoe.py b/oec-ascend/oec/resource/ModelDev/AOETool/ascend_test_aoe.py new file mode 100644 index 00000000..afc7bcb4 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/AOETool/ascend_test_aoe.py @@ -0,0 +1,10 @@ +import oec + +oec.TestCase( + group= ("模型开发","模型调优"), + name = "MODEL_AOE_TF", + cmd=f"mkdir -p '{oec.Context.output_dir}/tmp'\n" + f"cd '{oec.Context.output_dir}/tmp'\n" + f"aoe --framework=3 --model={oec.Context.data_path}/model/model_tf.pb --job_type=2\n", + timeout=900 + ) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ModelDev/ATCTool/add.json b/oec-ascend/oec/resource/ModelDev/ATCTool/add.json new file mode 100644 index 00000000..7424b0db --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/ATCTool/add.json @@ -0,0 +1,25 @@ +[ + { + "op": "Add", + "name": "add", + "input_desc": [ + { + "format": "ND", + "shape": [3,3], + "type": "int32" + }, + { + "format": "ND", + "shape": [3,3], + "type": "int32" + } + ], + "output_desc": [ + { + "format": "ND", + "shape": [3,3], + 
"type": "int32" + } + ] + } +] diff --git a/oec-ascend/oec/resource/ModelDev/ATCTool/ascend_test_atc.py b/oec-ascend/oec/resource/ModelDev/ATCTool/ascend_test_atc.py new file mode 100644 index 00000000..d3a2c0ae --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/ATCTool/ascend_test_atc.py @@ -0,0 +1,30 @@ +import oec + +oec.TestCase( + group= ("模型开发","模型编译"), + name = "MODEL_ATC_SINGLE_ADD", + cmd=f"bash runatc.sh '{oec.Context.output_dir}' add.json --output=out --soc_version=Ascend910B3", + timeout=300 + ) + +oec.TestCase( + group= ("模型开发","模型编译"), + name = "MODEL_ATC_SINGLE_CONV2D", + cmd=f"bash runatc.sh '{oec.Context.output_dir}' conv2d.json --output=out --soc_version=Ascend910B3", + timeout=300 + ) + +oec.TestCase( + group= ("模型开发","模型编译"), + name = "MODEL_ATC_SINGLE_DYNAMIC_SHAPE", + cmd=f"bash runatc.sh '{oec.Context.output_dir}' dynamic_shape.json --output=out --soc_version=Ascend910B3", + timeout=300 + ) + +oec.TestCase( + group= ("模型开发","模型编译"), + name="MODEL_ATC_PB_TO_JSON_DESC", + cmd=f"mkdir -p '{oec.Context.output_dir}/tmp' && cd '{oec.Context.output_dir}/tmp'\n" + f"atc --mode=1 --om='{oec.Context.data_path}/model/model_tf.pb' --json=out/model.json --framework=3", + timeout=300 +) \ No newline at end of file diff --git a/oec-ascend/oec/resource/ModelDev/ATCTool/conv2d.json b/oec-ascend/oec/resource/ModelDev/ATCTool/conv2d.json new file mode 100644 index 00000000..c2a5d385 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/ATCTool/conv2d.json @@ -0,0 +1,43 @@ +[ + { + "op": "Conv2D", + "name": "conv2d", + "input_desc": [ + { + "format": "NCHW", + "shape": [1, 3, 16, 16], + "type": "float16" + }, + { + "format": "NCHW", + "shape": [3, 3, 3, 3], + "type": "float16" + } + ], + "output_desc": [ + { + "format": "NCHW", + "shape": [1, 3, 16, 16], + "type": "float16" + } + ], + "attr": [ + { + "name": "strides", + "type": "list_int", + "value": [1, 1, 1, 1] + }, + { + "name": "pads", + "type": "list_int", + "value": [1, 1, 1, 1] + }, + { + "name": "dilations", + "type": "list_int", + "value": [1, 1, 1, 1] + } + ] + } + ] + \ No newline at end of file diff --git a/oec-ascend/oec/resource/ModelDev/ATCTool/dynamic_shape.json b/oec-ascend/oec/resource/ModelDev/ATCTool/dynamic_shape.json new file mode 100644 index 00000000..f524ec91 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/ATCTool/dynamic_shape.json @@ -0,0 +1,29 @@ +[ + { + "op": "Add", + "name": "add", + "input_desc": [ + { + "format": "ND", + "shape": [-1,16], + "shape_range": [[0, 32]], + "type": "int64" + }, + { + "format": "ND", + "shape": [-1,16], + "shape_range": [[0, 32]], + "type": "int64" + } + ], + "output_desc": [ + { + "format": "ND", + "shape": [-1,16], + "shape_range": [[0,32]], + "type": "int64" + } + ] + } + ] + \ No newline at end of file diff --git a/oec-ascend/oec/resource/ModelDev/ATCTool/runatc.sh b/oec-ascend/oec/resource/ModelDev/ATCTool/runatc.sh new file mode 100644 index 00000000..ecc05e03 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/ATCTool/runatc.sh @@ -0,0 +1,6 @@ +jsonfile=$(realpath $2) +mkdir -p "$1/tmp" && cd "$1/tmp" +shift +shift +atc --singleop="$jsonfile" $@ + diff --git a/oec-ascend/oec/resource/ModelDev/HCCLTest/ascend_test_hccl.py b/oec-ascend/oec/resource/ModelDev/HCCLTest/ascend_test_hccl.py new file mode 100644 index 00000000..98057db9 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/HCCLTest/ascend_test_hccl.py @@ -0,0 +1,33 @@ +import oec + +oec.TestCase( + group= ("模型开发","集合通信"), + name = "MODEL_HCCL_AIS_BENCH_CHECK", + cmd=f"python3 check_package_version.py", + 
auxiliary=True + ) + +oec.NPUTestCase( + group= ("模型开发","集合通信"), + name = "MODEL_HCCL_ALL_REDUCE_TEST", + cmd=f"python3 -m ais_bench -n all_reduce_test -p -b 8K -e 64M -f 2 -d fp32 -o sum", + timeout=30 + + ) + +oec.NPUTestCase( + group= ("模型开发","集合通信"), + name = "MODEL_HCCL_ALL_GATHER_TEST", + cmd=f"python3 -m ais_bench -n all_gather_test -p -b 8K -e 64M -f 2 -d fp32", + timeout=30 + + ) + +oec.NPUTestCase( + group= ("模型开发","集合通信"), + name = "MODEL_HCCL_BROADCAST_TEST", + cmd=f"python3 -m ais_bench -n broadcast_test -p -b 8K -e 64M -f 2 -d fp32", + timeout=30 + + ) + diff --git a/oec-ascend/oec/resource/ModelDev/HCCLTest/check_package_version.py b/oec-ascend/oec/resource/ModelDev/HCCLTest/check_package_version.py new file mode 100644 index 00000000..8e7af631 --- /dev/null +++ b/oec-ascend/oec/resource/ModelDev/HCCLTest/check_package_version.py @@ -0,0 +1,303 @@ +import sys +import subprocess +from packaging.version import parse as parse_version + +def get_python_version(): + """获取当前Python版本字符串""" + return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + +def get_installed_packages(): + """获取所有已安装的包及其版本 (使用pip list)""" + try: + result = subprocess.run( + [sys.executable, '-m', 'pip', 'list', '--format=freeze'], + capture_output=True, + text=True, + check=True + ) + packages = {} + for line in result.stdout.splitlines(): + if '==' in line: + name, version = line.split('==', 1) + packages[name.lower()] = version.strip() + return packages + except Exception as e: + print(f"错误: 无法获取已安装包列表 - {str(e)}") + print("请确保pip已安装并能正常工作") + sys.exit(1) + +def check_python_version(min_version=None, max_version=None): + """ + 检查Python版本是否在指定范围内 + + 参数: + min_version (str): 最小支持版本 (e.g., "3.8.0") + max_version (str): 最大支持版本 (e.g., "3.10.0") + + 返回: + tuple: (是否满足, 问题描述) + """ + current_ver = parse_version(get_python_version()) + problems = [] + + if min_version: + min_ver = parse_version(min_version) + if current_ver < min_ver: + problems.append(f"需要 ≥ {min_version}") + + if max_version: + max_ver = parse_version(max_version) + if current_ver > max_ver: + problems.append(f"需要 ≤ {max_version}") + + return (len(problems) == 0, problems) + +def check_package(pkg_info, installed_packages): + """ + 检查单个包是否满足要求 + + 参数: + pkg_info (dict): 包配置信息 + installed_packages (dict): 已安装包的字典 + + 返回: + tuple: (是否满足, 安装的版本, 问题描述) + """ + pypi_name = pkg_info["pypi_name"].lower() + installed_version = installed_packages.get(pypi_name) + + # 包未安装 + if not installed_version: + return (False, None, ["未安装"]) + + # 没有版本要求 + if "min_version" not in pkg_info and "max_version" not in pkg_info: + return (True, installed_version, []) + + # 检查版本要求 + problems = [] + try: + installed_ver = parse_version(installed_version) + + if "min_version" in pkg_info: + min_ver = parse_version(pkg_info["min_version"]) + if installed_ver < min_ver: + problems.append(f"需要 ≥ {pkg_info['min_version']}") + + if "max_version" in pkg_info: + max_ver = parse_version(pkg_info["max_version"]) + if installed_ver > max_ver: + problems.append(f"需要 ≤ {pkg_info['max_version']}") + except Exception as e: + problems.append(f"版本解析错误: {str(e)}") + + return (len(problems) == 0, installed_version, problems) + +def check_dependencies(requirements): + """ + 检查所有依赖项 + + 参数: + requirements (dict): 依赖配置字典 + + 返回: + tuple: (所有依赖是否满足, 包检查结果列表) + """ + # 获取已安装包列表 + installed_packages = get_installed_packages() + + print("=" * 70) + print("Python环境与包依赖检查") + print("=" * 70) + + all_ok = True + results = [] + + # 1. 
检查Python版本 + py_req = requirements.get("python", {}) + if py_req: + min_py = py_req.get("min_version") + max_py = py_req.get("max_version") + py_ok, py_problems = check_python_version(min_py, max_py) + + version_range = [] + if min_py: version_range.append(f"≥ {min_py}") + if max_py: version_range.append(f"≤ {max_py}") + if min_py and max_py and min_py == max_py: + range_str = min_py + else: + range_str = " 且 ".join(version_range) if version_range else "任意版本" + + status = "✓" if py_ok else "✗" + problems = ", ".join(py_problems) if py_problems else "满足要求" + print(f"Python版本: {get_python_version()} | 要求: {range_str}") + print(f" {status} 状态: {problems}") + print("-" * 70) + + if not py_ok: + all_ok = False + + # 2. 检查包依赖 + packages = requirements.get("packages", []) + if not packages: + print("未配置包依赖检查") + else: + print("\n包依赖检查:") + print("-" * 70) + + for pkg in packages: + # 获取包信息 + name = pkg["name"] + pypi_name = pkg["name"] + + # 确定当前Python版本适用的规则 + current_py = f"{sys.version_info.major}.{sys.version_info.minor}" + version_rules = pkg.get("version_rules", {}) + rule = version_rules.get(current_py, pkg.get("default", {})) + + # 检查包 + satisfied, version, problems = check_package( + {"pypi_name": pypi_name, **rule}, + installed_packages + ) + + # 确定显示的要求范围 + + range_parts = [] + if "min_version" in rule: + range_parts.append(f"≥ {rule['min_version']}") + if "max_version" in rule: + range_parts.append(f"≤ {rule['max_version']}") + if len(range_parts) == 2 and rule['min_version'] == rule['max_version']: + range_str = rule['min_version'] + else: + range_str = " 且 ".join(range_parts) if range_parts else "任意版本" + + # 确定状态 + if not satisfied: + status = "✗" + all_ok = False + else: + status = "✓" + + # 收集结果 + results.append({ + "display_name": name, + "pypi_name": pypi_name, + "status": status, + "installed": version or "未安装", + "required": range_str, + "problems": problems, + "rule": rule + }) + + # 打印结果 + print(f"{status} {name}") + print(f" 已安装: {version or '未安装'}") + print(f" 要求: {range_str}") + if problems: + print(f" 问题: {', '.join(problems)}") + print("-" * 70) + + print("=" * 70) + return all_ok, results + +def generate_install_commands(results, py_req=None): + """ + 生成安装命令 + + 参数: + results (list): 包检查结果列表 + py_req (dict): Python版本要求 + + 返回: + str: 安装命令字符串 + """ + commands = [] + + # Python版本要求 + if py_req: + min_py = py_req.get("min_version") + max_py = py_req.get("max_version") + if min_py or max_py: + commands.append("# 请确保使用正确的Python版本") + if min_py and max_py: + if max_py == min_py: + commands.append(f"# 推荐使用 Python {min_py}") + else: + commands.append(f"# 推荐使用 Python {min_py} 到 {max_py} 之间的版本") + elif min_py: + commands.append(f"# 需要 Python {min_py} 或更高版本") + elif max_py: + commands.append(f"# 需要 Python {max_py} 或更低版本") + + # 包安装命令 + commands.append("\n# 包安装命令:") + + for res in results: + pkg_name = res["pypi_name"] + rule = res["rule"] + + if "min_version" in rule and "max_version" in rule: + if rule['min_version']==rule['max_version']: + commands.append(f"请先安装 '{pkg_name}=={rule['min_version']}'") + else: + commands.append(f"请先安装 '{pkg_name}>={rule['min_version']},<={rule['max_version']}'") + elif "min_version" in rule: + commands.append(f"请先安装 '{pkg_name}>={rule['min_version']}'") + elif "max_version" in rule: + commands.append(f"请先安装 '{pkg_name}<={rule['max_version']}'") + else: + commands.append(f"请先安装 {pkg_name}") + + return "\n".join(commands) + +if __name__ == "__main__": + # ====== 依赖配置 ====== + # 配置说明: + # - "python": 可选的Python版本要求 + # - "packages": 包依赖列表 + # 每个包必须包含: 
+ # - "name": PyPI上的包名 + # - "version_rules": (可选) 针对不同Python版本的规则 + # - "default": (可选) 默认规则 + # + # 规则格式: + # { + # "min_version": "最低版本", + # "max_version": "最高版本" + # } + + DEPENDENCY_CONFIG = { + # Python版本要求 + "python": { + "min_version": "3.7" + }, + + "packages": [ + { + "name": "ais_bench_net_test" + }, + + ] + } + # ==================== + + # 检查依赖 + all_ok, results = check_dependencies(DEPENDENCY_CONFIG) + + if all_ok: + print("\n所有依赖满足! 可以运行主程序。") + # 这里可以继续执行你的主程序 + # from main import main + # main() + else: + print("\n错误: 环境不满足要求!") + print("请根据以下提示解决问题:") + + # 生成安装建议 + py_req = DEPENDENCY_CONFIG.get("python", {}) + commands = generate_install_commands(results, py_req) + print("\n" + commands) + + sys.exit(1) # 非零退出码表示错误 \ No newline at end of file diff --git a/oec-ascend/oec/resource/base_report.xlsx b/oec-ascend/oec/resource/base_report.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b300f467c667d5178ba7c4b6a0fd8fdb882c06c3 GIT binary patch literal 11560 zcma*N19)EBx;7l!w(UlZZQE{a+qTU{P14wD%tnnHH@4MS-<$UAz1Hg4XaE1qb!Fmx zFy=kxeC8PUxD}*9!C-*iwlKL(fw%p?HfX?svE2s+M>~5bMg@Qw8sGxdFEgZ3XB#PC zARq@2ARwf_n;F{MGq~GW=Om6v^)sP`+y%|Pc32lqW3Pvuljx8g57oe$FubR1K*uJCTkI5dsl{vu?v&cv9B|l zK4lk#uc=K%&GfVTY}UqNv-O;L6rl{njoAuL_}YX)no^IQ(RQ{o@|gSN07hL*N$@ zeW)-$U^h5k+c>8$5ZxE?cEU@VyyHk6w4?%0T+)6oFs~s0#GT)R6psUdyC?v68;}LR-IrDZ>`zB;qa(~yy+q$Z97S9cG z4?==fCn`+t6|%hbc1zii%jl0_(dNa>Suniwqd#%T`RqwOW^hU;33#+>Mx&H8XtW^X zsv6e=LGnbBE>sp^Y=mpjJ}@?Y{4zmeXw+hxvr9~{0-?>YdJkbQID}^frE}SRA(*~2 z6%J91Rygh3@(EKSARic>!aduJ^TiaDXUVf$*PCh$d#prv?Jud+vmjy@iyGW)s{*MmN;cjj7L)f zA9=k#x!-{Q6XpmUk9%qW%*X(kQU3w6i<7gR%^T3g3A)mKOaP$4UxkN#G8wIwr(7!z zAT~ol_iT%c4%EyOr;BEYU))%B(HPsOM*BQG+*h6I?#mB@g79q2oDn3#1&JD&WJ}yQ z-Ipp1KtPTxSH5>lF++-5^t88hj?5KeZ|`$ifW}R-LwfCnVhgDq7k{?dpI@@mV)?vu z@9=|orYR>r1vx2ki8%6t9wo^|tY-rja=d-9=NWww%b~7*dCcLwRrCg$AEMGxHz}x@ zW>Xb?18Ul2gt(_Dm|FTnIBK+o9CE^g+7(=t=V)qw!qmu}L=`VaymmnA)hx=dGZ#MVa0d<@xv8oG<5v90l={ zll*K&!TsCsah+S|?$ZV!-;n*!^#v4wth@EE%JQEL5PloD**RJ{nVXn6|5=3)Bl}r< z!GVA-(1C!k{+I19mv?V{m7-x~x5jI)1zadC0Y+my63f2MlSQU8hlaBBoHsP?S(gWg%>*3ro8WAz>* z%bHc&am(n)!;snwq}6lvQJ$Zc-r;T~FVmcd0Y?+<>!eG`fcf`bNHr51g}eo1`p4F? 
zIs`xV>cX4vHP)0|sykFTafITkYIr`wHRiS(rMHhHW)?Ylw64cwl6@l;K`%E7*;->a zK5nRMRl^MF<#Qh-WlW+rV7CG~KM!TKR^IBy&(BIo;f_1W$AE7&obW5$N)48{}3%7ifkM8xuMQ<-xGmg8Z`waWw zW}TNa%1}1y7p3c_aqpx*_)^N)RkL|LDJX;2-&tb%jO%4a&QUlR5OkvFtqBhQdn6R|Pb`eB_qtPi6g zXGpHSYe|w2>b;R%NAC1c83+_aIK}yaW1&Po%17-#dbZaxnt8b2vO1KQLCSs+VV%8rIP zk1-DV)GCT^P{oBK0SFGPy+W#3f~VE2owTEB;V2FY4PV4;P@aY2f+IMaKjCvY z34FNQ!G#SqkWV&hizKNkV%za!OPhPM1q7S{Y%GwP`QhF&)HG~Eov6aMpPW;L!__j}S6-BI3o1X)CO{E9+{*hn#^7N=LtW5zgERG+$;G_yf}d z0gVy@VfH8yRKt8vBnKPIaA#h-lW=b2itn;fcKD2!RPoKoioRJ2A$SMAV~V(pazBYK17#wC$Ig_h1&mO<7jp53+tKL%mO(Q6ap z_@IjvZKJM?rQ#=K%A=cGGS@Yd+LnEEKTD)Xj6-T-F2=d`RZ1z8$AT61POzVkXNwGH zR0=wnsrG`Ojtl<5hPdwWPzj%&2BIn=lTH<_%!Yg+Vj1qanCVG`yEm*`w{yQWG$)>z zVr|qd!eejH6hBps;}Gmkvr%&?L7Zt%#VjG=Du%u+^5+D#d<)~od$cOyOYUXm=3JO#{s+gR&E5Mg8EWn}dxE>k<7 z;4Mrl8tqWL#eVdNQ`6Dd2wElZx{R3(ep%pEC*bEmj)LHCDDI-2Kv9c5~e%UFf zxJUVDG*<48|J~EWhmWL|MWqbn1+>#X8_hJ;A4nIIV&WM#CPe+B*L=b7QFKamIE)4T?# z{r^YZXZ|$`O}4UI1`LL}1I{f>H^`z!fx^OB<%8viM%F5%P9&SkQ6~=^QBg;nIEnOpJ%gCK4>a4=i?0R1H*NU z@%u_}7dL-`r~!kH88)}EX@e71{!WwutpW=lt0E8K;fr+w|a!@Pq z!1h$Rex>mTXR|#FO9!=&5JgF z0fnXs)wQ_aPH%~;5SHu4M|}lzJ!`2TEr}@KSmFZrSa3Z!n!+|;2>N^UtB9kSmlT=T zjm zGP`~0AGK;w+@NruH%60=&a$p?%bS?7kBg-a4d*V|;W2U`sBMjwupJkks}BA)E_{ym z)F#Kn85*3vZob90m1?sdZ~$)yhpdqQM3t^s;MMiuR<5-cRqU>W5qEs2x-|x!?t}J` zlBW3hF2MG5@YJV-yJfJP^A-009`n4-sQ_ahXLA!9lRxHEJlfDS<`6(YhPb~6o&Prb zW&CR(bD}A0hbxNJNqRvj=4H3V;o7NE5{cgyz;1)qCdwN-zqMjv>ZZoBRn(b(6CNB) z6%LE?9s#QKyOhpIH8h%F!ULwenW2`+u5HX%8aFlgssOIk?bJNa< zGH@^2N6+1)v(fj_2uGnM;i2R+VugzuD+~v?K+0Dwr&?}}ZCcYTKzN}a%TfmNzjV&+ zH#k{?8+Un{yUib6=LS_z>SrWWxNBC?LU`I9p(R$o(u@o4W3$ z^AqS}032;53ES8ZRv6L%el{HVKql}iSl1#}uMkBa`v|{(QBqrs%H{u6j zDVssY1~i2^s}CTY@1n5~R>|sYtspD#cz0VRdBs*dYsjEo+jkOBBG|_{&F%JD`5$Gf z5A+5};@oFMNus;?UnIo2L6*c?wI#uQHbqX3J>h=m(Er1XLzRPsT2Z2FD=gT(p-sB% z!#pnz48y%fej8fjyX#D2fiO9uG4_*buqWp01Gp+z>v}iHM$RjX4&hiGGBPkzf(^)E zPSf^KN*GGMM9}P-&pmbq=#Rn2@}e38F2x}7;=vBz@sX*0ylkN-n>T+RVI8K-pLkhk zwn*gf=f~~BfAc{9(uc_XPL7$dNq(j!@>zgDDBP%FA_cK?a0PB^VjVJ@!TA~W%Xo~P+^VE(8HUd*W6ZtGzAB5TzVWY-DaaRvfM$EyO#OMSXX0xThsYnWYJyTjMPQ}R+ zmvnlo{PFc&B!vHTr^awcmZr}Lmm98B$w!zinC(88Y;n!7eiq|?Uw`HP;j2wytw5ttWRwfC0(Xup zT_pvv^ppebg85t)FpR?-%vXD|&}vBthBQy}e!4)fVg+d2RWpjp;a#k_aM^r#g>PFU zOF}+5&J)2Z+!|j~Q$121^NKSQ(7RT_%Jq_;atoaJsMY3((N@#Z)-C4ijd>{YKR>kA z71)>&UPTU5Muz+HaES@CBST`fSCgXu#8!mH#C6H>dH}ZFXlNWu(hjPmlD zbf|1*2#GPOn5!L>b40JDvSSZ|ml$Ouh?*2wlF3Pf(%m(%)TN6IxEaq4Tnx8W5 zsIhh+eniwlPJ6hP9IsM|DQnck!1sp=93tP&s)fr zW4?Gc+KaFYF{*}8PUONeE|7R^rn%@|S_Jy+VUJ`NeDh9ou?pzV;&3ybmGjuezr3#+ zVJ(70Lqf~t!|52=I#XIC@o-3vQ=A-E%?9>W7yKltHK==6VA7H*mZ6)gNK38iL2FSg zVGCZJvhjj9kv?m=GijY+R-_ZOOCeRQ6{Gu9A8NIzU%PnIOvt3^j8OBqBa@yyb}>z| z#9JUlrA@Tsljrvo86H(oxohTFH;if8%%E*R-edXT<}<**MVK!?e(I{Fp+$1fp~FiV z1Hm{y3@58xA|y7Ei~=6{`4)(%yhza%3qfP08&^~#PSB}C`knT`7c3dmVI8-oD<|%% zzCI5=`vcOa+`!=uS32pmuO;!IG(g#iTC@TzXLOK=YuYiaPt0-3(0Z_^tQ*PBkZ4L@elT#edUdlt?=JaREF9r)_v>@n0YlEa48G}x%7 zreoo{F1e-ua7Yyy!23qWEzP{E<45^HpRp<8{T|7nlbTP|8T!nJW54SqZs>F^>%pdj z{Ecg_#jzAcyM}bf)5*yNCPq2gw8FbGcU`I%t-e_dEn2Ra#afTWK7^8^R`H35S)EU9 zz+)_(L~5Ke{Rd~;gh~5x->!a9SATKdu$g!0ivUs(9t;Qw@h`T?$j;8{*BhWbWxK?L z=9AapSMx-W4b}@qqEbYxRjj0%JauAmi4vwEdpyJ*WVzIJ#s$0-VKFju&CAEvd8^~) z>Qq7dGl_bvIK1Y){W~L)Bu>mWD8I#`aWGPw(jg0S@ThaznLE>$i@sx`*-=DleIzI- z{Eg{!Q>pNADK%x@czDzrWeHVf(6S}u6)G_&U1rn6$h#_T3bq{(HyGtvabXPb2@R5Z zpRwQr5w!BsfJ>KQ+?c)kKy~Y7mtgZ5o=KcTJ~b>=l`NBS_)$`xU=@M|TqJ{{8`<_f z^y5(B`9Wh}i%Ug8qqY@C*Gan;*{Ogi9fDjB+YzY80B(p;u49(1wrT%L-~Cp#xb_+I z7@$gak}0)&Uc6nZdecFRM$#o+aXn`gIfuxWCu)l+vLroM9)41OH(u$>Ph7u zpNdjLu#M{|Rwk=<&txUMG8OC9&ntH8NcAp-fADZ 
z*=69ut76~h7*=n!2$vTVXp>$)%&B+PabCvZAMFO1+{2V&#cCY}oDu5LriiV8T1{f> zx*%2(KTW%J2o8o-uliaz?`!6)hC{(;C#o^bO9jR9s$_&(1=-?|2gTkNUTlcXW^UNaU3Z^)o(`Qnstc+PpWL2Qv4zzr(R$3` zXNi=Q+1elsf;z3oC0B71JkZaE%TXVvz4x-OE1>yy(6kJS)tq8SrQYeWBX!?&3Bmc& z1MzZm?e?|m^((~VM|JO`rh-%ssr+ByBL9#Vd~l%7u~{mhBFZ~Da@MK#m{KmxDb*h zLySw0xT;F-x?P8ud0CF(=v)W3f2uR)@fWG%JGUdFL8E?$cZ^Myu;1y1=uJ!_Hzi$7 zZ0?#@lC76{MTWMmT5&k$ortYZE7dyfH)&)5@}431frqHPfuH0r2@=nOI)I==KRbH{SuhBeX{NZhqb6ODz3^#`j`%fRhf*~_#Mfw2Nl*$eAYqi+CCIX%5Q|_2cQl(+J z+Od0x#$2<_tr2+9xa%9CZ=pMP;AQVLp=0gv341@LoHQ+S;T?MhIF)|o`>Zjs z;yH8H%uqeej%S($yTce$Sx~H_elcD${$SO#B0}Z9G5e<46vz-x6!Py_Z>CtKwAVZHH*eiDIu5DIj~cuj1V zZV8S8D0t%}j-&y}DuO5$Wgr(H&DT-iV-Qyb%+?PEmC}dX(r!4K3rf@{>Nr$ekQ}nZ zIqunhk8xZnwPeG%YG@MXH>StYUy#y1ks#59Ne~!8BZ)A%8^Sc1eh4R^`)wud(Wn8V9HZ6sG$h&B^aVdHV zQHY53&6$4ii=-I-+L1UZPLTB$=V74_Tg_I)@fRX(w;|#RFt9M|pS_&|Kfs8CPJ<*9 zmAJyf#%rOM%4~1o;m+-ro!bYx*6fd;H@bU4?7XV%Tzp#j%i-1y%OTr>t(PxP7#8EF z!J(s0NH`BCgo?E-=Rjzp&61QAqd!i|>RBPH1fAk)!ckeJV>)~+d(3I0tbn4T4!Im?LL%$TT z3CpxXYc2y>YxZmB+YF^PqpN7y4@s8`AW0%pM3FGHP$@;^!$#^S3G^z%Q#C`6io(75*RnRhR zi0Z;uQ5J;%V9XA2!yfIx-Kmt>C z^RrPf`%w2wLBC_jJZpy7$PJR zT%lo#+)E;~w)rt%?_fLtlcXFTFV~xG-sGPhwI|(uno}Qr?ElZ+EHjl0hctv$IJbE z`80J6E#(a2 zE5=9yH4@pNULk9c^TX8e{h%Kd`92pNN z1L?Gh94|eLMUBCJ1dfWxy*?fOR3syD6e^7S`VAz@*U{~vWH1S`E9*zs`{(^A3AJtT zb^%4BSVdpCREE@ZGOOv#+?Ok3&)cqi&Q`|&k)rHO9$x}oMkS#nC8Ntl3+Ij@2-%U) zt2TbG`(q10V8J9u!^?p^v)2|#ygVKs=a-8xv#zJ)nbdJl(!3(kij=NT)>N>w;W`0} z0+JNb#%v^h2CZ@hpHeHtwv!ollKXQ?8Q2mum}bd)ka1Ha;B(M+dQ8rkvv*vrRa!DL zPNhcL7E0~e?1Qzabmay;szl%sb1sBWCGN3t(W}I^D;RdH0fsoWqBF(cVq##_Ntx&4 znbmt!N=;@($9uIs7<=E$=y+s^6;a-xc0n!gdAZp&_lLnQ^ydYo`-|oSmNtRi_0P>G+*R_b4Jy7* z7#^ZP(aQc?a|y3=P`UeI@+QZN^w)!J%h$W(Rb@7;Bg--H!p)y?#}04C#b(Xz&W_W^ z>eOss!I*{hXnapfjoMeE+D6Bek~(zo)NpCOHYR{p-`;?jfu6s=|B@Qn$#e(i18q?M zT83Jz_;dQI^7jhQe^vniC~+WtM-%HmSO2{@T-#p(zpVkN3BZ8-e{2}uEGrWy0P-WG zuV;a0NV}b~Zs^F1&Wt3-vc^9C=`ydA6sU=sC!HOFige#wA>%UcceB>KjoWZni1gll zifs>cQ4AEY8*=SbJok3jSP7J<=$HldP_gd?YdgF&dJGQQkayui7t*yT1O(@VqYK~YnVVEtMWrb{yr@n{x^m=?9i1)*x{N^` zIl@Ttk7M@&Gc$X=Jqc6V4WklV!Q=%;uVl=F{oAP?d~Es>j+gZ1ZvUUl5B~{Hh&`*q zp8y!e0kRAnz#hQz{5E@&U;Jy5`wQKU#8JyWCM40@ke8rzANpk<1*vUoDWc`MXCT8E z!v54?)9E=s{SBc_0sHr#aC>qDZ*#FN>YoWbFCqfC%+QS`VL)tlWXy2E33pCUvB7Ga z#BRq60+FGc22RFy;l0K#P#SY6k#*^)%`={(l2sZx%%!r$ak(2|gLbKeNjs_#S_Lyo z4wB%U5UCgd@KQl}T|pJ#`N+I1FVDKDZHnDZqSMjhnangOG11gh8ZF}E9N_jv` zXjU~cP*g<^5G5SNDEkSj<0VA58O9UZyXJ=S5hWQ!aY4lK_{-k-^#=~U3>r-#ffMWT z2hBKggJR{SamO1@A6Y_;Kt&C58Z%-uD*<+bl~uSBE5vk##)0gVcMsy@im3)*rsF}y z8NJ}7B=CLFUc`Gw5Z;qVD^3U2V7N0HRy9-S*f6!FMwfL3Yq*raG{(a-riNo98RwnT zN+OOUX=?Zi)AYoJBc|}FOI!Gh22WJ^x%Y%Mm0l(0fokzGBH?zu@$gmI*G}mO%#Fls0-ih?O{x|2kVlFOLn;?ZOCmE3Nqhh;4cb_*k zB7qJ`A-ED2GGh7s6*NFZluf*r+6o=%ZkEJ#)a_kBWA?Efi=@HZ?{ByvCx-x(SR2!; zqwOlTYdU*zE_VJ2_>*Z=ylb9it6PU%(P=fZ?ZF589f6m-^|3;(2VezhU=Xxlu{&7VpCEJ$x2zmt~#+vnE{^xu-b zEfD_R-gK33+utOZ|GddRMVN2Kzw-g^@h`Xkk!1ea+CQa)e|hHtp6q`o{g<%tPxn71 za(}r)01C)o?*A6d{kNNbQ-b~8-fr3eK(GO-&wq3HCrUHqe=ApiHuTTB_Lk>2N)LZ` z`(IV=&p3aw;D5zI18{`@i4*@b!k^U9UlEA0e@FN?k@Wxn-ci5%z^e{`?Qbpk73G&~L_r$-ZOj3L0{D{!WE-G*JNtiJc|H^X literal 0 HcmV?d00001 diff --git a/oec-ascend/oec/resource/test_sequence.py b/oec-ascend/oec/resource/test_sequence.py new file mode 100644 index 00000000..911d3085 --- /dev/null +++ b/oec-ascend/oec/resource/test_sequence.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +# 安装order中分组配置的顺序执行测试用例,True并发执行测试,False串行执行测试,分组内的测试用例不分先后顺序 +test_sequence = { + ("运行环境", "环境信息"): False, + ("运行环境", "运行依赖"): False, + ("运行环境", "CANN信息"): False, + ("应用开发", "基础功能"): False, + ("应用开发", "算子加速库"): True, + ("应用开发", "媒体处理"): False, + ("算子", 
"算子编译"): True, + ("算子", "算子开发"): True, + ("模型开发", "模型编译"): False, + ("模型开发", "模型调优"): False, + ("模型开发", "集合通信"): False, + ("集成测试", "ATB"): False, + ("集成测试", "离线推理"): False, + ("集成测试", "在线训练"): False, + ("运行环境", "CANN安装卸载"): False, +} diff --git a/oec-ascend/setup.py b/oec-ascend/setup.py new file mode 100644 index 00000000..4dd24017 --- /dev/null +++ b/oec-ascend/setup.py @@ -0,0 +1,18 @@ +from setuptools import setup, find_packages + +setup( + name="oec-ascend-compatibility", + version="1.0", + packages=find_packages(), + include_package_data=True, + install_requires=["openpyxl", "pandas", "distro", "packaging"], + author="spicy-bittern", + author_email=" ", + description="Ascend Operating System Compatibility Verification Tool", + license="Apache-2.0", + entry_points={ # 定义命令行指令 + "console_scripts": ["oec-ascend = oec.main:main"] # 命令名 = 模块:函数 + }, + keywords="Ascend Operating System Compatibility Verification Tool oec-ascend", + url="https://gitee.com/ascend/tools/tree/master/oec-ascend", +) -- Gitee From 160c4da282c73afd7cc7f19c017d648643c51f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?= Date: Tue, 22 Jul 2025 03:25:12 +0000 Subject: [PATCH 3/6] =?UTF-8?q?=E3=80=90oec-ascend=E3=80=91=E6=A1=86?= =?UTF-8?q?=E6=9E=B6=E5=A2=9E=E5=8A=A0=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=E6=A0=B9=E6=8D=AENPU=E5=9E=8B=E5=8F=B7=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- oec-ascend/oec/BaseTest.py | 8 ++++---- oec-ascend/oec/BaseTypes.py | 2 +- oec-ascend/oec/TestContext.py | 8 +++++--- oec-ascend/oec/TestReport.py | 8 ++++++-- oec-ascend/oec/TestUtils.py | 31 +++++++++++++++++++++++++++++-- 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/oec-ascend/oec/BaseTest.py b/oec-ascend/oec/BaseTest.py index 5ea12473..a0c82611 100644 --- a/oec-ascend/oec/BaseTest.py +++ b/oec-ascend/oec/BaseTest.py @@ -67,7 +67,7 @@ class BaseTest(TestInterface): return self.state not in [State.NOT_RUNNING, State.RUNNING] def can_continue(self): - if self.is_passed(): + if self.is_passed() or self.state == State.UNSUPPORTED: return True if self.is_failed() and self.is_optional(): @@ -76,7 +76,7 @@ class BaseTest(TestInterface): return False def is_failed(self): - if self.state in [State.FAIL, State.TIMEOUT, State.UNSUPPORTED]: + if self.state in [State.FAIL, State.TIMEOUT]: return True return False @@ -163,9 +163,9 @@ class BaseTest(TestInterface): if self.auxiliary and state == State.FAIL: state=State.WARNING - self.context.distribution[self.state] -= 1 + self.context.distribution[self.state] -= self.count() self._state = state - self.context.distribution[state] += 1 + self.context.distribution[state] += self.count() def get_test_content(self): return ( diff --git a/oec-ascend/oec/BaseTypes.py b/oec-ascend/oec/BaseTypes.py index 9fae51db..8cb93a60 100644 --- a/oec-ascend/oec/BaseTypes.py +++ b/oec-ascend/oec/BaseTypes.py @@ -10,7 +10,7 @@ class State(Enum): NOTHING_TO_DO = "nothing to do" PASS = "passed" WARNING = "warning" - UNSUPPORTED = "unsupported" + TIMEOUT = "timeout" FAIL = "failed" diff --git a/oec-ascend/oec/TestContext.py b/oec-ascend/oec/TestContext.py index 6f7e5737..d24e2e2a 100644 --- a/oec-ascend/oec/TestContext.py +++ b/oec-ascend/oec/TestContext.py @@ -38,6 +38,8 @@ class TestContext(object): self.finished = False for state in State: self._states_distribution.setdefault(state, 0) + + self.infomation.setdefault("NPU", "unknow") def set_env(self,env): 
self._env = env @@ -83,18 +85,18 @@ class TestContext(object): failed = ( self.distribution[State.FAIL] + self.distribution[State.TIMEOUT] - + self.distribution[State.UNSUPPORTED] ) total = len(self.get_used_tests()) - ran = total - self.distribution[State.NOT_RUNNING] - self.distribution[State.RUNNING] + ran = total - self.distribution[State.NOT_RUNNING] - self.distribution[State.RUNNING] \ + - self.distribution[State.WARNING] - self.distribution[State.UNSUPPORTED] if total == 0: return "wait for start" return ( f"total {total}, running {self.distribution[State.RUNNING]}, not running {self.distribution[State.NOT_RUNNING]}, " f"passed {success}, warning {self.distribution[State.WARNING]}, failed {self.distribution[State.FAIL]}, " - f"timeout {self.distribution[State.TIMEOUT]}.\n" + f"timeout {self.distribution[State.TIMEOUT]}, unsupported {self.distribution[State.UNSUPPORTED]}.\n" f"Completion rate {round(ran/total*100,2)}%, pass rate { 0 if ran==0 else round(success/ran*100,2)}%" ) diff --git a/oec-ascend/oec/TestReport.py b/oec-ascend/oec/TestReport.py index 5b64e6a7..b6e49d3d 100644 --- a/oec-ascend/oec/TestReport.py +++ b/oec-ascend/oec/TestReport.py @@ -56,7 +56,11 @@ def gen_report(path: str, context: TestContext): @property def total(self): return sum([test.count() for test in self.tests]) - + + @property + def failed(self): + return sum([test.count() if test.is_failed() else 0 for test in self.tests]) + dic = {} for i in range(len(dft)): dic.setdefault((dft.iat[i, 0], dft.iat[i, 1]), info()) @@ -78,7 +82,7 @@ def gen_report(path: str, context: TestContext): inf = dic[k] if inf.total != 0: dft.loc[k, "测试结果"] = f"{round(inf.passed/inf.total*100,2)}%" - dft.loc[k, "结论"] = "PASS" if inf.passed == inf.total else "FAILED" + dft.loc[k, "结论"] = "PASS" if inf.failed == 0 else "FAILED" for test in inf.tests: g1, g2 = k details.loc[len(details)] = [ diff --git a/oec-ascend/oec/TestUtils.py b/oec-ascend/oec/TestUtils.py index 55b26c92..e6bc216f 100644 --- a/oec-ascend/oec/TestUtils.py +++ b/oec-ascend/oec/TestUtils.py @@ -65,8 +65,23 @@ class ResetEnvTestCase(BaseTest): class NPUTestCase(TestCase): """ - 从Context.infomation中获取和替换cmd中 包围的信息,其中key为需要获取和替换的键,注意左尖括号前需要有白字符 + 1.从Context.infomation中获取和替换cmd中 包围的信息,其中key为需要获取和替换的键,注意左尖括号前需要有白字符 + 2.支持根据NPU型号设置黑名单 NPUTestCase(...,black_list=[r"Ascend310P.*", r"Ascend310B.*"]), 禁用所有310P 310B系列 """ + def __init__(self, black_list=None, *args, **kwargs): + super().__init__(*args, **kwargs) + if black_list is None: + black_list = [] + if isinstance(black_list,str): + black_list = [black_list] + if not isinstance(black_list, list): + raise TypeError("black_list type is not correct!") + self._black_list = black_list + + @property + def black_list(self): + return self._black_list + def replace_cmd_with_info(self, cmd): # 正则表达式匹配:空白字符 + # \s 匹配任何空白字符,[\w]+ 匹配单词字符(字母、数字、下划线) @@ -79,7 +94,19 @@ class NPUTestCase(TestCase): new_cmd = pattern.sub(replacer, cmd) return new_cmd - + def check_npu_in_black_list(self): + if not self.black_list: + return False + npu = self.context.infomation.get("NPU", "unknow") + for name in self.black_list: + if re.fullmatch(name, npu): + return True + return False + def execute_command(self): + if self.check_npu_in_black_list(): + self.set_state(State.UNSUPPORTED) + self.set_reason(f"the npu {self.context.infomation.get('NPU', 'unkonw')} is unsupported in this case.") + return cmd:str = self.replace_cmd_with_info(self.get_cmd()) self.execute_command_with_cmd(cmd) \ No newline at end of file -- Gitee From 
0d7ef13752f398e4aa0bb23adc27bfc7bc3bea60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?= Date: Wed, 23 Jul 2025 07:15:14 +0000 Subject: [PATCH 4/6] =?UTF-8?q?=E3=80=90oec-ascend=E3=80=91=E6=A1=86?= =?UTF-8?q?=E6=9E=B6=E5=B1=8F=E6=98=BE=E4=BF=A1=E6=81=AF=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- oec-ascend/oec/BaseTest.py | 45 ++++++++++++++++++++++ oec-ascend/oec/TestContext.py | 70 ++++++++++++++++++++++++++++------- oec-ascend/oec/Utils.py | 19 ++++++++++ oec-ascend/oec/main.py | 32 +++++++++------- 4 files changed, 139 insertions(+), 27 deletions(-) create mode 100644 oec-ascend/oec/Utils.py diff --git a/oec-ascend/oec/BaseTest.py b/oec-ascend/oec/BaseTest.py index a0c82611..bc883e6c 100644 --- a/oec-ascend/oec/BaseTest.py +++ b/oec-ascend/oec/BaseTest.py @@ -2,6 +2,9 @@ import os import threading import inspect +import time +from datetime import datetime +from oec.Utils import elapsed_time_str from typing import Tuple # 兼容python3.7 from logging import getLogger from oec.TestInterface import TestInterface @@ -35,6 +38,9 @@ class BaseTest(TestInterface): self._lock = threading.Lock() self._filename = None self._lineno = None + self._start_time = datetime.now() + self._end_time = self._start_time + self._update_count = 0 for stack in inspect.stack()[1:]: if stack.function != "__init__": self._filename = stack.filename @@ -107,6 +113,42 @@ class BaseTest(TestInterface): def get_origin_path(self): return self._filename + + def update_elapsed_time(self): + if self.state in [State.RUNNING, State.NOT_RUNNING]: + self._end_time = datetime.now() + + @property + def elapsed_time(self): + delta = self._end_time - self._start_time + return delta + + @property + def elapsed_time_str(self): + return elapsed_time_str(self.elapsed_time) + + def update_console_message(self): + self.update_elapsed_time() + message = f"{self.name} {self.elapsed_time_str}" + anime = "⠋⠙⠸⠴⠦⠇" + if self.is_failed(): + message = f"\033[31m✕ {message} - {self.get_reason()}\033[0m" + elif self.is_passed(): + message = f"\033[32m✓ {message}\033[0m" + elif self.state == State.WARNING: + message = f"\033[33m! 
{message} - {self.get_reason()}\033[0m" + elif self.state == State.UNSUPPORTED: + message = f"\033[0m✓ {message} - {self.get_reason()}\033[0m" + else: + charactor = anime[self._update_count % len(anime)] + message = f"{charactor} {message}\033[0m" + + self.context.set_message(self.name, message) + self._update_count += 1 + + def del_console_message(self): + self.context.del_message(self.name) + def run(self): self._lock.acquire() if self.is_finished() and self.can_cached(): @@ -115,6 +157,8 @@ class BaseTest(TestInterface): ) return self.set_state(State.NOT_RUNNING) + self._start_time = datetime.now() + self.update_elapsed_time() try: self.execute_command() except Exception as e: @@ -124,6 +168,7 @@ class BaseTest(TestInterface): logger.debug( f"{self.name} is {self.state.value}, reason: {self.get_reason()}" ) + self.update_elapsed_time() self._lock.release() def execute_command(self): diff --git a/oec-ascend/oec/TestContext.py b/oec-ascend/oec/TestContext.py index d24e2e2a..763cf362 100644 --- a/oec-ascend/oec/TestContext.py +++ b/oec-ascend/oec/TestContext.py @@ -1,12 +1,13 @@ # encoding: utf-8 import os import random +import time from datetime import datetime import threading from importlib import import_module from oec.BaseTypes import State from logging import getLogger -import pandas as pd +from oec.Utils import elapsed_time_str from oec.TestInterface import TestInterface @@ -35,15 +36,34 @@ class TestContext(object): self._infomation = {} self._states_distribution = {} self._env = os.environ.copy() + self._console_output = {} + self._console_position = [] self.finished = False + self._running_tests = [] + self._start_time = datetime.now() for state in State: self._states_distribution.setdefault(state, 0) self.infomation.setdefault("NPU", "unknow") + self.set_message("distribution", "") + self.set_message("rate", "") + def set_env(self,env): self._env = env + def set_message(self, key, message:str): + if key not in self._console_output: + self._console_position.append(key) + self._console_output[key] = message + + def del_message(self, key): + if key not in self._console_output: + return + del self._console_output[key] + self._console_position.remove(key) + + @property def env(self): return self._env @@ -80,12 +100,8 @@ class TestContext(object): def data_path(self): return self._data_path - def get_state_distribution_str(self): + def update_state(self): success = self.distribution[State.PASS] + self.distribution[State.NOTHING_TO_DO] - failed = ( - self.distribution[State.FAIL] - + self.distribution[State.TIMEOUT] - ) total = len(self.get_used_tests()) ran = total - self.distribution[State.NOT_RUNNING] - self.distribution[State.RUNNING] \ @@ -93,12 +109,21 @@ class TestContext(object): if total == 0: return "wait for start" - return ( + self.set_message("distribution", f"total {total}, running {self.distribution[State.RUNNING]}, not running {self.distribution[State.NOT_RUNNING]}, " f"passed {success}, warning {self.distribution[State.WARNING]}, failed {self.distribution[State.FAIL]}, " - f"timeout {self.distribution[State.TIMEOUT]}, unsupported {self.distribution[State.UNSUPPORTED]}.\n" - f"Completion rate {round(ran/total*100,2)}%, pass rate { 0 if ran==0 else round(success/ran*100,2)}%" - ) + f"timeout {self.distribution[State.TIMEOUT]}, unsupported {self.distribution[State.UNSUPPORTED]}.") + self.set_message("rate", + f"Completion rate {round(ran/total*100,2)}%, pass rate { 0 if ran==0 else round(success/ran*100,2)}% - {elapsed_time_str(datetime.now() - self._start_time)}") + + 
for test in self._running_tests: + test.update_console_message() + + + def get_state_distribution_str(self): + self.update_state() + all = [self._console_output[k] for k in self._console_position] + return '\n'.join(all) @property def relative_output(self): @@ -177,21 +202,38 @@ class TestContext(object): logger.debug(order_list) self._test_order = order_list self._used_tests = used_test - + + def clear_unimportented_messages(self, items): + time.sleep(5) + for test in items: + if not test.is_failed() and not test.state == State.WARNING: + test.del_console_message() + def run_tests(self): self.distribution[State.NOT_RUNNING] = len(self.get_used_tests()) order_list = self.test_order - for item in order_list: + self._start_time = datetime.now() + final_thread = None + for items in order_list: threads = [] - for test in item: + self._running_tests = items + for test in items: t = threading.Thread(target=test.run, name=test.name) t.start() threads.append(t) for t in threads: t.join() - for test in item: + self.update_state() + final_thread = threading.Thread(target=self.clear_unimportented_messages, args=(items,)) + final_thread.start() + self._running_tests = [] + + for test in items: if not test.can_continue(): + final_thread.join() return State.FAIL + if final_thread: + final_thread.join() return State.PASS def get_used_tests(self): diff --git a/oec-ascend/oec/Utils.py b/oec-ascend/oec/Utils.py new file mode 100644 index 00000000..115a7aac --- /dev/null +++ b/oec-ascend/oec/Utils.py @@ -0,0 +1,19 @@ +from datetime import timedelta + +def elapsed_time_str(delta:timedelta): + + hours = delta.seconds // 3600 + minutes = (delta.seconds // 60) % 60 + seconds = round(delta.seconds % 60 + delta.microseconds // 10000 * 0.01,1) + x = [delta.days,hours,minutes,seconds] + y = ['d','h','m','s'] + for i in range(len(x)): + if x[i] > 0 or i == len(x) - 1: + x = x[i:] + y = y[i:] + break + + result = "" + for i in range(len(x)): + result += f"{x[i]}{y[i]}" + return result \ No newline at end of file diff --git a/oec-ascend/oec/main.py b/oec-ascend/oec/main.py index 87cd9192..3a3b84e6 100644 --- a/oec-ascend/oec/main.py +++ b/oec-ascend/oec/main.py @@ -108,21 +108,27 @@ def get_absolute_out_path(output): return output_path - - - def print_state(context: TestContext): - logger.info(context.get_state_distribution_str()) - dynamic = ["|", "/", "-", "\\"] - count = 0 - while not context.finished: - logger.info( - f"\033[2A\033[K{context.get_state_distribution_str()} {dynamic[count % len(dynamic)]}\033[K" - ) - count += 1 - time.sleep(0.2) - logger.info(f"\033[2A\033[K{context.get_state_distribution_str()}\033[K") + last_lines = [] + def update_state(): + nonlocal last_lines + state = context.get_state_distribution_str() + lines = state.split('\n') + logger.info(f"\033[{len(last_lines) + 1}A") + for v in lines: + logger.info(f"{v}\033[K") + for _ in range(len(lines),len(last_lines)): + logger.info(f"\033[K") + delta_lines = len(last_lines) -len(lines) + if delta_lines > 0: + logger.info(f"\033[{delta_lines + 1}A") + last_lines = lines + while not context.finished: + update_state() + time.sleep(0.125) + update_state() + def enable_ansi_windows(): """在 Windows 上启用 ANSI 转义序列支持""" if sys.platform == "win32": -- Gitee From c110b0ca9ed331cf0344229acf37b57b5fe05945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?= Date: Wed, 23 Jul 2025 08:04:21 +0000 Subject: [PATCH 5/6] =?UTF-8?q?=E3=80=90oec-ascend=E3=80=91=E6=A1=86?= 
=?UTF-8?q?=E6=9E=B6=E5=B1=8F=E6=98=BE=E4=BF=A1=E6=81=AF=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- oec-ascend/oec/BaseTest.py | 4 ++-- oec-ascend/oec/TestContext.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/oec-ascend/oec/BaseTest.py b/oec-ascend/oec/BaseTest.py index bc883e6c..19e1782f 100644 --- a/oec-ascend/oec/BaseTest.py +++ b/oec-ascend/oec/BaseTest.py @@ -133,10 +133,10 @@ class BaseTest(TestInterface): anime = "⠋⠙⠸⠴⠦⠇" if self.is_failed(): message = f"\033[31m✕ {message} - {self.get_reason()}\033[0m" - elif self.is_passed(): - message = f"\033[32m✓ {message}\033[0m" elif self.state == State.WARNING: message = f"\033[33m! {message} - {self.get_reason()}\033[0m" + elif self.is_passed(): + message = f"\033[32m✓ {message}\033[0m" elif self.state == State.UNSUPPORTED: message = f"\033[0m✓ {message} - {self.get_reason()}\033[0m" else: diff --git a/oec-ascend/oec/TestContext.py b/oec-ascend/oec/TestContext.py index 763cf362..edadcddf 100644 --- a/oec-ascend/oec/TestContext.py +++ b/oec-ascend/oec/TestContext.py @@ -206,8 +206,9 @@ class TestContext(object): def clear_unimportented_messages(self, items): time.sleep(5) for test in items: - if not test.is_failed() and not test.state == State.WARNING: - test.del_console_message() + if test.is_failed() or test.state == State.WARNING: + continue + test.del_console_message() def run_tests(self): self.distribution[State.NOT_RUNNING] = len(self.get_used_tests()) -- Gitee From a1c25a58a55cb70f392f6a44e9df21d0911dc552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=A4=E8=BE=A3=E9=B2=9C=E9=A6=99?= Date: Wed, 23 Jul 2025 08:53:41 +0000 Subject: [PATCH 6/6] =?UTF-8?q?=E3=80=90oec-ascend=E3=80=91=E6=A1=86?= =?UTF-8?q?=E6=9E=B6=E5=B1=8F=E6=98=BE=E4=BF=A1=E6=81=AF=E4=BC=98=E5=8C=96?= =?UTF-8?q?-=E5=8E=BB=E6=8E=89=E9=97=AA=E7=83=81=E5=85=89=E6=A0=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- oec-ascend/oec/main.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/oec-ascend/oec/main.py b/oec-ascend/oec/main.py index 3a3b84e6..1c8dccce 100644 --- a/oec-ascend/oec/main.py +++ b/oec-ascend/oec/main.py @@ -108,21 +108,41 @@ def get_absolute_out_path(output): return output_path +class HideCursor: + def __init__(self): + self.state = False + + def hide(self): + self.state = True + print("\033[?25l",end="",flush=True) + + def __del__(self): + if not self.state: + return + print("\033[?25h",end="",flush=True) + +hider = HideCursor() def print_state(context: TestContext): - last_lines = [] + hider.hide() #隐藏光标显示 + last_lines_len = 0 def update_state(): - nonlocal last_lines + nonlocal last_lines_len state = context.get_state_distribution_str() lines = state.split('\n') - logger.info(f"\033[{len(last_lines) + 1}A") + lines_len = 0 + logger.info(f"\033[{last_lines_len + 1}A") for v in lines: - logger.info(f"{v}\033[K") - for _ in range(len(lines),len(last_lines)): + terminal_colums, terminal_lines= os.get_terminal_size() + for l in range(0, len(v), terminal_colums): + logger.info(f"{v[l:l + terminal_colums]}\033[K") + lines_len += 1 + + for _ in range(lines_len, last_lines_len): logger.info(f"\033[K") - delta_lines = len(last_lines) -len(lines) + delta_lines = last_lines_len -lines_len if delta_lines > 0: logger.info(f"\033[{delta_lines + 1}A") - last_lines = lines + last_lines_len = lines_len while not context.finished: update_state() @@ -192,4 +212,7 @@ def 
main():
 
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    finally:
+        del hider #恢复光标显示
-- 
Gitee
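
PATCH 3/6 adds a black_list parameter to NPUTestCase: each entry is a regular expression checked with re.fullmatch against the NPU model stored in Context.infomation["NPU"], and a match marks the case UNSUPPORTED instead of running it. A minimal standalone sketch of that matching rule (the helper name and the sample model strings below are illustrative, not part of the patch):

import re

def npu_in_black_list(npu_model: str, black_list) -> bool:
    # Same rule as NPUTestCase.check_npu_in_black_list: an entry matches only
    # if the regex covers the full NPU model string (re.fullmatch).
    if isinstance(black_list, str):
        black_list = [black_list]
    return any(re.fullmatch(pattern, npu_model) for pattern in black_list or [])

# Illustrative model strings; the real value comes from Context.infomation["NPU"].
print(npu_in_black_list("Ascend310P3", [r"Ascend310P.*", r"Ascend310B.*"]))  # True  -> case would be marked UNSUPPORTED
print(npu_in_black_list("Ascend910B1", [r"Ascend310P.*", r"Ascend310B.*"]))  # False -> case runs normally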
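
PATCH 4/6 adds oec/Utils.py with elapsed_time_str, which formats a timedelta by dropping leading zero-valued units and keeping one decimal place of seconds. A short usage sketch, assuming the oec package from this series is installed:

from datetime import timedelta
from oec.Utils import elapsed_time_str  # module added by PATCH 4/6

print(elapsed_time_str(timedelta(seconds=75, microseconds=300000)))  # -> "1m15.3s"
print(elapsed_time_str(timedelta(days=1, hours=2, minutes=3)))       # -> "1d2h3m0.0s"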
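
PATCH 4/6 and 6/6 redraw the console status block in place using ANSI escapes: cursor-up over the previously printed lines (ESC[nA), erase-to-end-of-line after each rewrite (ESC[K), and cursor hide/show (ESC[?25l / ESC[?25h) to stop the cursor flickering. The sketch below shows the same redraw pattern in isolation; it is not the project's print_state and, unlike PATCH 6/6, it does not handle lines wider than the terminal:

import sys
import time

HIDE_CURSOR, SHOW_CURSOR = "\033[?25l", "\033[?25h"  # same escapes HideCursor uses

def redraw(lines, prev_count):
    # Move up over the block printed last time, then reprint each line,
    # erasing to end-of-line so shorter text leaves no residue.
    if prev_count:
        sys.stdout.write(f"\033[{prev_count}A")
    for line in lines:
        sys.stdout.write(f"\r{line}\033[K\n")
    sys.stdout.flush()
    return len(lines)

if __name__ == "__main__":
    sys.stdout.write(HIDE_CURSOR)
    try:
        shown = 0
        for i in range(20):
            shown = redraw([f"step {i}", "status: running"], shown)
            time.sleep(0.1)
        redraw(["step 19", "status: done"], shown)
    finally:
        sys.stdout.write(SHOW_CURSOR + "\n")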