From a1cd9b966c5781f14c6b3bc929f30074edc52ebc Mon Sep 17 00:00:00 2001 From: lishangfan Date: Wed, 24 Sep 2025 16:24:17 +0800 Subject: [PATCH] bishengcc to bisheng --- .../add/AddCustom/op_kernel/CMakeLists.txt | 2 +- .../ReduceSumCustom/op_kernel/CMakeLists.txt | 2 +- atvc/examples/ops_pytorch/add/README.md | 29 ++++++++++--------- atvc/examples/ops_pytorch/add/run.sh | 25 ++++++++++++---- .../examples/ops_pytorch/reduce_sum/README.md | 29 ++++++++++--------- atvc/examples/ops_pytorch/reduce_sum/run.sh | 23 +++++++++++---- atvc/examples/run_examples.sh | 29 +++++++++++++------ .../include/broadcast/broadcast_op_template.h | 11 +++---- .../broadcast/utils/broadcast_buf_pool.h | 17 +++++------ atvc/include/common/kernel_utils.h | 5 ---- atvc/include/elewise/elewise_op_template.h | 9 +++--- atvc/include/reduce/reduce_op_template.h | 12 ++++---- atvc/include/reduce/utils/reduce_buf_pool.h | 6 ++-- 13 files changed, 115 insertions(+), 84 deletions(-) diff --git a/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt b/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt index 178359e4..c58e5e11 100644 --- a/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt +++ b/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt @@ -8,5 +8,5 @@ if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") add_ops_compile_options(ALL OPTIONS -g -O0) endif() -add_ops_compile_options(ALL OPTIONS -g -O0 --cce-aicore-block-local-init -w -I ${ATVC_PATH}) +add_ops_compile_options(ALL OPTIONS -w -I ${ATVC_PATH}) add_kernels_compile() \ No newline at end of file diff --git a/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt b/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt index 72549d35..76172559 100644 --- a/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt +++ b/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt @@ -8,5 +8,5 @@ if ("${CMAKE_BUILD_TYPE}x" 
STREQUAL "Debugx") add_ops_compile_options(ALL OPTIONS -g -O0) endif() -add_ops_compile_options(ALL OPTIONS -g -O0 --cce-aicore-block-local-init -w -I ${ATVC_PATH}) +add_ops_compile_options(ALL OPTIONS -w -I ${ATVC_PATH}) add_kernels_compile() \ No newline at end of file diff --git a/atvc/examples/ops_pytorch/add/README.md b/atvc/examples/ops_pytorch/add/README.md index 8cb593d4..d05f80b8 100644 --- a/atvc/examples/ops_pytorch/add/README.md +++ b/atvc/examples/ops_pytorch/add/README.md @@ -108,20 +108,21 @@ z = x + y atvc_path=$ATVC_PATH fi - # 使用bishengcc进行编译PyTorch算子 - bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ - -I${torch_location}/include \ - -I${torch_location}/include/torch/csrc/api/include \ - -I${python_include} \ - -I${atvc_path} \ - -I${torch_npu_location}/include \ - -L${torch_location}/lib \ - -L${torch_npu_location}/lib \ - -L${python_lib} \ - -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + # 使用bisheng进行编译PyTorch算子 + bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ + -I${torch_location}/include \ + -I${torch_location}/include/torch/csrc/api/include \ + -I${python_include} \ + -I${atvc_path} \ + -I${torch_npu_location}/include \ + -L${torch_location}/lib \ + -L${torch_npu_location}/lib \ + -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ + -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so # 执行测试用例 python3 run_op.py diff --git a/atvc/examples/ops_pytorch/add/run.sh b/atvc/examples/ops_pytorch/add/run.sh index bd56049e..217025b6 100644 --- a/atvc/examples/ops_pytorch/add/run.sh +++ b/atvc/examples/ops_pytorch/add/run.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
# This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. @@ -25,8 +25,20 @@ rm -rf *.json rm -rf libascendc_pytorch.so -bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ -I${torch_location}/include \ -I${torch_location}/include/torch/csrc/api/include \ -I${python_include} \ @@ -35,9 +47,10 @@ bishengcc pytorch_ascendc_extension.cpp \ -L${torch_location}/lib \ -L${torch_npu_location}/lib \ -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so python3 run_op.py diff --git a/atvc/examples/ops_pytorch/reduce_sum/README.md b/atvc/examples/ops_pytorch/reduce_sum/README.md index d154ffb5..a61696fd 100644 --- a/atvc/examples/ops_pytorch/reduce_sum/README.md +++ b/atvc/examples/ops_pytorch/reduce_sum/README.md @@ -136,20 +136,21 @@ ReduceSum是对输入tensor的指定轴进行规约累加的计算并输出结 atvc_path=$ATVC_PATH fi - # 使用bishengcc进行编译PyTorch算子 - bishengcc
pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ - -I${torch_location}/include \ - -I${torch_location}/include/torch/csrc/api/include \ - -I${python_include} \ - -I${atvc_path} \ - -I${torch_npu_location}/include \ - -L${torch_location}/lib \ - -L${torch_npu_location}/lib \ - -L${python_lib} \ - -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + # 使用bisheng进行编译PyTorch算子 + bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ + -I${torch_location}/include \ + -I${torch_location}/include/torch/csrc/api/include \ + -I${python_include} \ + -I${atvc_path} \ + -I${torch_npu_location}/include \ + -L${torch_location}/lib \ + -L${torch_npu_location}/lib \ + -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ + -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so # 执行测试用例 python3 run_op.py diff --git a/atvc/examples/ops_pytorch/reduce_sum/run.sh b/atvc/examples/ops_pytorch/reduce_sum/run.sh index bd56049e..8c8c371b 100644 --- a/atvc/examples/ops_pytorch/reduce_sum/run.sh +++ b/atvc/examples/ops_pytorch/reduce_sum/run.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright (c) 2025 Huawei Technologies Co., Ltd. # This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
@@ -25,8 +25,20 @@ rm -rf *.json rm -rf libascendc_pytorch.so -bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ -I${torch_location}/include \ -I${torch_location}/include/torch/csrc/api/include \ -I${python_include} \ @@ -35,9 +47,10 @@ bishengcc pytorch_ascendc_extension.cpp \ -L${torch_location}/lib \ -L${torch_npu_location}/lib \ -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so python3 run_op.py diff --git a/atvc/examples/run_examples.sh b/atvc/examples/run_examples.sh index 6a0c4367..a3619492 100644 --- a/atvc/examples/run_examples.sh +++ b/atvc/examples/run_examples.sh @@ -1,24 +1,24 @@ #!/bin/bash -# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. # This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License. # ====================================================================================================================== +set -e + CURRENT_DIR=$( cd $(dirname ${BASH_SOURCE:-$0}) pwd ) -if command -v bishengcc; then +if command -v bisheng; then - COMPILE_TOOL=bishengcc -elif command -v ascc; then - COMPILE_TOOL=ascc + COMPILE_TOOL=bisheng else - echo "Error: Cannot find bishengcc/ascc compiling tool, please check cann package version or set up envrionment first." + echo "Error: Cannot find bisheng compiling tool, please check cann package version or set up environment first." exit 1 fi @@ -43,16 +43,27 @@ function parse_run_mode(){ # 根据不同run-mode执行不同的操作 function compile_operator(){ + if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH + elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH + else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi + fi cd $ATVC_HOME_DIR/examples/$TEST_NAME if [ -z "$RUN_MODE" ]; then echo "Executing with npu mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64 elif [ "$RUN_MODE" = "debug_print" ]; then echo "Executing with debug_print mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -DATVC_DEBUG_MODE=1 + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64
-DATVC_DEBUG_MODE=1 elif [ "$RUN_MODE" = "profiling" ]; then echo "Executing with profiling mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -DATVC_DEBUG_MODE=2 + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64 -DATVC_DEBUG_MODE=2 else echo "--npu-mode is an optional parameter and can be left unset. If set, the value must be debug_print or profiling." echo "Execution example: 'bash run_examples.sh $TEST_NAME --run-mode=debug_print'" diff --git a/atvc/include/broadcast/broadcast_op_template.h b/atvc/include/broadcast/broadcast_op_template.h index d4c1f7b0..01ba82e9 100644 --- a/atvc/include/broadcast/broadcast_op_template.h +++ b/atvc/include/broadcast/broadcast_op_template.h @@ -99,6 +99,7 @@ public: return; } this->Process(); + pipeIn.Destroy(); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][Broadcast] End to run template function.\n"); } @@ -162,11 +163,10 @@ private: if (HAS_POST_COMPUTE) { outputCount_ = PostInputCount + PostTempCount + PostOutputCount; } - bufPool_.template Init(GetTPipePtr(), - inputCount_, // The number of inputs required for double buffer - outputCount_, // The number of calculation results is generally consistent with inputNum - tilingData_->A2 * tilingData_->A12 * DATA_SIZE, // Input Tensor size - tilingData_->A2 * tilingData_->B2 * DATA_SIZE); // Output Tensor Size + bufPool_.template Init(inputCount_, // The number of inputs required for double buffer + outputCount_, // The number of calculation results is generally consistent with inputNum + tilingData_->A2 * tilingData_->A12 * DATA_SIZE, // Input Tensor size + tilingData_->A2 * tilingData_->B2 * DATA_SIZE); // Output Tensor Size } template @@ -427,6 +427,7 @@ private: GM_ADDR src_; GM_ADDR dst_; + AscendC::TPipe pipeIn
AscendC::GlobalTensor srcGlobal_; AscendC::GlobalTensor dstGlobal_; BroadcastCompute compute_; diff --git a/atvc/include/broadcast/utils/broadcast_buf_pool.h b/atvc/include/broadcast/utils/broadcast_buf_pool.h index 02161524..9f2b5a31 100644 --- a/atvc/include/broadcast/utils/broadcast_buf_pool.h +++ b/atvc/include/broadcast/utils/broadcast_buf_pool.h @@ -36,8 +36,7 @@ public: __aicore__ inline BroadcastBufPool() {}; template - __aicore__ inline void Init(AscendC::TPipe *pipeIn, - int32_t inputNum, // The number of inputs required for doublebuff + __aicore__ inline void Init(int32_t inputNum, // The number of inputs required for doublebuff int32_t computeNum, // The number of calculation results is generally consistent with inputNum int32_t inBlockLen, // Basic input block size for one calculation int32_t outBlockLen) @@ -52,7 +51,6 @@ public: inputNum *= ATVC::CONST2; computeNum *= ATVC::CONST2; } - pipe_ = pipeIn; constexpr int32_t eleSize = static_cast(sizeof(T)); inputNum_ = inBlockLen / eleSize; outputNum_ = outBlockLen / eleSize; @@ -64,7 +62,7 @@ public: computeUnit_.eleSize = eleSize; computeUnit_.offset = inBlockLen * inputNum; // Init buffer - pipe_->InitBuffer(qQue_, poolSize); + GetTPipePtr()->InitBuffer(qQue_, poolSize); } template @@ -92,7 +90,7 @@ public: __aicore__ inline const void SetVecSync(AscendC::LocalTensor &tensor) { uint32_t idx = GetInputTensorIdx(tensor); - event_t eventId = static_cast(pipe_->AllocEventID()); + event_t eventId = static_cast(GetTPipePtr()->AllocEventID()); vecEventId_[idx] = eventId; AscendC::SetFlag(eventId); } @@ -102,14 +100,14 @@ public: { uint32_t idx = GetInputTensorIdx(tensor); AscendC::WaitFlag(vecEventId_[idx]); - pipe_->ReleaseEventID(vecEventId_[idx]); + GetTPipePtr()->ReleaseEventID(vecEventId_[idx]); } template __aicore__ inline const void SetCopyOutSync(AscendC::LocalTensor &tensor) { uint32_t idx = GetOutputTensorIdx(tensor); - event_t eventId = static_cast(pipe_->AllocEventID()); + event_t eventId = 
static_cast(GetTPipePtr()->AllocEventID()); outEventId_[idx] = eventId; AscendC::SetFlag(eventId); } @@ -119,7 +117,7 @@ public: { uint32_t idx = GetOutputTensorIdx(tensor); AscendC::WaitFlag(outEventId_[idx]); - pipe_->ReleaseEventID(outEventId_[idx]); + GetTPipePtr()->ReleaseEventID(outEventId_[idx]); } template @@ -142,7 +140,7 @@ public: __aicore__ inline const void ResetEvent() { - pipe_->Reset(); + GetTPipePtr()->Reset(); } private: @@ -176,7 +174,6 @@ private: event_t outEventId_[MAX_INPUT_SIZE]; bool isBusyOut_[MAX_INPUT_SIZE] = {false}; AscendC::TBuf<> qQue_; - AscendC::TPipe *pipe_; int32_t inputNum_; int32_t outputNum_; }; diff --git a/atvc/include/common/kernel_utils.h b/atvc/include/common/kernel_utils.h index 6aa2d977..be910f66 100644 --- a/atvc/include/common/kernel_utils.h +++ b/atvc/include/common/kernel_utils.h @@ -15,11 +15,6 @@ #include "common/const_def.h" #include "kernel_operator.h" namespace ATVC { -#ifndef __ASCC_HOST__ -#ifndef __NPU_HOST__ -__BLOCK_LOCAL__ static AscendC::TPipe g_pipe; -#endif // __NPU_HOST__ -#endif // __ASCC_HOST__ template __aicore__ inline void SetEvent(AscendC::HardEvent evt) { diff --git a/atvc/include/elewise/elewise_op_template.h b/atvc/include/elewise/elewise_op_template.h index e1a78c45..819caac1 100644 --- a/atvc/include/elewise/elewise_op_template.h +++ b/atvc/include/elewise/elewise_op_template.h @@ -61,11 +61,11 @@ public: __aicore__ inline void Run(Args&&... 
args) { ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][EleWise] Start to run template function.\n"); - g_pipe.Reset(); constexpr std::size_t GM_ARGS_COUNT = INPUT_COUNT + OUTPUT_COUNT; GM_ADDR argsArr[INPUT_COUNT + OUTPUT_COUNT]; InitHelper<0>(argsArr, ATVC::Forward(args)...); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][EleWise] End to run template function.\n"); + pipeIn.Destroy(); } private: @@ -130,13 +130,13 @@ private: // Each in/out/temp uses a pipe for management, // and each pipe manages multiple tensors with consecutive sub addresses if constexpr (INPUT_COUNT > 0) { - g_pipe.InitBuffer(inQueue, param_->nBufferNum, param_->tilingData.tiledCnt * IN_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(inQueue, param_->nBufferNum, param_->tilingData.tiledCnt * IN_TENSOR_SUM_BYTES); } if constexpr (OUTPUT_COUNT > 0) { - g_pipe.InitBuffer(outQueue, param_->nBufferNum, param_->tilingData.tiledCnt * OUT_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(outQueue, param_->nBufferNum, param_->tilingData.tiledCnt * OUT_TENSOR_SUM_BYTES); } if constexpr(TEMP_COUNT > 0) { - g_pipe.InitBuffer(tempQueue, param_->tilingData.tiledCnt * TEMP_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(tempQueue, param_->tilingData.tiledCnt * TEMP_TENSOR_SUM_BYTES); } } // Call CopyIn/CopyOut based on the tiling loop, as well as externally passed Compute calculations @@ -379,6 +379,7 @@ private: // The calculation object passed in by user EleWiseCompute compute_; + AscendC::TPipe pipeIn; }; } } diff --git a/atvc/include/reduce/reduce_op_template.h b/atvc/include/reduce/reduce_op_template.h index 4cdde076..3eca4811 100644 --- a/atvc/include/reduce/reduce_op_template.h +++ b/atvc/include/reduce/reduce_op_template.h @@ -82,6 +82,7 @@ public: Init((GM_ADDR)(param_->workspaceAddr), x, y); Process(); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][Reduce] End to run template function.\n"); + pipeIn.Destroy(); } public: @@ -93,19 +94,18 @@ public: template __aicore__ inline void Init(GM_ADDR workspace, Args... 
args) { - pipe_ = GetTPipePtr(); basicBlockLen_ = this->param_->tilingData.basicBlock; - bufPool_.template Init(pipe_, T_BUF_SIZE, PROMOTE_BUF_SIZE, this->param_->tilingData.basicBlock); + bufPool_.template Init(T_BUF_SIZE, PROMOTE_BUF_SIZE, this->param_->tilingData.basicBlock); InitArgsInput<0>(args...); InitArgsWorkspace(workspace); - pipe_->InitBuffer(tempResQue_, RES_BUF_SIZE); + GetTPipePtr()->InitBuffer(tempResQue_, RES_BUF_SIZE); computeRes_ = tempResQue_.Get(); - pipe_->InitBuffer(tempBufQue_, CACHE_BUF_SIZE); + GetTPipePtr()->InitBuffer(tempBufQue_, CACHE_BUF_SIZE); tempBuf_ = tempBufQue_.template Get(); - pipe_->InitBuffer(tempUbQue_, BLOCK_SIZE_BYTE); + GetTPipePtr()->InitBuffer(tempUbQue_, BLOCK_SIZE_BYTE); } /*! @@ -371,7 +371,7 @@ protected: private: ATVC::ReduceParam* param_; // The runtime parameters calculated by CalcReduceTiling API - AscendC::TPipe* pipe_; + AscendC::TPipe pipeIn; AscendC::TBuf<> oriVecQue_; AscendC::TBuf<> tempResQue_; AscendC::TBuf<> tempBufQue_; diff --git a/atvc/include/reduce/utils/reduce_buf_pool.h b/atvc/include/reduce/utils/reduce_buf_pool.h index e6779444..eeba4768 100644 --- a/atvc/include/reduce/utils/reduce_buf_pool.h +++ b/atvc/include/reduce/utils/reduce_buf_pool.h @@ -37,9 +37,8 @@ public: __aicore__ inline ReduceBufPool() {}; template - __aicore__ inline void Init(AscendC::TPipe* pipeIn, int32_t inputNum, int32_t computeNum, int32_t basicBlockLen) + __aicore__ inline void Init(int32_t inputNum, int32_t computeNum, int32_t basicBlockLen) { - pipe_ = pipeIn; constexpr int32_t inputEleSize = sizeof(DataType); constexpr int32_t computeEleSize = sizeof(PromoteDataType); basicNum_ = basicBlockLen / sizeof(DataType); @@ -51,7 +50,7 @@ public: computeUnit_.eleSize = computeEleSize; computeUnit_.offset = basicNum_ * sizeof(DataType) * inputNum; // Init buffer - pipe_->InitBuffer(qQue_, poolSize); + GetTPipePtr()->InitBuffer(qQue_, poolSize); AscendC::LocalTensor inputUb = qQue_.GetWithOffset(basicNum_ * inputNum, 0); 
AscendC::Duplicate(inputUb, 0, basicNum_ * inputNum); } @@ -124,7 +123,6 @@ private: PoolManagerUnit computeUnit_; event_t eventIdV2Mte2_[MAX_INPUT_SIZE]; AscendC::TBuf<> qQue_; - AscendC::TPipe* pipe_; int32_t basicNum_; }; // class ReduceBufPool } // namespace KernelUtils -- Gitee