From a1cd9b966c5781f14c6b3bc929f30074edc52ebc Mon Sep 17 00:00:00 2001 From: lishangfan Date: Wed, 24 Sep 2025 16:24:17 +0800 Subject: [PATCH] bishengcc to bisheng --- .../add/AddCustom/op_kernel/CMakeLists.txt | 2 +- .../ReduceSumCustom/op_kernel/CMakeLists.txt | 2 +- atvc/examples/ops_pytorch/add/README.md | 29 ++++++++++--------- atvc/examples/ops_pytorch/add/run.sh | 25 ++++++++++++---- .../examples/ops_pytorch/reduce_sum/README.md | 29 ++++++++++--------- atvc/examples/ops_pytorch/reduce_sum/run.sh | 23 +++++++++++---- atvc/examples/run_examples.sh | 29 +++++++++++++------ .../include/broadcast/broadcast_op_template.h | 11 +++---- .../broadcast/utils/broadcast_buf_pool.h | 17 +++++------ atvc/include/common/kernel_utils.h | 5 ---- atvc/include/elewise/elewise_op_template.h | 9 +++--- atvc/include/reduce/reduce_op_template.h | 12 ++++---- atvc/include/reduce/utils/reduce_buf_pool.h | 6 ++-- 13 files changed, 115 insertions(+), 84 deletions(-) diff --git a/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt b/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt index 178359e4..c58e5e11 100644 --- a/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt +++ b/atvc/examples/ops_aclnn/add/AddCustom/op_kernel/CMakeLists.txt @@ -8,5 +8,5 @@ if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") add_ops_compile_options(ALL OPTIONS -g -O0) endif() -add_ops_compile_options(ALL OPTIONS -g -O0 --cce-aicore-block-local-init -w -I ${ATVC_PATH}) +add_ops_compile_options(ALL OPTIONS -w -I ${ATVC_PATH}) add_kernels_compile() \ No newline at end of file diff --git a/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt b/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt index 72549d35..76172559 100644 --- a/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt +++ b/atvc/examples/ops_aclnn/reduce_sum/ReduceSumCustom/op_kernel/CMakeLists.txt @@ -8,5 +8,5 @@ if ("${CMAKE_BUILD_TYPE}x" 
STREQUAL "Debugx") add_ops_compile_options(ALL OPTIONS -g -O0) endif() -add_ops_compile_options(ALL OPTIONS -g -O0 --cce-aicore-block-local-init -w -I ${ATVC_PATH}) +add_ops_compile_options(ALL OPTIONS -w -I ${ATVC_PATH}) add_kernels_compile() \ No newline at end of file diff --git a/atvc/examples/ops_pytorch/add/README.md b/atvc/examples/ops_pytorch/add/README.md index 8cb593d4..d05f80b8 100644 --- a/atvc/examples/ops_pytorch/add/README.md +++ b/atvc/examples/ops_pytorch/add/README.md @@ -108,20 +108,21 @@ z = x + y atvc_path=$ATVC_PATH fi - # 使用bishengcc进行编译PyTorch算子 - bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ - -I${torch_location}/include \ - -I${torch_location}/include/torch/csrc/api/include \ - -I${python_include} \ - -I${atvc_path} \ - -I${torch_npu_location}/include \ - -L${torch_location}/lib \ - -L${torch_npu_location}/lib \ - -L${python_lib} \ - -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + # 使用bisheng进行编译PyTorch算子 + bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ + -I${torch_location}/include \ + -I${torch_location}/include/torch/csrc/api/include \ + -I${python_include} \ + -I${atvc_path} \ + -I${torch_npu_location}/include \ + -L${torch_location}/lib \ + -L${torch_npu_location}/lib \ + -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ + -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so # 执行测试用例 python3 run_op.py diff --git a/atvc/examples/ops_pytorch/add/run.sh b/atvc/examples/ops_pytorch/add/run.sh index bd56049e..217025b6 100644 --- a/atvc/examples/ops_pytorch/add/run.sh +++ b/atvc/examples/ops_pytorch/add/run.sh @@ -1,7 +1,7 @@ #!/bin/bash -# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
# This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. @@ -25,8 +25,20 @@ rm -rf *.json rm -rf libascendc_pytorch.so -bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ -I${torch_location}/include \ -I${torch_location}/include/torch/csrc/api/include \ -I${python_include} \ @@ -35,9 +47,10 @@ bishengcc pytorch_ascendc_extension.cpp \ -L${torch_location}/lib \ -L${torch_npu_location}/lib \ -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so python3 run_op.py diff --git a/atvc/examples/ops_pytorch/reduce_sum/README.md b/atvc/examples/ops_pytorch/reduce_sum/README.md index d154ffb5..a61696fd 100644 --- a/atvc/examples/ops_pytorch/reduce_sum/README.md +++ b/atvc/examples/ops_pytorch/reduce_sum/README.md @@ -136,20 +136,21 @@ ReduceSum是对输入tensor的指定轴进行规约累加的计算并输出结 atvc_path=$ATVC_PATH fi - # 使用bishengcc进行编译PyTorch算子 - bishengcc
pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ - -I${torch_location}/include \ - -I${torch_location}/include/torch/csrc/api/include \ - -I${python_include} \ - -I${atvc_path} \ - -I${torch_npu_location}/include \ - -L${torch_location}/lib \ - -L${torch_npu_location}/lib \ - -L${python_lib} \ - -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + # 使用bisheng进行编译PyTorch算子 + bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ + -I${torch_location}/include \ + -I${torch_location}/include/torch/csrc/api/include \ + -I${python_include} \ + -I${atvc_path} \ + -I${torch_npu_location}/include \ + -L${torch_location}/lib \ + -L${torch_npu_location}/lib \ + -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ + -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so # 执行测试用例 python3 run_op.py diff --git a/atvc/examples/ops_pytorch/reduce_sum/run.sh b/atvc/examples/ops_pytorch/reduce_sum/run.sh index bd56049e..8c8c371b 100644 --- a/atvc/examples/ops_pytorch/reduce_sum/run.sh +++ b/atvc/examples/ops_pytorch/reduce_sum/run.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright (c) 2025 Huawei Technologies Co., Ltd. # This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
@@ -25,8 +25,20 @@ rm -rf *.json rm -rf libascendc_pytorch.so -bishengcc pytorch_ascendc_extension.cpp \ - -arch Ascend910B1 \ +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +bisheng -x cce pytorch_ascendc_extension.cpp \ + -D_GLIBCXX_USE_CXX11_ABI=0 \ -I${torch_location}/include \ -I${torch_location}/include/torch/csrc/api/include \ -I${python_include} \ @@ -35,9 +47,10 @@ bishengcc pytorch_ascendc_extension.cpp \ -L${torch_location}/lib \ -L${torch_npu_location}/lib \ -L${python_lib} \ + -L${_ASCEND_INSTALL_PATH}/lib64 \ -ltorch -ltorch_cpu -lc10 -ltorch_npu -lpython3 -ltorch_python \ - -o libascendc_pytorch.so \ - -shared + -shared -cce-enable-plugin --cce-aicore-arch=dav-c220 -fPIC -ltiling_api -lplatform -lm -ldl \ + -o libascendc_pytorch.so python3 run_op.py diff --git a/atvc/examples/run_examples.sh b/atvc/examples/run_examples.sh index 6a0c4367..a3619492 100644 --- a/atvc/examples/run_examples.sh +++ b/atvc/examples/run_examples.sh @@ -1,24 +1,24 @@ #!/bin/bash -# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. # This file is a part of the CANN Open Software. -# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Licensed under CANN Open Software License Agreement Version 2.0 (the "License"). # Please refer to the License for details. You may not use this file except in compliance with the License. # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License. # ====================================================================================================================== +set -e + CURRENT_DIR=$( cd $(dirname ${BASH_SOURCE:-$0}) pwd ) -if command -v bishengcc; then +if command -v bisheng; then - COMPILE_TOOL=bishengcc -elif command -v ascc; then - COMPILE_TOOL=ascc + COMPILE_TOOL=bisheng else - echo "Error: Cannot find bishengcc/ascc compiling tool, please check cann package version or set up envrionment first." + echo "Error: Cannot find bisheng compiling tool, please check cann package version or set up environment first." exit 1 fi @@ -43,16 +43,27 @@ function parse_run_mode(){ # 根据不同run-mode执行不同的操作 function compile_operator(){ + if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH + elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH + else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi + fi cd $ATVC_HOME_DIR/examples/$TEST_NAME if [ -z "$RUN_MODE" ]; then echo "Executing with npu mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64 elif [ "$RUN_MODE" = "debug_print" ]; then echo "Executing with debug_print mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -DATVC_DEBUG_MODE=1 + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64
-DATVC_DEBUG_MODE=1 elif [ "$RUN_MODE" = "profiling" ]; then echo "Executing with profiling mode" - ${COMPILE_TOOL} -arch Ascend910B1 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -DATVC_DEBUG_MODE=2 + ${COMPILE_TOOL} -x cce -cce-enable-plugin --cce-aicore-arch=dav-c220 $TEST_NAME.cpp -o $TEST_NAME -I ${ATVC_HOME_DIR}/include -I ${CURRENT_DIR}/common -ltiling_api -lplatform -lm -ldl -L${_ASCEND_INSTALL_PATH}/lib64 -DATVC_DEBUG_MODE=2 else echo "--npu-mode is an optional parameter and can be left unset. If set, the value must be debug_print or profiling." echo "Execution example: 'bash run_examples.sh $TEST_NAME --run-mode=debug_print'" diff --git a/atvc/include/broadcast/broadcast_op_template.h b/atvc/include/broadcast/broadcast_op_template.h index d4c1f7b0..01ba82e9 100644 --- a/atvc/include/broadcast/broadcast_op_template.h +++ b/atvc/include/broadcast/broadcast_op_template.h @@ -99,6 +99,7 @@ public: return; } this->Process(); + pipeIn.Destroy(); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][Broadcast] End to run template function.\n"); } @@ -162,11 +163,10 @@ private: if (HAS_POST_COMPUTE) { outputCount_ = PostInputCount + PostTempCount + PostOutputCount; } - bufPool_.template Init(GetTPipePtr(), - inputCount_, // The number of inputs required for double buffer - outputCount_, // The number of calculation results is generally consistent with inputNum - tilingData_->A2 * tilingData_->A12 * DATA_SIZE, // Input Tensor size - tilingData_->A2 * tilingData_->B2 * DATA_SIZE); // Output Tensor Size + bufPool_.template Init(inputCount_, // The number of inputs required for double buffer + outputCount_, // The number of calculation results is generally consistent with inputNum + tilingData_->A2 * tilingData_->A12 * DATA_SIZE, // Input Tensor size + tilingData_->A2 * tilingData_->B2 * DATA_SIZE); // Output Tensor Size } template @@ -427,6 +427,7 @@ private: GM_ADDR src_; GM_ADDR dst_; + AscendC::TPipe pipeIn
AscendC::GlobalTensor srcGlobal_; AscendC::GlobalTensor dstGlobal_; BroadcastCompute compute_; diff --git a/atvc/include/broadcast/utils/broadcast_buf_pool.h b/atvc/include/broadcast/utils/broadcast_buf_pool.h index 02161524..9f2b5a31 100644 --- a/atvc/include/broadcast/utils/broadcast_buf_pool.h +++ b/atvc/include/broadcast/utils/broadcast_buf_pool.h @@ -36,8 +36,7 @@ public: __aicore__ inline BroadcastBufPool() {}; template - __aicore__ inline void Init(AscendC::TPipe *pipeIn, - int32_t inputNum, // The number of inputs required for doublebuff + __aicore__ inline void Init(int32_t inputNum, // The number of inputs required for doublebuff int32_t computeNum, // The number of calculation results is generally consistent with inputNum int32_t inBlockLen, // Basic input block size for one calculation int32_t outBlockLen) @@ -52,7 +51,6 @@ public: inputNum *= ATVC::CONST2; computeNum *= ATVC::CONST2; } - pipe_ = pipeIn; constexpr int32_t eleSize = static_cast(sizeof(T)); inputNum_ = inBlockLen / eleSize; outputNum_ = outBlockLen / eleSize; @@ -64,7 +62,7 @@ public: computeUnit_.eleSize = eleSize; computeUnit_.offset = inBlockLen * inputNum; // Init buffer - pipe_->InitBuffer(qQue_, poolSize); + GetTPipePtr()->InitBuffer(qQue_, poolSize); } template @@ -92,7 +90,7 @@ public: __aicore__ inline const void SetVecSync(AscendC::LocalTensor &tensor) { uint32_t idx = GetInputTensorIdx(tensor); - event_t eventId = static_cast(pipe_->AllocEventID()); + event_t eventId = static_cast(GetTPipePtr()->AllocEventID()); vecEventId_[idx] = eventId; AscendC::SetFlag(eventId); } @@ -102,14 +100,14 @@ public: { uint32_t idx = GetInputTensorIdx(tensor); AscendC::WaitFlag(vecEventId_[idx]); - pipe_->ReleaseEventID(vecEventId_[idx]); + GetTPipePtr()->ReleaseEventID(vecEventId_[idx]); } template __aicore__ inline const void SetCopyOutSync(AscendC::LocalTensor &tensor) { uint32_t idx = GetOutputTensorIdx(tensor); - event_t eventId = static_cast(pipe_->AllocEventID()); + event_t eventId = 
static_cast(GetTPipePtr()->AllocEventID()); outEventId_[idx] = eventId; AscendC::SetFlag(eventId); } @@ -119,7 +117,7 @@ public: { uint32_t idx = GetOutputTensorIdx(tensor); AscendC::WaitFlag(outEventId_[idx]); - pipe_->ReleaseEventID(outEventId_[idx]); + GetTPipePtr()->ReleaseEventID(outEventId_[idx]); } template @@ -142,7 +140,7 @@ public: __aicore__ inline const void ResetEvent() { - pipe_->Reset(); + GetTPipePtr()->Reset(); } private: @@ -176,7 +174,6 @@ private: event_t outEventId_[MAX_INPUT_SIZE]; bool isBusyOut_[MAX_INPUT_SIZE] = {false}; AscendC::TBuf<> qQue_; - AscendC::TPipe *pipe_; int32_t inputNum_; int32_t outputNum_; }; diff --git a/atvc/include/common/kernel_utils.h b/atvc/include/common/kernel_utils.h index 6aa2d977..be910f66 100644 --- a/atvc/include/common/kernel_utils.h +++ b/atvc/include/common/kernel_utils.h @@ -15,11 +15,6 @@ #include "common/const_def.h" #include "kernel_operator.h" namespace ATVC { -#ifndef __ASCC_HOST__ -#ifndef __NPU_HOST__ -__BLOCK_LOCAL__ static AscendC::TPipe g_pipe; -#endif // __NPU_HOST__ -#endif // __ASCC_HOST__ template __aicore__ inline void SetEvent(AscendC::HardEvent evt) { diff --git a/atvc/include/elewise/elewise_op_template.h b/atvc/include/elewise/elewise_op_template.h index e1a78c45..819caac1 100644 --- a/atvc/include/elewise/elewise_op_template.h +++ b/atvc/include/elewise/elewise_op_template.h @@ -61,11 +61,11 @@ public: __aicore__ inline void Run(Args&&... 
args) { ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][EleWise] Start to run template function.\n"); - g_pipe.Reset(); constexpr std::size_t GM_ARGS_COUNT = INPUT_COUNT + OUTPUT_COUNT; GM_ADDR argsArr[INPUT_COUNT + OUTPUT_COUNT]; InitHelper<0>(argsArr, ATVC::Forward(args)...); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][EleWise] End to run template function.\n"); + pipeIn.Destroy(); } private: @@ -130,13 +130,13 @@ private: // Each in/out/temp uses a pipe for management, // and each pipe manages multiple tensors with consecutive sub addresses if constexpr (INPUT_COUNT > 0) { - g_pipe.InitBuffer(inQueue, param_->nBufferNum, param_->tilingData.tiledCnt * IN_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(inQueue, param_->nBufferNum, param_->tilingData.tiledCnt * IN_TENSOR_SUM_BYTES); } if constexpr (OUTPUT_COUNT > 0) { - g_pipe.InitBuffer(outQueue, param_->nBufferNum, param_->tilingData.tiledCnt * OUT_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(outQueue, param_->nBufferNum, param_->tilingData.tiledCnt * OUT_TENSOR_SUM_BYTES); } if constexpr(TEMP_COUNT > 0) { - g_pipe.InitBuffer(tempQueue, param_->tilingData.tiledCnt * TEMP_TENSOR_SUM_BYTES); + GetTPipePtr()->InitBuffer(tempQueue, param_->tilingData.tiledCnt * TEMP_TENSOR_SUM_BYTES); } } // Call CopyIn/CopyOut based on the tiling loop, as well as externally passed Compute calculations @@ -379,6 +379,7 @@ private: // The calculation object passed in by user EleWiseCompute compute_; + AscendC::TPipe pipeIn; }; } } diff --git a/atvc/include/reduce/reduce_op_template.h b/atvc/include/reduce/reduce_op_template.h index 4cdde076..3eca4811 100644 --- a/atvc/include/reduce/reduce_op_template.h +++ b/atvc/include/reduce/reduce_op_template.h @@ -82,6 +82,7 @@ public: Init((GM_ADDR)(param_->workspaceAddr), x, y); Process(); ATVC::Kernel::DebugPrintf("[INFO]:[ATVC][Reduce] End to run template function.\n"); + pipeIn.Destroy(); } public: @@ -93,19 +94,18 @@ public: template __aicore__ inline void Init(GM_ADDR workspace, Args... 
args) { - pipe_ = GetTPipePtr(); basicBlockLen_ = this->param_->tilingData.basicBlock; - bufPool_.template Init(pipe_, T_BUF_SIZE, PROMOTE_BUF_SIZE, this->param_->tilingData.basicBlock); + bufPool_.template Init(T_BUF_SIZE, PROMOTE_BUF_SIZE, this->param_->tilingData.basicBlock); InitArgsInput<0>(args...); InitArgsWorkspace(workspace); - pipe_->InitBuffer(tempResQue_, RES_BUF_SIZE); + GetTPipePtr()->InitBuffer(tempResQue_, RES_BUF_SIZE); computeRes_ = tempResQue_.Get(); - pipe_->InitBuffer(tempBufQue_, CACHE_BUF_SIZE); + GetTPipePtr()->InitBuffer(tempBufQue_, CACHE_BUF_SIZE); tempBuf_ = tempBufQue_.template Get(); - pipe_->InitBuffer(tempUbQue_, BLOCK_SIZE_BYTE); + GetTPipePtr()->InitBuffer(tempUbQue_, BLOCK_SIZE_BYTE); } /*! @@ -371,7 +371,7 @@ protected: private: ATVC::ReduceParam* param_; // The runtime parameters calculated by CalcReduceTiling API - AscendC::TPipe* pipe_; + AscendC::TPipe pipeIn; AscendC::TBuf<> oriVecQue_; AscendC::TBuf<> tempResQue_; AscendC::TBuf<> tempBufQue_; diff --git a/atvc/include/reduce/utils/reduce_buf_pool.h b/atvc/include/reduce/utils/reduce_buf_pool.h index e6779444..eeba4768 100644 --- a/atvc/include/reduce/utils/reduce_buf_pool.h +++ b/atvc/include/reduce/utils/reduce_buf_pool.h @@ -37,9 +37,8 @@ public: __aicore__ inline ReduceBufPool() {}; template - __aicore__ inline void Init(AscendC::TPipe* pipeIn, int32_t inputNum, int32_t computeNum, int32_t basicBlockLen) + __aicore__ inline void Init(int32_t inputNum, int32_t computeNum, int32_t basicBlockLen) { - pipe_ = pipeIn; constexpr int32_t inputEleSize = sizeof(DataType); constexpr int32_t computeEleSize = sizeof(PromoteDataType); basicNum_ = basicBlockLen / sizeof(DataType); @@ -51,7 +50,7 @@ public: computeUnit_.eleSize = computeEleSize; computeUnit_.offset = basicNum_ * sizeof(DataType) * inputNum; // Init buffer - pipe_->InitBuffer(qQue_, poolSize); + GetTPipePtr()->InitBuffer(qQue_, poolSize); AscendC::LocalTensor inputUb = qQue_.GetWithOffset(basicNum_ * inputNum, 0); 
AscendC::Duplicate(inputUb, 0, basicNum_ * inputNum); } @@ -124,7 +123,6 @@ private: PoolManagerUnit computeUnit_; event_t eventIdV2Mte2_[MAX_INPUT_SIZE]; AscendC::TBuf<> qQue_; - AscendC::TPipe* pipe_; int32_t basicNum_; }; // class ReduceBufPool } // namespace KernelUtils -- Gitee