diff --git a/impl/reduce/reduce_common_util_v220_impl.h b/impl/reduce/reduce_common_util_v220_impl.h index f110b2c6c3009da62a04e07ee5dfa90235180ad4..90f74fd50e40ac819f783b216eb179cf038ffc1e 100644 --- a/impl/reduce/reduce_common_util_v220_impl.h +++ b/impl/reduce/reduce_common_util_v220_impl.h @@ -374,7 +374,7 @@ __aicore__ inline void BlkReduceForLoop(const LocalTensor& dst, } if (tailNBlkNum > 0) { dstOffset = nMaxRepBlkNum * MAX_REPEAT_TIMES * blkReduceDstStride; - blkReduceSrcOffset = srcOffset + nMaxRepBlkNum * MAX_REPEAT_TIMES * srcPerBlkElements; + blkReduceSrcOffset = srcOffset + nMaxRepBlkNum * MAX_REPEAT_TIMES * DEFAULT_BLK_NUM * srcPerBlkElements; BlockReduceCompute(dst[dstOffset], tmp[blkReduceSrcOffset], tailNBlkNum, blkReduceMask, DEFAULT_BLK_STRIDE, DEFAULT_REPEAT_STRIDE); PipeBarrier();