From e6cd17de3abd0f32c1e9e4d8e2aef913ba7a1f3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E6=97=AD?=
Date: Mon, 22 Sep 2025 20:29:12 +0800
Subject: [PATCH] version update

---
 impl/matmul/kfc/matmul_server_aux.h | 8 ++++----
 impl/matmul/kfc/matmul_server_impl_c310.h | 12 ++++++------
 impl/matmul/param/matmul_shape_tiling.h | 8 ++++----
 impl/matmul/param/matmul_tensor_info.h | 10 +++++-----
 impl/matmul/policy/matmul_policy.h | 8 ++++----
 impl/matmul/policy/matmul_private_modules.h | 16 ++++++++--------
 .../stage/copy_cube_in/base/copy_cube_in_base.h | 2 +-
 .../copy_cube_in/base/copy_cube_in_from_l1.h | 2 +-
 .../base/copy_cube_in_ubtol1_singleshape.h | 2 +-
 impl/matmul/stage/split/load_to_l0_utils.h | 2 +-
 impl/matmul/utils/matmul_utils.h | 4 ++--
 .../layernorm/layernorm_tiling_impl.cpp | 1 +
 lib/matmul/matmul_client.h | 2 +-
 version.info | 2 +-
 14 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/impl/matmul/kfc/matmul_server_aux.h b/impl/matmul/kfc/matmul_server_aux.h
index 383097eb..3b220c91 100644
--- a/impl/matmul/kfc/matmul_server_aux.h
+++ b/impl/matmul/kfc/matmul_server_aux.h
@@ -408,22 +408,22 @@ public:
         }
     }
 #if defined(__DAV_C310__)
-    __aicore__ inline void SetTensorScaleA(const GlobalTensor<float8_e8m0_t> &a, bool isTransposeScaleA = false)
+    __aicore__ inline void SetTensorScaleA(const GlobalTensor<fp8_e8m0_t> &a, bool isTransposeScaleA = false)
     {
         static_assert(!ToMatmulConfig(MM_CFG).enableMixDualMaster,
             "SetTensorScaleA not support when enableMixDualMaster is enabled.");
     }
-    __aicore__ inline void SetTensorScaleA(const LocalTensor<float8_e8m0_t> &a, bool isTransposeScaleA = false)
+    __aicore__ inline void SetTensorScaleA(const LocalTensor<fp8_e8m0_t> &a, bool isTransposeScaleA = false)
     {
         static_assert(!ToMatmulConfig(MM_CFG).enableMixDualMaster,
             "SetTensorScaleA not support when enableMixDualMaster is enabled.");
     }
-    __aicore__ inline void SetTensorScaleB(const GlobalTensor<float8_e8m0_t> &b, bool isTransposeScaleB = true)
+    __aicore__ inline void SetTensorScaleB(const GlobalTensor<fp8_e8m0_t> &b, bool isTransposeScaleB = true)
    {
         static_assert(!ToMatmulConfig(MM_CFG).enableMixDualMaster,
             "SetTensorScaleB not support when enableMixDualMaster is enabled.");
     }
-    __aicore__ inline void SetTensorScaleB(const LocalTensor<float8_e8m0_t> &b, bool isTransposeScaleB = true)
+    __aicore__ inline void SetTensorScaleB(const LocalTensor<fp8_e8m0_t> &b, bool isTransposeScaleB = true)
     {
         static_assert(!ToMatmulConfig(MM_CFG).enableMixDualMaster,
             "SetTensorScaleB not support when enableMixDualMaster is enabled.");
diff --git a/impl/matmul/kfc/matmul_server_impl_c310.h b/impl/matmul/kfc/matmul_server_impl_c310.h
index 3fe9635b..a5f7054e 100644
--- a/impl/matmul/kfc/matmul_server_impl_c310.h
+++ b/impl/matmul/kfc/matmul_server_impl_c310.h
@@ -422,11 +422,11 @@ __aicore__ inline void MatmulService()) {
-        GlobalTensor<float8_e8m0_t> scaleAGlobal;
-        scaleAGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ float8_e8m0_t *>(body.quantAddr), 0);
+        GlobalTensor<fp8_e8m0_t> scaleAGlobal;
+        scaleAGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ fp8_e8m0_t *>(body.quantAddr), 0);
         mul.SetTensorScaleA(scaleAGlobal, isTransScaleA);
     } else if constexpr (PhyMxScalePosIsUB() || PhyMxScalePosIsL1()) {
-        const auto &scaleALocal = GetLocalTensor<float8_e8m0_t>(body.quantAddr, 0);
+        const auto &scaleALocal = GetLocalTensor<fp8_e8m0_t>(body.quantAddr, 0);
         mul.SetTensorScaleA(scaleALocal, isTransScaleA);
         return;
     }
@@ -439,11 +439,11 @@ __aicore__ inline void MatmulService> 1) & 0b01;
     if constexpr (PhyMxScalePosIsGM()) {
-        GlobalTensor<float8_e8m0_t> scaleBGlobal;
-        scaleBGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ float8_e8m0_t *>(body.quantScalar), 0);
+        GlobalTensor<fp8_e8m0_t> scaleBGlobal;
+        scaleBGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ fp8_e8m0_t *>(body.quantScalar), 0);
         mul.SetTensorScaleB(scaleBGlobal, isTransScaleB);
     } else if constexpr (PhyMxScalePosIsUB() || PhyMxScalePosIsL1()) {
-        const auto &scaleBLocal = GetLocalTensor<float8_e8m0_t>(body.quantScalar, 0);
+        const auto &scaleBLocal = GetLocalTensor<fp8_e8m0_t>(body.quantScalar, 0);
         mul.SetTensorScaleB(scaleBLocal, isTransScaleB);
         return;
     }
diff --git a/impl/matmul/param/matmul_shape_tiling.h b/impl/matmul/param/matmul_shape_tiling.h
index c7244e8d..58649d49 100644
--- a/impl/matmul/param/matmul_shape_tiling.h
+++ b/impl/matmul/param/matmul_shape_tiling.h
@@ -167,17 +167,17 @@ private:
     const auto L0CUseSizeFactor = (tiling_.GetDbL0C() == Impl::DB_FACTOR) ? Impl::DB_FACTOR : 1;
     // A + scaleA
     () / ONE_BYTE_BIT_SIZE * L0ABUseSizeFactor +
-        tiling_.GetBaseM() * tiling_.GetBaseK() / 32 * sizeof(float8_e8m0_t) * L0ABUseSizeFactor)<= L0ASize_), {
+        tiling_.GetBaseM() * tiling_.GetBaseK() / 32 * sizeof(fp8_e8m0_t) * L0ABUseSizeFactor)<= L0ASize_), {
         KERNEL_LOG(KERNEL_ERROR, "baseM * baseK + baseM * baseScaleK is %d , which should be not larger than L0ASize_ %d",
             tiling_.GetBaseM() * tiling_.GetBaseK() * AscendC::GetBitSize() / ONE_BYTE_BIT_SIZE * L0ABUseSizeFactor +
-            tiling_.GetBaseM() * tiling_.GetBaseK() / 32 * sizeof(float8_e8m0_t) * L0ABUseSizeFactor, L0ASize_);
+            tiling_.GetBaseM() * tiling_.GetBaseK() / 32 * sizeof(fp8_e8m0_t) * L0ABUseSizeFactor, L0ASize_);
     });
     // B + scaleB
     () / ONE_BYTE_BIT_SIZE * L0ABUseSizeFactor +
-        tiling_.GetBaseN() * tiling_.GetBaseK() / 32 * sizeof(float8_e8m0_t) * L0ABUseSizeFactor <= L0BSize_), {
+        tiling_.GetBaseN() * tiling_.GetBaseK() / 32 * sizeof(fp8_e8m0_t) * L0ABUseSizeFactor <= L0BSize_), {
         KERNEL_LOG(KERNEL_ERROR, "baseN * baseK + baseN * baseScaleK is %d , which should be not larger than L0BSize_ %d",
             tiling_.GetBaseN() * tiling_.GetBaseK() * AscendC::GetBitSize() / ONE_BYTE_BIT_SIZE * L0ABUseSizeFactor +
-            tiling_.GetBaseN() * tiling_.GetBaseK() / 32 * sizeof(float8_e8m0_t) * L0ABUseSizeFactor, L0BSize_);
+            tiling_.GetBaseN() * tiling_.GetBaseK() / 32 * sizeof(fp8_e8m0_t) * L0ABUseSizeFactor, L0BSize_);
     });
     // C
class MatmulTensorInfo::value && (INPUT_TYPE::TAG == InputTypeTag::scaleA || INPUT_TYPE::TAG == InputTypeTag::scaleB)>> {
-    using SrcT = float8_e8m0_t;
+    using SrcT = fp8_e8m0_t;
     MATMUL_USE_MODULE(MatmulShapeInfo);

 public:
@@ -327,9 +327,9 @@ private:
             KERNEL_LOG(KERNEL_ERROR, "It is not allowed to set matrix scaleA transpose when matmul scaleA transpose is not defined.");
         });
 #if defined(__DAV_C310__)
-        if constexpr (IsSameType::value) {
+        if constexpr (IsSameType::value) {
             ASCENDC_ASSERT(!isTransposeScaleA, { KERNEL_LOG(KERNEL_ERROR,
-                "When matrix scaleA DType is float8_e8m0_t, matrix scaleA should not be transposed");});
+                "When matrix scaleA DType is fp8_e8m0_t, matrix scaleA should not be transposed");});
         }
 #endif
     }
@@ -342,9 +342,9 @@ private:
             KERNEL_LOG(KERNEL_ERROR, "It is not allowed to set matrix scaleB transpose when matmul scaleB transpose is not defined.");
         });
 #if defined(__DAV_C310__)
-        if constexpr (IsSameType::value) {
+        if constexpr (IsSameType::value) {
             ASCENDC_ASSERT(isTransposeScaleB, { KERNEL_LOG(KERNEL_ERROR,
-                "When matrix scaleB DType is float8_e8m0_t, matrix scaleB should be transposed");});
+                "When matrix scaleB DType is fp8_e8m0_t, matrix scaleB should be transposed");});
         }
 #endif
     }
diff --git a/impl/matmul/policy/matmul_policy.h b/impl/matmul/policy/matmul_policy.h
index 8b26e9a1..c9b41143 100644
--- a/impl/matmul/policy/matmul_policy.h
+++ b/impl/matmul/policy/matmul_policy.h
@@ -59,10 +59,10 @@ struct MatmulWithScalePolicy : public MatmulPolicy, MM_CFG>;
-    using CopyCubeInScaleA = AscendC::Impl::Detail::CopyCubeIn, MM_CFG>;
-    using CubeInBufferScaleB = AscendC::Impl::Detail::CubeInBuffer, MM_CFG>;
-    using CopyCubeInScaleB = AscendC::Impl::Detail::CopyCubeIn, MM_CFG>;
+    using CubeInBufferScaleA = AscendC::Impl::Detail::CubeInBuffer, MM_CFG>;
+    using CopyCubeInScaleA = AscendC::Impl::Detail::CopyCubeIn, MM_CFG>;
+    using CubeInBufferScaleB = AscendC::Impl::Detail::CubeInBuffer, MM_CFG>;
+    using CopyCubeInScaleB = AscendC::Impl::Detail::CopyCubeIn, MM_CFG>;
 };

 /*
diff --git a/impl/matmul/policy/matmul_private_modules.h b/impl/matmul/policy/matmul_private_modules.h
index 53f5f289..4b157c1f 100644
--- a/impl/matmul/policy/matmul_private_modules.h
+++ b/impl/matmul/policy/matmul_private_modules.h
@@ -68,14 +68,14 @@ struct MatmulPrivateModules {
     using BatchCopyCubeInA = BatchCopyCubeIn>;
     using BatchCopyCubeInB = BatchCopyCubeIn>;
-    using CopyCubeInParamsScaleA = CopyCubeInParams>;
-    using CopyCubeInParamsScaleB = CopyCubeInParams>;
-    using MatmulTensorInfoScaleA = MatmulTensorInfo>;
-    using MatmulTensorInfoScaleB = MatmulTensorInfo>;
-    using DataCopyUtilsScaleA = CopyTileToCubeWrapper>;
-    using DataCopyUtilsScaleB = CopyTileToCubeWrapper>;
-    using DataCopyWrapperScaleA = DataCopyWrapper>;
-    using DataCopyWrapperScaleB = DataCopyWrapper>;
+    using CopyCubeInParamsScaleA = CopyCubeInParams>;
+    using CopyCubeInParamsScaleB = CopyCubeInParams>;
+    using MatmulTensorInfoScaleA = MatmulTensorInfo>;
+    using MatmulTensorInfoScaleB = MatmulTensorInfo>;
+    using DataCopyUtilsScaleA = CopyTileToCubeWrapper>;
+    using DataCopyUtilsScaleB = CopyTileToCubeWrapper>;
+    using DataCopyWrapperScaleA = DataCopyWrapper>;
+    using DataCopyWrapperScaleB = DataCopyWrapper>;
     using LocalWorkspace = MatmulLocalWorkspace;

     using MatmulShapeInfo = AscendC::Impl::Detail::MatmulShapeInfo;
diff --git a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_base.h b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_base.h
index 6115778d..a96901f1 100644
--- a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_base.h
+++ b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_base.h
@@ -31,7 +31,7 @@ class CopyCubeInBase
     MATMUL_USE_MODULE(MatmulShapeInfo);
     using TransT = typename INPUT_TYPE::TRANS_T;
-    using SrcT = typename Conditional::value, float8_e8m0_t, typename INPUT_TYPE::T>::type;
+    using SrcT = typename Conditional::value, fp8_e8m0_t, typename INPUT_TYPE::T>::type;

 public:
     __aicore__ inline void Init()
diff --git a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_from_l1.h b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_from_l1.h
index 8379dee9..5dc287c4 100644
--- a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_from_l1.h
+++ b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_from_l1.h
@@ -34,7 +34,7 @@ GetCopyCubeInType() == CopyCubeInType::FROM_L1>> {
     MATMUL_USE_MODULE_ON(MatmulTensorInfo, INPUT_TYPE::TAG);
     using TransT = typename INPUT_TYPE::TRANS_T;
-    using SrcT = typename Conditional::value, float8_e8m0_t, typename INPUT_TYPE::T>::type;
+    using SrcT = typename Conditional::value, fp8_e8m0_t, typename INPUT_TYPE::T>::type;

 public:
     __aicore__ inline CopyCubeIn() = default;
diff --git a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_ubtol1_singleshape.h b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_ubtol1_singleshape.h
index 341f6422..0bcfd69d 100644
--- a/impl/matmul/stage/copy_cube_in/base/copy_cube_in_ubtol1_singleshape.h
+++ b/impl/matmul/stage/copy_cube_in/base/copy_cube_in_ubtol1_singleshape.h
@@ -37,7 +37,7 @@ GetCopyCubeInType() == CopyCubeInType::UBTOL1_SINGLESHAPE>>
     MATMUL_USE_MODULE_ON(MatmulTensorInfo, INPUT_TYPE::TAG);
     MATMUL_USE_MODULE(MatmulShapeInfo);
     using TransT = typename INPUT_TYPE::TRANS_T;
-    using SrcT = typename Conditional::value, float8_e8m0_t, typename INPUT_TYPE::T>::type;
+    using SrcT = typename Conditional::value, fp8_e8m0_t, typename INPUT_TYPE::T>::type;

 public:
     __aicore__ inline CopyCubeIn() = default;
diff --git a/impl/matmul/stage/split/load_to_l0_utils.h b/impl/matmul/stage/split/load_to_l0_utils.h
index e2ebb702..01ce5e46 100644
--- a/impl/matmul/stage/split/load_to_l0_utils.h
+++ b/impl/matmul/stage/split/load_to_l0_utils.h
@@ -105,7 +105,7 @@ __aicore__ inline constexpr auto GetAuxDataType()
         uint8_t auxData = 0;
         return auxData;
     } else if constexpr (HasScalePosition::value) {
-        float8_e8m0_t mxType = 0;
+        fp8_e8m0_t mxType = 0;
         return mxType;
     } else {
         uint8_t defaultData = 0;
diff --git a/impl/matmul/utils/matmul_utils.h b/impl/matmul/utils/matmul_utils.h
index bab85bee..457ec684 100644
--- a/impl/matmul/utils/matmul_utils.h
+++ b/impl/matmul/utils/matmul_utils.h
@@ -271,7 +271,7 @@ __aicore__ inline constexpr static int32_t AuxGetC0Size()
         return Impl::B32_C0SIZE;
     }
 #if defined(__DAV_C310__)
-    else if (IsTypeOneOfV)
+    else if (IsTypeOneOfV)
     {
         return Impl::B8_C0SIZE;
     } else if (IsTypeOneOfV) {
@@ -484,7 +484,7 @@ __aicore__ constexpr int32_t GetBitSize()
         return ONE_BYTE_BIT_SIZE / 2;
     }
 #if defined(__DAV_C310__)
-    if constexpr (IsTypeOneOfV) {
+    if constexpr (IsTypeOneOfV) {
         return ONE_BYTE_BIT_SIZE;
     }
     if constexpr (IsTypeOneOfV) {
diff --git a/impl/normalization/layernorm/layernorm_tiling_impl.cpp b/impl/normalization/layernorm/layernorm_tiling_impl.cpp
index 5ecbf804..2ce74faa 100644
--- a/impl/normalization/layernorm/layernorm_tiling_impl.cpp
+++ b/impl/normalization/layernorm/layernorm_tiling_impl.cpp
@@ -266,6 +266,7 @@ void GetLayerNormNDTilingInfo(const ge::Shape& srcShape, const uint32_t stackBuf
     const bool isReuseSource, const bool isComputeRstd, optiling::LayerNormSeparateTiling& tiling)
 {
     (void)isReuseSource;
+    (void)isComputeRstd;
     CheckLayerNormHostCommon("LayerNorm", "GetLayerNormNDTilingInfo", srcShape, typeSize);
     ASCENDC_HOST_ASSERT(isComputeRstd == true, return, "isComputeRstd current only support true.");
     ASCENDC_HOST_ASSERT(typeSize != 0, return, "typeSize can not be 0!");
diff --git a/lib/matmul/matmul_client.h b/lib/matmul/matmul_client.h
index ea92ee56..68d30001 100644
--- a/lib/matmul/matmul_client.h
+++ b/lib/matmul/matmul_client.h
@@ -1510,7 +1510,7 @@ public:
     }
 #if defined(__DAV_C310__)
-    using ScaleT = float8_e8m0_t;
+    using ScaleT = fp8_e8m0_t;

     __aicore__ inline void SetTensorScaleA(const GlobalTensor& gm, bool isTransposeScaleA = false)
     {
diff --git a/version.info b/version.info
index 67b399c1..16081fbd 100644
--- a/version.info
+++ b/version.info
@@ -1 +1 @@
-Version=8.3.T5.0.B028
\ No newline at end of file
+Version=8.3.T9.0.B066
\ No newline at end of file
-- 
Gitee
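Usage note (not part of the patch): on __DAV_C310__ the MX scale tensors are now typed as fp8_e8m0_t instead of float8_e8m0_t. The fragment below is only a minimal sketch of how a caller would wire the renamed type into the SetGlobalBuffer / SetTensorScaleA / SetTensorScaleB interfaces visible in the hunks above; the matmul object mm, the GM addresses scaleAGmAddr / scaleBGmAddr, and the assumption of `using namespace AscendC;` are illustrative and do not come from this change.

#if defined(__DAV_C310__)
// Illustrative only: bind fp8_e8m0_t scale tensors from GM and hand them to an initialized matmul object `mm`.
GlobalTensor<fp8_e8m0_t> scaleA;
GlobalTensor<fp8_e8m0_t> scaleB;
scaleA.SetGlobalBuffer(reinterpret_cast<__gm__ fp8_e8m0_t *>(scaleAGmAddr), 0); // scaleAGmAddr: hypothetical GM address
scaleB.SetGlobalBuffer(reinterpret_cast<__gm__ fp8_e8m0_t *>(scaleBGmAddr), 0); // scaleBGmAddr: hypothetical GM address
mm.SetTensorScaleA(scaleA, false); // scaleA not transposed, matching the assert in matmul_tensor_info.h
mm.SetTensorScaleB(scaleB, true);  // scaleB transposed, matching the default isTransposeScaleB = true
#endif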