diff --git a/akg-mlir/cmake/external_libs/ascendnpu_ir.cmake b/akg-mlir/cmake/external_libs/ascendnpu_ir.cmake index 00d23527a952931670a5f2b94489aa397666d74d..229cf92fad9d60098d12c54025a3bd434688fa6e 100644 --- a/akg-mlir/cmake/external_libs/ascendnpu_ir.cmake +++ b/akg-mlir/cmake/external_libs/ascendnpu_ir.cmake @@ -24,7 +24,8 @@ akg_add_pkg(ascendnpu_ir VER 0.4.0 HEAD_ONLY bishengir/include URL ${AscendNpuIR_URL} - MD5 ${AscendNpuIR_MD5}) + MD5 ${AscendNpuIR_MD5} + PATCHES ${CMAKE_SOURCE_DIR}/third-party/patch/ascendnpu-ir/hacc.patch) execute_process(COMMAND chmod -R +x ${bisheng_ir_ROOT}/bin WORKING_DIRECTORY ${bisheng_ir_ROOT}/bin diff --git a/akg-mlir/compiler/lib/Dialect/Affine/Transforms/TilingFunc.cpp b/akg-mlir/compiler/lib/Dialect/Affine/Transforms/TilingFunc.cpp index 3c55d0065e1ed7082278877d61d9072e74119fbd..062640e351183c77606c88b7a13974713900334c 100644 --- a/akg-mlir/compiler/lib/Dialect/Affine/Transforms/TilingFunc.cpp +++ b/akg-mlir/compiler/lib/Dialect/Affine/Transforms/TilingFunc.cpp @@ -41,6 +41,9 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" +// HACC dialect enums/attrs +#include "bishengir/Dialect/HACC/IR/HACC.h" + #define DEBUG_TYPE "tiling-func" namespace mlir { @@ -51,21 +54,26 @@ namespace mlir { namespace mlir::affine { +// keep only non-enum mock keys that are still used namespace mockattr { -static constexpr const char *kFunctionKind = "hacc.function_kind"; -static constexpr const char *kHostFuncType = "hacc.host_func_type"; static constexpr const char *kEnableAutoMarkBufferSize = "enable_auto_mark_buffer_size"; static constexpr const char *kBlockDim = "hacc.block_dim"; -static constexpr const char *kTilingFunction = "hacc.tiling_function"; static constexpr const char *kFusionKind = "hfusion.fusion_kind"; -static constexpr const char *kDevice = "DEVICE"; -static constexpr const char *kHost = "HOST"; -static constexpr const char *kHostTilingFunction = "tiling_function"; static constexpr const char 
*kFusionKindPureElemwise = "PURE_ELEMWISE"; } // namespace mockattr namespace { +using hacc::HACCFuncType; +using hacc::HACCFuncTypeAttr; +using hacc::HostFuncType; +using hacc::HostFuncTypeAttr; +using hacc::InputIdxAttr; +using hacc::KernelArgType; +using hacc::KernelArgTypeAttr; +using hacc::OutputIdxAttr; +using hacc::TilingFunctionAttr; + struct AutoTilingOptions { unsigned blockDim = 40; }; @@ -107,8 +115,15 @@ class TilingBase { protected: LogicalResult runPreTilingProcedure(OpBuilder &) { + if (auto *ctx = originalKernel_.getContext()) { + ctx->getOrLoadDialect(); + } + kernelInfo_->baseKernelName = originalKernel_.getSymName().str(); kernelInfo_->blockDim = options_.blockDim; + // annotate original as HOST (enum attr) + auto *ctx = originalKernel_.getContext(); + originalKernel_->setAttr("hacc.function_kind", HACCFuncTypeAttr::get(ctx, HACCFuncType::HOST)); return success(); } @@ -155,25 +170,28 @@ class TilingBase { auto host = builder.create(originalKernel_.getLoc(), name, funcTy); host.addEntryBlock(); - host->setAttr(mockattr::kFunctionKind, StringAttr::get(builder.getContext(), mockattr::kHost)); - host->setAttr(mockattr::kHostFuncType, StringAttr::get(builder.getContext(), mockattr::kHostTilingFunction)); + // enum attrs for host function kind and host func type + host->setAttr("hacc.function_kind", HACCFuncTypeAttr::get(builder.getContext(), HACCFuncType::HOST)); + host->setAttr("hacc.host_func_type", HostFuncTypeAttr::get(builder.getContext(), HostFuncType::kTilingFunction)); unsigned nInputs = origTy.getNumInputs(); unsigned nResults = origTy.getNumResults(); + auto *ctx = builder.getContext(); for (unsigned i = 0; i < nInputs; ++i) { - host.setArgAttr(i, "hacc.arg_type", StringAttr::get(builder.getContext(), "input")); - host.setArgAttr(i, "hacc.input_idx", builder.getI64IntegerAttr(i)); + host.setArgAttr(i, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kInput)); + host.setArgAttr(i, "hacc.input_idx", InputIdxAttr::get(ctx, i)); } 
for (unsigned i = 0; i < nResults; ++i) { unsigned argIdx = nInputs + i; - host.setArgAttr(argIdx, "hacc.arg_type", StringAttr::get(builder.getContext(), "output")); - host.setArgAttr(argIdx, "hacc.output_idx", builder.getI64IntegerAttr(i)); + host.setArgAttr(argIdx, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kOutput)); + host.setArgAttr(argIdx, "hacc.output_idx", OutputIdxAttr::get(ctx, i)); } - host.setResultAttr(0, "hacc.arg_type", StringAttr::get(builder.getContext(), "tiling_key")); + // result attributes: result 0 is the sync block lock, results 1..kN-1 are tiling keys + host.setResultAttr(0, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kSyncBlockLock)); for (unsigned i = 1; i < kN; ++i) { - host.setResultAttr(i, "hacc.arg_type", StringAttr::get(builder.getContext(), "tiling_data")); + host.setResultAttr(i, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kTilingKey)); } builder.setInsertionPointToEnd(&host.getBody().front()); @@ -219,14 +237,15 @@ class TilingBase { } void setHaccIOArgAttrs(func::FuncOp f, unsigned nInputs, unsigned nOutputs, OpBuilder &builder) { + auto *ctx = builder.getContext(); for (unsigned i = 0; i < nInputs; ++i) { - f.setArgAttr(i, "hacc.arg_type", StringAttr::get(builder.getContext(), "input")); - f.setArgAttr(i, "hacc.input_idx", builder.getI64IntegerAttr(i)); + f.setArgAttr(i, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kInput)); + f.setArgAttr(i, "hacc.input_idx", InputIdxAttr::get(ctx, i)); } for (unsigned i = 0; i < nOutputs; ++i) { unsigned argIdx = nInputs + i; - f.setArgAttr(argIdx, "hacc.arg_type", StringAttr::get(builder.getContext(), "output")); - f.setArgAttr(argIdx, "hacc.output_idx", builder.getI64IntegerAttr(i)); + f.setArgAttr(argIdx, "hacc.arg_type", KernelArgTypeAttr::get(ctx, KernelArgType::kOutput)); + f.setArgAttr(argIdx, "hacc.output_idx", OutputIdxAttr::get(ctx, i)); } } @@ -239,13 +258,16 @@ class TilingBase { setHaccIOArgAttrs(deviceFunc, nInputs, 
nOutputs, builder); deviceFunc->setAttr(mockattr::kEnableAutoMarkBufferSize, builder.getUnitAttr()); - deviceFunc->setAttr(mockattr::kFunctionKind, StringAttr::get(builder.getContext(), mockattr::kDevice)); + // enum attrs for function kind + deviceFunc->setAttr("hacc.function_kind", HACCFuncTypeAttr::get(builder.getContext(), HACCFuncType::DEVICE)); deviceFunc->setAttr(mockattr::kFusionKind, StringAttr::get(builder.getContext(), mockattr::kFusionKindPureElemwise)); deviceFunc->setAttr(mockattr::kBlockDim, builder.getI64IntegerAttr(blockDim)); deviceFunc->setAttr("hacc.entry", builder.getUnitAttr()); if (hostTiling) { - deviceFunc->setAttr(mockattr::kTilingFunction, FlatSymbolRefAttr::get(hostTiling.getSymNameAttr())); + deviceFunc->setAttr( + "hacc.tiling_function", + TilingFunctionAttr::get(builder.getContext(), FlatSymbolRefAttr::get(hostTiling.getSymNameAttr()))); } return deviceFunc; } @@ -523,7 +545,11 @@ class TilingBase { auto funcTy = FunctionType::get(module.getContext(), TypeRange{}, TypeRange{builder.getI64Type()}); auto host = builder.create(deviceFunc.getLoc(), hostName, funcTy); host.setVisibility(SymbolTable::Visibility::Public); - host->setAttr(mockattr::kFunctionKind, StringAttr::get(builder.getContext(), mockattr::kHost)); + // enum attrs for host util func + auto *ctx = builder.getContext(); + host->setAttr("hacc.function_kind", HACCFuncTypeAttr::get(ctx, HACCFuncType::HOST)); + host->setAttr("hacc.host_func_type", + HostFuncTypeAttr::get(ctx, HostFuncType::kInferSyncBlockLockNumFunction)); Block *entry = host.addEntryBlock(); OpBuilder b = OpBuilder::atBlockEnd(entry); @@ -560,8 +586,8 @@ struct TilingFunc : public mlir::impl::TilingFuncBase { SmallVector kernels; module.walk([&](func::FuncOp f) { - if (auto kind = f->getAttrOfType(mockattr::kFunctionKind); - !kind || kind.getValue() == mockattr::kDevice) { + if (auto kind = f->getAttrOfType("hacc.function_kind"); + !kind || kind.getValue() == "DEVICE") { kernels.push_back(f); } }); diff --git 
a/akg-mlir/third-party/patch/ascendnpu-ir/hacc.patch b/akg-mlir/third-party/patch/ascendnpu-ir/hacc.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b96bcb1a97a79bb1fc673e1a7203bde29a1b500 --- /dev/null +++ b/akg-mlir/third-party/patch/ascendnpu-ir/hacc.patch @@ -0,0 +1,257 @@ +diff --git a/home/weijiao/workspace/akg_project/akg_1124_test/akg/akg-mlir/HACCAttrs.td b/home/weijiao/workspace/akg_project/akg_1124_test/akg/akg-mlir/build/ascendnpu_ir/bishengir/include/bishengir/Dialect/HACC/IR/HACCAttrs.td +index 4fefb9c3..d98f8ef3 100644 +--- a/bishengir/include/bishengir/Dialect/HACC/IR/HACCAttrs.td ++++ b/bishengir/include/bishengir/Dialect/HACC/IR/HACCAttrs.td +@@ -25,6 +25,7 @@ include "bishengir/Dialect/HACC/IR/HACCBase.td" + include "mlir/IR/AttrTypeBase.td" + include "mlir/IR/EnumAttr.td" + include "mlir/IR/OpBase.td" ++include "mlir/Dialect/DLTI/DLTIAttrs.td" + include "mlir/Dialect/LLVMIR/LLVMOpBase.td" + include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td" + +@@ -62,4 +63,244 @@ def HACC_FuncTypeAttr : HACC_Attr<"HACCFuncType", "function_kind"> { + }]; + } + ++//===----------------------------------------------------------------------===// ++// HACC To LLVM Translation Attribute ++//===----------------------------------------------------------------------===// ++ ++def HACC_ToLLVMIRTranslateAttr : HACC_I32Enum<"HACCToLLVMIRTranslateAttr", "", [ ++ // For Device Entry Functions ++ I32EnumAttrCase<"ENTRY", 0, "hacc.entry">, ++ // For Mix Device Entry Functions ++ I32EnumAttrCase<"MIX_ENTRY", 1, "hacc.mix_entry">, ++ I32EnumAttrCase<"ALWAYS_INLINE", 2, "hacc.always_inline">, ++]> { ++ let cppNamespace = "::mlir::hacc"; ++} ++ ++//===----------------------------------------------------------------------===// ++// Kernel Argument Type ++//===----------------------------------------------------------------------===// ++ ++def HACC_kFFTSBaseAddr : I32EnumAttrCase<"kFFTSBaseAddr", 0, "ffts_base_address">; ++def HACC_kInput : 
I32EnumAttrCase<"kInput", 1, "input">; ++def HACC_kOutput : I32EnumAttrCase<"kOutput", 2, "output">; ++def HACC_kInputAndOutput : I32EnumAttrCase<"kInputAndOutput", 3, "input_and_output">; ++def HACC_kWorkspace : I32EnumAttrCase<"kWorkspace", 4, "workspace">; ++def HACC_kSyncBlockLock : I32EnumAttrCase<"kSyncBlockLock", 5, "sync_block_lock">; ++def HACC_kTilingKey : I32EnumAttrCase<"kTilingKey", 6, "tiling_key">; ++def HACC_kTilingData : I32EnumAttrCase<"kTilingData", 7, "tiling_data">; ++def HACC_kTilingStruct : I32EnumAttrCase<"kTilingStruct", 8, "tiling_struct">; ++def HACC_kMeshArg : I32EnumAttrCase<"kMeshArg", 9, "mesh_arg">; ++def HACC_kSanitizerAddr : I32EnumAttrCase<"kSanitizerAddr", 10, "sanitizer_addr">; ++ ++def HACC_KernelArgTypeEnum : HACC_I32Enum<"KernelArgType", "HACC Kernel Arg Category", [ ++ HACC_kFFTSBaseAddr, ++ HACC_kInput, ++ HACC_kOutput, ++ HACC_kInputAndOutput, ++ HACC_kWorkspace, ++ HACC_kSyncBlockLock, ++ HACC_kTilingKey, ++ HACC_kTilingData, ++ HACC_kTilingStruct, ++ HACC_kMeshArg, ++ HACC_kSanitizerAddr, ++]> {} ++ ++def HACC_KernelArgTypeAttr : HACC_Attr<"KernelArgType", "arg_type"> { ++ let parameters = (ins EnumParameter:$arg_type); ++ let assemblyFormat = "`<` params `>`"; ++ let description = [{ ++ HACC kernel arg type. 
++ }]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Host Function Type ++//===----------------------------------------------------------------------===// ++ ++// TODO: Refactor into HACC_ToLLVMIRTranslateAttr ++def HACC_kEntry : I32EnumAttrCase<"kEntry", 1, "host_entry">; ++def HACC_kTilingFunction : I32EnumAttrCase<"kTilingFunction", 2, "tiling_function">; ++def HACC_kInferOutputShapeFunction : I32EnumAttrCase<"kInferOutputShapeFunction", 3, "infer_output_shape_function">; ++def HACC_kInferWorkspaceShapeFunction : I32EnumAttrCase<"kInferWorkspaceShapeFunction", 4, "infer_workspace_shape_function">; ++def HACC_kInferSyncBlockLockNumFunction : I32EnumAttrCase<"kInferSyncBlockLockNumFunction", 5, "infer_sync_block_lock_num_function">; ++def HACC_kInferSyncBlockLockInitFunction : I32EnumAttrCase<"kInferSyncBlockLockInitFunction", 6, "infer_sync_block_lock_init_function">; ++def HACC_kGetTilingStructSizeFunction : I32EnumAttrCase<"kGetTilingStructSizeFunction", 7, "get_tiling_struct_size_function">; ++def HACC_HostFuncTypeEnum : HACC_I32Enum<"HostFuncType", "HACC Host function type", [ ++ HACC_kEntry, ++ HACC_kTilingFunction, ++ HACC_kInferOutputShapeFunction, ++ HACC_kInferWorkspaceShapeFunction, ++ HACC_kInferSyncBlockLockNumFunction, ++ HACC_kInferSyncBlockLockInitFunction, ++ HACC_kGetTilingStructSizeFunction, ++]> {} ++ ++def HACC_HostFuncTypeAttr : HACC_Attr<"HostFuncType", "host_func_type"> { ++ let parameters = (ins EnumParameter:$host_func_type); ++ let assemblyFormat = "`<` params `>`"; ++ let description = [{ ++ HACC host func type. 
++ }]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Host and Device Function Binding ++//===----------------------------------------------------------------------===// ++ ++class HACC_FuncRefAttr ++ : HACC_Attr { ++ let parameters = (ins AttrParameter<"::mlir::FlatSymbolRefAttr", ++ "function symbol name">:$funcName); ++ let assemblyFormat = "`<` $funcName `>`"; ++ let builders = [TypeBuilder<(ins "StringRef":$funcName), [{ ++ return Base::get($_ctxt, ++ ::mlir::FlatSymbolRefAttr::get($_ctxt, std::move(funcName))); ++ }]>]; ++ let extraClassDeclaration = [{ ++ StringRef getFuncNameStr() { ++ return getFuncName().getValue(); ++ } ++ }]; ++} ++ ++def HACC_TilingFunctionAttr ++ : HACC_FuncRefAttr<"TilingFunction", "tiling_function"> { ++ let description = [{ ++ HACC tiling function attribute. This is used to indicate the host ++ tiling function of the device function. ++ }]; ++} ++ ++def HACC_InferOutputShapeFunctionAttr ++ : HACC_FuncRefAttr<"InferOutputShapeFunction", ++ "infer_output_shape_function"> { ++ let description = [{ ++ HACC infer output shape function attribute. This is used to indicate the host ++ infer output shape function of the device function. ++ }]; ++} ++ ++def HACC_InferWorkspaceShapeFunctionAttr ++ : HACC_FuncRefAttr<"InferWorkspaceShapeFunction", ++ "infer_workspace_shape_function"> { ++ let description = [{ ++ HACC infer workspace shape function attribute. This is used to indicate the host ++ infer workspace shape function of the device function. ++ }]; ++} ++ ++def HACC_InferSyncBlockLockNumFunctionAttr ++ : HACC_FuncRefAttr<"InferSyncBlockLockNumFunction", ++ "infer_sync_block_lock_num_function"> { ++ let description = [{ ++ HACC infer sync block lock num function attribute. This is used to indicate the host ++ infer sync block lock num function of the device function. 
++ ++ For 1 atomic op, all the blocks share 1 <1xi64> type memref on gm, which is called a lock, ++ and this function is used to infer how many locks the kernel needs. ++ }]; ++} ++ ++def HACC_InferSyncBlockLockInitFunctionAttr ++ : HACC_FuncRefAttr<"InferSyncBlockLockInitFunction", ++ "infer_sync_block_lock_init_function"> { ++ let description = [{ ++ HACC infer sync block lock init function attribute. This is used to indicate the host ++ infer sync block lock init function of the device function. ++ ++ Every lock needs to be initialized before running the kernel; ++ this function is used to infer the initial value of the lock. ++ }]; ++} ++ ++def HACC_GetTilingStructSizeFunctionAttr ++ : HACC_FuncRefAttr<"GetTilingStructSizeFunction", ++ "get_tiling_struct_size_function"> { ++ let description = [{ ++ HACC get tiling struct size function attribute. This is used to indicate the host ++ get tiling struct size function of the device function. ++ }]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Device Target and Specification ++//===----------------------------------------------------------------------===// ++ ++def HACC_TargetAttr : HACC_Attr<"Target", "target"> { ++ let description = [{ ++ This attribute is used to indicate the target device. 
++ }]; ++ let parameters = (ins ++ AttrParameter<"StringAttr", "target device">:$target ++ ); ++ let assemblyFormat = "`<` $target `>`"; ++} ++ ++def HACC_DeviceSpecEnum : ++ HACC_I32Enum<"DeviceSpec", "HACC device spec", [ ++ I32EnumAttrCase<"AI_CORE_COUNT", 0>, ++ I32EnumAttrCase<"CUBE_CORE_COUNT", 1>, ++ I32EnumAttrCase<"VECTOR_CORE_COUNT", 2>, ++ I32EnumAttrCase<"UB_SIZE", 3>, ++ I32EnumAttrCase<"L1_SIZE", 4>, ++ I32EnumAttrCase<"L0A_SIZE", 5>, ++ I32EnumAttrCase<"L0B_SIZE", 6>, ++ I32EnumAttrCase<"L0C_SIZE", 7>, ++ I32EnumAttrCase<"UB_ALIGN_SIZE", 8>, ++ I32EnumAttrCase<"L1_ALIGN_SIZE", 9>, ++ I32EnumAttrCase<"L0C_ALIGN_SIZE", 10> ++]> { ++ let description = [{ ++ HACC `DeviceSpec` enum class defines the list of hardware specifications ++ that might be useful to the compiler. ++ }]; ++} ++ ++def HACC_InputIdxAttr : HACC_Attr<"InputIdx", "input_idx"> { ++ let parameters = (ins "unsigned":$argIdx); ++ let assemblyFormat = "`<` params `>`"; ++} ++ ++def HACC_OutputIdxAttr : HACC_Attr<"OutputIdx", "output_idx"> { ++ let parameters = (ins "unsigned":$argIdx); ++ let assemblyFormat = "`<` params `>`"; ++ let description = [{ ++ On an NPU device, the kernel function's calling convention is that the output ++ is passed in as an input parameter. ++ ++ This attribute is used to indicate which output value the current ++ function argument corresponds to. ++ }]; ++} ++ ++def HACC_ExternalFunctionPathAttr ++ : HACC_Attr<"ExternalFunctionPath", "external_function_path"> { ++ let description = [{ ++ This attribute is used to indicate the location of external functions. ++ }]; ++} ++ ++def HACC_CachedIOAttr ++ : HACC_Attr<"CachedIO", "cached_io"> { ++ let description = [{ ++ This attribute is used to indicate the value has been cached io. ++ }]; ++} ++ ++def HACC_NoIOAliasAttr ++ : HACC_Attr<"NoIOAlias", "no_io_alias"> { ++ let description = [{ ++ This attribute is used to indicate the function inputs/outputs are ++ strictly not aliased. 
++ }]; ++} ++ ++def HACC_BlockDimAttr : HACC_Attr<"BlockDim", "block_dim"> { ++ let description = [{ ++ HACC block dimension attribute for function. ++ }]; ++} ++ + #endif // BISHENGIR_DIALECT_HACC_IR_HACCATTRS_TD