diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/CMakeLists.txt b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b7fe9dda55c3375cefde6fb7c0e363d1b32d2714
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/CMakeLists.txt
@@ -0,0 +1,70 @@
+# target_link_directories() was added in CMake 3.13, file(GLOB ... CONFIGURE_DEPENDS) in 3.12.
+cmake_minimum_required(VERSION 3.13)
+project(fuse_matmul_add_pass)
+
+set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY TRUE)
+
+# Toolkit root: honour ASCEND_INSTALL_PATH, otherwise fall back to the example path given in readme.md.
+if (DEFINED ENV{ASCEND_INSTALL_PATH})
+    set(ASCEND_PATH $ENV{ASCEND_INSTALL_PATH})
+else ()
+    set(ASCEND_PATH /home/HwHiAiUser/Ascend/ascend-toolkit/latest)
+endif()
+message(STATUS "ASCEND_PATH: ${ASCEND_PATH}")
+
+set(INCLUDE_DIR ${ASCEND_PATH}/include)
+aux_source_directory(${PROJECT_SOURCE_DIR}/src SRC_LIST)
+
+set(common_compile_options
+    -std=c++17
+    -g
+    -Wall
+)
+
+set(common_compile_definitions
+    _GLIBCXX_USE_CXX11_ABI=0
+)
+
+add_subdirectory(gen_es_api)
+
+############ libfuse_matmul_add_pass.so ############
+add_library(${PROJECT_NAME} SHARED ${SRC_LIST})
+
+# Property names are case-sensitive: the built-in property is OUTPUT_NAME, not output_name.
+set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
+
+target_compile_options(${PROJECT_NAME} PRIVATE
+    ${common_compile_options}
+)
+
+target_compile_definitions(${PROJECT_NAME} PRIVATE
+    ${common_compile_definitions}
+)
+
+target_include_directories(${PROJECT_NAME} PRIVATE
+    ${ASCEND_PATH}/opp/built-in/op_proto/inc
+    ${INCLUDE_DIR}/graph
+    ${INCLUDE_DIR}/ge
+    ${INCLUDE_DIR}/register
+    ${INCLUDE_DIR}
+    ${INCLUDE_DIR}/external
+    ${CMAKE_CURRENT_BINARY_DIR}/generated_ops
+)
+
+target_link_directories(${PROJECT_NAME} PRIVATE
+    ${ASCEND_PATH}/compiler/lib64/stub
+    ${ASCEND_PATH}/compiler/lib64/ # TODO: delete once eager_style_graph_builder_base is changed to a stub
+)
+
+# --no-as-needed keeps every listed library as a dependency even when no symbol is referenced directly.
+target_link_libraries(${PROJECT_NAME} PRIVATE
+    -Wl,--no-as-needed
+    graph
+    register
+    ge_compiler
+    esb_generated_shared
+    eager_style_graph_builder_base
+    -Wl,--as-needed
+)
+
+add_dependencies(${PROJECT_NAME} esb_generated_shared)
\ No newline at end of file
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/data/tf_forward.py b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/data/tf_forward.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1227ad49811afff49c7abd7681dfaf3d62309e2
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/data/tf_forward.py
@@ -0,0 +1,63 @@
+"""
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import tensorflow as tf
+import npu_bridge
+from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
+import numpy as np
+
+def generate_tf_graph():
+    """Build MatMul(a, b) followed by Add(matmul, c) in the default graph and return that graph."""
+    lhs = tf.compat.v1.placeholder(tf.float32, shape=[2, 3], name='a')
+    rhs = tf.compat.v1.placeholder(tf.float32, shape=[3, 2], name='b')
+    product = tf.linalg.matmul(lhs, rhs, name="matmul")
+    bias = tf.compat.v1.placeholder(tf.float32, shape=[2, 2], name='c')
+    tf.add(product, bias, name="add")
+    return tf.compat.v1.get_default_graph()
+
+def NetworkRun():
+    """Run the MatMul + Add graph once in an NPU-configured session and print the result."""
+    graph = generate_tf_graph()
+    feeds = {
+        graph.get_tensor_by_name('a:0'): np.array([[1.0, 2, 3], [4, 5, 6]]),
+        graph.get_tensor_by_name('b:0'): np.array([[1.0, 2], [3, 4], [5, 6]]),
+        graph.get_tensor_by_name('c:0'): np.array([[1.0, 1], [1, 1]]),
+    }
+    fetch = graph.get_tensor_by_name('add:0')
+
+    # Adapt the session to the NPU.
+    session_config = tf.compat.v1.ConfigProto()
+    npu_optimizer = session_config.graph_options.rewrite_options.custom_optimizers.add()
+    # Option 1: run inference on the Ascend AI processor.
+    npu_optimizer.name = "NpuOptimizer"
+    npu_optimizer.parameter_map["use_off_line"].b = True
+    # Option 2: for online inference keep the default force_fp16 (float16 inference) for better performance.
+    npu_optimizer.parameter_map["precision_mode"].s = tf.compat.as_bytes("force_fp16")
+    # Option 3: graph run mode; set 0 for inference, 1 is the default for training.
+    npu_optimizer.parameter_map["graph_run_mode"].i = 0
+
+    # Option 4: switch off remapping and the memory optimizer.
+    rewrite_options = session_config.graph_options.rewrite_options
+    rewrite_options.remapping = RewriterConfig.OFF
+    rewrite_options.memory_optimization = RewriterConfig.OFF
+
+    with tf.compat.v1.Session(config=session_config, graph=graph) as sess:
+        out = sess.run(fetch, feed_dict=feeds)
+        print('---out---\n', out)
+
+
+if __name__=='__main__':
+    NetworkRun()
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/CMakeLists.txt b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/CMakeLists.txt
new file mode 100644
index 
0000000000000000000000000000000000000000..60376b3fa427e1b72da7b3f64c77cc3afbafdb48
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/CMakeLists.txt
@@ -0,0 +1,74 @@
+message(STATUS "ASCEND_PATH: ${ASCEND_PATH}")
+
+########### generate eager style api ######################
+set(ES_GENERATED_DIR ${PROJECT_BINARY_DIR}/generated_ops)
+message(STATUS "Generated ES API directory: ${ES_GENERATED_DIR}")
+set(common_compile_options
+    -std=c++17
+    -g
+    -Wall
+)
+
+set(common_compile_definitions
+    _GLIBCXX_USE_CXX11_ABI=0
+)
+# 1. Create the output directory.
+file(MAKE_DIRECTORY ${ES_GENERATED_DIR})
+
+# 2. Write a placeholder source file into the build tree.
+file(WRITE ${ES_GENERATED_DIR}/placeholder.cpp
+    "// Placeholder file - will be replaced during build\n"
+    "void placeholder_function() {}\n")
+
+# 3. Collect the source list with file(GLOB); CONFIGURE_DEPENDS re-checks the glob at build time.
+file(GLOB GENERATED_CPP_FILES CONFIGURE_DEPENDS
+    "${ES_GENERATED_DIR}/*.cpp"
+)
+link_directories(${ASCEND_PATH}/libs)
+# 4. Code generation command. LD_LIBRARY_PATH comes from the environment ($ENV{...}), not a CMake variable.
+add_custom_command(
+    OUTPUT
+    ${ES_GENERATED_DIR}/generated_code.flag
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${ES_GENERATED_DIR}/*.cpp
+    COMMAND ${CMAKE_COMMAND} -E env ASCEND_OPP_PATH=${ASCEND_PATH}/ops LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${ASCEND_PATH}/lib64 ${ASCEND_PATH}/bin/gen_esb --output_dir=${ES_GENERATED_DIR}
+    COMMAND ${CMAKE_COMMAND} -E touch ${ES_GENERATED_DIR}/generated_code.flag
+    COMMENT "Generating eager style graph builder code..."
+    VERBATIM
+)
+# 5. Custom target that triggers the code generation.
+add_custom_target(generate_es_api ALL
+    DEPENDS ${ES_GENERATED_DIR}/generated_code.flag)
+
+# Temporary: remove this step once the run package ships the ES API shared library.
+########### generate eager style api so ######################
+
+add_library(esb_generated_shared SHARED
+    ${ES_GENERATED_DIR}/placeholder.cpp
+    ${GENERATED_CPP_FILES}
+)
+set_source_files_properties(${GENERATED_CPP_FILES}
+    PROPERTIES
+    GENERATED TRUE
+)
+add_dependencies(esb_generated_shared generate_es_api)
+target_compile_options(esb_generated_shared PRIVATE ${common_compile_options})
+target_compile_definitions(esb_generated_shared PRIVATE ${common_compile_definitions})
+
+target_include_directories(esb_generated_shared PUBLIC
+    ${ES_GENERATED_DIR}
+    ${PROJECT_SOURCE_DIR}/gen_es_api/temp
+    ${ASCEND_PATH}/opp/built-in/op_proto/inc
+    ${INCLUDE_DIR}/graph
+    ${INCLUDE_DIR}/ge
+    ${INCLUDE_DIR}/register
+    ${INCLUDE_DIR}
+    ${INCLUDE_DIR}/external
+)
+target_link_directories(esb_generated_shared PRIVATE
+    ${ASCEND_PATH}/compiler/lib64/stub
+    ${ASCEND_PATH}/compiler/lib64/
+)
+target_link_libraries(esb_generated_shared
+    PUBLIC
+    eager_style_graph_builder_base
+)
\ No newline at end of file
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/compliant_node_builder.h b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/compliant_node_builder.h
new file mode 100644
index 0000000000000000000000000000000000000000..f08fda116d42a8ece38f2b422172009090478299
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/compliant_node_builder.h
@@ -0,0 +1,303 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_COMPLIANT_NODE_BUILDER_H_
+#define AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_COMPLIANT_NODE_BUILDER_H_
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "graph/attr_value.h"
+#include "graph/gnode.h"
+#include "graph/operator.h"
+#include "ge_common/ge_api_types.h"
+
+namespace ge {
+/**
+ * @brief Helper that creates an attribute value from a value of any type
+ * @tparam T Input type
+ * @param t Input value, perfectly forwarded to AttrValue::SetAttrValue
+ * @return The created attribute value object
+ */
+template <typename T>
+AttrValue CreateFrom(T &&t) {
+  AttrValue av;
+  av.SetAttrValue(std::forward<T>(t));
+  return av;
+}
+
+/**
+ * @brief Compliant node builder, used to build graph nodes that conform to the IR specification
+ *
+ * Provides a fluent API for defining a node's IR inputs, outputs, attributes and subgraphs,
+ * so that the generated node meets the graph engine's IR requirements.
+ */
+class CompliantNodeBuilder {
+ public:
+  /**
+   * @brief Operator of arbitrary type, derived from Operator
+   *
+   * Exposes the IR-related methods of Operator: dynamic input/output registration,
+   * input/output registration, attribute registration and subgraph registration.
+   */
+  class AnyTypeOperator : public Operator {
+   public:
+    AnyTypeOperator(const char_t *name, const char_t *type) : Operator(name, type) {}
+    using Operator::DynamicInputRegister;
+    using Operator::InputRegister;
+    using Operator::OptionalInputRegister;
+
+    using Operator::DynamicOutputRegister;
+    using Operator::OutputRegister;
+
+    using Operator::AttrRegister;
+    using Operator::RequiredAttrWithTypeRegister;
+
+    using Operator::SubgraphRegister;
+  };
+
+  /**
+   * @brief IR attribute kinds
+   */
+  enum IrAttrType {
+    kEsAttrRequired,  ///< Required attribute
+    kEsAttrOptional   ///< Optional attribute
+  };
+
+  /**
+   * @brief Subgraph kinds
+   */
+  enum SubgraphType {
+    kEsSubgraphStatic,   ///< Static subgraph
+    kEsSubgraphDynamic,  ///< Dynamic subgraph
+    kEsSubgraphEnd
+  };
+
+  /**
+   * @brief IR input kinds
+   */
+  enum IrInputType {
+    kEsIrInputRequired,  ///< Required input
+    kEsIrInputOptional,  ///< Optional input
+    kEsIrInputDynamic,   ///< Dynamic input
+    kEsIrInputTypeEnd
+  };
+
+  /**
+   * @brief IR output kinds
+   */
+  enum IrOutputType {
+    kEsIrOutputRequired,  ///< Required output
+    kEsIrOutputDynamic,   ///< Dynamic output
+    kEsIrOutputTypeEnd
+  };
+
+  /**
+   * @brief IR attribute definition
+   */
+  struct IrAttrDef {
+    std::string attr_name;
+    IrAttrType ir_attr_type;
+    std::string attr_data_type;  // see `kIrAttrTypesMap` in `operator.cc`
+    AttrValue attr_default_value;
+  };
+
+  /**
+   * @brief IR input definition
+   */
+  struct IrInputDef {
+    std::string name;
+    IrInputType ir_input_type;
+    std::string symbol_id;
+  };
+
+  /**
+   * @brief IR output definition
+   */
+  struct IrOutputDef {
+    std::string name;
+    IrOutputType ir_output_type;
+    std::string symbol_id;
+  };
+
+  /**
+   * @brief IR subgraph definition
+   */
+  struct IrSubgraphDef {
+    std::string name;
+    SubgraphType subgraph_type;
+  };
+
+ public:
+  /**
+   * @brief Constructor
+   * @param graph Graph that will own the node
+   */
+  explicit CompliantNodeBuilder(ge::Graph *graph) : owner_graph_(graph) {}
+
+  /**
+   * @brief Set the operator type
+   * @param type Operator type string
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &OpType(const char_t *type);
+
+  /**
+   * @brief Define the IR input specification
+   * @param input_ir_def Vector of IR input definitions
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &IrDefInputs(std::vector<IrInputDef> input_ir_def);
+
+  /**
+   * @brief Define the IR output specification
+   * @param output_ir_def Vector of IR output definitions
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &IrDefOutputs(std::vector<IrOutputDef> output_ir_def);
+
+  /**
+   * @brief Define the IR attribute specification
+   * @param attr_ir_def Vector of IR attribute definitions
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &IrDefAttrs(std::vector<IrAttrDef> attr_ir_def);
+
+  /**
+   * @brief Define the IR subgraph specification
+   * @param subgraph_ir_def Vector of IR subgraph definitions
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &IrDefSubgraphs(std::vector<IrSubgraphDef> subgraph_ir_def);
+
+  /**
+   * @brief Set the node name
+   * @param name Node name
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &Name(const char_t *name);
+
+  /**
+   * @brief Set the number of instances of a dynamic input
+   * @param ir_name IR input name
+   * @param num Number of instances
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceDynamicInputNum(const char_t *ir_name, int32_t num);
+
+  /**
+   * @brief Set the number of instances of a dynamic output
+   * @param ir_name IR output name
+   * @param num Number of instances
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceDynamicOutputNum(const char_t *ir_name, int32_t num);
+
+  /**
+   * @brief Set the data type of an output
+   * @param name Output name
+   * @param data_type Data type
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputDataType(const char_t *name, ge::DataType data_type);
+
+  /**
+   * @brief Set the shape of an output
+   * @param name Output name
+   * @param shape Shape vector
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputShape(const char_t *name, const std::vector<int64_t> &shape);
+
+  /**
+   * @brief Set the origin shape of an output
+   * @param name Output name
+   * @param shape Origin shape vector
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputOriginShape(const char_t *name, const std::vector<int64_t> &shape);
+
+  /**
+   * @brief Set the storage shape of an output
+   * @param name Output name
+   * @param shape Storage shape vector
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputStorageShape(const char_t *name, const std::vector<int64_t> &shape);
+
+  /**
+   * @brief Set the format of an output
+   * @param name Output name
+   * @param format Format
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputFormat(const char_t *name, ge::Format format);
+
+  /**
+   * @brief Set the origin format of an output
+   * @param name Output name
+   * @param format Origin format
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputOriginFormat(const char_t *name, ge::Format format);
+
+  /**
+   * @brief Set the storage format of an output
+   * @param name Output name
+   * @param format Storage format
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceOutputStorageFormat(const char_t *name, ge::Format format);
+
+  /**
+   * @brief Set the subgraph instances
+   * @param name Subgraph name
+   * @param sub_graphs Vector of subgraphs
+   * @return Reference to this builder, to allow call chaining
+   */
+  CompliantNodeBuilder &InstanceSubgraphs(const char_t *name, const std::vector<ge::Graph> &sub_graphs);
+
+  /**
+   * @brief Build and return the graph node
+   * @return The built graph node object
+   */
+  ge::GNode Build() const;
+
+ private:
+  void RegisterInputs(AnyTypeOperator &op) const;
+  void RegisterOutputs(AnyTypeOperator &op) const;
+  Status UpdateOutputDescs(AnyTypeOperator &op) const;
+  Status RegisterAttrs(AnyTypeOperator &op) const;
+  Status RegisterSubgraphs(AnyTypeOperator &op) const;
+
+ private:
+  ge::Graph *owner_graph_{nullptr};
+
+  // Members describing the IR definition
+  std::string type_;
+  std::vector<IrInputDef> ir_def_inputs_;
+  std::vector<IrOutputDef> ir_def_outputs_;
+  std::vector<IrAttrDef> ir_def_attrs_;
+  std::vector<IrSubgraphDef> ir_def_subgraphs_;
+
+  // Members describing the node instance
+  std::string name_;
+  std::unordered_map<std::string, int32_t> dynamic_input_ir_names_to_inst_num_;
+  std::unordered_map<std::string, int32_t> dynamic_output_ir_names_to_inst_num_;
+  std::unordered_map<std::string, ge::TensorDesc> output_names_to_td_;
+  std::unordered_map<std::string, std::vector<ge::Graph>> ir_names_to_subgraphs_;
+};
+}  // namespace ge
+
+#endif  // AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_COMPLIANT_NODE_BUILDER_H_
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_graph_builder.h b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_graph_builder.h
new file mode 100644
index 0000000000000000000000000000000000000000..10f0df0fcae9bfa778257bda3f7ee761c5b68cd2
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_graph_builder.h
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_ESC_GRAPH_BUILDER_H_
+#define AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_ESC_GRAPH_BUILDER_H_
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+#include "graph/graph.h"
+#include "graph/c_types.h"
+#include "es_c_tensor_holder.h"
+struct EsCGraphBuilder {
+ public:
+  /// @brief Default constructor; delegates to the named constructor with the name "graph"
+  EsCGraphBuilder() : EsCGraphBuilder("graph") {}
+
+  /**
+   * @brief Constructor
+   * @param name Graph name
+   */
+  explicit EsCGraphBuilder(const char *name);
+
+  /**
+   * @brief Get the tensor holder for an output of a graph node
+   * @param node Graph node
+   * @param output_index Output index
+   * @return Pointer to the tensor holder
+   */
+  EsCTensorHolder *GetTensorHolderFromNode(const ge::GNode &node, int32_t output_index);
+
+  /**
+   * @brief Append a graph input after the existing ones
+   * @param name Input name, optional
+   * @param type Input type, optional
+   * @return Pointer to the tensor holder
+   */
+  EsCTensorHolder *AppendGraphInput(const ge::char_t *name = nullptr, const ge::char_t *type = nullptr);
+
+  /**
+   * @brief Add a graph input at the given position
+   * @param index Input index position
+   * @param name Input name, optional
+   * @param type Input type, optional
+   * @param data_type Data type, defaults to C_DT_FLOAT
+   * @param format Data format, defaults to C_FORMAT_ND
+   * @param dims Array of dimensions, optional
+   * @param dim_num Number of dimensions, defaults to 0
+   * @return Pointer to the tensor holder
+   */
+  EsCTensorHolder *AddGraphInput(int32_t index, const ge::char_t *name = nullptr, const ge::char_t *type = nullptr,
+                                 C_DataType data_type = C_DT_FLOAT, C_Format format = C_FORMAT_ND,
+                                 const int64_t *dims = nullptr, int64_t dim_num = 0);
+
+  /**
+   * @brief Set a graph output
+   * @param tensor Tensor holder
+   * @param output_index Output index
+   * @return Operation status
+   */
+  ge::Status SetGraphOutput(EsCTensorHolder *tensor, int32_t output_index);
+
+  /**
+   * @brief Get the internal graph object
+   * @return Pointer to the graph object
+   */
+  ge::Graph *GetGraph();
+
+  /**
+   * @brief Build and return the graph object
+   * @return The built graph object
+   */
+  std::unique_ptr<ge::Graph> BuildGraph();
+
+  /**
+   * @brief Generate a node name
+   * @param node_type Node type
+   * @return The generated node name string
+   */
+  ge::AscendString GenerateNodeName(const ge::char_t *node_type);
+
+ private:
+  /**
+   * @brief Check whether the graph is valid
+   * @return Whether the graph is valid
+   */
+  bool IsGraphValid() const;
+
+  /**
+   * @brief Internal implementation of getting the tensor holder from a graph node
+   * @param node Graph node
+   * @param output_index Output index
+   * @return Pointer to the tensor holder
+   */
+  EsCTensorHolder *GetTensorHolderFromNodeInner(const ge::GNode &node, int32_t output_index);
+
+ private:
+  int64_t NextNodeIndex();
+
+ private:
+  std::unique_ptr<ge::Graph> graph_;
+  std::list<std::unique_ptr<EsCTensorHolder>> tensors_holder_;
+  std::set<int32_t> graph_input_indexes_;
+  std::map<int32_t, EsCTensorHolder *> output_indexes_to_tensor_;
+  int64_t nodes_num_{0};
+};
+
+#endif  // AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_GRAPH_BUILDER_ESC_GRAPH_BUILDER_H_
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_tensor_holder.h b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_tensor_holder.h
new file mode 100644
index 0000000000000000000000000000000000000000..db624bea2e2d54710458067f368f2902e9e5f262
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/gen_es_api/temp/es_c_tensor_holder.h
@@ -0,0 +1,121 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_EAGER_STYLE_GRAPH_BUILDER_ESC_TENSOR_HOLDER_H_
+#define AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_EAGER_STYLE_GRAPH_BUILDER_ESC_TENSOR_HOLDER_H_
+#include "graph/gnode.h"
+#include "ge_common/ge_api_types.h"
+#include "ge_common/ge_api_error_codes.h"
+struct EsCGraphBuilder;
+struct EsCTensorHolder {
+ public:
+  /**
+   * @brief Constructor
+   * @param owner Graph builder that owns this holder
+   * @param producer Producer node
+   * @param index Output index
+   *
+   * Note: the caller must guarantee that `producer` is not empty and that `index` is valid
+   */
+  EsCTensorHolder(EsCGraphBuilder &owner, const ge::GNode &producer, int32_t index)
+      : owner_graph_builder_(owner), producer_(producer), producer_out_index_(index) {}
+
+  /**
+   * @brief Set the data type
+   * @param data_type Data type
+   * @return Operation status
+   */
+  ge::Status SetDataType(const ge::DataType data_type);
+
+  /**
+   * @brief Set the data format
+   * @param format Data format
+   * @return Operation status
+   */
+  ge::Status SetFormat(const ge::Format format);
+
+  /**
+   * @brief Set the origin data format
+   * @param format Origin data format
+   * @return Operation status
+   */
+  ge::Status SetOriginFormat(const ge::Format format);
+
+  /**
+   * @brief Set the storage data format
+   * @param format Storage data format
+   * @return Operation status
+   */
+  ge::Status SetStorageFormat(const ge::Format format);
+
+  /**
+   * @brief Set the origin shape
+   * @param shape Origin shape
+   * @return Operation status
+   */
+  ge::Status SetOriginShape(const ge::Shape &shape);
+
+  /**
+   * @brief Set the storage shape
+   * @param shape Storage shape
+   * @return Operation status
+   */
+  ge::Status SetStorageShape(const ge::Shape &shape);
+
+  /**
+   * @brief Set the shape (sets both the origin shape and the storage shape)
+   * @param shape Shape
+   * @return Operation status
+   */
+  ge::Status SetShape(const ge::Shape &shape);
+
+  /**
+   * @brief Set the origin symbolic shape
+   * @param shape_str Array of shape strings
+   * @param shape_str_num Number of shape strings
+   * @return Operation status
+   */
+  ge::Status SetOriginSymbolShape(const char *const *shape_str, const int64_t shape_str_num);
+
+  /**
+   * @brief Get the producer node
+   * @return Reference to the producer node
+   */
+  ge::GNode &GetProducer();
+
+  /**
+   * @brief Get the output index
+   * @return Output index
+   */
+  int32_t GetOutIndex() const;
+
+  /**
+   * @brief Get the graph builder that owns this holder
+   * @return Reference to the graph builder
+   */
+  EsCGraphBuilder &GetOwnerBuilder();
+
+ private:
+  ge::TensorDesc GetTd();
+
+  void UpdateTd(const ge::TensorDesc &td);
+
+  EsCGraphBuilder &owner_graph_builder_;
+  ge::GNode producer_;
+  int32_t producer_out_index_;
+};
+
+#endif  // AIR_CXX_COMPILER_GRAPH_EAGER_STYLE_EAGER_STYLE_GRAPH_BUILDER_ESC_TENSOR_HOLDER_H_
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/readme.md b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..05016ec67ea14d50a24c6a65245b74557182a845
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/readme.md
@@ -0,0 +1,119 @@
+# 样例使用指导
+
+## 功能描述
+
+本样例为MatMul+Add融合为GEMM自定义pass样例,使用TF在线推理演示框架如何调用自定义pass完成图优化。
+本样例使用eager style api和融合接口实现
+
+## 目录结构
+
+```
+├── src
+│   ├──fuse_matmul_add_pass.cpp  // pass实现文件
+├── CMakeLists.txt               // 编译脚本
+├── data
+|   ├──tf_forward.py             // TF在线构出原图后进行自定义pass和其他框架内置pass优化,然后执行优化后的图得到结果
+|—— gen_es_api
+|   |——temp                      // 临时目录,待头文件发布后删除
+|   |   |——compliant_node_builder.h
+|   |   |——es_c_graph_builder.h
+|   |   |——es_c_tensor_holder.h
+|   |——CMakeLists.txt            // 生成eager style api的编译脚本
+```
+
+## 环境要求
+
+- 操作系统及架构:CentOS x86系统、CentOS aarch64系统、Euleros x86系统、Euleros aarch64系统
+- 编译器:gcc7及以上
+- 芯片:all
+- python及依赖的库:python3.7.5、tensorflow1.15.0
+- 已完成昇腾AI软件栈在开发环境上的部署
+
+
+## 程序编译
+
+1. 根据实际情况修改**CMakeLists.txt**文件中的如下信息。
+
+   - ASCEND_PATH:指定到ATC或FwkACLlib的安装目录,例如/home/HwHiAiUser/Ascend/ascend-toolkit/latest
+
+   - target_include_directories:需要包含的头文件,对于本示例,无需修改。如果是用户自行开发的代码,当需要添加头文件时,在示例下方直接增加行即可,注意不要删除原有项目。如果网络中有自定义算子,请增加自定义算子的原型定义头文件。
+
+   - target_link_libraries:需要链接的库,对于本示例,无需修改。如果是用户自行开发的代码,当需要添加链接库时,在示例下方直接增加行即可,注意不要删除原有项目。
+
+     >禁止链接软件包中的其他so,否则后续升级可能会导致兼容性问题。
+
+2. 执行如下命令 生成eager style api
+
+   依次执行:
+
+   ```
+   mkdir build && cd build
+   cmake ..
+   make generate_es_api
+   ```
+   执行后,在**build**目录下产生generated_ops目录,内含es构图api的头文件及源码
+3. 
执行如下命令生成es api对应的so,并拷贝到run包安装路径下
+   注意:此为临时步骤,待run包中发布了es api so,该步骤可删除
+   ```
+   make esb_generated_shared
+   ```
+   编译完成后,在**build**/gen_es_api目录下生成libesb_generated_shared.so
+   将so拷贝到ASCEND_PATH下lib64目录下
+   ```
+   cp gen_es_api/libesb_generated_shared.so ${ASCEND_PATH}/x86_64-linux/lib64
+   ln -sf ${ASCEND_PATH}/x86_64-linux/lib64/libesb_generated_shared.so ${ASCEND_PATH}/compiler/lib64/libesb_generated_shared.so
+   ```
+   若为arm环境,目标路径调整为
+   ```
+   cp gen_es_api/libesb_generated_shared.so ${ASCEND_PATH}/aarch64-linux/lib64
+   ln -sf ${ASCEND_PATH}/aarch64-linux/lib64/libesb_generated_shared.so ${ASCEND_PATH}/compiler/lib64/libesb_generated_shared.so
+   ```
+4. 执行如下命令编译自定义pass so
+   ```
+   make
+   ```
+   编译结束后,在**build**目录下生成动态库文件**libfuse_matmul_add_pass.so**。
+   将**libfuse_matmul_add_pass.so**拷贝到$\{ASCEND\_PATH\}/opp/vendors/xxx/custom\_fusion\_passes/目录下。其中“xxx”为用户自定义目录。
+
+## 程序运行
+
+1. 配置环境变量。
+
+   - 若运行环境上安装的是“Ascend-cann-toolkit”包,环境变量设置如下:
+
+     ```
+     . ${HOME}/Ascend/ascend-toolkit/set_env.sh
+     ```
+
+     “$HOME/Ascend”请替换相关软件包的实际安装路径。
+
+2. 使用TF在线推理。
+
+   - 在线推理分别在目标文件夹下存放和不存放自定义pass so,执行如下命令:
+
+     **python3.7.5 tf_forward.py**
+
+     两次运行结果相同,结果展示:
+
+     ```
+     ---out---
+     [[23. 29.]
+     [50. 65.]]
+     ```
+
+   - 检查执行结果:
+
+     - 自定义pass生效前后运行结果相同。
+
+     - 自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_1_RunCustomPassBegin.pbtxt**和**ge_onnx_xxxxxxxx_graph_1_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。
+
+     - 日志中出现如下打印:
+
+     ```
+     EagerFuseMatMulAndAddPass begin.
+     Found MatMulAndAdd in graph.
+     Replace MatMulAndAdd to GEMM SUCCESS.
+     EagerFuseMatMulAndAddPass end.
+     ```
+
+    
\ No newline at end of file
diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/src/fuse_matmul_add_pass.cpp b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/src/fuse_matmul_add_pass.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d9e0687da595e2907e87674ddffb97197595da84
--- /dev/null
+++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass_in_eager_style/src/fuse_matmul_add_pass.cpp
@@ -0,0 +1,74 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+#include "register_custom_pass.h"
+#include "es_all_ops.h"
+#include "pattern_matcher.h"
+#include "graph_rewriter.h"
+#include "pattern_fusion_pass.h"
+#include "fusion_pass_reg.h"
+
+using namespace ge;
+using namespace fusion;
+// |o>-----------------------------------
+// |o>      a  b
+// |o>      \  /              a   b   c
+// |o>     MatMul  c    ==>    \  |  /
+// |o>        \   /             GEMM
+// |o>         Add
+// |o>-----------------------------------
+// Fusion: this sample matches the MatMul+Add structure on the left of the diagram and uses the graph
+// rewriting interfaces to replace it with the single GEMM node on the right.
+// Return values: the graph rewriting interfaces return a status that should be checked. For readability,
+// functions other than the pass entry point only receive the return value and add no handling code. When a
+// check is needed, it can be combined with custom_context.SetErrorMessage("xxx").
+class FuseMatMulAndAddInEagerPass : public PatternFusionPass {
+protected:
+  // NOTE(review): the Graph / MatchResult template arguments are reconstructed from usage - confirm against pattern_fusion_pass.h.
+  /// Builds the pattern graph to match: Add(c, MatMulV2(a, b)) over three graph inputs.
+  std::vector<std::unique_ptr<Graph>> PatternGraphs() override {
+    std::vector<std::unique_ptr<Graph>> pattern_graphs;
+    // NOTE(review): data/tf_forward.py builds add(matmul, c) while this pattern is Add(c, matmul);
+    // presumably the matcher treats the Add inputs as commutative - confirm.
+    auto graph_builder = es::EsGraphBuilder("pattern");
+    auto a = graph_builder.CreateInput(0);
+    auto b = graph_builder.CreateInput(1);
+    auto c = graph_builder.CreateInput(2);
+
+    auto matmul = es::MatMulV2(a, b, nullptr, nullptr, false, false, false);
+    auto add = es::Add(c, matmul);
+    pattern_graphs.emplace_back(graph_builder.Build({add}));
+    return pattern_graphs;
+  }
+  /// Accepts every match of the pattern; no extra constraint is applied.
+  bool MeetRequirements(const std::unique_ptr<MatchResult> &match_result) override {
+    return true;
+  }
+  /// Builds the replacement graph: GEMM(a, b, c, alpha, beta) with alpha and beta created as scalar 1.
+  std::unique_ptr<Graph> Replacement(const std::unique_ptr<MatchResult> &match_result) override {
+    auto replace_graph_builder = es::EsGraphBuilder("replacement");
+    auto r_a = replace_graph_builder.CreateInput(0);
+    auto r_b = replace_graph_builder.CreateInput(1);
+    auto r_c = replace_graph_builder.CreateInput(2);
+    auto alpha_const = replace_graph_builder.CreateScalar(1);
+    auto beta_const = replace_graph_builder.CreateScalar(1);
+    auto gemm = es::GEMM(r_a, r_b, r_c, alpha_const, beta_const);
+    return replace_graph_builder.Build({gemm});
+  }
+};
+REG_FUSION_PASS(FuseMatMulAndAddInEagerPass).Stage(CustomPassStage::kBeforeInferShape);
\ No newline at end of file