From 47cffec33e926f1d3cbd3d13cfe8d9ae68b11637 Mon Sep 17 00:00:00 2001
From: huanruizhi
Date: Fri, 13 Nov 2020 15:03:35 +0800
Subject: [PATCH] dynamic dims

---
 tf_adapter/interface_spec/api_npu_config.pyh  |   4 +-
 tf_adapter/kernels/geop_npu.cc                | 111 +++++++++++++++++-
 tf_adapter/kernels/geop_npu.h                 |   8 ++
 .../optimizers/om_partition_subgraphs_pass.cc |  15 +++
 .../npu_bridge/estimator/npu/npu_config.py    |   8 +-
 .../npu_bridge/estimator/npu/npu_estimator.py |   8 +-
 tf_adapter/util/npu_attrs.cc                  |  51 +++++---
 tf_adapter/util/npu_attrs.h                   |   1 +
 8 files changed, 179 insertions(+), 27 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 51f311f98..1e963266c 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -10,7 +10,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  dump_config=None, stream_max_parallel_num=None, is_tailing_optimization=False, horovod_mode=False,
                  graph_run_mode=1, op_debug_level=0, enable_scope_fusion_passes=None, enable_exception_dump=0,
                  op_select_implmode=None, optypelist_for_implmode=None, dynamic_input_config=None,
-                 mstune_mode=None, work_path=None, buffer_optimize=None, enable_small_channel=0, fusion_switch_file=None,
+                 mstune_mode=None, work_path=None, buffer_optimize="l2_optimize", enable_small_channel=0, fusion_switch_file=None,
                  enable_compress_weight=False, compress_weight_conf=None,
                  op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None):

@@ -22,4 +22,4 @@ class DumpConfig():
                  dump_mode="output", enable_dump_debug=False, dump_debug_mode="all"):

 class DynamicInputConfig():
-    def __init__(self, input_shape, dynamic_dims):
\ No newline at end of file
+    def __init__(self, input_shape, dynamic_dims, dynamic_node_type):
\ No newline at end of file
#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/strings/str_util.h" #include "framework/common/ge_inner_error_codes.h" @@ -141,12 +142,20 @@ Status BuildOutputTensorInfo(OpKernelContext *ctx, std::vector, uint32_t> &p1, const std::pair, uint32_t> &p2) { return p1.second < p2.second; } +bool CmpVecValue(Node *node1, Node *node2) { + if (node1 == nullptr || node2 == nullptr) { + LOG(ERROR) << "node1 or node2 is nullptr."; + return false; + } + return node1->name() < node2->name(); +} +} // namespace + std::string CurrentTimeInStr() { std::time_t now = std::time(nullptr); std::tm *ptm = std::localtime(&now); @@ -723,6 +732,10 @@ void GeOp::BuildGraphDef(OpKernelContext *ctx, DoneCallback done, const Function Graph graph(OpRegistry::Global()); OP_REQUIRES_OK_ASYNC(ctx, InferShapeUtil::InferShape(input_vec, &flib_def, &func_def, &graph), done); + bool is_set_dynamic_config = !sess_options_["ge.inputShape"].empty() && !sess_options_["ge.dynamicDims"].empty() && + !sess_options_["ge.dynamicNodeType"].empty(); + if (is_set_dynamic_config) { BuildShapeNodeAndCacheArgNodes(graph); } + bool is_tuning = !mstune_mode_.empty() && !work_path_.empty(); for (Node *node : graph.nodes()) { AddNodeAttrs(node, is_initialize); @@ -749,10 +762,104 @@ void GeOp::BuildGraphDef(OpKernelContext *ctx, DoneCallback done, const Function } } } - + // set input_shape to dynamic nodes shape desc + if (is_set_dynamic_config) { ChangeInputsShapeDesc(ctx, done); } graph.ToGraphDef(&graph_def); } +void GeOp::BuildShapeNodeAndCacheArgNodes(Graph &graph) { + std::string dynamic_node_type = sess_options_["ge.dynamicNodeType"]; + for (Node *node : graph.nodes()) { + // add shape node to get getnext node real shape + if (dynamic_node_type == "0" && node->type_string() == "IteratorGetNext") { + dynamic_shape_nodes_.emplace_back(node); + int i = 0; + for (auto out_edge : node->out_edges()) { + if (!out_edge->IsControlEdge()) { + std::string shape_name = "getnext_shape_" + std::to_string(i); + Node *shape_node = nullptr; + TF_CHECK_OK(NodeBuilder(shape_name, "Shape") + .Input(node, out_edge->src_output()) + .Device(node->def().device()) + .Finalize(&graph, &shape_node)); + std::string identity_name = "shape_identity_" + std::to_string(i); + Node *identity_node = nullptr; + TF_CHECK_OK(NodeBuilder(identity_name, "Identity") + .Input(shape_node, 0) + .Device(shape_node->def().device()) + .Finalize(&graph, &identity_node)); + } + i++; + } + } + // count data args and getnext args for dynamic dims + if (node->type_string() == "_Arg") { + if (node->name().find("IteratorGetNext_") != std::string::npos) { + if (dynamic_node_type == "0") { dynamic_shape_nodes_.emplace_back(node); } + } else { + if (dynamic_node_type == "1") { dynamic_shape_nodes_.emplace_back(node); } + } + } + } + // sort dynamic nodes to match input_shapes + std::sort(dynamic_shape_nodes_.begin(), dynamic_shape_nodes_.end(), CmpVecValue); +} + +void GeOp::ChangeInputsShapeDesc(OpKernelContext *ctx, DoneCallback done) { + std::vector result; + std::string input_shapes = sess_options_["ge.inputShape"]; + Split(input_shapes, result, ";"); //e.g. 
result:["data:2,3", "data1:3,4"] + + if (dynamic_shape_nodes_.size() == 1 && dynamic_shape_nodes_[0]->type_string() == "IteratorGetNext") { + LOG(INFO) << "[GEOP] change " << dynamic_shape_nodes_[0]->name() << " shape desc."; + NodeDef &node_def = const_cast(dynamic_shape_nodes_[0]->def()); + AttrValue &output_tensor_descs = (*node_def.mutable_attr())[OUTPUT_DESC]; + for (size_t i = 0; i < dynamic_shape_nodes_[0]->num_outputs(); ++i) { + AttrValue attr_shape_value; + SetShapesToOutputDesc(result, i, attr_shape_value); + (*output_tensor_descs.mutable_list()->mutable_func(i)->mutable_attr())[SERIALIZE_SHAPE] = attr_shape_value; + } + } else { + if (!dynamic_shape_nodes_.empty()) { + OP_REQUIRES_ASYNC(ctx, dynamic_shape_nodes_.size() == result.size(), + errors::Internal("input_shape is not match inputs num in graph"), done); + } + for (size_t i = 0; i < dynamic_shape_nodes_.size(); ++i) { + LOG(INFO) << "[GEOP] change " << dynamic_shape_nodes_[i]->name() << " shape desc."; + NodeDef &node_def = const_cast(dynamic_shape_nodes_[i]->def()); + AttrValue &output_tensor_descs = (*node_def.mutable_attr())[OUTPUT_DESC]; + AttrValue attr_shape_value; + SetShapesToOutputDesc(result, i, attr_shape_value); + (*output_tensor_descs.mutable_list()->mutable_func(0)->mutable_attr())[SERIALIZE_SHAPE] = attr_shape_value; + } + } + LOG(INFO) << "[GEOP] change input shapes desc success."; +} + +void GeOp::SetShapesToOutputDesc(const std::vector &input_shapes, + const int &index, AttrValue &attr_shape_value) { + if (input_shapes.empty()) { + LOG(ERROR) << "[GEOP] input_shapes is empty."; + return; + } + if (index < 0) { + LOG(ERROR) << "[GEOP] index must more than 0."; + return; + } + LOG(INFO) << "[GEOP] get input: " << index << " input shape is: " << input_shapes[index]; + std::vector shape; + Split(input_shapes[index], shape, ":"); // e.g. shape:["data", "2,3,4"] + if (shape.empty() || shape.size() != 2) { + LOG(ERROR) << "[GEOP] shape is empty or shape size is not 2."; + return; + } + std::vector dims; + Split(shape[1], dims, ","); // e.g. 
dims:["2", "3", "4"] + for (auto dim : dims) { + attr_shape_value.mutable_list()->add_i(std::atoi(dim.c_str())); + } +} + Status GeOp::BuildInputTensorInfo(OpKernelContext *ctx, std::vector &inputs) { // ctx is not nullptr int num_inputs = ctx->num_inputs(); diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index 5d37b6ec2..a178b61de 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -90,6 +90,13 @@ class GeOp : public AsyncOpKernel { void GetMsTuneConfig(std::map init_options); + void SetShapesToOutputDesc(const std::vector &input_shapes, + const int &index, AttrValue &attr_shape_value); + + void BuildShapeNodeAndCacheArgNodes(Graph &graph); + + void ChangeInputsShapeDesc(OpKernelContext *ctx, DoneCallback done); + private: static const std::string INPUT_DESC; static const std::string OUTPUT_DESC; @@ -129,6 +136,7 @@ class GeOp : public AsyncOpKernel { void *handle_; MsTuningFunc tuning_api_; string auto_tune_mode_; + std::vector dynamic_shape_nodes_; }; } // namespace tensorflow #endif // TENSORFLOW_KERNELS_GEOP_NPU_H_ diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index 7b0fc231d..151f21f9f 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -1764,6 +1764,13 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr *graph, Fun LOG(INFO) << "OMPartition subgraph_" << std::to_string(graph_num) << " begin."; LOG(INFO) << "mix_compile_mode is " << (mix_compile_mode ? "True" : "False"); LOG(INFO) << "iterations_per_loop is " << iterations_per_loop; + LOG(INFO) << "input_shape: " << all_options["input_shape"] + << "dynamic_dims: " << all_options["dynamic_dims"]; + bool is_set_dynamic_config = !all_options["input_shape"].empty() && + !all_options["dynamic_dims"].empty(); + if (is_set_dynamic_config && mix_compile_mode) { + LOG(FATAL) << "dynamic config can not use with mix compile."; + } char *need_print = getenv("PRINT_MODEL"); @@ -1777,6 +1784,7 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr *graph, Fun string graph_format_value; Graph *graphIn = graph->get(); + int getnext_node_count = 0; for (Node *node : graphIn->op_nodes()) { if (node->type_string() == "NPUInit") { std::string attr_name; @@ -1816,6 +1824,13 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr *graph, Fun } graphIn->RemoveNode(node); } + if (is_set_dynamic_config && node->type_string() == "IteratorGetNext") { + getnext_node_count++; + } + } + if (getnext_node_count > 1) { + LOG(FATAL) << "dynamic dims func can not support graph with " + << getnext_node_count << " IteratorGetNext node."; } for (Node *node : graphIn->op_nodes()) { diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index bbd12754d..26d147b8c 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -54,7 +54,7 @@ class NPURunConfig(run_config_lib.RunConfig): dynamic_input_config=None, mstune_mode=None, work_path=None, - buffer_optimize=None, + buffer_optimize="l2_optimize", enable_small_channel=0, fusion_switch_file=None, enable_compress_weight=False, @@ -268,7 +268,7 @@ class NpuExecutePlacement(Enum): class DynamicInputConfig(): """dynamic dims and input shape config with npu support""" - def __init__(self, input_shape, dynamic_dims): + def __init__(self, 
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 7aaa14d80..db6e64f43 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -672,11 +672,13 @@ class NPUEstimator(estimator_lib.Estimator):
             custom_op: Customer optimizers.
         """
-        if config._dynamic_input_config is not None \
-            and config._dynamic_input_config._input_shape is not None \
-            and config._dynamic_input_config._dynamic_dims is not None:
+        if (config._dynamic_input_config is not None and
+                config._dynamic_input_config._input_shape is not None and
+                config._dynamic_input_config._dynamic_dims is not None and
+                config._dynamic_input_config._dynamic_node_type is not None):
             custom_op.parameter_map["input_shape"].s = tf.compat.as_bytes(config._dynamic_input_config._input_shape)
             custom_op.parameter_map["dynamic_dims"].s = tf.compat.as_bytes(config._dynamic_input_config._dynamic_dims)
+            custom_op.parameter_map["dynamic_node_type"].i = config._dynamic_input_config._dynamic_node_type

     def __load_mstune_config(self, config, custom_op):
         """Load mstune config ,and add to custom_optimizers
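
The hunk above only runs inside NPUEstimator; sessions configured by hand can set the same three parameter_map entries directly. A sketch mirroring the estimator code (TF 1.x; the option values are illustrative):

    import tensorflow as tf

    config = tf.ConfigProto()
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    custom_op.parameter_map["input_shape"].s = tf.compat.as_bytes("data:-1,3")
    custom_op.parameter_map["dynamic_dims"].s = tf.compat.as_bytes("8;16;32")
    custom_op.parameter_map["dynamic_node_type"].i = 0  # 0: dataset input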
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 45af4d65e..a893246ef 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -65,7 +65,7 @@ Status GetEnvDeviceID(uint32_t &device_id) {
   }
   return Status::OK();
 }
-inline void split(const std::string &s, std::vector<std::string> &result, const char *delchar = " ") {
+void Split(const std::string &s, std::vector<std::string> &result, const char *delchar) {
   if (s.empty()) { return; }
   result.clear();
   char *buffer = new char[s.size() + 1];
@@ -91,7 +91,7 @@ inline bool checkProfilingOptions(string &options) {
   validOptions.insert("op_trace");

   std::vector<string> optionVec;
-  split(options, optionVec, ":");
+  Split(options, optionVec, ":");
   if (optionVec.empty()) { return false; }
   for (const auto &option : optionVec) {
     if (validOptions.find(option) == validOptions.end()) { return false; }
@@ -105,12 +105,12 @@ inline Status checkDumpStep(const string &dump_step) {
   std::vector<string> match_vecs;
   std::regex pattern(R"((\d{1,}-\d{1,}\||\d{1,}\|)+)");
   if (regex_match(tmp_dump_step, result, pattern)) {
-    split(result.str(), match_vecs, "|");
+    Split(result.str(), match_vecs, "|");
     // 100 is the max sets of dump steps.
     if (match_vecs.size() > 100) { return errors::InvalidArgument("dump_step only support dump <= 100 sets of data"); }
     for (const auto &match_vec : match_vecs) {
       std::vector<string> tmp_vecs;
-      split(match_vec, tmp_vecs, "-");
+      Split(match_vec, tmp_vecs, "-");
       if (tmp_vecs.size() > 1) {
         if (std::atoi(tmp_vecs[0].c_str()) >= std::atoi(tmp_vecs[1].c_str())) {
           return errors::InvalidArgument("in range steps, the first step is >= "
@@ -199,13 +199,14 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
   std::string is_tailing_optimization = std::to_string(false);
   std::string op_select_implmode;
   std::string optypelist_for_implmode;
-  string input_shape;
-  string dynamic_dims;
-  std::string buffer_optimize;
+  std::string buffer_optimize = "l2_optimize";
   std::string enable_small_channel = "0";
   std::string fusion_switch_file;
   std::string enable_compress_weight = std::to_string(false);
   std::string compress_weight_conf;
+  std::string input_shape;
+  std::string dynamic_dims;
+  std::string dynamic_node_type;

   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
@@ -244,6 +245,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
     ctx->GetAttr("_fusion_switch_file", &fusion_switch_file);
     ctx->GetAttr("_enable_compress_weight", &enable_compress_weight);
     ctx->GetAttr("_compress_weight_conf", &compress_weight_conf);
+    ctx->GetAttr("_dynamic_node_type", &dynamic_node_type);
   }

   // session options
@@ -268,6 +270,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction
   sess_options["ge.fusionSwitchFile"] = fusion_switch_file;
   sess_options["ge.enableCompressWeight"] = enable_compress_weight;
   sess_options["compress_weight_conf"] = compress_weight_conf;
+  sess_options["ge.dynamicNodeType"] = dynamic_node_type;

   return sess_options;
 }
@@ -510,11 +513,12 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
   string fp_point;
   std::string op_select_implmode;
   std::string optypelist_for_implmode;
-  string input_shape;
-  string dynamic_dims;
+  std::string input_shape;
+  std::string dynamic_dims;
+  std::string dynamic_node_type;
   string mstune_mode;
   string work_path;
-  std::string buffer_optimize;
+  std::string buffer_optimize = "l2_optimize";
   std::string enable_small_channel = "0";
   std::string fusion_switch_file;
   std::string enable_compress_weight = std::to_string(false);
@@ -608,6 +612,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
   }
   if (attrs.Find("_input_shape") != nullptr) { input_shape = attrs.Find("_input_shape")->s(); }
   if (attrs.Find("_dynamic_dims") != nullptr) { dynamic_dims = attrs.Find("_dynamic_dims")->s(); }
+  if (attrs.Find("_dynamic_node_type") != nullptr) {
+    dynamic_node_type = attrs.Find("_dynamic_node_type")->s();
+  }
   if (attrs.Find("_mstune_mode") != nullptr) { mstune_mode = attrs.Find("_mstune_mode")->s(); }
   if (attrs.Find("_work_path") != nullptr) { work_path = attrs.Find("_work_path")->s(); }
   if (attrs.Find("_buffer_optimize") != nullptr) { buffer_optimize = attrs.Find("_buffer_optimize")->s(); }
@@ -674,6 +681,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(AttrSlice attrs)
   all_options["optypelist_for_implmode"] = optypelist_for_implmode;
   all_options["input_shape"] = input_shape;
   all_options["dynamic_dims"] = dynamic_dims;
+  all_options["dynamic_node_type"] = dynamic_node_type;
   all_options["mstune_mode"] = mstune_mode;
   all_options["work_path"] = work_path;
   all_options["buffer_optimize"] = buffer_optimize;
@@ -739,11 +747,12 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   int enable_exception_dump = 0;
   string op_select_implmode;
   string optypelist_for_implmode;
-  string input_shape;
-  string dynamic_dims;
+  std::string input_shape;
+  std::string dynamic_dims;
+  int dynamic_node_type = -1;
   string mstune_mode;
   string work_path;
-  std::string buffer_optimize;
+  std::string buffer_optimize = "l2_optimize";
   int enable_small_channel = 0;
   std::string fusion_switch_file;
   bool enable_compress_weight = false;
@@ -869,12 +878,19 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
       if (!s.ok()) { LOG(FATAL) << s.error_message(); }
       optypelist_for_implmode = params.at("optypelist_for_implmode").s();
     }
-    if (params.count("input_shape") && params.count("dynamic_dims")) {
+    if (params.count("input_shape") && params.count("dynamic_dims") &&
+        params.count("dynamic_node_type")) {
       input_shape = params.at("input_shape").s();
       dynamic_dims = params.at("dynamic_dims").s();
-    } else if ((params.count("input_shape") && !params.count("dynamic_dims")) ||
-               (!params.count("input_shape") && params.count("dynamic_dims"))) {
-      LOG(FATAL) << "input_shape and dynamic_dims should be paired.";
+      dynamic_node_type = params.at("dynamic_node_type").i();
+      if (dynamic_node_type < 0 || dynamic_node_type > 1) {
+        LOG(FATAL) << "dynamic_node_type should be 0 or 1.";
+      }
+    } else if (!params.count("input_shape") && !params.count("dynamic_dims") &&
+               !params.count("dynamic_node_type")) {
+      // none of the three options is set: dynamic dims stays disabled.
+    } else {
+      LOG(FATAL) << "input_shape, dynamic_dims and dynamic_node_type should be used together.";
     }
     if (params.count("buffer_optimize")) {
       buffer_optimize = params.at("buffer_optimize").s();
@@ -919,6 +935,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
     sess_options["optypelist_for_implmode"] = optypelist_for_implmode;
     sess_options["input_shape"] = input_shape;
     sess_options["dynamic_dims"] = dynamic_dims;
+    sess_options["dynamic_node_type"] = std::to_string(dynamic_node_type);
     sess_options["buffer_optimize"] = buffer_optimize;
     sess_options["enable_small_channel"] = std::to_string(enable_small_channel);
     sess_options["fusion_switch_file"] = fusion_switch_file;
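
SetNpuOptimizerAttr now treats the three options as all-or-nothing and restricts dynamic_node_type to {0, 1}. The same rule, restated as a small self-contained checker (a sketch only, not part of the adapter):

    def check_dynamic_config(params):
        keys = ("input_shape", "dynamic_dims", "dynamic_node_type")
        present = [k for k in keys if k in params]
        if not present:
            return  # dynamic dims disabled: nothing to validate
        if len(present) != len(keys):
            raise ValueError("input_shape, dynamic_dims and dynamic_node_type "
                             "should be used together")
        if params["dynamic_node_type"] not in (0, 1):
            raise ValueError("dynamic_node_type should be 0 or 1")

    check_dynamic_config({"input_shape": "data:-1,3",
                          "dynamic_dims": "8;16",
                          "dynamic_node_type": 0})  # passes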
diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h
index 9e37c0beb..3d83f7cc8 100644
--- a/tf_adapter/util/npu_attrs.h
+++ b/tf_adapter/util/npu_attrs.h
@@ -40,6 +40,7 @@ limitations under the License.
 // single load all npu mode
 namespace tensorflow {
 Status GetEnvDeviceID(uint32_t &device_id);
+void Split(const std::string &s, std::vector<std::string> &result, const char *delchar = " ");
 class NpuAttrs {
  public:
   // This method returns instance Pointers
-- 
Gitee