From 3d14d6a836255997fc7a80c34c91214824bbd745 Mon Sep 17 00:00:00 2001
From: xiaguozheng
Date: Fri, 20 Jun 2025 11:24:54 +0800
Subject: [PATCH] all tensor not empty

---
 tf_adapter/interface_spec/api_npu_config.pyh  |  2 +-
 tf_adapter/interface_spec/api_npu_plugin.pyh  |  2 +-
 .../npu_bridge/estimator/npu/npu_config.py    |  5 ++++-
 .../npu_bridge/estimator/npu/npu_estimator.py |  2 ++
 .../npu_bridge/estimator/npu/npu_plugin.py    |  5 ++++-
 tf_adapter/util/ge_plugin.cc                  |  1 +
 tf_adapter/util/npu_attrs.cc                  | 21 +++++++++++++++++++
 .../npu_device/core/npu_wrapper.cpp           |  3 ++-
 .../python/npu_device/configs/npu_config.py   |  1 +
 .../tests/stub/include/stub/defines.h         |  1 +
 10 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index a26565778..25d938728 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -23,7 +23,7 @@ class NPURunConfig(run_config_lib.RunConfig):
                  frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None,
                  ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None,
                  execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None,
-                 oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT"):
+                 oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=0):

 class ProfilingConfig():
     def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh
index 2cd0a1608..bf91016a0 100644
--- a/tf_adapter/interface_spec/api_npu_plugin.pyh
+++ b/tf_adapter/interface_spec/api_npu_plugin.pyh
@@ -7,6 +7,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False
                       op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False,
                       distribute_config=None, aoe_config_file=None, precision_mode_v2=None,
                       export_compile_stat=1, aicore_num=None, oo_constant_folding=True,
-                      input_batch_cpy=False):
+                      input_batch_cpy=False, all_tensor_not_empty=0):

 def npu_resource_shutdown():
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index e0e32ff6c..e1bea6ba7 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -121,7 +121,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                  aicore_num=None,
                  oo_constant_folding=True,
                  input_batch_cpy=False,
-                 shape_generalization_mode="STRICT"
+                 shape_generalization_mode="STRICT",
+                 all_tensor_not_empty=0
                  ):
         """
         Constructs a NPUConfig.
@@ -198,6 +199,7 @@ class NPURunConfig(run_config_lib.RunConfig):
             STRICT: default, use the input shape;
             FULL: full generalization;
             ADAPTIVE: generalizes the varying axes.
+        all_tensor_not_empty: default is 0.
         """

         # Check iterations_per_loop.
@@ -301,6 +303,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         self._oo_constant_folding = oo_constant_folding
         self._input_batch_cpy = input_batch_cpy
         self._shape_generalization_mode = shape_generalization_mode
+        self._all_tensor_not_empty = all_tensor_not_empty
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
             tf_random_seed=tf_random_seed,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 965965680..af455b563 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -854,6 +854,8 @@ class NPUEstimator(estimator_lib.Estimator):
         if config._shape_generalization_mode is not None:
             custom_op.parameter_map["shape_generalization_mode"].s = tf.compat.as_bytes(
                 config._shape_generalization_mode)
+        if config._all_tensor_not_empty is not None:
+            custom_op.parameter_map["all_tensor_not_empty"].i = config._all_tensor_not_empty

         self.__load_session_device_id(config, custom_op)
         self.__load_modify_mixlist(config, custom_op)
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
index 97571455e..b7b9e6db5 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
@@ -76,7 +76,8 @@ def npu_resource_init(graph_run_mode=1,
                       export_compile_stat=1,
                       aicore_num=None,
                       oo_constant_folding=True,
-                      input_batch_cpy=False):
+                      input_batch_cpy=False,
+                      all_tensor_not_empty=0):
     """Initialize NPU resource"""
     util.check_nonnegative_integer(graph_run_mode, "graph_run_mode")
     check_graph_run_mode(graph_run_mode)
@@ -126,6 +127,8 @@ def npu_resource_init(graph_run_mode=1,
         init["ge.exportCompileStat"] = str(export_compile_stat)
     if aicore_num is not None:
         init["ge.aicoreNum"] = str(aicore_num)
+    if all_tensor_not_empty is not None:
+        init["ge.exec.allTensorNotEmpty"] = str(all_tensor_not_empty)
     if oo_constant_folding is not None:
         util.check_bool_type(oo_constant_folding, "oo_constant_folding")
         init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false"
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index cea5f60cd..694c6e5f6 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -130,6 +130,7 @@ void SetOptionNameMap(json &option_name_map) {
   option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_INPUT, "dynamic_input");
   option_name_map.emplace(ge::AICORE_NUM, "aicore_num");
   option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy");
+  option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty");
 }

 }  // namespace
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index ca54a7d15..61b219b6c 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -495,6 +495,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string graph_max_parallel_model_num = "1";
   std::string input_batch_cpy;
   std::string jit_compile;
+  std::string all_tensor_not_empty = "0";
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
     (void) ctx->GetAttr("_hcom_parallel", &hcom_parallel);
@@ -572,6 +573,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
     }
     (void) ctx->GetAttr("_graph_compiler_cache_dir", &graph_compiler_cache_dir);
     (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy);
+    (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty);
   }

   // session options
@@ -643,6 +645,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   }
   sess_options["ge.inputBatchCpy"] = input_batch_cpy;
   sess_options["input_batch_cpy"] = input_batch_cpy;
+  sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty;
+  sess_options["all_tensor_not_empty"] = all_tensor_not_empty;
   SetForbiddenClosePassOn(sess_options);
   return sess_options;
 }
@@ -709,6 +713,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
   std::string aicore_num;
   std::string oo_constant_folding;
   std::string input_batch_cpy;
+  std::string all_tensor_not_empty = "0";

   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void) ctx->GetAttr("_precision_mode", &precision_mode);
@@ -755,6 +760,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
     (void) ctx->GetAttr("_aicore_num", &aicore_num);
     (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding);
     (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy);
+    (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty);
   }

   std::lock_guard<std::mutex> lock(mutex_);
@@ -819,6 +825,8 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
   }
   init_options_["aicore_num"] = aicore_num;
   init_options_["ge.aicoreNum"] = aicore_num;
+  init_options_[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty;
+  init_options_["all_tensor_not_empty"] = all_tensor_not_empty;
   if (!oo_constant_folding.empty()) {
     init_options_["oo_constant_folding"] = oo_constant_folding;
     init_options_["ge.oo.constantFolding"] = oo_constant_folding;
@@ -1263,6 +1271,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string oo_constant_folding;
   std::string input_batch_cpy;
   std::string shape_generalization_mode = "STRICT";
+  std::string all_tensor_not_empty = "0";

   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1365,6 +1374,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto oo_constant_folding_value = attrs.Find("_oo_constant_folding");
   auto input_batch_cpy_value = attrs.Find("_input_batch_cpy");
   auto shape_generalization_mode_value = attrs.Find("_shape_generalization_mode");
+  auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty");
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
@@ -1691,6 +1701,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (shape_generalization_mode_value != nullptr) {
       shape_generalization_mode = shape_generalization_mode_value->s();
     }
+    if (all_tensor_not_empty_value != nullptr) {
+      all_tensor_not_empty = all_tensor_not_empty_value->s();
+    }
   }

   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -1810,6 +1823,8 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   }
   all_options["aicore_num"] = aicore_num;
   all_options["ge.aicoreNum"] = aicore_num;
+  all_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty;
+  all_options["all_tensor_not_empty"] = all_tensor_not_empty;
   if (!oo_constant_folding.empty()) {
     all_options["oo_constant_folding"] = oo_constant_folding;
     all_options["ge.oo.constantFolding"] = oo_constant_folding;
@@ -1947,6 +1962,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   bool oo_constant_folding = true;
   bool input_batch_cpy = false;
   std::string shape_generalization_mode = "STRICT";
+  int64_t all_tensor_not_empty = 0;
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
     if (custom_optimizer.name() == "NpuOptimizer") {
@@ -2521,6 +2537,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
         init_options_["aicore_num"] = aicore_num;
         init_options_["ge.aicoreNum"] = aicore_num;
       }
+      if (params.count("all_tensor_not_empty") > 0) {
+        all_tensor_not_empty = params.at("all_tensor_not_empty").i();
+      }
       // input_batch_cpy
       if (params.count("input_batch_cpy") > 0) {
         input_batch_cpy = params.at("input_batch_cpy").b();
@@ -2618,6 +2637,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
     sess_options["input_fusion_size"] = std::to_string(input_fusion_size);
     sess_options["input_batch_cpy"] = std::to_string(input_batch_cpy);
     sess_options["ge.inputBatchCpy"] = std::to_string(input_batch_cpy);
+    sess_options["all_tensor_not_empty"] = std::to_string(all_tensor_not_empty);
+    sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = std::to_string(all_tensor_not_empty);

     init_options_["profiling_mode"] = std::to_string(static_cast<int>(profiling_mode));
     init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast<int>(profiling_mode));
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index 14536b87f..553569f0d 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -150,7 +150,8 @@ const std::map<std::string, std::string> kSessionConfigOptions = {
   {"graph_compiler_cache_dir", "ge.graph_compiler_cache_dir"},
   {"graph_slice", "ge.graphSliceMode"},
   {"input_fusion_size", "ge.exec.input_fusion_size"},
-  {"compile_dynamic_mode", "ge.compile_dynamic_mode"}
+  {"compile_dynamic_mode", "ge.compile_dynamic_mode"},
+  {"all_tensor_not_empty", ge::OPTION_ALL_TENSOR_NOT_EMPTY}
 };

 }  // namespace
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 72a6c9f0e..65a1ccc5a 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -84,5 +84,6 @@ class NpuConfig(NpuBaseConfig):
         self.oo_constant_folding = OptionValue(True, [True, False])
         self.input_batch_cpy = OptionValue(False, [True, False])
         self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"])
+        self.all_tensor_not_empty = OptionValue(None, ['0', '1'])

         super(NpuConfig, self).__init__()
diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h
index 51b8cfd7b..45dc1da2a 100644
--- a/tf_adapter_2.x/tests/stub/include/stub/defines.h
+++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h
@@ -65,6 +65,7 @@ const char *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logi
 const char *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId";
 const char *const OPTION_EXEC_MODEL_DEPLOY_MODE = "ge.exec.modelDeployMode";
 const char *const OPTION_EXEC_MODEL_DEPLOY_DEVICELIST = "ge.exec.modelDeployDevicelist";
+const char *const OPTION_ALL_TENSOR_NOT_EMPTY = "ge.exec.allTensorNotEmpty";

 // Option key: memory init
 const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
--
Gitee
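
Usage sketch (editorial note, not part of the patch): the new option surfaces in three user-facing entry points touched above. The import paths and parameter names below come straight from the diff; the npu_device calls (global_options(), open().as_default()) are the usual tf_adapter_2.x configuration entry points and are assumptions here, not verified against this tree.

# Sketch only: names from the diff, surrounding calls are assumptions.

# 1) TF1.x estimator: NPURunConfig forwards the value into
#    custom_op.parameter_map["all_tensor_not_empty"].i (see npu_estimator.py above).
from npu_bridge.estimator.npu.npu_config import NPURunConfig
run_config = NPURunConfig(all_tensor_not_empty=1)

# 2) TF1.x raw session: set the NpuOptimizer attribute directly.
import tensorflow as tf
config = tf.compat.v1.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["all_tensor_not_empty"].i = 1  # mapped to "ge.exec.allTensorNotEmpty"

# 3) TF2.x: NpuConfig registers OptionValue(None, ['0', '1']), so the value is a string.
import npu_device
npu_device.global_options().all_tensor_not_empty = '1'  # assumed entry point
npu_device.open().as_default()

Note the asymmetry the sketch relies on: the TF1.x C++ side defaults the option to "0" (npu_attrs.cc) and always forwards a value to GE, whereas the TF2.x OptionValue default is None, so the key is only sent when the user explicitly sets '0' or '1'.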