From a745ddd3546417f89421cdf0736eed84d3cb0e58 Mon Sep 17 00:00:00 2001 From: yangyongqiang5033 Date: Wed, 12 Mar 2025 15:51:48 +0800 Subject: [PATCH] support oo_level --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- tf_adapter/interface_spec/api_npu_plugin.pyh | 3 ++- .../npu_bridge/estimator/npu/npu_config.py | 5 +++- .../npu_bridge/estimator/npu/npu_estimator.py | 11 ++++++++ .../npu_bridge/estimator/npu/npu_plugin.py | 5 +++- tf_adapter/util/ge_plugin.cc | 4 +++ tf_adapter/util/npu_attrs.cc | 25 +++++++++++++++++++ .../npu_device/core/npu_wrapper.cpp | 1 + .../python/npu_device/configs/npu_config.py | 1 + .../tests/stub/include/stub/defines.h | 2 ++ 10 files changed, 55 insertions(+), 4 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 21be81cae..c5c0e8ffd 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -23,7 +23,7 @@ class NPURunConfig(run_config_lib.RunConfig): frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, - oo_constant_folding=True): + oo_constant_folding=True, oo_level=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index a65a3272e..c6c7764b0 100644 --- a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -6,6 +6,7 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False enable_exception_dump=2, aoe_mode=None, work_path=None, op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, 
aoe_config_file=None, - precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True): + precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, + oo_level=None): def npu_resource_shutdown(): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 04b63e1ab..784425501 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -119,7 +119,8 @@ class NPURunConfig(run_config_lib.RunConfig): graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, - oo_constant_folding=True + oo_constant_folding=True, + oo_level=None ): """ Constructs a NPUConfig. @@ -191,6 +192,7 @@ class NPURunConfig(run_config_lib.RunConfig): aicore_num: default is: ''. exits (default); 2: Generated when graph compilation complete. oo_constant_folding: The switch of constant folding, False: disable; True(default): enable. + oo_level: The optimization level of the graph optimizer. """ # Check iterations_per_loop. 
@@ -292,6 +294,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._export_compile_stat = export_compile_stat self._aicore_num = aicore_num self._oo_constant_folding = oo_constant_folding + self._oo_level = oo_level super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 737f97c03..de56f4c19 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -741,6 +741,15 @@ class NPUEstimator(estimator_lib.Estimator): if config._oo_constant_folding is not None: custom_op.parameter_map["oo_constant_folding"].b = config._oo_constant_folding + def __load_oo_level(self, config, custom_op): + """Load oo_level config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._oo_level is not None: + custom_op.parameter_map["oo_level"].s = tf.compat.as_bytes(config._oo_level) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -885,6 +894,8 @@ class NPUEstimator(estimator_lib.Estimator): self.__oo_constant_folding(config, custom_op) + self.__load_oo_level(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index bba68219b..5e325d438 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -75,7 +75,8 @@ def npu_resource_init(graph_run_mode=1, precision_mode_v2=None, export_compile_stat=1, aicore_num=None, - oo_constant_folding=True): + oo_constant_folding=True, + oo_level=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") 
check_graph_run_mode(graph_run_mode) @@ -128,6 +129,8 @@ def npu_resource_init(graph_run_mode=1, if oo_constant_folding is not None: util.check_bool_type(oo_constant_folding, "oo_constant_folding") init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false" + if oo_level is not None: + init["ge.oo.level"] = str(oo_level) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 41c8ad26e..2d4b35862 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -304,6 +304,10 @@ void GePlugin::Init(std::map &init_options, const bool ADP_LOG(INFO) << "[GePlugin] oo_constant_folding : " << init_options["ge.oo.constantFolding"]; } + if (init_options.find("ge.oo.level") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] oo_level : " << init_options["ge.oo.level"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 8e580ab6e..e5954d2de 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -696,6 +696,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string export_compile_stat; std::string aicore_num; std::string oo_constant_folding; + std::string oo_level; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -741,6 +742,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat); (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding); + (void) ctx->GetAttr("_oo_level", &oo_level); } std::lock_guard lock(mutex_); @@ -809,6 +811,10 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr 
init_options_["oo_constant_folding"] = oo_constant_folding; init_options_["ge.oo.constantFolding"] = oo_constant_folding; } + if (!oo_level.empty()) { + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + } return init_options_; } @@ -1236,6 +1242,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string export_compile_stat; std::string aicore_num; std::string oo_constant_folding; + std::string oo_level; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1336,6 +1343,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto export_compile_stat_value = attrs.Find("_export_compile_stat"); auto aicore_num_value = attrs.Find("_aicore_num"); auto oo_constant_folding_value = attrs.Find("_oo_constant_folding"); + auto oo_level_value = attrs.Find("_oo_level"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1657,6 +1665,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (oo_constant_folding_value != nullptr) { oo_constant_folding = oo_constant_folding_value->s(); } + if (oo_level_value != nullptr) { + oo_level = oo_level_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1780,6 +1791,10 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["oo_constant_folding"] = oo_constant_folding; all_options["ge.oo.constantFolding"] = oo_constant_folding; } + if (!oo_level.empty()) { + all_options["oo_level"] = oo_level; + all_options["ge.oo.level"] = oo_level; + } return all_options; } @@ -1907,6 +1922,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options int32_t export_compile_stat = 1; std::string aicore_num; bool oo_constant_folding = true; + std::string oo_level; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto 
&custom_optimizer : rewrite_options.custom_optimizers()) { @@ -2478,6 +2494,15 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["aicore_num"] = aicore_num; init_options_["ge.aicoreNum"] = aicore_num; } + + if ((params.count("oo_level") > 0)) { + oo_level = params.at("oo_level").s(); + const static std::vector kOoLevelList = {"O1", "O3"}; + NPU_REQUIRES_OK(CheckValueAllowed(oo_level, kOoLevelList)); + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + } + if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index c652f2751..96e54a962 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -96,6 +96,7 @@ const std::map kGlobalConfigOptions = { {"export_compile_stat", "ge.exportCompileStat"}, {"aicore_num", "ge.aicoreNum"}, {"oo_constant_folding", "ge.oo.constantFolding"}, + {"oo_level", "ge.oo.level"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 7e3b7fa7b..1bee09ced 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -82,5 +82,6 @@ class NpuConfig(NpuBaseConfig): self.export_compile_stat = OptionValue(1, [0, 1, 2]) self.aicore_num = OptionValue(None, None) self.oo_constant_folding = OptionValue(True, [True, False]) + self.oo_level = OptionValue(None, None) super(NpuConfig, self).__init__() diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 51b8cfd7b..cc5044d5b 100644 --- 
a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -314,6 +314,8 @@ const char *const OPTION_AICORE_NUM = "ge.aicoreNum"; const char *const OO_CONSTANT_FOLDING = "ge.oo.constantFolding"; +const char *const OO_LEVEL = "ge.oo.level"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; // Topo sorting mode -- Gitee