diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index cafdb3f24888a259d21b318300a9f51e266024f1..89223050491c18770dfc1b78e6857d928ec95714 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -22,7 +22,7 @@ class NPURunConfig(run_config_lib.RunConfig): event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, - execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1): + execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index 03d2987b2bb4f487cc75db056e7a39d1b1958770..59d0ea0f8df8bdffb0cbb0117f6f1de7ea0386c2 100644 --- a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -6,6 +6,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False enable_exception_dump=2, aoe_mode=None, work_path=None, op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, aoe_config_file=None, - precision_mode_v2=None, export_compile_stat=1): + precision_mode_v2=None, export_compile_stat=1, aicore_num=None): def npu_resource_shutdown(): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index a1789fe258a09b2bace88e994be2beda530b52af..aa678d02a735b70db5462b35f591ef55bad1bcb6 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ 
b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -117,7 +117,8 @@ class NPURunConfig(run_config_lib.RunConfig): compile_dynamic_mode=None, execute_times=-1, graph_max_parallel_model_num=1, - export_compile_stat=1 + export_compile_stat=1, + aicore_num=None ): """ Constructs a NPUConfig. @@ -186,6 +187,7 @@ class NPURunConfig(run_config_lib.RunConfig): input_fusion_size: Merge input memory less than input_fusion_size, defualt 25600B, max size: 32M, min size: 0M precision_mode_v2: default is: ''. export_compile_stat: configure statistics of the graph compiler, 0: Not Generate; 1: Generated when the program exits (default); 2: Generated when graph compilation complete. + aicore_num: default is None. """ @@ -286,6 +288,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._graph_max_parallel_model_num = graph_max_parallel_model_num self.execute_times = execute_times self._export_compile_stat = export_compile_stat + self._aicore_num = aicore_num super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 7b372f621a8294ee36c0cd86838da8f664ddf573..738eba1ea23b75d03382eb8f93cd57da1f2e0eb6 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -720,6 +720,15 @@ class NPUEstimator(estimator_lib.Estimator): if config._export_compile_stat is not None: custom_op.parameter_map["export_compile_stat"].i = config._export_compile_stat + def __load_aicore_num(self, config, custom_op): + """Load aicore_num config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. 
+ """ + if config._aicore_num is not None: + custom_op.parameter_map["aicore_num"].s = tf.compat.as_bytes(config._aicore_num) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -860,6 +869,8 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_export_compile_stat(config, custom_op) + self.__load_aicore_num(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index ffbfd7df0bc21ee84bf579c80a463c1cdb43d52d..fa4e69f0d6243eef30af0b80c2326b4b256457d4 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -73,7 +73,8 @@ def npu_resource_init(graph_run_mode=1, distribute_config=None, aoe_config_file=None, precision_mode_v2=None, - export_compile_stat=1): + export_compile_stat=1, + aicore_num=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") check_graph_run_mode(graph_run_mode) @@ -121,6 +122,8 @@ def npu_resource_init(graph_run_mode=1, init["ge.aoe_config_file"] = str(aoe_config_file) if export_compile_stat is not None: init["ge.exportCompileStat"] = str(export_compile_stat) + if aicore_num is not None: + init["ge.aicoreNum"] = str(aicore_num) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 3e2a4594bc20aeec145542d39967344ae380b878..10f0f4085b2e86f39a326fae1e139a77b66208f7 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -205,5 +205,13 @@ TEST_F(GePluginTest, PluginInitTest_export_compile_stat) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + 
+TEST_F(GePluginTest, PluginInitTest_aicore_num) { + std::map<std::string, std::string> init_options; + init_options["ge.aicoreNum"] = "2|2"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index 4882fb30048ea52b56f5fc8b56101cf678847a39..dea78f7eb58b37f12ad3c24d977566b55b21a70f 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -371,5 +371,43 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_export_compile_stat) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("export_compile_stat"), all_options.cend()); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_aicore_num) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + (*custom_config->mutable_parameter_map())["aicore_num"] = aicore_num; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast<Graph *>(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + attr_map["_aicore_num"] = aicore_num; + + AttrSlice attrs(&attr_map); + const 
auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("aicore_num"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index 3a130f1d38ba0e422fbd97d68da7148b0fecef92..f39fe3afc4c245d870a8affebcad7e63ca6d485c 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -195,5 +195,13 @@ TEST_F(GePluginTest, PluginInitTest_export_compile_stat) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_aicore_num) { + std::map<std::string, std::string> init_options; + init_options["ge.aicoreNum"] = "2|2"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index 0a132fd17473f0cfe42a5ab1a93e78409704a825..ede01ab83b78c02de0be4cc11b309373e8df95e0 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -511,5 +511,42 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_export_compile_stat) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("export_compile_stat"), all_options.cend()); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_aicore_num) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + 
(*custom_config->mutable_parameter_map())["aicore_num"] = aicore_num; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast<Graph *>(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue aicore_num = AttrValue(); + aicore_num.set_s("2|2"); + attr_map["_aicore_num"] = aicore_num; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("aicore_num"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 443743f51f8f4d9b17c6fc9b58e7978e8f9a8385..29dca44671aa2fc3de765dbbf5541c2d05660b2a 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -294,6 +294,11 @@ void GePlugin::Init(std::map &init_options, const bool if (init_options.find("ge.exportCompileStat") != init_options.end()) { ADP_LOG(INFO) << "[GePlugin] export_compile_stat : " << init_options["ge.exportCompileStat"]; } + + if (init_options.find("ge.aicoreNum") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] aicoreNum : " << init_options["ge.aicoreNum"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index bdffb42a27fe1e5355d58a25eb986c165626250c..b3ad2442cfa5472ad0b86a97c1e4a8b454be2340 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -689,6 +689,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string es_cluster_config; std::string execute_times = "-1"; std::string 
export_compile_stat; + std::string aicore_num; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -731,6 +732,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_es_cluster_config", &es_cluster_config); (void) ctx->GetAttr("_execute_times", &execute_times); (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat); + (void) ctx->GetAttr("_aicore_num", &aicore_num); } std::lock_guard lock(mutex_); @@ -789,6 +791,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["export_compile_stat"] = export_compile_stat; init_options_["ge.exportCompileStat"] = export_compile_stat; } + init_options_["aicore_num"] = aicore_num; + init_options_["ge.aicoreNum"] = aicore_num; return init_options_; } @@ -1213,6 +1217,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string compile_dynamic_mode; std::string execute_times = "-1"; std::string export_compile_stat; + std::string aicore_num; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1310,6 +1315,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto compile_dynamic_mode_value = attrs.Find("_compile_dynamic_mode"); auto execute_times_value = attrs.Find("_execute_times"); auto export_compile_stat_value = attrs.Find("_export_compile_stat"); + auto aicore_num_value = attrs.Find("_aicore_num"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1622,6 +1628,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (export_compile_stat_value != nullptr) { export_compile_stat = export_compile_stat_value->s(); } + if (aicore_num_value != nullptr) { + aicore_num = aicore_num_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1738,6 +1747,8 @@ std::map NpuAttrs::GetAllAttrOptions(const 
AttrSlice & all_options["export_compile_stat"] = export_compile_stat; all_options["ge.exportCompileStat"] = export_compile_stat; } + all_options["aicore_num"] = aicore_num; + all_options["ge.aicoreNum"] = aicore_num; return all_options; } @@ -1862,6 +1873,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string accelerate_train_mode; int32_t execute_times = -1; int32_t export_compile_stat = 1; + std::string aicore_num; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { @@ -2418,6 +2430,11 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["export_compile_stat"] = std::to_string(export_compile_stat); init_options_["ge.exportCompileStat"] = std::to_string(export_compile_stat); } + if ((params.count("aicore_num") > 0)) { + aicore_num = params.at("aicore_num").s(); + init_options_["aicore_num"] = aicore_num; + init_options_["ge.aicoreNum"] = aicore_num; + } if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 73dec553550c4d8648a4ffb4f6977c2c573b457d..c6dd9bb34133933502bc494632b8b8f2d3a208f4 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -94,6 +94,7 @@ const std::map kGlobalConfigOptions = { {"event_sync_timeout", "event_sync_timeout"}, {"execute_times", "execute_times"}, {"export_compile_stat", "ge.exportCompileStat"}, + {"aicore_num", "ge.aicoreNum"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 
21abadeca145bcc1c416cfeb42a4c9d05031b1fd..cab34f04ac59dbe10674be0659ad46cb905d272c 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -80,5 +80,6 @@ class NpuConfig(NpuBaseConfig): ['fp16', 'origin', 'cube_fp16in_fp32out', 'mixed_float16', 'mixed_bfloat16', 'cube_hif8', 'mixed_hif8']) self.export_compile_stat = OptionValue(1, [0, 1, 2]) + self.aicore_num = OptionValue(None, None) super(NpuConfig, self).__init__() diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 2cbb5507fd1e6ea47d1ee05ed890b5ac13fffb41..fe6676d70a0c11d7812bfdd3d81016ae79333b6b 100644 --- a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -310,6 +310,8 @@ const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; const char *const OPTION_EXPORT_COMPILE_STAT = "ge.exportCompileStat"; +const char *const OPTION_AICORE_NUM = "ge.aicoreNum"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; // Topo sorting mode