diff --git a/SECURITY_README.md b/SECURITY_README.md index dcd094e23a1f06887c289e336f35a75e63c081f2..3218b6f81f239940aa790beaeb226e139dfc97a1 100644 --- a/SECURITY_README.md +++ b/SECURITY_README.md @@ -163,6 +163,7 @@ TorchAir项目采用C++和Python联合开发,当前正式接口只提供Python - *dump_path* 数据dump路径。默认为当前路径。 - *dump_mode* 数据dump类型。可选参数为["input", "output", "all"],分别指dump输入、输出、所有数据,默认为"all",dump所有数据。 - *quant_dumpable* 是否开启dump量化前的输出。默认为False,不开启。可选参数为[False, True]。 + - *dump_data* 数据dump内容类型。默认为"tensor",可选参数为["tensor", "stats"]。 - **export** 用于配置导出air格式离线图时的选项。建议通过dynamo_export接口来进行配置,参考[export功能](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC1alpha002/devguide/moddevg/torchair/torchair_01_0015.html)。 - *export_mode* -是否开启导出离线图。默认为False,不开启。 diff --git a/cmake/acl/acl_stub.cpp b/cmake/acl/acl_stub.cpp index 8d4a7cec11604338edbfa79c8168a7bdf021d1cc..f507d30c70160d690c546b2fc39c6bcdd8c464df 100644 --- a/cmake/acl/acl_stub.cpp +++ b/cmake/acl/acl_stub.cpp @@ -4,6 +4,7 @@ #include "acl/acl_rt.h" #include "acl/acl_tdt.h" #include "acl/acl_op_compiler.h" +#include "acl/acl_mdl.h" #ifdef __cplusplus extern "C" { @@ -159,6 +160,18 @@ aclError acltdtQueryChannelSize(const acltdtChannelHandle *handle, size_t *size) return ACL_ERROR_NONE; } +aclError aclmdlInitDump() { + return ACL_SUCCESS; +} + +aclError aclmdlSetDump(const char *dumpCfgPath) { + return ACL_SUCCESS; +} + +aclError aclmdlFinalizeDump() { + return ACL_SUCCESS; +} + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/python/torchair/configs/_option_base.py b/python/torchair/configs/_option_base.py index 37ecba564c957a053f3f6367ad5faee6a58dd176..97fc5b60ce96f7ad082a94b22eb9de60769d8d50 100644 --- a/python/torchair/configs/_option_base.py +++ b/python/torchair/configs/_option_base.py @@ -91,6 +91,19 @@ class MustExistedPathValue(OptionValue): raise FileNotFoundError('Please set legal dir path, ' + f'{str(v)} is not found or is not a file directory!') self._value = v + + +class MustExistedFileAddr(OptionValue): + @property + def value(self): + return super().value + + @value.setter + def value(self, v, suffix=".json"): + if v is None or not (os.path.exists(v) and v.endswith(suffix)): + raise FileNotFoundError('Please set legal file path, ' + + f'{str(v)} is not found or is not in {suffix} format!') + self._value = v class RegexValue(OptionValue): diff --git a/python/torchair/configs/dump_config.py b/python/torchair/configs/dump_config.py index 151fec161f348fbc6d910284630acada47ae42cc..05b6ae38258c554c659fb9a2a81a379dff14302d 100644 --- a/python/torchair/configs/dump_config.py +++ b/python/torchair/configs/dump_config.py @@ -1,7 +1,7 @@ __all__ = [] from torchair.configs._option_base import OptionValue, MustExistedPathValue, RegexValue -from torchair.configs._option_base import NpuBaseConfig +from torchair.configs._option_base import NpuBaseConfig, MustExistedFileAddr class _DataDumpConfig(NpuBaseConfig): @@ -14,11 +14,15 @@ class _DataDumpConfig(NpuBaseConfig): self.quant_dumpable = OptionValue(False, [False, True]) self.dump_step = RegexValue("", r'^(((\d+)|(\d+-{0,1}\d+))\|{0,1})*$', "0|1|2-5|6") self.dump_layer = RegexValue("", r'^[0-9a-zA-Z_" "/\\.]*$', "Mul_1 Add1 Conv2D_1") + self.dump_data = OptionValue('tensor', ['tensor', 'stats']) + self.dump_config_path = MustExistedFileAddr("") super(_DataDumpConfig, self).__init__() def as_dict(self): dump_option = {} + if self.dump_config_path.value != "": + dump_option['ge_dump_with_acl_config'] = self.dump_config_path.value if self.enable_dump: dump_option['ge.exec.enableDump'] = '1' dump_option['ge.exec.dumpPath'] = self.dump_path.value @@ -28,4 +32,5 @@ class _DataDumpConfig(NpuBaseConfig): dump_option['ge.exec.dumpStep'] = self.dump_step.value if self.dump_layer.value != "": dump_option['ge.exec.dumpLayer'] = self.dump_layer.value + dump_option['ge.exec.dumpData'] = self.dump_data.value return {}, dump_option diff --git a/tests/st/torchair_st.py b/tests/st/torchair_st.py index 6fc3bfe54c4c77717397f6688c40268dc463b7d9..b4a7825c80660ea61c43cfeb8964b6d9c26d6f2a 100644 --- a/tests/st/torchair_st.py +++ b/tests/st/torchair_st.py @@ -18,7 +18,7 @@ from torchair._ge_concrete_graph.fx2ge_converter import ExecutorType, Placement, from torchair._ge_concrete_graph import ge_apis as ge from torchair._ge_concrete_graph.graph_pass import optimize_reference_op_redundant_copy from torchair.configs.compiler_config import CompilerConfig -from torchair.core._backend import initialize_graph_engine +from torchair.core._backend import initialize_graph_engine, finalize_graph_engine from torchair._ge_concrete_graph.utils import _append_real_input_shape from torchair_st_utils import capture_stdout, generate_faked_module @@ -1014,6 +1014,10 @@ class TorchairSt(unittest.TestCase): config_error.dump_config.dump_path = "./*****" self.assertTrue('Please set legal dir path, ./***** is not found or is not a file directory!' in str(context.exception)) + with self.assertRaises(FileNotFoundError) as context: + config_error.dump_config.dump_config_path = "./*****" + self.assertTrue('Please set legal file path, ./***** is not found or is not in .json format!' + in str(context.exception)) with self.assertRaises(FileNotFoundError) as context: config_error.aoe_config.work_path = "./*****" self.assertTrue('Please set legal dir path, ./***** is not found or is not a file directory!' @@ -2112,6 +2116,31 @@ class TorchairSt(unittest.TestCase): self.assertEqual(y.dtype, x.dtype) self.assertEqual(y.shape, x.shape) + def test_fx_data_dump_data(self): + config.dump_config.enable_dump = True + config.dump_config.dump_data = "stats" + config.dump_config.dump_data = "tensor" + with self.assertRaises(ValueError): + config.dump_config.dump_data = "csv" + + def test_fx_dump_config_path(self): + finalize_graph_engine() + with open("./test_acl.json", "w") as file: + file.write(""" + { + "dump": { + "dump_list": [], + "dump_path": "dump_output" + } + } + """) + tmp_config = CompilerConfig() + tmp_config.dump_config.dump_config_path = "./test_acl.json" + _, global_compile_options = tmp_config.as_dict() + self.assertEqual(global_compile_options["ge_dump_with_acl_config"], + "./test_acl.json") + initialize_graph_engine(global_compile_options) + if __name__ == '__main__': unittest.main() diff --git a/torchair/concrete_graph/session.cpp b/torchair/concrete_graph/session.cpp index 268bae605772b11b1f264d029d3df94b5da4f62a..bdb93493f4415f6e3cacd88d5844a1f8d09be550 100644 --- a/torchair/concrete_graph/session.cpp +++ b/torchair/concrete_graph/session.cpp @@ -10,6 +10,7 @@ #include #include "acl/acl_rt.h" #include "acl/acl_tdt.h" +#include "acl/acl_mdl.h" #include "ge/ge_api_types.h" #include "ge/ge_api.h" #include "hdc_channel.h" @@ -55,6 +56,11 @@ Status Session::Initialize(const std::map &options) { run_with_torch_npu_ = option.second == "1"; continue; } + if (option.first == "ge_dump_with_acl_config") { + TNG_RETURN_IF_ERROR(AclDumpConfigInit(option.second)); + aclmd_initialzed_ = true; + continue; + } ge_options[option.first.c_str()] = option.second.c_str(); } @@ -110,6 +116,10 @@ Status Session::Finalize() { TNG_LOG(DEBUG) << "ACL synchronize device success in Finalize."; } + if (aclmd_initialzed_) { + (void)AclDumpConfigFinalize(); + } + global_ge_session.reset(nullptr); StopStdoutChannel(device_index_); // Stopped after all graph run finished @@ -274,4 +284,20 @@ Status Session::FastExecuteGraph(uint32_t graph_id, const std::vector(dump_path.c_str())); + TNG_ASSERT(dump_ret == ACL_ERROR_NONE, "Fail in acl set dump, return %d", dump_ret); + TNG_LOG(DEBUG) << "Success to config aclmd dump"; + return Status::Success(); +} + +Status Session::AclDumpConfigFinalize() { + auto dump_ret = aclmdlFinalizeDump(); + TNG_ASSERT(dump_ret == ACL_ERROR_NONE, "Fail in acl finalize dump, return %d", dump_ret); + TNG_LOG(DEBUG) << "Success to finalize aclmd dump"; + return Status::Success(); +} } // namespace tng diff --git a/torchair/include/session.h b/torchair/include/session.h index 1230f2550f0870c14aa23297810ced21ee3259f1..24eba6f107c50f8aecbf9712a71da80d14d6b41e 100644 --- a/torchair/include/session.h +++ b/torchair/include/session.h @@ -64,10 +64,15 @@ class Session { return initialized_; } + Status AclDumpConfigInit(const std::string &dump_path); + + Status AclDumpConfigFinalize(); + private: Session() : initialized_(false), status_(Status::Success()){}; std::mutex mu_; std::atomic_bool initialized_; + std::atomic_bool aclmd_initialzed_; std::atomic_bool run_with_torch_npu_ = false; Status status_; int32_t device_index_ = -1;