diff --git a/torch_npu/profiler/analysis/npu_profiler.py b/torch_npu/profiler/analysis/npu_profiler.py
index fa1d220b7861f9c3202d802dffc4b1755fdfe4b1..b5006ddebc0a2f61e8d6ffc3482cd1f85f4b8d39 100644
--- a/torch_npu/profiler/analysis/npu_profiler.py
+++ b/torch_npu/profiler/analysis/npu_profiler.py
@@ -2,7 +2,7 @@ import multiprocessing
 import os
 from multiprocessing.pool import Pool
 
-from .prof_common_func.constant import Constant
+from .prof_common_func.constant import Constant, print_warn_msg
 from .prof_common_func.path_manager import ProfilerPathManager
 from .prof_common_func.prof_process import ProfProcess
 from .profiling_parser import ProfilingParser
@@ -10,10 +10,34 @@ from ...utils.path_manager import PathManager
 
 
 class NpuProfiler:
-
     @classmethod
     def analyse(cls, input_path: str, analysis_type: str = Constant.TENSORBOARD_TRACE_HANDLER, output_path: str = None,
                 **kwargs):
+        """Run the analysis in a freshly spawned process and wait for it to finish.
+
+        Parsing forks worker processes for better performance, but forking from a multi-threaded process may
+        deadlock, so a clean process is spawned to act as the common parent of the parsing workers.
+        """
+        print_warn_msg("analyse start")
+        # NOTE(review): presumably meant for the spawned child, but it also stays set in this process - confirm.
+        os.environ["ASCEND_LAUNCH_BLOCKING"] = "1"
+        try:
+            mp = multiprocessing.get_context("spawn")
+            p = mp.Process(target=NpuProfiler._analyse, args=(input_path, analysis_type, output_path),
+                           kwargs=kwargs)
+            p.start()
+            p.join()
+        except Exception as e:
+            print_warn_msg(f"analyse error {e}")
+            return
+        # A failure inside the child never raises here; it only shows up in the exit code.
+        if p.exitcode != 0:
+            print_warn_msg(f"analyse process exited with code {p.exitcode}")
+
+    @classmethod
+    def _analyse(cls, input_path: str, analysis_type: str = Constant.TENSORBOARD_TRACE_HANDLER, output_path: str = None,
+                 **kwargs):
+        """Worker entry point executed in the process spawned by analyse(); takes the same arguments."""
         input_path = ProfilerPathManager.get_realpath(input_path)
         cls._check_input_path(input_path)
         profiler_path_list = ProfilerPathManager.get_profiler_path_list(input_path)