// Patch summary (commentary only; nothing below the first "diff --git" header is altered).
//   1. third_party/acl/inc/acl/acl_rt.h and third_party/acl/libs/acl.cpp:
//      delete the aclrtDestroyStreamForce declaration together with its doc
//      comment, and delete the stub definition that returned 0.
//   2. torch_npu/csrc/InitNpuBindings.cpp (hunk attributed by git to
//      THPModule_npu_shutdown): ahead of NpuSysCtrl::GetInstance().Finalize(),
//      call Finalize() on GraphExecutor and TdtChannelForPrint, call
//      THNPUCachingHostAllocator_emptyCache(), and call
//      NPUCachingAllocator::emptyCache() inside a try/catch that logs a caught
//      std::exception through NPU_LOGE rather than letting it propagate.
//   3. torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp (lambda passed to
//      RegisterReleaseFn): call NPUEventManager::GetInstance().ClearEvent()
//      first, then replace C10_NPU_CHECK(aclrtDestroyStreamForce(stream)) with
//      (void)aclrtDestroyStream(stream).
// NOTE(review): the (void) cast discards the aclError returned by
// aclrtDestroyStream, so a failed destroy is no longer reported at this point,
// while the ge::GEFinalize / aclrtResetDevice / aclFinalize calls that follow
// remain wrapped in C10_NPU_CHECK. Confirm the best-effort destroy is intended.
// NOTE(review): the deleted source comment stated that ACL depends on the
// stream being destroyed and that skipping it leads to a core dump during ACL
// teardown after shutdown. The destroy call is kept but its rationale is
// removed; consider retaining that explanation next to the new call.
diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h index dd4748a3561ab8b8fd4a5b3604de51154dea724b..93892ee8df7b4571d82ed2c1ae55f7b07f724fca 100644 --- a/third_party/acl/inc/acl/acl_rt.h +++ b/third_party/acl/inc/acl/acl_rt.h @@ -913,22 +913,6 @@ ACL_FUNC_VISIBILITY aclError aclrtCreateStreamWithConfig(aclrtStream *stream, ui */ ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); -/** - * @ingroup AscendCL - * @brief destroy stream instance by force - * - * @par Function - * Can only destroy streams created through the aclrtCreateStream interface - * - * @param stream [IN] the stream to destroy - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtCreateStream - */ -ACL_FUNC_VISIBILITY aclError aclrtDestroyStreamForce(aclrtStream stream); - /** * @ingroup AscendCL * @brief block the host until all tasks diff --git a/third_party/acl/libs/acl.cpp b/third_party/acl/libs/acl.cpp index 2c270d08576398b51a27383657f6f8e04ea8ef7d..625a9e92186dbf22a7d8300bfaf627a227fd8bc5 100644 --- a/third_party/acl/libs/acl.cpp +++ b/third_party/acl/libs/acl.cpp @@ -37,7 +37,6 @@ aclError aclrtSetStreamFailureMode(aclrtStream stream, uint64_t mode) { return 0 aclError aclrtSetOpWaitTimeout(uint32_t timeout) { return 0; } aclError aclrtCreateStreamWithConfig(aclrtStream *stream, uint32_t priority, uint32_t flag) { return 0; } aclError aclrtDestroyStream(aclrtStream stream){return 0;} -aclError aclrtDestroyStreamForce(aclrtStream stream){return 0;} aclError aclrtSynchronizeStream(aclrtStream stream){return 0;} // Event diff --git a/torch_npu/csrc/InitNpuBindings.cpp b/torch_npu/csrc/InitNpuBindings.cpp index b908b4ccdfeb4dce0e9b66bb41d33a3d3fcc2769..89e3b6afadbb8232d37d1457ff845fd0e7188c86 100644 --- a/torch_npu/csrc/InitNpuBindings.cpp +++ b/torch_npu/csrc/InitNpuBindings.cpp @@ -64,7 +64,14 @@ PyObject * THPModule_npu_shutdown(PyObject * /* unused */) } catch (std::exception& e) { 
NPU_LOGE("npuSynchronizeDevice failed err=:%s", e.what()); } - + at_npu::native::GraphExecutor::GetInstance().Finalize(); + at_npu::native::TdtChannelForPrint::GetInstance().Finalize(); + THNPUCachingHostAllocator_emptyCache(); + try { + c10_npu::NPUCachingAllocator::emptyCache(); + } catch (std::exception& e) { + NPU_LOGE("NPUCachingAllocator::emptyCache failed err=:%s", e.what()); + } c10_npu::NpuSysCtrl::SysStatus status = c10_npu::NpuSysCtrl::GetInstance().Finalize(); if (status != c10_npu::NpuSysCtrl::SysStatus::FINALIZE_SUCC) { fprintf(stdout, "THPModule_npu_shutdown failed.\n"); diff --git a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp index 6fa3e5ec255a2524dd96359321c4759061da7772..ae2f26d60170e2a9f9a821f921c2d402364e07e7 100644 --- a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp +++ b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp @@ -216,11 +216,9 @@ NpuSysCtrl::NpuSysCtrl() : init_flag_(false), device_id_(0) {} } this->RegisterReleaseFn([=]() ->void { - // ACL relies on aclrtDestroyStream to clean up some host resources. - // If aclrtDestroyStream is not called, a core dump will occur - // during the automatic deconstruction of ACL resources (singleton object) after npu_shut_down. + c10_npu::NPUEventManager::GetInstance().ClearEvent(); auto stream = c10_npu::getCurrentNPUStream(); - C10_NPU_CHECK(aclrtDestroyStreamForce(stream)); + (void)aclrtDestroyStream(stream); C10_NPU_CHECK(ge::GEFinalize()); C10_NPU_CHECK(aclrtResetDevice(device_id_)); C10_NPU_CHECK(aclFinalize());