diff --git a/torch_npu/csrc/core/npu/interface/OpInterface.cpp b/torch_npu/csrc/core/npu/interface/OpInterface.cpp
index e950ee9f931b3a3fe24c5cd9b0ea4abdaa30be63..a2f4c515614b041d50752d367e295b6ea9e33684 100644
--- a/torch_npu/csrc/core/npu/interface/OpInterface.cpp
+++ b/torch_npu/csrc/core/npu/interface/OpInterface.cpp
@@ -1,19 +1,19 @@
 #include "OpInterface.h"
 #include "torch_npu/csrc/core/npu/register/FunctionLoader.h"
+#include "torch_npu/csrc/core/npu/NPUException.h"
 
 namespace c10_npu {
 namespace opapi {
 #undef LOAD_FUNCTION
-#define LOAD_FUNCTION(funcName) \
-    REGISTER_FUNCTION(libopapi, funcName)
+#define LOAD_FUNCTION(funcName) REGISTER_FUNCTION(libopapi, funcName)
 #undef GET_FUNC
-#define GET_FUNC(funcName) \
-    GET_FUNCTION(libopapi, funcName)
+#define GET_FUNC(funcName) GET_FUNCTION(libopapi, funcName)
 
 REGISTER_LIBRARY(libopapi)
 LOAD_FUNCTION(aclnnSilentCheck)
 LOAD_FUNCTION(aclnnSilentCheckV2)
+LOAD_FUNCTION(aclnnReselectStaticKernel)
 
 bool IsExistAclnnSilentCheck()
 {
@@ -24,5 +24,20 @@ bool IsExistAclnnSilentCheck()
     return isExist;
 }
 
+aclnnStatus ReselectStaticKernel()
+{
+    typedef aclnnStatus (*AclnnApiFunc)();
+    static AclnnApiFunc aclnnReselectStaticKernelFunc = nullptr;
+    if (aclnnReselectStaticKernelFunc == nullptr) {
+        aclnnReselectStaticKernelFunc = (AclnnApiFunc)GET_FUNC(aclnnReselectStaticKernel);
+    }
+    TORCH_CHECK(aclnnReselectStaticKernelFunc,
+                "Failed to find function ",
+                "aclnnReselectStaticKernel",
+                PROF_ERROR(ErrCode::NOT_FOUND));
+    auto ret = aclnnReselectStaticKernelFunc();
+    return ret;
+}
+
 } // namespace opapi
 } // namespace c10_npu
diff --git a/torch_npu/csrc/core/npu/interface/OpInterface.h b/torch_npu/csrc/core/npu/interface/OpInterface.h
index 663f9a6144ed52569d2c92780c42e70c9ddff38d..111489a36f66f2bd99316aa3f1764cb19ec087e7 100644
--- a/torch_npu/csrc/core/npu/interface/OpInterface.h
+++ b/torch_npu/csrc/core/npu/interface/OpInterface.h
@@ -1,11 +1,20 @@
 #pragma once
 
+#include <cstdint>
+
 namespace c10_npu {
 namespace opapi {
+typedef int32_t aclnnStatus;
+
 /**
  * This API is used to check whether aclnnSilentCheck exist.
-*/
+ */
 bool IsExistAclnnSilentCheck();
 
+/**
+ * This API is used to reselect the static kernel. It only needs to be called once per process.
+ */
+aclnnStatus ReselectStaticKernel();
+
 } // namespace opapi
 } // namespace c10_npu
diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp
index 040e4754678597ae89ba1776919184cca6d058a6..4f4872bb281c5e4c13f59b82205470ce1efe6f38 100644
--- a/torch_npu/csrc/npu/Module.cpp
+++ b/torch_npu/csrc/npu/Module.cpp
@@ -51,6 +51,7 @@
 #include "torch_npu/csrc/aten/common/from_blob.h"
 #include "torch_npu/csrc/profiler/combined_traceback.h"
 #include "torch_npu/csrc/profiler/python/combined_traceback.h"
+#include "torch_npu/csrc/core/npu/interface/OpInterface.h"
 
 struct NPUDeviceProp {
     std::string name;
@@ -1594,6 +1595,18 @@ PyObject* THNPModule_npu_reset_thread_affinity(PyObject* self, PyObject* noargs)
     END_HANDLE_TH_ERRORS
 }
 
+PyObject* THNPModule_aclnn_reselect_static_kernel(PyObject* self, PyObject* noargs)
+{
+    HANDLE_TH_ERRORS
+    NPUStatus ret = c10_npu::emptyAllNPUStream();
+    if (ret != SUCCESS) {
+        ASCEND_LOGE("Failed to empty NPU task queue, ret: %s", ret.c_str());
+    }
+    c10_npu::opapi::ReselectStaticKernel();
+    Py_RETURN_NONE;
+    END_HANDLE_TH_ERRORS
+}
+
 PyObject* THNPModule_npu_set_fft_plan_cache_max_size(PyObject* self, PyObject* args)
 {
     HANDLE_TH_ERRORS
@@ -1800,6 +1813,7 @@ static struct PyMethodDef THNPModule_methods[] = {
     {"_get_silent_check_version", (PyCFunction)THNPModule_npu_get_silent_check_version, METH_NOARGS, nullptr},
     {"_npu_set_thread_affinity", (PyCFunction)THNPModule_npu_set_thread_affinity, METH_VARARGS, nullptr},
     {"_npu_reset_thread_affinity", (PyCFunction)THNPModule_npu_reset_thread_affinity, METH_NOARGS, nullptr},
+    {"_aclnn_reselect_static_kernel", (PyCFunction)THNPModule_aclnn_reselect_static_kernel, METH_NOARGS, nullptr},
     {"_npu_set_fft_plan_cache_max_size", (PyCFunction)THNPModule_npu_set_fft_plan_cache_max_size, METH_VARARGS, nullptr},
     {"_npu_get_fft_plan_cache_max_size", (PyCFunction)THNPModule_npu_get_fft_plan_cache_max_size, METH_NOARGS, nullptr},
     {"_npu_get_fft_plan_cache_size", (PyCFunction)THNPModule_npu_get_fft_plan_cache_size, METH_NOARGS, nullptr},
diff --git a/torch_npu/npu/__init__.py b/torch_npu/npu/__init__.py
index 182859d8a5aefc290a702d41fdd36cc33631c72c..855b96643ca22c2dc403f5511a50e7a3ffa67daa 100644
--- a/torch_npu/npu/__init__.py
+++ b/torch_npu/npu/__init__.py
@@ -400,6 +400,10 @@ def _device_count_ascend_hal() -> int:
         return -1
     return len(visible_devices)
 
+def _aclnn_reselect_static_kernel():
+    torch_npu.npu._lazy_init()
+    torch_npu._C._aclnn_reselect_static_kernel()
+
 _cached_device_count: Optional[int] = None
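
Usage sketch: a minimal example of how the new private binding added in torch_npu/npu/__init__.py would be invoked, assuming a torch_npu build that includes this patch; the surrounding imports are the conventional setup, not part of the change.

    # Minimal sketch, assuming torch_npu is built with this patch applied.
    # _aclnn_reselect_static_kernel() runs lazy init, then enters the C++
    # binding, which drains the NPU task queue (emptyAllNPUStream) before
    # calling aclnnReselectStaticKernel from libopapi. Per the new header
    # comment, it only needs to be called once per process.
    import torch
    import torch_npu

    torch_npu.npu._aclnn_reselect_static_kernel()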