From dd192f7c4943cf60807ac510293f7afa7ca2331a Mon Sep 17 00:00:00 2001 From: GuoGuanghao Date: Sat, 12 Jul 2025 16:36:50 +0800 Subject: [PATCH 1/7] new feature set_res_limit --- test/npu/test_torch_npu.py | 5 ++ third_party/acl/inc/acl/acl_rt.h | 36 ++++++++++++++ third_party/acl/libs/acl.cpp | 3 ++ torch_npu/csrc/core/npu/NPUFunctions.cpp | 39 +++++++++++++++ torch_npu/csrc/core/npu/NPUFunctions.h | 6 +++ .../csrc/core/npu/interface/AclInterface.cpp | 39 +++++++++++++++ .../csrc/core/npu/interface/AclInterface.h | 10 ++++ torch_npu/csrc/npu/Module.cpp | 47 +++++++++++++++++++ torch_npu/npu/npu_config.py | 43 ++++++++++++++++- 9 files changed, 227 insertions(+), 1 deletion(-) diff --git a/test/npu/test_torch_npu.py b/test/npu/test_torch_npu.py index 0e2c96e1bd..ded9d8aea4 100644 --- a/test/npu/test_torch_npu.py +++ b/test/npu/test_torch_npu.py @@ -78,6 +78,11 @@ class TorchNPUDeviceTestCase(TestCase): torch_npu.npu.synchronize() after_free_memory, after_total_memory = torch_npu.npu.mem_get_info(0) self.assertEqual(before_total_memory, after_total_memory) + + def test_set_device_res_limit(self): + ans_dict = {'cube_num': 12, 'vector_num': 24} + torch.npu.set_device_res_limit(torch.npu.current_device(), 12, 24) + self.assertEqual(ans_dict, torch.npu.get_device_res_limit(torch.npu.current_device())) class TorchNPUMemoryApiTestCase(TestCase): def test_npu_memory_stats(self): diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h index 98b520ba4a..ecc36f3812 100755 --- a/third_party/acl/inc/acl/acl_rt.h +++ b/third_party/acl/inc/acl/acl_rt.h @@ -181,6 +181,11 @@ typedef enum aclrtLastErrLevel { ACL_RT_THREAD_LEVEL = 0, } aclrtLastErrLevel; +typedef enum { + ACL_RT_DEV_RES_CUBE_CORE = 0, + ACL_RT_DEV_RES_VECTOR_CORE, +} aclrtDevResModelType; + typedef void* aclrtDrvMemHandle; typedef void (*aclrtCallback)(void *userData); @@ -1541,6 +1546,37 @@ ACL_FUNC_VISIBILITY aclError aclrtPeekAtLastError(aclrtLastErrLevel level); */ ACL_FUNC_VISIBILITY 
aclError aclrtGetLastError(aclrtLastErrLevel level); +/** + * @ingroup AscendCL + * @brief Get the value of the current device's limited resources + * @param [in] deviceId the device id + * @param [in] type resources type + * @param [out] value resources limit value + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value); + +/** + * @ingroup AscendCL + * @brief Set the value of the current device's limited resources + * @param [in] deviceId the device id + * @param [in] type resource type + * @param [in] value resource limit value + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value); + +/** + * @ingroup AscendCL + * @brief Reset the value of the current device's limited resources + * @param [in] deviceId the device id + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetDeviceResLimit(int32_t deviceId); + #ifdef __cplusplus } #endif diff --git a/third_party/acl/libs/acl.cpp b/third_party/acl/libs/acl.cpp index 4f24e6bf04..9bb32581dd 100644 --- a/third_party/acl/libs/acl.cpp +++ b/third_party/acl/libs/acl.cpp @@ -18,6 +18,9 @@ aclError aclmdlSetDump(const char *configPath){return 0;} aclError aclmdlInitDump(){return 0;} aclError aclmdlFinalizeDump(){return 0;} aclError aclrtDeviceTaskAbort(int32_t deviceId, uint32_t timeout){return 0;} +aclError aclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value){return 0;} +aclError aclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value){return 0;} +aclError aclrtResetDeviceResLimit(int32_t deviceId){return 0;} // Stream aclError aclrtCreateStream(aclrtStream *stream) { return 0; } diff --git a/torch_npu/csrc/core/npu/NPUFunctions.cpp b/torch_npu/csrc/core/npu/NPUFunctions.cpp index 085bb0be9d..970e7617cf 100644 --- a/torch_npu/csrc/core/npu/NPUFunctions.cpp +++ b/torch_npu/csrc/core/npu/NPUFunctions.cpp @@ -5,6 +5,7 @@ #include "torch_npu/csrc/core/npu/NPUStream.h" #include "torch_npu/csrc/core/npu/NPUAffinityController.h" #include "torch_npu/csrc/core/npu/register/OptionsManager.h" +#include "third_party/acl/inc/acl/acl_rt.h" #ifndef BUILD_LIBTORCH #include "torch_npu/csrc/sanitizer/NPUTrace.h" #endif @@ -293,4 +294,42 @@ void stream_synchronize(aclrtStream stream) NPU_CHECK_ERROR(aclrtSynchronizeStream(stream)); } +aclError SetDeviceResLimit(int32_t device, int32_t type, uint32_t value) +{ + std::lock_guard lock(mtx); + if (used_devices.find(device) == used_devices.end()) { + TORCH_CHECK(false, "NPU device ", device, " has not been initialized! 
Can not get device resource limit"); + } + TORCH_CHECK(device >= 0, "device id must be positive!", PTA_ERROR(ErrCode::VALUE)); + c10_npu::acl::aclrtDevResModelType restype = static_cast<c10_npu::acl::aclrtDevResModelType>(type); + aclError err = c10_npu::acl::AclrtSetDeviceResLimit(device, restype, value); + NPU_CHECK_ERROR_WITHOUT_UCE(err); + return err; +} + +uint32_t GetDeviceResLimit(int32_t device, int32_t type) +{ + std::lock_guard lock(mtx); + if (used_devices.find(device) == used_devices.end()) { + TORCH_CHECK(false, "NPU device ", device, " has not been initialized! Can not get device resource limit"); + } + TORCH_CHECK(device >= 0, "device id must be positive!", PTA_ERROR(ErrCode::VALUE)); + c10_npu::acl::aclrtDevResModelType restype = static_cast<c10_npu::acl::aclrtDevResModelType>(type); + uint32_t value; + NPU_CHECK_ERROR_WITHOUT_UCE(c10_npu::acl::AclrtGetDeviceResLimit(device, restype, &value)); + return value; +} + +aclError ResetDeviceResLimit(int32_t device) +{ + std::lock_guard lock(mtx); + if (used_devices.find(device) == used_devices.end()) { + TORCH_CHECK(false, "NPU device ", device, " has not been initialized! 
Can not reset device resource limit"); + } + TORCH_CHECK(device >= 0, "device id must be positive!", PTA_ERROR(ErrCode::VALUE)); + aclError err = c10_npu::acl::AclrtResetDeviceResLimit(device); + NPU_CHECK_ERROR_WITHOUT_UCE(err); + return err; +} + } // namespace c10_npu diff --git a/torch_npu/csrc/core/npu/NPUFunctions.h b/torch_npu/csrc/core/npu/NPUFunctions.h index 9489984597..ba3f4c0054 100644 --- a/torch_npu/csrc/core/npu/NPUFunctions.h +++ b/torch_npu/csrc/core/npu/NPUFunctions.h @@ -79,6 +79,12 @@ void SetTargetDevice(); int GetLocalDevice(); +aclError SetDeviceResLimit(int32_t device, int32_t type, uint32_t value); + +C10_NPU_API uint32_t GetDeviceResLimit(int32_t deviceId, int32_t type); + +aclError ResetDeviceResLimit(int32_t deviceId); + enum class SyncDebugMode { L_DISABLED = 0, L_WARN, L_ERROR }; // it's used to store npu synchronization state diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.cpp b/torch_npu/csrc/core/npu/interface/AclInterface.cpp index f5bf5b9308..5aa72f1acd 100644 --- a/torch_npu/csrc/core/npu/interface/AclInterface.cpp +++ b/torch_npu/csrc/core/npu/interface/AclInterface.cpp @@ -89,6 +89,9 @@ LOAD_FUNCTION(aclrtIpcMemClose) LOAD_FUNCTION(aclrtMemExportToShareableHandle) LOAD_FUNCTION(aclrtMemSetPidToShareableHandle) LOAD_FUNCTION(aclrtMemImportFromShareableHandle) +LOAD_FUNCTION(aclrtGetDeviceResLimit) +LOAD_FUNCTION(aclrtSetDeviceResLimit) +LOAD_FUNCTION(aclrtResetDeviceResLimit) aclprofStepInfoPtr init_stepinfo() { @@ -1021,5 +1024,41 @@ aclError AclrtMemImportFromShareableHandle(uint64_t shareableHandle, int32_t dev return func(shareableHandle, deviceId, handle); } +aclError AclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value) +{ + typedef aclError (*AclrtGetDeviceResLimit)(int32_t, aclrtDevResModelType, uint32_t*); + static AclrtGetDeviceResLimit func = nullptr; + if (func == nullptr) { + func = (AclrtGetDeviceResLimit) GET_FUNC(aclrtGetDeviceResLimit); + } + + TORCH_CHECK(func, "Failed 
to find function aclrtGetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND)); + return func(deviceId, type, value); +} + +aclError AclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value) +{ + typedef aclError (*AclrtSetDeviceResLimit)(int32_t, aclrtDevResModelType, uint32_t); + static AclrtSetDeviceResLimit func = nullptr; + if (func == nullptr) { + func = (AclrtSetDeviceResLimit) GET_FUNC(aclrtSetDeviceResLimit); + } + + TORCH_CHECK(func, "Failed to find function aclrtSetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND)); + return func(deviceId, type, value); +} + +aclError AclrtResetDeviceResLimit(int32_t deviceId) +{ + typedef aclError (*AclrtResetDeviceResLimit)(int32_t); + static AclrtResetDeviceResLimit func = nullptr; + if (func == nullptr) { + func = (AclrtResetDeviceResLimit) GET_FUNC(aclrtResetDeviceResLimit); + } + + TORCH_CHECK(func, "Failed to find function aclrtResetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND)); + return func(deviceId); +} + } // namespace acl } // namespace c10 diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.h b/torch_npu/csrc/core/npu/interface/AclInterface.h index f2c991b19f..350558f046 100644 --- a/torch_npu/csrc/core/npu/interface/AclInterface.h +++ b/torch_npu/csrc/core/npu/interface/AclInterface.h @@ -32,6 +32,12 @@ enum aclrtStreamStatus { }; using aclrtStreamStatus = enum aclrtStreamStatus; +enum aclrtDevResModelType { + ACL_RT_DEV_RES_CUBE_CORE = 0, + ACL_RT_DEV_RES_VECTOR_CORE = 1, +}; +using aclrtDevResModelType = enum aclrtDevResModelType; + /** aclprofStepInfo is provide by acl, it used to be store dispatch op info. 
*/ @@ -243,5 +249,9 @@ aclError AclrtMemSetPidToShareableHandle(uint64_t shareableHandle, int32_t *pid, aclError AclrtMemImportFromShareableHandle(uint64_t shareableHandle, int32_t deviceId, aclrtDrvMemHandle *handle); +aclError AclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value); +aclError AclrtSetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t value); +aclError AclrtResetDeviceResLimit(int32_t deviceId); + } // namespace acl } // namespace c10_npu diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp index e7a23a2e64..eff7497729 100644 --- a/torch_npu/csrc/npu/Module.cpp +++ b/torch_npu/csrc/npu/Module.cpp @@ -1691,6 +1691,50 @@ static PyObject* THNPModule_add_p2p_access(PyObject* self, PyObject *args) END_HANDLE_TH_ERRORS } +static PyObject* THNPModule_set_device_res_limit(PyObject* self, PyObject *args) +{ + HANDLE_TH_ERRORS + PyObject* device = nullptr; + PyObject* type = nullptr; + PyObject* value = nullptr; + + if (!PyArg_ParseTuple(args, "OOO", &device, &type, &value)) { + throw torch::TypeError("Pybind failed to parse parameters." + + PTA_ERROR(ErrCode::TYPE)); + } + int32_t device_ = THPUtils_unpackLong(device); + int32_t type_ = THPUtils_unpackLong(type); + uint32_t value_ = static_cast<uint32_t>(THPUtils_unpackUInt32(value)); + c10_npu::SetDeviceResLimit(device_, type_, value_); + Py_RETURN_NONE; + END_HANDLE_TH_ERRORS +} + +static PyObject* THNPModule_get_device_res_limit(PyObject* self, PyObject *args) +{ + HANDLE_TH_ERRORS + PyObject* device = nullptr; + PyObject* type = nullptr; + + if (!PyArg_ParseTuple(args, "OO", &device, &type)) { + throw torch::TypeError("Pybind failed to parse parameters." 
+ + PTA_ERROR(ErrCode::TYPE)); + } + int32_t device_ = THPUtils_unpackLong(device); + int32_t type_ = THPUtils_unpackLong(type); + uint32_t value = c10_npu::GetDeviceResLimit(device_, type_); + return PyLong_FromUnsignedLong(value); + END_HANDLE_TH_ERRORS +} + +static PyObject* THNPModule_reset_device_res_limit(PyObject* self, PyObject *args) +{ + HANDLE_TH_ERRORS + int32_t device = THPUtils_unpackLong(args); + c10_npu::ResetDeviceResLimit(device); + Py_RETURN_NONE; + END_HANDLE_TH_ERRORS +} static struct PyMethodDef THNPModule_methods[] = { {"_npu_init", (PyCFunction)THNPModule_initExtension, METH_NOARGS, nullptr}, @@ -1755,6 +1799,9 @@ static struct PyMethodDef THNPModule_methods[] = { {"_is_gte_cann_version", (PyCFunction)THNPModule_is_gte_cann_version, METH_VARARGS, nullptr}, {"_add_ipc_pid", (PyCFunction)THNPModule_add_ipc_pid, METH_VARARGS, nullptr}, {"_add_p2p_access", (PyCFunction)THNPModule_add_p2p_access, METH_VARARGS, nullptr}, + {"_npu_get_device_res_limit", (PyCFunction)THNPModule_get_device_res_limit, METH_VARARGS, nullptr}, + {"_npu_set_device_res_limit", (PyCFunction)THNPModule_set_device_res_limit, METH_VARARGS, nullptr}, + {"_npu_reset_device_res_limit", (PyCFunction)THNPModule_reset_device_res_limit, METH_O, nullptr}, {nullptr}}; TORCH_NPU_API PyMethodDef* THNPModule_get_methods() diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py index 2233f7841c..5a40b56c81 100644 --- a/torch_npu/npu/npu_config.py +++ b/torch_npu/npu/npu_config.py @@ -6,12 +6,14 @@ import torch_npu import torch_npu._C from torch_npu.utils._path_manager import PathManager from torch_npu.utils._error_code import ErrCode, pta_error, prof_error +from .utils import _get_device_index # this file is used to enhance the npu frontend API by set_option or other. 
__all__ = ["set_option", "set_aoe", "set_compile_mode", "set_mm_bmm_format_nd", "get_mm_bmm_format_nd", - "is_jit_compile_false", "finalize_dump", "init_dump", "set_dump"] + "is_jit_compile_false", "finalize_dump", "init_dump", "set_dump", + "set_device_res_limit", "get_device_res_limit", "reset_device_res_limit"] _option_map = {"ACL_PRECISION_MODE": ["allow_fp32_to_fp16", "must_keep_origin_dtype"], "ACL_OP_SELECT_IMPL_MODE": ["high_performance", "high_precision"], @@ -170,3 +172,42 @@ class _allowHF32Conv: hf32_value = torch_npu._C._npu_getOption("ALLOW_CONV_HF32") return (hf32_value is None) or (hf32_value.decode() == "") or (hf32_value.decode() == "enable") return None + + +class call_once_class: + def __init__(self, func): + self.func = func + self.called = False + self.result = None + + def __call__(self, *args, **kwargs): + if self.called: + raise RuntimeError(f"Function '{self.func.__name__}' has already been called, \ + You can only set this interface once.") + + self.called = True + self.result = self.func(*args, **kwargs) + return self.result + + +@call_once_class +def set_device_res_limit(device, cube_num=-1, vector_num=-1): + from torch_npu.npu import device_count + device_id = _get_device_index(device, optional=True) + if device_id < 0 or device_id >= device_count(): + raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE)) + torch_npu.npu._lazy_init() + if cube_num != -1: + torch_npu._C._npu_set_device_res_limit(device_id, 0, cube_num) + if vector_num != -1: + torch_npu._C._npu_set_device_res_limit(device_id, 1, vector_num) + + +def get_device_res_limit(device): + from torch_npu.npu import device_count + device_id = _get_device_index(device, optional=True) + if device_id < 0 or device_id >= device_count(): + raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE)) + torch_npu.npu._lazy_init() + return {"cube_num": torch_npu._C._npu_get_device_res_limit(device_id, 0), \ + "vector_num": 
torch_npu._C._npu_get_device_res_limit(device_id, 1)} \ No newline at end of file -- Gitee From 8ae4fbbff567d795ff1d74f8102d731bca77925e Mon Sep 17 00:00:00 2001 From: GuoGuanghao Date: Wed, 16 Jul 2025 18:13:58 +0800 Subject: [PATCH 2/7] fix ci --- test/npu/test_torch_npu.py | 1 + torch_npu/npu/__init__.py | 4 +++- torch_npu/npu/npu_config.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/npu/test_torch_npu.py b/test/npu/test_torch_npu.py index ded9d8aea4..ebb72fba4d 100644 --- a/test/npu/test_torch_npu.py +++ b/test/npu/test_torch_npu.py @@ -79,6 +79,7 @@ class TorchNPUDeviceTestCase(TestCase): after_free_memory, after_total_memory = torch_npu.npu.mem_get_info(0) self.assertEqual(before_total_memory, after_total_memory) + @unittest.skip("CANN doesn't support now.") def test_set_device_res_limit(self): ans_dict = {'cube_num': 12, 'vector_num': 24} torch.npu.set_device_res_limit(torch.npu.current_device(), 12, 24) diff --git a/torch_npu/npu/__init__.py b/torch_npu/npu/__init__.py index ba883da8fc..6f78293adf 100644 --- a/torch_npu/npu/__init__.py +++ b/torch_npu/npu/__init__.py @@ -115,7 +115,9 @@ __all__ = [ "graph_task_group_begin", "graph_task_group_end", "graph_task_update_begin", - "graph_task_update_end" + "graph_task_update_end", + "set_device_res_limit", + "get_device_res_limit" ] from typing import Tuple, Union, List, cast, Optional diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py index 5327db39b1..598b8abebb 100644 --- a/torch_npu/npu/npu_config.py +++ b/torch_npu/npu/npu_config.py @@ -175,7 +175,7 @@ class _allowHF32Conv: return None -class call_once_class: +class _call_once_class: def __init__(self, func): self.func = func self.called = False @@ -191,7 +191,7 @@ class call_once_class: return self.result -@call_once_class +@_call_once_class def set_device_res_limit(device, cube_num=-1, vector_num=-1): device_id = _get_device_index(device, optional=True) if device_id < 0 or device_id >= device_count(): -- 
Gitee From 0de89f0d0108c84054804117d7c5eb620325a18d Mon Sep 17 00:00:00 2001 From: GuoGuanghao Date: Wed, 16 Jul 2025 18:15:37 +0800 Subject: [PATCH 3/7] fix ci --- torch_npu/csrc/core/npu/interface/AclInterface.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.cpp b/torch_npu/csrc/core/npu/interface/AclInterface.cpp index fc9cd5abb7..411695f8e8 100644 --- a/torch_npu/csrc/core/npu/interface/AclInterface.cpp +++ b/torch_npu/csrc/core/npu/interface/AclInterface.cpp @@ -1025,7 +1025,6 @@ aclError AclrtMemImportFromShareableHandle(uint64_t shareableHandle, int32_t dev return func(shareableHandle, deviceId, handle); } -<<<<<<< HEAD aclError AclrtGetDeviceResLimit(int32_t deviceId, aclrtDevResModelType type, uint32_t* value) { typedef aclError (*AclrtGetDeviceResLimit)(int32_t, aclrtDevResModelType, uint32_t*); @@ -1060,7 +1059,7 @@ aclError AclrtResetDeviceResLimit(int32_t deviceId) TORCH_CHECK(func, "Failed to find function aclrtResetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND)); return func(deviceId); -======= + aclError AclrtDeviceGetBareTgid(int32_t *pid) { typedef aclError (*AclrtDeviceGetBareTgid)(int32_t *); @@ -1071,7 +1070,6 @@ aclError AclrtDeviceGetBareTgid(int32_t *pid) TORCH_CHECK(func, "Failed to find function aclrtDeviceGetBareTgid", PTA_ERROR(ErrCode::NOT_FOUND)); return func(pid); ->>>>>>> 35f7db211875ecb9838e59b78cfecf86ba930ebc } } // namespace acl -- Gitee From fe8e90a4d0bea319f5289d9118eea0d4ed83c7f4 Mon Sep 17 00:00:00 2001 From: GuoGuanghao Date: Wed, 16 Jul 2025 18:45:42 +0800 Subject: [PATCH 4/7] cleancode --- torch_npu/csrc/core/npu/interface/AclInterface.cpp | 2 +- torch_npu/npu/npu_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.cpp b/torch_npu/csrc/core/npu/interface/AclInterface.cpp index 411695f8e8..6641679671 100644 --- a/torch_npu/csrc/core/npu/interface/AclInterface.cpp +++ 
b/torch_npu/csrc/core/npu/interface/AclInterface.cpp @@ -1059,6 +1059,7 @@ aclError AclrtResetDeviceResLimit(int32_t deviceId) TORCH_CHECK(func, "Failed to find function aclrtResetDeviceResLimit", PTA_ERROR(ErrCode::NOT_FOUND)); return func(deviceId); +} aclError AclrtDeviceGetBareTgid(int32_t *pid) { @@ -1071,6 +1072,5 @@ aclError AclrtDeviceGetBareTgid(int32_t *pid) TORCH_CHECK(func, "Failed to find function aclrtDeviceGetBareTgid", PTA_ERROR(ErrCode::NOT_FOUND)); return func(pid); } - } // namespace acl } // namespace c10 diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py index 598b8abebb..0ce19931b4 100644 --- a/torch_npu/npu/npu_config.py +++ b/torch_npu/npu/npu_config.py @@ -6,8 +6,8 @@ import torch_npu import torch_npu._C from torch_npu.utils._path_manager import PathManager from torch_npu.utils._error_code import ErrCode, pta_error, prof_error -from .utils import _get_device_index from torch_npu.npu import device_count +from .utils import _get_device_index # this file is used to enhance the npu frontend API by set_option or other. -- Gitee From a7bd925cd55c1557126edeb35b1138b0ea87bd93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=85=89=E6=B5=A9?= Date: Fri, 18 Jul 2025 02:58:48 +0000 Subject: [PATCH 5/7] update torch_npu/npu/npu_config.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 郭光浩 --- torch_npu/npu/npu_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py index 0ce19931b4..38f55e3da8 100644 --- a/torch_npu/npu/npu_config.py +++ b/torch_npu/npu/npu_config.py @@ -6,7 +6,6 @@ import torch_npu import torch_npu._C from torch_npu.utils._path_manager import PathManager from torch_npu.utils._error_code import ErrCode, pta_error, prof_error -from torch_npu.npu import device_count from .utils import _get_device_index # this file is used to enhance the npu frontend API by set_option or other. 
@@ -193,6 +192,7 @@ class _call_once_class: @_call_once_class def set_device_res_limit(device, cube_num=-1, vector_num=-1): + from torch_npu.npu import device_count device_id = _get_device_index(device, optional=True) if device_id < 0 or device_id >= device_count(): raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE)) @@ -204,6 +204,7 @@ def set_device_res_limit(device, cube_num=-1, vector_num=-1): def get_device_res_limit(device): + from torch_npu.npu import device_count device_id = _get_device_index(device, optional=True) if device_id < 0 or device_id >= device_count(): raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE)) -- Gitee From f14f230080004ad014e31e2c0ee0e6b5c7e76799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=85=89=E6=B5=A9?= Date: Mon, 21 Jul 2025 09:24:51 +0000 Subject: [PATCH 6/7] update torch_npu/npu/npu_config.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 郭光浩 --- torch_npu/npu/npu_config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch_npu/npu/npu_config.py b/torch_npu/npu/npu_config.py index 38f55e3da8..5ca745339f 100644 --- a/torch_npu/npu/npu_config.py +++ b/torch_npu/npu/npu_config.py @@ -13,7 +13,7 @@ from .utils import _get_device_index __all__ = ["set_option", "set_aoe", "set_compile_mode", "set_mm_bmm_format_nd", "get_mm_bmm_format_nd", "is_jit_compile_false", "finalize_dump", "init_dump", "set_dump", - "set_device_res_limit", "get_device_res_limit"] + "set_device_limit", "get_device_limit"] _option_map = {"ACL_PRECISION_MODE": ["allow_fp32_to_fp16", "must_keep_origin_dtype"], "ACL_OP_SELECT_IMPL_MODE": ["high_performance", "high_precision"], @@ -191,7 +191,7 @@ class _call_once_class: @_call_once_class -def set_device_res_limit(device, cube_num=-1, vector_num=-1): +def set_device_limit(device, cube_num=-1, vector_num=-1): from torch_npu.npu import device_count device_id = _get_device_index(device, 
optional=True) if device_id < 0 or device_id >= device_count(): @@ -203,11 +203,11 @@ def set_device_res_limit(device, cube_num=-1, vector_num=-1): torch_npu._C._npu_set_device_res_limit(device_id, 1, vector_num) -def get_device_res_limit(device): +def get_device_limit(device): from torch_npu.npu import device_count device_id = _get_device_index(device, optional=True) if device_id < 0 or device_id >= device_count(): raise AssertionError("Invalid device id" + pta_error(ErrCode.VALUE)) torch_npu.npu._lazy_init() - return {"cube_num": torch_npu._C._npu_get_device_res_limit(device_id, 0), \ - "vector_num": torch_npu._C._npu_get_device_res_limit(device_id, 1)} \ No newline at end of file + return {"cube_core_num": torch_npu._C._npu_get_device_res_limit(device_id, 0), \ + "vector_core_num": torch_npu._C._npu_get_device_res_limit(device_id, 1)} \ No newline at end of file -- Gitee From d0b7c8ad780a1343e911de79aa4b1204001feacb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=85=89=E6=B5=A9?= Date: Mon, 21 Jul 2025 09:25:52 +0000 Subject: [PATCH 7/7] update torch_npu/npu/__init__.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 郭光浩 --- torch_npu/npu/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch_npu/npu/__init__.py b/torch_npu/npu/__init__.py index 6f78293adf..7d71ec09ae 100644 --- a/torch_npu/npu/__init__.py +++ b/torch_npu/npu/__init__.py @@ -116,8 +116,8 @@ __all__ = [ "graph_task_group_end", "graph_task_update_begin", "graph_task_update_end", - "set_device_res_limit", - "get_device_res_limit" + "set_device_limit", + "get_device_limit" ] from typing import Tuple, Union, List, cast, Optional -- Gitee