From aee4a5d0a7da470d1656e8510f727d2eda12e890 Mon Sep 17 00:00:00 2001
From: zhttjd
Date: Fri, 8 Aug 2025 10:10:36 +0800
Subject: [PATCH 1/5] fixed patcher ddp error, added exception when patch
 fails (caught and presented as a warning)

---
 mx_driving/patcher/__init__.py   |  5 +++--
 mx_driving/patcher/brake.py      | 14 ++++++++++++++
 mx_driving/patcher/distribute.py | 21 +++++++++++----------
 mx_driving/patcher/functions.py  |  2 ++
 mx_driving/patcher/mmcv.py       |  6 ++++++
 mx_driving/patcher/mmdet.py      | 10 ++++++++++
 mx_driving/patcher/mmdet3d.py    |  4 ++++
 mx_driving/patcher/nuscenes.py   |  4 +++-
 mx_driving/patcher/optimizer.py  |  4 ++++
 mx_driving/patcher/profiler.py   | 14 ++++++++++++++
 mx_driving/patcher/tensor.py     | 18 ++++++++++++++----
 11 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/mx_driving/patcher/__init__.py b/mx_driving/patcher/__init__.py
index 102c6cc4..34db464a 100644
--- a/mx_driving/patcher/__init__.py
+++ b/mx_driving/patcher/__init__.py
@@ -51,7 +51,7 @@ __all__ = [
     "optimizer",
 ]
 
-from mx_driving.patcher.distribute import ddp, ddp_forward
+from mx_driving.patcher.distribute import ddp
 from mx_driving.patcher.functions import stream
 from mx_driving.patcher.mmcv import dc, mdc, msda, patch_mmcv_version
 from mx_driving.patcher.mmdet import pseudo_sampler, resnet_add_relu, resnet_maxpool, resnet_fp16
@@ -69,7 +69,8 @@ default_patcher_builder = (
     .add_module_patch("torch", Patch(index), Patch(batch_matmul))
     .add_module_patch("numpy", Patch(numpy_type))
     .add_module_patch("mmdet.core.bbox.samplers", Patch(pseudo_sampler))
-    .add_module_patch("mmcv.parallel", Patch(ddp), Patch(stream), Patch(ddp_forward))
+    .add_module_patch("mmcv.parallel", Patch(stream))
+    .add_module_patch("mmcv.parallel.distributed", Patch(ddp))
     .add_module_patch("mmdet.models.backbones.resnet", Patch(resnet_add_relu), Patch(resnet_maxpool))
     .add_module_patch("mmdet3d.datasets.nuscenes_dataset", Patch(nuscenes_dataset))
     .add_module_patch("mmdet3d.evaluation.metrics", Patch(nuscenes_metric))
diff --git a/mx_driving/patcher/brake.py b/mx_driving/patcher/brake.py
index da7aaf18..5b448611 100644
--- a/mx_driving/patcher/brake.py
+++ b/mx_driving/patcher/brake.py
@@ -142,9 +142,23 @@ def brake(runner: ModuleType, options: Dict):
 
     if hasattr(runner, "EpochBasedRunner"):
         runner.EpochBasedRunner.train = train
+    else:
+        raise AttributeError("EpochBasedRunner not found")
+
     if hasattr(runner, "EpochBasedTrainLoop"):
         runner.EpochBasedTrainLoop.run_epoch = run_epoch
+    else:
+        raise AttributeError("EpochBasedTrainLoop not found")
+
+
     if hasattr(runner, "IterBasedTrainLoop"):
         runner.IterBasedTrainLoop.run = run
+    else:
+        raise AttributeError("IterBasedTrainLoop not found")
+
+
     if hasattr(runner, "IterBasedRunner"):
         runner.IterBasedRunner.run = run_iter
+    else:
+        raise AttributeError("IterBasedRunner not found")
+
\ No newline at end of file
diff --git a/mx_driving/patcher/distribute.py b/mx_driving/patcher/distribute.py
index 93c5c67c..913152a8 100644
--- a/mx_driving/patcher/distribute.py
+++ b/mx_driving/patcher/distribute.py
@@ -3,14 +3,10 @@ from types import ModuleType
 from typing import Dict
 
 
-def ddp(mmcvparallel: ModuleType, options: Dict):
-    if hasattr(mmcvparallel, "distributed"):
-        import mmcv.device
-        mmcvparallel.distributed.MMDistributedDataParallel = mmcv.device.npu.NPUDistributedDataParallel
-
-
-def ddp_forward(mmcvparallel: ModuleType, options: Dict):
-    def new_forward(self, *inputs, **kwargs):
+def ddp(module: ModuleType, options: Dict):
+    # For mmcv 1.x: module path is mmcv.parallel.distributed
+
+    def _run_ddp_forward(self, *inputs, **kwargs):
         module_to_run = self.module
 
         if self.device_ids:
@@ -20,5 +16,10 @@ def ddp_forward(mmcvparallel: ModuleType, options: Dict):
         else:
             return module_to_run(*inputs, **kwargs)
 
-    if hasattr(mmcvparallel, "distributed"):
-        mmcvparallel.distributed.MMDistributedDataParallel._run_ddp_forward = new_forward
\ No newline at end of file
+
+    if hasattr(module, "MMDistributedDataParallel"):
+        import mmcv.device
+        module.MMDistributedDataParallel._run_ddp_forward = _run_ddp_forward
+        module.MMDistributedDataParallel = mmcv.device.npu.NPUDistributedDataParallel
+    else:
+        raise AttributeError("MMDistributedDataParallel not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/functions.py b/mx_driving/patcher/functions.py
index 1925c57a..7d689c03 100644
--- a/mx_driving/patcher/functions.py
+++ b/mx_driving/patcher/functions.py
@@ -32,3 +32,5 @@ def stream(mmcvparallel: ModuleType, options: Dict):
 
     if hasattr(mmcvparallel._functions, "Scatter"):
         mmcvparallel._functions.Scatter.forward = new_forward
+    else:
+        raise AttributeError("Scatter not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/mmcv.py b/mx_driving/patcher/mmcv.py
index a31bc761..95dfe5cc 100644
--- a/mx_driving/patcher/mmcv.py
+++ b/mx_driving/patcher/mmcv.py
@@ -30,6 +30,8 @@ def msda(mmcvops: ModuleType, options: Dict):
     if hasattr(mmcvops, "multi_scale_deformable_attn"):
         mmcvops.multi_scale_deformable_attn.MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction
         mmcvops.multi_scale_deformable_attn.multi_scale_deformable_attn = multi_scale_deformable_attn
+    else:
+        raise AttributeError("multi_scale_deformable_attn not found")
 
 
 def dc(mmcvops: ModuleType, options: Dict):
@@ -38,6 +40,8 @@ def dc(mmcvops: ModuleType, options: Dict):
     if hasattr(mmcvops, "deform_conv"):
         mmcvops.deform_conv.DeformConv2dFunction = DeformConv2dFunction
         mmcvops.deform_conv.deform_conv2d = deform_conv2d
+    else:
+        raise AttributeError("deform_conv not found")
 
 
 def mdc(mmcvops: ModuleType, options: Dict):
@@ -46,3 +50,5 @@ def mdc(mmcvops: ModuleType, options: Dict):
     if hasattr(mmcvops, "modulated_deform_conv"):
         mmcvops.modulated_deform_conv.ModulatedDeformConv2dFunction = ModulatedDeformConv2dFunction
         mmcvops.modulated_deform_conv.modulated_deform_conv2d = modulated_deform_conv2d
+    else:
+        raise AttributeError("modulated_deform_conv not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/mmdet.py b/mx_driving/patcher/mmdet.py
index 8311a9fd..6e5224f1 100644
--- a/mx_driving/patcher/mmdet.py
+++ b/mx_driving/patcher/mmdet.py
@@ -19,6 +19,8 @@ def pseudo_sampler(mmdetsamplers: ModuleType, options: Dict):
             return sampling_result
 
         mmdetsamplers.pseudo_sampler.PseudoSampler.sample = sample
+    else:
+        raise AttributeError("pseudo_sampler not found")
 
 
 def resnet_add_relu(mmdetresnet: ModuleType, options: Dict):
@@ -50,6 +52,8 @@ def resnet_add_relu(mmdetresnet: ModuleType, options: Dict):
             return out
 
         mmdetresnet.BasicBlock.forward = forward
+    else:
+        raise AttributeError("BasicBlock not found")
 
     if hasattr(mmdetresnet, "Bottleneck"):
 
@@ -93,6 +97,8 @@ def resnet_add_relu(mmdetresnet: ModuleType, options: Dict):
             return out
 
         mmdetresnet.Bottleneck.forward = forward
+    else:
+        raise AttributeError("Bottleneck not found")
 
 
 def resnet_maxpool(mmdetresnet: ModuleType, options: Dict):
@@ -119,6 +125,8 @@ def resnet_maxpool(mmdetresnet: ModuleType, options: Dict):
             return tuple(out)
 
         mmdetresnet.ResNet.forward = forward
+    else:
+        raise AttributeError("ResNet not found")
 
 
 def resnet_fp16(mmdetresnet: ModuleType, options: Dict):
@@ -144,3 +152,5 @@ def resnet_fp16(mmdetresnet: ModuleType, options: Dict):
             return tuple([out.float() for out in tuple(outs)])
 
         mmdetresnet.ResNet.forward = forward
+    else:
+        raise AttributeError("ResNet not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/mmdet3d.py b/mx_driving/patcher/mmdet3d.py
index d02d1258..c04b552a 100644
--- a/mx_driving/patcher/mmdet3d.py
+++ b/mx_driving/patcher/mmdet3d.py
@@ -34,6 +34,8 @@ def nuscenes_dataset(mmdet3ddatasets: ModuleType, options: Dict):
             return box_list
 
         mmdet3ddatasets.output_to_nusc_box = output_to_nusc_box
+    else:
+        raise AttributeError("output_to_nusc_box not found")
 
 
 def nuscenes_metric(mmdet3dmetrics: ModuleType, options: Dict):
@@ -96,3 +98,5 @@ def nuscenes_metric(mmdet3dmetrics: ModuleType, options: Dict):
             return box_list, attrs
 
         mmdet3dmetrics.output_to_nusc_box = output_to_nusc_box
+    else:
+        raise AttributeError("output_to_nusc_box not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/nuscenes.py b/mx_driving/patcher/nuscenes.py
index d00e2095..0effee7e 100644
--- a/mx_driving/patcher/nuscenes.py
+++ b/mx_driving/patcher/nuscenes.py
@@ -60,4 +60,6 @@ def nuscenes_mot_metric(nusceneseval: ModuleType, options: Dict):
         return r
 
     if hasattr(nusceneseval, "mot"):
-        nusceneseval.mot.MOTAccumulatorCustom.merge_event_dataframes = merge_event_dataframes_new
\ No newline at end of file
+        nusceneseval.mot.MOTAccumulatorCustom.merge_event_dataframes = merge_event_dataframes_new
+    else:
+        raise AttributeError("mot not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/optimizer.py b/mx_driving/patcher/optimizer.py
index 7b915373..1ed5257a 100644
--- a/mx_driving/patcher/optimizer.py
+++ b/mx_driving/patcher/optimizer.py
@@ -251,6 +251,8 @@ def optimizer_hooks(mmcvhooks: ModuleType, options: Dict):
                 # clear grads
                 runner.model.zero_grad()
                 runner.optimizer.zero_grad()
+    else:
+        raise AttributeError("optimizer not found")
 
 
 def optimizer_wrapper(mmcvoptwrapper: ModuleType, options: Dict):
@@ -274,3 +276,5 @@ def optimizer_wrapper(mmcvoptwrapper: ModuleType, options: Dict):
             self.clip_grads = _get_clip_func(self.optimizer)
 
         OptimWrapper.__init__ = new_init
+    else:
+        raise AttributeError("OptimWrapper not found")
\ No newline at end of file
diff --git a/mx_driving/patcher/profiler.py b/mx_driving/patcher/profiler.py
index 915c8eab..542544de 100644
--- a/mx_driving/patcher/profiler.py
+++ b/mx_driving/patcher/profiler.py
@@ -180,9 +180,23 @@ def profiler(runner: ModuleType, options: Dict):
 
     if hasattr(runner, "EpochBasedRunner"):
         runner.EpochBasedRunner.train = train
+    else:
+        raise AttributeError("EpochBasedRunner not found")
+
     if hasattr(runner, "EpochBasedTrainLoop"):
         runner.EpochBasedTrainLoop.run_epoch = run_epoch
+    else:
+        raise AttributeError("EpochBasedTrainLoop not found")
+
+
     if hasattr(runner, "IterBasedTrainLoop"):
         runner.IterBasedTrainLoop.run = run
+    else:
+        raise AttributeError("IterBasedTrainLoop not found")
+
+
     if hasattr(runner, "IterBasedRunner"):
         runner.IterBasedRunner.run = run_iter
+    else:
+        raise AttributeError("IterBasedRunner not found")
+
\ No newline at end of file
diff --git a/mx_driving/patcher/tensor.py b/mx_driving/patcher/tensor.py
index bfe42e71..c41cc60e 100644
--- a/mx_driving/patcher/tensor.py
+++ b/mx_driving/patcher/tensor.py
@@ -18,7 +18,10 @@ def index(torch: ModuleType, options: Dict):
                 return torch.masked_select(self, indices).view(-1, self.shape[1])
         return fn(self, indices)  # fallback to the original function
 
-    torch.Tensor.__getitem__ = new_fn
+    if hasattr(torch, "Tensor"):
+        torch.Tensor.__getitem__ = new_fn
+    else:
+        raise AttributeError('Tensor not found')
 
 
 def check_shape_bmm(a, b):
@@ -44,6 +47,13 @@ def batch_matmul(torch: ModuleType, options: Dict):
             return original_fn(a, b)
         return wrapper
 
-    torch.matmul = create_wrapper(torch.matmul)
-    torch.Tensor.matmul = create_wrapper(torch.Tensor.matmul)
-    torch.Tensor.__matmul__ = create_wrapper(torch.Tensor.__matmul__)
+    if hasattr(torch, "matmul"):
+        torch.matmul = create_wrapper(torch.matmul)
+    else:
+        raise AttributeError("matmul not found")
+
+    if hasattr(torch, "Tensor"):
+        torch.Tensor.matmul = create_wrapper(torch.Tensor.matmul)
+        torch.Tensor.__matmul__ = create_wrapper(torch.Tensor.__matmul__)
+    else:
+        raise AttributeError("Tensor not found")
\ No newline at end of file
--
Gitee
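Note on the failure convention introduced in PATCH 1/5: the patch functions now raise AttributeError when a patch target is missing instead of silently skipping it, and per the commit subject the caller is expected to catch that exception and surface it as a warning. The builder side is not shown in this series, so the following is a minimal sketch of that contract, assuming a hypothetical apply_patch helper; the real Patch/PatcherBuilder internals may differ.

    import importlib
    import warnings
    from types import ModuleType
    from typing import Callable, Dict

    PatchFn = Callable[[ModuleType, Dict], None]

    def apply_patch(module_path: str, patch_fn: PatchFn, options: Dict) -> None:
        # Hypothetical builder-side wrapper: import the target module, run the
        # patch function against it, and downgrade a failed patch to a warning,
        # as described in the commit subject.
        try:
            patch_fn(importlib.import_module(module_path), options)
        except (ImportError, AttributeError) as exc:
            warnings.warn(f"patch {patch_fn.__name__} skipped: {exc}")

Under this convention a single missing attribute (for example Scatter or MMDistributedDataParallel) no longer aborts training setup, while the failure still becomes visible.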
From 930ae30b85409a7891614224db602e2a9a06f94d Mon Sep 17 00:00:00 2001
From: zhttjd
Date: Fri, 8 Aug 2025 11:49:55 +0800
Subject: [PATCH 2/5] hot fix

---
 docs/get_started/patcher.md                   |  6 +-
 .../DiffusionDrive/migrate_to_ascend/patch.py | 21 +------
 mx_driving/patcher/__init__.py                |  1 -
 tests/torch/test_patcher_distribute.py        | 55 +------------------
 4 files changed, 9 insertions(+), 74 deletions(-)

diff --git a/docs/get_started/patcher.md b/docs/get_started/patcher.md
index 12ca7667..ac050ac5 100644
--- a/docs/get_started/patcher.md
+++ b/docs/get_started/patcher.md
@@ -73,7 +73,7 @@ import torch
 import torch_npu
 import mx_driving
 from mx_driving.patcher import PatcherBuilder, Patch
-from mx_driving.patcher import ddp, ddp_forward
+from mx_driving.patcher import ddp
 from mx_driving.patcher import resnet_add_relu, resnet_maxpool, nuscenes_dataset
 from mx_driving.patcher import dc, mdc, msda
 
@@ -82,7 +82,7 @@ bev_former_patcher_builder = (
     .add_module_patch("mmcv.ops", Patch(msda), Patch(dc), Patch(mdc))
     .add_module_patch("mmdet.models.backbones.resnet", Patch(resnet_add_relu), Patch(resnet_maxpool))
     .add_module_patch("mmdet3d.datasets.nuscenes_dataset", Patch(nuscenes_dataset))
-    .add_module_patch("mmcv.parallel", Patch(ddp), Patch(ddp_forward))
+    .add_module_patch("mmcv.parallel.distributed", Patch(ddp))
 )
 ```
 
@@ -100,7 +100,7 @@ if __name__ == '__main__':
 ```
 
 ### Patcher feature descriptions
-- ddp and ddp_forward modify the parallelism-related code in the mmcv framework to adapt it for NPU training.
+- ddp modifies the parallelism-related code in the mmcv framework to adapt it for NPU training.
 - resnet_add_relu and resnet_maxpool optimize specific operators in the ResNet structure, replacing them with high-performance DrivingSDK operators.
 - dc, mdc and msda replace the mmcv operators DeformConv2d, ModulatedDeformConv2d and MultiScaleDeformableAttn with high-performance DrivingSDK operators.
 - nuscenes_dataset applies performance optimizations targeting the BEVFormer model.
diff --git a/model_examples/DiffusionDrive/migrate_to_ascend/patch.py b/model_examples/DiffusionDrive/migrate_to_ascend/patch.py
index eaa6f5d6..9b82ca05 100644
--- a/model_examples/DiffusionDrive/migrate_to_ascend/patch.py
+++ b/model_examples/DiffusionDrive/migrate_to_ascend/patch.py
@@ -16,7 +16,7 @@ import torch_npu
 import mx_driving
 from mx_driving import deformable_aggregation
 from mx_driving.patcher import PatcherBuilder, Patch
-from mx_driving.patcher import index, batch_matmul, numpy_type, ddp, stream, ddp_forward
+from mx_driving.patcher import index, batch_matmul, numpy_type, ddp, stream
 from mx_driving.patcher import resnet_add_relu, resnet_maxpool
 
 
@@ -402,22 +402,6 @@ def get_hccl_init_dist(runner: ModuleType):
     return None
 
 
-def run_ddp_forward(parallel: ModuleType, options: Dict):
-
-    def _run_ddp_forward(self, *inputs, **kwargs):
-        module_to_run = self.module
-
-        if self.device_ids:
-            inputs, kwargs = self.to_kwargs(  # type: ignore
-                inputs, kwargs, self.device_ids[0])
-            return module_to_run(*inputs[0], **kwargs[0])  # type: ignore
-        else:
-            return module_to_run(*inputs, **kwargs)
-
-    if hasattr(parallel, "MMDistributedDataParallel"):
-        parallel.MMDistributedDataParallel._run_ddp_forward = _run_ddp_forward
-
-
 def instance_queue(queue: ModuleType, options: Dict):
 
     def prepare_motion(
@@ -470,7 +454,8 @@ def generate_patcher_builder(performance=False):
         PatcherBuilder()
         .add_module_patch("torch", Patch(index), Patch(batch_matmul))
         .add_module_patch("numpy", Patch(numpy_type))
-        .add_module_patch("mmcv.parallel", Patch(ddp), Patch(stream), Patch(ddp_forward), Patch(run_ddp_forward))
+        .add_module_patch("mmcv.parallel", Patch(stream))
+        .add_module_patch("mmcv.parallel.distributed", Patch(ddp))
         .add_module_patch("mmdet.models.backbones.resnet", Patch(resnet_add_relu), Patch(resnet_maxpool))
         .add_module_patch("projects.mmdet3d_plugin.models.attention", Patch(flash_attn))
 
diff --git a/mx_driving/patcher/__init__.py b/mx_driving/patcher/__init__.py
index 34db464a..54dbb9e2 100644
--- a/mx_driving/patcher/__init__.py
+++ b/mx_driving/patcher/__init__.py
@@ -40,7 +40,6 @@ __all__ = [
     "pseudo_sampler",
     "numpy_type",
     "ddp",
-    "ddp_forward",
     "stream",
     "resnet_add_relu",
     "resnet_maxpool",
diff --git a/tests/torch/test_patcher_distribute.py b/tests/torch/test_patcher_distribute.py
index 7dba095c..511c1467 100644
--- a/tests/torch/test_patcher_distribute.py
+++ b/tests/torch/test_patcher_distribute.py
@@ -6,7 +6,7 @@ from unittest.mock import ANY, patch, MagicMock, PropertyMock
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-from mx_driving.patcher import ddp, ddp_forward
+from mx_driving.patcher import ddp
 
 
 def assertIsNotInstance(obj, cls):
@@ -22,58 +22,9 @@ class TestDistribute(TestCase):
 
     def test_ddp_patch(self):
         # Apply monkey patch
-        ddp(self.mock_mmcvparallel, {})
+        ddp(self.mock_mmcvparallel.distributed, {})
 
-        assertIsNotInstance(self.mock_mmcvparallel.distributed.MMDistributedDataParallel, MagicMock)
+        assertIsNotInstance(self.mock_mmcvparallel.MMDistributedDataParallel, MagicMock)
 
-    def test_ddp_forward_patch(self):
-        # Apply the ddp_forward patch
-        ddp_forward(self.mock_mmcvparallel, {})
-
-        # Get the patched _run_ddp_forward method
-        new_forward = self.mock_mmcvparallel.distributed.MMDistributedDataParallel._run_ddp_forward
-
-        # Verify _run_ddp_forward is correctly replaced
-        assertIsNotInstance(
-            new_forward,
-            MagicMock
-        )
-
-        # Create mock instance and inputs
-        mock_self = MagicMock()
-        mock_self.device_ids = [0]  # Simulate device IDs present
-        mock_self.module = MagicMock(return_value="module_output")
-
-        # Mock the to_kwargs method
-        mock_self.to_kwargs = MagicMock(return_value=(
-            [("processed_input",)],
-            [{"processed_kwarg": "value"}]
-        ))
-
-        # Call the patched forward method
-        result = new_forward(mock_self, "input1", "input2", kwarg1="value1")
-
-        # Check to_kwargs is called correctly
-        mock_self.to_kwargs.assert_called_once_with(
-            ("input1", "input2"),
-            {"kwarg1": "value1"},
-            0
-        )
-
-        # Check module is called correctly
-        mock_self.module.assert_called_once_with(
-            "processed_input",
-            processed_kwarg="value"
-        )
-
-        # Verify return value
-        self.assertEqual(result, "module_output")
-
-        # Test case with no device_ids
-        mock_self.reset_mock()
-        mock_self.device_ids = []
-        result = new_forward(mock_self, "input3", kwarg2="value2")
-        mock_self.module.assert_called_once_with("input3", kwarg2="value2")
-
 if __name__ == '__main__':
     run_tests()
\ No newline at end of file
--
Gitee
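Note on the relocated ddp patch: registration moved from the mmcv.parallel package to the mmcv.parallel.distributed submodule, which is the module that actually defines MMDistributedDataParallel, so the hasattr check inside ddp now inspects the right object. A rough sketch of what add_module_patch is assumed to do with such a dotted path follows; the real PatcherBuilder implementation is not part of this series.

    import importlib
    from types import ModuleType
    from typing import Callable, Dict, List, Tuple

    PatchFn = Callable[[ModuleType, Dict], None]

    # Hypothetical reduction of PatcherBuilder: each entry pairs a dotted
    # module path with one patch function to run against that module.
    _registry: List[Tuple[str, PatchFn]] = []

    def add_module_patch(path: str, *patch_fns: PatchFn) -> None:
        for fn in patch_fns:
            _registry.append((path, fn))

    def apply_all(options: Dict) -> None:
        for path, fn in _registry:
            # "mmcv.parallel.distributed" resolves directly to the submodule
            # that defines MMDistributedDataParallel; the old registration
            # reached it indirectly via the mmcv.parallel package attribute.
            fn(importlib.import_module(path), options)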
From 0edd1ce0843da854d1f5c01d13e78ad5425cd501 Mon Sep 17 00:00:00 2001
From: zhttjd
Date: Fri, 8 Aug 2025 12:50:29 +0800
Subject: [PATCH 3/5] hot fix

---
 tests/torch/test_patcher_distribute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/torch/test_patcher_distribute.py b/tests/torch/test_patcher_distribute.py
index 511c1467..7aa47fc2 100644
--- a/tests/torch/test_patcher_distribute.py
+++ b/tests/torch/test_patcher_distribute.py
@@ -24,7 +24,7 @@ class TestDistribute(TestCase):
         # Apply monkey patch
         ddp(self.mock_mmcvparallel.distributed, {})
 
-        assertIsNotInstance(self.mock_mmcvparallel.MMDistributedDataParallel, MagicMock)
+        assertIsNotInstance(self.mock_mmcvparallel.distributed.MMDistributedDataParallel, MagicMock)
 
 if __name__ == '__main__':
     run_tests()
\ No newline at end of file
--
Gitee
From 5e753fb40c964051ae736200a32127d3d6885fe6 Mon Sep 17 00:00:00 2001
From: zhttjd
Date: Fri, 8 Aug 2025 15:24:10 +0800
Subject: [PATCH 4/5] hot fix

---
 tests/torch/test_patcher_functions.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/torch/test_patcher_functions.py b/tests/torch/test_patcher_functions.py
index 24822b65..66a3dcbf 100644
--- a/tests/torch/test_patcher_functions.py
+++ b/tests/torch/test_patcher_functions.py
@@ -109,18 +109,6 @@ class TestPatcherStream(TestCase):
         self.mock_mmcvparallel._functions.scatter.assert_called_once()
         self.assertIsInstance(result, tuple)
 
-    def test_no_scatter_class(self):
-        """Verify graceful handling when Scatter class is missing"""
-        mock_mmcvparallel = MagicMock()
-        mock_mmcvparallel._functions = MagicMock()
-        delattr(mock_mmcvparallel._functions, "Scatter")
-
-        from mx_driving.patcher import stream
-        try:
-            stream(mock_mmcvparallel, {})
-        except AttributeError:
-            self.fail("stream should handle missing Scatter class gracefully")
-
 if __name__ == "__main__":
     run_tests()
\ No newline at end of file
--
Gitee
From 23d3ca349c96803a9313f0d1cb858e86cd849874 Mon Sep 17 00:00:00 2001
From: zhttjd
Date: Fri, 8 Aug 2025 16:02:28 +0800
Subject: [PATCH 5/5] ci fix

---
 mx_driving/patcher/mmcv.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mx_driving/patcher/mmcv.py b/mx_driving/patcher/mmcv.py
index 59d690c0..3e61c44e 100644
--- a/mx_driving/patcher/mmcv.py
+++ b/mx_driving/patcher/mmcv.py
@@ -45,6 +45,7 @@ def msda(mmcvops: ModuleType, options: Dict):
     else:
         raise AttributeError("multi_scale_deform_attn not found")
 
+
 def dc(mmcvops: ModuleType, options: Dict):
     from mx_driving import DeformConv2dFunction, deform_conv2d
 
--
Gitee
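Closing note on the test churn in PATCH 2/5 through 4/5: MagicMock fabricates attributes on first access, so a hasattr check inside a patch function always succeeds against a mock, and an assertion must read the patched attribute back from the same object that was passed to the patch function. A self-contained illustration, independent of the real test suite:

    from unittest.mock import MagicMock

    def ddp_like(module, options):
        # Same shape as the patched ddp: replace the class if present.
        if hasattr(module, "MMDistributedDataParallel"):
            module.MMDistributedDataParallel = dict  # stand-in replacement
        else:
            raise AttributeError("MMDistributedDataParallel not found")

    mock_parallel = MagicMock()
    ddp_like(mock_parallel.distributed, {})

    # The replacement is only visible on the object that was patched. Reading
    # mock_parallel.MMDistributedDataParallel creates a fresh child MagicMock,
    # which is precisely the assertion mistake that PATCH 3/5 corrects.
    assert mock_parallel.distributed.MMDistributedDataParallel is dict
    assert isinstance(mock_parallel.MMDistributedDataParallel, MagicMock)

The same mock behavior explains why PATCH 4/5 removes test_no_scatter_class: after PATCH 1/5, a missing Scatter class is intentionally an AttributeError rather than a silent no-op, so the old "graceful handling" expectation no longer applies.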