class DynamicSampler(_DistributedSampler):
    """Distributed sampler that shuffles indices in fixed-size groups.

    The index list ``0 .. len(dataset) - 1`` is padded (or truncated when
    ``drop_last`` is set) to ``total_size``, cut into consecutive chunks of
    ``group_size`` indices, shuffled inside every chunk, and then the chunks
    themselves are shuffled.  Indices that were neighbours in the dataset
    therefore stay in the same chunk.  Finally each rank takes every
    ``num_replicas``-th index starting at its own ``rank``.

    NOTE(review): this presumably relies on the dataset being pre-sorted by
    sample cost, with ``group_size`` equal to the number of replicas, so that
    all ranks receive similarly sized samples at the same step -- confirm
    against the caller.

    Args:
        dataset: Sized dataset to sample from.
        num_replicas: Number of participating processes; forwarded to the
            parent sampler.
        rank: Rank of the current process; forwarded to the parent sampler.
        shuffle: When true, apply the grouped shuffle described above.
        seed: Base seed; the effective seed of an epoch is ``seed + epoch``.
        drop_last: When true, truncate to ``total_size`` instead of padding.
        group_size: Number of consecutive indices kept together in one group.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """

    def __init__(self, dataset, num_replicas: Optional[int] = None,
                 rank: Optional[int] = None, shuffle: bool = True,
                 seed: int = 0, drop_last: bool = False,
                 group_size: int = 8) -> None:
        if group_size < 1:
            raise ValueError(f"group_size must be a positive integer, got {group_size}")
        super().__init__(dataset, num_replicas=num_replicas, rank=rank, drop_last=drop_last)
        self.shuffle = shuffle
        self.seed = seed
        self.epoch = 0
        self.group_size = group_size

    def set_epoch(self, epoch: int) -> None:
        """Record the epoch so that every epoch uses a different permutation."""
        self.epoch = epoch

    def __iter__(self) -> Iterator[int]:
        """Return an iterator over the indices assigned to this rank."""
        # Function-scope imports keep the class self-contained: ``math`` is
        # needed by the padding branch below.
        import math
        import random

        indices = list(range(len(self.dataset)))  # type: ignore[arg-type]

        if not self.drop_last:
            # Add extra samples to make the list evenly divisible.
            padding_size = self.total_size - len(indices)
            if padding_size <= len(indices):
                indices += indices[:padding_size]
            else:
                repeats = math.ceil(padding_size / len(indices))
                indices += (indices * repeats)[:padding_size]
        else:
            # Remove the tail of the data to make it evenly divisible.
            indices = indices[:self.total_size]
        assert len(indices) == self.total_size

        if self.shuffle:
            # A private generator yields the same permutation as seeding the
            # module-level RNG with the same value, but leaves the global
            # ``random`` state untouched.
            rng = random.Random(self.seed + self.epoch)
            step = self.group_size
            groups = [indices[start:start + step]
                      for start in range(0, len(indices), step)]
            # Shuffle inside every group first ...
            for group in groups:
                rng.shuffle(group)
            # ... then shuffle the order of the groups themselves.
            rng.shuffle(groups)
            indices = [index for group in groups for index in group]

        # Subsample: every num_replicas-th index, starting at this rank.
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
0f70005..9845ca4 100644 --- a/pcdet/datasets/nuscenes/nuscenes_dataset.py +++ b/pcdet/datasets/nuscenes/nuscenes_dataset.py -@@ -1,3 +1,5 @@ +@@ -1,5 +1,8 @@ +# Copyright 2024 Huawei Technologies Co., Ltd + import copy import pickle ++import json from pathlib import Path -@@ -11,6 +13,10 @@ from ..dataset import DatasetTemplate + + import numpy as np +@@ -11,6 +14,10 @@ from ..dataset import DatasetTemplate from pyquaternion import Quaternion from PIL import Image @@ -671,6 +756,41 @@ index 0f70005..19ccf95 100644 class NuScenesDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): +@@ -72,6 +79,12 @@ class NuScenesDataset(DatasetTemplate): + ).tolist() + self.logger.info('Total samples after balanced resampling: %s' % (len(sampled_infos))) + ++ import time ++ start_time = time.time() ++ sampled_infos.sort(key=lambda x: x['voxel_num'], reverse=True) ++ duration_time = time.time() - start_time ++ self.logger.info('Total samples sort time is : %.2f' % (duration_time)) ++ + cls_infos_new = {name: [] for name in self.class_names} + for info in sampled_infos: + for name in set(info['gt_names']): +@@ -389,6 +402,21 @@ def create_nuscenes_info(version, data_path, save_path, max_sweeps=10, with_cam= + test='test' in version, max_sweeps=max_sweeps, with_cam=with_cam + ) + ++ voxel_path = data_path / 'voxel.json' ++ with open(voxel_path, 'r') as f2: ++ voxel_data = json.load(f2) ++ ++ missing_tokens = [info['token'] for info in train_nusc_infos if info['token'] not in voxel_data] ++ if missing_tokens: ++ raise RuntimeError(f"Missing {len(missing_tokens)} tokens in voxel_data. 
First missing: {missing_tokens[:5]}") ++ ++ for info in train_nusc_infos: ++ token = info['token'] ++ if token in voxel_data: ++ info['voxel_num'] = voxel_data[token] ++ else: ++ raise KeyError(f"Token {token} not found in voxel_data") ++ + if version == 'v1.0-test': + print('test sample: %d' % len(train_nusc_infos)) + with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_test.pkl', 'wb') as f: diff --git a/pcdet/datasets/processor/data_processor.py b/pcdet/datasets/processor/data_processor.py index 4f72ab5..ed9f904 100644 --- a/pcdet/datasets/processor/data_processor.py @@ -2268,7 +2388,7 @@ index 51b7178..cd8652d 100644 help='set extra config keys if needed') diff --git a/tools/train.py b/tools/train.py -index 29a88bd..340a65b 100644 +index 29a88bd..405da74 100644 --- a/tools/train.py +++ b/tools/train.py @@ -7,6 +7,8 @@ from pathlib import Path @@ -2280,9 +2400,12 @@ index 29a88bd..340a65b 100644 import torch.nn as nn from tensorboardX import SummaryWriter -@@ -33,7 +35,7 @@ def parse_config(): +@@ -31,9 +33,9 @@ def parse_config(): + parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm'], default='none') + parser.add_argument('--tcp_port', type=int, default=18888, help='tcp port for distrbuted training') parser.add_argument('--sync_bn', action='store_true', default=False, help='whether to use sync bn') - parser.add_argument('--fix_random_seed', action='store_true', default=False, help='') +- parser.add_argument('--fix_random_seed', action='store_true', default=False, help='') ++ parser.add_argument('--fix_random_seed', action='store_true', default=True, help='') parser.add_argument('--ckpt_save_interval', type=int, default=1, help='number of training epochs') - parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training') + parser.add_argument('--local-rank', type=int, default=0, help='local rank for distributed training') @@ -2298,6 +2421,15 @@ index 29a88bd..340a65b 100644 ) dist_train = True +@@ 
-85,7 +87,7 @@ def main(): + args.epochs = cfg.OPTIMIZATION.NUM_EPOCHS if args.epochs is None else args.epochs + + if args.fix_random_seed: +- common_utils.set_random_seed(666 + cfg.LOCAL_RANK) ++ common_utils.set_random_seed(666) + + output_dir = cfg.ROOT_DIR / 'output' / cfg.EXP_GROUP_PATH / cfg.TAG / args.extra_tag + ckpt_dir = output_dir / 'ckpt' @@ -108,7 +110,7 @@ def main(): for key, val in vars(args).items(): logger.info('{:16} {}'.format(key, val)) @@ -2316,6 +2448,16 @@ index 29a88bd..340a65b 100644 optimizer = build_optimizer(model, cfg.OPTIMIZATION) +@@ -159,7 +161,8 @@ def main(): + + model.train() # before wrap to DistributedDataParallel to support fixed some parameters + if dist_train: +- model = nn.parallel.DistributedDataParallel(model, device_ids=[cfg.LOCAL_RANK % torch.cuda.device_count()]) ++ # model = nn.parallel.DistributedDataParallel(model, device_ids=[cfg.LOCAL_RANK % torch.cuda.device_count()]) ++ model = nn.parallel.DistributedDataParallel(model, device_ids=[cfg.LOCAL_RANK % torch.cuda.device_count()], broadcast_buffers=False, find_unused_parameters=False) + logger.info(f'----------- Model {cfg.MODEL.NAME} created, param count: {sum([m.numel() for m in model.parameters()])} -----------') + logger.info(model) + diff --git a/tools/train_utils/optimization/__init__.py b/tools/train_utils/optimization/__init__.py index 888cfcf..aa631f0 100644 --- a/tools/train_utils/optimization/__init__.py