diff --git a/model_examples/Deformable-DETR/Deformable-DETR_npu.patch b/model_examples/Deformable-DETR/Deformable-DETR_npu.patch index eff06b0a0694ba3258dd4a207af246ddf366ad6e..1f8b04a20a28ceff7ed3393840a43bf15ec9a69d 100644 --- a/model_examples/Deformable-DETR/Deformable-DETR_npu.patch +++ b/model_examples/Deformable-DETR/Deformable-DETR_npu.patch @@ -136,9 +136,18 @@ new mode 100755 diff --git a/datasets/coco.py b/datasets/coco.py old mode 100644 new mode 100755 -index 1be8308..682e5fb +index 1be8308..ca7b02a --- a/datasets/coco.py +++ b/datasets/coco.py +@@ -122,7 +122,7 @@ class ConvertCocoPolysToMask(object): + return image, target + + +-def make_coco_transforms(image_set): ++def make_coco_transforms(image_set, step_size=None): + + normalize = T.Compose([ + T.ToTensor(), @@ -135,13 +135,13 @@ def make_coco_transforms(image_set): return T.Compose([ T.RandomHorizontalFlip(), @@ -151,10 +160,22 @@ index 1be8308..682e5fb - T.RandomResize(scales, max_size=1333), ]) ), -+ T.RandomResize(scales, max_size=1333), ++ T.BalancedRandomResize(scales, step_size, max_size=1333), normalize, ]) +@@ -164,6 +164,10 @@ def build(image_set, args): + } + + img_folder, ann_file = PATHS[image_set] +- dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(image_set), return_masks=args.masks, ++ ++ batch_size, world_size = args.batch_size, args.world_size ++ step_size = batch_size * world_size ++ ++ dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(image_set, step_size), return_masks=args.masks, + cache_mode=args.cache_mode, local_rank=get_local_rank(), local_size=get_local_size()) + return dataset diff --git a/datasets/coco_eval.py b/datasets/coco_eval.py old mode 100644 new mode 100755 @@ -170,19 +191,194 @@ new mode 100755 diff --git a/datasets/samplers.py b/datasets/samplers.py old mode 100644 new mode 100755 +index 14c0af2..6c0c013 +--- a/datasets/samplers.py ++++ b/datasets/samplers.py +@@ -52,7 +52,7 @@ class DistributedSampler(Sampler): + g.manual_seed(self.epoch) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: +- indices = torch.arange(len(self.dataset)).tolist() ++ indices = self.bucket_arange() + + # add extra samples to make it evenly divisible + indices += indices[: (self.total_size - len(indices))] +@@ -65,6 +65,17 @@ class DistributedSampler(Sampler): + + return iter(indices) + ++ def bucket_arange(self): ++ g = torch.Generator() ++ g.manual_seed(self.epoch * self.epoch + 13) ++ ++ bucket_index = torch.randperm(len(self.dataset.bucket_keys), generator=g).tolist() ++ indices = [] ++ for bct_idx in bucket_index: ++ indices.extend(torch.randperm(len(self.dataset.bucket[self.dataset.bucket_keys[bct_idx]]), generator=g).tolist()) ++ ++ return indices ++ + def __len__(self): + return self.num_samples + diff --git a/datasets/torchvision_datasets/__init__.py b/datasets/torchvision_datasets/__init__.py old mode 100644 new mode 100755 diff --git a/datasets/torchvision_datasets/coco.py b/datasets/torchvision_datasets/coco.py old mode 100644 new mode 100755 +index 45b5f52..16247bd +--- a/datasets/torchvision_datasets/coco.py ++++ b/datasets/torchvision_datasets/coco.py +@@ -14,6 +14,7 @@ from PIL import Image + import os + import os.path + import tqdm ++from collections import OrderedDict + from io import BytesIO + + +@@ -36,6 +37,11 @@ class CocoDetection(VisionDataset): + from pycocotools.coco import COCO + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) ++ ++ self.bucket = {1.333: [], 1.499: [], 0.75: [], 1.502: [], 1.0: [], 0.667: [], 1.506: []} ++ self.bucketing() ++ self.bucket_keys = list(self.bucket.keys()) ++ + self.cache_mode = cache_mode + self.local_rank = local_rank + self.local_size = local_size +@@ -43,6 +49,55 @@ class CocoDetection(VisionDataset): + self.cache = {} + self.cache_images() + ++ def sort_wh_ratio(self, ori_dict): ++ sorted_ori = OrderedDict(sorted(ori_dict.items(), key=lambda item: item[1])) ++ sorted_ori_length, sorted_ori_keys = len(sorted_ori), list(sorted_ori.keys()) ++ base = sorted_ori[sorted_ori_keys[0]] ++ ++ tgt_dict = {sorted_ori[sorted_ori_keys[0]]: [sorted_ori_keys[0]]} ++ point, base_point =1, 0 ++ ++ while point < sorted_ori_length: ++ if base - 0.0025 <= sorted_ori[sorted_ori_keys[point]] < base + 0.0025: ++ tgt_dict[sorted_ori[sorted_ori_keys[base_point]]].append(sorted_ori_keys[point]) ++ point += 1 ++ else: ++ tgt_dict[sorted_ori[sorted_ori_keys[point]]] = [sorted_ori_keys[point]] ++ base = sorted_ori[sorted_ori_keys[point]] ++ base_point = point ++ point += 1 ++ ++ return tgt_dict ++ ++ def bucketing(self): ++ others = {} ++ ++ for img_id in self.ids: ++ wh_ratio = round(self.coco.imgs[img_id]['width'] / self.coco.imgs[img_id]['height'], 3) ++ self.coco.imgs[img_id]['wh_ratio'] = wh_ratio ++ isbucket = False ++ ++ for k in list(self.bucket.keys()): ++ if k - 0.001 <= wh_ratio <= k + 0.001: ++ self.bucket[k].append(img_id) ++ isbucket = True ++ break ++ if isbucket is False: ++ others[img_id] = wh_ratio ++ ++ bucket = self.sort_wh_ratio(others) ++ bucket_capacity = 64 ++ key = 0 ++ for _, bct in bucket.items(): ++ if str(key) not in self.bucket.keys(): ++ self.bucket[str(key)] = bct ++ else: ++ if len(self.bucket[str(key)]) < bucket_capacity: ++ self.bucket[str(key)].extend(bct) ++ else: ++ key += 1 ++ self.bucket[str(key)] = bct ++ + def cache_images(self): + self.cache = {} + for index, img_id in zip(tqdm.trange(len(self.ids)), self.ids): diff --git a/datasets/transforms.py b/datasets/transforms.py old mode 100644 new mode 100755 -index 8f4baeb..f99284b +index 8f4baeb..8ebe4c7 --- a/datasets/transforms.py +++ b/datasets/transforms.py -@@ -201,10 +201,20 @@ class RandomResize(object): +@@ -140,6 +140,65 @@ def resize(image, target, size, max_size=None): + return rescaled_image, target + + ++def balanced_resize(image, target, size, max_size=None): ++ # size can be min_size (scalar) or (w, h) tuple ++ ++ def get_size_with_aspect_ratio(image_size, size, max_size=None): ++ w, h = image_size ++ if max_size is not None: ++ min_original_size = float(min((w, h))) ++ max_original_size = float(max((w, h))) ++ if max_original_size / min_original_size * size > max_size: ++ size = int(round(max_size * min_original_size / max_original_size)) ++ ++ if (w <= h and w == size) or (h <= w and h == size): ++ return (h, w) ++ ++ if round(w / h, 3) == 0.75: ++ ow = size ++ oh = int(size * h / w) ++ else: ++ oh = size ++ ow = int(size * w / h) ++ ++ return (oh, ow) ++ ++ def get_size(image_size, size, max_size=None): ++ if isinstance(size, (list, tuple)): ++ return size[::-1] ++ else: ++ return get_size_with_aspect_ratio(image_size, size, max_size) ++ ++ size = get_size(image.size, size, max_size) ++ rescaled_image = F.resize(image, size) ++ ++ if target is None: ++ return rescaled_image, None ++ ++ ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) ++ ratio_width, ratio_height = ratios ++ ++ target = target.copy() ++ if "boxes" in target: ++ boxes = target["boxes"] ++ scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) ++ target["boxes"] = scaled_boxes ++ ++ if "area" in target: ++ area = target["area"] ++ scaled_area = area * (ratio_width * ratio_height) ++ target["area"] = scaled_area ++ ++ h, w = size ++ target["size"] = torch.tensor([h, w]) ++ ++ if "masks" in target: ++ target['masks'] = interpolate( ++ target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5 ++ ++ return rescaled_image, target ++ ++ + def pad(image, target, padding): + # assumes that we only pad on the bottom right corners + padded_image = F.pad(image, (0, 0, padding[0], padding[1])) +@@ -201,10 +260,42 @@ class RandomResize(object): assert isinstance(sizes, (list, tuple)) self.sizes = sizes self.max_size = max_size @@ -202,10 +398,32 @@ index 8f4baeb..f99284b + + self.idx += 1 + return resize(img, target, self.size, self.max_size) ++ ++ ++class BalancedRandomResize(object): ++ def __init__(self, sizes, step_size, max_size=None): ++ assert isinstance(sizes, (list, tuple)) ++ self.step_size = step_size ++ self.sizes = sizes ++ self.max_size = max_size ++ self.idx = 0 ++ self.length = len(self.sizes) ++ ++ seed = 100 ++ torch.manual_seed(seed) ++ random.seed(seed) ++ self.size = self.sizes[random.randint(0, self.length - 1)] ++ ++ def __call__(self, img, target=None): ++ if self.idx % self.step_size == 0: ++ self.size = self.sizes[random.randint(0, self.length - 1)] ++ ++ self.idx += 1 ++ return balanced_resize(img, target, self.size, self.max_size) class RandomPad(object): -@@ -229,6 +239,8 @@ class RandomSelect(object): +@@ -229,6 +320,8 @@ class RandomSelect(object): def __call__(self, img, target): if random.random() < self.p: @@ -314,7 +532,7 @@ new mode 100755 diff --git a/main.py b/main.py old mode 100644 new mode 100755 -index fc6ccfa..6d0186b +index fc6ccfa..c09ba1a --- a/main.py +++ b/main.py @@ -18,6 +18,8 @@ from pathlib import Path @@ -368,6 +586,19 @@ index fc6ccfa..6d0186b help='path where to save, empty for no saving') parser.add_argument('--device', default='cuda', help='device to use for training / testing') +@@ -154,10 +159,10 @@ def main(args): + + if args.distributed: + if args.cache_mode: +- sampler_train = samplers.NodeDistributedSampler(dataset_train) ++ sampler_train = samplers.NodeDistributedSampler(dataset_train, shuffle=False) + sampler_val = samplers.NodeDistributedSampler(dataset_val, shuffle=False) + else: +- sampler_train = samplers.DistributedSampler(dataset_train) ++ sampler_train = samplers.DistributedSampler(dataset_train, shuffle=False) + sampler_val = samplers.DistributedSampler(dataset_val, shuffle=False) + else: + sampler_train = torch.utils.data.RandomSampler(dataset_train) @@ -201,12 +206,15 @@ def main(args): "lr": args.lr * args.lr_linear_proj_mult, } diff --git a/model_examples/Deformable-DETR/README.md b/model_examples/Deformable-DETR/README.md index 44459309422470410f902817b991f8ca352d32a7..599c9da4577bf4493135f7e2bf7e423ee72c14f6 100644 --- a/model_examples/Deformable-DETR/README.md +++ b/model_examples/Deformable-DETR/README.md @@ -112,6 +112,8 @@ bash test/train_8p_performance.sh --data_path='.data/coco' # 替换成你的coc 2024.12.23:首次发布 2025.5.7:性能优化、更新性能数据 + +2025.7.2:性能优化、更新性能和精度数据 ## FQA 暂无 \ No newline at end of file