From ee7dae79359d272606845c3c096da9e284d7d36f Mon Sep 17 00:00:00 2001
From: "li.ding"
Date: Thu, 9 Mar 2023 16:25:38 +0800
Subject: [PATCH 1/4] add pytorch SOLO
---
.../SOLO/pytorch/.github/CODE_OF_CONDUCT.md | 76 ++
.../SOLO/pytorch/.github/CONTRIBUTING.md | 53 ++
.../pytorch/.github/ISSUE_TEMPLATE/config.yml | 1 +
.../.github/ISSUE_TEMPLATE/error-report.md | 41 +
.../.github/ISSUE_TEMPLATE/feature_request.md | 22 +
.../ISSUE_TEMPLATE/general_questions.md | 10 +
.../SOLO/pytorch/.gitignore | 121 +++
.../SOLO/pytorch/.gitmodules | 3 +
.../SOLO/pytorch/.isort.cfg | 8 +
.../SOLO/pytorch/.pre-commit-config.yaml | 21 +
.../SOLO/pytorch/.style.yapf | 4 +
.../SOLO/pytorch/.travis.yml | 43 +
cv/instance_segmentation/SOLO/pytorch/LICENSE | 25 +
.../SOLO/pytorch/README.md | 55 ++
...ecoupled_solo_light_dcn_r50_fpn_8gpu_3x.py | 136 +++
.../decoupled_solo_light_r50_fpn_8gpu_3x.py | 129 +++
.../solo/decoupled_solo_r101_fpn_8gpu_3x.py | 130 +++
.../solo/decoupled_solo_r50_fpn_8gpu_1x.py | 126 +++
.../solo/decoupled_solo_r50_fpn_8gpu_3x.py | 130 +++
.../configs/solo/solo_r101_fpn_8gpu_3x.py | 130 +++
.../configs/solo/solo_r50_fpn_8gpu_1x.py | 126 +++
.../configs/solo/solo_r50_fpn_8gpu_3x.py | 130 +++
.../SOLO/pytorch/mmdet/__init__.py | 3 +
.../SOLO/pytorch/mmdet/apis/__init__.py | 9 +
.../SOLO/pytorch/mmdet/apis/inference.py | 290 ++++++
.../SOLO/pytorch/mmdet/apis/train.py | 297 ++++++
.../SOLO/pytorch/mmdet/core/__init__.py | 7 +
.../pytorch/mmdet/core/anchor/__init__.py | 12 +
.../mmdet/core/anchor/anchor_generator.py | 98 ++
.../mmdet/core/anchor/anchor_target.py | 188 ++++
.../mmdet/core/anchor/guided_anchor_target.py | 287 ++++++
.../mmdet/core/anchor/point_generator.py | 34 +
.../pytorch/mmdet/core/anchor/point_target.py | 165 ++++
.../SOLO/pytorch/mmdet/core/bbox/__init__.py | 22 +
.../mmdet/core/bbox/assign_sampling.py | 33 +
.../mmdet/core/bbox/assigners/__init__.py | 11 +
.../bbox/assigners/approx_max_iou_assigner.py | 139 +++
.../core/bbox/assigners/assign_result.py | 192 ++++
.../core/bbox/assigners/atss_assigner.py | 159 ++++
.../core/bbox/assigners/base_assigner.py | 8 +
.../core/bbox/assigners/max_iou_assigner.py | 195 ++++
.../core/bbox/assigners/point_assigner.py | 130 +++
.../pytorch/mmdet/core/bbox/bbox_target.py | 73 ++
.../SOLO/pytorch/mmdet/core/bbox/demodata.py | 65 ++
.../SOLO/pytorch/mmdet/core/bbox/geometry.py | 88 ++
.../mmdet/core/bbox/samplers/__init__.py | 14 +
.../mmdet/core/bbox/samplers/base_sampler.py | 98 ++
.../core/bbox/samplers/combined_sampler.py | 16 +
.../samplers/instance_balanced_pos_sampler.py | 41 +
.../bbox/samplers/iou_balanced_neg_sampler.py | 135 +++
.../mmdet/core/bbox/samplers/ohem_sampler.py | 79 ++
.../core/bbox/samplers/pseudo_sampler.py | 26 +
.../core/bbox/samplers/random_sampler.py | 54 ++
.../core/bbox/samplers/sampling_result.py | 154 +++
.../pytorch/mmdet/core/bbox/transforms.py | 223 +++++
.../pytorch/mmdet/core/evaluation/__init__.py | 18 +
.../mmdet/core/evaluation/bbox_overlaps.py | 49 +
.../mmdet/core/evaluation/class_names.py | 116 +++
.../mmdet/core/evaluation/coco_utils.py | 250 +++++
.../mmdet/core/evaluation/eval_hooks.py | 152 +++
.../pytorch/mmdet/core/evaluation/mean_ap.py | 455 +++++++++
.../pytorch/mmdet/core/evaluation/recall.py | 185 ++++
.../SOLO/pytorch/mmdet/core/fp16/__init__.py | 4 +
.../pytorch/mmdet/core/fp16/decorators.py | 160 ++++
.../SOLO/pytorch/mmdet/core/fp16/hooks.py | 127 +++
.../SOLO/pytorch/mmdet/core/fp16/utils.py | 23 +
.../SOLO/pytorch/mmdet/core/mask/__init__.py | 4 +
.../pytorch/mmdet/core/mask/mask_target.py | 41 +
.../SOLO/pytorch/mmdet/core/mask/utils.py | 30 +
.../mmdet/core/post_processing/__init__.py | 9 +
.../mmdet/core/post_processing/bbox_nms.py | 66 ++
.../mmdet/core/post_processing/matrix_nms.py | 117 +++
.../mmdet/core/post_processing/merge_augs.py | 101 ++
.../SOLO/pytorch/mmdet/core/utils/__init__.py | 7 +
.../pytorch/mmdet/core/utils/dist_utils.py | 58 ++
.../SOLO/pytorch/mmdet/core/utils/misc.py | 37 +
.../SOLO/pytorch/mmdet/datasets/__init__.py | 17 +
.../SOLO/pytorch/mmdet/datasets/builder.py | 41 +
.../SOLO/pytorch/mmdet/datasets/cityscapes.py | 9 +
.../SOLO/pytorch/mmdet/datasets/coco.py | 110 +++
.../SOLO/pytorch/mmdet/datasets/custom.py | 152 +++
.../mmdet/datasets/dataset_wrappers.py | 55 ++
.../pytorch/mmdet/datasets/loader/__init__.py | 4 +
.../mmdet/datasets/loader/build_loader.py | 70 ++
.../pytorch/mmdet/datasets/loader/sampler.py | 164 ++++
.../mmdet/datasets/pipelines/__init__.py | 17 +
.../mmdet/datasets/pipelines/compose.py | 35 +
.../mmdet/datasets/pipelines/formating.py | 192 ++++
.../mmdet/datasets/pipelines/instaboost.py | 91 ++
.../mmdet/datasets/pipelines/loading.py | 144 +++
.../mmdet/datasets/pipelines/test_aug.py | 38 +
.../mmdet/datasets/pipelines/transforms.py | 876 ++++++++++++++++++
.../SOLO/pytorch/mmdet/datasets/registry.py | 4 +
.../SOLO/pytorch/mmdet/datasets/voc.py | 20 +
.../SOLO/pytorch/mmdet/datasets/wider_face.py | 42 +
.../SOLO/pytorch/mmdet/datasets/xml_style.py | 86 ++
.../SOLO/pytorch/mmdet/models/__init__.py | 19 +
.../mmdet/models/anchor_heads/__init__.py | 25 +
.../mmdet/models/anchor_heads/anchor_head.py | 330 +++++++
.../mmdet/models/anchor_heads/atss_head.py | 487 ++++++++++
.../anchor_heads/decoupled_solo_head.py | 484 ++++++++++
.../anchor_heads/decoupled_solo_light_head.py | 479 ++++++++++
.../mmdet/models/anchor_heads/fcos_head.py | 408 ++++++++
.../mmdet/models/anchor_heads/fovea_head.py | 387 ++++++++
.../anchor_heads/free_anchor_retina_head.py | 188 ++++
.../models/anchor_heads/ga_retina_head.py | 107 +++
.../mmdet/models/anchor_heads/ga_rpn_head.py | 127 +++
.../models/anchor_heads/guided_anchor_head.py | 621 +++++++++++++
.../models/anchor_heads/reppoints_head.py | 596 ++++++++++++
.../mmdet/models/anchor_heads/retina_head.py | 103 ++
.../models/anchor_heads/retina_sepbn_head.py | 105 +++
.../mmdet/models/anchor_heads/rpn_head.py | 104 +++
.../mmdet/models/anchor_heads/solo_head.py | 433 +++++++++
.../mmdet/models/anchor_heads/solov2_head.py | 483 ++++++++++
.../models/anchor_heads/solov2_light_head.py | 482 ++++++++++
.../mmdet/models/anchor_heads/ssd_head.py | 201 ++++
.../mmdet/models/backbones/__init__.py | 6 +
.../pytorch/mmdet/models/backbones/hrnet.py | 524 +++++++++++
.../pytorch/mmdet/models/backbones/resnet.py | 516 +++++++++++
.../pytorch/mmdet/models/backbones/resnext.py | 222 +++++
.../pytorch/mmdet/models/backbones/ssd_vgg.py | 153 +++
.../mmdet/models/bbox_heads/__init__.py | 7 +
.../mmdet/models/bbox_heads/bbox_head.py | 282 ++++++
.../models/bbox_heads/convfc_bbox_head.py | 187 ++++
.../models/bbox_heads/double_bbox_head.py | 170 ++++
.../SOLO/pytorch/mmdet/models/builder.py | 43 +
.../mmdet/models/detectors/__init__.py | 27 +
.../pytorch/mmdet/models/detectors/atss.py | 16 +
.../pytorch/mmdet/models/detectors/base.py | 193 ++++
.../mmdet/models/detectors/cascade_rcnn.py | 520 +++++++++++
.../models/detectors/double_head_rcnn.py | 178 ++++
.../mmdet/models/detectors/fast_rcnn.py | 61 ++
.../mmdet/models/detectors/faster_rcnn.py | 27 +
.../pytorch/mmdet/models/detectors/fcos.py | 16 +
.../pytorch/mmdet/models/detectors/fovea.py | 16 +
.../mmdet/models/detectors/grid_rcnn.py | 229 +++++
.../pytorch/mmdet/models/detectors/htc.py | 516 +++++++++++
.../mmdet/models/detectors/mask_rcnn.py | 31 +
.../models/detectors/mask_scoring_rcnn.py | 200 ++++
.../models/detectors/reppoints_detector.py | 81 ++
.../mmdet/models/detectors/retinanet.py | 16 +
.../pytorch/mmdet/models/detectors/rpn.py | 97 ++
.../mmdet/models/detectors/single_stage.py | 86 ++
.../models/detectors/single_stage_ins.py | 96 ++
.../pytorch/mmdet/models/detectors/solo.py | 16 +
.../pytorch/mmdet/models/detectors/solov2.py | 17 +
.../mmdet/models/detectors/test_mixins.py | 266 ++++++
.../mmdet/models/detectors/two_stage.py | 346 +++++++
.../pytorch/mmdet/models/losses/__init__.py | 20 +
.../pytorch/mmdet/models/losses/accuracy.py | 31 +
.../mmdet/models/losses/balanced_l1_loss.py | 69 ++
.../mmdet/models/losses/cross_entropy_loss.py | 103 ++
.../pytorch/mmdet/models/losses/focal_loss.py | 82 ++
.../pytorch/mmdet/models/losses/ghm_loss.py | 171 ++++
.../pytorch/mmdet/models/losses/iou_loss.py | 212 +++++
.../pytorch/mmdet/models/losses/mse_loss.py | 25 +
.../mmdet/models/losses/smooth_l1_loss.py | 45 +
.../SOLO/pytorch/mmdet/models/losses/utils.py | 98 ++
.../mmdet/models/mask_heads/__init__.py | 11 +
.../mmdet/models/mask_heads/fcn_mask_head.py | 191 ++++
.../models/mask_heads/fused_semantic_head.py | 106 +++
.../mmdet/models/mask_heads/grid_head.py | 361 ++++++++
.../mmdet/models/mask_heads/htc_mask_head.py | 38 +
.../mmdet/models/mask_heads/mask_feat_head.py | 119 +++
.../mmdet/models/mask_heads/maskiou_head.py | 190 ++++
.../pytorch/mmdet/models/necks/__init__.py | 6 +
.../SOLO/pytorch/mmdet/models/necks/bfp.py | 102 ++
.../SOLO/pytorch/mmdet/models/necks/fpn.py | 141 +++
.../SOLO/pytorch/mmdet/models/necks/hrfpn.py | 100 ++
.../pytorch/mmdet/models/necks/nas_fpn.py | 186 ++++
.../pytorch/mmdet/models/plugins/__init__.py | 4 +
.../models/plugins/generalized_attention.py | 383 ++++++++
.../pytorch/mmdet/models/plugins/non_local.py | 114 +++
.../SOLO/pytorch/mmdet/models/registry.py | 9 +
.../mmdet/models/roi_extractors/__init__.py | 3 +
.../models/roi_extractors/single_level.py | 107 +++
.../mmdet/models/shared_heads/__init__.py | 3 +
.../mmdet/models/shared_heads/res_layer.py | 71 ++
.../pytorch/mmdet/models/utils/__init__.py | 12 +
.../pytorch/mmdet/models/utils/conv_module.py | 167 ++++
.../pytorch/mmdet/models/utils/conv_ws.py | 46 +
.../SOLO/pytorch/mmdet/models/utils/norm.py | 55 ++
.../SOLO/pytorch/mmdet/models/utils/scale.py | 15 +
.../pytorch/mmdet/models/utils/weight_init.py | 46 +
.../SOLO/pytorch/mmdet/ops/__init__.py | 21 +
.../SOLO/pytorch/mmdet/ops/context_block.py | 104 +++
.../SOLO/pytorch/mmdet/ops/dcn/__init__.py | 12 +
.../SOLO/pytorch/mmdet/ops/dcn/deform_conv.py | 431 +++++++++
.../SOLO/pytorch/mmdet/ops/dcn/deform_pool.py | 252 +++++
.../mmdet/ops/dcn/src/deform_conv_cuda.cpp | 701 ++++++++++++++
.../ops/dcn/src/deform_conv_cuda_kernel.cu | 867 +++++++++++++++++
.../mmdet/ops/dcn/src/deform_pool_cuda.cpp | 90 ++
.../ops/dcn/src/deform_pool_cuda_kernel.cu | 364 ++++++++
.../pytorch/mmdet/ops/masked_conv/__init__.py | 3 +
.../mmdet/ops/masked_conv/masked_conv.py | 89 ++
.../masked_conv/src/masked_conv2d_cuda.cpp | 74 ++
.../masked_conv/src/masked_conv2d_kernel.cu | 114 +++
.../SOLO/pytorch/mmdet/ops/nms/__init__.py | 3 +
.../SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py | 102 ++
.../pytorch/mmdet/ops/nms/src/nms_cpu.cpp | 71 ++
.../pytorch/mmdet/ops/nms/src/nms_cuda.cpp | 17 +
.../pytorch/mmdet/ops/nms/src/nms_kernel.cu | 139 +++
.../mmdet/ops/nms/src/soft_nms_cpu.pyx | 127 +++
.../pytorch/mmdet/ops/roi_align/__init__.py | 3 +
.../pytorch/mmdet/ops/roi_align/gradcheck.py | 30 +
.../pytorch/mmdet/ops/roi_align/roi_align.py | 87 ++
.../ops/roi_align/src/roi_align_cuda.cpp | 87 ++
.../ops/roi_align/src/roi_align_kernel.cu | 283 ++++++
.../pytorch/mmdet/ops/roi_pool/__init__.py | 3 +
.../pytorch/mmdet/ops/roi_pool/gradcheck.py | 16 +
.../pytorch/mmdet/ops/roi_pool/roi_pool.py | 75 ++
.../mmdet/ops/roi_pool/src/roi_pool_cuda.cpp | 86 ++
.../mmdet/ops/roi_pool/src/roi_pool_kernel.cu | 157 ++++
.../mmdet/ops/sigmoid_focal_loss/__init__.py | 3 +
.../sigmoid_focal_loss/sigmoid_focal_loss.py | 54 ++
.../src/sigmoid_focal_loss.cpp | 45 +
.../src/sigmoid_focal_loss_cuda.cu | 171 ++++
.../SOLO/pytorch/mmdet/ops/utils/__init__.py | 7 +
.../mmdet/ops/utils/src/compiling_info.cpp | 56 ++
.../SOLO/pytorch/mmdet/utils/__init__.py | 8 +
.../pytorch/mmdet/utils/contextmanagers.py | 126 +++
.../SOLO/pytorch/mmdet/utils/flops_counter.py | 444 +++++++++
.../SOLO/pytorch/mmdet/utils/logger.py | 66 ++
.../SOLO/pytorch/mmdet/utils/profiling.py | 41 +
.../SOLO/pytorch/mmdet/utils/registry.py | 79 ++
.../SOLO/pytorch/mmdet/utils/util_mixins.py | 105 +++
.../SOLO/pytorch/pytest.ini | 7 +
.../SOLO/pytorch/requirements.txt | 4 +
.../SOLO/pytorch/requirements/build.txt | 4 +
.../SOLO/pytorch/requirements/optional.txt | 2 +
.../SOLO/pytorch/requirements/runtime.txt | 10 +
.../SOLO/pytorch/requirements/tests.txt | 11 +
.../SOLO/pytorch/setup.py | 301 ++++++
.../SOLO/pytorch/tests/async_benchmark.py | 104 +++
.../SOLO/pytorch/tests/test_assigner.py | 277 ++++++
.../SOLO/pytorch/tests/test_async.py | 78 ++
.../SOLO/pytorch/tests/test_config.py | 172 ++++
.../SOLO/pytorch/tests/test_forward.py | 388 ++++++++
.../SOLO/pytorch/tests/test_heads.py | 340 +++++++
.../SOLO/pytorch/tests/test_nms.py | 70 ++
.../SOLO/pytorch/tests/test_sampler.py | 249 +++++
.../SOLO/pytorch/tests/test_utils.py | 9 +
.../SOLO/pytorch/tools/analyze_logs.py | 178 ++++
.../SOLO/pytorch/tools/coco_error_analysis.py | 174 ++++
.../SOLO/pytorch/tools/coco_eval.py | 30 +
.../SOLO/pytorch/tools/collect_env.py | 64 ++
.../tools/convert_datasets/pascal_voc.py | 141 +++
.../SOLO/pytorch/tools/detectron2pytorch.py | 88 ++
.../SOLO/pytorch/tools/dist_test.sh | 11 +
.../SOLO/pytorch/tools/dist_train.sh | 8 +
.../SOLO/pytorch/tools/get_flops.py | 55 ++
.../SOLO/pytorch/tools/publish_model.py | 35 +
.../SOLO/pytorch/tools/robustness_eval.py | 256 +++++
.../SOLO/pytorch/tools/slurm_test.sh | 23 +
.../SOLO/pytorch/tools/slurm_train.sh | 23 +
.../SOLO/pytorch/tools/test.py | 282 ++++++
.../SOLO/pytorch/tools/test_ins.py | 257 +++++
.../SOLO/pytorch/tools/test_ins_vis.py | 296 ++++++
.../SOLO/pytorch/tools/test_robustness.py | 453 +++++++++
.../SOLO/pytorch/tools/train.py | 125 +++
.../pytorch/tools/upgrade_model_version.py | 42 +
.../SOLO/pytorch/tools/voc_eval.py | 47 +
262 files changed, 34040 insertions(+)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/CODE_OF_CONDUCT.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/CONTRIBUTING.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/config.yml
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/error-report.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/feature_request.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/general_questions.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.gitignore
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.gitmodules
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.isort.cfg
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.pre-commit-config.yaml
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.style.yapf
create mode 100644 cv/instance_segmentation/SOLO/pytorch/.travis.yml
create mode 100644 cv/instance_segmentation/SOLO/pytorch/LICENSE
create mode 100644 cv/instance_segmentation/SOLO/pytorch/README.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_dcn_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r101_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_1x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r101_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_1x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/apis/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/apis/inference.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/apis/train.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_generator.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/guided_anchor_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_generator.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assign_sampling.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/approx_max_iou_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/assign_result.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/atss_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/base_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/max_iou_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/point_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/bbox_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/demodata.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/geometry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/base_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/combined_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/ohem_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/pseudo_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/random_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/sampling_result.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/transforms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/bbox_overlaps.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/class_names.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/coco_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/eval_hooks.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/mean_ap.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/recall.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/decorators.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/hooks.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/mask_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/bbox_nms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/matrix_nms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/merge_augs.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/dist_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/misc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/cityscapes.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/coco.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/custom.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/dataset_wrappers.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/build_loader.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/compose.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/formating.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/instaboost.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/loading.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_aug.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/transforms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/voc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/wider_face.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/xml_style.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/anchor_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/atss_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_light_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fcos_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fovea_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/free_anchor_retina_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_retina_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_rpn_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/guided_anchor_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/reppoints_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_sepbn_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/rpn_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solo_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_light_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ssd_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/hrnet.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnet.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnext.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/ssd_vgg.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/bbox_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/convfc_bbox_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/double_bbox_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/atss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/base.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/cascade_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/double_head_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fast_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/faster_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fcos.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fovea.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/grid_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/htc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_scoring_rcnn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/reppoints_detector.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/retinanet.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/rpn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_ins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solo.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solov2.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/test_mixins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/two_stage.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/accuracy.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/balanced_l1_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/cross_entropy_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/focal_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/ghm_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/iou_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/mse_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/smooth_l1_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fcn_mask_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fused_semantic_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/grid_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/htc_mask_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/mask_feat_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/maskiou_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/bfp.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/fpn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/hrfpn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/nas_fpn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/generalized_attention.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/non_local.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/single_level.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/res_layer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_module.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_ws.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/norm.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/scale.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/weight_init.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/context_block.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_conv.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_pool.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/masked_conv.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cpu.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/soft_nms_cpu.pyx
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/gradcheck.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/roi_align.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/gradcheck.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/roi_pool.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/src/compiling_info.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/contextmanagers.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/flops_counter.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/logger.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/profiling.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_mixins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/pytest.ini
create mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements.txt
create mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/build.txt
create mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/optional.txt
create mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/runtime.txt
create mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/tests.txt
create mode 100644 cv/instance_segmentation/SOLO/pytorch/setup.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/async_benchmark.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_async.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_config.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_forward.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_heads.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_nms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/analyze_logs.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/coco_error_analysis.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/coco_eval.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/collect_env.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/convert_datasets/pascal_voc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/detectron2pytorch.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/dist_test.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/dist_train.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/get_flops.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/publish_model.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/robustness_eval.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/slurm_test.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/slurm_train.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_ins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_ins_vis.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_robustness.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/train.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/upgrade_model_version.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/voc_eval.py
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/CODE_OF_CONDUCT.md b/cv/instance_segmentation/SOLO/pytorch/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..efd430579
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,76 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+ advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at chenkaidev@gmail.com. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/CONTRIBUTING.md b/cv/instance_segmentation/SOLO/pytorch/.github/CONTRIBUTING.md
new file mode 100644
index 000000000..39c145a1f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/CONTRIBUTING.md
@@ -0,0 +1,53 @@
+# Contributing to mmdetection
+
+All kinds of contributions are welcome, including but not limited to the following.
+
+- Fixes (typo, bugs)
+- New features and components
+
+## Workflow
+
+1. fork and pull the latest mmdetection
+2. checkout a new branch (do not use master branch for PRs)
+3. commit your changes
+4. create a PR
+
+Note
+- If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
+- If you are the author of a paper and would like to include your method in mmdetection,
+please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would much appreciate your contribution.
+
+## Code style
+
+### Python
+We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
+
+We use the following tools for linting and formatting:
+- [flake8](http://flake8.pycqa.org/en/latest/): linter
+- [yapf](https://github.com/google/yapf): formatter
+- [isort](https://github.com/timothycrosley/isort): sort imports
+
+Style configurations of yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).
+
+We use a [pre-commit hook](https://pre-commit.com/) that checks and formats code with `flake8`, `yapf`, and `isort`, trims trailing whitespace,
+fixes end-of-file newlines, and sorts `requirements.txt` automatically on every commit.
+The config for the pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
+
+After you clone the repository, you will need to install and initialize the pre-commit hook.
+
+```
+pip install -U pre-commit
+```
+
+From the repository folder
+```
+pre-commit install
+```
+
+After this, the code linters and formatter will be enforced on every commit.
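+
+You can also run all hooks manually over the whole repository with the standard pre-commit CLI:
+
+```
+pre-commit run --all-files
+```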
+
+
+>Before you create a PR, make sure that your code lints and is formatted by yapf.
+
+### C++ and CUDA
+We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/config.yml b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..3ba13e0ce
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/error-report.md b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/error-report.md
new file mode 100644
index 000000000..80e1cc58e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/error-report.md
@@ -0,0 +1,41 @@
+---
+name: Error report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+Thanks for your error report and we appreciate it a lot.
+
+**Checklist**
+1. I have searched related issues but cannot get the expected help.
+2. The bug has not been fixed in the latest version.
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Reproduction**
+1. What command or script did you run?
+```
+A placeholder for the command.
+```
+2. Did you make any modifications to the code or config? Do you understand what you modified?
+3. What dataset did you use?
+
+**Environment**
+
+1. Please run `python tools/collect_env.py` to collect the necessary environment information and paste it here.
+2. You may add additional information that may be helpful for locating the problem, such as
+ - How you installed PyTorch [e.g., pip, conda, source]
+ - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
+
+**Error traceback**
+If applicable, paste the error traceback here.
+```
+A placeholder for the traceback.
+```
+
+**Bug fix**
+If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/feature_request.md b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 000000000..33f9d5f23
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the feature**
+
+**Motivation**
+A clear and concise description of the motivation of the feature.
+Ex1. It is inconvenient when [....].
+Ex2. There is a recent paper [....], which is very helpful for [....].
+
+**Related resources**
+If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
+If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
diff --git a/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/general_questions.md b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/general_questions.md
new file mode 100644
index 000000000..6211ca283
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/general_questions.md
@@ -0,0 +1,10 @@
+---
+name: General questions
+about: Ask general questions to get help
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+
diff --git a/cv/instance_segmentation/SOLO/pytorch/.gitignore b/cv/instance_segmentation/SOLO/pytorch/.gitignore
new file mode 100644
index 000000000..306a2bf4e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.gitignore
@@ -0,0 +1,121 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# cython generated cpp
+mmdet/ops/nms/src/soft_nms_cpu.cpp
+mmdet/version.py
+data
+.vscode
+.idea
+
+# custom
+*.pkl
+*.pkl.json
+*.segm.json
+*.log.json
+work_dirs/
+
+# Pytorch
+*.pth
diff --git a/cv/instance_segmentation/SOLO/pytorch/.gitmodules b/cv/instance_segmentation/SOLO/pytorch/.gitmodules
new file mode 100644
index 000000000..03b361da1
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "paddlepaddle/paddledetection"]
+ path = paddlepaddle/paddledetection
+ url = https://github.com/PaddlePaddle/PaddleDetection
diff --git a/cv/instance_segmentation/SOLO/pytorch/.isort.cfg b/cv/instance_segmentation/SOLO/pytorch/.isort.cfg
new file mode 100644
index 000000000..9f43efc7d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.isort.cfg
@@ -0,0 +1,8 @@
+[isort]
+line_length = 79
+multi_line_output = 0
+known_standard_library = setuptools
+known_first_party = mmdet
+known_third_party = Cython,asynctest,cv2,matplotlib,mmcv,numpy,pycocotools,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision
+no_lines_before = STDLIB,LOCALFOLDER
+default_section = THIRDPARTY
diff --git a/cv/instance_segmentation/SOLO/pytorch/.pre-commit-config.yaml b/cv/instance_segmentation/SOLO/pytorch/.pre-commit-config.yaml
new file mode 100644
index 000000000..901104c2c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+repos:
+- repo: https://github.com/asottile/seed-isort-config
+ rev: v1.9.3
+ hooks:
+ - id: seed-isort-config
+- repo: https://github.com/pre-commit/mirrors-isort
+ rev: v4.3.21
+ hooks:
+ - id: isort
+- repo: https://github.com/pre-commit/mirrors-yapf
+ rev: v0.29.0
+ hooks:
+ - id: yapf
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.4.0
+ hooks:
+ - id: flake8
+ - id: trailing-whitespace
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: requirements-txt-fixer
diff --git a/cv/instance_segmentation/SOLO/pytorch/.style.yapf b/cv/instance_segmentation/SOLO/pytorch/.style.yapf
new file mode 100644
index 000000000..286a3f1d7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.style.yapf
@@ -0,0 +1,4 @@
+[style]
+BASED_ON_STYLE = pep8
+BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
+SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
diff --git a/cv/instance_segmentation/SOLO/pytorch/.travis.yml b/cv/instance_segmentation/SOLO/pytorch/.travis.yml
new file mode 100644
index 000000000..b39defb3f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/.travis.yml
@@ -0,0 +1,43 @@
+dist: bionic # ubuntu 18.04
+language: python
+
+python:
+ - "3.5"
+ - "3.6"
+ - "3.7"
+
+env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1
+cache: pip
+
+# Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis
+before_install:
+ - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
+ - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
+ - sudo dpkg -i ${INSTALLER}
+ - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
+ - sudo apt-key add 7fa2af80.pub
+ - sudo apt update -qq
+ - sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
+ - sudo apt clean
+ - CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
+ - LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
+ - PATH=${CUDA_HOME}/bin:${PATH}
+
+install:
+ - pip install Pillow==6.2.2 # remove this line when torchvision>=0.5
+ - pip install Cython torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2
+ - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
+ - pip install -r requirements.txt
+
+before_script:
+ - flake8 .
+ - isort -rc --check-only --diff mmdet/ tools/ tests/
+ - yapf -r -d --style .style.yapf mmdet/ tools/ tests/ configs/
+
+script:
+ - python setup.py check -m -s
+ - python setup.py build_ext --inplace
+ - coverage run --source mmdet -m py.test -v --xdoctest-modules tests mmdet
+
+after_success:
+ - coverage report
diff --git a/cv/instance_segmentation/SOLO/pytorch/LICENSE b/cv/instance_segmentation/SOLO/pytorch/LICENSE
new file mode 100644
index 000000000..e01680d91
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/LICENSE
@@ -0,0 +1,25 @@
+SOLO for non-commercial purposes
+
+Copyright (c) 2019 the authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/cv/instance_segmentation/SOLO/pytorch/README.md b/cv/instance_segmentation/SOLO/pytorch/README.md
new file mode 100644
index 000000000..25ab9bb8c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/README.md
@@ -0,0 +1,55 @@
+# SOLO: Segmenting Objects by Locations
+
+## Model description
+
+We present a new, embarrassingly simple approach to instance segmentation in images. Compared to many other dense prediction tasks, e.g., semantic segmentation, it is the arbitrary number of instances that has made instance segmentation much more challenging. In order to predict a mask for each instance, mainstream approaches either follow the 'detect-then-segment' strategy as used by Mask R-CNN, or predict category masks first and then use clustering techniques to group pixels into individual instances. We view the task of instance segmentation from a completely new perspective by introducing the notion of "instance categories", which assigns categories to each pixel within an instance according to the instance's location and size, thus nicely converting instance mask segmentation into a classification-solvable problem. Now instance segmentation is decomposed into two classification tasks. We demonstrate a much simpler and more flexible instance segmentation framework with strong performance, achieving on-par accuracy with Mask R-CNN and outperforming recent single-shot instance segmenters in accuracy. We hope that this very simple and strong framework can serve as a baseline for many instance-level recognition tasks besides instance segmentation.
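+
+The key step above, turning mask prediction into per-location classification, can be illustrated with a small sketch. The snippet below is only a toy illustration with assumed layer names and shapes; it is not the `SOLOHead` used by this repo (see `mmdet/models/anchor_heads/solo_head.py` for the real implementation).
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ToySOLOHead(nn.Module):
+    """Toy head: for an S x S grid, predict one category per grid cell and
+    one full-resolution instance mask per grid cell (mask channel i * S + j
+    corresponds to grid cell (i, j))."""
+
+    def __init__(self, in_channels=256, num_classes=80, grid_size=40):
+        super().__init__()
+        self.grid_size = grid_size
+        # category branch: class scores, later resized to the S x S grid
+        self.cate_conv = nn.Conv2d(in_channels, num_classes, 3, padding=1)
+        # mask branch: one mask channel per grid cell
+        self.mask_conv = nn.Conv2d(in_channels, grid_size * grid_size, 1)
+
+    def forward(self, feat):
+        # predict category scores on the S x S grid
+        cate_feat = F.interpolate(
+            feat, size=(self.grid_size, self.grid_size),
+            mode='bilinear', align_corners=False)
+        cate_pred = self.cate_conv(cate_feat)   # (N, num_classes, S, S)
+        mask_pred = self.mask_conv(feat)        # (N, S * S, H, W)
+        return cate_pred, mask_pred
+
+
+head = ToySOLOHead()
+feat = torch.randn(1, 256, 100, 152)            # a dummy FPN feature map
+cate_pred, mask_pred = head(feat)
+print(cate_pred.shape, mask_pred.shape)
+# torch.Size([1, 80, 40, 40]) torch.Size([1, 1600, 100, 152])
+```
+
+In the actual configs, each FPN level uses its own grid size (`num_grids=[40, 36, 24, 16, 12]`), and a ground-truth instance is assigned to the grid cells covered by its center region.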
+
+## Prepare
+
+### Install packages
+
+```shell
+
+pip3 install -r requirements/build.txt
+pip3 install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
+pip3 install -v -e .
+
+```
+
+### Download dataset
+
+```shell
+
+$ mkdir -p data/coco
+$ cd data/coco
+$ wget http://images.cocodataset.org/zips/annotations_trainval2017.zip
+$ wget http://images.cocodataset.org/zips/train2017.zip
+$ wget http://images.cocodataset.org/zips/val2017.zip
+$ unzip annotations_trainval2017.zip
+$ unzip train2017.zip
+$ unzip val2017.zip
+
+```
+
+## Training
+
+### Single GPU
+
+```shell
+
+python3 tools/train.py configs/solo/solo_r50_fpn_8gpu_1x.py
+
+```
+
+### Multi GPU
+
+```shell
+
+bash ./tools/dist_train.sh configs/solo/solo_r50_fpn_8gpu_1x.py ${GPU_NUM}
+
+```
+
+
+## Reference
+
+Reference: https://github.com/WXinlong/SOLO
\ No newline at end of file
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_dcn_r50_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_dcn_r50_fpn_8gpu_3x.py
new file mode 100644
index 000000000..e72d112ae
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_dcn_r50_fpn_8gpu_3x.py
@@ -0,0 +1,136 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch',
+ dcn=dict(
+ type='DCN',
+ deformable_groups=1,
+ fallback_on_stride=False),
+ stage_with_dcn=(False, True, True, True)),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='DecoupledSOLOLightHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=4,
+ use_dcn_in_tower=True,
+ type_dcn='DCN',
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(852, 512), (852, 480), (852, 448),
+ (852, 416), (852, 384), (852, 352)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(852, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/decoupled_solo_light_dcn_release_r50_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_r50_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_r50_fpn_8gpu_3x.py
new file mode 100644
index 000000000..d38ee0f5b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_r50_fpn_8gpu_3x.py
@@ -0,0 +1,129 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='DecoupledSOLOLightHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=4,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(852, 512), (852, 480), (852, 448),
+ (852, 416), (852, 384), (852, 352)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(852, 512),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/decoupled_solo_light_release_r50_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r101_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r101_fpn_8gpu_3x.py
new file mode 100644
index 000000000..d64f0385c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r101_fpn_8gpu_3x.py
@@ -0,0 +1,130 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet101',
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='DecoupledSOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(1333, 800), (1333, 768), (1333, 736),
+ (1333, 704), (1333, 672), (1333, 640)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/decoupled_solo_release_r101_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_1x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_1x.py
new file mode 100644
index 000000000..e4d6b5edc
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_1x.py
@@ -0,0 +1,126 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='DecoupledSOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[9, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/decoupled_solo_release_r50_fpn_8gpu_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py
new file mode 100644
index 000000000..fa54fd857
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py
@@ -0,0 +1,130 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='DecoupledSOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(1333, 800), (1333, 768), (1333, 736),
+ (1333, 704), (1333, 672), (1333, 640)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/decoupled_solo_release_r50_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r101_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r101_fpn_8gpu_3x.py
new file mode 100644
index 000000000..d6a30d917
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r101_fpn_8gpu_3x.py
@@ -0,0 +1,130 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet101',
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='SOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(1333, 800), (1333, 768), (1333, 736),
+ (1333, 704), (1333, 672), (1333, 640)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/solo_release_r101_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_1x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_1x.py
new file mode 100644
index 000000000..7c1796a10
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_1x.py
@@ -0,0 +1,126 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='SOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[9, 11])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/solo_release_r50_fpn_8gpu_1x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_3x.py b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_3x.py
new file mode 100644
index 000000000..7fc0bed65
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_3x.py
@@ -0,0 +1,130 @@
+# model settings
+model = dict(
+ type='SOLO',
+ pretrained='torchvision://resnet50',
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
+ frozen_stages=1,
+ style='pytorch'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=0,
+ num_outs=5),
+ bbox_head=dict(
+ type='SOLOHead',
+ num_classes=81,
+ in_channels=256,
+ stacked_convs=7,
+ seg_feat_channels=256,
+ strides=[8, 8, 16, 32, 32],
+ scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
+ sigma=0.2,
+ num_grids=[40, 36, 24, 16, 12],
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=dict(
+ type='DiceLoss',
+ use_sigmoid=True,
+ loss_weight=3.0),
+ loss_cate=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ ))
+# training and testing settings
+train_cfg = dict()
+test_cfg = dict(
+ nms_pre=500,
+ score_thr=0.1,
+ mask_thr=0.5,
+ update_thr=0.05,
+ kernel='gaussian', # gaussian/linear
+ sigma=2.0,
+ max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+ dict(type='Resize',
+ img_scale=[(1333, 800), (1333, 768), (1333, 736),
+ (1333, 704), (1333, 672), (1333, 640)],
+ multiscale_mode='value',
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ imgs_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_train2017.json',
+ img_prefix=data_root + 'train2017/',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'annotations/instances_val2017.json',
+ img_prefix=data_root + 'val2017/',
+ pipeline=test_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[27, 33])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook'),
+ # dict(type='TensorboardLoggerHook')
+ ])
+# yapf:enable
+# runtime settings
+total_epochs = 36
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/solo_release_r50_fpn_8gpu_3x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/__init__.py
new file mode 100644
index 000000000..1c4f7e8fc
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/__init__.py
@@ -0,0 +1,3 @@
+from .version import __version__, short_version
+
+__all__ = ['__version__', 'short_version']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/__init__.py
new file mode 100644
index 000000000..164594445
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/__init__.py
@@ -0,0 +1,9 @@
+from .inference import (async_inference_detector, inference_detector,
+ init_detector, show_result, show_result_pyplot, show_result_ins)
+from .train import get_root_logger, set_random_seed, train_detector
+
+__all__ = [
+ 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
+ 'async_inference_detector', 'inference_detector', 'show_result',
+ 'show_result_pyplot', 'show_result_ins'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/inference.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/inference.py
new file mode 100644
index 000000000..9470b6e2a
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/inference.py
@@ -0,0 +1,290 @@
+import warnings
+
+import matplotlib.pyplot as plt
+import mmcv
+import numpy as np
+import pycocotools.mask as maskUtils
+import torch
+from mmcv.parallel import collate, scatter
+from mmcv.runner import load_checkpoint
+
+from mmdet.core import get_classes
+from mmdet.datasets.pipelines import Compose
+from mmdet.models import build_detector
+
+import cv2
+from scipy import ndimage
+
+def init_detector(config, checkpoint=None, device='cuda:0'):
+ """Initialize a detector from config file.
+
+ Args:
+ config (str or :obj:`mmcv.Config`): Config file path or the config
+ object.
+ checkpoint (str, optional): Checkpoint path. If left as None, the model
+ will not load any weights.
+
+ Returns:
+ nn.Module: The constructed detector.
+ """
+ if isinstance(config, str):
+ config = mmcv.Config.fromfile(config)
+ elif not isinstance(config, mmcv.Config):
+ raise TypeError('config must be a filename or Config object, '
+ 'but got {}'.format(type(config)))
+ config.model.pretrained = None
+ model = build_detector(config.model, test_cfg=config.test_cfg)
+ if checkpoint is not None:
+ checkpoint = load_checkpoint(model, checkpoint)
+ if 'CLASSES' in checkpoint['meta']:
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ else:
+ warnings.warn('Class names are not saved in the checkpoint\'s '
+ 'meta data, use COCO classes by default.')
+ model.CLASSES = get_classes('coco')
+ model.cfg = config # save the config in the model for convenience
+ model.to(device)
+ model.eval()
+ return model
+
+
+class LoadImage(object):
+
+ def __call__(self, results):
+ if isinstance(results['img'], str):
+ results['filename'] = results['img']
+ else:
+ results['filename'] = None
+ img = mmcv.imread(results['img'])
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ return results
+
+
+def inference_detector(model, img):
+ """Inference image(s) with the detector.
+
+ Args:
+ model (nn.Module): The loaded detector.
+        img (str or np.ndarray): Either an image file path or a loaded
+            image.
+
+ Returns:
+ If imgs is a str, a generator will be returned, otherwise return the
+ detection results directly.
+ """
+ cfg = model.cfg
+ device = next(model.parameters()).device # model device
+ # build the data pipeline
+ test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
+ test_pipeline = Compose(test_pipeline)
+ # prepare data
+ data = dict(img=img)
+ data = test_pipeline(data)
+ data = scatter(collate([data], samples_per_gpu=1), [device])[0]
+ # forward the model
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ return result
+
+
+async def async_inference_detector(model, img):
+ """Async inference image(s) with the detector.
+
+ Args:
+ model (nn.Module): The loaded detector.
+        img (str or np.ndarray): Either an image file path or a loaded
+            image.
+
+ Returns:
+ Awaitable detection results.
+ """
+ cfg = model.cfg
+ device = next(model.parameters()).device # model device
+ # build the data pipeline
+ test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
+ test_pipeline = Compose(test_pipeline)
+ # prepare data
+ data = dict(img=img)
+ data = test_pipeline(data)
+ data = scatter(collate([data], samples_per_gpu=1), [device])[0]
+
+ # We don't restore `torch.is_grad_enabled()` value during concurrent
+ # inference since execution can overlap
+ torch.set_grad_enabled(False)
+ result = await model.aforward_test(rescale=True, **data)
+ return result
+
+
+# TODO: merge this method with the one in BaseDetector
+def show_result(img,
+ result,
+ class_names,
+ score_thr=0.3,
+ wait_time=0,
+ show=True,
+ out_file=None):
+ """Visualize the detection results on the image.
+
+ Args:
+ img (str or np.ndarray): Image filename or loaded image.
+ result (tuple[list] or list): The detection result, can be either
+ (bbox, segm) or just bbox.
+ class_names (list[str] or tuple[str]): A list of class names.
+ score_thr (float): The threshold to visualize the bboxes and masks.
+ wait_time (int): Value of waitKey param.
+ show (bool, optional): Whether to show the image with opencv or not.
+ out_file (str, optional): If specified, the visualization result will
+ be written to the out file instead of shown in a window.
+
+ Returns:
+ np.ndarray or None: If neither `show` nor `out_file` is specified, the
+ visualized image is returned, otherwise None is returned.
+ """
+ assert isinstance(class_names, (tuple, list))
+ img = mmcv.imread(img)
+ img = img.copy()
+ if isinstance(result, tuple):
+ bbox_result, segm_result = result
+ else:
+ bbox_result, segm_result = result, None
+ bboxes = np.vstack(bbox_result)
+ labels = [
+ np.full(bbox.shape[0], i, dtype=np.int32)
+ for i, bbox in enumerate(bbox_result)
+ ]
+ labels = np.concatenate(labels)
+ # draw segmentation masks
+ if segm_result is not None:
+ segms = mmcv.concat_list(segm_result)
+ inds = np.where(bboxes[:, -1] > score_thr)[0]
+ np.random.seed(42)
+ color_masks = [
+ np.random.randint(0, 256, (1, 3), dtype=np.uint8)
+ for _ in range(max(labels) + 1)
+ ]
+ for i in inds:
+ i = int(i)
+ color_mask = color_masks[labels[i]]
+            mask = maskUtils.decode(segms[i]).astype(bool)
+ img[mask] = img[mask] * 0.5 + color_mask * 0.5
+ # draw bounding boxes
+ mmcv.imshow_det_bboxes(
+ img,
+ bboxes,
+ labels,
+ class_names=class_names,
+ score_thr=score_thr,
+ show=show,
+ wait_time=wait_time,
+ out_file=out_file)
+ if not (show or out_file):
+ return img
+
+
+def show_result_pyplot(img,
+ result,
+ class_names,
+ score_thr=0.3,
+ fig_size=(15, 10)):
+ """Visualize the detection results on the image.
+
+ Args:
+ img (str or np.ndarray): Image filename or loaded image.
+ result (tuple[list] or list): The detection result, can be either
+ (bbox, segm) or just bbox.
+ class_names (list[str] or tuple[str]): A list of class names.
+ score_thr (float): The threshold to visualize the bboxes and masks.
+ fig_size (tuple): Figure size of the pyplot figure.
+ out_file (str, optional): If specified, the visualization result will
+ be written to the out file instead of shown in a window.
+ """
+ img = show_result(
+ img, result, class_names, score_thr=score_thr, show=False)
+ plt.figure(figsize=fig_size)
+ plt.imshow(mmcv.bgr2rgb(img))
+
+
+def show_result_ins(img,
+ result,
+ class_names,
+ score_thr=0.3,
+ sort_by_density=False,
+ out_file=None):
+ """Visualize the instance segmentation results on the image.
+
+ Args:
+ img (str or np.ndarray): Image filename or loaded image.
+ result (tuple[list] or list): The instance segmentation result.
+ class_names (list[str] or tuple[str]): A list of class names.
+ score_thr (float): The threshold to visualize the masks.
+ sort_by_density (bool): sort the masks by their density.
+ out_file (str, optional): If specified, the visualization result will
+ be written to the out file instead of shown in a window.
+
+ Returns:
+        np.ndarray or None: If `out_file` is not specified, the visualized
+        image is returned, otherwise it is written to `out_file`.
+ """
+
+ assert isinstance(class_names, (tuple, list))
+ img = mmcv.imread(img)
+ img_show = img.copy()
+ h, w, _ = img.shape
+
+ if not result or result == [None]:
+ return img_show
+ cur_result = result[0]
+ seg_label = cur_result[0]
+ seg_label = seg_label.cpu().numpy().astype(np.uint8)
+ cate_label = cur_result[1]
+ cate_label = cate_label.cpu().numpy()
+ score = cur_result[2].cpu().numpy()
+
+ vis_inds = score > score_thr
+ seg_label = seg_label[vis_inds]
+ num_mask = seg_label.shape[0]
+ cate_label = cate_label[vis_inds]
+ cate_score = score[vis_inds]
+
+ if sort_by_density:
+ mask_density = []
+ for idx in range(num_mask):
+ cur_mask = seg_label[idx, :, :]
+ cur_mask = mmcv.imresize(cur_mask, (w, h))
+ cur_mask = (cur_mask > 0.5).astype(np.int32)
+ mask_density.append(cur_mask.sum())
+ orders = np.argsort(mask_density)
+ seg_label = seg_label[orders]
+ cate_label = cate_label[orders]
+ cate_score = cate_score[orders]
+
+ np.random.seed(42)
+ color_masks = [
+ np.random.randint(0, 256, (1, 3), dtype=np.uint8)
+ for _ in range(num_mask)
+ ]
+ for idx in range(num_mask):
+ idx = -(idx+1)
+ cur_mask = seg_label[idx, :, :]
+ cur_mask = mmcv.imresize(cur_mask, (w, h))
+ cur_mask = (cur_mask > 0.5).astype(np.uint8)
+ if cur_mask.sum() == 0:
+ continue
+ color_mask = color_masks[idx]
+        cur_mask_bool = cur_mask.astype(bool)
+ img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5
+
+ cur_cate = cate_label[idx]
+ cur_score = cate_score[idx]
+ label_text = class_names[cur_cate]
+ #label_text += '|{:.02f}'.format(cur_score)
+        center_y, center_x = ndimage.center_of_mass(cur_mask)
+        vis_pos = (max(int(center_x) - 10, 0), int(center_y))
+        cv2.putText(img_show, label_text, vis_pos,
+                    cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 255, 255))  # white text
+ if out_file is None:
+ return img_show
+ else:
+ mmcv.imwrite(img_show, out_file)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/train.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/train.py
new file mode 100644
index 000000000..97c0dc69e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/apis/train.py
@@ -0,0 +1,297 @@
+import random
+import re
+from collections import OrderedDict
+
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import DistSamplerSeedHook, Runner, obj_from_dict
+
+from mmdet import datasets
+from mmdet.core import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
+ DistEvalmAPHook, DistOptimizerHook, Fp16OptimizerHook)
+from mmdet.datasets import DATASETS, build_dataloader
+from mmdet.models import RPN
+from mmdet.utils import get_root_logger
+
+
+def set_random_seed(seed, deterministic=False):
+ """Set random seed.
+
+ Args:
+ seed (int): Seed to be used.
+ deterministic (bool): Whether to set the deterministic option for
+ CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
+ to True and `torch.backends.cudnn.benchmark` to False.
+ Default: False.
+ """
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+ torch.cuda.manual_seed_all(seed)
+ if deterministic:
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False
+
+
+def parse_losses(losses):
+ log_vars = OrderedDict()
+ for loss_name, loss_value in losses.items():
+ if isinstance(loss_value, torch.Tensor):
+ log_vars[loss_name] = loss_value.mean()
+ elif isinstance(loss_value, list):
+ log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
+ else:
+ raise TypeError(
+ '{} is not a tensor or list of tensors'.format(loss_name))
+
+ loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
+
+ log_vars['loss'] = loss
+ for loss_name, loss_value in log_vars.items():
+ # reduce loss when distributed training
+ if dist.is_available() and dist.is_initialized():
+ loss_value = loss_value.data.clone()
+ dist.all_reduce(loss_value.div_(dist.get_world_size()))
+ log_vars[loss_name] = loss_value.item()
+
+ return loss, log_vars
+
+
+def batch_processor(model, data, train_mode):
+ """Process a data batch.
+
+ This method is required as an argument of Runner, which defines how to
+ process a data batch and obtain proper outputs. The first 3 arguments of
+ batch_processor are fixed.
+
+ Args:
+ model (nn.Module): A PyTorch model.
+ data (dict): The data batch in a dict.
+ train_mode (bool): Training mode or not. It may be useless for some
+ models.
+
+ Returns:
+ dict: A dict containing losses and log vars.
+ """
+ losses = model(**data)
+ loss, log_vars = parse_losses(losses)
+
+ outputs = dict(
+ loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
+
+ return outputs
+
+
+def train_detector(model,
+ dataset,
+ cfg,
+ distributed=False,
+ validate=False,
+ timestamp=None):
+ logger = get_root_logger(cfg.log_level)
+
+ # start training
+ if distributed:
+ _dist_train(
+ model,
+ dataset,
+ cfg,
+ validate=validate,
+ logger=logger,
+ timestamp=timestamp)
+ else:
+ _non_dist_train(
+ model,
+ dataset,
+ cfg,
+ validate=validate,
+ logger=logger,
+ timestamp=timestamp)
+
+
+def build_optimizer(model, optimizer_cfg):
+ """Build optimizer from configs.
+
+ Args:
+ model (:obj:`nn.Module`): The model with parameters to be optimized.
+ optimizer_cfg (dict): The config dict of the optimizer.
+ Positional fields are:
+ - type: class name of the optimizer.
+ - lr: base learning rate.
+ Optional fields are:
+ - any arguments of the corresponding optimizer type, e.g.,
+ weight_decay, momentum, etc.
+            - paramwise_options: a dict with 3 accepted fields
+ (bias_lr_mult, bias_decay_mult, norm_decay_mult).
+ `bias_lr_mult` and `bias_decay_mult` will be multiplied to
+ the lr and weight decay respectively for all bias parameters
+ (except for the normalization layers), and
+ `norm_decay_mult` will be multiplied to the weight decay
+ for all weight and bias parameters of normalization layers.
+
+ Returns:
+ torch.optim.Optimizer: The initialized optimizer.
+
+ Example:
+ >>> model = torch.nn.modules.Conv1d(1, 1, 1)
+ >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
+ >>> weight_decay=0.0001)
+ >>> optimizer = build_optimizer(model, optimizer_cfg)
+ """
+ if hasattr(model, 'module'):
+ model = model.module
+
+ optimizer_cfg = optimizer_cfg.copy()
+ paramwise_options = optimizer_cfg.pop('paramwise_options', None)
+ # if no paramwise option is specified, just use the global setting
+ if paramwise_options is None:
+ return obj_from_dict(optimizer_cfg, torch.optim,
+ dict(params=model.parameters()))
+ else:
+ assert isinstance(paramwise_options, dict)
+ # get base lr and weight decay
+ base_lr = optimizer_cfg['lr']
+ base_wd = optimizer_cfg.get('weight_decay', None)
+ # weight_decay must be explicitly specified if mult is specified
+ if ('bias_decay_mult' in paramwise_options
+ or 'norm_decay_mult' in paramwise_options):
+ assert base_wd is not None
+ # get param-wise options
+ bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.)
+ bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.)
+ norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.)
+ # set param-wise lr and weight decay
+ params = []
+ for name, param in model.named_parameters():
+ param_group = {'params': [param]}
+ if not param.requires_grad:
+ # FP16 training needs to copy gradient/weight between master
+ # weight copy and model weight, it is convenient to keep all
+ # parameters here to align with model.parameters()
+ params.append(param_group)
+ continue
+
+ # for norm layers, overwrite the weight decay of weight and bias
+ # TODO: obtain the norm layer prefixes dynamically
+            if re.search(r'(bn|gn)(\d+)?\.(weight|bias)', name):
+ if base_wd is not None:
+ param_group['weight_decay'] = base_wd * norm_decay_mult
+ # for other layers, overwrite both lr and weight decay of bias
+ elif name.endswith('.bias'):
+ param_group['lr'] = base_lr * bias_lr_mult
+ if base_wd is not None:
+ param_group['weight_decay'] = base_wd * bias_decay_mult
+ # otherwise use the global settings
+
+ params.append(param_group)
+
+ optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type'))
+ return optimizer_cls(params, **optimizer_cfg)
+
+
+def _dist_train(model,
+ dataset,
+ cfg,
+ validate=False,
+ logger=None,
+ timestamp=None):
+ # prepare data loaders
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+ data_loaders = [
+ build_dataloader(
+ ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
+ for ds in dataset
+ ]
+ # put model on gpus
+ model = MMDistributedDataParallel(model.cuda())
+
+ # build runner
+ optimizer = build_optimizer(model, cfg.optimizer)
+ runner = Runner(
+ model, batch_processor, optimizer, cfg.work_dir, logger=logger)
+    # an ugly workaround to make the .log and .log.json filenames the same
+ runner.timestamp = timestamp
+
+ # fp16 setting
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
+ **fp16_cfg)
+ else:
+ optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
+
+ # register hooks
+ runner.register_training_hooks(cfg.lr_config, optimizer_config,
+ cfg.checkpoint_config, cfg.log_config)
+ runner.register_hook(DistSamplerSeedHook())
+ # register eval hooks
+ if validate:
+ val_dataset_cfg = cfg.data.val
+ eval_cfg = cfg.get('evaluation', {})
+ if isinstance(model.module, RPN):
+ # TODO: implement recall hooks for other datasets
+ runner.register_hook(
+ CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
+ else:
+ dataset_type = DATASETS.get(val_dataset_cfg.type)
+ if issubclass(dataset_type, datasets.CocoDataset):
+ runner.register_hook(
+ CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
+ else:
+ runner.register_hook(
+ DistEvalmAPHook(val_dataset_cfg, **eval_cfg))
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
+
+
+def _non_dist_train(model,
+ dataset,
+ cfg,
+ validate=False,
+ logger=None,
+ timestamp=None):
+ if validate:
+ raise NotImplementedError('Built-in validation is not implemented '
+                                  'yet for non-distributed training. Use '
+ 'distributed training or test.py and '
+ '*eval.py scripts instead.')
+ # prepare data loaders
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
+ data_loaders = [
+ build_dataloader(
+ ds,
+ cfg.data.imgs_per_gpu,
+ cfg.data.workers_per_gpu,
+ cfg.gpus,
+ dist=False) for ds in dataset
+ ]
+ # put model on gpus
+ model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
+
+ # build runner
+ optimizer = build_optimizer(model, cfg.optimizer)
+ runner = Runner(
+ model, batch_processor, optimizer, cfg.work_dir, logger=logger)
+    # an ugly workaround to make the .log and .log.json filenames the same
+ runner.timestamp = timestamp
+ # fp16 setting
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ optimizer_config = Fp16OptimizerHook(
+ **cfg.optimizer_config, **fp16_cfg, distributed=False)
+ else:
+ optimizer_config = cfg.optimizer_config
+ runner.register_training_hooks(cfg.lr_config, optimizer_config,
+ cfg.checkpoint_config, cfg.log_config)
+
+ if cfg.resume_from:
+ runner.resume(cfg.resume_from)
+ elif cfg.load_from:
+ runner.load_checkpoint(cfg.load_from)
+ runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/__init__.py
new file mode 100644
index 000000000..f8eb6cba5
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/__init__.py
@@ -0,0 +1,7 @@
+from .anchor import * # noqa: F401, F403
+from .bbox import * # noqa: F401, F403
+from .evaluation import * # noqa: F401, F403
+from .fp16 import * # noqa: F401, F403
+from .mask import * # noqa: F401, F403
+from .post_processing import * # noqa: F401, F403
+from .utils import * # noqa: F401, F403
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/__init__.py
new file mode 100644
index 000000000..06e2d1232
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/__init__.py
@@ -0,0 +1,12 @@
+from .anchor_generator import AnchorGenerator
+from .anchor_target import (anchor_inside_flags, anchor_target,
+ images_to_levels, unmap)
+from .guided_anchor_target import ga_loc_target, ga_shape_target
+from .point_generator import PointGenerator
+from .point_target import point_target
+
+__all__ = [
+ 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
+ 'ga_shape_target', 'PointGenerator', 'point_target', 'images_to_levels',
+ 'unmap'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_generator.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_generator.py
new file mode 100644
index 000000000..cd227ad06
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_generator.py
@@ -0,0 +1,98 @@
+import torch
+
+
+class AnchorGenerator(object):
+ """
+ Examples:
+ >>> from mmdet.core import AnchorGenerator
+ >>> self = AnchorGenerator(9, [1.], [1.])
+ >>> all_anchors = self.grid_anchors((2, 2), device='cpu')
+ >>> print(all_anchors)
+ tensor([[ 0., 0., 8., 8.],
+ [16., 0., 24., 8.],
+ [ 0., 16., 8., 24.],
+ [16., 16., 24., 24.]])
+ """
+
+ def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
+ self.base_size = base_size
+ self.scales = torch.Tensor(scales)
+ self.ratios = torch.Tensor(ratios)
+ self.scale_major = scale_major
+ self.ctr = ctr
+ self.base_anchors = self.gen_base_anchors()
+
+ @property
+ def num_base_anchors(self):
+ return self.base_anchors.size(0)
+
+ def gen_base_anchors(self):
+ w = self.base_size
+ h = self.base_size
+ if self.ctr is None:
+ x_ctr = 0.5 * (w - 1)
+ y_ctr = 0.5 * (h - 1)
+ else:
+ x_ctr, y_ctr = self.ctr
+
+ h_ratios = torch.sqrt(self.ratios)
+ w_ratios = 1 / h_ratios
+ if self.scale_major:
+ ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
+ hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
+ else:
+ ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
+ hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
+
+ # yapf: disable
+ base_anchors = torch.stack(
+ [
+ x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
+ x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
+ ],
+ dim=-1).round()
+ # yapf: enable
+
+ return base_anchors
+
+ def _meshgrid(self, x, y, row_major=True):
+ xx = x.repeat(len(y))
+ yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
+ if row_major:
+ return xx, yy
+ else:
+ return yy, xx
+
+ def grid_anchors(self, featmap_size, stride=16, device='cuda'):
+ base_anchors = self.base_anchors.to(device)
+
+ feat_h, feat_w = featmap_size
+ shift_x = torch.arange(0, feat_w, device=device) * stride
+ shift_y = torch.arange(0, feat_h, device=device) * stride
+ shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
+ shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
+ shifts = shifts.type_as(base_anchors)
+ # first feat_w elements correspond to the first row of shifts
+ # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
+ # shifted anchors (K, A, 4), reshape to (K*A, 4)
+
+ all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
+ all_anchors = all_anchors.view(-1, 4)
+ # first A rows correspond to A anchors of (0, 0) in feature map,
+ # then (0, 1), (0, 2), ...
+ return all_anchors
+
+ def valid_flags(self, featmap_size, valid_size, device='cuda'):
+ feat_h, feat_w = featmap_size
+ valid_h, valid_w = valid_size
+ assert valid_h <= feat_h and valid_w <= feat_w
+ valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
+ valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
+ valid_x[:valid_w] = 1
+ valid_y[:valid_h] = 1
+ valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+ valid = valid_xx & valid_yy
+ valid = valid[:,
+ None].expand(valid.size(0),
+ self.num_base_anchors).contiguous().view(-1)
+ return valid
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_target.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_target.py
new file mode 100644
index 000000000..daf43c45e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_target.py
@@ -0,0 +1,188 @@
+import torch
+
+from ..bbox import PseudoSampler, assign_and_sample, bbox2delta, build_assigner
+from ..utils import multi_apply
+
+
+def anchor_target(anchor_list,
+ valid_flag_list,
+ gt_bboxes_list,
+ img_metas,
+ target_means,
+ target_stds,
+ cfg,
+ gt_bboxes_ignore_list=None,
+ gt_labels_list=None,
+ label_channels=1,
+ sampling=True,
+ unmap_outputs=True):
+ """Compute regression and classification targets for anchors.
+
+ Args:
+ anchor_list (list[list]): Multi level anchors of each image.
+ valid_flag_list (list[list]): Multi level valid flags of each image.
+ gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+ img_metas (list[dict]): Meta info of each image.
+ target_means (Iterable): Mean value of regression targets.
+ target_stds (Iterable): Std value of regression targets.
+ cfg (dict): RPN train configs.
+
+ Returns:
+ tuple
+ """
+ num_imgs = len(img_metas)
+ assert len(anchor_list) == len(valid_flag_list) == num_imgs
+
+ # anchor number of multi levels
+ num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
+ # concat all level anchors and flags to a single tensor
+ for i in range(num_imgs):
+ assert len(anchor_list[i]) == len(valid_flag_list[i])
+ anchor_list[i] = torch.cat(anchor_list[i])
+ valid_flag_list[i] = torch.cat(valid_flag_list[i])
+
+ # compute targets for each image
+ if gt_bboxes_ignore_list is None:
+ gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
+ if gt_labels_list is None:
+ gt_labels_list = [None for _ in range(num_imgs)]
+ (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
+ pos_inds_list, neg_inds_list) = multi_apply(
+ anchor_target_single,
+ anchor_list,
+ valid_flag_list,
+ gt_bboxes_list,
+ gt_bboxes_ignore_list,
+ gt_labels_list,
+ img_metas,
+ target_means=target_means,
+ target_stds=target_stds,
+ cfg=cfg,
+ label_channels=label_channels,
+ sampling=sampling,
+ unmap_outputs=unmap_outputs)
+ # no valid anchors
+ if any([labels is None for labels in all_labels]):
+ return None
+ # sampled anchors of all images
+ num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
+ num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
+ # split targets to a list w.r.t. multiple levels
+ labels_list = images_to_levels(all_labels, num_level_anchors)
+ label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
+ bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
+ bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
+ return (labels_list, label_weights_list, bbox_targets_list,
+ bbox_weights_list, num_total_pos, num_total_neg)
+
+
+def images_to_levels(target, num_level_anchors):
+ """Convert targets by image to targets by feature level.
+
+ [target_img0, target_img1] -> [target_level0, target_level1, ...]
+ """
+ target = torch.stack(target, 0)
+ level_targets = []
+ start = 0
+ for n in num_level_anchors:
+ end = start + n
+ level_targets.append(target[:, start:end].squeeze(0))
+ start = end
+ return level_targets
+
+
+def anchor_target_single(flat_anchors,
+ valid_flags,
+ gt_bboxes,
+ gt_bboxes_ignore,
+ gt_labels,
+ img_meta,
+ target_means,
+ target_stds,
+ cfg,
+ label_channels=1,
+ sampling=True,
+ unmap_outputs=True):
+ inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
+ img_meta['img_shape'][:2],
+ cfg.allowed_border)
+ if not inside_flags.any():
+ return (None, ) * 6
+ # assign gt and sample anchors
+ anchors = flat_anchors[inside_flags, :]
+
+ if sampling:
+ assign_result, sampling_result = assign_and_sample(
+ anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
+ else:
+ bbox_assigner = build_assigner(cfg.assigner)
+ assign_result = bbox_assigner.assign(anchors, gt_bboxes,
+ gt_bboxes_ignore, gt_labels)
+ bbox_sampler = PseudoSampler()
+ sampling_result = bbox_sampler.sample(assign_result, anchors,
+ gt_bboxes)
+
+ num_valid_anchors = anchors.shape[0]
+ bbox_targets = torch.zeros_like(anchors)
+ bbox_weights = torch.zeros_like(anchors)
+ labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
+ label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
+
+ pos_inds = sampling_result.pos_inds
+ neg_inds = sampling_result.neg_inds
+ if len(pos_inds) > 0:
+ pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
+ sampling_result.pos_gt_bboxes,
+ target_means, target_stds)
+ bbox_targets[pos_inds, :] = pos_bbox_targets
+ bbox_weights[pos_inds, :] = 1.0
+ if gt_labels is None:
+ labels[pos_inds] = 1
+ else:
+ labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
+ if cfg.pos_weight <= 0:
+ label_weights[pos_inds] = 1.0
+ else:
+ label_weights[pos_inds] = cfg.pos_weight
+ if len(neg_inds) > 0:
+ label_weights[neg_inds] = 1.0
+
+ # map up to original set of anchors
+ if unmap_outputs:
+ num_total_anchors = flat_anchors.size(0)
+ labels = unmap(labels, num_total_anchors, inside_flags)
+ label_weights = unmap(label_weights, num_total_anchors, inside_flags)
+ bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
+ bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
+
+ return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
+ neg_inds)
+
+
+def anchor_inside_flags(flat_anchors,
+ valid_flags,
+ img_shape,
+ allowed_border=0):
+ img_h, img_w = img_shape[:2]
+ if allowed_border >= 0:
+ inside_flags = valid_flags & \
+ (flat_anchors[:, 0] >= -allowed_border).type(torch.uint8) & \
+ (flat_anchors[:, 1] >= -allowed_border).type(torch.uint8) & \
+ (flat_anchors[:, 2] < img_w + allowed_border).type(torch.uint8) & \
+ (flat_anchors[:, 3] < img_h + allowed_border).type(torch.uint8)
+ else:
+ inside_flags = valid_flags
+ return inside_flags
+
+
+def unmap(data, count, inds, fill=0):
+ """Unmap a subset of items (data) back to the original set of items (of
+ size count)."""
+ if data.dim() == 1:
+ ret = data.new_full((count, ), fill)
+ ret[inds] = data
+ else:
+ new_size = (count, ) + data.size()[1:]
+ ret = data.new_full(new_size, fill)
+ ret[inds, :] = data
+ return ret
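A minimal usage sketch of the `unmap` helper defined in this file, assuming the patched package is importable as `mmdet` per the paths above; the toy tensors are illustrative only.

import torch

from mmdet.core.anchor.anchor_target import unmap

# 5 anchors in total; only anchors 0, 2 and 3 were inside the image.
inside_flags = torch.tensor([True, False, True, True, False])
labels_inside = torch.tensor([2, 0, 1])  # targets for the 3 valid anchors

# Scatter the computed targets back to the full anchor set, padding with -1.
labels_full = unmap(labels_inside, count=5, inds=inside_flags, fill=-1)
print(labels_full)  # tensor([ 2, -1,  0,  1, -1])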
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/guided_anchor_target.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/guided_anchor_target.py
new file mode 100644
index 000000000..21162eb9e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/guided_anchor_target.py
@@ -0,0 +1,287 @@
+import torch
+
+from ..bbox import PseudoSampler, build_assigner, build_sampler
+from ..utils import multi_apply, unmap
+
+
+def calc_region(bbox, ratio, featmap_size=None):
+ """Calculate a proportional bbox region.
+
+ The bbox center is fixed and the new h' and w' are h * ratio and w * ratio.
+
+ Args:
+ bbox (Tensor): Bboxes to calculate regions, shape (n, 4)
+ ratio (float): Ratio of the output region.
+ featmap_size (tuple): Feature map size used for clipping the boundary.
+
+ Returns:
+ tuple: x1, y1, x2, y2
+ """
+ x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long()
+ y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long()
+ x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()
+ y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()
+ if featmap_size is not None:
+ x1 = x1.clamp(min=0, max=featmap_size[1] - 1)
+ y1 = y1.clamp(min=0, max=featmap_size[0] - 1)
+ x2 = x2.clamp(min=0, max=featmap_size[1] - 1)
+ y2 = y2.clamp(min=0, max=featmap_size[0] - 1)
+ return (x1, y1, x2, y2)
+
+
+def ga_loc_target(gt_bboxes_list,
+ featmap_sizes,
+ anchor_scale,
+ anchor_strides,
+ center_ratio=0.2,
+ ignore_ratio=0.5):
+ """Compute location targets for guided anchoring.
+
+ Each feature map is divided into positive, negative and ignore regions.
+ - positive regions: target 1, weight 1
+ - ignore regions: target 0, weight 0
+ - negative regions: target 0, weight 0.1
+
+ Args:
+ gt_bboxes_list (list[Tensor]): Gt bboxes of each image.
+ featmap_sizes (list[tuple]): Multi level sizes of each feature map.
+ anchor_scale (int): Anchor scale.
+ anchor_strides (list[int]): Multi level anchor strides.
+ center_ratio (float): Ratio of center region.
+ ignore_ratio (float): Ratio of ignore region.
+
+ Returns:
+ tuple
+ """
+ img_per_gpu = len(gt_bboxes_list)
+ num_lvls = len(featmap_sizes)
+ r1 = (1 - center_ratio) / 2
+ r2 = (1 - ignore_ratio) / 2
+ all_loc_targets = []
+ all_loc_weights = []
+ all_ignore_map = []
+ for lvl_id in range(num_lvls):
+ h, w = featmap_sizes[lvl_id]
+ loc_targets = torch.zeros(
+ img_per_gpu,
+ 1,
+ h,
+ w,
+ device=gt_bboxes_list[0].device,
+ dtype=torch.float32)
+ loc_weights = torch.full_like(loc_targets, -1)
+ ignore_map = torch.zeros_like(loc_targets)
+ all_loc_targets.append(loc_targets)
+ all_loc_weights.append(loc_weights)
+ all_ignore_map.append(ignore_map)
+ for img_id in range(img_per_gpu):
+ gt_bboxes = gt_bboxes_list[img_id]
+ scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) *
+ (gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1))
+ min_anchor_size = scale.new_full(
+ (1, ), float(anchor_scale * anchor_strides[0]))
+ # assign gt bboxes to different feature levels w.r.t. their scales
+ target_lvls = torch.floor(
+ torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
+ target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()
+ for gt_id in range(gt_bboxes.size(0)):
+ lvl = target_lvls[gt_id].item()
+ # rescaled to corresponding feature map
+ gt_ = gt_bboxes[gt_id, :4] / anchor_strides[lvl]
+ # calculate ignore regions
+ ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
+ gt_, r2, featmap_sizes[lvl])
+ # calculate positive (center) regions
+ ctr_x1, ctr_y1, ctr_x2, ctr_y2 = calc_region(
+ gt_, r1, featmap_sizes[lvl])
+ all_loc_targets[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
+ ctr_x1:ctr_x2 + 1] = 1
+ all_loc_weights[lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
+ ignore_x1:ignore_x2 + 1] = 0
+ all_loc_weights[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
+ ctr_x1:ctr_x2 + 1] = 1
+ # calculate ignore map on nearby low level feature
+ if lvl > 0:
+ d_lvl = lvl - 1
+ # rescaled to corresponding feature map
+ gt_ = gt_bboxes[gt_id, :4] / anchor_strides[d_lvl]
+ ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
+ gt_, r2, featmap_sizes[d_lvl])
+ all_ignore_map[d_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
+ ignore_x1:ignore_x2 + 1] = 1
+ # calculate ignore map on nearby high level feature
+ if lvl < num_lvls - 1:
+ u_lvl = lvl + 1
+ # rescaled to corresponding feature map
+ gt_ = gt_bboxes[gt_id, :4] / anchor_strides[u_lvl]
+ ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
+ gt_, r2, featmap_sizes[u_lvl])
+ all_ignore_map[u_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
+ ignore_x1:ignore_x2 + 1] = 1
+ for lvl_id in range(num_lvls):
+ # ignore negative regions w.r.t. ignore map
+ all_loc_weights[lvl_id][(all_loc_weights[lvl_id] < 0)
+ & (all_ignore_map[lvl_id] > 0)] = 0
+ # set negative regions with weight 0.1
+ all_loc_weights[lvl_id][all_loc_weights[lvl_id] < 0] = 0.1
+ # loc average factor to balance loss
+ loc_avg_factor = sum(
+ [t.size(0) * t.size(-1) * t.size(-2) for t in all_loc_targets]) / 200
+ return all_loc_targets, all_loc_weights, loc_avg_factor
+
+
+def ga_shape_target(approx_list,
+ inside_flag_list,
+ square_list,
+ gt_bboxes_list,
+ img_metas,
+ approxs_per_octave,
+ cfg,
+ gt_bboxes_ignore_list=None,
+ sampling=True,
+ unmap_outputs=True):
+ """Compute guided anchoring targets.
+
+ Args:
+ approx_list (list[list]): Multi level approxs of each image.
+ inside_flag_list (list[list]): Multi level inside flags of each image.
+ square_list (list[list]): Multi level squares of each image.
+ gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+ img_metas (list[dict]): Meta info of each image.
+ approxs_per_octave (int): number of approxs per octave
+ cfg (dict): RPN train configs.
+ gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
+ sampling (bool): sampling or not.
+ unmap_outputs (bool): unmap outputs or not.
+
+ Returns:
+ tuple
+ """
+ num_imgs = len(img_metas)
+ assert len(approx_list) == len(inside_flag_list) == len(
+ square_list) == num_imgs
+ # anchor number of multi levels
+ num_level_squares = [squares.size(0) for squares in square_list[0]]
+ # concat all level anchors and flags to a single tensor
+ inside_flag_flat_list = []
+ approx_flat_list = []
+ square_flat_list = []
+ for i in range(num_imgs):
+ assert len(square_list[i]) == len(inside_flag_list[i])
+ inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
+ approx_flat_list.append(torch.cat(approx_list[i]))
+ square_flat_list.append(torch.cat(square_list[i]))
+
+ # compute targets for each image
+ if gt_bboxes_ignore_list is None:
+ gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
+ (all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,
+ neg_inds_list) = multi_apply(
+ ga_shape_target_single,
+ approx_flat_list,
+ inside_flag_flat_list,
+ square_flat_list,
+ gt_bboxes_list,
+ gt_bboxes_ignore_list,
+ img_metas,
+ approxs_per_octave=approxs_per_octave,
+ cfg=cfg,
+ sampling=sampling,
+ unmap_outputs=unmap_outputs)
+ # no valid anchors
+ if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):
+ return None
+ # sampled anchors of all images
+ num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
+ num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
+ # split targets to a list w.r.t. multiple levels
+ bbox_anchors_list = images_to_levels(all_bbox_anchors, num_level_squares)
+ bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)
+ bbox_weights_list = images_to_levels(all_bbox_weights, num_level_squares)
+ return (bbox_anchors_list, bbox_gts_list, bbox_weights_list, num_total_pos,
+ num_total_neg)
+
+
+def images_to_levels(target, num_level_anchors):
+ """Convert targets by image to targets by feature level.
+
+ [target_img0, target_img1] -> [target_level0, target_level1, ...]
+ """
+ target = torch.stack(target, 0)
+ level_targets = []
+ start = 0
+ for n in num_level_anchors:
+ end = start + n
+ level_targets.append(target[:, start:end].squeeze(0))
+ start = end
+ return level_targets
+
+
+def ga_shape_target_single(flat_approxs,
+ inside_flags,
+ flat_squares,
+ gt_bboxes,
+ gt_bboxes_ignore,
+ img_meta,
+ approxs_per_octave,
+ cfg,
+ sampling=True,
+ unmap_outputs=True):
+ """Compute guided anchoring targets.
+
+ This function returns sampled anchors and gt bboxes directly
+ rather than calculating regression targets.
+
+ Args:
+ flat_approxs (Tensor): flat approxs of a single image,
+ shape (approxs_per_octave * n, 4)
+ inside_flags (Tensor): inside flags of a single image,
+ shape (n, ).
+ flat_squares (Tensor): flat squares of a single image,
+ shape (n, 4)
+ gt_bboxes (Tensor): Ground truth bboxes of a single image.
+ img_meta (dict): Meta info of a single image.
+ approxs_per_octave (int): number of approxs per octave
+ cfg (dict): RPN train configs.
+ sampling (bool): sampling or not.
+ unmap_outputs (bool): unmap outputs or not.
+
+ Returns:
+ tuple
+ """
+ if not inside_flags.any():
+ return (None, ) * 5
+ # assign gt and sample anchors
+ expand_inside_flags = inside_flags[:, None].expand(
+ -1, approxs_per_octave).reshape(-1)
+ approxs = flat_approxs[expand_inside_flags, :]
+ squares = flat_squares[inside_flags, :]
+
+ bbox_assigner = build_assigner(cfg.ga_assigner)
+ assign_result = bbox_assigner.assign(approxs, squares, approxs_per_octave,
+ gt_bboxes, gt_bboxes_ignore)
+ if sampling:
+ bbox_sampler = build_sampler(cfg.ga_sampler)
+ else:
+ bbox_sampler = PseudoSampler()
+ sampling_result = bbox_sampler.sample(assign_result, squares, gt_bboxes)
+
+ bbox_anchors = torch.zeros_like(squares)
+ bbox_gts = torch.zeros_like(squares)
+ bbox_weights = torch.zeros_like(squares)
+
+ pos_inds = sampling_result.pos_inds
+ neg_inds = sampling_result.neg_inds
+ if len(pos_inds) > 0:
+ bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes
+ bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes
+ bbox_weights[pos_inds, :] = 1.0
+
+ # map up to original set of anchors
+ if unmap_outputs:
+ num_total_anchors = flat_squares.size(0)
+ bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags)
+ bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags)
+ bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
+
+ return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)
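A small sketch of how `calc_region` yields the center and ignore regions consumed by `ga_loc_target`, assuming the patched `mmdet` package is importable; the box, the 20x20 feature map and the ratios (r1 = 0.4 and r2 = 0.25, i.e. the defaults center_ratio=0.2 and ignore_ratio=0.5) are illustrative.

import torch

from mmdet.core.anchor.guided_anchor_target import calc_region

# One gt box (x1, y1, x2, y2) already rescaled to a 20x20 feature map.
gt = torch.tensor([4.0, 4.0, 12.0, 12.0])

# r1 = (1 - center_ratio) / 2 = 0.4 -> small central (positive) region
ctr = calc_region(gt, 0.4, featmap_size=(20, 20))   # (7, 7, 9, 9)
# r2 = (1 - ignore_ratio) / 2 = 0.25 -> larger region whose ring is ignored
ign = calc_region(gt, 0.25, featmap_size=(20, 20))  # (6, 6, 10, 10)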
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_generator.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_generator.py
new file mode 100644
index 000000000..c1a34dddd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_generator.py
@@ -0,0 +1,34 @@
+import torch
+
+
+class PointGenerator(object):
+
+ def _meshgrid(self, x, y, row_major=True):
+ xx = x.repeat(len(y))
+ yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
+ if row_major:
+ return xx, yy
+ else:
+ return yy, xx
+
+ def grid_points(self, featmap_size, stride=16, device='cuda'):
+ feat_h, feat_w = featmap_size
+ shift_x = torch.arange(0., feat_w, device=device) * stride
+ shift_y = torch.arange(0., feat_h, device=device) * stride
+ shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
+ stride = shift_x.new_full((shift_xx.shape[0], ), stride)
+ shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
+ all_points = shifts.to(device)
+ return all_points
+
+ def valid_flags(self, featmap_size, valid_size, device='cuda'):
+ feat_h, feat_w = featmap_size
+ valid_h, valid_w = valid_size
+ assert valid_h <= feat_h and valid_w <= feat_w
+ valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
+ valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
+ valid_x[:valid_w] = 1
+ valid_y[:valid_h] = 1
+ valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+ valid = valid_xx & valid_yy
+ return valid
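A quick sketch of `PointGenerator` on a tiny feature map, assuming the patched `mmdet` package is importable; the sizes are illustrative.

import torch

from mmdet.core.anchor.point_generator import PointGenerator

gen = PointGenerator()
# 2x3 feature map with stride 16 -> 6 points, each row is (x, y, stride).
points = gen.grid_points((2, 3), stride=16, device='cpu')
print(points.shape)  # torch.Size([6, 3])

# Only a 2x2 region of the feature map corresponds to the unpadded image.
flags = gen.valid_flags((2, 3), (2, 2), device='cpu')
print(flags)  # tensor([1, 1, 0, 1, 1, 0], dtype=torch.uint8)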
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_target.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_target.py
new file mode 100644
index 000000000..1ab8d0260
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_target.py
@@ -0,0 +1,165 @@
+import torch
+
+from ..bbox import PseudoSampler, assign_and_sample, build_assigner
+from ..utils import multi_apply
+
+
+def point_target(proposals_list,
+ valid_flag_list,
+ gt_bboxes_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore_list=None,
+ gt_labels_list=None,
+ label_channels=1,
+ sampling=True,
+ unmap_outputs=True):
+ """Compute corresponding GT box and classification targets for proposals.
+
+ Args:
+ points_list (list[list]): Multi level points of each image.
+ valid_flag_list (list[list]): Multi level valid flags of each image.
+ gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+ img_metas (list[dict]): Meta info of each image.
+ cfg (dict): train sample configs.
+
+ Returns:
+ tuple
+ """
+ num_imgs = len(img_metas)
+ assert len(proposals_list) == len(valid_flag_list) == num_imgs
+
+ # points number of multi levels
+ num_level_proposals = [points.size(0) for points in proposals_list[0]]
+
+ # concat all level points and flags to a single tensor
+ for i in range(num_imgs):
+ assert len(proposals_list[i]) == len(valid_flag_list[i])
+ proposals_list[i] = torch.cat(proposals_list[i])
+ valid_flag_list[i] = torch.cat(valid_flag_list[i])
+
+ # compute targets for each image
+ if gt_bboxes_ignore_list is None:
+ gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
+ if gt_labels_list is None:
+ gt_labels_list = [None for _ in range(num_imgs)]
+ (all_labels, all_label_weights, all_bbox_gt, all_proposals,
+ all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(
+ point_target_single,
+ proposals_list,
+ valid_flag_list,
+ gt_bboxes_list,
+ gt_bboxes_ignore_list,
+ gt_labels_list,
+ cfg=cfg,
+ label_channels=label_channels,
+ sampling=sampling,
+ unmap_outputs=unmap_outputs)
+ # no valid points
+ if any([labels is None for labels in all_labels]):
+ return None
+ # sampled points of all images
+ num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
+ num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
+ labels_list = images_to_levels(all_labels, num_level_proposals)
+ label_weights_list = images_to_levels(all_label_weights,
+ num_level_proposals)
+ bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals)
+ proposals_list = images_to_levels(all_proposals, num_level_proposals)
+ proposal_weights_list = images_to_levels(all_proposal_weights,
+ num_level_proposals)
+ return (labels_list, label_weights_list, bbox_gt_list, proposals_list,
+ proposal_weights_list, num_total_pos, num_total_neg)
+
+
+def images_to_levels(target, num_level_grids):
+ """Convert targets by image to targets by feature level.
+
+ [target_img0, target_img1] -> [target_level0, target_level1, ...]
+ """
+ target = torch.stack(target, 0)
+ level_targets = []
+ start = 0
+ for n in num_level_grids:
+ end = start + n
+ level_targets.append(target[:, start:end].squeeze(0))
+ start = end
+ return level_targets
+
+
+def point_target_single(flat_proposals,
+ valid_flags,
+ gt_bboxes,
+ gt_bboxes_ignore,
+ gt_labels,
+ cfg,
+ label_channels=1,
+ sampling=True,
+ unmap_outputs=True):
+ inside_flags = valid_flags
+ if not inside_flags.any():
+ return (None, ) * 7
+ # assign gt and sample proposals
+ proposals = flat_proposals[inside_flags, :]
+
+ if sampling:
+ assign_result, sampling_result = assign_and_sample(
+ proposals, gt_bboxes, gt_bboxes_ignore, None, cfg)
+ else:
+ bbox_assigner = build_assigner(cfg.assigner)
+ assign_result = bbox_assigner.assign(proposals, gt_bboxes,
+ gt_bboxes_ignore, gt_labels)
+ bbox_sampler = PseudoSampler()
+ sampling_result = bbox_sampler.sample(assign_result, proposals,
+ gt_bboxes)
+
+ num_valid_proposals = proposals.shape[0]
+ bbox_gt = proposals.new_zeros([num_valid_proposals, 4])
+ pos_proposals = torch.zeros_like(proposals)
+ proposals_weights = proposals.new_zeros([num_valid_proposals, 4])
+ labels = proposals.new_zeros(num_valid_proposals, dtype=torch.long)
+ label_weights = proposals.new_zeros(num_valid_proposals, dtype=torch.float)
+
+ pos_inds = sampling_result.pos_inds
+ neg_inds = sampling_result.neg_inds
+ if len(pos_inds) > 0:
+ pos_gt_bboxes = sampling_result.pos_gt_bboxes
+ bbox_gt[pos_inds, :] = pos_gt_bboxes
+ pos_proposals[pos_inds, :] = proposals[pos_inds, :]
+ proposals_weights[pos_inds, :] = 1.0
+ if gt_labels is None:
+ labels[pos_inds] = 1
+ else:
+ labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
+ if cfg.pos_weight <= 0:
+ label_weights[pos_inds] = 1.0
+ else:
+ label_weights[pos_inds] = cfg.pos_weight
+ if len(neg_inds) > 0:
+ label_weights[neg_inds] = 1.0
+
+ # map up to original set of proposals
+ if unmap_outputs:
+ num_total_proposals = flat_proposals.size(0)
+ labels = unmap(labels, num_total_proposals, inside_flags)
+ label_weights = unmap(label_weights, num_total_proposals, inside_flags)
+ bbox_gt = unmap(bbox_gt, num_total_proposals, inside_flags)
+ pos_proposals = unmap(pos_proposals, num_total_proposals, inside_flags)
+ proposals_weights = unmap(proposals_weights, num_total_proposals,
+ inside_flags)
+
+ return (labels, label_weights, bbox_gt, pos_proposals, proposals_weights,
+ pos_inds, neg_inds)
+
+
+def unmap(data, count, inds, fill=0):
+ """Unmap a subset of items (data) back to the original set of items (of
+ size count)."""
+ if data.dim() == 1:
+ ret = data.new_full((count, ), fill)
+ ret[inds] = data
+ else:
+ new_size = (count, ) + data.size()[1:]
+ ret = data.new_full(new_size, fill)
+ ret[inds, :] = data
+ return ret
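A short sketch of the `images_to_levels` helper (the same helper is duplicated in anchor_target.py and guided_anchor_target.py), assuming the patched `mmdet` package is importable; the per-image targets below are illustrative.

import torch

from mmdet.core.anchor.point_target import images_to_levels

# Two images, each with 6 flattened targets coming from two levels (4 + 2).
per_image = [torch.arange(6), torch.arange(6, 12)]
per_level = images_to_levels(per_image, [4, 2])
print(per_level[0].shape)  # torch.Size([2, 4]) -> level-0 targets for the batch
print(per_level[1].shape)  # torch.Size([2, 2]) -> level-1 targets for the batch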
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/__init__.py
new file mode 100644
index 000000000..a0de91724
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/__init__.py
@@ -0,0 +1,22 @@
+from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
+from .bbox_target import bbox_target
+from .geometry import bbox_overlaps
+from .samplers import (BaseSampler, CombinedSampler,
+ InstanceBalancedPosSampler, IoUBalancedNegSampler,
+ PseudoSampler, RandomSampler, SamplingResult)
+from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip,
+ bbox_mapping, bbox_mapping_back, delta2bbox,
+ distance2bbox, roi2bbox)
+
+from .assign_sampling import ( # isort:skip, avoid recursive imports
+ assign_and_sample, build_assigner, build_sampler)
+
+__all__ = [
+ 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
+ 'BaseSampler', 'PseudoSampler', 'RandomSampler',
+ 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
+ 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
+ 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
+ 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
+ 'distance2bbox', 'bbox_target'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assign_sampling.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assign_sampling.py
new file mode 100644
index 000000000..4267174bb
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assign_sampling.py
@@ -0,0 +1,33 @@
+import mmcv
+
+from . import assigners, samplers
+
+
+def build_assigner(cfg, **kwargs):
+ if isinstance(cfg, assigners.BaseAssigner):
+ return cfg
+ elif isinstance(cfg, dict):
+ return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
+ else:
+ raise TypeError('Invalid type {} for building a sampler'.format(
+ type(cfg)))
+
+
+def build_sampler(cfg, **kwargs):
+ if isinstance(cfg, samplers.BaseSampler):
+ return cfg
+ elif isinstance(cfg, dict):
+ return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
+ else:
+ raise TypeError('Invalid type {} for building a sampler'.format(
+ type(cfg)))
+
+
+def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
+ bbox_assigner = build_assigner(cfg.assigner)
+ bbox_sampler = build_sampler(cfg.sampler)
+ assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
+ gt_labels)
+ sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
+ gt_labels)
+ return assign_result, sampling_result
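A minimal sketch of the config-driven construction above, assuming mmcv and the patched `mmdet` package are installed; the assigner settings and boxes are illustrative, and `PseudoSampler` is used the same way as in the *_target functions earlier in this patch.

import torch

from mmdet.core.bbox import PseudoSampler
from mmdet.core.bbox.assign_sampling import build_assigner

assigner = build_assigner(dict(type='MaxIoUAssigner',
                               pos_iou_thr=0.7, neg_iou_thr=0.3))
bboxes = torch.tensor([[0., 0., 10., 10.], [10., 10., 20., 20.]])
gt_bboxes = torch.tensor([[0., 0., 10., 9.]])

assign_result = assigner.assign(bboxes, gt_bboxes)
# Without a sampling config, the pseudo sampler keeps all assigned boxes.
sampling_result = PseudoSampler().sample(assign_result, bboxes, gt_bboxes)
print(sampling_result.pos_inds, sampling_result.neg_inds)  # tensor([0]) tensor([1])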
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/__init__.py
new file mode 100644
index 000000000..4ed1d5643
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/__init__.py
@@ -0,0 +1,11 @@
+from .approx_max_iou_assigner import ApproxMaxIoUAssigner
+from .assign_result import AssignResult
+from .atss_assigner import ATSSAssigner
+from .base_assigner import BaseAssigner
+from .max_iou_assigner import MaxIoUAssigner
+from .point_assigner import PointAssigner
+
+__all__ = [
+ 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
+ 'PointAssigner', 'ATSSAssigner'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/approx_max_iou_assigner.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/approx_max_iou_assigner.py
new file mode 100644
index 000000000..e7d3510a0
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/approx_max_iou_assigner.py
@@ -0,0 +1,139 @@
+import torch
+
+from ..geometry import bbox_overlaps
+from .max_iou_assigner import MaxIoUAssigner
+
+
+class ApproxMaxIoUAssigner(MaxIoUAssigner):
+ """Assign a corresponding gt bbox or background to each bbox.
+
+ Each proposal will be assigned `-1`, `0`, or a positive integer
+ indicating the ground truth index.
+
+ - -1: don't care
+ - 0: negative sample, no assigned gt
+ - positive integer: positive sample, index (1-based) of assigned gt
+
+ Args:
+ pos_iou_thr (float): IoU threshold for positive bboxes.
+ neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+ min_pos_iou (float): Minimum iou for a bbox to be considered as a
+ positive bbox. Positive samples can have smaller IoU than
+ pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
+ gt_max_assign_all (bool): Whether to assign all bboxes with the same
+ highest overlap with some gt to that gt.
+ ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
+ `gt_bboxes_ignore` is specified). Negative values mean not
+ ignoring any bboxes.
+ ignore_wrt_candidates (bool): Whether to compute the iof between
+ `bboxes` and `gt_bboxes_ignore`, or the contrary.
+ gpu_assign_thr (int): The upper bound of the number of GT for GPU
+ assign. When the number of gt is above this threshold, will assign
+ on CPU device. Negative values mean not assign on CPU.
+ """
+
+ def __init__(self,
+ pos_iou_thr,
+ neg_iou_thr,
+ min_pos_iou=.0,
+ gt_max_assign_all=True,
+ ignore_iof_thr=-1,
+ ignore_wrt_candidates=True,
+ gpu_assign_thr=-1):
+ self.pos_iou_thr = pos_iou_thr
+ self.neg_iou_thr = neg_iou_thr
+ self.min_pos_iou = min_pos_iou
+ self.gt_max_assign_all = gt_max_assign_all
+ self.ignore_iof_thr = ignore_iof_thr
+ self.ignore_wrt_candidates = ignore_wrt_candidates
+ self.gpu_assign_thr = gpu_assign_thr
+
+ def assign(self,
+ approxs,
+ squares,
+ approxs_per_octave,
+ gt_bboxes,
+ gt_bboxes_ignore=None,
+ gt_labels=None):
+ """Assign gt to approxs.
+
+ This method assigns a gt bbox to each group of approxs (bboxes).
+ Each group of approxs is represented by a base approx (bbox) and
+ will be assigned -1, 0, or a positive number.
+ -1 means don't care, 0 means negative sample, and a positive number
+ is the index (1-based) of the assigned gt.
+ The assignment is done in the following steps; the order matters.
+
+ 1. assign every bbox to -1
+ 2. use the max IoU of each group of approxs as the group's overlap
+ with each gt
+ 3. assign proposals whose iou with all gts < neg_iou_thr to 0
+ 4. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
+ assign it to that gt
+ 5. for each gt bbox, assign its nearest proposals (may be more than
+ one) to itself
+
+ Args:
+ approxs (Tensor): Bounding boxes to be assigned,
+ shape(approxs_per_octave*n, 4).
+ squares (Tensor): Base Bounding boxes to be assigned,
+ shape(n, 4).
+ approxs_per_octave (int): number of approxs per octave
+ gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
+ gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
+ labelled as `ignored`, e.g., crowd boxes in COCO.
+ gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
+
+ Returns:
+ :obj:`AssignResult`: The assign result.
+ """
+ num_squares = squares.size(0)
+ num_gts = gt_bboxes.size(0)
+
+ if num_squares == 0 or num_gts == 0:
+ # No predictions and/or truth, return empty assignment
+ overlaps = approxs.new(num_gts, num_squares)
+ assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
+ return assign_result
+
+ # re-organize anchors by approxs_per_octave x num_squares
+ approxs = torch.transpose(
+ approxs.view(num_squares, approxs_per_octave, 4), 0,
+ 1).contiguous().view(-1, 4)
+ assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
+ num_gts > self.gpu_assign_thr) else False
+ # compute overlap and assign gt on CPU when number of GT is large
+ if assign_on_cpu:
+ device = approxs.device
+ approxs = approxs.cpu()
+ gt_bboxes = gt_bboxes.cpu()
+ if gt_bboxes_ignore is not None:
+ gt_bboxes_ignore = gt_bboxes_ignore.cpu()
+ if gt_labels is not None:
+ gt_labels = gt_labels.cpu()
+ all_overlaps = bbox_overlaps(approxs, gt_bboxes)
+
+ overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
+ num_gts).max(dim=0)
+ overlaps = torch.transpose(overlaps, 0, 1)
+
+ bboxes = squares[:, :4]
+
+ if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
+ gt_bboxes_ignore.numel() > 0):
+ if self.ignore_wrt_candidates:
+ ignore_overlaps = bbox_overlaps(
+ bboxes, gt_bboxes_ignore, mode='iof')
+ ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
+ else:
+ ignore_overlaps = bbox_overlaps(
+ gt_bboxes_ignore, bboxes, mode='iof')
+ ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
+ overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
+
+ assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
+ if assign_on_cpu:
+ assign_result.gt_inds = assign_result.gt_inds.to(device)
+ assign_result.max_overlaps = assign_result.max_overlaps.to(device)
+ if assign_result.labels is not None:
+ assign_result.labels = assign_result.labels.to(device)
+ return assign_result
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/assign_result.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/assign_result.py
new file mode 100644
index 000000000..5e81c8978
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/assign_result.py
@@ -0,0 +1,192 @@
+import torch
+
+from mmdet.utils import util_mixins
+
+
+class AssignResult(util_mixins.NiceRepr):
+ """
+ Stores assignments between predicted and truth boxes.
+
+ Attributes:
+ num_gts (int): the number of truth boxes considered when computing this
+ assignment
+
+ gt_inds (LongTensor): for each predicted box indicates the 1-based
+ index of the assigned truth box. 0 means unassigned and -1 means
+ ignore.
+
+ max_overlaps (FloatTensor): the iou between the predicted box and its
+ assigned truth box.
+
+ labels (None | LongTensor): If specified, for each predicted box
+ indicates the category label of the assigned truth box.
+
+ Example:
+ >>> # An assign result between 4 predicted boxes and 9 true boxes
+ >>> # where only two boxes were assigned.
+ >>> num_gts = 9
+ >>> max_overlaps = torch.FloatTensor([0, .5, .9, 0])
+ >>> gt_inds = torch.LongTensor([-1, 1, 2, 0])
+ >>> labels = torch.LongTensor([0, 3, 4, 0])
+ >>> self = AssignResult(num_gts, gt_inds, max_overlaps, labels)
+ >>> print(str(self)) # xdoctest: +IGNORE_WANT
+
+ >>> # Force addition of gt labels (when adding gt as proposals)
+ >>> new_labels = torch.LongTensor([3, 4, 5])
+ >>> self.add_gt_(new_labels)
+ >>> print(str(self)) # xdoctest: +IGNORE_WANT
+
+ """
+
+ def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
+ self.num_gts = num_gts
+ self.gt_inds = gt_inds
+ self.max_overlaps = max_overlaps
+ self.labels = labels
+
+ @property
+ def num_preds(self):
+ """
+ Return the number of predictions in this assignment
+ """
+ return len(self.gt_inds)
+
+ @property
+ def info(self):
+ """
+ Returns a dictionary of info about the object
+ """
+ return {
+ 'num_gts': self.num_gts,
+ 'num_preds': self.num_preds,
+ 'gt_inds': self.gt_inds,
+ 'max_overlaps': self.max_overlaps,
+ 'labels': self.labels,
+ }
+
+ def __nice__(self):
+ """
+ Create a "nice" summary string describing this assign result
+ """
+ parts = []
+ parts.append('num_gts={!r}'.format(self.num_gts))
+ if self.gt_inds is None:
+ parts.append('gt_inds={!r}'.format(self.gt_inds))
+ else:
+ parts.append('gt_inds.shape={!r}'.format(
+ tuple(self.gt_inds.shape)))
+ if self.max_overlaps is None:
+ parts.append('max_overlaps={!r}'.format(self.max_overlaps))
+ else:
+ parts.append('max_overlaps.shape={!r}'.format(
+ tuple(self.max_overlaps.shape)))
+ if self.labels is None:
+ parts.append('labels={!r}'.format(self.labels))
+ else:
+ parts.append('labels.shape={!r}'.format(tuple(self.labels.shape)))
+ return ', '.join(parts)
+
+ @classmethod
+ def random(cls, **kwargs):
+ """
+ Create random AssignResult for tests or debugging.
+
+ Kwargs:
+ num_preds: number of predicted boxes
+ num_gts: number of true boxes
+ p_ignore (float): probability of a predicted box assigned to an
+ ignored truth
+ p_assigned (float): probability of a predicted box not being
+ assigned
+ p_use_label (float | bool): with labels or not
+ rng (None | int | numpy.random.RandomState): seed or state
+
+ Returns:
+ AssignResult :
+
+ Example:
+ >>> from mmdet.core.bbox.assigners.assign_result import * # NOQA
+ >>> self = AssignResult.random()
+ >>> print(self.info)
+ """
+ from mmdet.core.bbox import demodata
+ rng = demodata.ensure_rng(kwargs.get('rng', None))
+
+ num_gts = kwargs.get('num_gts', None)
+ num_preds = kwargs.get('num_preds', None)
+ p_ignore = kwargs.get('p_ignore', 0.3)
+ p_assigned = kwargs.get('p_assigned', 0.7)
+ p_use_label = kwargs.get('p_use_label', 0.5)
+ num_classes = kwargs.get('num_classes', 3)
+
+ if num_gts is None:
+ num_gts = rng.randint(0, 8)
+ if num_preds is None:
+ num_preds = rng.randint(0, 16)
+
+ if num_gts == 0:
+ max_overlaps = torch.zeros(num_preds, dtype=torch.float32)
+ gt_inds = torch.zeros(num_preds, dtype=torch.int64)
+ if p_use_label is True or p_use_label < rng.rand():
+ labels = torch.zeros(num_preds, dtype=torch.int64)
+ else:
+ labels = None
+ else:
+ import numpy as np
+ # Create an overlap for each predicted box
+ max_overlaps = torch.from_numpy(rng.rand(num_preds))
+
+ # Construct gt_inds for each predicted box
+ is_assigned = torch.from_numpy(rng.rand(num_preds) < p_assigned)
+ # maximum number of assignments constraints
+ n_assigned = min(num_preds, min(num_gts, is_assigned.sum()))
+
+ assigned_idxs = np.where(is_assigned)[0]
+ rng.shuffle(assigned_idxs)
+ assigned_idxs = assigned_idxs[0:n_assigned]
+ assigned_idxs.sort()
+
+ is_assigned[:] = 0
+ is_assigned[assigned_idxs] = True
+
+ is_ignore = torch.from_numpy(
+ rng.rand(num_preds) < p_ignore) & is_assigned
+
+ gt_inds = torch.zeros(num_preds, dtype=torch.int64)
+
+ true_idxs = np.arange(num_gts)
+ rng.shuffle(true_idxs)
+ true_idxs = torch.from_numpy(true_idxs)
+ gt_inds[is_assigned] = true_idxs[:n_assigned]
+
+ gt_inds = torch.from_numpy(
+ rng.randint(1, num_gts + 1, size=num_preds))
+ gt_inds[is_ignore] = -1
+ gt_inds[~is_assigned] = 0
+ max_overlaps[~is_assigned] = 0
+
+ if p_use_label is True or p_use_label < rng.rand():
+ if num_classes == 0:
+ labels = torch.zeros(num_preds, dtype=torch.int64)
+ else:
+ labels = torch.from_numpy(
+ rng.randint(1, num_classes + 1, size=num_preds))
+ labels[~is_assigned] = 0
+ else:
+ labels = None
+
+ self = cls(num_gts, gt_inds, max_overlaps, labels)
+ return self
+
+ def add_gt_(self, gt_labels):
+ self_inds = torch.arange(
+ 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
+ self.gt_inds = torch.cat([self_inds, self.gt_inds])
+
+ self.max_overlaps = torch.cat(
+ [self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps])
+
+ if self.labels is not None:
+ self.labels = torch.cat([gt_labels, self.labels])
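A small sketch of constructing an `AssignResult` by hand and appending gt boxes with `add_gt_`, assuming the patched `mmdet` package is importable; the tensors are illustrative.

import torch

from mmdet.core.bbox.assigners import AssignResult

# Two predicted boxes against one gt: box 0 is assigned to gt #1 (1-based).
assign = AssignResult(num_gts=1,
                      gt_inds=torch.LongTensor([1, 0]),
                      max_overlaps=torch.FloatTensor([0.9, 0.1]),
                      labels=torch.LongTensor([3, 0]))
print(assign.num_preds)  # 2

# When gt boxes are later appended as proposals, add_gt_ prepends their
# self-assignments so the 1-based gt indices stay consistent.
assign.add_gt_(torch.LongTensor([3]))
print(assign.gt_inds)       # tensor([1, 1, 0])
print(assign.max_overlaps)  # tensor([1.0000, 0.9000, 0.1000])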
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/atss_assigner.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/atss_assigner.py
new file mode 100644
index 000000000..e442ac709
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/atss_assigner.py
@@ -0,0 +1,159 @@
+import torch
+
+from ..geometry import bbox_overlaps
+from .assign_result import AssignResult
+from .base_assigner import BaseAssigner
+
+
+class ATSSAssigner(BaseAssigner):
+ """Assign a corresponding gt bbox or background to each bbox.
+
+ Each proposal will be assigned `0` or a positive integer
+ indicating the ground truth index.
+
+ - 0: negative sample, no assigned gt
+ - positive integer: positive sample, index (1-based) of assigned gt
+
+ Args:
+ topk (int): number of bboxes selected on each level
+ """
+
+ def __init__(self, topk):
+ self.topk = topk
+
+ # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py
+
+ def assign(self,
+ bboxes,
+ num_level_bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=None,
+ gt_labels=None):
+ """Assign gt to bboxes.
+
+ The assignment is done in the following steps:
+
+ 1. compute the iou between all bboxes (bboxes of all pyramid levels) and gts
+ 2. compute the center distance between all bboxes and gts
+ 3. on each pyramid level, for each gt, select the k bboxes whose centers
+ are closest to the gt center, so k*l bboxes are selected in total as
+ candidates for each gt
+ 4. get the corresponding iou for these candidates, and compute the
+ mean and std; set mean + std as the iou threshold
+ 5. select the candidates whose iou is greater than or equal to
+ the threshold as positive
+ 6. limit the positive samples' centers to lie inside the gts
+
+
+ Args:
+ bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
+ num_level_bboxes (list[int]): number of bboxes on each level
+ gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
+ gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
+ labelled as `ignored`, e.g., crowd boxes in COCO.
+ gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
+
+ Returns:
+ :obj:`AssignResult`: The assign result.
+ """
+ INF = 100000000
+ bboxes = bboxes[:, :4]
+ num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
+
+ # compute iou between all bbox and gt
+ overlaps = bbox_overlaps(bboxes, gt_bboxes)
+
+ # assign 0 by default
+ assigned_gt_inds = overlaps.new_full((num_bboxes, ),
+ 0,
+ dtype=torch.long)
+
+ if num_gt == 0 or num_bboxes == 0:
+ # No ground truth or boxes, return empty assignment
+ max_overlaps = overlaps.new_zeros((num_bboxes, ))
+ if num_gt == 0:
+ # No truth, assign everything to background
+ assigned_gt_inds[:] = 0
+ if gt_labels is None:
+ assigned_labels = None
+ else:
+ assigned_labels = overlaps.new_zeros((num_bboxes, ),
+ dtype=torch.long)
+ return AssignResult(
+ num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
+
+ # compute center distance between all bbox and gt
+ gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
+ gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
+ gt_points = torch.stack((gt_cx, gt_cy), dim=1)
+
+ bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
+ bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
+ bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1)
+
+ distances = (bboxes_points[:, None, :] -
+ gt_points[None, :, :]).pow(2).sum(-1).sqrt()
+
+ # Selecting candidates based on the center distance
+ candidate_idxs = []
+ start_idx = 0
+ for level, bboxes_per_level in enumerate(num_level_bboxes):
+ # on each pyramid level, for each gt,
+ # select k bbox whose center are closest to the gt center
+ end_idx = start_idx + bboxes_per_level
+ distances_per_level = distances[start_idx:end_idx, :]
+ _, topk_idxs_per_level = distances_per_level.topk(
+ self.topk, dim=0, largest=False)
+ candidate_idxs.append(topk_idxs_per_level + start_idx)
+ start_idx = end_idx
+ candidate_idxs = torch.cat(candidate_idxs, dim=0)
+
+ # get the corresponding iou for these candidates, and compute the
+ # mean and std; set mean + std as the iou threshold
+ candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)]
+ overlaps_mean_per_gt = candidate_overlaps.mean(0)
+ overlaps_std_per_gt = candidate_overlaps.std(0)
+ overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
+
+ is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
+
+ # limit the positive sample's center in gt
+ for gt_idx in range(num_gt):
+ candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
+ ep_bboxes_cx = bboxes_cx.view(1, -1).expand(
+ num_gt, num_bboxes).contiguous().view(-1)
+ ep_bboxes_cy = bboxes_cy.view(1, -1).expand(
+ num_gt, num_bboxes).contiguous().view(-1)
+ candidate_idxs = candidate_idxs.view(-1)
+
+ # calculate the left, top, right, bottom distance between positive
+ # bbox center and gt side
+ l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]
+ t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]
+ r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt)
+ b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt)
+ is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01
+ is_pos = is_pos & is_in_gts
+
+ # if an anchor box is assigned to multiple gts,
+ # the one with the highest IoU will be selected.
+ overlaps_inf = torch.full_like(overlaps,
+ -INF).t().contiguous().view(-1)
+ index = candidate_idxs.view(-1)[is_pos.view(-1)]
+ overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]
+ overlaps_inf = overlaps_inf.view(num_gt, -1).t()
+
+ max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)
+ assigned_gt_inds[
+ max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1
+
+ if gt_labels is not None:
+ assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
+ pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
+ if pos_inds.numel() > 0:
+ assigned_labels[pos_inds] = gt_labels[
+ assigned_gt_inds[pos_inds] - 1]
+ else:
+ assigned_labels = None
+ return AssignResult(
+ num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
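A toy run of the ATSS rule above, assuming the patched `mmdet` package is importable; the boxes are chosen so that the two candidates nearest the gt center share the same IoU, which makes the mean + std threshold easy to verify by hand.

import torch

from mmdet.core.bbox.assigners import ATSSAssigner

assigner = ATSSAssigner(topk=2)
# Four boxes on a single pyramid level; only the first two overlap the gt.
bboxes = torch.tensor([[-2., 0., 6., 8.],
                       [2., 0., 10., 8.],
                       [20., 0., 28., 8.],
                       [30., 0., 38., 8.]])
gt_bboxes = torch.tensor([[0., 0., 8., 8.]])

result = assigner.assign(bboxes, num_level_bboxes=[4], gt_bboxes=gt_bboxes)
# The two nearest candidates have identical IoU, so the threshold is
# mean + 0 and both become positive; the far boxes stay background.
print(result.gt_inds)  # tensor([1, 1, 0, 0])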
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/base_assigner.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/base_assigner.py
new file mode 100644
index 000000000..7bd02dce1
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/base_assigner.py
@@ -0,0 +1,8 @@
+from abc import ABCMeta, abstractmethod
+
+
+class BaseAssigner(metaclass=ABCMeta):
+
+ @abstractmethod
+ def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
+ pass
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/max_iou_assigner.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/max_iou_assigner.py
new file mode 100644
index 000000000..93ffc42ca
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/max_iou_assigner.py
@@ -0,0 +1,195 @@
+import torch
+
+from ..geometry import bbox_overlaps
+from .assign_result import AssignResult
+from .base_assigner import BaseAssigner
+
+
+class MaxIoUAssigner(BaseAssigner):
+ """Assign a corresponding gt bbox or background to each bbox.
+
+ Each proposal will be assigned `-1`, `0`, or a positive integer
+ indicating the ground truth index.
+
+ - -1: don't care
+ - 0: negative sample, no assigned gt
+ - positive integer: positive sample, index (1-based) of assigned gt
+
+ Args:
+ pos_iou_thr (float): IoU threshold for positive bboxes.
+ neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+ min_pos_iou (float): Minimum iou for a bbox to be considered as a
+ positive bbox. Positive samples can have smaller IoU than
+ pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
+ gt_max_assign_all (bool): Whether to assign all bboxes with the same
+ highest overlap with some gt to that gt.
+ ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
+ `gt_bboxes_ignore` is specified). Negative values mean not
+ ignoring any bboxes.
+ ignore_wrt_candidates (bool): Whether to compute the iof between
+ `bboxes` and `gt_bboxes_ignore`, or the contrary.
+ gpu_assign_thr (int): The upper bound of the number of GT for GPU
+ assign. When the number of gt is above this threshold, will assign
+ on CPU device. Negative values mean not assign on CPU.
+ """
+
+ def __init__(self,
+ pos_iou_thr,
+ neg_iou_thr,
+ min_pos_iou=.0,
+ gt_max_assign_all=True,
+ ignore_iof_thr=-1,
+ ignore_wrt_candidates=True,
+ gpu_assign_thr=-1):
+ self.pos_iou_thr = pos_iou_thr
+ self.neg_iou_thr = neg_iou_thr
+ self.min_pos_iou = min_pos_iou
+ self.gt_max_assign_all = gt_max_assign_all
+ self.ignore_iof_thr = ignore_iof_thr
+ self.ignore_wrt_candidates = ignore_wrt_candidates
+ self.gpu_assign_thr = gpu_assign_thr
+
+ def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
+ """Assign gt to bboxes.
+
+ This method assigns a gt bbox to every bbox (proposal/anchor). Each bbox
+ will be assigned -1, 0, or a positive number. -1 means don't care,
+ 0 means negative sample, and a positive number is the index (1-based)
+ of the assigned gt.
+ The assignment is done in the following steps; the order matters.
+
+ 1. assign every bbox to -1
+ 2. assign proposals whose iou with all gts < neg_iou_thr to 0
+ 3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
+ assign it to that gt
+ 4. for each gt bbox, assign its nearest proposals (may be more than
+ one) to itself
+
+ Args:
+ bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
+ gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
+ gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
+ labelled as `ignored`, e.g., crowd boxes in COCO.
+ gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
+
+ Returns:
+ :obj:`AssignResult`: The assign result.
+
+ Example:
+ >>> self = MaxIoUAssigner(0.5, 0.5)
+ >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
+ >>> gt_bboxes = torch.Tensor([[0, 0, 10, 9]])
+ >>> assign_result = self.assign(bboxes, gt_bboxes)
+ >>> expected_gt_inds = torch.LongTensor([1, 0])
+ >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
+ """
+ assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
+ gt_bboxes.shape[0] > self.gpu_assign_thr) else False
+ # compute overlap and assign gt on CPU when number of GT is large
+ if assign_on_cpu:
+ device = bboxes.device
+ bboxes = bboxes.cpu()
+ gt_bboxes = gt_bboxes.cpu()
+ if gt_bboxes_ignore is not None:
+ gt_bboxes_ignore = gt_bboxes_ignore.cpu()
+ if gt_labels is not None:
+ gt_labels = gt_labels.cpu()
+
+ bboxes = bboxes[:, :4]
+ overlaps = bbox_overlaps(gt_bboxes, bboxes)
+
+ if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
+ gt_bboxes_ignore.numel() > 0):
+ if self.ignore_wrt_candidates:
+ ignore_overlaps = bbox_overlaps(
+ bboxes, gt_bboxes_ignore, mode='iof')
+ ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
+ else:
+ ignore_overlaps = bbox_overlaps(
+ gt_bboxes_ignore, bboxes, mode='iof')
+ ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
+ overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
+
+ assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
+ if assign_on_cpu:
+ assign_result.gt_inds = assign_result.gt_inds.to(device)
+ assign_result.max_overlaps = assign_result.max_overlaps.to(device)
+ if assign_result.labels is not None:
+ assign_result.labels = assign_result.labels.to(device)
+ return assign_result
+
+ def assign_wrt_overlaps(self, overlaps, gt_labels=None):
+ """Assign w.r.t. the overlaps of bboxes with gts.
+
+ Args:
+ overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
+ shape(k, n).
+ gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
+
+ Returns:
+ :obj:`AssignResult`: The assign result.
+ """
+ num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
+
+ # 1. assign -1 by default
+ assigned_gt_inds = overlaps.new_full((num_bboxes, ),
+ -1,
+ dtype=torch.long)
+
+ if num_gts == 0 or num_bboxes == 0:
+ # No ground truth or boxes, return empty assignment
+ max_overlaps = overlaps.new_zeros((num_bboxes, ))
+ if num_gts == 0:
+ # No truth, assign everything to background
+ assigned_gt_inds[:] = 0
+ if gt_labels is None:
+ assigned_labels = None
+ else:
+ assigned_labels = overlaps.new_zeros((num_bboxes, ),
+ dtype=torch.long)
+ return AssignResult(
+ num_gts,
+ assigned_gt_inds,
+ max_overlaps,
+ labels=assigned_labels)
+
+ # for each anchor, which gt best overlaps with it
+ # for each anchor, the max iou of all gts
+ max_overlaps, argmax_overlaps = overlaps.max(dim=0)
+ # for each gt, which anchor best overlaps with it
+ # for each gt, the max iou of all proposals
+ gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
+
+ # 2. assign negative: below
+ if isinstance(self.neg_iou_thr, float):
+ assigned_gt_inds[(max_overlaps >= 0)
+ & (max_overlaps < self.neg_iou_thr)] = 0
+ elif isinstance(self.neg_iou_thr, tuple):
+ assert len(self.neg_iou_thr) == 2
+ assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
+ & (max_overlaps < self.neg_iou_thr[1])] = 0
+
+ # 3. assign positive: above positive IoU threshold
+ pos_inds = max_overlaps >= self.pos_iou_thr
+ assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
+
+ # 4. assign fg: for each gt, proposals with highest IoU
+ for i in range(num_gts):
+ if gt_max_overlaps[i] >= self.min_pos_iou:
+ if self.gt_max_assign_all:
+ max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
+ assigned_gt_inds[max_iou_inds] = i + 1
+ else:
+ assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
+
+ if gt_labels is not None:
+ assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
+ pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
+ if pos_inds.numel() > 0:
+ assigned_labels[pos_inds] = gt_labels[
+ assigned_gt_inds[pos_inds] - 1]
+ else:
+ assigned_labels = None
+
+ return AssignResult(
+ num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
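A toy call of `assign_wrt_overlaps` that exercises all four steps, including the step-4 rescue of a proposal whose IoU is below `pos_iou_thr`; it assumes the patched `mmdet` package is importable, and the overlap matrix is illustrative.

import torch

from mmdet.core.bbox.assigners import MaxIoUAssigner

assigner = MaxIoUAssigner(pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0.3)
# Overlaps between 2 gts (rows) and 3 proposals (columns).
overlaps = torch.tensor([[0.9, 0.2, 0.45],
                         [0.1, 0.3, 0.35]])
result = assigner.assign_wrt_overlaps(overlaps,
                                      gt_labels=torch.LongTensor([5, 7]))
# Proposal 0 passes pos_iou_thr, proposal 1 falls below neg_iou_thr, and
# proposal 2 is rescued by step 4 as the best match for gt 2.
print(result.gt_inds)  # tensor([1, 0, 2])
print(result.labels)   # tensor([5, 0, 7])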
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/point_assigner.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/point_assigner.py
new file mode 100644
index 000000000..263b3096c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/point_assigner.py
@@ -0,0 +1,130 @@
+import torch
+
+from .assign_result import AssignResult
+from .base_assigner import BaseAssigner
+
+
+class PointAssigner(BaseAssigner):
+ """Assign a corresponding gt bbox or background to each point.
+
+ Each proposal will be assigned `0` or a positive integer
+ indicating the ground truth index.
+
+ - 0: negative sample, no assigned gt
+ - positive integer: positive sample, index (1-based) of assigned gt
+
+ """
+
+ def __init__(self, scale=4, pos_num=3):
+ self.scale = scale
+ self.pos_num = pos_num
+
+ def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
+ """Assign gt to points.
+
+ This method assigns a gt bbox to every point set. Each point set
+ will be assigned 0 or a positive number.
+ 0 means negative sample, and a positive number is the index (1-based)
+ of the assigned gt.
+ The assignment is done in the following steps; the order matters.
+
+ 1. assign every point to 0
+ 2. a point is assigned to some gt bbox if
+ (i) the point is among the k closest points to the gt bbox, and
+ (ii) the distance between this point and this gt is smaller than
+ the distance to any other gt bbox
+
+ Args:
+ points (Tensor): points to be assigned, shape (n, 3), where the last
+ dimension stands for (x, y, stride).
+ gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
+ gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
+ labelled as `ignored`, e.g., crowd boxes in COCO.
+ NOTE: currently unused.
+ gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
+
+ Returns:
+ :obj:`AssignResult`: The assign result.
+ """
+ num_points = points.shape[0]
+ num_gts = gt_bboxes.shape[0]
+
+ if num_gts == 0 or num_points == 0:
+ # If no truth assign everything to the background
+ assigned_gt_inds = points.new_full((num_points, ),
+ 0,
+ dtype=torch.long)
+ if gt_labels is None:
+ assigned_labels = None
+ else:
+ assigned_labels = points.new_zeros((num_points, ),
+ dtype=torch.long)
+ return AssignResult(
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
+
+ points_xy = points[:, :2]
+ points_stride = points[:, 2]
+ points_lvl = torch.log2(
+ points_stride).int() # [3...,4...,5...,6...,7...]
+ lvl_min, lvl_max = points_lvl.min(), points_lvl.max()
+
+ # assign gt box
+ gt_bboxes_xy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:]) / 2
+ gt_bboxes_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)
+ scale = self.scale
+ gt_bboxes_lvl = ((torch.log2(gt_bboxes_wh[:, 0] / scale) +
+ torch.log2(gt_bboxes_wh[:, 1] / scale)) / 2).int()
+ gt_bboxes_lvl = torch.clamp(gt_bboxes_lvl, min=lvl_min, max=lvl_max)
+
+ # stores the assigned gt index of each point
+ assigned_gt_inds = points.new_zeros((num_points, ), dtype=torch.long)
+ # stores the assigned gt dist (to this point) of each point
+ assigned_gt_dist = points.new_full((num_points, ), float('inf'))
+ points_range = torch.arange(points.shape[0])
+
+ for idx in range(num_gts):
+ gt_lvl = gt_bboxes_lvl[idx]
+ # get the index of points in this level
+ lvl_idx = gt_lvl == points_lvl
+ points_index = points_range[lvl_idx]
+ # get the points in this level
+ lvl_points = points_xy[lvl_idx, :]
+ # get the center point of gt
+ gt_point = gt_bboxes_xy[[idx], :]
+ # get width and height of gt
+ gt_wh = gt_bboxes_wh[[idx], :]
+ # compute the distance between gt center and
+ # all points in this level
+ points_gt_dist = ((lvl_points - gt_point) / gt_wh).norm(dim=1)
+ # find the nearest k points to gt center in this level
+ min_dist, min_dist_index = torch.topk(
+ points_gt_dist, self.pos_num, largest=False)
+ # the index of nearest k points to gt center in this level
+ min_dist_points_index = points_index[min_dist_index]
+ # less_than_recorded_index stores the indices of min_dist that are
+ # less than the recorded assigned_gt_dist, where assigned_gt_dist
+ # stores the dist from the previously assigned gt (if any) to each
+ # point.
+ less_than_recorded_index = min_dist < assigned_gt_dist[
+ min_dist_points_index]
+ # min_dist_points_index stores the indices of points that satisfy:
+ # (1) they are among the k nearest to the current gt center on this level;
+ # (2) they are closer to the current gt center than to any other gt center.
+ min_dist_points_index = min_dist_points_index[
+ less_than_recorded_index]
+ # assign the result
+ assigned_gt_inds[min_dist_points_index] = idx + 1
+ assigned_gt_dist[min_dist_points_index] = min_dist[
+ less_than_recorded_index]
+
+ if gt_labels is not None:
+ assigned_labels = assigned_gt_inds.new_zeros((num_points, ))
+ pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
+ if pos_inds.numel() > 0:
+ assigned_labels[pos_inds] = gt_labels[
+ assigned_gt_inds[pos_inds] - 1]
+ else:
+ assigned_labels = None
+
+ return AssignResult(
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
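A small sketch of `PointAssigner` on a single FPN level, assuming the patched `mmdet` package is importable; the points and gt box are illustrative.

import torch

from mmdet.core.bbox.assigners import PointAssigner

assigner = PointAssigner(scale=4, pos_num=1)
# Four points with stride 8 (all on FPN level log2(8) = 3).
points = torch.tensor([[4., 4., 8.],
                       [12., 4., 8.],
                       [20., 4., 8.],
                       [36., 4., 8.]])
# One 32x32 gt box; sqrt(w * h) / scale maps it to the same level.
gt_bboxes = torch.tensor([[8., 0., 40., 32.]])

result = assigner.assign(points, gt_bboxes)
# With pos_num=1, only the point closest to the gt center (24, 16) is positive.
print(result.gt_inds)  # tensor([0, 0, 1, 0])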
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/bbox_target.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/bbox_target.py
new file mode 100644
index 000000000..2a918bf87
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/bbox_target.py
@@ -0,0 +1,73 @@
+import torch
+
+from ..utils import multi_apply
+from .transforms import bbox2delta
+
+
+def bbox_target(pos_bboxes_list,
+ neg_bboxes_list,
+ pos_gt_bboxes_list,
+ pos_gt_labels_list,
+ cfg,
+ reg_classes=1,
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0],
+ concat=True):
+ labels, label_weights, bbox_targets, bbox_weights = multi_apply(
+ bbox_target_single,
+ pos_bboxes_list,
+ neg_bboxes_list,
+ pos_gt_bboxes_list,
+ pos_gt_labels_list,
+ cfg=cfg,
+ reg_classes=reg_classes,
+ target_means=target_means,
+ target_stds=target_stds)
+
+ if concat:
+ labels = torch.cat(labels, 0)
+ label_weights = torch.cat(label_weights, 0)
+ bbox_targets = torch.cat(bbox_targets, 0)
+ bbox_weights = torch.cat(bbox_weights, 0)
+ return labels, label_weights, bbox_targets, bbox_weights
+
+
+def bbox_target_single(pos_bboxes,
+ neg_bboxes,
+ pos_gt_bboxes,
+ pos_gt_labels,
+ cfg,
+ reg_classes=1,
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]):
+ num_pos = pos_bboxes.size(0)
+ num_neg = neg_bboxes.size(0)
+ num_samples = num_pos + num_neg
+ labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
+ label_weights = pos_bboxes.new_zeros(num_samples)
+ bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
+ bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
+ if num_pos > 0:
+ labels[:num_pos] = pos_gt_labels
+ pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
+ label_weights[:num_pos] = pos_weight
+ pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
+ target_stds)
+ bbox_targets[:num_pos, :] = pos_bbox_targets
+ bbox_weights[:num_pos, :] = 1
+ if num_neg > 0:
+ label_weights[-num_neg:] = 1.0
+
+ return labels, label_weights, bbox_targets, bbox_weights
+
+
+def expand_target(bbox_targets, bbox_weights, labels, num_classes):
+ bbox_targets_expand = bbox_targets.new_zeros(
+ (bbox_targets.size(0), 4 * num_classes))
+ bbox_weights_expand = bbox_weights.new_zeros(
+ (bbox_weights.size(0), 4 * num_classes))
+ for i in torch.nonzero(labels > 0).squeeze(-1):
+ start, end = labels[i] * 4, (labels[i] + 1) * 4
+ bbox_targets_expand[i, start:end] = bbox_targets[i, :]
+ bbox_weights_expand[i, start:end] = bbox_weights[i, :]
+ return bbox_targets_expand, bbox_weights_expand
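+
+# A minimal, illustrative sanity check of expand_target (assuming the
+# (N, 4 * num_classes) layout built above): with num_classes=3, a sample
+# labelled 2 has its 4-d target scattered into columns 8:12, which is the
+# layout consumed by class-specific regression branches.
+#
+#   labels = torch.tensor([2, 0])
+#   bbox_targets = torch.tensor([[1., 2., 3., 4.], [0., 0., 0., 0.]])
+#   bbox_weights = torch.ones_like(bbox_targets)
+#   t, w = expand_target(bbox_targets, bbox_weights, labels, num_classes=3)
+#   assert t.shape == (2, 12) and t[0, 8:12].tolist() == [1., 2., 3., 4.]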
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/demodata.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/demodata.py
new file mode 100644
index 000000000..d59d65427
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/demodata.py
@@ -0,0 +1,65 @@
+import numpy as np
+import torch
+
+
+def ensure_rng(rng=None):
+ """
+ Simple version of ``kwarray.ensure_rng``.
+
+ Args:
+ rng (int | numpy.random.RandomState | None):
+ if None, then defaults to the global rng. Otherwise this can be an
+ integer or a RandomState class
+ Returns:
+ (numpy.random.RandomState) : rng -
+ a numpy random number generator
+
+ References:
+ https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
+ """
+
+ if rng is None:
+ rng = np.random.mtrand._rand
+ elif isinstance(rng, int):
+ rng = np.random.RandomState(rng)
+ else:
+ rng = rng
+ return rng
+
+
+def random_boxes(num=1, scale=1, rng=None):
+ """
+ Simple version of ``kwimage.Boxes.random``
+
+ Returns:
+ Tensor: shape (n, 4) in x1, y1, x2, y2 format.
+
+ References:
+ https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390
+
+ Example:
+ >>> num = 3
+ >>> scale = 512
+ >>> rng = 0
+ >>> boxes = random_boxes(num, scale, rng)
+ >>> print(boxes)
+ tensor([[280.9925, 278.9802, 308.6148, 366.1769],
+ [216.9113, 330.6978, 224.0446, 456.5878],
+ [405.3632, 196.3221, 493.3953, 270.7942]])
+ """
+ rng = ensure_rng(rng)
+
+ tlbr = rng.rand(num, 4).astype(np.float32)
+
+ tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
+ tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
+ br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
+ br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])
+
+ tlbr[:, 0] = tl_x * scale
+ tlbr[:, 1] = tl_y * scale
+ tlbr[:, 2] = br_x * scale
+ tlbr[:, 3] = br_y * scale
+
+ boxes = torch.from_numpy(tlbr)
+ return boxes
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/geometry.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/geometry.py
new file mode 100644
index 000000000..ff7c5d4fa
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/geometry.py
@@ -0,0 +1,88 @@
+import torch
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
+ """Calculate overlap between two sets of bboxes.
+
+ If ``is_aligned`` is ``False``, then calculate the ious between each bbox
+ of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+ bboxes1 and bboxes2.
+
+ Args:
+ bboxes1 (Tensor): shape (m, 4) in (x1, y1, x2, y2) format.
+ bboxes2 (Tensor): shape (n, 4) in (x1, y1, x2, y2) format.
+ If is_aligned is ``True``, then m and n must be equal.
+ mode (str): "iou" (intersection over union) or "iof" (intersection over
+ foreground).
+
+ Returns:
+ ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
+
+ Example:
+ >>> bboxes1 = torch.FloatTensor([
+ >>> [0, 0, 10, 10],
+ >>> [10, 10, 20, 20],
+ >>> [32, 32, 38, 42],
+ >>> ])
+ >>> bboxes2 = torch.FloatTensor([
+ >>> [0, 0, 10, 20],
+ >>> [0, 10, 10, 19],
+ >>> [10, 10, 20, 20],
+ >>> ])
+ >>> bbox_overlaps(bboxes1, bboxes2)
+ tensor([[0.5238, 0.0500, 0.0041],
+ [0.0323, 0.0452, 1.0000],
+ [0.0000, 0.0000, 0.0000]])
+
+ Example:
+ >>> empty = torch.FloatTensor([])
+ >>> nonempty = torch.FloatTensor([
+ >>> [0, 0, 10, 9],
+ >>> ])
+ >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
+ >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
+ >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
+ """
+
+ assert mode in ['iou', 'iof']
+
+ rows = bboxes1.size(0)
+ cols = bboxes2.size(0)
+ if is_aligned:
+ assert rows == cols
+
+ if rows * cols == 0:
+ return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
+
+ if is_aligned:
+ lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
+ rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
+
+ wh = (rb - lt + 1).clamp(min=0) # [rows, 2]
+ overlap = wh[:, 0] * wh[:, 1]
+ area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+ bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+ if mode == 'iou':
+ area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+ bboxes2[:, 3] - bboxes2[:, 1] + 1)
+ ious = overlap / (area1 + area2 - overlap)
+ else:
+ ious = overlap / area1
+ else:
+ lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
+ rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
+
+ wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2]
+ overlap = wh[:, :, 0] * wh[:, :, 1]
+ area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+ bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+ if mode == 'iou':
+ area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+ bboxes2[:, 3] - bboxes2[:, 1] + 1)
+ ious = overlap / (area1[:, None] + area2 - overlap)
+ else:
+ ious = overlap / (area1[:, None])
+
+ return ious
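+
+# Illustrative check of the aligned mode (a minimal sketch; boxes follow the
+# inclusive (x1, y1, x2, y2) convention with the +1 width/height used above):
+#
+#   b = torch.FloatTensor([[0, 0, 10, 10], [10, 10, 20, 20]])
+#   bbox_overlaps(b, b, is_aligned=True)   # -> tensor([1., 1.])
+#   bbox_overlaps(b, b, mode='iof')        # diagonal == 1, off-diagonal < 1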
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/__init__.py
new file mode 100644
index 000000000..d709d8ecb
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/__init__.py
@@ -0,0 +1,14 @@
+from .base_sampler import BaseSampler
+from .combined_sampler import CombinedSampler
+from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
+from .iou_balanced_neg_sampler import IoUBalancedNegSampler
+from .ohem_sampler import OHEMSampler
+from .pseudo_sampler import PseudoSampler
+from .random_sampler import RandomSampler
+from .sampling_result import SamplingResult
+
+__all__ = [
+ 'BaseSampler', 'PseudoSampler', 'RandomSampler',
+ 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
+ 'OHEMSampler', 'SamplingResult'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/base_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/base_sampler.py
new file mode 100644
index 000000000..f437195f6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/base_sampler.py
@@ -0,0 +1,98 @@
+from abc import ABCMeta, abstractmethod
+
+import torch
+
+from .sampling_result import SamplingResult
+
+
+class BaseSampler(metaclass=ABCMeta):
+
+ def __init__(self,
+ num,
+ pos_fraction,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True,
+ **kwargs):
+ self.num = num
+ self.pos_fraction = pos_fraction
+ self.neg_pos_ub = neg_pos_ub
+ self.add_gt_as_proposals = add_gt_as_proposals
+ self.pos_sampler = self
+ self.neg_sampler = self
+
+ @abstractmethod
+ def _sample_pos(self, assign_result, num_expected, **kwargs):
+ pass
+
+ @abstractmethod
+ def _sample_neg(self, assign_result, num_expected, **kwargs):
+ pass
+
+ def sample(self,
+ assign_result,
+ bboxes,
+ gt_bboxes,
+ gt_labels=None,
+ **kwargs):
+ """Sample positive and negative bboxes.
+
+ This is a simple implementation of bbox sampling given candidates,
+ assigning results and ground truth bboxes.
+
+ Args:
+ assign_result (:obj:`AssignResult`): Bbox assigning results.
+ bboxes (Tensor): Boxes to be sampled from.
+ gt_bboxes (Tensor): Ground truth bboxes.
+ gt_labels (Tensor, optional): Class labels of ground truth bboxes.
+
+ Returns:
+ :obj:`SamplingResult`: Sampling result.
+
+ Example:
+ >>> from mmdet.core.bbox import RandomSampler
+ >>> from mmdet.core.bbox import AssignResult
+ >>> from mmdet.core.bbox.demodata import ensure_rng, random_boxes
+ >>> rng = ensure_rng(None)
+ >>> assign_result = AssignResult.random(rng=rng)
+ >>> bboxes = random_boxes(assign_result.num_preds, rng=rng)
+ >>> gt_bboxes = random_boxes(assign_result.num_gts, rng=rng)
+ >>> gt_labels = None
+ >>> self = RandomSampler(num=32, pos_fraction=0.5, neg_pos_ub=-1,
+ >>> add_gt_as_proposals=False)
+ >>> self = self.sample(assign_result, bboxes, gt_bboxes, gt_labels)
+ """
+ if len(bboxes.shape) < 2:
+ bboxes = bboxes[None, :]
+
+ bboxes = bboxes[:, :4]
+
+ gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
+ if self.add_gt_as_proposals and len(gt_bboxes) > 0:
+ if gt_labels is None:
+ raise ValueError(
+ 'gt_labels must be given when add_gt_as_proposals is True')
+ bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
+ assign_result.add_gt_(gt_labels)
+ gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
+ gt_flags = torch.cat([gt_ones, gt_flags])
+
+ num_expected_pos = int(self.num * self.pos_fraction)
+ pos_inds = self.pos_sampler._sample_pos(
+ assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
+ # We found that sampled indices have duplicated items occasionally.
+ # (may be a bug of PyTorch)
+ pos_inds = pos_inds.unique()
+ num_sampled_pos = pos_inds.numel()
+ num_expected_neg = self.num - num_sampled_pos
+ if self.neg_pos_ub >= 0:
+ _pos = max(1, num_sampled_pos)
+ neg_upper_bound = int(self.neg_pos_ub * _pos)
+ if num_expected_neg > neg_upper_bound:
+ num_expected_neg = neg_upper_bound
+ neg_inds = self.neg_sampler._sample_neg(
+ assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
+ neg_inds = neg_inds.unique()
+
+ sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
+ assign_result, gt_flags)
+ return sampling_result
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/combined_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/combined_sampler.py
new file mode 100644
index 000000000..351a097f6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/combined_sampler.py
@@ -0,0 +1,16 @@
+from ..assign_sampling import build_sampler
+from .base_sampler import BaseSampler
+
+
+class CombinedSampler(BaseSampler):
+
+ def __init__(self, pos_sampler, neg_sampler, **kwargs):
+ super(CombinedSampler, self).__init__(**kwargs)
+ self.pos_sampler = build_sampler(pos_sampler, **kwargs)
+ self.neg_sampler = build_sampler(neg_sampler, **kwargs)
+
+ def _sample_pos(self, **kwargs):
+ raise NotImplementedError
+
+ def _sample_neg(self, **kwargs):
+ raise NotImplementedError
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
new file mode 100644
index 000000000..bc829a236
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
@@ -0,0 +1,41 @@
+import numpy as np
+import torch
+
+from .random_sampler import RandomSampler
+
+
+class InstanceBalancedPosSampler(RandomSampler):
+
+ def _sample_pos(self, assign_result, num_expected, **kwargs):
+ pos_inds = torch.nonzero(assign_result.gt_inds > 0)
+ if pos_inds.numel() != 0:
+ pos_inds = pos_inds.squeeze(1)
+ if pos_inds.numel() <= num_expected:
+ return pos_inds
+ else:
+ unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
+ num_gts = len(unique_gt_inds)
+ num_per_gt = int(round(num_expected / float(num_gts)) + 1)
+ sampled_inds = []
+ for i in unique_gt_inds:
+ inds = torch.nonzero(assign_result.gt_inds == i.item())
+ if inds.numel() != 0:
+ inds = inds.squeeze(1)
+ else:
+ continue
+ if len(inds) > num_per_gt:
+ inds = self.random_choice(inds, num_per_gt)
+ sampled_inds.append(inds)
+ sampled_inds = torch.cat(sampled_inds)
+ if len(sampled_inds) < num_expected:
+ num_extra = num_expected - len(sampled_inds)
+ extra_inds = np.array(
+ list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
+ if len(extra_inds) > num_extra:
+ extra_inds = self.random_choice(extra_inds, num_extra)
+ extra_inds = torch.from_numpy(extra_inds).to(
+ assign_result.gt_inds.device).long()
+ sampled_inds = torch.cat([sampled_inds, extra_inds])
+ elif len(sampled_inds) > num_expected:
+ sampled_inds = self.random_choice(sampled_inds, num_expected)
+ return sampled_inds
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
new file mode 100644
index 000000000..d9239e070
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
@@ -0,0 +1,135 @@
+import numpy as np
+import torch
+
+from .random_sampler import RandomSampler
+
+
+class IoUBalancedNegSampler(RandomSampler):
+ """IoU Balanced Sampling
+
+ arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
+
+ Sampling proposals according to their IoU. `floor_fraction` of the required
+ RoIs are randomly sampled from proposals whose IoU is lower than `floor_thr`.
+ The rest are sampled from proposals whose IoU is higher than `floor_thr`;
+ these are drawn evenly from `num_bins` bins that evenly partition the IoU
+ range above `floor_thr`.
+
+ Args:
+ num (int): number of proposals.
+ pos_fraction (float): fraction of positive proposals.
+ floor_thr (float): threshold (minimum) IoU for IoU balanced sampling;
+ set to -1 to apply IoU balanced sampling to all proposals.
+ floor_fraction (float): sampling fraction of proposals under floor_thr.
+ num_bins (int): number of bins in IoU balanced sampling.
+ """
+
+ def __init__(self,
+ num,
+ pos_fraction,
+ floor_thr=-1,
+ floor_fraction=0,
+ num_bins=3,
+ **kwargs):
+ super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
+ **kwargs)
+ assert floor_thr >= 0 or floor_thr == -1
+ assert 0 <= floor_fraction <= 1
+ assert num_bins >= 1
+
+ self.floor_thr = floor_thr
+ self.floor_fraction = floor_fraction
+ self.num_bins = num_bins
+
+ def sample_via_interval(self, max_overlaps, full_set, num_expected):
+ max_iou = max_overlaps.max()
+ iou_interval = (max_iou - self.floor_thr) / self.num_bins
+ per_num_expected = int(num_expected / self.num_bins)
+
+ sampled_inds = []
+ for i in range(self.num_bins):
+ start_iou = self.floor_thr + i * iou_interval
+ end_iou = self.floor_thr + (i + 1) * iou_interval
+ tmp_set = set(
+ np.where(
+ np.logical_and(max_overlaps >= start_iou,
+ max_overlaps < end_iou))[0])
+ tmp_inds = list(tmp_set & full_set)
+ if len(tmp_inds) > per_num_expected:
+ tmp_sampled_set = self.random_choice(tmp_inds,
+ per_num_expected)
+ else:
+ tmp_sampled_set = np.array(tmp_inds, dtype=np.int64)
+ sampled_inds.append(tmp_sampled_set)
+
+ sampled_inds = np.concatenate(sampled_inds)
+ if len(sampled_inds) < num_expected:
+ num_extra = num_expected - len(sampled_inds)
+ extra_inds = np.array(list(full_set - set(sampled_inds)))
+ if len(extra_inds) > num_extra:
+ extra_inds = self.random_choice(extra_inds, num_extra)
+ sampled_inds = np.concatenate([sampled_inds, extra_inds])
+
+ return sampled_inds
+
+ def _sample_neg(self, assign_result, num_expected, **kwargs):
+ neg_inds = torch.nonzero(assign_result.gt_inds == 0)
+ if neg_inds.numel() != 0:
+ neg_inds = neg_inds.squeeze(1)
+ if len(neg_inds) <= num_expected:
+ return neg_inds
+ else:
+ max_overlaps = assign_result.max_overlaps.cpu().numpy()
+ # balance sampling for negative samples
+ neg_set = set(neg_inds.cpu().numpy())
+
+ if self.floor_thr > 0:
+ floor_set = set(
+ np.where(
+ np.logical_and(max_overlaps >= 0,
+ max_overlaps < self.floor_thr))[0])
+ iou_sampling_set = set(
+ np.where(max_overlaps >= self.floor_thr)[0])
+ elif self.floor_thr == 0:
+ floor_set = set(np.where(max_overlaps == 0)[0])
+ iou_sampling_set = set(
+ np.where(max_overlaps > self.floor_thr)[0])
+ else:
+ floor_set = set()
+ iou_sampling_set = set(
+ np.where(max_overlaps > self.floor_thr)[0])
+ # for sampling interval calculation
+ self.floor_thr = 0
+
+ floor_neg_inds = list(floor_set & neg_set)
+ iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
+ num_expected_iou_sampling = int(num_expected *
+ (1 - self.floor_fraction))
+ if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
+ if self.num_bins >= 2:
+ iou_sampled_inds = self.sample_via_interval(
+ max_overlaps, set(iou_sampling_neg_inds),
+ num_expected_iou_sampling)
+ else:
+ iou_sampled_inds = self.random_choice(
+ iou_sampling_neg_inds, num_expected_iou_sampling)
+ else:
+ iou_sampled_inds = np.array(
+ iou_sampling_neg_inds, dtype=np.int64)
+ num_expected_floor = num_expected - len(iou_sampled_inds)
+ if len(floor_neg_inds) > num_expected_floor:
+ sampled_floor_inds = self.random_choice(
+ floor_neg_inds, num_expected_floor)
+ else:
+ sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int64)
+ sampled_inds = np.concatenate(
+ (sampled_floor_inds, iou_sampled_inds))
+ if len(sampled_inds) < num_expected:
+ num_extra = num_expected - len(sampled_inds)
+ extra_inds = np.array(list(neg_set - set(sampled_inds)))
+ if len(extra_inds) > num_extra:
+ extra_inds = self.random_choice(extra_inds, num_extra)
+ sampled_inds = np.concatenate((sampled_inds, extra_inds))
+ sampled_inds = torch.from_numpy(sampled_inds).long().to(
+ assign_result.gt_inds.device)
+ return sampled_inds
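+
+# Usage sketch with hypothetical values in the spirit of a Libra R-CNN style
+# setup; `assigner`, `proposals`, `gt_bboxes` and `gt_labels` are assumed to
+# come from the detection head and are placeholders here:
+#
+#   neg_sampler = IoUBalancedNegSampler(num=512, pos_fraction=0.25,
+#                                       floor_thr=-1, floor_fraction=0,
+#                                       num_bins=3)
+#   assign_result = assigner.assign(proposals, gt_bboxes, None, gt_labels)
+#   sampling_result = neg_sampler.sample(assign_result, proposals,
+#                                        gt_bboxes, gt_labels)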
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/ohem_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/ohem_sampler.py
new file mode 100644
index 000000000..3701d83ac
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/ohem_sampler.py
@@ -0,0 +1,79 @@
+import torch
+
+from ..transforms import bbox2roi
+from .base_sampler import BaseSampler
+
+
+class OHEMSampler(BaseSampler):
+ """
+ Online Hard Example Mining Sampler described in [1]_.
+
+ References:
+ .. [1] https://arxiv.org/pdf/1604.03540.pdf
+ """
+
+ def __init__(self,
+ num,
+ pos_fraction,
+ context,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True,
+ **kwargs):
+ super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
+ add_gt_as_proposals)
+ if not hasattr(context, 'num_stages'):
+ self.bbox_roi_extractor = context.bbox_roi_extractor
+ self.bbox_head = context.bbox_head
+ else:
+ self.bbox_roi_extractor = context.bbox_roi_extractor[
+ context.current_stage]
+ self.bbox_head = context.bbox_head[context.current_stage]
+
+ def hard_mining(self, inds, num_expected, bboxes, labels, feats):
+ with torch.no_grad():
+ rois = bbox2roi([bboxes])
+ bbox_feats = self.bbox_roi_extractor(
+ feats[:self.bbox_roi_extractor.num_inputs], rois)
+ cls_score, _ = self.bbox_head(bbox_feats)
+ loss = self.bbox_head.loss(
+ cls_score=cls_score,
+ bbox_pred=None,
+ labels=labels,
+ label_weights=cls_score.new_ones(cls_score.size(0)),
+ bbox_targets=None,
+ bbox_weights=None,
+ reduction_override='none')['loss_cls']
+ _, topk_loss_inds = loss.topk(num_expected)
+ return inds[topk_loss_inds]
+
+ def _sample_pos(self,
+ assign_result,
+ num_expected,
+ bboxes=None,
+ feats=None,
+ **kwargs):
+ # Sample some hard positive samples
+ pos_inds = torch.nonzero(assign_result.gt_inds > 0)
+ if pos_inds.numel() != 0:
+ pos_inds = pos_inds.squeeze(1)
+ if pos_inds.numel() <= num_expected:
+ return pos_inds
+ else:
+ return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
+ assign_result.labels[pos_inds], feats)
+
+ def _sample_neg(self,
+ assign_result,
+ num_expected,
+ bboxes=None,
+ feats=None,
+ **kwargs):
+ # Sample some hard negative samples
+ neg_inds = torch.nonzero(assign_result.gt_inds == 0)
+ if neg_inds.numel() != 0:
+ neg_inds = neg_inds.squeeze(1)
+ if len(neg_inds) <= num_expected:
+ return neg_inds
+ else:
+ return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
+ assign_result.labels[neg_inds], feats)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/pseudo_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/pseudo_sampler.py
new file mode 100644
index 000000000..b4c2ea09b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/pseudo_sampler.py
@@ -0,0 +1,26 @@
+import torch
+
+from .base_sampler import BaseSampler
+from .sampling_result import SamplingResult
+
+
+class PseudoSampler(BaseSampler):
+
+ def __init__(self, **kwargs):
+ pass
+
+ def _sample_pos(self, **kwargs):
+ raise NotImplementedError
+
+ def _sample_neg(self, **kwargs):
+ raise NotImplementedError
+
+ def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
+ pos_inds = torch.nonzero(
+ assign_result.gt_inds > 0).squeeze(-1).unique()
+ neg_inds = torch.nonzero(
+ assign_result.gt_inds == 0).squeeze(-1).unique()
+ gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
+ sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
+ assign_result, gt_flags)
+ return sampling_result
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/random_sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/random_sampler.py
new file mode 100644
index 000000000..3db00bab0
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/random_sampler.py
@@ -0,0 +1,54 @@
+import numpy as np
+import torch
+
+from .base_sampler import BaseSampler
+
+
+class RandomSampler(BaseSampler):
+
+ def __init__(self,
+ num,
+ pos_fraction,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True,
+ **kwargs):
+ from mmdet.core.bbox import demodata
+ super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
+ add_gt_as_proposals)
+ self.rng = demodata.ensure_rng(kwargs.get('rng', None))
+
+ def random_choice(self, gallery, num):
+ """Randomly select some elements from the gallery.
+
+ It seems that PyTorch's implementation is slower than numpy's, so we
+ use numpy to randperm the indices.
+ """
+ assert len(gallery) >= num
+ if isinstance(gallery, list):
+ gallery = np.array(gallery)
+ cands = np.arange(len(gallery))
+ self.rng.shuffle(cands)
+ rand_inds = cands[:num]
+ if not isinstance(gallery, np.ndarray):
+ rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
+ return gallery[rand_inds]
+
+ def _sample_pos(self, assign_result, num_expected, **kwargs):
+ """Randomly sample some positive samples."""
+ pos_inds = torch.nonzero(assign_result.gt_inds > 0)
+ if pos_inds.numel() != 0:
+ pos_inds = pos_inds.squeeze(1)
+ if pos_inds.numel() <= num_expected:
+ return pos_inds
+ else:
+ return self.random_choice(pos_inds, num_expected)
+
+ def _sample_neg(self, assign_result, num_expected, **kwargs):
+ """Randomly sample some negative samples."""
+ neg_inds = torch.nonzero(assign_result.gt_inds == 0)
+ if neg_inds.numel() != 0:
+ neg_inds = neg_inds.squeeze(1)
+ if len(neg_inds) <= num_expected:
+ return neg_inds
+ else:
+ return self.random_choice(neg_inds, num_expected)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/sampling_result.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/sampling_result.py
new file mode 100644
index 000000000..dcf25eecd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/sampling_result.py
@@ -0,0 +1,154 @@
+import torch
+
+from mmdet.utils import util_mixins
+
+
+class SamplingResult(util_mixins.NiceRepr):
+ """
+ Example:
+ >>> # xdoctest: +IGNORE_WANT
+ >>> from mmdet.core.bbox.samplers.sampling_result import * # NOQA
+ >>> self = SamplingResult.random(rng=10)
+ >>> print('self = {}'.format(self))
+ self =
+ """
+
+ def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
+ gt_flags):
+ self.pos_inds = pos_inds
+ self.neg_inds = neg_inds
+ self.pos_bboxes = bboxes[pos_inds]
+ self.neg_bboxes = bboxes[neg_inds]
+ self.pos_is_gt = gt_flags[pos_inds]
+
+ self.num_gts = gt_bboxes.shape[0]
+ self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
+
+ if gt_bboxes.numel() == 0:
+ # hack for index error case
+ assert self.pos_assigned_gt_inds.numel() == 0
+ self.pos_gt_bboxes = torch.empty_like(gt_bboxes).view(-1, 4)
+ else:
+ if len(gt_bboxes.shape) < 2:
+ gt_bboxes = gt_bboxes.view(-1, 4)
+
+ self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
+
+ if assign_result.labels is not None:
+ self.pos_gt_labels = assign_result.labels[pos_inds]
+ else:
+ self.pos_gt_labels = None
+
+ @property
+ def bboxes(self):
+ return torch.cat([self.pos_bboxes, self.neg_bboxes])
+
+ def to(self, device):
+ """
+ Change the device of the data inplace.
+
+ Example:
+ >>> self = SamplingResult.random()
+ >>> print('self = {}'.format(self.to(None)))
+ >>> # xdoctest: +REQUIRES(--gpu)
+ >>> print('self = {}'.format(self.to(0)))
+ """
+ _dict = self.__dict__
+ for key, value in _dict.items():
+ if isinstance(value, torch.Tensor):
+ _dict[key] = value.to(device)
+ return self
+
+ def __nice__(self):
+ data = self.info.copy()
+ data['pos_bboxes'] = data.pop('pos_bboxes').shape
+ data['neg_bboxes'] = data.pop('neg_bboxes').shape
+ parts = ['\'{}\': {!r}'.format(k, v) for k, v in sorted(data.items())]
+ body = ' ' + ',\n '.join(parts)
+ return '{\n' + body + '\n}'
+
+ @property
+ def info(self):
+ """
+ Returns a dictionary of info about the object
+ """
+ return {
+ 'pos_inds': self.pos_inds,
+ 'neg_inds': self.neg_inds,
+ 'pos_bboxes': self.pos_bboxes,
+ 'neg_bboxes': self.neg_bboxes,
+ 'pos_is_gt': self.pos_is_gt,
+ 'num_gts': self.num_gts,
+ 'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
+ }
+
+ @classmethod
+ def random(cls, rng=None, **kwargs):
+ """
+ Args:
+ rng (None | int | numpy.random.RandomState): seed or state
+
+ Kwargs:
+ num_preds: number of predicted boxes
+ num_gts: number of true boxes
+ p_ignore (float): probability of a predicted box being assigned to an
+ ignored truth
+ p_assigned (float): probability of a predicted box not being
+ assigned
+ p_use_label (float | bool): with labels or not
+
+ Returns:
+ SamplingResult: a randomly generated sampling result
+
+ Example:
+ >>> from mmdet.core.bbox.samplers.sampling_result import * # NOQA
+ >>> self = SamplingResult.random()
+ >>> print(self.__dict__)
+ """
+ from mmdet.core.bbox.samplers.random_sampler import RandomSampler
+ from mmdet.core.bbox.assigners.assign_result import AssignResult
+ from mmdet.core.bbox import demodata
+ rng = demodata.ensure_rng(rng)
+
+ # make probabilistic?
+ num = 32
+ pos_fraction = 0.5
+ neg_pos_ub = -1
+
+ assign_result = AssignResult.random(rng=rng, **kwargs)
+
+ # Note we could just compute an assignment
+ bboxes = demodata.random_boxes(assign_result.num_preds, rng=rng)
+ gt_bboxes = demodata.random_boxes(assign_result.num_gts, rng=rng)
+
+ if rng.rand() > 0.2:
+ # sometimes algorithms squeeze their data, be robust to that
+ gt_bboxes = gt_bboxes.squeeze()
+ bboxes = bboxes.squeeze()
+
+ if assign_result.labels is None:
+ gt_labels = None
+ else:
+ gt_labels = None # todo
+
+ if gt_labels is None:
+ add_gt_as_proposals = False
+ else:
+ add_gt_as_proposals = True # make probabilistic?
+
+ sampler = RandomSampler(
+ num,
+ pos_fraction,
+ neg_pos_ub=neg_pos_ub,
+ add_gt_as_proposals=add_gt_as_proposals,
+ rng=rng)
+ self = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
+ return self
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/transforms.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/transforms.py
new file mode 100644
index 000000000..b9d1e6605
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/transforms.py
@@ -0,0 +1,223 @@
+import mmcv
+import numpy as np
+import torch
+
+
+def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
+ assert proposals.size() == gt.size()
+
+ proposals = proposals.float()
+ gt = gt.float()
+ px = (proposals[..., 0] + proposals[..., 2]) * 0.5
+ py = (proposals[..., 1] + proposals[..., 3]) * 0.5
+ pw = proposals[..., 2] - proposals[..., 0] + 1.0
+ ph = proposals[..., 3] - proposals[..., 1] + 1.0
+
+ gx = (gt[..., 0] + gt[..., 2]) * 0.5
+ gy = (gt[..., 1] + gt[..., 3]) * 0.5
+ gw = gt[..., 2] - gt[..., 0] + 1.0
+ gh = gt[..., 3] - gt[..., 1] + 1.0
+
+ dx = (gx - px) / pw
+ dy = (gy - py) / ph
+ dw = torch.log(gw / pw)
+ dh = torch.log(gh / ph)
+ deltas = torch.stack([dx, dy, dw, dh], dim=-1)
+
+ means = deltas.new_tensor(means).unsqueeze(0)
+ stds = deltas.new_tensor(stds).unsqueeze(0)
+ deltas = deltas.sub_(means).div_(stds)
+
+ return deltas
+
+
+def delta2bbox(rois,
+ deltas,
+ means=[0, 0, 0, 0],
+ stds=[1, 1, 1, 1],
+ max_shape=None,
+ wh_ratio_clip=16 / 1000):
+ """
+ Apply deltas to shift/scale base boxes.
+
+ Typically the rois are anchor or proposed bounding boxes and the deltas are
+ network outputs used to shift/scale those boxes.
+
+ Args:
+ rois (Tensor): boxes to be transformed. Has shape (N, 4)
+ deltas (Tensor): encoded offsets with respect to each roi.
+ Has shape (N, 4). Note N = num_anchors * W * H when rois is a grid
+ of anchors. Offset encoding follows [1]_.
+ means (list): denormalizing means for delta coordinates
+ stds (list): denormalizing standard deviation for delta coordinates
+ max_shape (tuple[int, int]): maximum bounds for boxes, specified as (H, W).
+ wh_ratio_clip (float): maximum aspect ratio for boxes.
+
+ Returns:
+ Tensor: boxes with shape (N, 4), where columns represent
+ tl_x, tl_y, br_x, br_y.
+
+ References:
+ .. [1] https://arxiv.org/abs/1311.2524
+
+ Example:
+ >>> rois = torch.Tensor([[ 0., 0., 1., 1.],
+ >>> [ 0., 0., 1., 1.],
+ >>> [ 0., 0., 1., 1.],
+ >>> [ 5., 5., 5., 5.]])
+ >>> deltas = torch.Tensor([[ 0., 0., 0., 0.],
+ >>> [ 1., 1., 1., 1.],
+ >>> [ 0., 0., 2., -1.],
+ >>> [ 0.7, -1.9, -0.5, 0.3]])
+ >>> delta2bbox(rois, deltas, max_shape=(32, 32))
+ tensor([[0.0000, 0.0000, 1.0000, 1.0000],
+ [0.2817, 0.2817, 4.7183, 4.7183],
+ [0.0000, 0.6321, 7.3891, 0.3679],
+ [5.8967, 2.9251, 5.5033, 3.2749]])
+ """
+ means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
+ stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
+ denorm_deltas = deltas * stds + means
+ dx = denorm_deltas[:, 0::4]
+ dy = denorm_deltas[:, 1::4]
+ dw = denorm_deltas[:, 2::4]
+ dh = denorm_deltas[:, 3::4]
+ max_ratio = np.abs(np.log(wh_ratio_clip))
+ dw = dw.clamp(min=-max_ratio, max=max_ratio)
+ dh = dh.clamp(min=-max_ratio, max=max_ratio)
+ # Compute center of each roi
+ px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
+ py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
+ # Compute width/height of each roi
+ pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
+ ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
+ # Use exp(network energy) to enlarge/shrink each roi
+ gw = pw * dw.exp()
+ gh = ph * dh.exp()
+ # Use network energy to shift the center of each roi
+ gx = px + pw * dx
+ gy = py + ph * dy
+ # Convert center-xy/width/height to top-left, bottom-right
+ x1 = gx - gw * 0.5 + 0.5
+ y1 = gy - gh * 0.5 + 0.5
+ x2 = gx + gw * 0.5 - 0.5
+ y2 = gy + gh * 0.5 - 0.5
+ if max_shape is not None:
+ x1 = x1.clamp(min=0, max=max_shape[1] - 1)
+ y1 = y1.clamp(min=0, max=max_shape[0] - 1)
+ x2 = x2.clamp(min=0, max=max_shape[1] - 1)
+ y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+ bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
+ return bboxes
+
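+# With the same means/stds, max_shape=None and deltas within wh_ratio_clip,
+# delta2bbox inverts bbox2delta; a minimal round-trip sketch with arbitrary
+# example boxes:
+#
+#   rois = torch.FloatTensor([[0., 0., 10., 10.], [5., 5., 20., 30.]])
+#   gts = torch.FloatTensor([[1., 1., 9., 9.], [6., 4., 22., 28.]])
+#   deltas = bbox2delta(rois, gts)
+#   assert torch.allclose(delta2bbox(rois, deltas), gts, atol=1e-4)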
+
+def bbox_flip(bboxes, img_shape):
+ """Flip bboxes horizontally.
+
+ Args:
+ bboxes(Tensor or ndarray): Shape (..., 4*k)
+ img_shape(tuple): Image shape.
+
+ Returns:
+ Same type as `bboxes`: Flipped bboxes.
+ """
+ if isinstance(bboxes, torch.Tensor):
+ assert bboxes.shape[-1] % 4 == 0
+ flipped = bboxes.clone()
+ flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1
+ flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1
+ return flipped
+ elif isinstance(bboxes, np.ndarray):
+ return mmcv.bbox_flip(bboxes, img_shape)
+
+
+def bbox_mapping(bboxes, img_shape, scale_factor, flip):
+ """Map bboxes from the original image scale to testing scale"""
+ new_bboxes = bboxes * scale_factor
+ if flip:
+ new_bboxes = bbox_flip(new_bboxes, img_shape)
+ return new_bboxes
+
+
+def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
+ """Map bboxes from testing scale to original image scale"""
+ new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
+ new_bboxes = new_bboxes / scale_factor
+ return new_bboxes
+
+
+def bbox2roi(bbox_list):
+ """Convert a list of bboxes to roi format.
+
+ Args:
+ bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
+ of images.
+
+ Returns:
+ Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
+ """
+ rois_list = []
+ for img_id, bboxes in enumerate(bbox_list):
+ if bboxes.size(0) > 0:
+ img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
+ rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
+ else:
+ rois = bboxes.new_zeros((0, 5))
+ rois_list.append(rois)
+ rois = torch.cat(rois_list, 0)
+ return rois
+
+
+def roi2bbox(rois):
+ bbox_list = []
+ img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
+ for img_id in img_ids:
+ inds = (rois[:, 0] == img_id.item())
+ bbox = rois[inds, 1:]
+ bbox_list.append(bbox)
+ return bbox_list
+
+
+def bbox2result(bboxes, labels, num_classes):
+ """Convert detection results to a list of numpy arrays.
+
+ Args:
+ bboxes (Tensor): shape (n, 5)
+ labels (Tensor): shape (n, )
+ num_classes (int): number of classes, including the background class
+
+ Returns:
+ list(ndarray): bbox results of each class
+ """
+ if bboxes.shape[0] == 0:
+ return [
+ np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)
+ ]
+ else:
+ bboxes = bboxes.cpu().numpy()
+ labels = labels.cpu().numpy()
+ return [bboxes[labels == i, :] for i in range(num_classes - 1)]
+
+
+def distance2bbox(points, distance, max_shape=None):
+ """Decode distance prediction to bounding box.
+
+ Args:
+ points (Tensor): Shape (n, 2), [x, y].
+ distance (Tensor): Distance from the given point to 4
+ boundaries (left, top, right, bottom).
+ max_shape (tuple): Shape of the image.
+
+ Returns:
+ Tensor: Decoded bboxes.
+ """
+ x1 = points[:, 0] - distance[:, 0]
+ y1 = points[:, 1] - distance[:, 1]
+ x2 = points[:, 0] + distance[:, 2]
+ y2 = points[:, 1] + distance[:, 3]
+ if max_shape is not None:
+ x1 = x1.clamp(min=0, max=max_shape[1] - 1)
+ y1 = y1.clamp(min=0, max=max_shape[0] - 1)
+ x2 = x2.clamp(min=0, max=max_shape[1] - 1)
+ y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+ return torch.stack([x1, y1, x2, y2], -1)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/__init__.py
new file mode 100644
index 000000000..2e59f020c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/__init__.py
@@ -0,0 +1,18 @@
+from .class_names import (coco_classes, dataset_aliases, get_classes,
+ imagenet_det_classes, imagenet_vid_classes,
+ voc_classes)
+from .coco_utils import coco_eval, fast_eval_recall, results2json, results2json_segm
+from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
+ DistEvalHook, DistEvalmAPHook)
+from .mean_ap import average_precision, eval_map, print_map_summary
+from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
+ print_recall_summary)
+
+__all__ = [
+ 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
+ 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
+ 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
+ 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
+ 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
+ 'plot_num_recall', 'plot_iou_recall', 'results2json_segm'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/bbox_overlaps.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/bbox_overlaps.py
new file mode 100644
index 000000000..ad4c70523
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/bbox_overlaps.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
+ """Calculate the ious between each bbox of bboxes1 and bboxes2.
+
+ Args:
+ bboxes1(ndarray): shape (n, 4)
+ bboxes2(ndarray): shape (k, 4)
+ mode(str): iou (intersection over union) or iof (intersection
+ over foreground)
+
+ Returns:
+ ious(ndarray): shape (n, k)
+ """
+
+ assert mode in ['iou', 'iof']
+
+ bboxes1 = bboxes1.astype(np.float32)
+ bboxes2 = bboxes2.astype(np.float32)
+ rows = bboxes1.shape[0]
+ cols = bboxes2.shape[0]
+ ious = np.zeros((rows, cols), dtype=np.float32)
+ if rows * cols == 0:
+ return ious
+ exchange = False
+ if bboxes1.shape[0] > bboxes2.shape[0]:
+ bboxes1, bboxes2 = bboxes2, bboxes1
+ ious = np.zeros((cols, rows), dtype=np.float32)
+ exchange = True
+ area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+ bboxes1[:, 3] - bboxes1[:, 1] + 1)
+ area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+ bboxes2[:, 3] - bboxes2[:, 1] + 1)
+ for i in range(bboxes1.shape[0]):
+ x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
+ y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
+ x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
+ y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
+ overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
+ y_end - y_start + 1, 0)
+ if mode == 'iou':
+ union = area1[i] + area2 - overlap
+ else:
+ union = area1[i] if not exchange else area2
+ ious[i, :] = overlap / union
+ if exchange:
+ ious = ious.T
+ return ious
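+
+# Quick illustration of the inclusive (+1) pixel convention used above
+# (a minimal sketch with made-up boxes):
+#
+#   a = np.array([[0., 0., 9., 9.]])     # treated as a 10x10 box, area 100
+#   b = np.array([[5., 5., 14., 14.]])   # 10x10 box overlapping a by 5x5 = 25
+#   bbox_overlaps(a, b)                  # -> approximately 0.143 (= 25 / 175)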
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/class_names.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/class_names.py
new file mode 100644
index 000000000..784277345
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/class_names.py
@@ -0,0 +1,116 @@
+import mmcv
+
+
+def wider_face_classes():
+ return ['face']
+
+
+def voc_classes():
+ return [
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
+ 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
+ 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
+ ]
+
+
+def imagenet_det_classes():
+ return [
+ 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',
+ 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',
+ 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',
+ 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',
+ 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',
+ 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',
+ 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',
+ 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',
+ 'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',
+ 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',
+ 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',
+ 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',
+ 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',
+ 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',
+ 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',
+ 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',
+ 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',
+ 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',
+ 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',
+ 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',
+ 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',
+ 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',
+ 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',
+ 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',
+ 'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
+ 'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
+ 'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
+ 'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
+ 'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
+ 'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
+ 'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
+ 'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
+ 'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
+ 'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
+ 'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
+ 'whale', 'wine_bottle', 'zebra'
+ ]
+
+
+def imagenet_vid_classes():
+ return [
+ 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
+ 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
+ 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
+ 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
+ 'watercraft', 'whale', 'zebra'
+ ]
+
+
+def coco_classes():
+ return [
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+ 'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign',
+ 'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
+ 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
+ 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
+ 'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
+ 'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork',
+ 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
+ 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair',
+ 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv',
+ 'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
+ 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
+ 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'
+ ]
+
+
+def cityscapes_classes():
+ return [
+ 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
+ 'bicycle'
+ ]
+
+
+dataset_aliases = {
+ 'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
+ 'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
+ 'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
+ 'coco': ['coco', 'mscoco', 'ms_coco'],
+ 'wider_face': ['WIDERFaceDataset', 'wider_face', 'WIDERFace'],
+ 'cityscapes': ['cityscapes']
+}
+
+
+def get_classes(dataset):
+ """Get class names of a dataset."""
+ alias2name = {}
+ for name, aliases in dataset_aliases.items():
+ for alias in aliases:
+ alias2name[alias] = name
+
+ if mmcv.is_str(dataset):
+ if dataset in alias2name:
+ labels = eval(alias2name[dataset] + '_classes()')
+ else:
+ raise ValueError('Unrecognized dataset: {}'.format(dataset))
+ else:
+ raise TypeError('dataset must be a str, but got {}'.format(type(dataset)))
+ return labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/coco_utils.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/coco_utils.py
new file mode 100644
index 000000000..d57ca4d19
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/coco_utils.py
@@ -0,0 +1,250 @@
+import itertools
+
+import mmcv
+import numpy as np
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from terminaltables import AsciiTable
+
+from .recall import eval_recalls
+
+
+def coco_eval(result_files,
+ result_types,
+ coco,
+ max_dets=(100, 300, 1000),
+ classwise=False):
+ for res_type in result_types:
+ assert res_type in [
+ 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
+ ]
+
+ if mmcv.is_str(coco):
+ coco = COCO(coco)
+ assert isinstance(coco, COCO)
+
+ if result_types == ['proposal_fast']:
+ ar = fast_eval_recall(result_files, coco, np.array(max_dets))
+ for i, num in enumerate(max_dets):
+ print('AR@{}\t= {:.4f}'.format(num, ar[i]))
+ return
+
+ for res_type in result_types:
+ if isinstance(result_files, str):
+ result_file = result_files
+ elif isinstance(result_files, dict):
+ result_file = result_files[res_type]
+ else:
+ raise TypeError('result_files must be a str or dict')
+ assert result_file.endswith('.json')
+
+ coco_dets = coco.loadRes(result_file)
+ img_ids = coco.getImgIds()
+ iou_type = 'bbox' if res_type == 'proposal' else res_type
+ cocoEval = COCOeval(coco, coco_dets, iou_type)
+ cocoEval.params.imgIds = img_ids
+ if res_type == 'proposal':
+ cocoEval.params.useCats = 0
+ cocoEval.params.maxDets = list(max_dets)
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ cocoEval.summarize()
+
+ if classwise:
+ # Compute per-category AP
+ # from https://github.com/facebookresearch/detectron2/blob/03064eb5bafe4a3e5750cc7a16672daf5afe8435/detectron2/evaluation/coco_evaluation.py#L259-L283 # noqa
+ precisions = cocoEval.eval['precision']
+ catIds = coco.getCatIds()
+ # precision has dims (iou, recall, cls, area range, max dets)
+ assert len(catIds) == precisions.shape[2]
+
+ results_per_category = []
+ for idx, catId in enumerate(catIds):
+ # area range index 0: all area ranges
+ # max dets index -1: typically 100 per image
+ nm = coco.loadCats(catId)[0]
+ precision = precisions[:, :, idx, 0, -1]
+ precision = precision[precision > -1]
+ ap = np.mean(precision) if precision.size else float('nan')
+ results_per_category.append(
+ ('{}'.format(nm['name']),
+ '{:0.3f}'.format(float(ap * 100))))
+
+ N_COLS = min(6, len(results_per_category) * 2)
+ results_flatten = list(itertools.chain(*results_per_category))
+ headers = ['category', 'AP'] * (N_COLS // 2)
+ results_2d = itertools.zip_longest(
+ *[results_flatten[i::N_COLS] for i in range(N_COLS)])
+ table_data = [headers]
+ table_data += [result for result in results_2d]
+ table = AsciiTable(table_data)
+ print(table.table)
+
+
+def fast_eval_recall(results,
+ coco,
+ max_dets,
+ iou_thrs=np.arange(0.5, 0.96, 0.05)):
+ if mmcv.is_str(results):
+ assert results.endswith('.pkl')
+ results = mmcv.load(results)
+ elif not isinstance(results, list):
+ raise TypeError(
+ 'results must be a list of numpy arrays or a filename, not {}'.
+ format(type(results)))
+
+ gt_bboxes = []
+ img_ids = coco.getImgIds()
+ for i in range(len(img_ids)):
+ ann_ids = coco.getAnnIds(imgIds=img_ids[i])
+ ann_info = coco.loadAnns(ann_ids)
+ if len(ann_info) == 0:
+ gt_bboxes.append(np.zeros((0, 4)))
+ continue
+ bboxes = []
+ for ann in ann_info:
+ if ann.get('ignore', False) or ann['iscrowd']:
+ continue
+ x1, y1, w, h = ann['bbox']
+ bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
+ bboxes = np.array(bboxes, dtype=np.float32)
+ if bboxes.shape[0] == 0:
+ bboxes = np.zeros((0, 4))
+ gt_bboxes.append(bboxes)
+
+ recalls = eval_recalls(
+ gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
+ ar = recalls.mean(axis=1)
+ return ar
+
+
+def xyxy2xywh(bbox):
+ _bbox = bbox.tolist()
+ return [
+ _bbox[0],
+ _bbox[1],
+ _bbox[2] - _bbox[0] + 1,
+ _bbox[3] - _bbox[1] + 1,
+ ]
+
+
+def proposal2json(dataset, results):
+ json_results = []
+ for idx in range(len(dataset)):
+ img_id = dataset.img_ids[idx]
+ bboxes = results[idx]
+ for i in range(bboxes.shape[0]):
+ data = dict()
+ data['image_id'] = img_id
+ data['bbox'] = xyxy2xywh(bboxes[i])
+ data['score'] = float(bboxes[i][4])
+ data['category_id'] = 1
+ json_results.append(data)
+ return json_results
+
+
+def det2json(dataset, results):
+ json_results = []
+ for idx in range(len(dataset)):
+ img_id = dataset.img_ids[idx]
+ result = results[idx]
+ for label in range(len(result)):
+ bboxes = result[label]
+ for i in range(bboxes.shape[0]):
+ data = dict()
+ data['image_id'] = img_id
+ data['bbox'] = xyxy2xywh(bboxes[i])
+ data['score'] = float(bboxes[i][4])
+ data['category_id'] = dataset.cat_ids[label]
+ json_results.append(data)
+ return json_results
+
+
+def segm2json(dataset, results):
+ bbox_json_results = []
+ segm_json_results = []
+ for idx in range(len(dataset)):
+ img_id = dataset.img_ids[idx]
+ det, seg = results[idx]
+ for label in range(len(det)):
+ # bbox results
+ bboxes = det[label]
+ for i in range(bboxes.shape[0]):
+ data = dict()
+ data['image_id'] = img_id
+ data['bbox'] = xyxy2xywh(bboxes[i])
+ data['score'] = float(bboxes[i][4])
+ data['category_id'] = dataset.cat_ids[label]
+ bbox_json_results.append(data)
+
+ # segm results
+ # some detectors use different score for det and segm
+ if isinstance(seg, tuple):
+ segms = seg[0][label]
+ mask_score = seg[1][label]
+ else:
+ segms = seg[label]
+ mask_score = [bbox[4] for bbox in bboxes]
+ for i in range(bboxes.shape[0]):
+ data = dict()
+ data['image_id'] = img_id
+ data['bbox'] = xyxy2xywh(bboxes[i])
+ data['score'] = float(mask_score[i])
+ data['category_id'] = dataset.cat_ids[label]
+ if isinstance(segms[i]['counts'], bytes):
+ segms[i]['counts'] = segms[i]['counts'].decode()
+ data['segmentation'] = segms[i]
+ segm_json_results.append(data)
+ return bbox_json_results, segm_json_results
+
+
+def segm2json_segm(dataset, results):
+ segm_json_results = []
+ for idx in range(len(dataset)):
+ img_id = dataset.img_ids[idx]
+ seg = results[idx]
+ for label in range(len(seg)):
+ masks = seg[label]
+ for i in range(len(masks)):
+ mask_score = masks[i][1]
+ segm = masks[i][0]
+ data = dict()
+ data['image_id'] = img_id
+ data['score'] = float(mask_score)
+ data['category_id'] = dataset.cat_ids[label]
+ segm['counts'] = segm['counts'].decode()
+ data['segmentation'] = segm
+ segm_json_results.append(data)
+ return segm_json_results
+
+
+def results2json(dataset, results, out_file):
+ result_files = dict()
+ if isinstance(results[0], list):
+ json_results = det2json(dataset, results)
+ result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
+ result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
+ mmcv.dump(json_results, result_files['bbox'])
+ elif isinstance(results[0], tuple):
+ json_results = segm2json(dataset, results)
+ result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
+ result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
+ result_files['segm'] = '{}.{}.json'.format(out_file, 'segm')
+ mmcv.dump(json_results[0], result_files['bbox'])
+ mmcv.dump(json_results[1], result_files['segm'])
+ elif isinstance(results[0], np.ndarray):
+ json_results = proposal2json(dataset, results)
+ result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal')
+ mmcv.dump(json_results, result_files['proposal'])
+ else:
+ raise TypeError('invalid type of results')
+ return result_files
+
+
+def results2json_segm(dataset, results, out_file):
+ result_files = dict()
+ json_results = segm2json_segm(dataset, results)
+ result_files['segm'] = '{}.{}.json'.format(out_file, 'segm')
+ mmcv.dump(json_results, result_files['segm'])
+
+ return result_files
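+
+# End-to-end usage sketch (assumes `dataset` is a COCO-style dataset exposing
+# `img_ids`/`cat_ids`/`coco`, and `results` is the per-image output list
+# produced by the test script; the names and output prefix are placeholders):
+#
+#   result_files = results2json(dataset, results, 'work_dirs/results')
+#   coco_eval(result_files, ['bbox', 'segm'], dataset.coco, classwise=True)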
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/eval_hooks.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/eval_hooks.py
new file mode 100644
index 000000000..1a074eec1
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/eval_hooks.py
@@ -0,0 +1,152 @@
+import os
+import os.path as osp
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import collate, scatter
+from mmcv.runner import Hook
+from pycocotools.cocoeval import COCOeval
+from torch.utils.data import Dataset
+
+from mmdet import datasets
+from .coco_utils import fast_eval_recall, results2json
+from .mean_ap import eval_map
+
+
+class DistEvalHook(Hook):
+
+ def __init__(self, dataset, interval=1):
+ if isinstance(dataset, Dataset):
+ self.dataset = dataset
+ elif isinstance(dataset, dict):
+ self.dataset = datasets.build_dataset(dataset, {'test_mode': True})
+ else:
+ raise TypeError(
+ 'dataset must be a Dataset object or a dict, not {}'.format(
+ type(dataset)))
+ self.interval = interval
+
+ def after_train_epoch(self, runner):
+ if not self.every_n_epochs(runner, self.interval):
+ return
+ runner.model.eval()
+ results = [None for _ in range(len(self.dataset))]
+ if runner.rank == 0:
+ prog_bar = mmcv.ProgressBar(len(self.dataset))
+ for idx in range(runner.rank, len(self.dataset), runner.world_size):
+ data = self.dataset[idx]
+ data_gpu = scatter(
+ collate([data], samples_per_gpu=1),
+ [torch.cuda.current_device()])[0]
+
+ # compute output
+ with torch.no_grad():
+ result = runner.model(
+ return_loss=False, rescale=True, **data_gpu)
+ results[idx] = result
+
+ batch_size = runner.world_size
+ if runner.rank == 0:
+ for _ in range(batch_size):
+ prog_bar.update()
+
+ if runner.rank == 0:
+ print('\n')
+ dist.barrier()
+ for i in range(1, runner.world_size):
+ tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
+ tmp_results = mmcv.load(tmp_file)
+ for idx in range(i, len(results), runner.world_size):
+ results[idx] = tmp_results[idx]
+ os.remove(tmp_file)
+ self.evaluate(runner, results)
+ else:
+ tmp_file = osp.join(runner.work_dir,
+ 'temp_{}.pkl'.format(runner.rank))
+ mmcv.dump(results, tmp_file)
+ dist.barrier()
+ dist.barrier()
+
+ def evaluate(self):
+ raise NotImplementedError
+
+
+class DistEvalmAPHook(DistEvalHook):
+
+ def evaluate(self, runner, results):
+ annotations = [
+ self.dataset.get_ann_info(i) for i in range(len(self.dataset))
+ ]
+ # If the dataset is VOC2007, then use 11 points mAP evaluation.
+ if hasattr(self.dataset, 'year') and self.dataset.year == 2007:
+ ds_name = 'voc07'
+ else:
+ ds_name = self.dataset.CLASSES
+ mean_ap, eval_results = eval_map(
+ results,
+ annotations,
+ scale_ranges=None,
+ iou_thr=0.5,
+ dataset=ds_name,
+ logger=runner.logger)
+ runner.log_buffer.output['mAP'] = mean_ap
+ runner.log_buffer.ready = True
+
+
+class CocoDistEvalRecallHook(DistEvalHook):
+
+ def __init__(self,
+ dataset,
+ interval=1,
+ proposal_nums=(100, 300, 1000),
+ iou_thrs=np.arange(0.5, 0.96, 0.05)):
+ super(CocoDistEvalRecallHook, self).__init__(
+ dataset, interval=interval)
+ self.proposal_nums = np.array(proposal_nums, dtype=np.int32)
+ self.iou_thrs = np.array(iou_thrs, dtype=np.float32)
+
+ def evaluate(self, runner, results):
+ # The official COCO evaluation is too slow, so we use our own
+ # implementation instead, which may give slightly different results.
+ ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums,
+ self.iou_thrs)
+ for i, num in enumerate(self.proposal_nums):
+ runner.log_buffer.output['AR@{}'.format(num)] = ar[i]
+ runner.log_buffer.ready = True
+
+
+class CocoDistEvalmAPHook(DistEvalHook):
+
+ def evaluate(self, runner, results):
+ tmp_file = osp.join(runner.work_dir, 'temp_0')
+ result_files = results2json(self.dataset, results, tmp_file)
+
+ res_types = ['bbox', 'segm'
+ ] if runner.model.module.with_mask else ['bbox']
+ cocoGt = self.dataset.coco
+ imgIds = cocoGt.getImgIds()
+ for res_type in res_types:
+ try:
+ cocoDt = cocoGt.loadRes(result_files[res_type])
+ except IndexError:
+ print('No prediction found.')
+ break
+ iou_type = res_type
+ cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
+ cocoEval.params.imgIds = imgIds
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ cocoEval.summarize()
+ metrics = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']
+ for i in range(len(metrics)):
+ key = '{}_{}'.format(res_type, metrics[i])
+ val = float('{:.3f}'.format(cocoEval.stats[i]))
+ runner.log_buffer.output[key] = val
+ runner.log_buffer.output['{}_mAP_copypaste'.format(res_type)] = (
+ '{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
+ '{ap[4]:.3f} {ap[5]:.3f}').format(ap=cocoEval.stats[:6])
+ runner.log_buffer.ready = True
+ for res_type in res_types:
+ os.remove(result_files[res_type])
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/mean_ap.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/mean_ap.py
new file mode 100644
index 000000000..4e3cd5d07
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/mean_ap.py
@@ -0,0 +1,455 @@
+from multiprocessing import Pool
+
+import mmcv
+import numpy as np
+from terminaltables import AsciiTable
+
+from mmdet.utils import print_log
+from .bbox_overlaps import bbox_overlaps
+from .class_names import get_classes
+
+
+def average_precision(recalls, precisions, mode='area'):
+ """Calculate average precision (for single or multiple scales).
+
+ Args:
+ recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
+ precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
+ mode (str): 'area' or '11points', 'area' means calculating the area
+ under precision-recall curve, '11points' means calculating
+ the average precision of recalls at [0, 0.1, ..., 1]
+
+ Returns:
+ float or ndarray: calculated average precision
+ """
+ no_scale = False
+ if recalls.ndim == 1:
+ no_scale = True
+ recalls = recalls[np.newaxis, :]
+ precisions = precisions[np.newaxis, :]
+ assert recalls.shape == precisions.shape and recalls.ndim == 2
+ num_scales = recalls.shape[0]
+ ap = np.zeros(num_scales, dtype=np.float32)
+ if mode == 'area':
+ zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
+ ones = np.ones((num_scales, 1), dtype=recalls.dtype)
+ mrec = np.hstack((zeros, recalls, ones))
+ mpre = np.hstack((zeros, precisions, zeros))
+ for i in range(mpre.shape[1] - 1, 0, -1):
+ mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
+ for i in range(num_scales):
+ ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
+ ap[i] = np.sum(
+ (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
+ elif mode == '11points':
+ for i in range(num_scales):
+ for thr in np.arange(0, 1 + 1e-3, 0.1):
+ precs = precisions[i, recalls[i, :] >= thr]
+ prec = precs.max() if precs.size > 0 else 0
+ ap[i] += prec
+ ap /= 11
+ else:
+ raise ValueError(
+ 'Unrecognized mode, only "area" and "11points" are supported')
+ if no_scale:
+ ap = ap[0]
+ return ap
+
+
+def tpfp_imagenet(det_bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=None,
+ default_iou_thr=0.5,
+ area_ranges=None):
+ """Check if detected bboxes are true positive or false positive.
+
+ Args:
+ det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
+ gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
+ gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,
+ of shape (k, 4). Default: None
+ default_iou_thr (float): IoU threshold to be considered as matched for
+ medium and large bboxes (small ones have special rules).
+ Default: 0.5.
+ area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
+ in the format [(min1, max1), (min2, max2), ...]. Default: None.
+
+ Returns:
+ tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
+ each array is (num_scales, m).
+ """
+ # an indicator of ignored gts
+ gt_ignore_inds = np.concatenate(
+ (np.zeros(gt_bboxes.shape[0], dtype=bool),
+ np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
+ # stack gt_bboxes and gt_bboxes_ignore for convenience
+ gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))
+
+ num_dets = det_bboxes.shape[0]
+ num_gts = gt_bboxes.shape[0]
+ if area_ranges is None:
+ area_ranges = [(None, None)]
+ num_scales = len(area_ranges)
+ # tp and fp are of shape (num_scales, num_gts), each row is tp or fp
+ # of a certain scale.
+ tp = np.zeros((num_scales, num_dets), dtype=np.float32)
+ fp = np.zeros((num_scales, num_dets), dtype=np.float32)
+ if gt_bboxes.shape[0] == 0:
+ if area_ranges == [(None, None)]:
+ fp[...] = 1
+ else:
+ det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
+ det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+ for i, (min_area, max_area) in enumerate(area_ranges):
+ fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
+ return tp, fp
+ ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
+ gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1
+ gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1
+ iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
+ default_iou_thr)
+ # sort all detections by scores in descending order
+ sort_inds = np.argsort(-det_bboxes[:, -1])
+ for k, (min_area, max_area) in enumerate(area_ranges):
+ gt_covered = np.zeros(num_gts, dtype=bool)
+ # if no area range is specified, gt_area_ignore is all False
+ if min_area is None:
+ gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
+ else:
+ gt_areas = gt_w * gt_h
+ gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
+ for i in sort_inds:
+ max_iou = -1
+ matched_gt = -1
+ # find best overlapped available gt
+ for j in range(num_gts):
+ # different from PASCAL VOC: allow finding other gts if the
+ # best overlaped ones are already matched by other det bboxes
+ if gt_covered[j]:
+ continue
+ elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
+ max_iou = ious[i, j]
+ matched_gt = j
+ # there are 4 cases for a det bbox:
+ # 1. it matches a gt, tp = 1, fp = 0
+ # 2. it matches an ignored gt, tp = 0, fp = 0
+ # 3. it matches no gt and within area range, tp = 0, fp = 1
+ # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
+ if matched_gt >= 0:
+ gt_covered[matched_gt] = 1
+ if not (gt_ignore_inds[matched_gt]
+ or gt_area_ignore[matched_gt]):
+ tp[k, i] = 1
+ elif min_area is None:
+ fp[k, i] = 1
+ else:
+ bbox = det_bboxes[i, :4]
+ area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+ if area >= min_area and area < max_area:
+ fp[k, i] = 1
+ return tp, fp
+
+
+def tpfp_default(det_bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=None,
+ iou_thr=0.5,
+ area_ranges=None):
+ """Check if detected bboxes are true positive or false positive.
+
+ Args:
+ det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
+ gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
+ gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,
+ of shape (k, 4). Default: None
+ iou_thr (float): IoU threshold to be considered as matched.
+ Default: 0.5.
+ area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
+ in the format [(min1, max1), (min2, max2), ...]. Default: None.
+
+ Returns:
+ tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
+ each array is (num_scales, m).
+ """
+ # an indicator of ignored gts
+ gt_ignore_inds = np.concatenate(
+ (np.zeros(gt_bboxes.shape[0], dtype=bool),
+ np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
+ # stack gt_bboxes and gt_bboxes_ignore for convenience
+ gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))
+
+ num_dets = det_bboxes.shape[0]
+ num_gts = gt_bboxes.shape[0]
+ if area_ranges is None:
+ area_ranges = [(None, None)]
+ num_scales = len(area_ranges)
+ # tp and fp are of shape (num_scales, num_gts), each row is tp or fp of
+ # a certain scale
+ tp = np.zeros((num_scales, num_dets), dtype=np.float32)
+ fp = np.zeros((num_scales, num_dets), dtype=np.float32)
+
+ # if there is no gt bboxes in this image, then all det bboxes
+ # within area range are false positives
+ if gt_bboxes.shape[0] == 0:
+ if area_ranges == [(None, None)]:
+ fp[...] = 1
+ else:
+ det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
+ det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+ for i, (min_area, max_area) in enumerate(area_ranges):
+ fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
+ return tp, fp
+
+ ious = bbox_overlaps(det_bboxes, gt_bboxes)
+ # for each det, the max iou with all gts
+ ious_max = ious.max(axis=1)
+ # for each det, which gt overlaps most with it
+ ious_argmax = ious.argmax(axis=1)
+ # sort all dets in descending order by scores
+ sort_inds = np.argsort(-det_bboxes[:, -1])
+ for k, (min_area, max_area) in enumerate(area_ranges):
+ gt_covered = np.zeros(num_gts, dtype=bool)
+ # if no area range is specified, gt_area_ignore is all False
+ if min_area is None:
+ gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
+ else:
+ gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
+ gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+ gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
+ for i in sort_inds:
+ if ious_max[i] >= iou_thr:
+ matched_gt = ious_argmax[i]
+ if not (gt_ignore_inds[matched_gt]
+ or gt_area_ignore[matched_gt]):
+ if not gt_covered[matched_gt]:
+ gt_covered[matched_gt] = True
+ tp[k, i] = 1
+ else:
+ fp[k, i] = 1
+ # otherwise ignore this detected bbox, tp = 0, fp = 0
+ elif min_area is None:
+ fp[k, i] = 1
+ else:
+ bbox = det_bboxes[i, :4]
+ area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+ if area >= min_area and area < max_area:
+ fp[k, i] = 1
+ return tp, fp
+
+
+def get_cls_results(det_results, annotations, class_id):
+ """Get det results and gt information of a certain class.
+
+ Args:
+ det_results (list[list]): Same as `eval_map()`.
+ annotations (list[dict]): Same as `eval_map()`.
+ class_id (int): ID of the class to collect results for.
+
+ Returns:
+ tuple[list[np.ndarray]]: detected bboxes, gt bboxes, ignored gt bboxes
+ """
+ cls_dets = [img_res[class_id] for img_res in det_results]
+ cls_gts = []
+ cls_gts_ignore = []
+ for ann in annotations:
+ gt_inds = ann['labels'] == (class_id + 1)
+ cls_gts.append(ann['bboxes'][gt_inds, :])
+
+ if ann.get('labels_ignore', None) is not None:
+ ignore_inds = ann['labels_ignore'] == (class_id + 1)
+ cls_gts_ignore.append(ann['bboxes_ignore'][ignore_inds, :])
+ else:
+ cls_gts_ignore.append(np.empty((0, 4), dtype=np.float32))
+
+ return cls_dets, cls_gts, cls_gts_ignore
+
+
+def eval_map(det_results,
+ annotations,
+ scale_ranges=None,
+ iou_thr=0.5,
+ dataset=None,
+ logger=None,
+ nproc=4):
+ """Evaluate mAP of a dataset.
+
+ Args:
+ det_results (list[list]): [[cls1_det, cls2_det, ...], ...].
+ The outer list indicates images, and the inner list indicates
+ per-class detected bboxes.
+ annotations (list[dict]): Ground truth annotations where each item of
+ the list indicates an image. Keys of annotations are:
+ - "bboxes": numpy array of shape (n, 4)
+ - "labels": numpy array of shape (n, )
+ - "bboxes_ignore" (optional): numpy array of shape (k, 4)
+ - "labels_ignore" (optional): numpy array of shape (k, )
+ scale_ranges (list[tuple] | None): Range of scales to be evaluated,
+ in the format [(min1, max1), (min2, max2), ...]. A range of
+ (32, 64) means the area range between (32**2, 64**2).
+ Default: None.
+ iou_thr (float): IoU threshold to be considered as matched.
+ Default: 0.5.
+ dataset (list[str] | str | None): Dataset name or dataset classes,
+ there are minor differences in metrics for different datasets, e.g.
+ "voc07", "imagenet_det", etc. Default: None.
+ logger (logging.Logger | str | None): The way to print the mAP
+ summary. See `mmdet.utils.print_log()` for details. Default: None.
+ nproc (int): Processes used for computing TP and FP.
+ Default: 4.
+
+ Returns:
+ tuple: (mAP, [dict, dict, ...])
+ """
+ assert len(det_results) == len(annotations)
+
+ num_imgs = len(det_results)
+ num_scales = len(scale_ranges) if scale_ranges is not None else 1
+ num_classes = len(det_results[0]) # positive class num
+ area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]
+ if scale_ranges is not None else None)
+
+ pool = Pool(nproc)
+ eval_results = []
+ for i in range(num_classes):
+ # get gt and det bboxes of this class
+ cls_dets, cls_gts, cls_gts_ignore = get_cls_results(
+ det_results, annotations, i)
+ # choose proper function according to datasets to compute tp and fp
+ if dataset in ['det', 'vid']:
+ tpfp_func = tpfp_imagenet
+ else:
+ tpfp_func = tpfp_default
+ # compute tp and fp for each image with multiple processes
+ tpfp = pool.starmap(
+ tpfp_func,
+ zip(cls_dets, cls_gts, cls_gts_ignore,
+ [iou_thr for _ in range(num_imgs)],
+ [area_ranges for _ in range(num_imgs)]))
+ tp, fp = tuple(zip(*tpfp))
+ # calculate gt number of each scale
+ # ignored gts or gts beyond the specific scale are not counted
+ num_gts = np.zeros(num_scales, dtype=int)
+ for j, bbox in enumerate(cls_gts):
+ if area_ranges is None:
+ num_gts[0] += bbox.shape[0]
+ else:
+ gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (
+ bbox[:, 3] - bbox[:, 1] + 1)
+ for k, (min_area, max_area) in enumerate(area_ranges):
+ num_gts[k] += np.sum((gt_areas >= min_area)
+ & (gt_areas < max_area))
+ # sort all det bboxes by score, also sort tp and fp
+ cls_dets = np.vstack(cls_dets)
+ num_dets = cls_dets.shape[0]
+ sort_inds = np.argsort(-cls_dets[:, -1])
+ tp = np.hstack(tp)[:, sort_inds]
+ fp = np.hstack(fp)[:, sort_inds]
+ # calculate recall and precision with tp and fp
+ tp = np.cumsum(tp, axis=1)
+ fp = np.cumsum(fp, axis=1)
+ eps = np.finfo(np.float32).eps
+ recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)
+ precisions = tp / np.maximum((tp + fp), eps)
+ # calculate AP
+ if scale_ranges is None:
+ recalls = recalls[0, :]
+ precisions = precisions[0, :]
+ num_gts = num_gts.item()
+ mode = 'area' if dataset != 'voc07' else '11points'
+ ap = average_precision(recalls, precisions, mode)
+ eval_results.append({
+ 'num_gts': num_gts,
+ 'num_dets': num_dets,
+ 'recall': recalls,
+ 'precision': precisions,
+ 'ap': ap
+ })
+ if scale_ranges is not None:
+ # shape (num_classes, num_scales)
+ all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])
+ all_num_gts = np.vstack(
+ [cls_result['num_gts'] for cls_result in eval_results])
+ mean_ap = []
+ for i in range(num_scales):
+ if np.any(all_num_gts[:, i] > 0):
+ mean_ap.append(all_ap[all_num_gts[:, i] > 0, i].mean())
+ else:
+ mean_ap.append(0.0)
+ else:
+ aps = []
+ for cls_result in eval_results:
+ if cls_result['num_gts'] > 0:
+ aps.append(cls_result['ap'])
+ mean_ap = np.array(aps).mean().item() if aps else 0.0
+
+ print_map_summary(
+ mean_ap, eval_results, dataset, area_ranges, logger=logger)
+
+ return mean_ap, eval_results
+
+
+def print_map_summary(mean_ap,
+ results,
+ dataset=None,
+ scale_ranges=None,
+ logger=None):
+ """Print mAP and results of each class.
+
+ A table will be printed to show the gts/dets/recall/AP of each class and
+ the mAP.
+
+ Args:
+ mean_ap (float): Calculated from `eval_map()`.
+ results (list[dict]): Calculated from `eval_map()`.
+ dataset (list[str] | str | None): Dataset name or dataset classes.
+ scale_ranges (list[tuple] | None): Range of scales to be evaluated.
+ logger (logging.Logger | str | None): The way to print the mAP
+ summary. See `mmdet.utils.print_log()` for details. Default: None.
+ """
+
+ if logger == 'silent':
+ return
+
+ if isinstance(results[0]['ap'], np.ndarray):
+ num_scales = len(results[0]['ap'])
+ else:
+ num_scales = 1
+
+ if scale_ranges is not None:
+ assert len(scale_ranges) == num_scales
+
+ num_classes = len(results)
+
+ recalls = np.zeros((num_scales, num_classes), dtype=np.float32)
+ aps = np.zeros((num_scales, num_classes), dtype=np.float32)
+ num_gts = np.zeros((num_scales, num_classes), dtype=int)
+ for i, cls_result in enumerate(results):
+ if cls_result['recall'].size > 0:
+ recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]
+ aps[:, i] = cls_result['ap']
+ num_gts[:, i] = cls_result['num_gts']
+
+ if dataset is None:
+ label_names = [str(i) for i in range(1, num_classes + 1)]
+ elif mmcv.is_str(dataset):
+ label_names = get_classes(dataset)
+ else:
+ label_names = dataset
+
+ if not isinstance(mean_ap, list):
+ mean_ap = [mean_ap]
+
+ header = ['class', 'gts', 'dets', 'recall', 'ap']
+ for i in range(num_scales):
+ if scale_ranges is not None:
+ print_log('Scale range {}'.format(scale_ranges[i]), logger=logger)
+ table_data = [header]
+ for j in range(num_classes):
+ row_data = [
+ label_names[j], num_gts[i, j], results[j]['num_dets'],
+ '{:.3f}'.format(recalls[i, j]), '{:.3f}'.format(aps[i, j])
+ ]
+ table_data.append(row_data)
+ table_data.append(['mAP', '', '', '', '{:.3f}'.format(mean_ap[i])])
+ table = AsciiTable(table_data)
+ table.inner_footing_row_border = True
+ print_log('\n' + table.table, logger=logger)
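A minimal usage sketch of eval_map (not part of the patch). The boxes are made up; labels are 1-based to match the `class_id + 1` comparison in get_cls_results, and the __main__ guard matters because eval_map spawns a multiprocessing.Pool:

import numpy as np

from mmdet.core.evaluation.mean_ap import eval_map

if __name__ == '__main__':
    # Two images, one class. Each per-class array holds [x1, y1, x2, y2, score].
    det_results = [
        [np.array([[10., 10., 50., 50., 0.9],
                   [100., 100., 140., 140., 0.3]], dtype=np.float32)],
        [np.array([[20., 20., 60., 60., 0.8]], dtype=np.float32)],
    ]
    annotations = [
        dict(bboxes=np.array([[12., 12., 48., 48.]], dtype=np.float32),
             labels=np.array([1], dtype=np.int64)),
        dict(bboxes=np.array([[22., 22., 58., 58.]], dtype=np.float32),
             labels=np.array([1], dtype=np.int64)),
    ]
    mean_ap, per_class = eval_map(det_results, annotations, iou_thr=0.5, nproc=1)
    print(mean_ap)  # 1.0 on this toy setup
    print(per_class[0]['num_gts'], per_class[0]['num_dets'])  # 2 3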
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/recall.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/recall.py
new file mode 100644
index 000000000..2a56f42fd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/recall.py
@@ -0,0 +1,185 @@
+import numpy as np
+from terminaltables import AsciiTable
+
+from .bbox_overlaps import bbox_overlaps
+
+
+def _recalls(all_ious, proposal_nums, thrs):
+
+ img_num = all_ious.shape[0]
+ total_gt_num = sum([ious.shape[0] for ious in all_ious])
+
+ _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)
+ for k, proposal_num in enumerate(proposal_nums):
+ tmp_ious = np.zeros(0)
+ for i in range(img_num):
+ ious = all_ious[i][:, :proposal_num].copy()
+ gt_ious = np.zeros((ious.shape[0]))
+ if ious.size == 0:
+ tmp_ious = np.hstack((tmp_ious, gt_ious))
+ continue
+ for j in range(ious.shape[0]):
+ gt_max_overlaps = ious.argmax(axis=1)
+ max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]
+ gt_idx = max_ious.argmax()
+ gt_ious[j] = max_ious[gt_idx]
+ box_idx = gt_max_overlaps[gt_idx]
+ ious[gt_idx, :] = -1
+ ious[:, box_idx] = -1
+ tmp_ious = np.hstack((tmp_ious, gt_ious))
+ _ious[k, :] = tmp_ious
+
+ _ious = np.fliplr(np.sort(_ious, axis=1))
+ recalls = np.zeros((proposal_nums.size, thrs.size))
+ for i, thr in enumerate(thrs):
+ recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)
+
+ return recalls
+
+
+def set_recall_param(proposal_nums, iou_thrs):
+ """Check proposal_nums and iou_thrs and set correct format.
+ """
+ if isinstance(proposal_nums, list):
+ _proposal_nums = np.array(proposal_nums)
+ elif isinstance(proposal_nums, int):
+ _proposal_nums = np.array([proposal_nums])
+ else:
+ _proposal_nums = proposal_nums
+
+ if iou_thrs is None:
+ _iou_thrs = np.array([0.5])
+ elif isinstance(iou_thrs, list):
+ _iou_thrs = np.array(iou_thrs)
+ elif isinstance(iou_thrs, float):
+ _iou_thrs = np.array([iou_thrs])
+ else:
+ _iou_thrs = iou_thrs
+
+ return _proposal_nums, _iou_thrs
+
+
+def eval_recalls(gts,
+ proposals,
+ proposal_nums=None,
+ iou_thrs=None,
+ print_summary=True):
+ """Calculate recalls.
+
+ Args:
+ gts (list or ndarray): a list of arrays of shape (n, 4)
+ proposals (list or ndarray): a list of arrays of shape (k, 4) or (k, 5)
+ proposal_nums (int or list of int or ndarray): top N proposals
+ iou_thrs (float or list or ndarray): IoU thresholds
+ print_summary (bool): whether to print the recall summary table
+
+ Returns:
+ ndarray: recalls of different ious and proposal nums
+ """
+
+ img_num = len(gts)
+ assert img_num == len(proposals)
+
+ proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)
+
+ all_ious = []
+ for i in range(img_num):
+ if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
+ scores = proposals[i][:, 4]
+ sort_idx = np.argsort(scores)[::-1]
+ img_proposal = proposals[i][sort_idx, :]
+ else:
+ img_proposal = proposals[i]
+ prop_num = min(img_proposal.shape[0], proposal_nums[-1])
+ if gts[i] is None or gts[i].shape[0] == 0:
+ ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
+ else:
+ ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4])
+ all_ious.append(ious)
+ all_ious = np.array(all_ious)
+ recalls = _recalls(all_ious, proposal_nums, iou_thrs)
+ if print_summary:
+ print_recall_summary(recalls, proposal_nums, iou_thrs)
+ return recalls
+
+
+def print_recall_summary(recalls,
+ proposal_nums,
+ iou_thrs,
+ row_idxs=None,
+ col_idxs=None):
+ """Print recalls in a table.
+
+ Args:
+ recalls(ndarray): calculated from `eval_recalls`
+ proposal_nums(ndarray or list): top N proposals
+ iou_thrs(ndarray or list): iou thresholds
+ row_idxs(ndarray): which rows(proposal nums) to print
+ col_idxs(ndarray): which cols(iou thresholds) to print
+ """
+ proposal_nums = np.array(proposal_nums, dtype=np.int32)
+ iou_thrs = np.array(iou_thrs)
+ if row_idxs is None:
+ row_idxs = np.arange(proposal_nums.size)
+ if col_idxs is None:
+ col_idxs = np.arange(iou_thrs.size)
+ row_header = [''] + iou_thrs[col_idxs].tolist()
+ table_data = [row_header]
+ for i, num in enumerate(proposal_nums[row_idxs]):
+ row = [
+ '{:.3f}'.format(val)
+ for val in recalls[row_idxs[i], col_idxs].tolist()
+ ]
+ row.insert(0, num)
+ table_data.append(row)
+ table = AsciiTable(table_data)
+ print(table.table)
+
+
+def plot_num_recall(recalls, proposal_nums):
+ """Plot Proposal_num-Recalls curve.
+
+ Args:
+ recalls(ndarray or list): shape (k,)
+ proposal_nums(ndarray or list): same shape as `recalls`
+ """
+ if isinstance(proposal_nums, np.ndarray):
+ _proposal_nums = proposal_nums.tolist()
+ else:
+ _proposal_nums = proposal_nums
+ if isinstance(recalls, np.ndarray):
+ _recalls = recalls.tolist()
+ else:
+ _recalls = recalls
+
+ import matplotlib.pyplot as plt
+ f = plt.figure()
+ plt.plot([0] + _proposal_nums, [0] + _recalls)
+ plt.xlabel('Proposal num')
+ plt.ylabel('Recall')
+ plt.axis([0, proposal_nums.max(), 0, 1])
+ f.show()
+
+
+def plot_iou_recall(recalls, iou_thrs):
+ """Plot IoU-Recalls curve.
+
+ Args:
+ recalls(ndarray or list): shape (k,)
+ iou_thrs(ndarray or list): same shape as `recalls`
+ """
+ if isinstance(iou_thrs, np.ndarray):
+ _iou_thrs = iou_thrs.tolist()
+ else:
+ _iou_thrs = iou_thrs
+ if isinstance(recalls, np.ndarray):
+ _recalls = recalls.tolist()
+ else:
+ _recalls = recalls
+
+ import matplotlib.pyplot as plt
+ f = plt.figure()
+ plt.plot(_iou_thrs + [1.0], _recalls + [0.])
+ plt.xlabel('IoU')
+ plt.ylabel('Recall')
+ plt.axis([iou_thrs.min(), 1, 0, 1])
+ f.show()
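A small usage sketch of eval_recalls (not part of the patch); the ground truths and scored proposals below are made up, and proposal rows are [x1, y1, x2, y2, score]:

import numpy as np

from mmdet.core.evaluation.recall import eval_recalls

gts = [np.array([[10., 10., 50., 50.]], dtype=np.float32)]
proposals = [np.array([[11., 11., 49., 49., 0.9],
                       [60., 60., 90., 90., 0.8],
                       [12., 8., 48., 52., 0.7]], dtype=np.float32)]

# the returned array has shape (num_proposal_nums, num_iou_thrs)
recalls = eval_recalls(gts, proposals,
                       proposal_nums=[1, 3],
                       iou_thrs=[0.5, 0.7],
                       print_summary=True)
print(recalls.shape)  # (2, 2)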
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/__init__.py
new file mode 100644
index 000000000..cc655b7c3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/__init__.py
@@ -0,0 +1,4 @@
+from .decorators import auto_fp16, force_fp32
+from .hooks import Fp16OptimizerHook, wrap_fp16_model
+
+__all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/decorators.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/decorators.py
new file mode 100644
index 000000000..10ffbf898
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/decorators.py
@@ -0,0 +1,160 @@
+import functools
+from inspect import getfullargspec
+
+import torch
+
+from .utils import cast_tensor_type
+
+
+def auto_fp16(apply_to=None, out_fp32=False):
+ """Decorator to enable fp16 training automatically.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If input arguments are fp32 tensors, they will
+ be converted to fp16 automatically. Arguments other than fp32 tensors are
+ ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp32 (bool): Whether to convert the output back to fp32.
+
+ :Example:
+
+ class MyModule1(nn.Module):
+
+ # Convert x and y to fp16
+ @auto_fp16()
+ def forward(self, x, y):
+ pass
+
+ class MyModule2(nn.Module):
+
+ # convert pred to fp16
+ @auto_fp16(apply_to=('pred', ))
+ def do_something(self, pred, others):
+ pass
+ """
+
+ def auto_fp16_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@auto_fp16 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be casted
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ # NOTE: default args are not taken into consideration
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.float, torch.half))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = {}
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.float, torch.half)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp32 if necessary
+ if out_fp32:
+ output = cast_tensor_type(output, torch.half, torch.float)
+ return output
+
+ return new_func
+
+ return auto_fp16_wrapper
+
+
+def force_fp32(apply_to=None, out_fp16=False):
+ """Decorator to convert input arguments to fp32 in force.
+
+ This decorator is useful when you write custom modules and want to support
+ mixed precision training. If there are some inputs that must be processed
+ in fp32 mode, then this decorator can handle it. If input arguments are
+ fp16 tensors, they will be converted to fp32 automatically. Arguments other
+ than fp16 tensors are ignored.
+
+ Args:
+ apply_to (Iterable, optional): The argument names to be converted.
+ `None` indicates all arguments.
+ out_fp16 (bool): Whether to convert the output back to fp16.
+
+ :Example:
+
+ class MyModule1(nn.Module):
+
+ # Convert x and y to fp32
+ @force_fp32()
+ def loss(self, x, y):
+ pass
+
+ class MyModule2(nn.Module):
+
+ # convert pred to fp32
+ @force_fp32(apply_to=('pred', ))
+ def post_process(self, pred, others):
+ pass
+ """
+
+ def force_fp32_wrapper(old_func):
+
+ @functools.wraps(old_func)
+ def new_func(*args, **kwargs):
+ # check if the module has set the attribute `fp16_enabled`, if not,
+ # just fallback to the original method.
+ if not isinstance(args[0], torch.nn.Module):
+ raise TypeError('@force_fp32 can only be used to decorate the '
+ 'method of nn.Module')
+ if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+ return old_func(*args, **kwargs)
+ # get the arg spec of the decorated method
+ args_info = getfullargspec(old_func)
+ # get the argument names to be casted
+ args_to_cast = args_info.args if apply_to is None else apply_to
+ # convert the args that need to be processed
+ new_args = []
+ if args:
+ arg_names = args_info.args[:len(args)]
+ for i, arg_name in enumerate(arg_names):
+ if arg_name in args_to_cast:
+ new_args.append(
+ cast_tensor_type(args[i], torch.half, torch.float))
+ else:
+ new_args.append(args[i])
+ # convert the kwargs that need to be processed
+ new_kwargs = dict()
+ if kwargs:
+ for arg_name, arg_value in kwargs.items():
+ if arg_name in args_to_cast:
+ new_kwargs[arg_name] = cast_tensor_type(
+ arg_value, torch.half, torch.float)
+ else:
+ new_kwargs[arg_name] = arg_value
+ # apply converted arguments to the decorated method
+ output = old_func(*new_args, **new_kwargs)
+ # cast the results back to fp16 if necessary
+ if out_fp16:
+ output = cast_tensor_type(output, torch.float, torch.half)
+ return output
+
+ return new_func
+
+ return force_fp32_wrapper
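A brief sketch (not part of the patch) of how these decorators are meant to be used in a module. The casting only kicks in once `fp16_enabled` is True, which wrap_fp16_model takes care of; ToyHead and its methods are made up for illustration:

import torch
import torch.nn as nn

from mmdet.core.fp16 import auto_fp16, force_fp32


class ToyHead(nn.Module):

    def __init__(self):
        super(ToyHead, self).__init__()
        self.fp16_enabled = False  # flipped to True by wrap_fp16_model
        self.fc = nn.Linear(4, 2)

    @auto_fp16(apply_to=('x', ))
    def forward(self, x):
        # x arrives as fp16 once fp16_enabled is True
        return self.fc(x)

    @force_fp32(apply_to=('pred', ))
    def loss(self, pred, target):
        # pred is cast back to fp32 so the loss is computed in full precision
        return nn.functional.mse_loss(pred, target)


head = ToyHead()
out = head(torch.randn(3, 4))  # fp16_enabled is False here, so no casting happens
print(head.loss(out, torch.zeros(3, 2)))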
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/hooks.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/hooks.py
new file mode 100644
index 000000000..6b4dacb1c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/hooks.py
@@ -0,0 +1,127 @@
+import copy
+
+import torch
+import torch.nn as nn
+from mmcv.runner import OptimizerHook
+
+from ..utils.dist_utils import allreduce_grads
+from .utils import cast_tensor_type
+
+
+class Fp16OptimizerHook(OptimizerHook):
+ """FP16 optimizer hook.
+
+ The steps of the fp16 optimizer are as follows.
+ 1. Scale the loss value.
+ 2. BP in the fp16 model.
+ 3. Copy gradients from fp16 model to fp32 weights.
+ 4. Update fp32 weights.
+ 5. Copy updated parameters from fp32 weights to fp16 model.
+
+ Refer to https://arxiv.org/abs/1710.03740 for more details.
+
+ Args:
+ loss_scale (float): Scale factor multiplied with loss.
+ """
+
+ def __init__(self,
+ grad_clip=None,
+ coalesce=True,
+ bucket_size_mb=-1,
+ loss_scale=512.,
+ distributed=True):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+ self.loss_scale = loss_scale
+ self.distributed = distributed
+
+ def before_run(self, runner):
+ # keep a copy of fp32 weights
+ runner.optimizer.param_groups = copy.deepcopy(
+ runner.optimizer.param_groups)
+ # convert model to fp16
+ wrap_fp16_model(runner.model)
+
+ def copy_grads_to_fp32(self, fp16_net, fp32_weights):
+ """Copy gradients from fp16 model to fp32 weight copy."""
+ for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()):
+ if fp16_param.grad is not None:
+ if fp32_param.grad is None:
+ fp32_param.grad = fp32_param.data.new(fp32_param.size())
+ fp32_param.grad.copy_(fp16_param.grad)
+
+ def copy_params_to_fp16(self, fp16_net, fp32_weights):
+ """Copy updated params from fp32 weight copy to fp16 model."""
+ for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights):
+ fp16_param.data.copy_(fp32_param.data)
+
+ def after_train_iter(self, runner):
+ # clear grads of last iteration
+ runner.model.zero_grad()
+ runner.optimizer.zero_grad()
+ # scale the loss value
+ scaled_loss = runner.outputs['loss'] * self.loss_scale
+ scaled_loss.backward()
+ # copy fp16 grads in the model to fp32 params in the optimizer
+ fp32_weights = []
+ for param_group in runner.optimizer.param_groups:
+ fp32_weights += param_group['params']
+ self.copy_grads_to_fp32(runner.model, fp32_weights)
+ # allreduce grads
+ if self.distributed:
+ allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb)
+ # scale the gradients back
+ for param in fp32_weights:
+ if param.grad is not None:
+ param.grad.div_(self.loss_scale)
+ if self.grad_clip is not None:
+ self.clip_grads(fp32_weights)
+ # update fp32 params
+ runner.optimizer.step()
+ # copy fp32 params to the fp16 model
+ self.copy_params_to_fp16(runner.model, fp32_weights)
+
+
+def wrap_fp16_model(model):
+ # convert model to fp16
+ model.half()
+ # patch the normalization layers to make it work in fp32 mode
+ patch_norm_fp32(model)
+ # set `fp16_enabled` flag
+ for m in model.modules():
+ if hasattr(m, 'fp16_enabled'):
+ m.fp16_enabled = True
+
+
+def patch_norm_fp32(module):
+ if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)):
+ module.float()
+ module.forward = patch_forward_method(module.forward, torch.half,
+ torch.float)
+ for child in module.children():
+ patch_norm_fp32(child)
+ return module
+
+
+def patch_forward_method(func, src_type, dst_type, convert_output=True):
+ """Patch the forward method of a module.
+
+ Args:
+ func (callable): The original forward method.
+ src_type (torch.dtype): Type of input arguments to be converted from.
+ dst_type (torch.dtype): Type of input arguments to be converted to.
+ convert_output (bool): Whether to convert the output back to src_type.
+
+ Returns:
+ callable: The patched forward method.
+ """
+
+ def new_forward(*args, **kwargs):
+ output = func(*cast_tensor_type(args, src_type, dst_type),
+ **cast_tensor_type(kwargs, src_type, dst_type))
+ if convert_output:
+ output = cast_tensor_type(output, dst_type, src_type)
+ return output
+
+ return new_forward
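A minimal sketch (not part of the patch) of what wrap_fp16_model does to a toy model: parameters are halved, normalization layers are patched back to fp32, and any `fp16_enabled` attribute is flipped to True:

import torch
import torch.nn as nn

from mmdet.core.fp16 import wrap_fp16_model

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1),
                      nn.BatchNorm2d(8),
                      nn.ReLU())
wrap_fp16_model(model)

print(model[0].weight.dtype)  # torch.float16: conv weights are halved
print(model[1].weight.dtype)  # torch.float32: BN is patched back to fp32

# half-precision convolutions generally need a GPU, so only run the forward there
if torch.cuda.is_available():
    model = model.cuda()
    out = model(torch.randn(1, 3, 8, 8).cuda().half())
    print(out.dtype)  # torch.float16: BN casts to fp32 internally and back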
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/utils.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/utils.py
new file mode 100644
index 000000000..ce691c799
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/utils.py
@@ -0,0 +1,23 @@
+from collections import abc
+
+import numpy as np
+import torch
+
+
+def cast_tensor_type(inputs, src_type, dst_type):
+ if isinstance(inputs, torch.Tensor):
+ return inputs.to(dst_type)
+ elif isinstance(inputs, str):
+ return inputs
+ elif isinstance(inputs, np.ndarray):
+ return inputs
+ elif isinstance(inputs, abc.Mapping):
+ return type(inputs)({
+ k: cast_tensor_type(v, src_type, dst_type)
+ for k, v in inputs.items()
+ })
+ elif isinstance(inputs, abc.Iterable):
+ return type(inputs)(
+ cast_tensor_type(item, src_type, dst_type) for item in inputs)
+ else:
+ return inputs
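A quick sketch (not part of the patch): cast_tensor_type recurses through mappings and iterables, converts tensors to the destination dtype, and passes strings, ndarrays and other values through unchanged. The nested batch below is made up:

import torch

from mmdet.core.fp16.utils import cast_tensor_type

batch = {
    'img': torch.randn(2, 3, 4, 4),
    'meta': {'filename': 'a.jpg', 'scale_factor': 1.0},
    'feats': [torch.ones(2, 8), torch.zeros(2, 8)],
}
half_batch = cast_tensor_type(batch, torch.float, torch.half)

print(half_batch['img'].dtype)         # torch.float16
print(half_batch['feats'][0].dtype)    # torch.float16
print(half_batch['meta']['filename'])  # 'a.jpg' (non-tensors pass through)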
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/__init__.py
new file mode 100644
index 000000000..845e7180e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/__init__.py
@@ -0,0 +1,4 @@
+from .mask_target import mask_target
+from .utils import split_combined_polys
+
+__all__ = ['split_combined_polys', 'mask_target']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/mask_target.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/mask_target.py
new file mode 100644
index 000000000..6603f11a4
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/mask_target.py
@@ -0,0 +1,41 @@
+import mmcv
+import numpy as np
+import torch
+from torch.nn.modules.utils import _pair
+
+
+def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
+ cfg):
+ cfg_list = [cfg for _ in range(len(pos_proposals_list))]
+ mask_targets = map(mask_target_single, pos_proposals_list,
+ pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
+ mask_targets = torch.cat(list(mask_targets))
+ return mask_targets
+
+
+def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
+ mask_size = _pair(cfg.mask_size)
+ num_pos = pos_proposals.size(0)
+ mask_targets = []
+ if num_pos > 0:
+ proposals_np = pos_proposals.cpu().numpy()
+ _, maxh, maxw = gt_masks.shape
+ proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1)
+ proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1)
+ pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
+ for i in range(num_pos):
+ gt_mask = gt_masks[pos_assigned_gt_inds[i]]
+ bbox = proposals_np[i, :].astype(np.int32)
+ x1, y1, x2, y2 = bbox
+ w = np.maximum(x2 - x1 + 1, 1)
+ h = np.maximum(y2 - y1 + 1, 1)
+ # mask is uint8 both before and after resizing
+ # mask_size (h, w) to (w, h)
+ target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
+ mask_size[::-1])
+ mask_targets.append(target)
+ mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
+ pos_proposals.device)
+ else:
+ mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
+ return mask_targets
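A toy sketch (not part of the patch) of mask_target_single with one positive proposal and one binary ground-truth mask; the cfg only needs a mask_size attribute, so an mmcv.Config stands in for the real training config:

import mmcv
import numpy as np
import torch

from mmdet.core.mask.mask_target import mask_target_single

cfg = mmcv.Config(dict(mask_size=28))

# one 64x64 ground-truth mask with a filled square
gt_masks = np.zeros((1, 64, 64), dtype=np.uint8)
gt_masks[0, 16:48, 16:48] = 1

pos_proposals = torch.tensor([[14., 14., 50., 50.]])
pos_assigned_gt_inds = torch.tensor([0])

targets = mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg)
print(targets.shape)  # torch.Size([1, 28, 28])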
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/utils.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/utils.py
new file mode 100644
index 000000000..a68312b17
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/utils.py
@@ -0,0 +1,30 @@
+import mmcv
+
+
+def split_combined_polys(polys, poly_lens, polys_per_mask):
+ """Split the combined 1-D polys into masks.
+
+ A mask is represented as a list of polys, and a poly is represented as
+ a 1-D array. In dataset, all masks are concatenated into a single 1-D
+ tensor. Here we need to split the tensor into original representations.
+
+ Args:
+ polys (list): a list (length = image num) of 1-D tensors
+ poly_lens (list): a list (length = image num) of poly length
+ polys_per_mask (list): a list (length = image num) of poly number
+ of each mask
+
+ Returns:
+ list: a list (length = image num) of list (length = mask num) of
+ list (length = poly num) of numpy array
+ """
+ mask_polys_list = []
+ for img_id in range(len(polys)):
+ polys_single = polys[img_id]
+ polys_lens_single = poly_lens[img_id].tolist()
+ polys_per_mask_single = polys_per_mask[img_id].tolist()
+
+ split_polys = mmcv.slice_list(polys_single, polys_lens_single)
+ mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
+ mask_polys_list.append(mask_polys)
+ return mask_polys_list
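A hypothetical example (not part of the patch) of split_combined_polys with one image and two masks, where the first mask has a single polygon and the second has two:

import torch

from mmdet.core.mask.utils import split_combined_polys

# 20 flattened coordinates in total: one 8-coordinate polygon for mask 1,
# then two 6-coordinate polygons for mask 2
polys = [torch.arange(20, dtype=torch.float32)]
poly_lens = [torch.tensor([8, 6, 6])]
polys_per_mask = [torch.tensor([1, 2])]

mask_polys = split_combined_polys(polys, poly_lens, polys_per_mask)
print(len(mask_polys[0]))         # 2 masks in the image
print(len(mask_polys[0][1]))      # the second mask holds 2 polygons
print(mask_polys[0][0][0].shape)  # torch.Size([8])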
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/__init__.py
new file mode 100644
index 000000000..73fb1990c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/__init__.py
@@ -0,0 +1,9 @@
+from .bbox_nms import multiclass_nms
+from .matrix_nms import matrix_nms
+from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
+ merge_aug_proposals, merge_aug_scores)
+
+__all__ = [
+ 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
+ 'merge_aug_scores', 'merge_aug_masks', 'matrix_nms'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/bbox_nms.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/bbox_nms.py
new file mode 100644
index 000000000..ce3794c64
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/bbox_nms.py
@@ -0,0 +1,66 @@
+import torch
+
+from mmdet.ops.nms import nms_wrapper
+
+
+def multiclass_nms(multi_bboxes,
+ multi_scores,
+ score_thr,
+ nms_cfg,
+ max_num=-1,
+ score_factors=None):
+ """NMS for multi-class bboxes.
+
+ Args:
+ multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
+ multi_scores (Tensor): shape (n, #class), where the 0th column
+ contains scores of the background class, but this will be ignored.
+ score_thr (float): bbox threshold, bboxes with scores lower than it
+ will not be considered.
+ nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5)
+ max_num (int): if there are more than max_num bboxes after NMS,
+ only top max_num will be kept.
+ score_factors (Tensor): The factors multiplied to scores before
+ applying NMS
+
+ Returns:
+ tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
+ are 0-based.
+ """
+ num_classes = multi_scores.shape[1]
+ bboxes, labels = [], []
+ nms_cfg_ = nms_cfg.copy()
+ nms_type = nms_cfg_.pop('type', 'nms')
+ nms_op = getattr(nms_wrapper, nms_type)
+ for i in range(1, num_classes):
+ cls_inds = multi_scores[:, i] > score_thr
+ if not cls_inds.any():
+ continue
+ # get bboxes and scores of this class
+ if multi_bboxes.shape[1] == 4:
+ _bboxes = multi_bboxes[cls_inds, :]
+ else:
+ _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
+ _scores = multi_scores[cls_inds, i]
+ if score_factors is not None:
+ _scores *= score_factors[cls_inds]
+ cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
+ cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
+ cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
+ i - 1,
+ dtype=torch.long)
+ bboxes.append(cls_dets)
+ labels.append(cls_labels)
+ if bboxes:
+ bboxes = torch.cat(bboxes)
+ labels = torch.cat(labels)
+ if bboxes.shape[0] > max_num:
+ _, inds = bboxes[:, -1].sort(descending=True)
+ inds = inds[:max_num]
+ bboxes = bboxes[inds]
+ labels = labels[inds]
+ else:
+ bboxes = multi_bboxes.new_zeros((0, 5))
+ labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
+
+ return bboxes, labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/matrix_nms.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/matrix_nms.py
new file mode 100644
index 000000000..cbbe4209f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/matrix_nms.py
@@ -0,0 +1,117 @@
+import torch
+
+from mmdet.ops.nms import nms_wrapper
+
+
+def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
+ """Matrix NMS for multi-class masks.
+
+ Args:
+ seg_masks (Tensor): shape (n, h, w)
+ cate_labels (Tensor): shape (n), mask labels in descending order
+ cate_scores (Tensor): shape (n), mask scores in descending order
+ kernel (str): 'linear' or 'gaussian'
+ sigma (float): std in gaussian method
+ sum_masks (Tensor): The sum of seg_masks
+
+ Returns:
+ Tensor: cate_scores_update, tensors of shape (n)
+ """
+ n_samples = len(cate_labels)
+ if n_samples == 0:
+ return []
+ if sum_masks is None:
+ sum_masks = seg_masks.sum((1, 2)).float()
+ seg_masks = seg_masks.reshape(n_samples, -1).float()
+ # inter.
+ inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0))
+ # union.
+ sum_masks_x = sum_masks.expand(n_samples, n_samples)
+ # iou.
+ iou_matrix = (inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)).triu(diagonal=1)
+ # label_specific matrix.
+ cate_labels_x = cate_labels.expand(n_samples, n_samples)
+ label_matrix = (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1)
+
+ # IoU compensation
+ compensate_iou, _ = (iou_matrix * label_matrix).max(0)
+ compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0)
+
+ # IoU decay
+ decay_iou = iou_matrix * label_matrix
+
+ # matrix nms
+ if kernel == 'gaussian':
+ decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2))
+ compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2))
+ decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)
+ elif kernel == 'linear':
+ decay_matrix = (1-decay_iou)/(1-compensate_iou)
+ decay_coefficient, _ = decay_matrix.min(0)
+ else:
+ raise NotImplementedError
+
+ # update the score.
+ cate_scores_update = cate_scores * decay_coefficient
+ return cate_scores_update
+
+
+def multiclass_nms(multi_bboxes,
+ multi_scores,
+ score_thr,
+ nms_cfg,
+ max_num=-1,
+ score_factors=None):
+ """NMS for multi-class bboxes.
+
+ Args:
+ multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
+ multi_scores (Tensor): shape (n, #class), where the 0th column
+ contains scores of the background class, but this will be ignored.
+ score_thr (float): bbox threshold, bboxes with scores lower than it
+ will not be considered.
+ nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5)
+ max_num (int): if there are more than max_num bboxes after NMS,
+ only top max_num will be kept.
+ score_factors (Tensor): The factors multiplied to scores before
+ applying NMS
+
+ Returns:
+ tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
+ are 0-based.
+ """
+ num_classes = multi_scores.shape[1]
+ bboxes, labels = [], []
+ nms_cfg_ = nms_cfg.copy()
+ nms_type = nms_cfg_.pop('type', 'nms')
+ nms_op = getattr(nms_wrapper, nms_type)
+ for i in range(1, num_classes):
+ cls_inds = multi_scores[:, i] > score_thr
+ if not cls_inds.any():
+ continue
+ # get bboxes and scores of this class
+ if multi_bboxes.shape[1] == 4:
+ _bboxes = multi_bboxes[cls_inds, :]
+ else:
+ _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
+ _scores = multi_scores[cls_inds, i]
+ if score_factors is not None:
+ _scores *= score_factors[cls_inds]
+ cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
+ cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
+ cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
+ i - 1,
+ dtype=torch.long)
+ bboxes.append(cls_dets)
+ labels.append(cls_labels)
+ if bboxes:
+ bboxes = torch.cat(bboxes)
+ labels = torch.cat(labels)
+ if bboxes.shape[0] > max_num:
+ _, inds = bboxes[:, -1].sort(descending=True)
+ inds = inds[:max_num]
+ bboxes = bboxes[inds]
+ labels = labels[inds]
+ else:
+ bboxes = multi_bboxes.new_zeros((0, 5))
+ labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
+
+ return bboxes, labels
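Matrix NMS decays scores instead of discarding masks outright: of two heavily overlapping masks of the same class, the higher-scored one keeps its score and the lower one is softly suppressed. A toy check (not part of the patch; the masks are made up, and importing the package assumes the mmdet ops are built):

import torch

from mmdet.core.post_processing import matrix_nms

# three 16x16 masks: two nearly identical, one disjoint
seg_masks = torch.zeros(3, 16, 16, dtype=torch.bool)
seg_masks[0, 2:10, 2:10] = True
seg_masks[1, 2:10, 3:11] = True  # high IoU with mask 0, same class
seg_masks[2, 12:16, 12:16] = True

cate_labels = torch.tensor([0, 0, 1])
cate_scores = torch.tensor([0.9, 0.8, 0.7])  # already sorted in descending order

new_scores = matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian')
print(new_scores)  # masks 0 and 2 keep their scores, mask 1 is decayed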
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/merge_augs.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/merge_augs.py
new file mode 100644
index 000000000..a0214d63f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/post_processing/merge_augs.py
@@ -0,0 +1,101 @@
+import numpy as np
+import torch
+
+from mmdet.ops import nms
+from ..bbox import bbox_mapping_back
+
+
+def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
+ """Merge augmented proposals (multiscale, flip, etc.)
+
+ Args:
+ aug_proposals (list[Tensor]): proposals from different testing
+ schemes, shape (n, 5). Note that they are not rescaled to the
+ original image size.
+
+ img_metas (list[dict]): list of image info dict where each dict has:
+ 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmdet/datasets/pipelines/formatting.py:Collect`.
+
+ rpn_test_cfg (dict): rpn test config.
+
+ Returns:
+ Tensor: shape (n, 4), proposals corresponding to original image scale.
+ """
+ recovered_proposals = []
+ for proposals, img_info in zip(aug_proposals, img_metas):
+ img_shape = img_info['img_shape']
+ scale_factor = img_info['scale_factor']
+ flip = img_info['flip']
+ _proposals = proposals.clone()
+ _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
+ scale_factor, flip)
+ recovered_proposals.append(_proposals)
+ aug_proposals = torch.cat(recovered_proposals, dim=0)
+ merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
+ scores = merged_proposals[:, 4]
+ _, order = scores.sort(0, descending=True)
+ num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
+ order = order[:num]
+ merged_proposals = merged_proposals[order, :]
+ return merged_proposals
+
+
+def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
+ """Merge augmented detection bboxes and scores.
+
+ Args:
+ aug_bboxes (list[Tensor]): shape (n, 4*#class)
+ aug_scores (list[Tensor] or None): shape (n, #class)
+ img_metas (list[list[dict]]): image info of each augmentation.
+ rcnn_test_cfg (dict): rcnn test config.
+
+ Returns:
+ tuple: (bboxes, scores)
+ """
+ recovered_bboxes = []
+ for bboxes, img_info in zip(aug_bboxes, img_metas):
+ img_shape = img_info[0]['img_shape']
+ scale_factor = img_info[0]['scale_factor']
+ flip = img_info[0]['flip']
+ bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
+ recovered_bboxes.append(bboxes)
+ bboxes = torch.stack(recovered_bboxes).mean(dim=0)
+ if aug_scores is None:
+ return bboxes
+ else:
+ scores = torch.stack(aug_scores).mean(dim=0)
+ return bboxes, scores
+
+
+def merge_aug_scores(aug_scores):
+ """Merge augmented bbox scores."""
+ if isinstance(aug_scores[0], torch.Tensor):
+ return torch.mean(torch.stack(aug_scores), dim=0)
+ else:
+ return np.mean(aug_scores, axis=0)
+
+
+def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
+ """Merge augmented mask prediction.
+
+ Args:
+ aug_masks (list[ndarray]): shape (n, #class, h, w)
+ img_metas (list[list[dict]]): image info of each augmentation.
+ rcnn_test_cfg (dict): rcnn test config.
+
+ Returns:
+ ndarray: merged masks
+ """
+ recovered_masks = [
+ mask if not img_info[0]['flip'] else mask[..., ::-1]
+ for mask, img_info in zip(aug_masks, img_metas)
+ ]
+ if weights is None:
+ merged_masks = np.mean(recovered_masks, axis=0)
+ else:
+ merged_masks = np.average(
+ np.array(recovered_masks), axis=0, weights=np.array(weights))
+ return merged_masks
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/__init__.py
new file mode 100644
index 000000000..cc999ea10
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/__init__.py
@@ -0,0 +1,7 @@
+from .dist_utils import DistOptimizerHook, allreduce_grads
+from .misc import multi_apply, tensor2imgs, unmap
+
+__all__ = [
+ 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
+ 'multi_apply'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/dist_utils.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/dist_utils.py
new file mode 100644
index 000000000..be830b6a2
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/dist_utils.py
@@ -0,0 +1,58 @@
+from collections import OrderedDict
+
+import torch.distributed as dist
+from mmcv.runner import OptimizerHook
+from torch._utils import (_flatten_dense_tensors, _take_tensors,
+ _unflatten_dense_tensors)
+
+
+def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
+ if bucket_size_mb > 0:
+ bucket_size_bytes = bucket_size_mb * 1024 * 1024
+ buckets = _take_tensors(tensors, bucket_size_bytes)
+ else:
+ buckets = OrderedDict()
+ for tensor in tensors:
+ tp = tensor.type()
+ if tp not in buckets:
+ buckets[tp] = []
+ buckets[tp].append(tensor)
+ buckets = buckets.values()
+
+ for bucket in buckets:
+ flat_tensors = _flatten_dense_tensors(bucket)
+ dist.all_reduce(flat_tensors)
+ flat_tensors.div_(world_size)
+ for tensor, synced in zip(
+ bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
+ tensor.copy_(synced)
+
+
+def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
+ grads = [
+ param.grad.data for param in params
+ if param.requires_grad and param.grad is not None
+ ]
+ world_size = dist.get_world_size()
+ if coalesce:
+ _allreduce_coalesced(grads, world_size, bucket_size_mb)
+ else:
+ for tensor in grads:
+ dist.all_reduce(tensor.div_(world_size))
+
+
+class DistOptimizerHook(OptimizerHook):
+
+ def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
+ self.grad_clip = grad_clip
+ self.coalesce = coalesce
+ self.bucket_size_mb = bucket_size_mb
+
+ def after_train_iter(self, runner):
+ runner.optimizer.zero_grad()
+ runner.outputs['loss'].backward()
+ allreduce_grads(runner.model.parameters(), self.coalesce,
+ self.bucket_size_mb)
+ if self.grad_clip is not None:
+ self.clip_grads(runner.model.parameters())
+ runner.optimizer.step()
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/misc.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/misc.py
new file mode 100644
index 000000000..262f168e6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/core/utils/misc.py
@@ -0,0 +1,37 @@
+from functools import partial
+
+import mmcv
+import numpy as np
+from six.moves import map, zip
+
+
+def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
+ num_imgs = tensor.size(0)
+ mean = np.array(mean, dtype=np.float32)
+ std = np.array(std, dtype=np.float32)
+ imgs = []
+ for img_id in range(num_imgs):
+ img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
+ img = mmcv.imdenormalize(
+ img, mean, std, to_bgr=to_rgb).astype(np.uint8)
+ imgs.append(np.ascontiguousarray(img))
+ return imgs
+
+
+def multi_apply(func, *args, **kwargs):
+ pfunc = partial(func, **kwargs) if kwargs else func
+ map_results = map(pfunc, *args)
+ return tuple(map(list, zip(*map_results)))
+
+
+def unmap(data, count, inds, fill=0):
+ """ Unmap a subset of item (data) back to the original set of items (of
+ size count) """
+ if data.dim() == 1:
+ ret = data.new_full((count, ), fill)
+ ret[inds] = data
+ else:
+ new_size = (count, ) + data.size()[1:]
+ ret = data.new_full(new_size, fill)
+ ret[inds, :] = data
+ return ret
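A quick sketch (not part of the patch): multi_apply maps a function over parallel lists and transposes the per-call result tuples into per-output lists, while unmap scatters a filtered subset back into a full-size tensor. The toy function below is made up:

import torch

from mmdet.core.utils.misc import multi_apply, unmap


def scale_and_shift(x, y, scale=1.0):
    return x * scale, y + 1


xs = [torch.tensor([1., 2.]), torch.tensor([3., 4.])]
ys = [torch.tensor([0., 0.]), torch.tensor([1., 1.])]
scaled, shifted = multi_apply(scale_and_shift, xs, ys, scale=2.0)
print(scaled)   # [tensor([2., 4.]), tensor([6., 8.])]
print(shifted)  # [tensor([1., 1.]), tensor([2., 2.])]

# scatter values kept at 2 indices back into a length-5 tensor
data = torch.tensor([10., 20.])
inds = torch.tensor([1, 3])
print(unmap(data, 5, inds, fill=0))  # tensor([ 0., 10.,  0., 20.,  0.])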
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/__init__.py
new file mode 100644
index 000000000..7ad926d4c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/__init__.py
@@ -0,0 +1,17 @@
+from .builder import build_dataset
+from .cityscapes import CityscapesDataset
+from .coco import CocoDataset
+from .custom import CustomDataset
+from .dataset_wrappers import ConcatDataset, RepeatDataset
+from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
+from .registry import DATASETS
+from .voc import VOCDataset
+from .wider_face import WIDERFaceDataset
+from .xml_style import XMLDataset
+
+__all__ = [
+ 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
+ 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
+ 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset',
+ 'DATASETS', 'build_dataset'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/builder.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/builder.py
new file mode 100644
index 000000000..6e707b190
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/builder.py
@@ -0,0 +1,41 @@
+import copy
+
+from mmdet.utils import build_from_cfg
+from .dataset_wrappers import ConcatDataset, RepeatDataset
+from .registry import DATASETS
+
+
+def _concat_dataset(cfg, default_args=None):
+ ann_files = cfg['ann_file']
+ img_prefixes = cfg.get('img_prefix', None)
+ seg_prefixes = cfg.get('seg_prefix', None)
+ proposal_files = cfg.get('proposal_file', None)
+
+ datasets = []
+ num_dset = len(ann_files)
+ for i in range(num_dset):
+ data_cfg = copy.deepcopy(cfg)
+ data_cfg['ann_file'] = ann_files[i]
+ if isinstance(img_prefixes, (list, tuple)):
+ data_cfg['img_prefix'] = img_prefixes[i]
+ if isinstance(seg_prefixes, (list, tuple)):
+ data_cfg['seg_prefix'] = seg_prefixes[i]
+ if isinstance(proposal_files, (list, tuple)):
+ data_cfg['proposal_file'] = proposal_files[i]
+ datasets.append(build_dataset(data_cfg, default_args))
+
+ return ConcatDataset(datasets)
+
+
+def build_dataset(cfg, default_args=None):
+ if isinstance(cfg, (list, tuple)):
+ dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
+ elif cfg['type'] == 'RepeatDataset':
+ dataset = RepeatDataset(
+ build_dataset(cfg['dataset'], default_args), cfg['times'])
+ elif isinstance(cfg['ann_file'], (list, tuple)):
+ dataset = _concat_dataset(cfg, default_args)
+ else:
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
+
+ return dataset
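For orientation (not part of the patch), build_dataset dispatches on the config shape: a list of configs builds a ConcatDataset, type='RepeatDataset' wraps an inner dataset, a list of ann_files goes through _concat_dataset, and everything else is built from the DATASETS registry. Hypothetical configs with placeholder paths:

train_pipeline = []  # placeholder; a real config lists the pipeline transforms

# registry branch: built via build_from_cfg(cfg, DATASETS)
plain = dict(
    type='CocoDataset',
    ann_file='data/coco/annotations/instances_train2017.json',
    img_prefix='data/coco/train2017/',
    pipeline=train_pipeline)

# RepeatDataset branch: repeats the inner dataset 3 times per epoch
repeated = dict(type='RepeatDataset', times=3, dataset=plain)

# _concat_dataset branch: a list of ann_files yields a ConcatDataset
concat = dict(
    type='CocoDataset',
    ann_file=['data/coco/part1.json', 'data/coco/part2.json'],
    img_prefix=['data/coco/part1/', 'data/coco/part2/'],
    pipeline=train_pipeline)

# from mmdet.datasets import build_dataset
# dataset = build_dataset(repeated)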
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/cityscapes.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/cityscapes.py
new file mode 100644
index 000000000..51ca04987
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/cityscapes.py
@@ -0,0 +1,9 @@
+from .coco import CocoDataset
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class CityscapesDataset(CocoDataset):
+
+ CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
+ 'bicycle')
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/coco.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/coco.py
new file mode 100644
index 000000000..d041532ab
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/coco.py
@@ -0,0 +1,110 @@
+import numpy as np
+from pycocotools.coco import COCO
+
+from .custom import CustomDataset
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class CocoDataset(CustomDataset):
+
+ CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
+ 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
+ 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+ 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
+ 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
+ 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
+ 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+ 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+ 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
+ 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
+ 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
+ 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
+
+ def load_annotations(self, ann_file):
+ self.coco = COCO(ann_file)
+ self.cat_ids = self.coco.getCatIds()
+ self.cat2label = {
+ cat_id: i + 1
+ for i, cat_id in enumerate(self.cat_ids)
+ }
+ self.img_ids = self.coco.getImgIds()
+ img_infos = []
+ for i in self.img_ids:
+ info = self.coco.loadImgs([i])[0]
+ info['filename'] = info['file_name']
+ img_infos.append(info)
+ return img_infos
+
+ def get_ann_info(self, idx):
+ img_id = self.img_infos[idx]['id']
+ ann_ids = self.coco.getAnnIds(imgIds=[img_id])
+ ann_info = self.coco.loadAnns(ann_ids)
+ return self._parse_ann_info(self.img_infos[idx], ann_info)
+
+ def _filter_imgs(self, min_size=32):
+ """Filter images too small or without ground truths."""
+ valid_inds = []
+ ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
+ for i, img_info in enumerate(self.img_infos):
+ if self.filter_empty_gt and self.img_ids[i] not in ids_with_ann:
+ continue
+ if min(img_info['width'], img_info['height']) >= min_size:
+ valid_inds.append(i)
+ return valid_inds
+
+ def _parse_ann_info(self, img_info, ann_info):
+ """Parse bbox and mask annotation.
+
+ Args:
+ img_info (dict): Info of the image, e.g. filename, width, height.
+ ann_info (list[dict]): Annotation info of the image.
+
+ Returns:
+ dict: A dict containing the following keys: bboxes, bboxes_ignore,
+ labels, masks, seg_map. "masks" are raw annotations and not
+ decoded into binary masks.
+ """
+ gt_bboxes = []
+ gt_labels = []
+ gt_bboxes_ignore = []
+ gt_masks_ann = []
+
+ for i, ann in enumerate(ann_info):
+ if ann.get('ignore', False):
+ continue
+ x1, y1, w, h = ann['bbox']
+ if ann['area'] <= 0 or w < 1 or h < 1:
+ continue
+ bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
+ if ann.get('iscrowd', False):
+ gt_bboxes_ignore.append(bbox)
+ else:
+ gt_bboxes.append(bbox)
+ gt_labels.append(self.cat2label[ann['category_id']])
+ gt_masks_ann.append(ann['segmentation'])
+
+ if gt_bboxes:
+ gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
+ gt_labels = np.array(gt_labels, dtype=np.int64)
+ else:
+ gt_bboxes = np.zeros((0, 4), dtype=np.float32)
+ gt_labels = np.array([], dtype=np.int64)
+
+ if gt_bboxes_ignore:
+ gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
+ else:
+ gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
+
+ seg_map = img_info['filename'].replace('jpg', 'png')
+
+ ann = dict(
+ bboxes=gt_bboxes,
+ labels=gt_labels,
+ bboxes_ignore=gt_bboxes_ignore,
+ masks=gt_masks_ann,
+ seg_map=seg_map)
+
+ return ann
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/custom.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/custom.py
new file mode 100644
index 000000000..935b39d2c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/custom.py
@@ -0,0 +1,152 @@
+import os.path as osp
+
+import mmcv
+import numpy as np
+from torch.utils.data import Dataset
+
+from .pipelines import Compose
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class CustomDataset(Dataset):
+ """Custom dataset for detection.
+
+ Annotation format:
+ [
+ {
+ 'filename': 'a.jpg',
+ 'width': 1280,
+ 'height': 720,
+ 'ann': {
+ 'bboxes': (n, 4),
+ 'labels': (n, ),
+ 'bboxes_ignore': (k, 4), (optional field)
+ 'labels_ignore': (k, ) (optional field)
+ }
+ },
+ ...
+ ]
+
+ The `ann` field is optional for testing.
+ """
+
+ CLASSES = None
+
+ def __init__(self,
+ ann_file,
+ pipeline,
+ data_root=None,
+ img_prefix='',
+ seg_prefix=None,
+ proposal_file=None,
+ test_mode=False,
+ filter_empty_gt=True):
+ self.ann_file = ann_file
+ self.data_root = data_root
+ self.img_prefix = img_prefix
+ self.seg_prefix = seg_prefix
+ self.proposal_file = proposal_file
+ self.test_mode = test_mode
+ self.filter_empty_gt = filter_empty_gt
+
+ # join paths if data_root is specified
+ if self.data_root is not None:
+ if not osp.isabs(self.ann_file):
+ self.ann_file = osp.join(self.data_root, self.ann_file)
+ if not (self.img_prefix is None or osp.isabs(self.img_prefix)):
+ self.img_prefix = osp.join(self.data_root, self.img_prefix)
+ if not (self.seg_prefix is None or osp.isabs(self.seg_prefix)):
+ self.seg_prefix = osp.join(self.data_root, self.seg_prefix)
+ if not (self.proposal_file is None
+ or osp.isabs(self.proposal_file)):
+ self.proposal_file = osp.join(self.data_root,
+ self.proposal_file)
+ # load annotations (and proposals)
+ self.img_infos = self.load_annotations(self.ann_file)
+ if self.proposal_file is not None:
+ self.proposals = self.load_proposals(self.proposal_file)
+ else:
+ self.proposals = None
+ # filter images too small
+ if not test_mode:
+ valid_inds = self._filter_imgs()
+ self.img_infos = [self.img_infos[i] for i in valid_inds]
+ if self.proposals is not None:
+ self.proposals = [self.proposals[i] for i in valid_inds]
+ # set group flag for the sampler
+ if not self.test_mode:
+ self._set_group_flag()
+ # processing pipeline
+ self.pipeline = Compose(pipeline)
+
+ def __len__(self):
+ return len(self.img_infos)
+
+ def load_annotations(self, ann_file):
+ return mmcv.load(ann_file)
+
+ def load_proposals(self, proposal_file):
+ return mmcv.load(proposal_file)
+
+ def get_ann_info(self, idx):
+ return self.img_infos[idx]['ann']
+
+ def pre_pipeline(self, results):
+ results['img_prefix'] = self.img_prefix
+ results['seg_prefix'] = self.seg_prefix
+ results['proposal_file'] = self.proposal_file
+ results['bbox_fields'] = []
+ results['mask_fields'] = []
+ results['seg_fields'] = []
+
+ def _filter_imgs(self, min_size=32):
+ """Filter images too small."""
+ valid_inds = []
+ for i, img_info in enumerate(self.img_infos):
+ if min(img_info['width'], img_info['height']) >= min_size:
+ valid_inds.append(i)
+ return valid_inds
+
+ def _set_group_flag(self):
+ """Set flag according to image aspect ratio.
+
+ Images with aspect ratio greater than 1 will be set as group 1,
+ otherwise group 0.
+ """
+ self.flag = np.zeros(len(self), dtype=np.uint8)
+ for i in range(len(self)):
+ img_info = self.img_infos[i]
+ if img_info['width'] / img_info['height'] > 1:
+ self.flag[i] = 1
+
+ def _rand_another(self, idx):
+ pool = np.where(self.flag == self.flag[idx])[0]
+ return np.random.choice(pool)
+
+ def __getitem__(self, idx):
+ if self.test_mode:
+ return self.prepare_test_img(idx)
+ while True:
+ data = self.prepare_train_img(idx)
+ if data is None:
+ idx = self._rand_another(idx)
+ continue
+ return data
+
+ def prepare_train_img(self, idx):
+ img_info = self.img_infos[idx]
+ ann_info = self.get_ann_info(idx)
+ results = dict(img_info=img_info, ann_info=ann_info)
+ if self.proposals is not None:
+ results['proposals'] = self.proposals[idx]
+ self.pre_pipeline(results)
+ return self.pipeline(results)
+
+ def prepare_test_img(self, idx):
+ img_info = self.img_infos[idx]
+ results = dict(img_info=img_info)
+ if self.proposals is not None:
+ results['proposals'] = self.proposals[idx]
+ self.pre_pipeline(results)
+ return self.pipeline(results)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/dataset_wrappers.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/dataset_wrappers.py
new file mode 100644
index 000000000..e749cb076
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/dataset_wrappers.py
@@ -0,0 +1,55 @@
+import numpy as np
+from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
+
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class ConcatDataset(_ConcatDataset):
+ """A wrapper of concatenated dataset.
+
+ Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
+ concat the group flag for image aspect ratio.
+
+ Args:
+ datasets (list[:obj:`Dataset`]): A list of datasets.
+ """
+
+ def __init__(self, datasets):
+ super(ConcatDataset, self).__init__(datasets)
+ self.CLASSES = datasets[0].CLASSES
+ if hasattr(datasets[0], 'flag'):
+ flags = []
+ for i in range(0, len(datasets)):
+ flags.append(datasets[i].flag)
+ self.flag = np.concatenate(flags)
+
+
+@DATASETS.register_module
+class RepeatDataset(object):
+ """A wrapper of repeated dataset.
+
+ The length of the repeated dataset will be `times` times that of the
+ original dataset. This is useful when the data loading time is long but
+ the dataset is small. Using RepeatDataset can reduce the data loading
+ time between epochs.
+
+ Args:
+ dataset (:obj:`Dataset`): The dataset to be repeated.
+ times (int): Repeat times.
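+
+ Example (illustrative sketch; `small_dataset` stands for any dataset that
+ exposes `CLASSES` and, optionally, the aspect-ratio `flag`):
+
+     repeated = RepeatDataset(small_dataset, times=10)
+     assert len(repeated) == 10 * len(small_dataset)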
+ """
+
+ def __init__(self, dataset, times):
+ self.dataset = dataset
+ self.times = times
+ self.CLASSES = dataset.CLASSES
+ if hasattr(self.dataset, 'flag'):
+ self.flag = np.tile(self.dataset.flag, times)
+
+ self._ori_len = len(self.dataset)
+
+ def __getitem__(self, idx):
+ return self.dataset[idx % self._ori_len]
+
+ def __len__(self):
+ return self.times * self._ori_len
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/__init__.py
new file mode 100644
index 000000000..4404615be
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/__init__.py
@@ -0,0 +1,4 @@
+from .build_loader import build_dataloader
+from .sampler import DistributedGroupSampler, GroupSampler
+
+__all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/build_loader.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/build_loader.py
new file mode 100644
index 000000000..e9431d7ba
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/build_loader.py
@@ -0,0 +1,70 @@
+import platform
+from functools import partial
+
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from torch.utils.data import DataLoader
+
+from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler
+
+if platform.system() != 'Windows':
+ # https://github.com/pytorch/pytorch/issues/973
+ import resource
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+ resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
+
+
+def build_dataloader(dataset,
+ imgs_per_gpu,
+ workers_per_gpu,
+ num_gpus=1,
+ dist=True,
+ shuffle=True,
+ **kwargs):
+ """Build PyTorch DataLoader.
+
+ In distributed training, each GPU/process has a dataloader.
+ In non-distributed training, there is only one dataloader for all GPUs.
+
+ Args:
+ dataset (Dataset): A PyTorch dataset.
+ imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of
+ each GPU.
+ workers_per_gpu (int): How many subprocesses to use for data loading
+ for each GPU.
+ num_gpus (int): Number of GPUs. Only used in non-distributed training.
+ dist (bool): Distributed training/test or not. Default: True.
+ shuffle (bool): Whether to shuffle the data at every epoch.
+ Default: True.
+ kwargs: any keyword argument to be used to initialize DataLoader
+
+ Returns:
+ DataLoader: A PyTorch dataloader.
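+
+ Example (illustrative sketch; assumes `dataset` defines the aspect-ratio
+ `flag` attribute set by CustomDataset, which GroupSampler requires):
+
+     loader = build_dataloader(
+         dataset, imgs_per_gpu=2, workers_per_gpu=2,
+         num_gpus=1, dist=False, shuffle=True)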
+ """
+ if dist:
+ rank, world_size = get_dist_info()
+ # DistributedGroupSampler will definitely shuffle the data to satisfy
+ # that images on each GPU are in the same group
+ if shuffle:
+ sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
+ world_size, rank)
+ else:
+ sampler = DistributedSampler(
+ dataset, world_size, rank, shuffle=False)
+ batch_size = imgs_per_gpu
+ num_workers = workers_per_gpu
+ else:
+ sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
+ batch_size = num_gpus * imgs_per_gpu
+ num_workers = num_gpus * workers_per_gpu
+
+ data_loader = DataLoader(
+ dataset,
+ batch_size=batch_size,
+ sampler=sampler,
+ num_workers=num_workers,
+ collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
+ pin_memory=False,
+ **kwargs)
+
+ return data_loader
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/sampler.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/sampler.py
new file mode 100644
index 000000000..f3dd99620
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/sampler.py
@@ -0,0 +1,164 @@
+from __future__ import division
+import math
+
+import numpy as np
+import torch
+from mmcv.runner import get_dist_info
+from torch.utils.data import DistributedSampler as _DistributedSampler
+from torch.utils.data import Sampler
+
+
+class DistributedSampler(_DistributedSampler):
+
+ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
+ super().__init__(dataset, num_replicas=num_replicas, rank=rank)
+ self.shuffle = shuffle
+
+ def __iter__(self):
+ # deterministically shuffle based on epoch
+ if self.shuffle:
+ g = torch.Generator()
+ g.manual_seed(self.epoch)
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
+ else:
+ indices = torch.arange(len(self.dataset)).tolist()
+
+ # add extra samples to make it evenly divisible
+ indices += indices[:(self.total_size - len(indices))]
+ assert len(indices) == self.total_size
+
+ # subsample
+ indices = indices[self.rank:self.total_size:self.num_replicas]
+ assert len(indices) == self.num_samples
+
+ return iter(indices)
+
+
+class GroupSampler(Sampler):
+
+ def __init__(self, dataset, samples_per_gpu=1):
+ assert hasattr(dataset, 'flag')
+ self.dataset = dataset
+ self.samples_per_gpu = samples_per_gpu
+ self.flag = dataset.flag.astype(np.int64)
+ self.group_sizes = np.bincount(self.flag)
+ self.num_samples = 0
+ for i, size in enumerate(self.group_sizes):
+ self.num_samples += int(np.ceil(
+ size / self.samples_per_gpu)) * self.samples_per_gpu
+
+ def __iter__(self):
+ indices = []
+ for i, size in enumerate(self.group_sizes):
+ if size == 0:
+ continue
+ indice = np.where(self.flag == i)[0]
+ assert len(indice) == size
+ np.random.shuffle(indice)
+ num_extra = int(np.ceil(size / self.samples_per_gpu)
+ ) * self.samples_per_gpu - len(indice)
+ indice = np.concatenate(
+ [indice, np.random.choice(indice, num_extra)])
+ indices.append(indice)
+ indices = np.concatenate(indices)
+ indices = [
+ indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
+ for i in np.random.permutation(
+ range(len(indices) // self.samples_per_gpu))
+ ]
+ indices = np.concatenate(indices)
+ indices = indices.astype(np.int64).tolist()
+ assert len(indices) == self.num_samples
+ return iter(indices)
+
+ def __len__(self):
+ return self.num_samples
+
+
+class DistributedGroupSampler(Sampler):
+ """Sampler that restricts data loading to a subset of the dataset.
+ It is especially useful in conjunction with
+ :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
+ process can pass a DistributedSampler instance as a DataLoader sampler,
+ and load a subset of the original dataset that is exclusive to it.
+ .. note::
+ Dataset is assumed to be of constant size.
+ Arguments:
+ dataset: Dataset used for sampling.
+ num_replicas (optional): Number of processes participating in
+ distributed training.
+ rank (optional): Rank of the current process within num_replicas.
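+
+ Example (illustrative sketch for a 2-process job; values are placeholders):
+
+     sampler = DistributedGroupSampler(
+         dataset, samples_per_gpu=2, num_replicas=2, rank=0)
+     sampler.set_epoch(epoch)  # call once per epoch for a new shuffle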
+ """
+
+ def __init__(self,
+ dataset,
+ samples_per_gpu=1,
+ num_replicas=None,
+ rank=None):
+ _rank, _num_replicas = get_dist_info()
+ if num_replicas is None:
+ num_replicas = _num_replicas
+ if rank is None:
+ rank = _rank
+ self.dataset = dataset
+ self.samples_per_gpu = samples_per_gpu
+ self.num_replicas = num_replicas
+ self.rank = rank
+ self.epoch = 0
+
+ assert hasattr(self.dataset, 'flag')
+ self.flag = self.dataset.flag
+ self.group_sizes = np.bincount(self.flag)
+
+ self.num_samples = 0
+ for i, j in enumerate(self.group_sizes):
+ self.num_samples += int(
+ math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
+ self.num_replicas)) * self.samples_per_gpu
+ self.total_size = self.num_samples * self.num_replicas
+
+ def __iter__(self):
+ # deterministically shuffle based on epoch
+ g = torch.Generator()
+ g.manual_seed(self.epoch)
+
+ indices = []
+ for i, size in enumerate(self.group_sizes):
+ if size > 0:
+ indice = np.where(self.flag == i)[0]
+ assert len(indice) == size
+ indice = indice[list(torch.randperm(int(size),
+ generator=g))].tolist()
+ extra = int(
+ math.ceil(
+ size * 1.0 / self.samples_per_gpu / self.num_replicas)
+ ) * self.samples_per_gpu * self.num_replicas - len(indice)
+ # pad indice
+ tmp = indice.copy()
+ for _ in range(extra // size):
+ indice.extend(tmp)
+ indice.extend(tmp[:extra % size])
+ indices.extend(indice)
+
+ assert len(indices) == self.total_size
+
+ indices = [
+ indices[j] for i in list(
+ torch.randperm(
+ len(indices) // self.samples_per_gpu, generator=g))
+ for j in range(i * self.samples_per_gpu, (i + 1) *
+ self.samples_per_gpu)
+ ]
+
+ # subsample
+ offset = self.num_samples * self.rank
+ indices = indices[offset:offset + self.num_samples]
+ assert len(indices) == self.num_samples
+
+ return iter(indices)
+
+ def __len__(self):
+ return self.num_samples
+
+ def set_epoch(self, epoch):
+ self.epoch = epoch
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/__init__.py
new file mode 100644
index 000000000..fca8d984c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/__init__.py
@@ -0,0 +1,17 @@
+from .compose import Compose
+from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
+ Transpose, to_tensor)
+from .instaboost import InstaBoost
+from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals
+from .test_aug import MultiScaleFlipAug
+from .transforms import (Albu, Expand, MinIoURandomCrop, Normalize, Pad,
+ PhotoMetricDistortion, RandomCrop, RandomFlip, Resize,
+ SegRescale)
+
+__all__ = [
+ 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
+ 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
+ 'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad',
+ 'RandomCrop', 'Normalize', 'SegRescale', 'MinIoURandomCrop', 'Expand',
+ 'PhotoMetricDistortion', 'Albu', 'InstaBoost'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/compose.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/compose.py
new file mode 100644
index 000000000..f160eed97
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/compose.py
@@ -0,0 +1,35 @@
+import collections
+
+from mmdet.utils import build_from_cfg
+from ..registry import PIPELINES
+
+
+@PIPELINES.register_module
+class Compose(object):
+
+ def __init__(self, transforms):
+ assert isinstance(transforms, collections.abc.Sequence)
+ self.transforms = []
+ for transform in transforms:
+ if isinstance(transform, dict):
+ transform = build_from_cfg(transform, PIPELINES)
+ self.transforms.append(transform)
+ elif callable(transform):
+ self.transforms.append(transform)
+ else:
+ raise TypeError('transform must be callable or a dict')
+
+ def __call__(self, data):
+ for t in self.transforms:
+ data = t(data)
+ if data is None:
+ return None
+ return data
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += '\n'
+ format_string += ' {0}'.format(t)
+ format_string += '\n)'
+ return format_string
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/formating.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/formating.py
new file mode 100644
index 000000000..e14dd0a97
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/formating.py
@@ -0,0 +1,192 @@
+from collections.abc import Sequence
+
+import mmcv
+import numpy as np
+import torch
+from mmcv.parallel import DataContainer as DC
+
+from ..registry import PIPELINES
+
+
+def to_tensor(data):
+ """Convert objects of various python types to :obj:`torch.Tensor`.
+
+ Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
+ :class:`Sequence`, :class:`int` and :class:`float`.
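+
+ Example (illustrative):
+
+     to_tensor(np.array([1, 2, 3]))  # tensor([1, 2, 3]) via from_numpy
+     to_tensor(2)                    # tensor([2]) as a LongTensor
+     to_tensor(2.0)                  # tensor([2.]) as a FloatTensor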
+ """
+ if isinstance(data, torch.Tensor):
+ return data
+ elif isinstance(data, np.ndarray):
+ return torch.from_numpy(data)
+ elif isinstance(data, Sequence) and not mmcv.is_str(data):
+ return torch.tensor(data)
+ elif isinstance(data, int):
+ return torch.LongTensor([data])
+ elif isinstance(data, float):
+ return torch.FloatTensor([data])
+ else:
+ raise TypeError('type {} cannot be converted to tensor.'.format(
+ type(data)))
+
+
+@PIPELINES.register_module
+class ToTensor(object):
+
+ def __init__(self, keys):
+ self.keys = keys
+
+ def __call__(self, results):
+ for key in self.keys:
+ results[key] = to_tensor(results[key])
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(keys={})'.format(self.keys)
+
+
+@PIPELINES.register_module
+class ImageToTensor(object):
+
+ def __init__(self, keys):
+ self.keys = keys
+
+ def __call__(self, results):
+ for key in self.keys:
+ img = results[key]
+ if len(img.shape) < 3:
+ img = np.expand_dims(img, -1)
+ results[key] = to_tensor(img.transpose(2, 0, 1))
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(keys={})'.format(self.keys)
+
+
+@PIPELINES.register_module
+class Transpose(object):
+
+ def __init__(self, keys, order):
+ self.keys = keys
+ self.order = order
+
+ def __call__(self, results):
+ for key in self.keys:
+ results[key] = results[key].transpose(self.order)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(keys={}, order={})'.format(
+ self.keys, self.order)
+
+
+@PIPELINES.register_module
+class ToDataContainer(object):
+
+ def __init__(self,
+ fields=(dict(key='img', stack=True), dict(key='gt_bboxes'),
+ dict(key='gt_labels'))):
+ self.fields = fields
+
+ def __call__(self, results):
+ for field in self.fields:
+ field = field.copy()
+ key = field.pop('key')
+ results[key] = DC(results[key], **field)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(fields={})'.format(self.fields)
+
+
+@PIPELINES.register_module
+class DefaultFormatBundle(object):
+ """Default formatting bundle.
+
+ It simplifies the pipeline of formatting common fields, including "img",
+ "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
+ These fields are formatted as follows.
+
+ - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
+ - proposals: (1)to tensor, (2)to DataContainer
+ - gt_bboxes: (1)to tensor, (2)to DataContainer
+ - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
+ - gt_labels: (1)to tensor, (2)to DataContainer
+ - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
+ - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
+ (3)to DataContainer (stack=True)
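+
+ Example of a minimal training pipeline using this bundle (illustrative
+ sketch; the loading options are placeholders):
+
+     train_pipeline = [
+         dict(type='LoadImageFromFile'),
+         dict(type='LoadAnnotations', with_bbox=True),
+         dict(type='DefaultFormatBundle'),
+         dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+     ]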
+ """
+
+ def __call__(self, results):
+ if 'img' in results:
+ img = results['img']
+ if len(img.shape) < 3:
+ img = np.expand_dims(img, -1)
+ img = np.ascontiguousarray(img.transpose(2, 0, 1))
+ results['img'] = DC(to_tensor(img), stack=True)
+ for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
+ if key not in results:
+ continue
+ results[key] = DC(to_tensor(results[key]))
+ if 'gt_masks' in results:
+ results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
+ if 'gt_semantic_seg' in results:
+ results['gt_semantic_seg'] = DC(
+ to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__
+
+
+@PIPELINES.register_module
+class Collect(object):
+ """
+ Collect data from the loader relevant to the specific task.
+
+ This is usually the last stage of the data loader pipeline. Typically keys
+ is set to some subset of "img", "proposals", "gt_bboxes",
+ "gt_bboxes_ignore", "gt_labels", and/or "gt_masks".
+
+ The "img_meta" item is always populated. The contents of the "img_meta"
+ dictionary depends on "meta_keys". By default this includes:
+
+ - "img_shape": shape of the image input to the network as a tuple
+ (h, w, c). Note that images may be zero padded on the bottom/right
+ if the batch tensor is larger than this shape.
+
+ - "scale_factor": a float indicating the preprocessing scale
+
+ - "flip": a boolean indicating if image flip transform was used
+
+ - "filename": path to the image file
+
+ - "ori_shape": original shape of the image as a tuple (h, w, c)
+
+ - "pad_shape": image shape after padding
+
+ - "img_norm_cfg": a dict of normalization information:
+ - mean - per channel mean subtraction
+ - std - per channel std divisor
+ - to_rgb - bool indicating if bgr was converted to rgb
+ """
+
+ def __init__(self,
+ keys,
+ meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
+ 'scale_factor', 'flip', 'img_norm_cfg')):
+ self.keys = keys
+ self.meta_keys = meta_keys
+
+ def __call__(self, results):
+ data = {}
+ img_meta = {}
+ for key in self.meta_keys:
+ img_meta[key] = results[key]
+ data['img_meta'] = DC(img_meta, cpu_only=True)
+ for key in self.keys:
+ data[key] = results[key]
+ return data
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(
+ self.keys, self.meta_keys)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/instaboost.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/instaboost.py
new file mode 100644
index 000000000..6777d4425
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/instaboost.py
@@ -0,0 +1,91 @@
+import numpy as np
+
+from ..registry import PIPELINES
+
+
+@PIPELINES.register_module
+class InstaBoost(object):
+ """
+ Data augmentation method from the paper "InstaBoost: Boosting Instance
+ Segmentation via Probability Map Guided Copy-Pasting".
+ See https://github.com/GothicAi/Instaboost for implementation details.
+ """
+
+ def __init__(self,
+ action_candidate=('normal', 'horizontal', 'skip'),
+ action_prob=(1, 0, 0),
+ scale=(0.8, 1.2),
+ dx=15,
+ dy=15,
+ theta=(-1, 1),
+ color_prob=0.5,
+ hflag=False,
+ aug_ratio=0.5):
+ try:
+ import instaboostfast as instaboost
+ except ImportError:
+ raise ImportError(
+ 'Please run "pip install instaboostfast" '
+ 'to install instaboostfast first for instaboost augmentation.')
+ self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob,
+ scale, dx, dy, theta,
+ color_prob, hflag)
+ self.aug_ratio = aug_ratio
+
+ def _load_anns(self, results):
+ labels = results['ann_info']['labels']
+ masks = results['ann_info']['masks']
+ bboxes = results['ann_info']['bboxes']
+ n = len(labels)
+
+ anns = []
+ for i in range(n):
+ label = labels[i]
+ bbox = bboxes[i]
+ mask = masks[i]
+ x1, y1, x2, y2 = bbox
+ bbox = [x1, y1, x2 - x1 + 1, y2 - y1 + 1]
+ anns.append({
+ 'category_id': label,
+ 'segmentation': mask,
+ 'bbox': bbox
+ })
+
+ return anns
+
+ def _parse_anns(self, results, anns, img):
+ gt_bboxes = []
+ gt_labels = []
+ gt_masks_ann = []
+ for ann in anns:
+ x1, y1, w, h = ann['bbox']
+ bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
+ gt_bboxes.append(bbox)
+ gt_labels.append(ann['category_id'])
+ gt_masks_ann.append(ann['segmentation'])
+ gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
+ gt_labels = np.array(gt_labels, dtype=np.int64)
+ results['ann_info']['labels'] = gt_labels
+ results['ann_info']['bboxes'] = gt_bboxes
+ results['ann_info']['masks'] = gt_masks_ann
+ results['img'] = img
+ return results
+
+ def __call__(self, results):
+ img = results['img']
+ anns = self._load_anns(results)
+ if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]):
+ try:
+ import instaboostfast as instaboost
+ except ImportError:
+ raise ImportError('Please run "pip install instaboostfast" '
+ 'to install instaboostfast first.')
+ anns, img = instaboost.get_new_data(
+ anns, img, self.cfg, background=None)
+ results = self._parse_anns(results, anns, img)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += ('(cfg={}, aug_ratio={})').format(self.cfg, self.aug_ratio)
+ return repr_str
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/loading.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/loading.py
new file mode 100644
index 000000000..190773b15
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/loading.py
@@ -0,0 +1,144 @@
+import os.path as osp
+
+import mmcv
+import numpy as np
+import pycocotools.mask as maskUtils
+
+from ..registry import PIPELINES
+
+
+@PIPELINES.register_module
+class LoadImageFromFile(object):
+
+ def __init__(self, to_float32=False, color_type='color'):
+ self.to_float32 = to_float32
+ self.color_type = color_type
+
+ def __call__(self, results):
+ if results['img_prefix'] is not None:
+ filename = osp.join(results['img_prefix'],
+ results['img_info']['filename'])
+ else:
+ filename = results['img_info']['filename']
+ img = mmcv.imread(filename, self.color_type)
+ if self.to_float32:
+ img = img.astype(np.float32)
+ results['filename'] = filename
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['ori_shape'] = img.shape
+ return results
+
+ def __repr__(self):
+ return '{} (to_float32={}, color_type={})'.format(
+ self.__class__.__name__, self.to_float32, self.color_type)
+
+
+@PIPELINES.register_module
+class LoadAnnotations(object):
+
+ def __init__(self,
+ with_bbox=True,
+ with_label=True,
+ with_mask=False,
+ with_seg=False,
+ poly2mask=True):
+ self.with_bbox = with_bbox
+ self.with_label = with_label
+ self.with_mask = with_mask
+ self.with_seg = with_seg
+ self.poly2mask = poly2mask
+
+ def _load_bboxes(self, results):
+ ann_info = results['ann_info']
+ results['gt_bboxes'] = ann_info['bboxes']
+
+ gt_bboxes_ignore = ann_info.get('bboxes_ignore', None)
+ if gt_bboxes_ignore is not None:
+ results['gt_bboxes_ignore'] = gt_bboxes_ignore
+ results['bbox_fields'].append('gt_bboxes_ignore')
+ results['bbox_fields'].append('gt_bboxes')
+ return results
+
+ def _load_labels(self, results):
+ results['gt_labels'] = results['ann_info']['labels']
+ return results
+
+ def _poly2mask(self, mask_ann, img_h, img_w):
+ if isinstance(mask_ann, list):
+ # polygon -- a single object might consist of multiple parts
+ # we merge all parts into one mask rle code
+ rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+ rle = maskUtils.merge(rles)
+ elif isinstance(mask_ann['counts'], list):
+ # uncompressed RLE
+ rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+ else:
+ # rle
+ rle = mask_ann
+ mask = maskUtils.decode(rle)
+ return mask
+
+ def _load_masks(self, results):
+ h, w = results['img_info']['height'], results['img_info']['width']
+ gt_masks = results['ann_info']['masks']
+ if self.poly2mask:
+ gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks]
+ results['gt_masks'] = gt_masks
+ results['mask_fields'].append('gt_masks')
+ return results
+
+ def _load_semantic_seg(self, results):
+ results['gt_semantic_seg'] = mmcv.imread(
+ osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
+ flag='unchanged').squeeze()
+ results['seg_fields'].append('gt_semantic_seg')
+ return results
+
+ def __call__(self, results):
+ if self.with_bbox:
+ results = self._load_bboxes(results)
+ if results is None:
+ return None
+ if self.with_label:
+ results = self._load_labels(results)
+ if self.with_mask:
+ results = self._load_masks(results)
+ if self.with_seg:
+ results = self._load_semantic_seg(results)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += ('(with_bbox={}, with_label={}, with_mask={},'
+ ' with_seg={})').format(self.with_bbox, self.with_label,
+ self.with_mask, self.with_seg)
+ return repr_str
+
+
+@PIPELINES.register_module
+class LoadProposals(object):
+
+ def __init__(self, num_max_proposals=None):
+ self.num_max_proposals = num_max_proposals
+
+ def __call__(self, results):
+ proposals = results['proposals']
+ if proposals.shape[1] not in (4, 5):
+ raise AssertionError(
+ 'proposals should have shapes (n, 4) or (n, 5), '
+ 'but found {}'.format(proposals.shape))
+ proposals = proposals[:, :4]
+
+ if self.num_max_proposals is not None:
+ proposals = proposals[:self.num_max_proposals]
+
+ if len(proposals) == 0:
+ proposals = np.array([[0, 0, 0, 0]], dtype=np.float32)
+ results['proposals'] = proposals
+ results['bbox_fields'].append('proposals')
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(num_max_proposals={})'.format(
+ self.num_max_proposals)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_aug.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_aug.py
new file mode 100644
index 000000000..b5d218075
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_aug.py
@@ -0,0 +1,38 @@
+import mmcv
+
+from ..registry import PIPELINES
+from .compose import Compose
+
+
+@PIPELINES.register_module
+class MultiScaleFlipAug(object):
+
+ def __init__(self, transforms, img_scale, flip=False):
+ self.transforms = Compose(transforms)
+ self.img_scale = img_scale if isinstance(img_scale,
+ list) else [img_scale]
+ assert mmcv.is_list_of(self.img_scale, tuple)
+ self.flip = flip
+
+ def __call__(self, results):
+ aug_data = []
+ flip_aug = [False, True] if self.flip else [False]
+ for scale in self.img_scale:
+ for flip in flip_aug:
+ _results = results.copy()
+ _results['scale'] = scale
+ _results['flip'] = flip
+ data = self.transforms(_results)
+ aug_data.append(data)
+ # list of dict to dict of list
+ aug_data_dict = {key: [] for key in aug_data[0]}
+ for data in aug_data:
+ for key, val in data.items():
+ aug_data_dict[key].append(val)
+ return aug_data_dict
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(transforms={}, img_scale={}, flip={})'.format(
+ self.transforms, self.img_scale, self.flip)
+ return repr_str
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/transforms.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/transforms.py
new file mode 100644
index 000000000..58c1c2131
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/transforms.py
@@ -0,0 +1,876 @@
+import inspect
+
+import mmcv
+import numpy as np
+from numpy import random
+
+from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
+from ..registry import PIPELINES
+
+try:
+ from imagecorruptions import corrupt
+except ImportError:
+ corrupt = None
+
+try:
+ import albumentations
+ from albumentations import Compose
+except ImportError:
+ albumentations = None
+ Compose = None
+
+
+@PIPELINES.register_module
+class Resize(object):
+ """Resize images & bbox & mask.
+
+ This transform resizes the input image to some scale. Bboxes and masks are
+ then resized with the same scale factor. If the input dict contains the key
+ "scale", then the scale in the input dict is used, otherwise the specified
+ scale in the init method is used.
+
+ `img_scale` can either be a tuple (single-scale) or a list of tuple
+ (multi-scale). There are 3 multiscale modes:
+ - `ratio_range` is not None: randomly sample a ratio from the ratio range
+ and multiply it with the image scale.
+ - `ratio_range` is None and `multiscale_mode` == "range": randomly sample a
+ scale from a range.
+ - `ratio_range` is None and `multiscale_mode` == "value": randomly sample a
+ scale from multiple scales.
+
+ Args:
+ img_scale (tuple or list[tuple]): Images scales for resizing.
+ multiscale_mode (str): Either "range" or "value".
+ ratio_range (tuple[float]): (min_ratio, max_ratio)
+ keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+ image.
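+
+ Example (config-style sketch; the scales are placeholders):
+
+     # single scale, keeping the aspect ratio
+     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)
+     # multi-scale, sampling the shorter edge from [640, 800]
+     dict(type='Resize',
+          img_scale=[(1333, 640), (1333, 800)],
+          multiscale_mode='range',
+          keep_ratio=True)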
+ """
+
+ def __init__(self,
+ img_scale=None,
+ multiscale_mode='range',
+ ratio_range=None,
+ keep_ratio=True):
+ if img_scale is None:
+ self.img_scale = None
+ else:
+ if isinstance(img_scale, list):
+ self.img_scale = img_scale
+ else:
+ self.img_scale = [img_scale]
+ assert mmcv.is_list_of(self.img_scale, tuple)
+
+ if ratio_range is not None:
+ # mode 1: given a scale and a range of image ratio
+ assert len(self.img_scale) == 1
+ else:
+ # mode 2: given multiple scales or a range of scales
+ assert multiscale_mode in ['value', 'range']
+
+ self.multiscale_mode = multiscale_mode
+ self.ratio_range = ratio_range
+ self.keep_ratio = keep_ratio
+
+ @staticmethod
+ def random_select(img_scales):
+ assert mmcv.is_list_of(img_scales, tuple)
+ scale_idx = np.random.randint(len(img_scales))
+ img_scale = img_scales[scale_idx]
+ return img_scale, scale_idx
+
+ @staticmethod
+ def random_sample(img_scales):
+ assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
+ img_scale_long = [max(s) for s in img_scales]
+ img_scale_short = [min(s) for s in img_scales]
+ long_edge = np.random.randint(
+ min(img_scale_long),
+ max(img_scale_long) + 1)
+ short_edge = np.random.randint(
+ min(img_scale_short),
+ max(img_scale_short) + 1)
+ img_scale = (long_edge, short_edge)
+ return img_scale, None
+
+ @staticmethod
+ def random_sample_ratio(img_scale, ratio_range):
+ assert isinstance(img_scale, tuple) and len(img_scale) == 2
+ min_ratio, max_ratio = ratio_range
+ assert min_ratio <= max_ratio
+ ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+ scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+ return scale, None
+
+ def _random_scale(self, results):
+ if self.ratio_range is not None:
+ scale, scale_idx = self.random_sample_ratio(
+ self.img_scale[0], self.ratio_range)
+ elif len(self.img_scale) == 1:
+ scale, scale_idx = self.img_scale[0], 0
+ elif self.multiscale_mode == 'range':
+ scale, scale_idx = self.random_sample(self.img_scale)
+ elif self.multiscale_mode == 'value':
+ scale, scale_idx = self.random_select(self.img_scale)
+ else:
+ raise NotImplementedError
+
+ results['scale'] = scale
+ results['scale_idx'] = scale_idx
+
+ def _resize_img(self, results):
+ if self.keep_ratio:
+ img, scale_factor = mmcv.imrescale(
+ results['img'], results['scale'], return_scale=True)
+ else:
+ img, w_scale, h_scale = mmcv.imresize(
+ results['img'], results['scale'], return_scale=True)
+ scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
+ dtype=np.float32)
+ results['img'] = img
+ results['img_shape'] = img.shape
+ results['pad_shape'] = img.shape # in case that there is no padding
+ results['scale_factor'] = scale_factor
+ results['keep_ratio'] = self.keep_ratio
+
+ def _resize_bboxes(self, results):
+ img_shape = results['img_shape']
+ for key in results.get('bbox_fields', []):
+ bboxes = results[key] * results['scale_factor']
+ bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
+ bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
+ results[key] = bboxes
+
+ def _resize_masks(self, results):
+ for key in results.get('mask_fields', []):
+ if results[key] is None:
+ continue
+ if self.keep_ratio:
+ masks = [
+ mmcv.imrescale(
+ mask, results['scale_factor'], interpolation='nearest')
+ for mask in results[key]
+ ]
+ else:
+ mask_size = (results['img_shape'][1], results['img_shape'][0])
+ masks = [
+ mmcv.imresize(mask, mask_size, interpolation='nearest')
+ for mask in results[key]
+ ]
+ results[key] = np.stack(masks)
+
+ def _resize_seg(self, results):
+ for key in results.get('seg_fields', []):
+ if self.keep_ratio:
+ gt_seg = mmcv.imrescale(
+ results[key], results['scale'], interpolation='nearest')
+ else:
+ gt_seg = mmcv.imresize(
+ results[key], results['scale'], interpolation='nearest')
+ results[key] = gt_seg
+
+ def __call__(self, results):
+ if 'scale' not in results:
+ self._random_scale(results)
+ self._resize_img(results)
+ self._resize_bboxes(results)
+ self._resize_masks(results)
+ self._resize_seg(results)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += ('(img_scale={}, multiscale_mode={}, ratio_range={}, '
+ 'keep_ratio={})').format(self.img_scale,
+ self.multiscale_mode,
+ self.ratio_range,
+ self.keep_ratio)
+ return repr_str
+
+
+@PIPELINES.register_module
+class RandomFlip(object):
+ """Flip the image & bbox & mask.
+
+ If the input dict contains the key "flip", then the flag will be used,
+ otherwise it will be randomly decided by a ratio specified in the init
+ method.
+
+ Args:
+ flip_ratio (float, optional): The flipping probability.
+ """
+
+ def __init__(self, flip_ratio=None, direction='horizontal'):
+ self.flip_ratio = flip_ratio
+ self.direction = direction
+ if flip_ratio is not None:
+ assert flip_ratio >= 0 and flip_ratio <= 1
+ assert direction in ['horizontal', 'vertical']
+
+ def bbox_flip(self, bboxes, img_shape, direction):
+ """Flip bboxes horizontally.
+
+ Args:
+ bboxes(ndarray): shape (..., 4*k)
+ img_shape(tuple): (height, width)
+ """
+ assert bboxes.shape[-1] % 4 == 0
+ flipped = bboxes.copy()
+ if direction == 'horizontal':
+ w = img_shape[1]
+ flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
+ flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
+ elif direction == 'vertical':
+ h = img_shape[0]
+ flipped[..., 1::4] = h - bboxes[..., 3::4] - 1
+ flipped[..., 3::4] = h - bboxes[..., 1::4] - 1
+ else:
+ raise ValueError(
+ 'Invalid flipping direction "{}"'.format(direction))
+ return flipped
+
+ def __call__(self, results):
+ if 'flip' not in results:
+ flip = True if np.random.rand() < self.flip_ratio else False
+ results['flip'] = flip
+ if 'flip_direction' not in results:
+ results['flip_direction'] = self.direction
+ if results['flip']:
+ # flip image
+ results['img'] = mmcv.imflip(
+ results['img'], direction=results['flip_direction'])
+ # flip bboxes
+ for key in results.get('bbox_fields', []):
+ results[key] = self.bbox_flip(results[key],
+ results['img_shape'],
+ results['flip_direction'])
+ # flip masks
+ for key in results.get('mask_fields', []):
+ results[key] = np.stack([
+ mmcv.imflip(mask, direction=results['flip_direction'])
+ for mask in results[key]
+ ])
+
+ # flip segs
+ for key in results.get('seg_fields', []):
+ results[key] = mmcv.imflip(
+ results[key], direction=results['flip_direction'])
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(flip_ratio={})'.format(
+ self.flip_ratio)
+
+
+@PIPELINES.register_module
+class Pad(object):
+ """Pad the image & mask.
+
+ There are two padding modes: (1) pad to a fixed size and (2) pad to the
+ minimum size that is divisible by some number.
+
+ Args:
+ size (tuple, optional): Fixed padding size.
+ size_divisor (int, optional): The divisor of padded size.
+ pad_val (float, optional): Padding value, 0 by default.
+ """
+
+ def __init__(self, size=None, size_divisor=None, pad_val=0):
+ self.size = size
+ self.size_divisor = size_divisor
+ self.pad_val = pad_val
+ # only one of size and size_divisor should be valid
+ assert size is not None or size_divisor is not None
+ assert size is None or size_divisor is None
+
+ def _pad_img(self, results):
+ if self.size is not None:
+ padded_img = mmcv.impad(results['img'], self.size)
+ elif self.size_divisor is not None:
+ padded_img = mmcv.impad_to_multiple(
+ results['img'], self.size_divisor, pad_val=self.pad_val)
+ results['img'] = padded_img
+ results['pad_shape'] = padded_img.shape
+ results['pad_fixed_size'] = self.size
+ results['pad_size_divisor'] = self.size_divisor
+
+ def _pad_masks(self, results):
+ pad_shape = results['pad_shape'][:2]
+ for key in results.get('mask_fields', []):
+ padded_masks = [
+ mmcv.impad(mask, pad_shape, pad_val=self.pad_val)
+ for mask in results[key]
+ ]
+ if padded_masks:
+ results[key] = np.stack(padded_masks, axis=0)
+ else:
+ results[key] = np.empty((0, ) + pad_shape, dtype=np.uint8)
+
+ def _pad_seg(self, results):
+ for key in results.get('seg_fields', []):
+ results[key] = mmcv.impad(results[key], results['pad_shape'][:2])
+
+ def __call__(self, results):
+ self._pad_img(results)
+ self._pad_masks(results)
+ self._pad_seg(results)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(size={}, size_divisor={}, pad_val={})'.format(
+ self.size, self.size_divisor, self.pad_val)
+ return repr_str
+
+
+@PIPELINES.register_module
+class Normalize(object):
+ """Normalize the image.
+
+ Args:
+ mean (sequence): Mean values of 3 channels.
+ std (sequence): Std values of 3 channels.
+ to_rgb (bool): Whether to convert the image from BGR to RGB,
+ default is true.
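+
+ Example (config-style sketch with the ImageNet statistics commonly used
+ in this repo's configs):
+
+     dict(type='Normalize',
+          mean=[123.675, 116.28, 103.53],
+          std=[58.395, 57.12, 57.375],
+          to_rgb=True)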
+ """
+
+ def __init__(self, mean, std, to_rgb=True):
+ self.mean = np.array(mean, dtype=np.float32)
+ self.std = np.array(std, dtype=np.float32)
+ self.to_rgb = to_rgb
+
+ def __call__(self, results):
+ results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std,
+ self.to_rgb)
+ results['img_norm_cfg'] = dict(
+ mean=self.mean, std=self.std, to_rgb=self.to_rgb)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(mean={}, std={}, to_rgb={})'.format(
+ self.mean, self.std, self.to_rgb)
+ return repr_str
+
+
+@PIPELINES.register_module
+class RandomCrop(object):
+ """Random crop the image & bboxes & masks.
+
+ Args:
+ crop_size (tuple): Expected size after cropping, (h, w).
+ """
+
+ def __init__(self, crop_size):
+ self.crop_size = crop_size
+
+ def __call__(self, results):
+ img = results['img']
+ margin_h = max(img.shape[0] - self.crop_size[0], 0)
+ margin_w = max(img.shape[1] - self.crop_size[1], 0)
+ offset_h = np.random.randint(0, margin_h + 1)
+ offset_w = np.random.randint(0, margin_w + 1)
+ crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0]
+ crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1]
+
+ # crop the image
+ img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
+ img_shape = img.shape
+ results['img'] = img
+ results['img_shape'] = img_shape
+
+ # crop bboxes accordingly and clip to the image boundary
+ for key in results.get('bbox_fields', []):
+ bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],
+ dtype=np.float32)
+ bboxes = results[key] - bbox_offset
+ bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
+ bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
+ results[key] = bboxes
+
+ # crop semantic seg
+ for key in results.get('seg_fields', []):
+ results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2]
+
+ # filter out the gt bboxes that are completely cropped
+ if 'gt_bboxes' in results:
+ gt_bboxes = results['gt_bboxes']
+ valid_inds = (gt_bboxes[:, 2] > gt_bboxes[:, 0]) & (
+ gt_bboxes[:, 3] > gt_bboxes[:, 1])
+ # if no gt bbox remains after cropping, just skip this image
+ if not np.any(valid_inds):
+ return None
+ results['gt_bboxes'] = gt_bboxes[valid_inds, :]
+ if 'gt_labels' in results:
+ results['gt_labels'] = results['gt_labels'][valid_inds]
+
+ # filter and crop the masks
+ if 'gt_masks' in results:
+ valid_gt_masks = []
+ for i in np.where(valid_inds)[0]:
+ gt_mask = results['gt_masks'][i][crop_y1:crop_y2,
+ crop_x1:crop_x2]
+ valid_gt_masks.append(gt_mask)
+ results['gt_masks'] = np.stack(valid_gt_masks)
+
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(crop_size={})'.format(
+ self.crop_size)
+
+
+@PIPELINES.register_module
+class SegRescale(object):
+ """Rescale semantic segmentation maps.
+
+ Args:
+ scale_factor (float): The scale factor of the final output.
+ """
+
+ def __init__(self, scale_factor=1):
+ self.scale_factor = scale_factor
+
+ def __call__(self, results):
+ for key in results.get('seg_fields', []):
+ if self.scale_factor != 1:
+ results[key] = mmcv.imrescale(
+ results[key], self.scale_factor, interpolation='nearest')
+ return results
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(scale_factor={})'.format(
+ self.scale_factor)
+
+
+@PIPELINES.register_module
+class PhotoMetricDistortion(object):
+ """Apply photometric distortion to image sequentially, every transformation
+ is applied with a probability of 0.5. The position of random contrast is in
+ second or second to last.
+
+ 1. random brightness
+ 2. random contrast (mode 0)
+ 3. convert color from BGR to HSV
+ 4. random saturation
+ 5. random hue
+ 6. convert color from HSV to BGR
+ 7. random contrast (mode 1)
+ 8. randomly swap channels
+
+ Args:
+ brightness_delta (int): delta of brightness.
+ contrast_range (tuple): range of contrast.
+ saturation_range (tuple): range of saturation.
+ hue_delta (int): delta of hue.
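+
+ Example (config-style sketch using the default parameters):
+
+     dict(type='PhotoMetricDistortion',
+          brightness_delta=32,
+          contrast_range=(0.5, 1.5),
+          saturation_range=(0.5, 1.5),
+          hue_delta=18)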
+ """
+
+ def __init__(self,
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18):
+ self.brightness_delta = brightness_delta
+ self.contrast_lower, self.contrast_upper = contrast_range
+ self.saturation_lower, self.saturation_upper = saturation_range
+ self.hue_delta = hue_delta
+
+ def __call__(self, results):
+ img = results['img']
+ # random brightness
+ if random.randint(2):
+ delta = random.uniform(-self.brightness_delta,
+ self.brightness_delta)
+ img += delta
+
+ # mode == 0 --> do random contrast first
+ # mode == 1 --> do random contrast last
+ mode = random.randint(2)
+ if mode == 1:
+ if random.randint(2):
+ alpha = random.uniform(self.contrast_lower,
+ self.contrast_upper)
+ img *= alpha
+
+ # convert color from BGR to HSV
+ img = mmcv.bgr2hsv(img)
+
+ # random saturation
+ if random.randint(2):
+ img[..., 1] *= random.uniform(self.saturation_lower,
+ self.saturation_upper)
+
+ # random hue
+ if random.randint(2):
+ img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
+ img[..., 0][img[..., 0] > 360] -= 360
+ img[..., 0][img[..., 0] < 0] += 360
+
+ # convert color from HSV to BGR
+ img = mmcv.hsv2bgr(img)
+
+ # random contrast
+ if mode == 0:
+ if random.randint(2):
+ alpha = random.uniform(self.contrast_lower,
+ self.contrast_upper)
+ img *= alpha
+
+ # randomly swap channels
+ if random.randint(2):
+ img = img[..., random.permutation(3)]
+
+ results['img'] = img
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ # contrast_range and saturation_range are unpacked in __init__, so
+ # rebuild the tuples here instead of reading non-existent attributes
+ repr_str += ('(brightness_delta={}, contrast_range=({}, {}), '
+ 'saturation_range=({}, {}), hue_delta={})').format(
+ self.brightness_delta, self.contrast_lower, self.contrast_upper,
+ self.saturation_lower, self.saturation_upper, self.hue_delta)
+ return repr_str
+
+
+@PIPELINES.register_module
+class Expand(object):
+ """Random expand the image & bboxes.
+
+ Randomly place the original image on a canvas of 'ratio' x original image
+ size filled with mean values. The ratio is in the range of ratio_range.
+
+ Args:
+ mean (tuple): mean value of dataset.
+ to_rgb (bool): if need to convert the order of mean to align with RGB.
+ ratio_range (tuple): range of expand ratio.
+ prob (float): probability of applying this transformation
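+
+ Example (config-style sketch; the mean should match the Normalize mean of
+ the pipeline, shown here with placeholder values):
+
+     dict(type='Expand',
+          mean=[123.675, 116.28, 103.53],
+          to_rgb=True,
+          ratio_range=(1, 4))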
+ """
+
+ def __init__(self,
+ mean=(0, 0, 0),
+ to_rgb=True,
+ ratio_range=(1, 4),
+ seg_ignore_label=None,
+ prob=0.5):
+ self.to_rgb = to_rgb
+ self.ratio_range = ratio_range
+ if to_rgb:
+ self.mean = mean[::-1]
+ else:
+ self.mean = mean
+ self.min_ratio, self.max_ratio = ratio_range
+ self.seg_ignore_label = seg_ignore_label
+ self.prob = prob
+
+ def __call__(self, results):
+ if random.uniform(0, 1) > self.prob:
+ return results
+
+ img, boxes = [results[k] for k in ('img', 'gt_bboxes')]
+
+ h, w, c = img.shape
+ ratio = random.uniform(self.min_ratio, self.max_ratio)
+ expand_img = np.full((int(h * ratio), int(w * ratio), c),
+ self.mean).astype(img.dtype)
+ left = int(random.uniform(0, w * ratio - w))
+ top = int(random.uniform(0, h * ratio - h))
+ expand_img[top:top + h, left:left + w] = img
+ boxes = boxes + np.tile((left, top), 2).astype(boxes.dtype)
+
+ results['img'] = expand_img
+ results['gt_bboxes'] = boxes
+
+ if 'gt_masks' in results:
+ expand_gt_masks = []
+ for mask in results['gt_masks']:
+ expand_mask = np.full((int(h * ratio), int(w * ratio)),
+ 0).astype(mask.dtype)
+ expand_mask[top:top + h, left:left + w] = mask
+ expand_gt_masks.append(expand_mask)
+ results['gt_masks'] = np.stack(expand_gt_masks)
+
+ # not tested
+ if 'gt_semantic_seg' in results:
+ assert self.seg_ignore_label is not None
+ gt_seg = results['gt_semantic_seg']
+ expand_gt_seg = np.full((int(h * ratio), int(w * ratio)),
+ self.seg_ignore_label).astype(gt_seg.dtype)
+ expand_gt_seg[top:top + h, left:left + w] = gt_seg
+ results['gt_semantic_seg'] = expand_gt_seg
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(mean={}, to_rgb={}, ratio_range={}, ' \
+ 'seg_ignore_label={})'.format(
+ self.mean, self.to_rgb, self.ratio_range,
+ self.seg_ignore_label)
+ return repr_str
+
+
+@PIPELINES.register_module
+class MinIoURandomCrop(object):
+ """Random crop the image & bboxes, the cropped patches have minimum IoU
+ requirement with original image & bboxes, the IoU threshold is randomly
+ selected from min_ious.
+
+ Args:
+ min_ious (tuple): minimum IoU threshold for all intersections with
+ bounding boxes
+ min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w,
+ where a >= min_crop_size).
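+
+ Example (config-style sketch using the defaults):
+
+     dict(type='MinIoURandomCrop',
+          min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+          min_crop_size=0.3)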
+ """
+
+ def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
+ # 1: return ori img
+ self.sample_mode = (1, *min_ious, 0)
+ self.min_ious = min_ious  # kept so that __repr__ can report it
+ self.min_crop_size = min_crop_size
+
+ def __call__(self, results):
+ img, boxes, labels = [
+ results[k] for k in ('img', 'gt_bboxes', 'gt_labels')
+ ]
+ h, w, c = img.shape
+ while True:
+ mode = random.choice(self.sample_mode)
+ if mode == 1:
+ return results
+
+ min_iou = mode
+ for i in range(50):
+ new_w = random.uniform(self.min_crop_size * w, w)
+ new_h = random.uniform(self.min_crop_size * h, h)
+
+ # h / w in [0.5, 2]
+ if new_h / new_w < 0.5 or new_h / new_w > 2:
+ continue
+
+ left = random.uniform(w - new_w)
+ top = random.uniform(h - new_h)
+
+ patch = np.array(
+ (int(left), int(top), int(left + new_w), int(top + new_h)))
+ overlaps = bbox_overlaps(
+ patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
+ if overlaps.min() < min_iou:
+ continue
+
+ # center of boxes should inside the crop img
+ center = (boxes[:, :2] + boxes[:, 2:]) / 2
+ mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) *
+ (center[:, 0] < patch[2]) * (center[:, 1] < patch[3]))
+ if not mask.any():
+ continue
+ boxes = boxes[mask]
+ labels = labels[mask]
+
+ # adjust boxes
+ img = img[patch[1]:patch[3], patch[0]:patch[2]]
+ boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
+ boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
+ boxes -= np.tile(patch[:2], 2)
+
+ results['img'] = img
+ results['gt_bboxes'] = boxes
+ results['gt_labels'] = labels
+
+ if 'gt_masks' in results:
+ valid_masks = [
+ results['gt_masks'][i] for i in range(len(mask))
+ if mask[i]
+ ]
+ results['gt_masks'] = np.stack([
+ gt_mask[patch[1]:patch[3], patch[0]:patch[2]]
+ for gt_mask in valid_masks
+ ])
+
+ # not tested
+ if 'gt_semantic_seg' in results:
+ results['gt_semantic_seg'] = results['gt_semantic_seg'][
+ patch[1]:patch[3], patch[0]:patch[2]]
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(min_ious={}, min_crop_size={})'.format(
+ self.min_ious, self.min_crop_size)
+ return repr_str
+
+
+@PIPELINES.register_module
+class Corrupt(object):
+
+ def __init__(self, corruption, severity=1):
+ self.corruption = corruption
+ self.severity = severity
+
+ def __call__(self, results):
+ if corrupt is None:
+ raise RuntimeError('imagecorruptions is not installed')
+ results['img'] = corrupt(
+ results['img'].astype(np.uint8),
+ corruption_name=self.corruption,
+ severity=self.severity)
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(corruption={}, severity={})'.format(
+ self.corruption, self.severity)
+ return repr_str
+
+
+@PIPELINES.register_module
+class Albu(object):
+
+ def __init__(self,
+ transforms,
+ bbox_params=None,
+ keymap=None,
+ update_pad_shape=False,
+ skip_img_without_anno=False):
+ """
+ Adds custom transformations from the Albumentations library.
+ See `https://albumentations.readthedocs.io` for more information.
+
+ transforms (list): list of albu transformations
+ bbox_params (dict): bbox_params for albumentation `Compose`
+ keymap (dict): contains {'input key':'albumentation-style key'}
+ skip_img_without_anno (bool): whether to skip the image
+ if no ann left after aug
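+
+ Example (illustrative sketch; the albumentations transform name and
+ probability below are examples, not defaults of this repo):
+
+     dict(type='Albu',
+          transforms=[dict(type='RandomBrightnessContrast', p=0.2)],
+          keymap={'img': 'image', 'gt_bboxes': 'bboxes'})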
+ """
+ if Compose is None:
+ raise RuntimeError('albumentations is not installed')
+
+ self.transforms = transforms
+ self.filter_lost_elements = False
+ self.update_pad_shape = update_pad_shape
+ self.skip_img_without_anno = skip_img_without_anno
+
+ # A simple workaround to remove masks without boxes
+ if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
+ and 'filter_lost_elements' in bbox_params):
+ self.filter_lost_elements = True
+ self.origin_label_fields = bbox_params['label_fields']
+ bbox_params['label_fields'] = ['idx_mapper']
+ del bbox_params['filter_lost_elements']
+
+ self.bbox_params = (
+ self.albu_builder(bbox_params) if bbox_params else None)
+ self.aug = Compose([self.albu_builder(t) for t in self.transforms],
+ bbox_params=self.bbox_params)
+
+ if not keymap:
+ self.keymap_to_albu = {
+ 'img': 'image',
+ 'gt_masks': 'masks',
+ 'gt_bboxes': 'bboxes'
+ }
+ else:
+ self.keymap_to_albu = keymap
+ self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}
+
+ def albu_builder(self, cfg):
+ """Import a module from albumentations.
+ Inherits some of `build_from_cfg` logic.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+ Returns:
+ obj: The constructed object.
+ """
+ assert isinstance(cfg, dict) and "type" in cfg
+ args = cfg.copy()
+
+ obj_type = args.pop("type")
+ if mmcv.is_str(obj_type):
+ if albumentations is None:
+ raise RuntimeError('albumentations is not installed')
+ obj_cls = getattr(albumentations, obj_type)
+ elif inspect.isclass(obj_type):
+ obj_cls = obj_type
+ else:
+ raise TypeError(
+ 'type must be a str or valid type, but got {}'.format(
+ type(obj_type)))
+
+ if 'transforms' in args:
+ args['transforms'] = [
+ self.albu_builder(transform)
+ for transform in args['transforms']
+ ]
+
+ return obj_cls(**args)
+
+ @staticmethod
+ def mapper(d, keymap):
+ """
+ Dictionary mapper.
+ Renames keys according to keymap provided.
+
+ Args:
+ d (dict): old dict
+ keymap (dict): {'old_key':'new_key'}
+ Returns:
+ dict: new dict.
+ """
+ updated_dict = {}
+ for k, v in d.items():
+ new_k = keymap.get(k, k)
+ updated_dict[new_k] = v
+ return updated_dict
+
+ def __call__(self, results):
+ # dict to albumentations format
+ results = self.mapper(results, self.keymap_to_albu)
+
+ if 'bboxes' in results:
+ # to list of boxes
+ if isinstance(results['bboxes'], np.ndarray):
+ results['bboxes'] = [x for x in results['bboxes']]
+ # add pseudo-field for filtration
+ if self.filter_lost_elements:
+ results['idx_mapper'] = np.arange(len(results['bboxes']))
+
+ results = self.aug(**results)
+
+ if 'bboxes' in results:
+ if isinstance(results['bboxes'], list):
+ results['bboxes'] = np.array(
+ results['bboxes'], dtype=np.float32)
+ results['bboxes'] = results['bboxes'].reshape(-1, 4)
+
+ # filter label_fields
+ if self.filter_lost_elements:
+
+ results['idx_mapper'] = np.arange(len(results['bboxes']))
+
+ for label in self.origin_label_fields:
+ results[label] = np.array(
+ [results[label][i] for i in results['idx_mapper']])
+ if 'masks' in results:
+ results['masks'] = np.array(
+ [results['masks'][i] for i in results['idx_mapper']])
+
+ if (not len(results['idx_mapper'])
+ and self.skip_img_without_anno):
+ return None
+
+ if 'gt_labels' in results:
+ if isinstance(results['gt_labels'], list):
+ results['gt_labels'] = np.array(results['gt_labels'])
+ results['gt_labels'] = results['gt_labels'].astype(np.int64)
+
+ # back to the original format
+ results = self.mapper(results, self.keymap_back)
+
+ # update final shape
+ if self.update_pad_shape:
+ results['pad_shape'] = results['img'].shape
+
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+ repr_str += '(transforms={})'.format(self.transforms)
+ return repr_str
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/registry.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/registry.py
new file mode 100644
index 000000000..974a4fbb7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/registry.py
@@ -0,0 +1,4 @@
+from mmdet.utils import Registry
+
+DATASETS = Registry('dataset')
+PIPELINES = Registry('pipeline')
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/voc.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/voc.py
new file mode 100644
index 000000000..77bffe355
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/voc.py
@@ -0,0 +1,20 @@
+from .registry import DATASETS
+from .xml_style import XMLDataset
+
+
+@DATASETS.register_module
+class VOCDataset(XMLDataset):
+
+ CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
+ 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
+ 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
+ 'tvmonitor')
+
+ def __init__(self, **kwargs):
+ super(VOCDataset, self).__init__(**kwargs)
+ if 'VOC2007' in self.img_prefix:
+ self.year = 2007
+ elif 'VOC2012' in self.img_prefix:
+ self.year = 2012
+ else:
+ raise ValueError('Cannot infer dataset year from img_prefix')
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/wider_face.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/wider_face.py
new file mode 100644
index 000000000..b83e3d664
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/wider_face.py
@@ -0,0 +1,42 @@
+import os.path as osp
+import xml.etree.ElementTree as ET
+
+import mmcv
+
+from .registry import DATASETS
+from .xml_style import XMLDataset
+
+
+@DATASETS.register_module
+class WIDERFaceDataset(XMLDataset):
+ """
+ Reader for the WIDER Face dataset in PASCAL VOC format.
+ Conversion scripts can be found at
+ https://github.com/sovrasov/wider-face-pascal-voc-annotations
+ """
+ CLASSES = ('face', )
+
+ def __init__(self, **kwargs):
+ super(WIDERFaceDataset, self).__init__(**kwargs)
+
+ def load_annotations(self, ann_file):
+ img_infos = []
+ img_ids = mmcv.list_from_file(ann_file)
+ for img_id in img_ids:
+ filename = '{}.jpg'.format(img_id)
+ xml_path = osp.join(self.img_prefix, 'Annotations',
+ '{}.xml'.format(img_id))
+ tree = ET.parse(xml_path)
+ root = tree.getroot()
+ size = root.find('size')
+ width = int(size.find('width').text)
+ height = int(size.find('height').text)
+ folder = root.find('folder').text
+ img_infos.append(
+ dict(
+ id=img_id,
+ filename=osp.join(folder, filename),
+ width=width,
+ height=height))
+
+ return img_infos
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/xml_style.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/xml_style.py
new file mode 100644
index 000000000..39d57042e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/xml_style.py
@@ -0,0 +1,86 @@
+import os.path as osp
+import xml.etree.ElementTree as ET
+
+import mmcv
+import numpy as np
+
+from .custom import CustomDataset
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class XMLDataset(CustomDataset):
+
+ def __init__(self, min_size=None, **kwargs):
+ super(XMLDataset, self).__init__(**kwargs)
+ self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
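+ # class labels are 1-based here (i + 1); label 0 is left for the background class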
+ self.min_size = min_size
+
+ def load_annotations(self, ann_file):
+ img_infos = []
+ img_ids = mmcv.list_from_file(ann_file)
+ for img_id in img_ids:
+ filename = 'JPEGImages/{}.jpg'.format(img_id)
+ xml_path = osp.join(self.img_prefix, 'Annotations',
+ '{}.xml'.format(img_id))
+ tree = ET.parse(xml_path)
+ root = tree.getroot()
+ size = root.find('size')
+ width = int(size.find('width').text)
+ height = int(size.find('height').text)
+ img_infos.append(
+ dict(id=img_id, filename=filename, width=width, height=height))
+ return img_infos
+
+ def get_ann_info(self, idx):
+ img_id = self.img_infos[idx]['id']
+ xml_path = osp.join(self.img_prefix, 'Annotations',
+ '{}.xml'.format(img_id))
+ tree = ET.parse(xml_path)
+ root = tree.getroot()
+ bboxes = []
+ labels = []
+ bboxes_ignore = []
+ labels_ignore = []
+ for obj in root.findall('object'):
+ name = obj.find('name').text
+ label = self.cat2label[name]
+ difficult = int(obj.find('difficult').text)
+ bnd_box = obj.find('bndbox')
+ bbox = [
+ int(bnd_box.find('xmin').text),
+ int(bnd_box.find('ymin').text),
+ int(bnd_box.find('xmax').text),
+ int(bnd_box.find('ymax').text)
+ ]
+ ignore = False
+ if self.min_size:
+ assert not self.test_mode
+ w = bbox[2] - bbox[0]
+ h = bbox[3] - bbox[1]
+ if w < self.min_size or h < self.min_size:
+ ignore = True
+ if difficult or ignore:
+ bboxes_ignore.append(bbox)
+ labels_ignore.append(label)
+ else:
+ bboxes.append(bbox)
+ labels.append(label)
+ if not bboxes:
+ bboxes = np.zeros((0, 4))
+ labels = np.zeros((0, ))
+ else:
+ bboxes = np.array(bboxes, ndmin=2) - 1
+ labels = np.array(labels)
+ if not bboxes_ignore:
+ bboxes_ignore = np.zeros((0, 4))
+ labels_ignore = np.zeros((0, ))
+ else:
+ bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
+ labels_ignore = np.array(labels_ignore)
+ ann = dict(
+ bboxes=bboxes.astype(np.float32),
+ labels=labels.astype(np.int64),
+ bboxes_ignore=bboxes_ignore.astype(np.float32),
+ labels_ignore=labels_ignore.astype(np.int64))
+ return ann
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/__init__.py
new file mode 100644
index 000000000..35f0a09e3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/__init__.py
@@ -0,0 +1,19 @@
+from .anchor_heads import * # noqa: F401,F403
+from .backbones import * # noqa: F401,F403
+from .bbox_heads import * # noqa: F401,F403
+from .builder import (build_backbone, build_detector, build_head, build_loss,
+ build_neck, build_roi_extractor, build_shared_head)
+from .detectors import * # noqa: F401,F403
+from .losses import * # noqa: F401,F403
+from .mask_heads import * # noqa: F401,F403
+from .necks import * # noqa: F401,F403
+from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
+ ROI_EXTRACTORS, SHARED_HEADS)
+from .roi_extractors import * # noqa: F401,F403
+from .shared_heads import * # noqa: F401,F403
+
+__all__ = [
+ 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
+ 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
+ 'build_shared_head', 'build_head', 'build_loss', 'build_detector'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/__init__.py
new file mode 100644
index 000000000..de1d7ef01
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/__init__.py
@@ -0,0 +1,25 @@
+from .anchor_head import AnchorHead
+from .atss_head import ATSSHead
+from .fcos_head import FCOSHead
+from .fovea_head import FoveaHead
+from .free_anchor_retina_head import FreeAnchorRetinaHead
+from .ga_retina_head import GARetinaHead
+from .ga_rpn_head import GARPNHead
+from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
+from .reppoints_head import RepPointsHead
+from .retina_head import RetinaHead
+from .retina_sepbn_head import RetinaSepBNHead
+from .rpn_head import RPNHead
+from .ssd_head import SSDHead
+from .solo_head import SOLOHead
+from .solov2_head import SOLOv2Head
+from .solov2_light_head import SOLOv2LightHead
+from .decoupled_solo_head import DecoupledSOLOHead
+from .decoupled_solo_light_head import DecoupledSOLOLightHead
+
+__all__ = [
+ 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
+ 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead',
+ 'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead',
+ 'ATSSHead', 'SOLOHead', 'SOLOv2Head', 'SOLOv2LightHead',
+ 'DecoupledSOLOHead', 'DecoupledSOLOLightHead'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/anchor_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/anchor_head.py
new file mode 100644
index 000000000..0fdc0aade
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/anchor_head.py
@@ -0,0 +1,330 @@
+from __future__ import division
+
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox, force_fp32,
+ multi_apply, multiclass_nms)
+from ..builder import build_loss
+from ..registry import HEADS
+
+
+@HEADS.register_module
+class AnchorHead(nn.Module):
+ """Anchor-based head (RPN, RetinaNet, SSD, etc.).
+
+ Args:
+ num_classes (int): Number of categories including the background
+ category.
+ in_channels (int): Number of channels in the input feature map.
+ feat_channels (int): Number of hidden channels. Used in child classes.
+ anchor_scales (Iterable): Anchor scales.
+ anchor_ratios (Iterable): Anchor aspect ratios.
+ anchor_strides (Iterable): Anchor strides.
+ anchor_base_sizes (Iterable): Anchor base sizes.
+ target_means (Iterable): Mean values of regression targets.
+ target_stds (Iterable): Std values of regression targets.
+ loss_cls (dict): Config of classification loss.
+ loss_bbox (dict): Config of localization loss.
+ """ # noqa: W605
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ feat_channels=256,
+ anchor_scales=[8, 16, 32],
+ anchor_ratios=[0.5, 1.0, 2.0],
+ anchor_strides=[4, 8, 16, 32, 64],
+ anchor_base_sizes=None,
+ target_means=(.0, .0, .0, .0),
+ target_stds=(1.0, 1.0, 1.0, 1.0),
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0),
+ loss_bbox=dict(
+ type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
+ super(AnchorHead, self).__init__()
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.feat_channels = feat_channels
+ self.anchor_scales = anchor_scales
+ self.anchor_ratios = anchor_ratios
+ self.anchor_strides = anchor_strides
+ self.anchor_base_sizes = list(
+ anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
+ self.target_means = target_means
+ self.target_stds = target_stds
+
+ self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
+ self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']
+ if self.use_sigmoid_cls:
+ self.cls_out_channels = num_classes - 1
+ else:
+ self.cls_out_channels = num_classes
+
+ if self.cls_out_channels <= 0:
+ raise ValueError('num_classes={} is too small'.format(num_classes))
+
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox = build_loss(loss_bbox)
+ self.fp16_enabled = False
+
+ self.anchor_generators = []
+ for anchor_base in self.anchor_base_sizes:
+ self.anchor_generators.append(
+ AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
+
+ self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
+ self._init_layers()
+
+ def _init_layers(self):
+ self.conv_cls = nn.Conv2d(self.in_channels,
+ self.num_anchors * self.cls_out_channels, 1)
+ self.conv_reg = nn.Conv2d(self.in_channels, self.num_anchors * 4, 1)
+
+ def init_weights(self):
+ normal_init(self.conv_cls, std=0.01)
+ normal_init(self.conv_reg, std=0.01)
+
+ def forward_single(self, x):
+ cls_score = self.conv_cls(x)
+ bbox_pred = self.conv_reg(x)
+ return cls_score, bbox_pred
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats)
+
+ def get_anchors(self, featmap_sizes, img_metas, device='cuda'):
+ """Get anchors according to feature map sizes.
+
+ Args:
+ featmap_sizes (list[tuple]): Multi-level feature map sizes.
+ img_metas (list[dict]): Image meta info.
+ device (torch.device | str): device for returned tensors
+
+ Returns:
+ tuple: anchors of each image, valid flags of each image
+ """
+ num_imgs = len(img_metas)
+ num_levels = len(featmap_sizes)
+
+ # since feature map sizes of all images are the same, we only compute
+ # anchors for one time
+ multi_level_anchors = []
+ for i in range(num_levels):
+ anchors = self.anchor_generators[i].grid_anchors(
+ featmap_sizes[i], self.anchor_strides[i], device=device)
+ multi_level_anchors.append(anchors)
+ anchor_list = [multi_level_anchors for _ in range(num_imgs)]
+
+ # for each image, we compute valid flags of multi level anchors
+ valid_flag_list = []
+ for img_id, img_meta in enumerate(img_metas):
+ multi_level_flags = []
+ for i in range(num_levels):
+ anchor_stride = self.anchor_strides[i]
+ feat_h, feat_w = featmap_sizes[i]
+ h, w = img_meta['pad_shape'][:2]
+ valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
+ valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
+ flags = self.anchor_generators[i].valid_flags(
+ (feat_h, feat_w), (valid_feat_h, valid_feat_w),
+ device=device)
+ multi_level_flags.append(flags)
+ valid_flag_list.append(multi_level_flags)
+
+ return anchor_list, valid_flag_list
+
+ def loss_single(self, cls_score, bbox_pred, labels, label_weights,
+ bbox_targets, bbox_weights, num_total_samples, cfg):
+ # classification loss
+ labels = labels.reshape(-1)
+ label_weights = label_weights.reshape(-1)
+ cls_score = cls_score.permute(0, 2, 3,
+ 1).reshape(-1, self.cls_out_channels)
+ loss_cls = self.loss_cls(
+ cls_score, labels, label_weights, avg_factor=num_total_samples)
+ # regression loss
+ bbox_targets = bbox_targets.reshape(-1, 4)
+ bbox_weights = bbox_weights.reshape(-1, 4)
+ bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
+ loss_bbox = self.loss_bbox(
+ bbox_pred,
+ bbox_targets,
+ bbox_weights,
+ avg_factor=num_total_samples)
+ return loss_cls, loss_bbox
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.anchor_generators)
+
+ device = cls_scores[0].device
+
+ anchor_list, valid_flag_list = self.get_anchors(
+ featmap_sizes, img_metas, device=device)
+ label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
+ cls_reg_targets = anchor_target(
+ anchor_list,
+ valid_flag_list,
+ gt_bboxes,
+ img_metas,
+ self.target_means,
+ self.target_stds,
+ cfg,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=label_channels,
+ sampling=self.sampling)
+ if cls_reg_targets is None:
+ return None
+ (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
+ num_total_pos, num_total_neg) = cls_reg_targets
+ num_total_samples = (
+ num_total_pos + num_total_neg if self.sampling else num_total_pos)
+ losses_cls, losses_bbox = multi_apply(
+ self.loss_single,
+ cls_scores,
+ bbox_preds,
+ labels_list,
+ label_weights_list,
+ bbox_targets_list,
+ bbox_weights_list,
+ num_total_samples=num_total_samples,
+ cfg=cfg)
+ return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
+ def get_bboxes(self,
+ cls_scores,
+ bbox_preds,
+ img_metas,
+ cfg,
+ rescale=False):
+ """
+ Transform network output for a batch into labeled boxes.
+
+ Args:
+ cls_scores (list[Tensor]): Box scores for each scale level
+ Has shape (N, num_anchors * num_classes, H, W)
+ bbox_preds (list[Tensor]): Box energies / deltas for each scale
+ level with shape (N, num_anchors * 4, H, W)
+ img_metas (list[dict]): size / scale info for each image
+ cfg (mmcv.Config): test / postprocessing configuration
+ rescale (bool): if True, return boxes in original image space
+
+ Returns:
+ list[tuple[Tensor, Tensor]]: each item in result_list is 2-tuple.
+ The first item is an (n, 5) tensor, where the first 4 columns
+ are bounding box positions (tl_x, tl_y, br_x, br_y) and the
+ 5-th column is a score between 0 and 1. The second item is a
+ (n,) tensor where each item is the class index of the
+ corresponding box.
+
+ Example:
+ >>> import mmcv
+ >>> import torch
+ >>> self = AnchorHead(num_classes=9, in_channels=1)
+ >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}]
+ >>> cfg = mmcv.Config(dict(
+ >>> score_thr=0.00,
+ >>> nms=dict(type='nms', iou_thr=1.0),
+ >>> max_per_img=10))
+ >>> feat = torch.rand(1, 1, 3, 3)
+ >>> cls_score, bbox_pred = self.forward_single(feat)
+ >>> # note the input lists are over different levels, not images
+ >>> cls_scores, bbox_preds = [cls_score], [bbox_pred]
+ >>> result_list = self.get_bboxes(cls_scores, bbox_preds,
+ >>> img_metas, cfg)
+ >>> det_bboxes, det_labels = result_list[0]
+ >>> assert len(result_list) == 1
+ >>> assert det_bboxes.shape[1] == 5
+ >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img
+ """
+ assert len(cls_scores) == len(bbox_preds)
+ num_levels = len(cls_scores)
+
+ device = cls_scores[0].device
+ mlvl_anchors = [
+ self.anchor_generators[i].grid_anchors(
+ cls_scores[i].size()[-2:],
+ self.anchor_strides[i],
+ device=device) for i in range(num_levels)
+ ]
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ mlvl_anchors, img_shape,
+ scale_factor, cfg, rescale)
+ result_list.append(proposals)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_score_list,
+ bbox_pred_list,
+ mlvl_anchors,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ """
+ Transform outputs for a single batch item into labeled boxes.
+ """
+ assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ for cls_score, bbox_pred, anchors in zip(cls_score_list,
+ bbox_pred_list, mlvl_anchors):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+ cls_score = cls_score.permute(1, 2,
+ 0).reshape(-1, self.cls_out_channels)
+ if self.use_sigmoid_cls:
+ scores = cls_score.sigmoid()
+ else:
+ scores = cls_score.softmax(-1)
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ nms_pre = cfg.get('nms_pre', -1)
+ if nms_pre > 0 and scores.shape[0] > nms_pre:
+ # Get maximum scores for foreground classes.
+ if self.use_sigmoid_cls:
+ max_scores, _ = scores.max(dim=1)
+ else:
+ max_scores, _ = scores[:, 1:].max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ anchors = anchors[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ mlvl_bboxes.append(bboxes)
+ mlvl_scores.append(scores)
+ mlvl_bboxes = torch.cat(mlvl_bboxes)
+ if rescale:
+ mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+ mlvl_scores = torch.cat(mlvl_scores)
+ if self.use_sigmoid_cls:
+ # Add a dummy background class to the front when using sigmoid
+ padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+ mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+ det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
+ cfg.score_thr, cfg.nms,
+ cfg.max_per_img)
+ return det_bboxes, det_labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/atss_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/atss_head.py
new file mode 100644
index 000000000..e0f2e0abc
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/atss_head.py
@@ -0,0 +1,487 @@
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import (PseudoSampler, anchor_inside_flags, bbox2delta,
+ build_assigner, delta2bbox, force_fp32,
+ images_to_levels, multi_apply, multiclass_nms, unmap)
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule, Scale, bias_init_with_prob
+from .anchor_head import AnchorHead
+
+
+def reduce_mean(tensor):
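+ # average the tensor across all distributed workers; acts as a no-op when
+ # torch.distributed is unavailable or not initialized (e.g. single-GPU runs)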
+ if not (dist.is_available() and dist.is_initialized()):
+ return tensor
+ tensor = tensor.clone()
+ dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM)
+ return tensor
+
+
+@HEADS.register_module
+class ATSSHead(AnchorHead):
+ """
+ Bridging the Gap Between Anchor-based and Anchor-free Detection via
+ Adaptive Training Sample Selection
+
+ The ATSS head structure is similar to that of FCOS, but ATSS uses anchor boxes
+ and assigns labels via Adaptive Training Sample Selection instead of max IoU.
+
+ https://arxiv.org/abs/1912.02424
+ """
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ stacked_convs=4,
+ octave_base_scale=4,
+ scales_per_octave=1,
+ conv_cfg=None,
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
+ loss_centerness=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0),
+ **kwargs):
+ self.stacked_convs = stacked_convs
+ self.octave_base_scale = octave_base_scale
+ self.scales_per_octave = scales_per_octave
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ octave_scales = np.array(
+ [2**(i / scales_per_octave) for i in range(scales_per_octave)])
+ anchor_scales = octave_scales * octave_base_scale
+ super(ATSSHead, self).__init__(
+ num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
+
+ self.loss_centerness = build_loss(loss_centerness)
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.atss_cls = nn.Conv2d(
+ self.feat_channels,
+ self.num_anchors * self.cls_out_channels,
+ 3,
+ padding=1)
+ self.atss_reg = nn.Conv2d(
+ self.feat_channels, self.num_anchors * 4, 3, padding=1)
+ self.atss_centerness = nn.Conv2d(
+ self.feat_channels, self.num_anchors * 1, 3, padding=1)
+ self.scales = nn.ModuleList([Scale(1.0) for _ in self.anchor_strides])
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.atss_cls, std=0.01, bias=bias_cls)
+ normal_init(self.atss_reg, std=0.01)
+ normal_init(self.atss_centerness, std=0.01)
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats, self.scales)
+
+ def forward_single(self, x, scale):
+ cls_feat = x
+ reg_feat = x
+ for cls_conv in self.cls_convs:
+ cls_feat = cls_conv(cls_feat)
+ for reg_conv in self.reg_convs:
+ reg_feat = reg_conv(reg_feat)
+ cls_score = self.atss_cls(cls_feat)
+ # we just follow atss, not apply exp in bbox_pred
+ bbox_pred = scale(self.atss_reg(reg_feat)).float()
+ centerness = self.atss_centerness(reg_feat)
+ return cls_score, bbox_pred, centerness
+
+ def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,
+ label_weights, bbox_targets, num_total_samples, cfg):
+
+ anchors = anchors.reshape(-1, 4)
+ cls_score = cls_score.permute(0, 2, 3,
+ 1).reshape(-1, self.cls_out_channels)
+ bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
+ centerness = centerness.permute(0, 2, 3, 1).reshape(-1)
+ bbox_targets = bbox_targets.reshape(-1, 4)
+ labels = labels.reshape(-1)
+ label_weights = label_weights.reshape(-1)
+
+ # classification loss
+ loss_cls = self.loss_cls(
+ cls_score, labels, label_weights, avg_factor=num_total_samples)
+
+ pos_inds = torch.nonzero(labels).squeeze(1)
+
+ if len(pos_inds) > 0:
+ pos_bbox_targets = bbox_targets[pos_inds]
+ pos_bbox_pred = bbox_pred[pos_inds]
+ pos_anchors = anchors[pos_inds]
+ pos_centerness = centerness[pos_inds]
+
+ centerness_targets = self.centerness_target(
+ pos_anchors, pos_bbox_targets)
+ pos_decode_bbox_pred = delta2bbox(pos_anchors, pos_bbox_pred,
+ self.target_means,
+ self.target_stds)
+ pos_decode_bbox_targets = delta2bbox(pos_anchors, pos_bbox_targets,
+ self.target_means,
+ self.target_stds)
+
+ # regression loss
+ loss_bbox = self.loss_bbox(
+ pos_decode_bbox_pred,
+ pos_decode_bbox_targets,
+ weight=centerness_targets,
+ avg_factor=1.0)
+
+ # centerness loss
+ loss_centerness = self.loss_centerness(
+ pos_centerness,
+ centerness_targets,
+ avg_factor=num_total_samples)
+
+ else:
+ loss_bbox = loss_cls * 0
+ loss_centerness = loss_bbox * 0
+ centerness_targets = torch.tensor(0).cuda()
+
+ return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.anchor_generators)
+
+ device = cls_scores[0].device
+ anchor_list, valid_flag_list = self.get_anchors(
+ featmap_sizes, img_metas, device=device)
+ label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
+
+ cls_reg_targets = self.atss_target(
+ anchor_list,
+ valid_flag_list,
+ gt_bboxes,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=label_channels)
+ if cls_reg_targets is None:
+ return None
+
+ (anchor_list, labels_list, label_weights_list, bbox_targets_list,
+ bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets
+
+ num_total_samples = reduce_mean(
+ torch.tensor(num_total_pos).cuda()).item()
+ num_total_samples = max(num_total_samples, 1.0)
+
+ losses_cls, losses_bbox, loss_centerness,\
+ bbox_avg_factor = multi_apply(
+ self.loss_single,
+ anchor_list,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ labels_list,
+ label_weights_list,
+ bbox_targets_list,
+ num_total_samples=num_total_samples,
+ cfg=cfg)
+
+ bbox_avg_factor = sum(bbox_avg_factor)
+ bbox_avg_factor = reduce_mean(bbox_avg_factor).item()
+ losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox))
+ return dict(
+ loss_cls=losses_cls,
+ loss_bbox=losses_bbox,
+ loss_centerness=loss_centerness)
+
+ def centerness_target(self, anchors, bbox_targets):
+ # only calculate pos centerness targets, otherwise there may be nan
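+ # centerness = sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b))),
+ # where l, t, r, b are distances from the anchor center to the decoded gt box sides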
+ gts = delta2bbox(anchors, bbox_targets, self.target_means,
+ self.target_stds)
+ anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2
+ anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2
+ l_ = anchors_cx - gts[:, 0]
+ t_ = anchors_cy - gts[:, 1]
+ r_ = gts[:, 2] - anchors_cx
+ b_ = gts[:, 3] - anchors_cy
+
+ left_right = torch.stack([l_, r_], dim=1)
+ top_bottom = torch.stack([t_, b_], dim=1)
+ centerness = torch.sqrt(
+ (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
+ (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))
+ assert not torch.isnan(centerness).any()
+ return centerness
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
+ def get_bboxes(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ img_metas,
+ cfg,
+ rescale=False):
+
+ assert len(cls_scores) == len(bbox_preds)
+ num_levels = len(cls_scores)
+ device = cls_scores[0].device
+ mlvl_anchors = [
+ self.anchor_generators[i].grid_anchors(
+ cls_scores[i].size()[-2:],
+ self.anchor_strides[i],
+ device=device) for i in range(num_levels)
+ ]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ centerness_pred_list = [
+ centernesses[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ centerness_pred_list,
+ mlvl_anchors, img_shape,
+ scale_factor, cfg, rescale)
+ result_list.append(proposals)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ mlvl_anchors,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ mlvl_centerness = []
+ for cls_score, bbox_pred, centerness, anchors in zip(
+ cls_scores, bbox_preds, centernesses, mlvl_anchors):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+
+ scores = cls_score.permute(1, 2, 0).reshape(
+ -1, self.cls_out_channels).sigmoid()
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
+
+ nms_pre = cfg.get('nms_pre', -1)
+ if nms_pre > 0 and scores.shape[0] > nms_pre:
+ max_scores, _ = (scores * centerness[:, None]).max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ anchors = anchors[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ centerness = centerness[topk_inds]
+
+ bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ mlvl_bboxes.append(bboxes)
+ mlvl_scores.append(scores)
+ mlvl_centerness.append(centerness)
+
+ mlvl_bboxes = torch.cat(mlvl_bboxes)
+ if rescale:
+ mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+
+ mlvl_scores = torch.cat(mlvl_scores)
+ padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+ mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+ mlvl_centerness = torch.cat(mlvl_centerness)
+
+ det_bboxes, det_labels = multiclass_nms(
+ mlvl_bboxes,
+ mlvl_scores,
+ cfg.score_thr,
+ cfg.nms,
+ cfg.max_per_img,
+ score_factors=mlvl_centerness)
+ return det_bboxes, det_labels
+
+ def atss_target(self,
+ anchor_list,
+ valid_flag_list,
+ gt_bboxes_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore_list=None,
+ gt_labels_list=None,
+ label_channels=1,
+ unmap_outputs=True):
+ """
+ Almost the same as anchor_target, with one small modification:
+ the assigned anchors are returned as well.
+ """
+ num_imgs = len(img_metas)
+ assert len(anchor_list) == len(valid_flag_list) == num_imgs
+
+ # anchor number of multi levels
+ num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
+ num_level_anchors_list = [num_level_anchors] * num_imgs
+
+ # concat all level anchors and flags to a single tensor
+ for i in range(num_imgs):
+ assert len(anchor_list[i]) == len(valid_flag_list[i])
+ anchor_list[i] = torch.cat(anchor_list[i])
+ valid_flag_list[i] = torch.cat(valid_flag_list[i])
+
+ # compute targets for each image
+ if gt_bboxes_ignore_list is None:
+ gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
+ if gt_labels_list is None:
+ gt_labels_list = [None for _ in range(num_imgs)]
+ (all_anchors, all_labels, all_label_weights, all_bbox_targets,
+ all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
+ self.atss_target_single,
+ anchor_list,
+ valid_flag_list,
+ num_level_anchors_list,
+ gt_bboxes_list,
+ gt_bboxes_ignore_list,
+ gt_labels_list,
+ img_metas,
+ cfg=cfg,
+ label_channels=label_channels,
+ unmap_outputs=unmap_outputs)
+ # no valid anchors
+ if any([labels is None for labels in all_labels]):
+ return None
+ # sampled anchors of all images
+ num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
+ num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
+ # split targets to a list w.r.t. multiple levels
+ anchors_list = images_to_levels(all_anchors, num_level_anchors)
+ labels_list = images_to_levels(all_labels, num_level_anchors)
+ label_weights_list = images_to_levels(all_label_weights,
+ num_level_anchors)
+ bbox_targets_list = images_to_levels(all_bbox_targets,
+ num_level_anchors)
+ bbox_weights_list = images_to_levels(all_bbox_weights,
+ num_level_anchors)
+ return (anchors_list, labels_list, label_weights_list,
+ bbox_targets_list, bbox_weights_list, num_total_pos,
+ num_total_neg)
+
+ def atss_target_single(self,
+ flat_anchors,
+ valid_flags,
+ num_level_anchors,
+ gt_bboxes,
+ gt_bboxes_ignore,
+ gt_labels,
+ img_meta,
+ cfg,
+ label_channels=1,
+ unmap_outputs=True):
+ inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
+ img_meta['img_shape'][:2],
+ cfg.allowed_border)
+ if not inside_flags.any():
+ return (None, ) * 6
+ # assign gt and sample anchors
+ anchors = flat_anchors[inside_flags, :]
+
+ num_level_anchors_inside = self.get_num_level_anchors_inside(
+ num_level_anchors, inside_flags)
+ bbox_assigner = build_assigner(cfg.assigner)
+ assign_result = bbox_assigner.assign(anchors, num_level_anchors_inside,
+ gt_bboxes, gt_bboxes_ignore,
+ gt_labels)
+
+ bbox_sampler = PseudoSampler()
+ sampling_result = bbox_sampler.sample(assign_result, anchors,
+ gt_bboxes)
+
+ num_valid_anchors = anchors.shape[0]
+ bbox_targets = torch.zeros_like(anchors)
+ bbox_weights = torch.zeros_like(anchors)
+ labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
+ label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
+
+ pos_inds = sampling_result.pos_inds
+ neg_inds = sampling_result.neg_inds
+ if len(pos_inds) > 0:
+ pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
+ sampling_result.pos_gt_bboxes,
+ self.target_means, self.target_stds)
+ bbox_targets[pos_inds, :] = pos_bbox_targets
+ bbox_weights[pos_inds, :] = 1.0
+ if gt_labels is None:
+ labels[pos_inds] = 1
+ else:
+ labels[pos_inds] = gt_labels[
+ sampling_result.pos_assigned_gt_inds]
+ if cfg.pos_weight <= 0:
+ label_weights[pos_inds] = 1.0
+ else:
+ label_weights[pos_inds] = cfg.pos_weight
+ if len(neg_inds) > 0:
+ label_weights[neg_inds] = 1.0
+
+ # map up to original set of anchors
+ if unmap_outputs:
+ num_total_anchors = flat_anchors.size(0)
+ anchors = unmap(anchors, num_total_anchors, inside_flags)
+ labels = unmap(labels, num_total_anchors, inside_flags)
+ label_weights = unmap(label_weights, num_total_anchors,
+ inside_flags)
+ bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
+ bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
+
+ return (anchors, labels, label_weights, bbox_targets, bbox_weights,
+ pos_inds, neg_inds)
+
+ def get_num_level_anchors_inside(self, num_level_anchors, inside_flags):
+ split_inside_flags = torch.split(inside_flags, num_level_anchors)
+ num_level_anchors_inside = [
+ int(flags.sum()) for flags in split_inside_flags
+ ]
+ return num_level_anchors_inside
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_head.py
new file mode 100644
index 000000000..1b6001142
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_head.py
@@ -0,0 +1,484 @@
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+from mmdet.ops import DeformConv, roi_align
+from mmdet.core import multi_apply, bbox2roi, matrix_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, ConvModule
+
+INF = 1e8
+
+def center_of_mass(bitmasks):
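+ # centroid of each binary mask via image moments:
+ # m00 is the mask area, m10 / m01 are the first-order moments along x / y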
+ _, h, w = bitmasks.size()
+ ys = torch.arange(0, h, dtype=torch.float32, device=bitmasks.device)
+ xs = torch.arange(0, w, dtype=torch.float32, device=bitmasks.device)
+
+ m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6)
+ m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1)
+ m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1)
+ center_x = m10 / m00
+ center_y = m01 / m00
+ return center_x, center_y
+
+def points_nms(heat, kernel=2):
+ # kernel must be 2
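+ # keep only local maxima of the category heatmap (a cheap NMS over the S x S grid)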
+ hmax = nn.functional.max_pool2d(
+ heat, (kernel, kernel), stride=1, padding=1)
+ keep = (hmax[:, :, :-1, :-1] == heat).float()
+ return heat * keep
+
+def dice_loss(input, target):
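+ # per-instance Dice loss: 1 - 2*|X*Y| / (|X|^2 + |Y|^2), with a small constant
+ # (0.001) added to both denominator terms for numerical stability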
+ input = input.contiguous().view(input.size()[0], -1)
+ target = target.contiguous().view(target.size()[0], -1).float()
+
+ a = torch.sum(input * target, 1)
+ b = torch.sum(input * input, 1) + 0.001
+ c = torch.sum(target * target, 1) + 0.001
+ d = (2 * a) / (b + c)
+ return 1 - d
+
+@HEADS.register_module
+class DecoupledSOLOHead(nn.Module):
+ def __init__(self,
+ num_classes,
+ in_channels,
+ seg_feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
+ sigma=0.4,
+ num_grids=None,
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=None,
+ loss_cate=None,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(DecoupledSOLOHead, self).__init__()
+ self.num_classes = num_classes
+ self.seg_num_grids = num_grids
+ self.cate_out_channels = self.num_classes - 1
+ self.in_channels = in_channels
+ self.seg_feat_channels = seg_feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.sigma = sigma
+ self.cate_down_pos = cate_down_pos
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.with_deform = with_deform
+ self.loss_cate = build_loss(loss_cate)
+ self.ins_loss_weight = loss_ins['loss_weight']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self._init_layers()
+
+ def _init_layers(self):
+ norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
+ self.ins_convs_x = nn.ModuleList()
+ self.ins_convs_y = nn.ModuleList()
+ self.cate_convs = nn.ModuleList()
+
+ for i in range(self.stacked_convs):
+ chn = self.in_channels + 1 if i == 0 else self.seg_feat_channels
+ self.ins_convs_x.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+ self.ins_convs_y.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ chn = self.in_channels if i == 0 else self.seg_feat_channels
+ self.cate_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ self.dsolo_ins_list_x = nn.ModuleList()
+ self.dsolo_ins_list_y = nn.ModuleList()
+ for seg_num_grid in self.seg_num_grids:
+ self.dsolo_ins_list_x.append(
+ nn.Conv2d(
+ self.seg_feat_channels, seg_num_grid, 3, padding=1))
+ self.dsolo_ins_list_y.append(
+ nn.Conv2d(
+ self.seg_feat_channels, seg_num_grid, 3, padding=1))
+ self.dsolo_cate = nn.Conv2d(
+ self.seg_feat_channels, self.cate_out_channels, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.ins_convs_x:
+ normal_init(m.conv, std=0.01)
+ for m in self.ins_convs_y:
+ normal_init(m.conv, std=0.01)
+ for m in self.cate_convs:
+ normal_init(m.conv, std=0.01)
+ bias_ins = bias_init_with_prob(0.01)
+ for m in self.dsolo_ins_list_x:
+ normal_init(m, std=0.01, bias=bias_ins)
+ for m in self.dsolo_ins_list_y:
+ normal_init(m, std=0.01, bias=bias_ins)
+ bias_cate = bias_init_with_prob(0.01)
+ normal_init(self.dsolo_cate, std=0.01, bias=bias_cate)
+
+ def forward(self, feats, eval=False):
+ new_feats = self.split_feats(feats)
+ featmap_sizes = [featmap.size()[-2:] for featmap in new_feats]
+ upsampled_size = (featmap_sizes[0][0] * 2, featmap_sizes[0][1] * 2)
+ ins_pred_x, ins_pred_y, cate_pred = multi_apply(self.forward_single, new_feats,
+ list(range(len(self.seg_num_grids))),
+ eval=eval, upsampled_size=upsampled_size)
+ return ins_pred_x, ins_pred_y, cate_pred
+
+ def split_feats(self, feats):
+ return (F.interpolate(feats[0], scale_factor=0.5, mode='bilinear'),
+ feats[1],
+ feats[2],
+ feats[3],
+ F.interpolate(feats[4], size=feats[3].shape[-2:], mode='bilinear'))
+
+ def forward_single(self, x, idx, eval=False, upsampled_size=None):
+ ins_feat = x
+ cate_feat = x
+ # ins branch
+ # concat coord
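+ # CoordConv-style trick: append a normalized coordinate map in [-1, 1] to each
+ # mask branch (x coords to the x-branch, y coords to the y-branch) so the
+ # predictions are position-aware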
+ x_range = torch.linspace(-1, 1, ins_feat.shape[-1], device=ins_feat.device)
+ y_range = torch.linspace(-1, 1, ins_feat.shape[-2], device=ins_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([ins_feat.shape[0], 1, -1, -1])
+ x = x.expand([ins_feat.shape[0], 1, -1, -1])
+ ins_feat_x = torch.cat([ins_feat, x], 1)
+ ins_feat_y = torch.cat([ins_feat, y], 1)
+
+ for ins_layer_x, ins_layer_y in zip(self.ins_convs_x, self.ins_convs_y):
+ ins_feat_x = ins_layer_x(ins_feat_x)
+ ins_feat_y = ins_layer_y(ins_feat_y)
+
+ ins_feat_x = F.interpolate(ins_feat_x, scale_factor=2, mode='bilinear')
+ ins_feat_y = F.interpolate(ins_feat_y, scale_factor=2, mode='bilinear')
+
+ ins_pred_x = self.dsolo_ins_list_x[idx](ins_feat_x)
+ ins_pred_y = self.dsolo_ins_list_y[idx](ins_feat_y)
+
+ # cate branch
+ for i, cate_layer in enumerate(self.cate_convs):
+ if i == self.cate_down_pos:
+ seg_num_grid = self.seg_num_grids[idx]
+ cate_feat = F.interpolate(cate_feat, size=seg_num_grid, mode='bilinear')
+ cate_feat = cate_layer(cate_feat)
+
+ cate_pred = self.dsolo_cate(cate_feat)
+
+ if eval:
+ ins_pred_x = F.interpolate(ins_pred_x.sigmoid(), size=upsampled_size, mode='bilinear')
+ ins_pred_y = F.interpolate(ins_pred_y.sigmoid(), size=upsampled_size, mode='bilinear')
+ cate_pred = points_nms(cate_pred.sigmoid(), kernel=2).permute(0, 2, 3, 1)
+ return ins_pred_x, ins_pred_y, cate_pred
+
+ def loss(self,
+ ins_preds_x,
+ ins_preds_y,
+ cate_preds,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in
+ ins_preds_x]
+ ins_label_list, cate_label_list, ins_ind_label_list, ins_ind_label_list_xy = multi_apply(
+ self.solo_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ featmap_sizes=featmap_sizes)
+
+ # ins
+ ins_labels = [torch.cat([ins_labels_level_img[ins_ind_labels_level_img, ...]
+ for ins_labels_level_img, ins_ind_labels_level_img in
+ zip(ins_labels_level, ins_ind_labels_level)], 0)
+ for ins_labels_level, ins_ind_labels_level in zip(zip(*ins_label_list), zip(*ins_ind_label_list))]
+
+ ins_preds_x_final = [torch.cat([ins_preds_level_img_x[ins_ind_labels_level_img[:, 1], ...]
+ for ins_preds_level_img_x, ins_ind_labels_level_img in
+ zip(ins_preds_level_x, ins_ind_labels_level)], 0)
+ for ins_preds_level_x, ins_ind_labels_level in
+ zip(ins_preds_x, zip(*ins_ind_label_list_xy))]
+
+ ins_preds_y_final = [torch.cat([ins_preds_level_img_y[ins_ind_labels_level_img[:, 0], ...]
+ for ins_preds_level_img_y, ins_ind_labels_level_img in
+ zip(ins_preds_level_y, ins_ind_labels_level)], 0)
+ for ins_preds_level_y, ins_ind_labels_level in
+ zip(ins_preds_y, zip(*ins_ind_label_list_xy))]
+
+ num_ins = 0.
+ # dice loss
+ loss_ins = []
+ for input_x, input_y, target in zip(ins_preds_x_final, ins_preds_y_final, ins_labels):
+ mask_n = input_x.size(0)
+ if mask_n == 0:
+ continue
+ num_ins += mask_n
+ input = input_x.sigmoid() * input_y.sigmoid()
+ loss_ins.append(dice_loss(input, target))
+
+ loss_ins = torch.cat(loss_ins).mean() * self.ins_loss_weight
+
+ # cate
+ cate_labels = [
+ torch.cat([cate_labels_level_img.flatten()
+ for cate_labels_level_img in cate_labels_level])
+ for cate_labels_level in zip(*cate_label_list)
+ ]
+ flatten_cate_labels = torch.cat(cate_labels)
+
+ cate_preds = [
+ cate_pred.permute(0, 2, 3, 1).reshape(-1, self.cate_out_channels)
+ for cate_pred in cate_preds
+ ]
+ flatten_cate_preds = torch.cat(cate_preds)
+
+ loss_cate = self.loss_cate(flatten_cate_preds, flatten_cate_labels, avg_factor=num_ins + 1)
+ return dict(
+ loss_ins=loss_ins,
+ loss_cate=loss_cate)
+
+ def solo_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ gt_masks_raw,
+ featmap_sizes=None):
+
+ device = gt_labels_raw[0].device
+ # ins
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) * (
+ gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+ ins_label_list = []
+ cate_label_list = []
+ ins_ind_label_list = []
+ ins_ind_label_list_xy = []
+ for (lower_bound, upper_bound), stride, featmap_size, num_grid \
+ in zip(self.scale_ranges, self.strides, featmap_sizes, self.seg_num_grids):
+
+ ins_label = torch.zeros([num_grid**2, featmap_size[0], featmap_size[1]], dtype=torch.uint8, device=device)
+ cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
+ ins_ind_label = torch.zeros([num_grid**2], dtype=torch.bool, device=device)
+
+ hit_indices = ((gt_areas >= lower_bound) & (gt_areas <= upper_bound)).nonzero().flatten()
+
+ if len(hit_indices) == 0:
+ ins_label = torch.zeros([1, featmap_size[0], featmap_size[1]], dtype=torch.uint8,
+ device=device)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label = torch.zeros([1], dtype=torch.bool, device=device)
+ ins_ind_label_list.append(ins_ind_label)
+ ins_ind_label_list_xy.append(cate_label.nonzero())
+ continue
+ gt_bboxes = gt_bboxes_raw[hit_indices]
+ gt_labels = gt_labels_raw[hit_indices]
+ gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...]
+
+ half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
+ half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma
+
+ # mass center
+ gt_masks_pt = torch.from_numpy(gt_masks).to(device=device)
+ center_ws, center_hs = center_of_mass(gt_masks_pt)
+ valid_mask_flags = gt_masks_pt.sum(dim=-1).sum(dim=-1) > 0
+
+ output_stride = stride / 2
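+ # instance mask targets are built at half the level stride, since the mask
+ # branch predictions are upsampled by 2x in forward_single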
+ for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws, valid_mask_flags):
+ if not valid_mask_flag:
+ continue
+ upsampled_size = (featmap_sizes[0][0] * 4, featmap_sizes[0][1] * 4)
+ coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
+ coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))
+
+ # left, top, right, down
+ top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
+ down_box = min(num_grid - 1, int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
+ left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
+ right_box = min(num_grid - 1, int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))
+
+ top = max(top_box, coord_h-1)
+ down = min(down_box, coord_h+1)
+ left = max(coord_w-1, left_box)
+ right = min(right_box, coord_w+1)
+
+ # squared
+ cate_label[top:(down+1), left:(right+1)] = gt_label
+ # ins
+ seg_mask = mmcv.imrescale(seg_mask, scale=1. / output_stride)
+ seg_mask = torch.from_numpy(seg_mask).to(device=device)
+ for i in range(top, down+1):
+ for j in range(left, right+1):
+ label = int(i * num_grid + j)
+ ins_label[label, :seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
+ ins_ind_label[label] = True
+
+ ins_label = ins_label[ins_ind_label]
+ ins_label_list.append(ins_label)
+
+ cate_label_list.append(cate_label)
+
+ ins_ind_label = ins_ind_label[ins_ind_label]
+ ins_ind_label_list.append(ins_ind_label)
+
+ ins_ind_label_list_xy.append(cate_label.nonzero())
+ return ins_label_list, cate_label_list, ins_ind_label_list, ins_ind_label_list_xy
+
+ def get_seg(self, seg_preds_x, seg_preds_y, cate_preds, img_metas, cfg, rescale=None):
+ assert len(seg_preds_x) == len(cate_preds)
+ num_levels = len(cate_preds)
+ featmap_size = seg_preds_x[0].size()[-2:]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cate_pred_list = [
+ cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
+ ]
+ seg_pred_list_x = [
+ seg_preds_x[i][img_id].detach() for i in range(num_levels)
+ ]
+ seg_pred_list_y = [
+ seg_preds_y[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ ori_shape = img_metas[img_id]['ori_shape']
+
+ cate_pred_list = torch.cat(cate_pred_list, dim=0)
+ seg_pred_list_x = torch.cat(seg_pred_list_x, dim=0)
+ seg_pred_list_y = torch.cat(seg_pred_list_y, dim=0)
+
+ result = self.get_seg_single(cate_pred_list, seg_pred_list_x, seg_pred_list_y,
+ featmap_size, img_shape, ori_shape, scale_factor, cfg, rescale)
+ result_list.append(result)
+ return result_list
+
+ def get_seg_single(self,
+ cate_preds,
+ seg_preds_x,
+ seg_preds_y,
+ featmap_size,
+ img_shape,
+ ori_shape,
+ scale_factor,
+ cfg,
+ rescale=False, debug=False):
+
+ # overall info.
+ h, w, _ = img_shape
+ upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
+
+ # build lookup tensors that map a flattened (all-levels) grid-cell index back to
+ # its per-level offsets, grid size and stride
+ trans_size = torch.Tensor(self.seg_num_grids).pow(2).cumsum(0).long()
+ trans_diff = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ num_grids = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ seg_size = torch.Tensor(self.seg_num_grids).cumsum(0).long()
+ seg_diff = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ strides = torch.ones(trans_size[-1].item(), device=cate_preds.device)
+
+ n_stage = len(self.seg_num_grids)
+ trans_diff[:trans_size[0]] *= 0
+ seg_diff[:trans_size[0]] *= 0
+ num_grids[:trans_size[0]] *= self.seg_num_grids[0]
+ strides[:trans_size[0]] *= self.strides[0]
+
+ for ind_ in range(1, n_stage):
+ trans_diff[trans_size[ind_ - 1]:trans_size[ind_]] *= trans_size[ind_ - 1]
+ seg_diff[trans_size[ind_ - 1]:trans_size[ind_]] *= seg_size[ind_ - 1]
+ num_grids[trans_size[ind_ - 1]:trans_size[ind_]] *= self.seg_num_grids[ind_]
+ strides[trans_size[ind_ - 1]:trans_size[ind_]] *= self.strides[ind_]
+
+ # process.
+ inds = (cate_preds > cfg.score_thr)
+ cate_scores = cate_preds[inds]
+
+ inds = inds.nonzero()
+ trans_diff = torch.index_select(trans_diff, dim=0, index=inds[:, 0])
+ seg_diff = torch.index_select(seg_diff, dim=0, index=inds[:, 0])
+ num_grids = torch.index_select(num_grids, dim=0, index=inds[:, 0])
+ strides = torch.index_select(strides, dim=0, index=inds[:, 0])
+
+ y_inds = (inds[:, 0] - trans_diff) // num_grids
+ x_inds = (inds[:, 0] - trans_diff) % num_grids
+ y_inds += seg_diff
+ x_inds += seg_diff
+
+ cate_labels = inds[:, 1]
+ seg_masks_soft = seg_preds_x[x_inds, ...] * seg_preds_y[y_inds, ...]
+ seg_masks = seg_masks_soft > cfg.mask_thr
+ sum_masks = seg_masks.sum((1, 2)).float()
+ keep = sum_masks > strides
+
+ seg_masks_soft = seg_masks_soft[keep, ...]
+ seg_masks = seg_masks[keep, ...]
+ cate_scores = cate_scores[keep]
+ sum_masks = sum_masks[keep]
+ cate_labels = cate_labels[keep]
+ # maskness
+ seg_score = (seg_masks_soft * seg_masks.float()).sum((1, 2)) / sum_masks
+ cate_scores *= seg_score
+
+ if len(cate_scores) == 0:
+ return None
+
+ # sort and keep top nms_pre
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.nms_pre:
+ sort_inds = sort_inds[:cfg.nms_pre]
+ seg_masks_soft = seg_masks_soft[sort_inds, :, :]
+ seg_masks = seg_masks[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ sum_masks = sum_masks[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ # Matrix NMS
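+ # Matrix NMS decays the scores of overlapping masks (soft suppression) instead of
+ # discarding them; masks falling below cfg.update_thr are filtered out below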
+ cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
+ kernel=cfg.kernel, sigma=cfg.sigma, sum_masks=sum_masks)
+
+ keep = cate_scores >= cfg.update_thr
+ seg_masks_soft = seg_masks_soft[keep, :, :]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+ # sort and keep top_k
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.max_per_img:
+ sort_inds = sort_inds[:cfg.max_per_img]
+ seg_masks_soft = seg_masks_soft[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ seg_masks_soft = F.interpolate(seg_masks_soft.unsqueeze(0),
+ size=upsampled_size_out,
+ mode='bilinear')[:, :, :h, :w]
+ seg_masks = F.interpolate(seg_masks_soft,
+ size=ori_shape[:2],
+ mode='bilinear').squeeze(0)
+ seg_masks = seg_masks > cfg.mask_thr
+ return seg_masks, cate_labels, cate_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_light_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_light_head.py
new file mode 100644
index 000000000..5b52802b2
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_light_head.py
@@ -0,0 +1,479 @@
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+from mmdet.ops import DeformConv, roi_align
+from mmdet.core import multi_apply, bbox2roi, matrix_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, ConvModule
+
+INF = 1e8
+
+def center_of_mass(bitmasks):
+ _, h, w = bitmasks.size()
+ ys = torch.arange(0, h, dtype=torch.float32, device=bitmasks.device)
+ xs = torch.arange(0, w, dtype=torch.float32, device=bitmasks.device)
+
+ m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6)
+ m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1)
+ m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1)
+ center_x = m10 / m00
+ center_y = m01 / m00
+ return center_x, center_y
+
+def points_nms(heat, kernel=2):
+ # kernel must be 2
+ hmax = nn.functional.max_pool2d(
+ heat, (kernel, kernel), stride=1, padding=1)
+ keep = (hmax[:, :, :-1, :-1] == heat).float()
+ return heat * keep
+
+def dice_loss(input, target):
+ input = input.contiguous().view(input.size()[0], -1)
+ target = target.contiguous().view(target.size()[0], -1).float()
+
+ a = torch.sum(input * target, 1)
+ b = torch.sum(input * input, 1) + 0.001
+ c = torch.sum(target * target, 1) + 0.001
+ d = (2 * a) / (b + c)
+ return 1 - d
+
+@HEADS.register_module
+class DecoupledSOLOLightHead(nn.Module):
+ def __init__(self,
+ num_classes,
+ in_channels,
+ seg_feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
+ sigma=0.4,
+ num_grids=None,
+ cate_down_pos=0,
+ loss_ins=None,
+ loss_cate=None,
+ conv_cfg=None,
+ norm_cfg=None,
+ use_dcn_in_tower=False,
+ type_dcn=None):
+ super(DecoupledSOLOLightHead, self).__init__()
+ self.num_classes = num_classes
+ self.seg_num_grids = num_grids
+ self.cate_out_channels = self.num_classes - 1
+ self.in_channels = in_channels
+ self.seg_feat_channels = seg_feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.sigma = sigma
+ self.cate_down_pos = cate_down_pos
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.loss_cate = build_loss(loss_cate)
+ self.ins_loss_weight = loss_ins['loss_weight']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.use_dcn_in_tower = use_dcn_in_tower
+ self.type_dcn = type_dcn
+ self._init_layers()
+
+ def _init_layers(self):
+ norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
+ self.ins_convs = nn.ModuleList()
+ self.cate_convs = nn.ModuleList()
+
+ for i in range(self.stacked_convs):
+ if self.use_dcn_in_tower and i == self.stacked_convs - 1:
+ cfg_conv = dict(type=self.type_dcn)
+ else:
+ cfg_conv = self.conv_cfg
+
+ chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
+ self.ins_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ chn = self.in_channels if i == 0 else self.seg_feat_channels
+ self.cate_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ self.dsolo_ins_list_x = nn.ModuleList()
+ self.dsolo_ins_list_y = nn.ModuleList()
+ for seg_num_grid in self.seg_num_grids:
+ self.dsolo_ins_list_x.append(
+ nn.Conv2d(
+ self.seg_feat_channels, seg_num_grid, 3, padding=1))
+ self.dsolo_ins_list_y.append(
+ nn.Conv2d(
+ self.seg_feat_channels, seg_num_grid, 3, padding=1))
+ self.dsolo_cate = nn.Conv2d(
+ self.seg_feat_channels, self.cate_out_channels, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.ins_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.cate_convs:
+ normal_init(m.conv, std=0.01)
+ bias_ins = bias_init_with_prob(0.01)
+ for m in self.dsolo_ins_list_x:
+ normal_init(m, std=0.01, bias=bias_ins)
+ for m in self.dsolo_ins_list_y:
+ normal_init(m, std=0.01, bias=bias_ins)
+ bias_cate = bias_init_with_prob(0.01)
+ normal_init(self.dsolo_cate, std=0.01, bias=bias_cate)
+
+ def forward(self, feats, eval=False):
+ new_feats = self.split_feats(feats)
+ featmap_sizes = [featmap.size()[-2:] for featmap in new_feats]
+ upsampled_size = (featmap_sizes[0][0] * 2, featmap_sizes[0][1] * 2)
+ ins_pred_x, ins_pred_y, cate_pred = multi_apply(self.forward_single, new_feats,
+ list(range(len(self.seg_num_grids))),
+ eval=eval, upsampled_size=upsampled_size)
+ return ins_pred_x, ins_pred_y, cate_pred
+
+ def split_feats(self, feats):
+ return (F.interpolate(feats[0], scale_factor=0.5, mode='bilinear'),
+ feats[1],
+ feats[2],
+ feats[3],
+ F.interpolate(feats[4], size=feats[3].shape[-2:], mode='bilinear'))
+
+ def forward_single(self, x, idx, eval=False, upsampled_size=None):
+ ins_feat = x
+ cate_feat = x
+ # ins branch
+ # concat coord
+ x_range = torch.linspace(-1, 1, ins_feat.shape[-1], device=ins_feat.device)
+ y_range = torch.linspace(-1, 1, ins_feat.shape[-2], device=ins_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([ins_feat.shape[0], 1, -1, -1])
+ x = x.expand([ins_feat.shape[0], 1, -1, -1])
+ coord_feat = torch.cat([x, y], 1)
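+ # unlike DecoupledSOLOHead, the light head shares a single ins tower: both
+ # coordinate maps are concatenated to one feature, and the x / y mask predictions
+ # come from separate final conv layers on top of it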
+ ins_feat = torch.cat([ins_feat, coord_feat], 1)
+
+ for ins_layer in self.ins_convs:
+ ins_feat = ins_layer(ins_feat)
+
+ ins_feat = F.interpolate(ins_feat, scale_factor=2, mode='bilinear')
+
+ ins_pred_x = self.dsolo_ins_list_x[idx](ins_feat)
+ ins_pred_y = self.dsolo_ins_list_y[idx](ins_feat)
+
+ # cate branch
+ for i, cate_layer in enumerate(self.cate_convs):
+ if i == self.cate_down_pos:
+ seg_num_grid = self.seg_num_grids[idx]
+ cate_feat = F.interpolate(cate_feat, size=seg_num_grid, mode='bilinear')
+ cate_feat = cate_layer(cate_feat)
+
+ cate_pred = self.dsolo_cate(cate_feat)
+
+ if eval:
+ ins_pred_x = F.interpolate(ins_pred_x.sigmoid(), size=upsampled_size, mode='bilinear')
+ ins_pred_y = F.interpolate(ins_pred_y.sigmoid(), size=upsampled_size, mode='bilinear')
+ cate_pred = points_nms(cate_pred.sigmoid(), kernel=2).permute(0, 2, 3, 1)
+ return ins_pred_x, ins_pred_y, cate_pred
+
+ def loss(self,
+ ins_preds_x,
+ ins_preds_y,
+ cate_preds,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in
+ ins_preds_x]
+ ins_label_list, cate_label_list, ins_ind_label_list, ins_ind_label_list_xy = multi_apply(
+ self.solo_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ featmap_sizes=featmap_sizes)
+
+ # ins
+ ins_labels = [torch.cat([ins_labels_level_img[ins_ind_labels_level_img, ...]
+ for ins_labels_level_img, ins_ind_labels_level_img in
+ zip(ins_labels_level, ins_ind_labels_level)], 0)
+ for ins_labels_level, ins_ind_labels_level in zip(zip(*ins_label_list), zip(*ins_ind_label_list))]
+
+ ins_preds_x_final = [torch.cat([ins_preds_level_img_x[ins_ind_labels_level_img[:, 1], ...]
+ for ins_preds_level_img_x, ins_ind_labels_level_img in
+ zip(ins_preds_level_x, ins_ind_labels_level)], 0)
+ for ins_preds_level_x, ins_ind_labels_level in
+ zip(ins_preds_x, zip(*ins_ind_label_list_xy))]
+
+ ins_preds_y_final = [torch.cat([ins_preds_level_img_y[ins_ind_labels_level_img[:, 0], ...]
+ for ins_preds_level_img_y, ins_ind_labels_level_img in
+ zip(ins_preds_level_y, ins_ind_labels_level)], 0)
+ for ins_preds_level_y, ins_ind_labels_level in
+ zip(ins_preds_y, zip(*ins_ind_label_list_xy))]
+
+ num_ins = 0.
+ # dice loss
+ loss_ins = []
+ for input_x, input_y, target in zip(ins_preds_x_final, ins_preds_y_final, ins_labels):
+ mask_n = input_x.size(0)
+ if mask_n == 0:
+ continue
+ num_ins += mask_n
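+            # decoupled SOLO: the soft mask is the product of the x-branch and
+            # y-branch sigmoid outputs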
+ input = (input_x.sigmoid())*(input_y.sigmoid())
+ loss_ins.append(dice_loss(input, target))
+
+ loss_ins = torch.cat(loss_ins).mean() * self.ins_loss_weight
+
+ # cate
+ cate_labels = [
+ torch.cat([cate_labels_level_img.flatten()
+ for cate_labels_level_img in cate_labels_level])
+ for cate_labels_level in zip(*cate_label_list)
+ ]
+ flatten_cate_labels = torch.cat(cate_labels)
+
+ cate_preds = [
+ cate_pred.permute(0, 2, 3, 1).reshape(-1, self.cate_out_channels)
+ for cate_pred in cate_preds
+ ]
+ flatten_cate_preds = torch.cat(cate_preds)
+
+ loss_cate = self.loss_cate(flatten_cate_preds, flatten_cate_labels, avg_factor=num_ins + 1)
+ return dict(
+ loss_ins=loss_ins,
+ loss_cate=loss_cate)
+
+ def solo_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ gt_masks_raw,
+ featmap_sizes=None):
+
+ device = gt_labels_raw[0].device
+ # ins
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) * (
+ gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+ ins_label_list = []
+ cate_label_list = []
+ ins_ind_label_list = []
+ ins_ind_label_list_xy = []
+ for (lower_bound, upper_bound), stride, featmap_size, num_grid \
+ in zip(self.scale_ranges, self.strides, featmap_sizes, self.seg_num_grids):
+
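+            # per-level targets: one binary mask per grid cell, a grid of
+            # category labels, and flags marking the positive cells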
+ ins_label = torch.zeros([num_grid**2, featmap_size[0], featmap_size[1]], dtype=torch.uint8, device=device)
+ cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
+ ins_ind_label = torch.zeros([num_grid**2], dtype=torch.bool, device=device)
+
+ hit_indices = ((gt_areas >= lower_bound) & (gt_areas <= upper_bound)).nonzero().flatten()
+
+ if len(hit_indices) == 0:
+ ins_label = torch.zeros([1, featmap_size[0], featmap_size[1]], dtype=torch.uint8,
+ device=device)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label = torch.zeros([1], dtype=torch.bool, device=device)
+ ins_ind_label_list.append(ins_ind_label)
+ ins_ind_label_list_xy.append(cate_label.nonzero())
+ continue
+ gt_bboxes = gt_bboxes_raw[hit_indices]
+ gt_labels = gt_labels_raw[hit_indices]
+ gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...]
+
+ half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
+ half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma
+
+ # mass center
+ gt_masks_pt = torch.from_numpy(gt_masks).to(device=device)
+ center_ws, center_hs = center_of_mass(gt_masks_pt)
+ valid_mask_flags = gt_masks_pt.sum(dim=-1).sum(dim=-1) > 0
+
+ output_stride = stride / 2
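+            # assign each GT to the grid cells covered by the sigma-shrunk box
+            # around its mass center, limited to a 3x3 neighbourhood of the
+            # center cell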
+ for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws, valid_mask_flags):
+ if not valid_mask_flag:
+ continue
+ upsampled_size = (featmap_sizes[0][0] * 4, featmap_sizes[0][1] * 4)
+ coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
+ coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))
+
+                # grid-index bounds of the center region: top, down, left, right
+ top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
+ down_box = min(num_grid - 1, int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
+ left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
+ right_box = min(num_grid - 1, int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))
+
+ top = max(top_box, coord_h-1)
+ down = min(down_box, coord_h+1)
+ left = max(coord_w-1, left_box)
+ right = min(right_box, coord_w+1)
+
+                # cate: label every grid cell inside the (clamped) center region
+ cate_label[top:(down+1), left:(right+1)] = gt_label
+ # ins
+ seg_mask = mmcv.imrescale(seg_mask, scale=1. / output_stride)
+ seg_mask = torch.from_numpy(seg_mask).to(device=device)
+ for i in range(top, down+1):
+ for j in range(left, right+1):
+ label = int(i * num_grid + j)
+ ins_label[label, :seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
+ ins_ind_label[label] = True
+
+ ins_label = ins_label[ins_ind_label]
+ ins_label_list.append(ins_label)
+
+ cate_label_list.append(cate_label)
+
+ ins_ind_label = ins_ind_label[ins_ind_label]
+ ins_ind_label_list.append(ins_ind_label)
+
+ ins_ind_label_list_xy.append(cate_label.nonzero())
+ return ins_label_list, cate_label_list, ins_ind_label_list, ins_ind_label_list_xy
+
+ def get_seg(self, seg_preds_x, seg_preds_y, cate_preds, img_metas, cfg, rescale=None):
+ assert len(seg_preds_x) == len(cate_preds)
+ num_levels = len(cate_preds)
+ featmap_size = seg_preds_x[0].size()[-2:]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cate_pred_list = [
+ cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
+ ]
+ seg_pred_list_x = [
+ seg_preds_x[i][img_id].detach() for i in range(num_levels)
+ ]
+ seg_pred_list_y = [
+ seg_preds_y[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ ori_shape = img_metas[img_id]['ori_shape']
+
+ cate_pred_list = torch.cat(cate_pred_list, dim=0)
+ seg_pred_list_x = torch.cat(seg_pred_list_x, dim=0)
+ seg_pred_list_y = torch.cat(seg_pred_list_y, dim=0)
+
+ result = self.get_seg_single(cate_pred_list, seg_pred_list_x, seg_pred_list_y,
+ featmap_size, img_shape, ori_shape, scale_factor, cfg, rescale)
+ result_list.append(result)
+ return result_list
+
+ def get_seg_single(self,
+ cate_preds,
+ seg_preds_x,
+ seg_preds_y,
+ featmap_size,
+ img_shape,
+ ori_shape,
+ scale_factor,
+ cfg,
+ rescale=False, debug=False):
+
+ # overall info.
+ h, w, _ = img_shape
+ upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
+
+        # per-level bookkeeping: trans_diff maps a flattened category index back
+        # to its level-local grid index, seg_diff offsets into the concatenated
+        # x/y mask channels, num_grids and strides record each level's grid size
+        # and stride
+ trans_size = torch.Tensor(self.seg_num_grids).pow(2).cumsum(0).long()
+ trans_diff = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ num_grids = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ seg_size = torch.Tensor(self.seg_num_grids).cumsum(0).long()
+ seg_diff = torch.ones(trans_size[-1].item(), device=cate_preds.device).long()
+ strides = torch.ones(trans_size[-1].item(), device=cate_preds.device)
+
+ n_stage = len(self.seg_num_grids)
+ trans_diff[:trans_size[0]] *= 0
+ seg_diff[:trans_size[0]] *= 0
+ num_grids[:trans_size[0]] *= self.seg_num_grids[0]
+ strides[:trans_size[0]] *= self.strides[0]
+
+ for ind_ in range(1, n_stage):
+ trans_diff[trans_size[ind_ - 1]:trans_size[ind_]] *= trans_size[ind_ - 1]
+ seg_diff[trans_size[ind_ - 1]:trans_size[ind_]] *= seg_size[ind_ - 1]
+ num_grids[trans_size[ind_ - 1]:trans_size[ind_]] *= self.seg_num_grids[ind_]
+ strides[trans_size[ind_ - 1]:trans_size[ind_]] *= self.strides[ind_]
+
+ # process.
+ inds = (cate_preds > cfg.score_thr)
+ cate_scores = cate_preds[inds]
+
+ inds = inds.nonzero()
+ trans_diff = torch.index_select(trans_diff, dim=0, index=inds[:, 0])
+ seg_diff = torch.index_select(seg_diff, dim=0, index=inds[:, 0])
+ num_grids = torch.index_select(num_grids, dim=0, index=inds[:, 0])
+ strides = torch.index_select(strides, dim=0, index=inds[:, 0])
+
+ y_inds = (inds[:, 0] - trans_diff) // num_grids
+ x_inds = (inds[:, 0] - trans_diff) % num_grids
+ y_inds += seg_diff
+ x_inds += seg_diff
+
+ cate_labels = inds[:, 1]
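+        # decoupled SOLO: the soft mask of grid cell (i, j) is the product of
+        # the j-th x-branch map and the i-th y-branch map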
+ seg_masks_soft = seg_preds_x[x_inds, ...] * seg_preds_y[y_inds, ...]
+ seg_masks = seg_masks_soft > cfg.mask_thr
+ sum_masks = seg_masks.sum((1, 2)).float()
+ keep = sum_masks > strides
+
+ seg_masks_soft = seg_masks_soft[keep, ...]
+ seg_masks = seg_masks[keep, ...]
+ cate_scores = cate_scores[keep]
+ sum_masks = sum_masks[keep]
+ cate_labels = cate_labels[keep]
+        # maskness: average soft-mask score inside the thresholded mask,
+        # used to re-weight the category score
+ seg_score = (seg_masks_soft * seg_masks.float()).sum((1, 2)) / sum_masks
+ cate_scores *= seg_score
+
+ if len(cate_scores) == 0:
+ return None
+
+ # sort and keep top nms_pre
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.nms_pre:
+ sort_inds = sort_inds[:cfg.nms_pre]
+ seg_masks_soft = seg_masks_soft[sort_inds, :, :]
+ seg_masks = seg_masks[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ sum_masks = sum_masks[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ # Matrix NMS
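+        # (soft suppression: scores of masks that overlap a higher-scoring mask
+        # of the same class are decayed instead of removed outright)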
+ cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
+ kernel=cfg.kernel, sigma=cfg.sigma, sum_masks=sum_masks)
+
+ keep = cate_scores >= cfg.update_thr
+ seg_masks_soft = seg_masks_soft[keep, :, :]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+ # sort and keep top_k
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.max_per_img:
+ sort_inds = sort_inds[:cfg.max_per_img]
+ seg_masks_soft = seg_masks_soft[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ seg_masks_soft = F.interpolate(seg_masks_soft.unsqueeze(0),
+ size=upsampled_size_out,
+ mode='bilinear')[:, :, :h, :w]
+ seg_masks = F.interpolate(seg_masks_soft,
+ size=ori_shape[:2],
+ mode='bilinear').squeeze(0)
+ seg_masks = seg_masks > cfg.mask_thr
+ return seg_masks, cate_labels, cate_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fcos_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fcos_head.py
new file mode 100644
index 000000000..a8c2cd411
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fcos_head.py
@@ -0,0 +1,408 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import distance2bbox, force_fp32, multi_apply, multiclass_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule, Scale, bias_init_with_prob
+
+INF = 1e8
+
+
+@HEADS.register_module
+class FCOSHead(nn.Module):
+ """
+ Fully Convolutional One-Stage Object Detection head from [1]_.
+
+    The FCOS head does not use anchor boxes. Instead, bounding boxes are
+    predicted at each pixel and a centerness measure is used to suppress
+    low-quality predictions.
+
+ References:
+ .. [1] https://arxiv.org/abs/1904.01355
+
+ Example:
+ >>> self = FCOSHead(11, 7)
+ >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]
+ >>> cls_score, bbox_pred, centerness = self.forward(feats)
+ >>> assert len(cls_score) == len(self.scales)
+ """
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),
+ (512, INF)),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ loss_weight=1.0),
+ conv_cfg=None,
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)):
+ super(FCOSHead, self).__init__()
+
+ self.num_classes = num_classes
+ self.cls_out_channels = num_classes - 1
+ self.in_channels = in_channels
+ self.feat_channels = feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.regress_ranges = regress_ranges
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox = build_loss(loss_bbox)
+ self.loss_centerness = build_loss(loss_centerness)
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.fp16_enabled = False
+
+ self._init_layers()
+
+ def _init_layers(self):
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.fcos_cls = nn.Conv2d(
+ self.feat_channels, self.cls_out_channels, 3, padding=1)
+ self.fcos_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
+ self.fcos_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)
+
+ self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.fcos_cls, std=0.01, bias=bias_cls)
+ normal_init(self.fcos_reg, std=0.01)
+ normal_init(self.fcos_centerness, std=0.01)
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats, self.scales)
+
+ def forward_single(self, x, scale):
+ cls_feat = x
+ reg_feat = x
+
+ for cls_layer in self.cls_convs:
+ cls_feat = cls_layer(cls_feat)
+ cls_score = self.fcos_cls(cls_feat)
+ centerness = self.fcos_centerness(cls_feat)
+
+ for reg_layer in self.reg_convs:
+ reg_feat = reg_layer(reg_feat)
+        # scale the bbox_pred of each level;
+        # cast to float to avoid overflow when FP16 is enabled
+ bbox_pred = scale(self.fcos_reg(reg_feat)).float().exp()
+ return cls_score, bbox_pred, centerness
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ assert len(cls_scores) == len(bbox_preds) == len(centernesses)
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
+ bbox_preds[0].device)
+ labels, bbox_targets = self.fcos_target(all_level_points, gt_bboxes,
+ gt_labels)
+
+ num_imgs = cls_scores[0].size(0)
+ # flatten cls_scores, bbox_preds and centerness
+ flatten_cls_scores = [
+ cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
+ for cls_score in cls_scores
+ ]
+ flatten_bbox_preds = [
+ bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
+ for bbox_pred in bbox_preds
+ ]
+ flatten_centerness = [
+ centerness.permute(0, 2, 3, 1).reshape(-1)
+ for centerness in centernesses
+ ]
+ flatten_cls_scores = torch.cat(flatten_cls_scores)
+ flatten_bbox_preds = torch.cat(flatten_bbox_preds)
+ flatten_centerness = torch.cat(flatten_centerness)
+ flatten_labels = torch.cat(labels)
+ flatten_bbox_targets = torch.cat(bbox_targets)
+ # repeat points to align with bbox_preds
+ flatten_points = torch.cat(
+ [points.repeat(num_imgs, 1) for points in all_level_points])
+
+ pos_inds = flatten_labels.nonzero().reshape(-1)
+ num_pos = len(pos_inds)
+ loss_cls = self.loss_cls(
+ flatten_cls_scores, flatten_labels,
+            avg_factor=num_pos + num_imgs)  # avoid num_pos being 0
+
+ pos_bbox_preds = flatten_bbox_preds[pos_inds]
+ pos_centerness = flatten_centerness[pos_inds]
+
+ if num_pos > 0:
+ pos_bbox_targets = flatten_bbox_targets[pos_inds]
+ pos_centerness_targets = self.centerness_target(pos_bbox_targets)
+ pos_points = flatten_points[pos_inds]
+ pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds)
+ pos_decoded_target_preds = distance2bbox(pos_points,
+ pos_bbox_targets)
+ # centerness weighted iou loss
+ loss_bbox = self.loss_bbox(
+ pos_decoded_bbox_preds,
+ pos_decoded_target_preds,
+ weight=pos_centerness_targets,
+ avg_factor=pos_centerness_targets.sum())
+ loss_centerness = self.loss_centerness(pos_centerness,
+ pos_centerness_targets)
+ else:
+ loss_bbox = pos_bbox_preds.sum()
+ loss_centerness = pos_centerness.sum()
+
+ return dict(
+ loss_cls=loss_cls,
+ loss_bbox=loss_bbox,
+ loss_centerness=loss_centerness)
+
+ @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
+ def get_bboxes(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ img_metas,
+ cfg,
+ rescale=None):
+ assert len(cls_scores) == len(bbox_preds)
+ num_levels = len(cls_scores)
+
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
+ bbox_preds[0].device)
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ centerness_pred_list = [
+ centernesses[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ det_bboxes = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ centerness_pred_list,
+ mlvl_points, img_shape,
+ scale_factor, cfg, rescale)
+ result_list.append(det_bboxes)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ centernesses,
+ mlvl_points,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ mlvl_centerness = []
+ for cls_score, bbox_pred, centerness, points in zip(
+ cls_scores, bbox_preds, centernesses, mlvl_points):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+ scores = cls_score.permute(1, 2, 0).reshape(
+ -1, self.cls_out_channels).sigmoid()
+ centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
+
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ nms_pre = cfg.get('nms_pre', -1)
+ if nms_pre > 0 and scores.shape[0] > nms_pre:
+ max_scores, _ = (scores * centerness[:, None]).max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ points = points[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ centerness = centerness[topk_inds]
+ bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
+ mlvl_bboxes.append(bboxes)
+ mlvl_scores.append(scores)
+ mlvl_centerness.append(centerness)
+ mlvl_bboxes = torch.cat(mlvl_bboxes)
+ if rescale:
+ mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+ mlvl_scores = torch.cat(mlvl_scores)
+ padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+ mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+ mlvl_centerness = torch.cat(mlvl_centerness)
+ det_bboxes, det_labels = multiclass_nms(
+ mlvl_bboxes,
+ mlvl_scores,
+ cfg.score_thr,
+ cfg.nms,
+ cfg.max_per_img,
+ score_factors=mlvl_centerness)
+ return det_bboxes, det_labels
+
+ def get_points(self, featmap_sizes, dtype, device):
+ """Get points according to feature map sizes.
+
+ Args:
+ featmap_sizes (list[tuple]): Multi-level feature map sizes.
+ dtype (torch.dtype): Type of points.
+ device (torch.device): Device of points.
+
+ Returns:
+ tuple: points of each image.
+ """
+ mlvl_points = []
+ for i in range(len(featmap_sizes)):
+ mlvl_points.append(
+ self.get_points_single(featmap_sizes[i], self.strides[i],
+ dtype, device))
+ return mlvl_points
+
+ def get_points_single(self, featmap_size, stride, dtype, device):
+ h, w = featmap_size
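+        # lattice of cell-center coordinates in input-image space:
+        # (x, y) = (j * stride, i * stride) + stride // 2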
+ x_range = torch.arange(
+ 0, w * stride, stride, dtype=dtype, device=device)
+ y_range = torch.arange(
+ 0, h * stride, stride, dtype=dtype, device=device)
+ y, x = torch.meshgrid(y_range, x_range)
+ points = torch.stack(
+ (x.reshape(-1), y.reshape(-1)), dim=-1) + stride // 2
+ return points
+
+ def fcos_target(self, points, gt_bboxes_list, gt_labels_list):
+ assert len(points) == len(self.regress_ranges)
+ num_levels = len(points)
+ # expand regress ranges to align with points
+ expanded_regress_ranges = [
+ points[i].new_tensor(self.regress_ranges[i])[None].expand_as(
+ points[i]) for i in range(num_levels)
+ ]
+ # concat all levels points and regress ranges
+ concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
+ concat_points = torch.cat(points, dim=0)
+ # get labels and bbox_targets of each image
+ labels_list, bbox_targets_list = multi_apply(
+ self.fcos_target_single,
+ gt_bboxes_list,
+ gt_labels_list,
+ points=concat_points,
+ regress_ranges=concat_regress_ranges)
+
+ # split to per img, per level
+ num_points = [center.size(0) for center in points]
+ labels_list = [labels.split(num_points, 0) for labels in labels_list]
+ bbox_targets_list = [
+ bbox_targets.split(num_points, 0)
+ for bbox_targets in bbox_targets_list
+ ]
+
+ # concat per level image
+ concat_lvl_labels = []
+ concat_lvl_bbox_targets = []
+ for i in range(num_levels):
+ concat_lvl_labels.append(
+ torch.cat([labels[i] for labels in labels_list]))
+ concat_lvl_bbox_targets.append(
+ torch.cat(
+ [bbox_targets[i] for bbox_targets in bbox_targets_list]))
+ return concat_lvl_labels, concat_lvl_bbox_targets
+
+ def fcos_target_single(self, gt_bboxes, gt_labels, points, regress_ranges):
+ num_points = points.size(0)
+ num_gts = gt_labels.size(0)
+ if num_gts == 0:
+ return gt_labels.new_zeros(num_points), \
+ gt_bboxes.new_zeros((num_points, 4))
+
+ areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
+ gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+ # TODO: figure out why these two are different
+ # areas = areas[None].expand(num_points, num_gts)
+ areas = areas[None].repeat(num_points, 1)
+ regress_ranges = regress_ranges[:, None, :].expand(
+ num_points, num_gts, 2)
+ gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)
+ xs, ys = points[:, 0], points[:, 1]
+ xs = xs[:, None].expand(num_points, num_gts)
+ ys = ys[:, None].expand(num_points, num_gts)
+
+ left = xs - gt_bboxes[..., 0]
+ right = gt_bboxes[..., 2] - xs
+ top = ys - gt_bboxes[..., 1]
+ bottom = gt_bboxes[..., 3] - ys
+ bbox_targets = torch.stack((left, top, right, bottom), -1)
+
+ # condition1: inside a gt bbox
+ inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0
+
+ # condition2: limit the regression range for each location
+ max_regress_distance = bbox_targets.max(-1)[0]
+ inside_regress_range = (
+ max_regress_distance >= regress_ranges[..., 0]) & (
+ max_regress_distance <= regress_ranges[..., 1])
+
+        # if there is still more than one object for a location,
+        # we choose the one with the minimal area
+ areas[inside_gt_bbox_mask == 0] = INF
+ areas[inside_regress_range == 0] = INF
+ min_area, min_area_inds = areas.min(dim=1)
+
+ labels = gt_labels[min_area_inds]
+ labels[min_area == INF] = 0
+ bbox_targets = bbox_targets[range(num_points), min_area_inds]
+
+ return labels, bbox_targets
+
+ def centerness_target(self, pos_bbox_targets):
+ # only calculate pos centerness targets, otherwise there may be nan
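+        # centerness = sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b)))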
+ left_right = pos_bbox_targets[:, [0, 2]]
+ top_bottom = pos_bbox_targets[:, [1, 3]]
+ centerness_targets = (
+ left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * (
+ top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])
+ return torch.sqrt(centerness_targets)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fovea_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fovea_head.py
new file mode 100644
index 000000000..a17e0b127
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fovea_head.py
@@ -0,0 +1,387 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import multi_apply, multiclass_nms
+from mmdet.ops import DeformConv
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule, bias_init_with_prob
+
+INF = 1e8
+
+
+class FeatureAlign(nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ deformable_groups=4):
+ super(FeatureAlign, self).__init__()
+ offset_channels = kernel_size * kernel_size * 2
+ self.conv_offset = nn.Conv2d(
+ 4, deformable_groups * offset_channels, 1, bias=False)
+ self.conv_adaption = DeformConv(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ padding=(kernel_size - 1) // 2,
+ deformable_groups=deformable_groups)
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self):
+ normal_init(self.conv_offset, std=0.1)
+ normal_init(self.conv_adaption, std=0.01)
+
+ def forward(self, x, shape):
+ offset = self.conv_offset(shape)
+ x = self.relu(self.conv_adaption(x, offset))
+ return x
+
+
+@HEADS.register_module
+class FoveaHead(nn.Module):
+ """FoveaBox: Beyond Anchor-based Object Detector
+ https://arxiv.org/abs/1904.03797
+ """
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128,
+ 512)),
+ sigma=0.4,
+ with_deform=False,
+ deformable_groups=4,
+ loss_cls=None,
+ loss_bbox=None,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(FoveaHead, self).__init__()
+ self.num_classes = num_classes
+ self.cls_out_channels = num_classes - 1
+ self.in_channels = in_channels
+ self.feat_channels = feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.sigma = sigma
+ self.with_deform = with_deform
+ self.deformable_groups = deformable_groups
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox = build_loss(loss_bbox)
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self._init_layers()
+
+ def _init_layers(self):
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ # box branch
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.fovea_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
+ # cls branch
+ if not self.with_deform:
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.fovea_cls = nn.Conv2d(
+ self.feat_channels, self.cls_out_channels, 3, padding=1)
+ else:
+ self.cls_convs.append(
+ ConvModule(
+ self.feat_channels, (self.feat_channels * 4),
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.cls_convs.append(
+ ConvModule((self.feat_channels * 4), (self.feat_channels * 4),
+ 1,
+ stride=1,
+ padding=0,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=self.norm_cfg is None))
+ self.feature_adaption = FeatureAlign(
+ self.feat_channels,
+ self.feat_channels,
+ kernel_size=3,
+ deformable_groups=self.deformable_groups)
+ self.fovea_cls = nn.Conv2d(
+ int(self.feat_channels * 4),
+ self.cls_out_channels,
+ 3,
+ padding=1)
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.fovea_cls, std=0.01, bias=bias_cls)
+ normal_init(self.fovea_reg, std=0.01)
+ if self.with_deform:
+ self.feature_adaption.init_weights()
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats)
+
+ def forward_single(self, x):
+ cls_feat = x
+ reg_feat = x
+ for reg_layer in self.reg_convs:
+ reg_feat = reg_layer(reg_feat)
+ bbox_pred = self.fovea_reg(reg_feat)
+ if self.with_deform:
+ cls_feat = self.feature_adaption(cls_feat, bbox_pred.exp())
+ for cls_layer in self.cls_convs:
+ cls_feat = cls_layer(cls_feat)
+ cls_score = self.fovea_cls(cls_feat)
+ return cls_score, bbox_pred
+
+ def get_points(self, featmap_sizes, dtype, device, flatten=False):
+ points = []
+ for featmap_size in featmap_sizes:
+ x_range = torch.arange(
+ featmap_size[1], dtype=dtype, device=device) + 0.5
+ y_range = torch.arange(
+ featmap_size[0], dtype=dtype, device=device) + 0.5
+ y, x = torch.meshgrid(y_range, x_range)
+ if flatten:
+ points.append((y.flatten(), x.flatten()))
+ else:
+ points.append((y, x))
+ return points
+
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ gt_bbox_list,
+ gt_label_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ assert len(cls_scores) == len(bbox_preds)
+
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
+ bbox_preds[0].device)
+ num_imgs = cls_scores[0].size(0)
+ flatten_cls_scores = [
+ cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
+ for cls_score in cls_scores
+ ]
+ flatten_bbox_preds = [
+ bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
+ for bbox_pred in bbox_preds
+ ]
+ flatten_cls_scores = torch.cat(flatten_cls_scores)
+ flatten_bbox_preds = torch.cat(flatten_bbox_preds)
+ flatten_labels, flatten_bbox_targets = self.fovea_target(
+ gt_bbox_list, gt_label_list, featmap_sizes, points)
+ pos_inds = (flatten_labels > 0).nonzero().view(-1)
+ num_pos = len(pos_inds)
+ loss_cls = self.loss_cls(
+ flatten_cls_scores, flatten_labels, avg_factor=num_pos + num_imgs)
+ if num_pos > 0:
+ pos_bbox_preds = flatten_bbox_preds[pos_inds]
+ pos_bbox_targets = flatten_bbox_targets[pos_inds]
+ pos_weights = pos_bbox_targets.new_zeros(
+ pos_bbox_targets.size()) + 1.0
+ loss_bbox = self.loss_bbox(
+ pos_bbox_preds,
+ pos_bbox_targets,
+ pos_weights,
+ avg_factor=num_pos)
+ else:
+ loss_bbox = torch.tensor([0],
+ dtype=flatten_bbox_preds.dtype,
+ device=flatten_bbox_preds.device)
+ return dict(loss_cls=loss_cls, loss_bbox=loss_bbox)
+
+ def fovea_target(self, gt_bbox_list, gt_label_list, featmap_sizes, points):
+ label_list, bbox_target_list = multi_apply(
+ self.fovea_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ featmap_size_list=featmap_sizes,
+ point_list=points)
+ flatten_labels = [
+ torch.cat([
+ labels_level_img.flatten() for labels_level_img in labels_level
+ ]) for labels_level in zip(*label_list)
+ ]
+ flatten_bbox_targets = [
+ torch.cat([
+ bbox_targets_level_img.reshape(-1, 4)
+ for bbox_targets_level_img in bbox_targets_level
+ ]) for bbox_targets_level in zip(*bbox_target_list)
+ ]
+ flatten_labels = torch.cat(flatten_labels)
+ flatten_bbox_targets = torch.cat(flatten_bbox_targets)
+ return flatten_labels, flatten_bbox_targets
+
+ def fovea_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ featmap_size_list=None,
+ point_list=None):
+
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
+ (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+ label_list = []
+ bbox_target_list = []
+ # for each pyramid, find the cls and box target
+ for base_len, (lower_bound, upper_bound), stride, featmap_size, \
+ (y, x) in zip(self.base_edge_list, self.scale_ranges,
+ self.strides, featmap_size_list, point_list):
+ labels = gt_labels_raw.new_zeros(featmap_size)
+ bbox_targets = gt_bboxes_raw.new(featmap_size[0], featmap_size[1],
+ 4) + 1
+ # scale assignment
+ hit_indices = ((gt_areas >= lower_bound) &
+ (gt_areas <= upper_bound)).nonzero().flatten()
+ if len(hit_indices) == 0:
+ label_list.append(labels)
+ bbox_target_list.append(torch.log(bbox_targets))
+ continue
+ _, hit_index_order = torch.sort(-gt_areas[hit_indices])
+ hit_indices = hit_indices[hit_index_order]
+ gt_bboxes = gt_bboxes_raw[hit_indices, :] / stride
+ gt_labels = gt_labels_raw[hit_indices]
+ half_w = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0])
+ half_h = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1])
+ # valid fovea area: left, right, top, down
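+            # the positive "fovea" region is the gt box shrunk by sigma around
+            # its center (in stride-normalized coordinates), clamped to the map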
+ pos_left = torch.ceil(
+ gt_bboxes[:, 0] + (1 - self.sigma) * half_w - 0.5).long().\
+ clamp(0, featmap_size[1] - 1)
+ pos_right = torch.floor(
+ gt_bboxes[:, 0] + (1 + self.sigma) * half_w - 0.5).long().\
+ clamp(0, featmap_size[1] - 1)
+ pos_top = torch.ceil(
+ gt_bboxes[:, 1] + (1 - self.sigma) * half_h - 0.5).long().\
+ clamp(0, featmap_size[0] - 1)
+ pos_down = torch.floor(
+ gt_bboxes[:, 1] + (1 + self.sigma) * half_h - 0.5).long().\
+ clamp(0, featmap_size[0] - 1)
+ for px1, py1, px2, py2, label, (gt_x1, gt_y1, gt_x2, gt_y2) in \
+ zip(pos_left, pos_top, pos_right, pos_down, gt_labels,
+ gt_bboxes_raw[hit_indices, :]):
+ labels[py1:py2 + 1, px1:px2 + 1] = label
+ bbox_targets[py1:py2 + 1, px1:px2 + 1, 0] = \
+ (stride * x[py1:py2 + 1, px1:px2 + 1] - gt_x1) / base_len
+ bbox_targets[py1:py2 + 1, px1:px2 + 1, 1] = \
+ (stride * y[py1:py2 + 1, px1:px2 + 1] - gt_y1) / base_len
+ bbox_targets[py1:py2 + 1, px1:px2 + 1, 2] = \
+ (gt_x2 - stride * x[py1:py2 + 1, px1:px2 + 1]) / base_len
+ bbox_targets[py1:py2 + 1, px1:px2 + 1, 3] = \
+ (gt_y2 - stride * y[py1:py2 + 1, px1:px2 + 1]) / base_len
+ bbox_targets = bbox_targets.clamp(min=1. / 16, max=16.)
+ label_list.append(labels)
+ bbox_target_list.append(torch.log(bbox_targets))
+ return label_list, bbox_target_list
+
+ def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg, rescale=None):
+ assert len(cls_scores) == len(bbox_preds)
+ num_levels = len(cls_scores)
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ points = self.get_points(
+ featmap_sizes,
+ bbox_preds[0].dtype,
+ bbox_preds[0].device,
+ flatten=True)
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ det_bboxes = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ featmap_sizes, points,
+ img_shape, scale_factor, cfg,
+ rescale)
+ result_list.append(det_bboxes)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ featmap_sizes,
+ point_list,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ assert len(cls_scores) == len(bbox_preds) == len(point_list)
+ det_bboxes = []
+ det_scores = []
+ for cls_score, bbox_pred, featmap_size, stride, base_len, (y, x) \
+ in zip(cls_scores, bbox_preds, featmap_sizes, self.strides,
+ self.base_edge_list, point_list):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+ scores = cls_score.permute(1, 2, 0).reshape(
+ -1, self.cls_out_channels).sigmoid()
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4).exp()
+ nms_pre = cfg.get('nms_pre', -1)
+ if (nms_pre > 0) and (scores.shape[0] > nms_pre):
+ max_scores, _ = scores.max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ y = y[topk_inds]
+ x = x[topk_inds]
+ x1 = (stride * x - base_len * bbox_pred[:, 0]).\
+ clamp(min=0, max=img_shape[1] - 1)
+ y1 = (stride * y - base_len * bbox_pred[:, 1]).\
+ clamp(min=0, max=img_shape[0] - 1)
+ x2 = (stride * x + base_len * bbox_pred[:, 2]).\
+ clamp(min=0, max=img_shape[1] - 1)
+ y2 = (stride * y + base_len * bbox_pred[:, 3]).\
+ clamp(min=0, max=img_shape[0] - 1)
+ bboxes = torch.stack([x1, y1, x2, y2], -1)
+ det_bboxes.append(bboxes)
+ det_scores.append(scores)
+ det_bboxes = torch.cat(det_bboxes)
+ if rescale:
+ det_bboxes /= det_bboxes.new_tensor(scale_factor)
+ det_scores = torch.cat(det_scores)
+ padding = det_scores.new_zeros(det_scores.shape[0], 1)
+ det_scores = torch.cat([padding, det_scores], dim=1)
+ det_bboxes, det_labels = multiclass_nms(det_bboxes, det_scores,
+ cfg.score_thr, cfg.nms,
+ cfg.max_per_img)
+ return det_bboxes, det_labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/free_anchor_retina_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/free_anchor_retina_head.py
new file mode 100644
index 000000000..3179aad20
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/free_anchor_retina_head.py
@@ -0,0 +1,188 @@
+import torch
+import torch.nn.functional as F
+
+from mmdet.core import bbox2delta, bbox_overlaps, delta2bbox
+from ..registry import HEADS
+from .retina_head import RetinaHead
+
+
+@HEADS.register_module
+class FreeAnchorRetinaHead(RetinaHead):
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ stacked_convs=4,
+ octave_base_scale=4,
+ scales_per_octave=3,
+ conv_cfg=None,
+ norm_cfg=None,
+ pre_anchor_topk=50,
+ bbox_thr=0.6,
+ gamma=2.0,
+ alpha=0.5,
+ **kwargs):
+ super(FreeAnchorRetinaHead,
+ self).__init__(num_classes, in_channels, stacked_convs,
+ octave_base_scale, scales_per_octave, conv_cfg,
+ norm_cfg, **kwargs)
+
+ self.pre_anchor_topk = pre_anchor_topk
+ self.bbox_thr = bbox_thr
+ self.gamma = gamma
+ self.alpha = alpha
+
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.anchor_generators)
+
+ anchor_list, _ = self.get_anchors(featmap_sizes, img_metas)
+ anchors = [torch.cat(anchor) for anchor in anchor_list]
+
+ # concatenate each level
+ cls_scores = [
+ cls.permute(0, 2, 3,
+ 1).reshape(cls.size(0), -1, self.cls_out_channels)
+ for cls in cls_scores
+ ]
+ bbox_preds = [
+ bbox_pred.permute(0, 2, 3, 1).reshape(bbox_pred.size(0), -1, 4)
+ for bbox_pred in bbox_preds
+ ]
+ cls_scores = torch.cat(cls_scores, dim=1)
+ bbox_preds = torch.cat(bbox_preds, dim=1)
+
+ cls_prob = torch.sigmoid(cls_scores)
+ box_prob = []
+ num_pos = 0
+ positive_losses = []
+ for _, (anchors_, gt_labels_, gt_bboxes_, cls_prob_,
+ bbox_preds_) in enumerate(
+ zip(anchors, gt_labels, gt_bboxes, cls_prob, bbox_preds)):
+ gt_labels_ -= 1
+
+ with torch.no_grad():
+ # box_localization: a_{j}^{loc}, shape: [j, 4]
+ pred_boxes = delta2bbox(anchors_, bbox_preds_,
+ self.target_means, self.target_stds)
+
+ # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
+ object_box_iou = bbox_overlaps(gt_bboxes_, pred_boxes)
+
+ # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
+ t1 = self.bbox_thr
+ t2 = object_box_iou.max(
+ dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)
+ object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(
+ min=0, max=1)
+
+ # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
+ num_obj = gt_labels_.size(0)
+ indices = torch.stack(
+ [torch.arange(num_obj).type_as(gt_labels_), gt_labels_],
+ dim=0)
+ object_cls_box_prob = torch.sparse_coo_tensor(
+ indices, object_box_prob)
+
+ # image_box_iou: P{a_{j} \in A_{+}}, shape: [c, j]
+                # The block between "start" and "end" implements the
+                # equivalent of:
+                #     image_box_iou = torch.sparse.max(object_cls_box_prob,
+                #                                      dim=0).t()
+ # start
+ box_cls_prob = torch.sparse.sum(
+ object_cls_box_prob, dim=0).to_dense()
+
+ indices = torch.nonzero(box_cls_prob).t_()
+ if indices.numel() == 0:
+ image_box_prob = torch.zeros(
+ anchors_.size(0),
+ self.cls_out_channels).type_as(object_box_prob)
+ else:
+ nonzero_box_prob = torch.where(
+ (gt_labels_.unsqueeze(dim=-1) == indices[0]),
+ object_box_prob[:, indices[1]],
+ torch.tensor(
+ [0]).type_as(object_box_prob)).max(dim=0).values
+
+                    # unmap to a dense tensor of shape [j, c]
+ image_box_prob = torch.sparse_coo_tensor(
+ indices.flip([0]),
+ nonzero_box_prob,
+ size=(anchors_.size(0),
+ self.cls_out_channels)).to_dense()
+ # end
+
+ box_prob.append(image_box_prob)
+
+ # construct bags for objects
+ match_quality_matrix = bbox_overlaps(gt_bboxes_, anchors_)
+ _, matched = torch.topk(
+ match_quality_matrix,
+ self.pre_anchor_topk,
+ dim=1,
+ sorted=False)
+ del match_quality_matrix
+
+ # matched_cls_prob: P_{ij}^{cls}
+ matched_cls_prob = torch.gather(
+ cls_prob_[matched], 2,
+ gt_labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,
+ 1)).squeeze(2)
+
+ # matched_box_prob: P_{ij}^{loc}
+ matched_anchors = anchors_[matched]
+ matched_object_targets = bbox2delta(
+ matched_anchors,
+ gt_bboxes_.unsqueeze(dim=1).expand_as(matched_anchors),
+ self.target_means, self.target_stds)
+ loss_bbox = self.loss_bbox(
+ bbox_preds_[matched],
+ matched_object_targets,
+ reduction_override='none').sum(-1)
+ matched_box_prob = torch.exp(-loss_bbox)
+
+ # positive_losses: {-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )}
+ num_pos += len(gt_bboxes_)
+ positive_losses.append(
+ self.positive_bag_loss(matched_cls_prob, matched_box_prob))
+ positive_loss = torch.cat(positive_losses).sum() / max(1, num_pos)
+
+ # box_prob: P{a_{j} \in A_{+}}
+ box_prob = torch.stack(box_prob, dim=0)
+
+ # negative_loss:
+ # \sum_{j}{ FL((1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg})) } / n||B||
+ negative_loss = self.negative_bag_loss(cls_prob, box_prob).sum() / max(
+ 1, num_pos * self.pre_anchor_topk)
+
+ losses = {
+ 'positive_bag_loss': positive_loss,
+ 'negative_bag_loss': negative_loss
+ }
+ return losses
+
+ def positive_bag_loss(self, matched_cls_prob, matched_box_prob):
+ # bag_prob = Mean-max(matched_prob)
+ matched_prob = matched_cls_prob * matched_box_prob
+ weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)
+ weight /= weight.sum(dim=1).unsqueeze(dim=-1)
+ bag_prob = (weight * matched_prob).sum(dim=1)
+ # positive_bag_loss = -self.alpha * log(bag_prob)
+ return self.alpha * F.binary_cross_entropy(
+ bag_prob, torch.ones_like(bag_prob), reduction='none')
+
+ def negative_bag_loss(self, cls_prob, box_prob):
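+        # focal-style BCE that pushes cls_prob * (1 - P{a_j in A_+}) towards
+        # zero, i.e. penalizes confident predictions on unmatched anchors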
+ prob = cls_prob * (1 - box_prob)
+ negative_bag_loss = prob**self.gamma * F.binary_cross_entropy(
+ prob, torch.zeros_like(prob), reduction='none')
+ return (1 - self.alpha) * negative_bag_loss
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_retina_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_retina_head.py
new file mode 100644
index 000000000..73f89d725
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_retina_head.py
@@ -0,0 +1,107 @@
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.ops import MaskedConv2d
+from ..registry import HEADS
+from ..utils import ConvModule, bias_init_with_prob
+from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
+
+
+@HEADS.register_module
+class GARetinaHead(GuidedAnchorHead):
+ """Guided-Anchor-based RetinaNet head."""
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ stacked_convs=4,
+ conv_cfg=None,
+ norm_cfg=None,
+ **kwargs):
+ self.stacked_convs = stacked_convs
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ super(GARetinaHead, self).__init__(num_classes, in_channels, **kwargs)
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+
+ self.conv_loc = nn.Conv2d(self.feat_channels, 1, 1)
+ self.conv_shape = nn.Conv2d(self.feat_channels, self.num_anchors * 2,
+ 1)
+ self.feature_adaption_cls = FeatureAdaption(
+ self.feat_channels,
+ self.feat_channels,
+ kernel_size=3,
+ deformable_groups=self.deformable_groups)
+ self.feature_adaption_reg = FeatureAdaption(
+ self.feat_channels,
+ self.feat_channels,
+ kernel_size=3,
+ deformable_groups=self.deformable_groups)
+ self.retina_cls = MaskedConv2d(
+ self.feat_channels,
+ self.num_anchors * self.cls_out_channels,
+ 3,
+ padding=1)
+ self.retina_reg = MaskedConv2d(
+ self.feat_channels, self.num_anchors * 4, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+
+ self.feature_adaption_cls.init_weights()
+ self.feature_adaption_reg.init_weights()
+
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.conv_loc, std=0.01, bias=bias_cls)
+ normal_init(self.conv_shape, std=0.01)
+ normal_init(self.retina_cls, std=0.01, bias=bias_cls)
+ normal_init(self.retina_reg, std=0.01)
+
+ def forward_single(self, x):
+ cls_feat = x
+ reg_feat = x
+ for cls_conv in self.cls_convs:
+ cls_feat = cls_conv(cls_feat)
+ for reg_conv in self.reg_convs:
+ reg_feat = reg_conv(reg_feat)
+
+ loc_pred = self.conv_loc(cls_feat)
+ shape_pred = self.conv_shape(reg_feat)
+
+ cls_feat = self.feature_adaption_cls(cls_feat, shape_pred)
+ reg_feat = self.feature_adaption_reg(reg_feat, shape_pred)
+
+ if not self.training:
+ mask = loc_pred.sigmoid()[0] >= self.loc_filter_thr
+ else:
+ mask = None
+ cls_score = self.retina_cls(cls_feat, mask)
+ bbox_pred = self.retina_reg(reg_feat, mask)
+ return cls_score, bbox_pred, shape_pred, loc_pred
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_rpn_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_rpn_head.py
new file mode 100644
index 000000000..11512ffc5
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_rpn_head.py
@@ -0,0 +1,127 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+
+from mmdet.core import delta2bbox
+from mmdet.ops import nms
+from ..registry import HEADS
+from .guided_anchor_head import GuidedAnchorHead
+
+
+@HEADS.register_module
+class GARPNHead(GuidedAnchorHead):
+ """Guided-Anchor-based RPN head."""
+
+ def __init__(self, in_channels, **kwargs):
+ super(GARPNHead, self).__init__(2, in_channels, **kwargs)
+
+ def _init_layers(self):
+ self.rpn_conv = nn.Conv2d(
+ self.in_channels, self.feat_channels, 3, padding=1)
+ super(GARPNHead, self)._init_layers()
+
+ def init_weights(self):
+ normal_init(self.rpn_conv, std=0.01)
+ super(GARPNHead, self).init_weights()
+
+ def forward_single(self, x):
+ x = self.rpn_conv(x)
+ x = F.relu(x, inplace=True)
+ (cls_score, bbox_pred, shape_pred,
+ loc_pred) = super(GARPNHead, self).forward_single(x)
+ return cls_score, bbox_pred, shape_pred, loc_pred
+
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ shape_preds,
+ loc_preds,
+ gt_bboxes,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ losses = super(GARPNHead, self).loss(
+ cls_scores,
+ bbox_preds,
+ shape_preds,
+ loc_preds,
+ gt_bboxes,
+ None,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=gt_bboxes_ignore)
+ return dict(
+ loss_rpn_cls=losses['loss_cls'],
+ loss_rpn_bbox=losses['loss_bbox'],
+ loss_anchor_shape=losses['loss_shape'],
+ loss_anchor_loc=losses['loss_loc'])
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ mlvl_anchors,
+ mlvl_masks,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ mlvl_proposals = []
+ for idx in range(len(cls_scores)):
+ rpn_cls_score = cls_scores[idx]
+ rpn_bbox_pred = bbox_preds[idx]
+ anchors = mlvl_anchors[idx]
+ mask = mlvl_masks[idx]
+ assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
+ # if no location is kept, end.
+ if mask.sum() == 0:
+ continue
+ rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
+ if self.use_sigmoid_cls:
+ rpn_cls_score = rpn_cls_score.reshape(-1)
+ scores = rpn_cls_score.sigmoid()
+ else:
+ rpn_cls_score = rpn_cls_score.reshape(-1, 2)
+ scores = rpn_cls_score.softmax(dim=1)[:, 1]
+ # filter scores, bbox_pred w.r.t. mask.
+ # anchors are filtered in get_anchors() beforehand.
+ scores = scores[mask]
+ rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,
+ 4)[mask, :]
+ if scores.dim() == 0:
+ rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
+ anchors = anchors.unsqueeze(0)
+ scores = scores.unsqueeze(0)
+ # filter anchors, bbox_pred, scores w.r.t. scores
+ if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
+ _, topk_inds = scores.topk(cfg.nms_pre)
+ rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
+ anchors = anchors[topk_inds, :]
+ scores = scores[topk_inds]
+ # get proposals w.r.t. anchors and rpn_bbox_pred
+ proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ # filter out too small bboxes
+ if cfg.min_bbox_size > 0:
+ w = proposals[:, 2] - proposals[:, 0] + 1
+ h = proposals[:, 3] - proposals[:, 1] + 1
+ valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
+ (h >= cfg.min_bbox_size)).squeeze()
+ proposals = proposals[valid_inds, :]
+ scores = scores[valid_inds]
+ proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
+ # NMS in current level
+ proposals, _ = nms(proposals, cfg.nms_thr)
+ proposals = proposals[:cfg.nms_post, :]
+ mlvl_proposals.append(proposals)
+ proposals = torch.cat(mlvl_proposals, 0)
+ if cfg.nms_across_levels:
+ # NMS across multi levels
+ proposals, _ = nms(proposals, cfg.nms_thr)
+ proposals = proposals[:cfg.max_num, :]
+ else:
+ scores = proposals[:, 4]
+ num = min(cfg.max_num, proposals.shape[0])
+ _, topk_inds = scores.topk(num)
+ proposals = proposals[topk_inds, :]
+ return proposals
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/guided_anchor_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/guided_anchor_head.py
new file mode 100644
index 000000000..9fdf4f664
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/guided_anchor_head.py
@@ -0,0 +1,621 @@
+from __future__ import division
+
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import (AnchorGenerator, anchor_inside_flags, anchor_target,
+ delta2bbox, force_fp32, ga_loc_target, ga_shape_target,
+ multi_apply, multiclass_nms)
+from mmdet.ops import DeformConv, MaskedConv2d
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob
+from .anchor_head import AnchorHead
+
+
+class FeatureAdaption(nn.Module):
+ """Feature Adaption Module.
+
+ Feature Adaption Module is implemented based on DCN v1.
+ It uses anchor shape prediction rather than feature map to
+ predict offsets of deformable conv layer.
+
+ Args:
+ in_channels (int): Number of channels in the input feature map.
+ out_channels (int): Number of channels in the output feature map.
+ kernel_size (int): Deformable conv kernel size.
+ deformable_groups (int): Deformable conv group size.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ deformable_groups=4):
+ super(FeatureAdaption, self).__init__()
+ offset_channels = kernel_size * kernel_size * 2
+ self.conv_offset = nn.Conv2d(
+ 2, deformable_groups * offset_channels, 1, bias=False)
+ self.conv_adaption = DeformConv(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ padding=(kernel_size - 1) // 2,
+ deformable_groups=deformable_groups)
+ self.relu = nn.ReLU(inplace=True)
+
+ def init_weights(self):
+ normal_init(self.conv_offset, std=0.1)
+ normal_init(self.conv_adaption, std=0.01)
+
+ def forward(self, x, shape):
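+        # predict deformable-conv offsets from the detached shape prediction so
+        # that no gradient flows from the adaption back into the shape branch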
+ offset = self.conv_offset(shape.detach())
+ x = self.relu(self.conv_adaption(x, offset))
+ return x
+
+
+@HEADS.register_module
+class GuidedAnchorHead(AnchorHead):
+ """Guided-Anchor-based head (GA-RPN, GA-RetinaNet, etc.).
+
+    This GuidedAnchorHead predicts high-quality, feature-guided anchors and
+    the locations where anchors are kept during inference.
+    There are mainly three categories of bounding boxes:
+    - sampled anchors (9 per location by default) used for target assignment
+      (approxes),
+    - the square boxes on which the predicted anchors are based (squares),
+    - the guided anchors themselves.
+ Please refer to https://arxiv.org/abs/1901.03278 for more details.
+
+ Args:
+ num_classes (int): Number of classes.
+ in_channels (int): Number of channels in the input feature map.
+ feat_channels (int): Number of hidden channels.
+ octave_base_scale (int): Base octave scale of each level of
+ feature map.
+ scales_per_octave (int): Number of octave scales in each level of
+            feature map.
+        octave_ratios (Iterable): Octave aspect ratios.
+ anchor_strides (Iterable): Anchor strides.
+ anchor_base_sizes (Iterable): Anchor base sizes.
+ anchoring_means (Iterable): Mean values of anchoring targets.
+ anchoring_stds (Iterable): Std values of anchoring targets.
+ target_means (Iterable): Mean values of regression targets.
+ target_stds (Iterable): Std values of regression targets.
+        deformable_groups (int): Group number of DCN in
+ FeatureAdaption module.
+ loc_filter_thr (float): Threshold to filter out unconcerned regions.
+ loss_loc (dict): Config of location loss.
+ loss_shape (dict): Config of anchor shape loss.
+ loss_cls (dict): Config of classification loss.
+ loss_bbox (dict): Config of bbox regression loss.
+ """
+
+ def __init__(
+ self,
+ num_classes,
+ in_channels,
+ feat_channels=256,
+ octave_base_scale=8,
+ scales_per_octave=3,
+ octave_ratios=[0.5, 1.0, 2.0],
+ anchor_strides=[4, 8, 16, 32, 64],
+ anchor_base_sizes=None,
+ anchoring_means=(.0, .0, .0, .0),
+ anchoring_stds=(1.0, 1.0, 1.0, 1.0),
+ target_means=(.0, .0, .0, .0),
+ target_stds=(1.0, 1.0, 1.0, 1.0),
+ deformable_groups=4,
+ loc_filter_thr=0.01,
+ loss_loc=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)): # yapf: disable
+ super(AnchorHead, self).__init__()
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.feat_channels = feat_channels
+ self.octave_base_scale = octave_base_scale
+ self.scales_per_octave = scales_per_octave
+ self.octave_scales = octave_base_scale * np.array(
+ [2**(i / scales_per_octave) for i in range(scales_per_octave)])
+ self.approxs_per_octave = len(self.octave_scales) * len(octave_ratios)
+ self.octave_ratios = octave_ratios
+ self.anchor_strides = anchor_strides
+ self.anchor_base_sizes = list(
+ anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
+ self.anchoring_means = anchoring_means
+ self.anchoring_stds = anchoring_stds
+ self.target_means = target_means
+ self.target_stds = target_stds
+ self.deformable_groups = deformable_groups
+ self.loc_filter_thr = loc_filter_thr
+ self.approx_generators = []
+ self.square_generators = []
+ for anchor_base in self.anchor_base_sizes:
+ # Generators for approxs
+ self.approx_generators.append(
+ AnchorGenerator(anchor_base, self.octave_scales,
+ self.octave_ratios))
+ # Generators for squares
+ self.square_generators.append(
+ AnchorGenerator(anchor_base, [self.octave_base_scale], [1.0]))
+ # one anchor per location
+ self.num_anchors = 1
+ self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
+ self.cls_focal_loss = loss_cls['type'] in ['FocalLoss']
+ self.loc_focal_loss = loss_loc['type'] in ['FocalLoss']
+ if self.use_sigmoid_cls:
+ self.cls_out_channels = self.num_classes - 1
+ else:
+ self.cls_out_channels = self.num_classes
+
+ # build losses
+ self.loss_loc = build_loss(loss_loc)
+ self.loss_shape = build_loss(loss_shape)
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox = build_loss(loss_bbox)
+
+ self.fp16_enabled = False
+
+ self._init_layers()
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.conv_loc = nn.Conv2d(self.in_channels, 1, 1)
+ self.conv_shape = nn.Conv2d(self.in_channels, self.num_anchors * 2, 1)
+ self.feature_adaption = FeatureAdaption(
+ self.in_channels,
+ self.feat_channels,
+ kernel_size=3,
+ deformable_groups=self.deformable_groups)
+ self.conv_cls = MaskedConv2d(self.feat_channels,
+ self.num_anchors * self.cls_out_channels,
+ 1)
+ self.conv_reg = MaskedConv2d(self.feat_channels, self.num_anchors * 4,
+ 1)
+
+ def init_weights(self):
+ normal_init(self.conv_cls, std=0.01)
+ normal_init(self.conv_reg, std=0.01)
+
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.conv_loc, std=0.01, bias=bias_cls)
+ normal_init(self.conv_shape, std=0.01)
+
+ self.feature_adaption.init_weights()
+
+ def forward_single(self, x):
+ loc_pred = self.conv_loc(x)
+ shape_pred = self.conv_shape(x)
+ x = self.feature_adaption(x, shape_pred)
+ # masked conv is only used during inference for speed-up
+ if not self.training:
+ mask = loc_pred.sigmoid()[0] >= self.loc_filter_thr
+ else:
+ mask = None
+ cls_score = self.conv_cls(x, mask)
+ bbox_pred = self.conv_reg(x, mask)
+ return cls_score, bbox_pred, shape_pred, loc_pred
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats)
+
+ def get_sampled_approxs(self,
+ featmap_sizes,
+ img_metas,
+ cfg,
+ device='cuda'):
+ """Get sampled approxs and inside flags according to feature map sizes.
+
+ Args:
+ featmap_sizes (list[tuple]): Multi-level feature map sizes.
+ img_metas (list[dict]): Image meta info.
+ device (torch.device | str): device for returned tensors
+
+ Returns:
+ tuple: approxes of each image, inside flags of each image
+ """
+ num_imgs = len(img_metas)
+ num_levels = len(featmap_sizes)
+
+        # since the feature map sizes of all images are the same, we only
+        # compute approxes once
+ multi_level_approxs = []
+ for i in range(num_levels):
+ approxs = self.approx_generators[i].grid_anchors(
+ featmap_sizes[i], self.anchor_strides[i], device=device)
+ multi_level_approxs.append(approxs)
+ approxs_list = [multi_level_approxs for _ in range(num_imgs)]
+
+ # for each image, we compute inside flags of multi level approxes
+ inside_flag_list = []
+ for img_id, img_meta in enumerate(img_metas):
+ multi_level_flags = []
+ multi_level_approxs = approxs_list[img_id]
+ for i in range(num_levels):
+ approxs = multi_level_approxs[i]
+ anchor_stride = self.anchor_strides[i]
+ feat_h, feat_w = featmap_sizes[i]
+ h, w = img_meta['pad_shape'][:2]
+ valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
+ valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
+ flags = self.approx_generators[i].valid_flags(
+ (feat_h, feat_w), (valid_feat_h, valid_feat_w),
+ device=device)
+ inside_flags_list = []
+ for i in range(self.approxs_per_octave):
+ split_valid_flags = flags[i::self.approxs_per_octave]
+ split_approxs = approxs[i::self.approxs_per_octave, :]
+ inside_flags = anchor_inside_flags(
+ split_approxs, split_valid_flags,
+ img_meta['img_shape'][:2], cfg.allowed_border)
+ inside_flags_list.append(inside_flags)
+ # inside_flag for a position is true if any anchor in this
+ # position is true
+ inside_flags = (
+ torch.stack(inside_flags_list, 0).sum(dim=0) > 0)
+ multi_level_flags.append(inside_flags)
+ inside_flag_list.append(multi_level_flags)
+ return approxs_list, inside_flag_list
+
+ def get_anchors(self,
+ featmap_sizes,
+ shape_preds,
+ loc_preds,
+ img_metas,
+ use_loc_filter=False,
+ device='cuda'):
+        """Get squares according to feature map sizes, and guided anchors
+        based on shape and location predictions.
+
+ Args:
+ featmap_sizes (list[tuple]): Multi-level feature map sizes.
+ shape_preds (list[tensor]): Multi-level shape predictions.
+ loc_preds (list[tensor]): Multi-level location predictions.
+ img_metas (list[dict]): Image meta info.
+ use_loc_filter (bool): Use loc filter or not.
+ device (torch.device | str): device for returned tensors
+
+ Returns:
+ tuple: square approxs of each image, guided anchors of each image,
+ loc masks of each image
+ """
+ num_imgs = len(img_metas)
+ num_levels = len(featmap_sizes)
+
+        # since feature map sizes of all images are the same, we only compute
+        # squares once
+ multi_level_squares = []
+ for i in range(num_levels):
+ squares = self.square_generators[i].grid_anchors(
+ featmap_sizes[i], self.anchor_strides[i], device=device)
+ multi_level_squares.append(squares)
+ squares_list = [multi_level_squares for _ in range(num_imgs)]
+
+ # for each image, we compute multi level guided anchors
+ guided_anchors_list = []
+ loc_mask_list = []
+ for img_id, img_meta in enumerate(img_metas):
+ multi_level_guided_anchors = []
+ multi_level_loc_mask = []
+ for i in range(num_levels):
+ squares = squares_list[img_id][i]
+ shape_pred = shape_preds[i][img_id]
+ loc_pred = loc_preds[i][img_id]
+ guided_anchors, loc_mask = self.get_guided_anchors_single(
+ squares,
+ shape_pred,
+ loc_pred,
+ use_loc_filter=use_loc_filter)
+ multi_level_guided_anchors.append(guided_anchors)
+ multi_level_loc_mask.append(loc_mask)
+ guided_anchors_list.append(multi_level_guided_anchors)
+ loc_mask_list.append(multi_level_loc_mask)
+ return squares_list, guided_anchors_list, loc_mask_list
+
+ def get_guided_anchors_single(self,
+ squares,
+ shape_pred,
+ loc_pred,
+ use_loc_filter=False):
+ """Get guided anchors and loc masks for a single level.
+
+ Args:
+            squares (tensor): Squares of a single level.
+            shape_pred (tensor): Shape predictions of a single level.
+            loc_pred (tensor): Loc predictions of a single level.
+            use_loc_filter (bool): Whether to use the loc filter.
+
+ Returns:
+ tuple: guided anchors, location masks
+ """
+ # calculate location filtering mask
+ loc_pred = loc_pred.sigmoid().detach()
+ if use_loc_filter:
+ loc_mask = loc_pred >= self.loc_filter_thr
+ else:
+ loc_mask = loc_pred >= 0.0
+ mask = loc_mask.permute(1, 2, 0).expand(-1, -1, self.num_anchors)
+ mask = mask.contiguous().view(-1)
+ # calculate guided anchors
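+        # bbox_deltas carries only (dw, dh) from shape_pred; dx and dy are
+        # left at zero, so delta2bbox mainly adapts each square's width and
+        # height to produce the guided anchor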
+ squares = squares[mask]
+ anchor_deltas = shape_pred.permute(1, 2, 0).contiguous().view(
+ -1, 2).detach()[mask]
+ bbox_deltas = anchor_deltas.new_full(squares.size(), 0)
+ bbox_deltas[:, 2:] = anchor_deltas
+ guided_anchors = delta2bbox(
+ squares,
+ bbox_deltas,
+ self.anchoring_means,
+ self.anchoring_stds,
+ wh_ratio_clip=1e-6)
+ return guided_anchors, mask
+
+ def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts,
+ anchor_weights, anchor_total_num):
+ shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)
+ bbox_anchors = bbox_anchors.contiguous().view(-1, 4)
+ bbox_gts = bbox_gts.contiguous().view(-1, 4)
+ anchor_weights = anchor_weights.contiguous().view(-1, 4)
+ bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(), 0)
+ bbox_deltas[:, 2:] += shape_pred
+ # filter out negative samples to speed-up weighted_bounded_iou_loss
+ inds = torch.nonzero(anchor_weights[:, 0] > 0).squeeze(1)
+ bbox_deltas_ = bbox_deltas[inds]
+ bbox_anchors_ = bbox_anchors[inds]
+ bbox_gts_ = bbox_gts[inds]
+ anchor_weights_ = anchor_weights[inds]
+ pred_anchors_ = delta2bbox(
+ bbox_anchors_,
+ bbox_deltas_,
+ self.anchoring_means,
+ self.anchoring_stds,
+ wh_ratio_clip=1e-6)
+ loss_shape = self.loss_shape(
+ pred_anchors_,
+ bbox_gts_,
+ anchor_weights_,
+ avg_factor=anchor_total_num)
+ return loss_shape
+
+ def loss_loc_single(self, loc_pred, loc_target, loc_weight, loc_avg_factor,
+ cfg):
+ loss_loc = self.loss_loc(
+ loc_pred.reshape(-1, 1),
+ loc_target.reshape(-1, 1).long(),
+ loc_weight.reshape(-1, 1),
+ avg_factor=loc_avg_factor)
+ return loss_loc
+
+ @force_fp32(
+ apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ shape_preds,
+ loc_preds,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.approx_generators)
+
+ device = cls_scores[0].device
+
+ # get loc targets
+ loc_targets, loc_weights, loc_avg_factor = ga_loc_target(
+ gt_bboxes,
+ featmap_sizes,
+ self.octave_base_scale,
+ self.anchor_strides,
+ center_ratio=cfg.center_ratio,
+ ignore_ratio=cfg.ignore_ratio)
+
+ # get sampled approxes
+ approxs_list, inside_flag_list = self.get_sampled_approxs(
+ featmap_sizes, img_metas, cfg, device=device)
+ # get squares and guided anchors
+ squares_list, guided_anchors_list, _ = self.get_anchors(
+ featmap_sizes, shape_preds, loc_preds, img_metas, device=device)
+
+ # get shape targets
+        sampling = hasattr(cfg, 'ga_sampler')
+ shape_targets = ga_shape_target(
+ approxs_list,
+ inside_flag_list,
+ squares_list,
+ gt_bboxes,
+ img_metas,
+ self.approxs_per_octave,
+ cfg,
+ sampling=sampling)
+ if shape_targets is None:
+ return None
+ (bbox_anchors_list, bbox_gts_list, anchor_weights_list, anchor_fg_num,
+ anchor_bg_num) = shape_targets
+ anchor_total_num = (
+ anchor_fg_num if not sampling else anchor_fg_num + anchor_bg_num)
+
+ # get anchor targets
+        sampling = not self.cls_focal_loss
+ label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
+ cls_reg_targets = anchor_target(
+ guided_anchors_list,
+ inside_flag_list,
+ gt_bboxes,
+ img_metas,
+ self.target_means,
+ self.target_stds,
+ cfg,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=label_channels,
+ sampling=sampling)
+ if cls_reg_targets is None:
+ return None
+ (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
+ num_total_pos, num_total_neg) = cls_reg_targets
+ num_total_samples = (
+ num_total_pos if self.cls_focal_loss else num_total_pos +
+ num_total_neg)
+
+ # get classification and bbox regression losses
+ losses_cls, losses_bbox = multi_apply(
+ self.loss_single,
+ cls_scores,
+ bbox_preds,
+ labels_list,
+ label_weights_list,
+ bbox_targets_list,
+ bbox_weights_list,
+ num_total_samples=num_total_samples,
+ cfg=cfg)
+
+ # get anchor location loss
+ losses_loc = []
+ for i in range(len(loc_preds)):
+ loss_loc = self.loss_loc_single(
+ loc_preds[i],
+ loc_targets[i],
+ loc_weights[i],
+ loc_avg_factor=loc_avg_factor,
+ cfg=cfg)
+ losses_loc.append(loss_loc)
+
+ # get anchor shape loss
+ losses_shape = []
+ for i in range(len(shape_preds)):
+ loss_shape = self.loss_shape_single(
+ shape_preds[i],
+ bbox_anchors_list[i],
+ bbox_gts_list[i],
+ anchor_weights_list[i],
+ anchor_total_num=anchor_total_num)
+ losses_shape.append(loss_shape)
+
+ return dict(
+ loss_cls=losses_cls,
+ loss_bbox=losses_bbox,
+ loss_shape=losses_shape,
+ loss_loc=losses_loc)
+
+ @force_fp32(
+ apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))
+ def get_bboxes(self,
+ cls_scores,
+ bbox_preds,
+ shape_preds,
+ loc_preds,
+ img_metas,
+ cfg,
+ rescale=False):
+ assert len(cls_scores) == len(bbox_preds) == len(shape_preds) == len(
+ loc_preds)
+ num_levels = len(cls_scores)
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ device = cls_scores[0].device
+ # get guided anchors
+ _, guided_anchors, loc_masks = self.get_anchors(
+ featmap_sizes,
+ shape_preds,
+ loc_preds,
+ img_metas,
+ use_loc_filter=not self.training,
+ device=device)
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ guided_anchor_list = [
+ guided_anchors[img_id][i].detach() for i in range(num_levels)
+ ]
+ loc_mask_list = [
+ loc_masks[img_id][i].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ guided_anchor_list,
+ loc_mask_list, img_shape,
+ scale_factor, cfg, rescale)
+ result_list.append(proposals)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ mlvl_anchors,
+ mlvl_masks,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
+ mlvl_anchors,
+ mlvl_masks):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+ # if no location is kept, end.
+ if mask.sum() == 0:
+ continue
+ # reshape scores and bbox_pred
+ cls_score = cls_score.permute(1, 2,
+ 0).reshape(-1, self.cls_out_channels)
+ if self.use_sigmoid_cls:
+ scores = cls_score.sigmoid()
+ else:
+ scores = cls_score.softmax(-1)
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ # filter scores, bbox_pred w.r.t. mask.
+ # anchors are filtered in get_anchors() beforehand.
+ scores = scores[mask, :]
+ bbox_pred = bbox_pred[mask, :]
+ if scores.dim() == 0:
+ anchors = anchors.unsqueeze(0)
+ scores = scores.unsqueeze(0)
+ bbox_pred = bbox_pred.unsqueeze(0)
+ # filter anchors, bbox_pred, scores w.r.t. scores
+ nms_pre = cfg.get('nms_pre', -1)
+ if nms_pre > 0 and scores.shape[0] > nms_pre:
+ if self.use_sigmoid_cls:
+ max_scores, _ = scores.max(dim=1)
+ else:
+ max_scores, _ = scores[:, 1:].max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ anchors = anchors[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ mlvl_bboxes.append(bboxes)
+ mlvl_scores.append(scores)
+ mlvl_bboxes = torch.cat(mlvl_bboxes)
+ if rescale:
+ mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+ mlvl_scores = torch.cat(mlvl_scores)
+ if self.use_sigmoid_cls:
+ padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+ mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+ # multi class NMS
+ det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
+ cfg.score_thr, cfg.nms,
+ cfg.max_per_img)
+ return det_bboxes, det_labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/reppoints_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/reppoints_head.py
new file mode 100644
index 000000000..b3214f357
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/reppoints_head.py
@@ -0,0 +1,596 @@
+from __future__ import division
+
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from mmdet.core import (PointGenerator, multi_apply, multiclass_nms,
+ point_target)
+from mmdet.ops import DeformConv
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule, bias_init_with_prob
+
+
+@HEADS.register_module
+class RepPointsHead(nn.Module):
+    """RepPoints head.
+
+ Args:
+ in_channels (int): Number of channels in the input feature map.
+ feat_channels (int): Number of channels of the feature map.
+ point_feat_channels (int): Number of channels of points features.
+ stacked_convs (int): How many conv layers are used.
+ gradient_mul (float): The multiplier to gradients from
+ points refinement and recognition.
+        point_strides (Iterable): Strides of points in each feature level.
+ point_base_scale (int): bbox scale for assigning labels.
+ loss_cls (dict): Config of classification loss.
+ loss_bbox_init (dict): Config of initial points loss.
+ loss_bbox_refine (dict): Config of points loss in refinement.
+        use_grid_points (bool): If True, use the bounding box representation;
+            the RepPoints are then represented as grid points on the
+            bounding box.
+ center_init (bool): Whether to use center point assignment.
+ transform_method (str): The methods to transform RepPoints to bbox.
+ """ # noqa: W605
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ feat_channels=256,
+ point_feat_channels=256,
+ stacked_convs=3,
+ num_points=9,
+ gradient_mul=0.1,
+ point_strides=[8, 16, 32, 64, 128],
+ point_base_scale=4,
+ conv_cfg=None,
+ norm_cfg=None,
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox_init=dict(
+ type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.5),
+ loss_bbox_refine=dict(
+ type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+ use_grid_points=False,
+ center_init=True,
+ transform_method='moment',
+ moment_mul=0.01):
+ super(RepPointsHead, self).__init__()
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.feat_channels = feat_channels
+ self.point_feat_channels = point_feat_channels
+ self.stacked_convs = stacked_convs
+ self.num_points = num_points
+ self.gradient_mul = gradient_mul
+ self.point_base_scale = point_base_scale
+ self.point_strides = point_strides
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
+ self.sampling = loss_cls['type'] not in ['FocalLoss']
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox_init = build_loss(loss_bbox_init)
+ self.loss_bbox_refine = build_loss(loss_bbox_refine)
+ self.use_grid_points = use_grid_points
+ self.center_init = center_init
+ self.transform_method = transform_method
+ if self.transform_method == 'moment':
+ self.moment_transfer = nn.Parameter(
+ data=torch.zeros(2), requires_grad=True)
+ self.moment_mul = moment_mul
+ if self.use_sigmoid_cls:
+ self.cls_out_channels = self.num_classes - 1
+ else:
+ self.cls_out_channels = self.num_classes
+ self.point_generators = [PointGenerator() for _ in self.point_strides]
+ # we use deformable conv to extract points features
+ self.dcn_kernel = int(np.sqrt(num_points))
+ self.dcn_pad = int((self.dcn_kernel - 1) / 2)
+ assert self.dcn_kernel * self.dcn_kernel == num_points, \
+ "The points number should be a square number."
+ assert self.dcn_kernel % 2 == 1, \
+ "The points number should be an odd square number."
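+        # base offsets of a regular dcn_kernel x dcn_kernel grid in (y, x)
+        # order; the predicted point offsets are taken relative to this grid
+        # when fed to the deformable convs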
+ dcn_base = np.arange(-self.dcn_pad,
+ self.dcn_pad + 1).astype(np.float64)
+ dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)
+ dcn_base_x = np.tile(dcn_base, self.dcn_kernel)
+ dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(
+ (-1))
+ self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1)
+ self._init_layers()
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ pts_out_dim = 4 if self.use_grid_points else 2 * self.num_points
+ self.reppoints_cls_conv = DeformConv(self.feat_channels,
+ self.point_feat_channels,
+ self.dcn_kernel, 1, self.dcn_pad)
+ self.reppoints_cls_out = nn.Conv2d(self.point_feat_channels,
+ self.cls_out_channels, 1, 1, 0)
+ self.reppoints_pts_init_conv = nn.Conv2d(self.feat_channels,
+ self.point_feat_channels, 3,
+ 1, 1)
+ self.reppoints_pts_init_out = nn.Conv2d(self.point_feat_channels,
+ pts_out_dim, 1, 1, 0)
+ self.reppoints_pts_refine_conv = DeformConv(self.feat_channels,
+ self.point_feat_channels,
+ self.dcn_kernel, 1,
+ self.dcn_pad)
+ self.reppoints_pts_refine_out = nn.Conv2d(self.point_feat_channels,
+ pts_out_dim, 1, 1, 0)
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.reppoints_cls_conv, std=0.01)
+ normal_init(self.reppoints_cls_out, std=0.01, bias=bias_cls)
+ normal_init(self.reppoints_pts_init_conv, std=0.01)
+ normal_init(self.reppoints_pts_init_out, std=0.01)
+ normal_init(self.reppoints_pts_refine_conv, std=0.01)
+ normal_init(self.reppoints_pts_refine_out, std=0.01)
+
+ def points2bbox(self, pts, y_first=True):
+        """
+        Convert a point set into a bounding box.
+        :param pts: the input point sets (fields); each point set is
+            represented by 2n scalars.
+        :param y_first: if y_first=True, the point set is represented as
+            [y1, x1, y2, x2 ... yn, xn], otherwise the point set is
+            represented as [x1, y1, x2, y2 ... xn, yn].
+        :return: each point set is converted to a bbox [x1, y1, x2, y2].
+        """
+ pts_reshape = pts.view(pts.shape[0], -1, 2, *pts.shape[2:])
+ pts_y = pts_reshape[:, :, 0, ...] if y_first else pts_reshape[:, :, 1,
+ ...]
+ pts_x = pts_reshape[:, :, 1, ...] if y_first else pts_reshape[:, :, 0,
+ ...]
+ if self.transform_method == 'minmax':
+ bbox_left = pts_x.min(dim=1, keepdim=True)[0]
+ bbox_right = pts_x.max(dim=1, keepdim=True)[0]
+ bbox_up = pts_y.min(dim=1, keepdim=True)[0]
+ bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
+ bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
+ dim=1)
+ elif self.transform_method == 'partial_minmax':
+ pts_y = pts_y[:, :4, ...]
+ pts_x = pts_x[:, :4, ...]
+ bbox_left = pts_x.min(dim=1, keepdim=True)[0]
+ bbox_right = pts_x.max(dim=1, keepdim=True)[0]
+ bbox_up = pts_y.min(dim=1, keepdim=True)[0]
+ bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
+ bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
+ dim=1)
+ elif self.transform_method == 'moment':
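+            # moment-based transform: the bbox center is the mean of the
+            # points; the half width/height is the std of the points scaled
+            # by exp of the learned moment_transfer, which is blended with
+            # its detached value via moment_mul to damp its gradient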
+ pts_y_mean = pts_y.mean(dim=1, keepdim=True)
+ pts_x_mean = pts_x.mean(dim=1, keepdim=True)
+ pts_y_std = torch.std(pts_y - pts_y_mean, dim=1, keepdim=True)
+ pts_x_std = torch.std(pts_x - pts_x_mean, dim=1, keepdim=True)
+ moment_transfer = (self.moment_transfer * self.moment_mul) + (
+ self.moment_transfer.detach() * (1 - self.moment_mul))
+ moment_width_transfer = moment_transfer[0]
+ moment_height_transfer = moment_transfer[1]
+ half_width = pts_x_std * torch.exp(moment_width_transfer)
+ half_height = pts_y_std * torch.exp(moment_height_transfer)
+ bbox = torch.cat([
+ pts_x_mean - half_width, pts_y_mean - half_height,
+ pts_x_mean + half_width, pts_y_mean + half_height
+ ],
+ dim=1)
+ else:
+ raise NotImplementedError
+ return bbox
+
+ def gen_grid_from_reg(self, reg, previous_boxes):
+        """
+        Based on the previous bboxes and regression values, compute the
+        regressed bboxes and generate grids on them.
+        :param reg: the regression values relative to the previous bboxes.
+        :param previous_boxes: the previous bboxes.
+        :return: the grids generated on the regressed bboxes.
+        """
+ b, _, h, w = reg.shape
+ bxy = (previous_boxes[:, :2, ...] + previous_boxes[:, 2:, ...]) / 2.
+ bwh = (previous_boxes[:, 2:, ...] -
+ previous_boxes[:, :2, ...]).clamp(min=1e-6)
+ grid_topleft = bxy + bwh * reg[:, :2, ...] - 0.5 * bwh * torch.exp(
+ reg[:, 2:, ...])
+ grid_wh = bwh * torch.exp(reg[:, 2:, ...])
+ grid_left = grid_topleft[:, [0], ...]
+ grid_top = grid_topleft[:, [1], ...]
+ grid_width = grid_wh[:, [0], ...]
+ grid_height = grid_wh[:, [1], ...]
+ intervel = torch.linspace(0., 1., self.dcn_kernel).view(
+ 1, self.dcn_kernel, 1, 1).type_as(reg)
+ grid_x = grid_left + grid_width * intervel
+ grid_x = grid_x.unsqueeze(1).repeat(1, self.dcn_kernel, 1, 1, 1)
+ grid_x = grid_x.view(b, -1, h, w)
+ grid_y = grid_top + grid_height * intervel
+ grid_y = grid_y.unsqueeze(2).repeat(1, 1, self.dcn_kernel, 1, 1)
+ grid_y = grid_y.view(b, -1, h, w)
+ grid_yx = torch.stack([grid_y, grid_x], dim=2)
+ grid_yx = grid_yx.view(b, -1, h, w)
+ regressed_bbox = torch.cat([
+ grid_left, grid_top, grid_left + grid_width, grid_top + grid_height
+ ], 1)
+ return grid_yx, regressed_bbox
+
+ def forward_single(self, x):
+ dcn_base_offset = self.dcn_base_offset.type_as(x)
+        # If center_init is used, the initial RepPoints start from the center
+        # points. If the bounding box representation is used, the initial
+        # RepPoints come from a regular grid placed on a pre-defined bbox.
+ if self.use_grid_points or not self.center_init:
+ scale = self.point_base_scale / 2
+ points_init = dcn_base_offset / dcn_base_offset.max() * scale
+ bbox_init = x.new_tensor([-scale, -scale, scale,
+ scale]).view(1, 4, 1, 1)
+ else:
+ points_init = 0
+ cls_feat = x
+ pts_feat = x
+ for cls_conv in self.cls_convs:
+ cls_feat = cls_conv(cls_feat)
+ for reg_conv in self.reg_convs:
+ pts_feat = reg_conv(pts_feat)
+ # initialize reppoints
+ pts_out_init = self.reppoints_pts_init_out(
+ self.relu(self.reppoints_pts_init_conv(pts_feat)))
+ if self.use_grid_points:
+ pts_out_init, bbox_out_init = self.gen_grid_from_reg(
+ pts_out_init, bbox_init.detach())
+ else:
+ pts_out_init = pts_out_init + points_init
+ # refine and classify reppoints
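+        # only a gradient_mul fraction of the gradient flows back to the
+        # initial points; the remaining part of the offset is detached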
+ pts_out_init_grad_mul = (1 - self.gradient_mul) * pts_out_init.detach(
+ ) + self.gradient_mul * pts_out_init
+ dcn_offset = pts_out_init_grad_mul - dcn_base_offset
+ cls_out = self.reppoints_cls_out(
+ self.relu(self.reppoints_cls_conv(cls_feat, dcn_offset)))
+ pts_out_refine = self.reppoints_pts_refine_out(
+ self.relu(self.reppoints_pts_refine_conv(pts_feat, dcn_offset)))
+ if self.use_grid_points:
+ pts_out_refine, bbox_out_refine = self.gen_grid_from_reg(
+ pts_out_refine, bbox_out_init.detach())
+ else:
+ pts_out_refine = pts_out_refine + pts_out_init.detach()
+ return cls_out, pts_out_init, pts_out_refine
+
+ def forward(self, feats):
+ return multi_apply(self.forward_single, feats)
+
+ def get_points(self, featmap_sizes, img_metas):
+ """Get points according to feature map sizes.
+
+ Args:
+ featmap_sizes (list[tuple]): Multi-level feature map sizes.
+ img_metas (list[dict]): Image meta info.
+
+ Returns:
+ tuple: points of each image, valid flags of each image
+ """
+ num_imgs = len(img_metas)
+ num_levels = len(featmap_sizes)
+
+        # since feature map sizes of all images are the same, we only compute
+        # point centers once
+ multi_level_points = []
+ for i in range(num_levels):
+ points = self.point_generators[i].grid_points(
+ featmap_sizes[i], self.point_strides[i])
+ multi_level_points.append(points)
+ points_list = [[point.clone() for point in multi_level_points]
+ for _ in range(num_imgs)]
+
+ # for each image, we compute valid flags of multi level grids
+ valid_flag_list = []
+ for img_id, img_meta in enumerate(img_metas):
+ multi_level_flags = []
+ for i in range(num_levels):
+ point_stride = self.point_strides[i]
+ feat_h, feat_w = featmap_sizes[i]
+ h, w = img_meta['pad_shape'][:2]
+ valid_feat_h = min(int(np.ceil(h / point_stride)), feat_h)
+ valid_feat_w = min(int(np.ceil(w / point_stride)), feat_w)
+ flags = self.point_generators[i].valid_flags(
+ (feat_h, feat_w), (valid_feat_h, valid_feat_w))
+ multi_level_flags.append(flags)
+ valid_flag_list.append(multi_level_flags)
+
+ return points_list, valid_flag_list
+
+ def centers_to_bboxes(self, point_list):
+        """Get bboxes according to center points. Only used with MaxIoUAssigner.
+        """
+ bbox_list = []
+ for i_img, point in enumerate(point_list):
+ bbox = []
+ for i_lvl in range(len(self.point_strides)):
+ scale = self.point_base_scale * self.point_strides[i_lvl] * 0.5
+ bbox_shift = torch.Tensor([-scale, -scale, scale,
+ scale]).view(1, 4).type_as(point[0])
+ bbox_center = torch.cat(
+ [point[i_lvl][:, :2], point[i_lvl][:, :2]], dim=1)
+ bbox.append(bbox_center + bbox_shift)
+ bbox_list.append(bbox)
+ return bbox_list
+
+ def offset_to_pts(self, center_list, pred_list):
+        """Convert point offsets to absolute point coordinates.
+        """
+ pts_list = []
+ for i_lvl in range(len(self.point_strides)):
+ pts_lvl = []
+ for i_img in range(len(center_list)):
+ pts_center = center_list[i_img][i_lvl][:, :2].repeat(
+ 1, self.num_points)
+ pts_shift = pred_list[i_lvl][i_img]
+ yx_pts_shift = pts_shift.permute(1, 2, 0).view(
+ -1, 2 * self.num_points)
+ y_pts_shift = yx_pts_shift[..., 0::2]
+ x_pts_shift = yx_pts_shift[..., 1::2]
+ xy_pts_shift = torch.stack([x_pts_shift, y_pts_shift], -1)
+ xy_pts_shift = xy_pts_shift.view(*yx_pts_shift.shape[:-1], -1)
+ pts = xy_pts_shift * self.point_strides[i_lvl] + pts_center
+ pts_lvl.append(pts)
+ pts_lvl = torch.stack(pts_lvl, 0)
+ pts_list.append(pts_lvl)
+ return pts_list
+
+ def loss_single(self, cls_score, pts_pred_init, pts_pred_refine, labels,
+ label_weights, bbox_gt_init, bbox_weights_init,
+ bbox_gt_refine, bbox_weights_refine, stride,
+ num_total_samples_init, num_total_samples_refine):
+ # classification loss
+ labels = labels.reshape(-1)
+ label_weights = label_weights.reshape(-1)
+ cls_score = cls_score.permute(0, 2, 3,
+ 1).reshape(-1, self.cls_out_channels)
+ loss_cls = self.loss_cls(
+ cls_score,
+ labels,
+ label_weights,
+ avg_factor=num_total_samples_refine)
+
+ # points loss
+ bbox_gt_init = bbox_gt_init.reshape(-1, 4)
+ bbox_weights_init = bbox_weights_init.reshape(-1, 4)
+ bbox_pred_init = self.points2bbox(
+ pts_pred_init.reshape(-1, 2 * self.num_points), y_first=False)
+ bbox_gt_refine = bbox_gt_refine.reshape(-1, 4)
+ bbox_weights_refine = bbox_weights_refine.reshape(-1, 4)
+ bbox_pred_refine = self.points2bbox(
+ pts_pred_refine.reshape(-1, 2 * self.num_points), y_first=False)
+ normalize_term = self.point_base_scale * stride
+ loss_pts_init = self.loss_bbox_init(
+ bbox_pred_init / normalize_term,
+ bbox_gt_init / normalize_term,
+ bbox_weights_init,
+ avg_factor=num_total_samples_init)
+ loss_pts_refine = self.loss_bbox_refine(
+ bbox_pred_refine / normalize_term,
+ bbox_gt_refine / normalize_term,
+ bbox_weights_refine,
+ avg_factor=num_total_samples_refine)
+ return loss_cls, loss_pts_init, loss_pts_refine
+
+ def loss(self,
+ cls_scores,
+ pts_preds_init,
+ pts_preds_refine,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.point_generators)
+ label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
+
+ # target for initial stage
+ center_list, valid_flag_list = self.get_points(featmap_sizes,
+ img_metas)
+ pts_coordinate_preds_init = self.offset_to_pts(center_list,
+ pts_preds_init)
+ if cfg.init.assigner['type'] == 'PointAssigner':
+ # Assign target for center list
+ candidate_list = center_list
+ else:
+ # transform center list to bbox list and
+ # assign target for bbox list
+ bbox_list = self.centers_to_bboxes(center_list)
+ candidate_list = bbox_list
+ cls_reg_targets_init = point_target(
+ candidate_list,
+ valid_flag_list,
+ gt_bboxes,
+ img_metas,
+ cfg.init,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=label_channels,
+ sampling=self.sampling)
+ (*_, bbox_gt_list_init, candidate_list_init, bbox_weights_list_init,
+ num_total_pos_init, num_total_neg_init) = cls_reg_targets_init
+ num_total_samples_init = (
+ num_total_pos_init +
+ num_total_neg_init if self.sampling else num_total_pos_init)
+
+ # target for refinement stage
+ center_list, valid_flag_list = self.get_points(featmap_sizes,
+ img_metas)
+ pts_coordinate_preds_refine = self.offset_to_pts(
+ center_list, pts_preds_refine)
+ bbox_list = []
+ for i_img, center in enumerate(center_list):
+ bbox = []
+ for i_lvl in range(len(pts_preds_refine)):
+ bbox_preds_init = self.points2bbox(
+ pts_preds_init[i_lvl].detach())
+ bbox_shift = bbox_preds_init * self.point_strides[i_lvl]
+ bbox_center = torch.cat(
+ [center[i_lvl][:, :2], center[i_lvl][:, :2]], dim=1)
+ bbox.append(bbox_center +
+ bbox_shift[i_img].permute(1, 2, 0).reshape(-1, 4))
+ bbox_list.append(bbox)
+ cls_reg_targets_refine = point_target(
+ bbox_list,
+ valid_flag_list,
+ gt_bboxes,
+ img_metas,
+ cfg.refine,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=label_channels,
+ sampling=self.sampling)
+ (labels_list, label_weights_list, bbox_gt_list_refine,
+ candidate_list_refine, bbox_weights_list_refine, num_total_pos_refine,
+ num_total_neg_refine) = cls_reg_targets_refine
+ num_total_samples_refine = (
+ num_total_pos_refine +
+ num_total_neg_refine if self.sampling else num_total_pos_refine)
+
+ # compute loss
+ losses_cls, losses_pts_init, losses_pts_refine = multi_apply(
+ self.loss_single,
+ cls_scores,
+ pts_coordinate_preds_init,
+ pts_coordinate_preds_refine,
+ labels_list,
+ label_weights_list,
+ bbox_gt_list_init,
+ bbox_weights_list_init,
+ bbox_gt_list_refine,
+ bbox_weights_list_refine,
+ self.point_strides,
+ num_total_samples_init=num_total_samples_init,
+ num_total_samples_refine=num_total_samples_refine)
+ loss_dict_all = {
+ 'loss_cls': losses_cls,
+ 'loss_pts_init': losses_pts_init,
+ 'loss_pts_refine': losses_pts_refine
+ }
+ return loss_dict_all
+
+ def get_bboxes(self,
+ cls_scores,
+ pts_preds_init,
+ pts_preds_refine,
+ img_metas,
+ cfg,
+ rescale=False,
+ nms=True):
+ assert len(cls_scores) == len(pts_preds_refine)
+ bbox_preds_refine = [
+ self.points2bbox(pts_pred_refine)
+ for pts_pred_refine in pts_preds_refine
+ ]
+ num_levels = len(cls_scores)
+ mlvl_points = [
+ self.point_generators[i].grid_points(cls_scores[i].size()[-2:],
+ self.point_strides[i])
+ for i in range(num_levels)
+ ]
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score_list = [
+ cls_scores[i][img_id].detach() for i in range(num_levels)
+ ]
+ bbox_pred_list = [
+ bbox_preds_refine[i][img_id].detach()
+ for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+ mlvl_points, img_shape,
+ scale_factor, cfg, rescale, nms)
+ result_list.append(proposals)
+ return result_list
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ mlvl_points,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False,
+ nms=True):
+ assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
+ mlvl_bboxes = []
+ mlvl_scores = []
+ for i_lvl, (cls_score, bbox_pred, points) in enumerate(
+ zip(cls_scores, bbox_preds, mlvl_points)):
+ assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+ cls_score = cls_score.permute(1, 2,
+ 0).reshape(-1, self.cls_out_channels)
+ if self.use_sigmoid_cls:
+ scores = cls_score.sigmoid()
+ else:
+ scores = cls_score.softmax(-1)
+ bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ nms_pre = cfg.get('nms_pre', -1)
+ if nms_pre > 0 and scores.shape[0] > nms_pre:
+ if self.use_sigmoid_cls:
+ max_scores, _ = scores.max(dim=1)
+ else:
+ max_scores, _ = scores[:, 1:].max(dim=1)
+ _, topk_inds = max_scores.topk(nms_pre)
+ points = points[topk_inds, :]
+ bbox_pred = bbox_pred[topk_inds, :]
+ scores = scores[topk_inds, :]
+ bbox_pos_center = torch.cat([points[:, :2], points[:, :2]], dim=1)
+ bboxes = bbox_pred * self.point_strides[i_lvl] + bbox_pos_center
+ x1 = bboxes[:, 0].clamp(min=0, max=img_shape[1])
+ y1 = bboxes[:, 1].clamp(min=0, max=img_shape[0])
+ x2 = bboxes[:, 2].clamp(min=0, max=img_shape[1])
+ y2 = bboxes[:, 3].clamp(min=0, max=img_shape[0])
+ bboxes = torch.stack([x1, y1, x2, y2], dim=-1)
+ mlvl_bboxes.append(bboxes)
+ mlvl_scores.append(scores)
+ mlvl_bboxes = torch.cat(mlvl_bboxes)
+ if rescale:
+ mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
+ mlvl_scores = torch.cat(mlvl_scores)
+ if self.use_sigmoid_cls:
+ padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+ mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+ if nms:
+ det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
+ cfg.score_thr, cfg.nms,
+ cfg.max_per_img)
+ return det_bboxes, det_labels
+ else:
+ return mlvl_bboxes, mlvl_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_head.py
new file mode 100644
index 000000000..e3b8143ad
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_head.py
@@ -0,0 +1,103 @@
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from ..registry import HEADS
+from ..utils import ConvModule, bias_init_with_prob
+from .anchor_head import AnchorHead
+
+
+@HEADS.register_module
+class RetinaHead(AnchorHead):
+ """
+ An anchor-based head used in [1]_.
+
+ The head contains two subnetworks. The first classifies anchor boxes and
+ the second regresses deltas for the anchors.
+
+ References:
+ .. [1] https://arxiv.org/pdf/1708.02002.pdf
+
+ Example:
+ >>> import torch
+ >>> self = RetinaHead(11, 7)
+ >>> x = torch.rand(1, 7, 32, 32)
+ >>> cls_score, bbox_pred = self.forward_single(x)
+ >>> # Each anchor predicts a score for each class except background
+ >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors
+ >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors
+ >>> assert cls_per_anchor == (self.num_classes - 1)
+ >>> assert box_per_anchor == 4
+ """
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ stacked_convs=4,
+ octave_base_scale=4,
+ scales_per_octave=3,
+ conv_cfg=None,
+ norm_cfg=None,
+ **kwargs):
+ self.stacked_convs = stacked_convs
+ self.octave_base_scale = octave_base_scale
+ self.scales_per_octave = scales_per_octave
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
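+        # anchor scales per level: octave_base_scale * 2**(i / scales_per_octave),
+        # e.g. 4 * {2**0, 2**(1/3), 2**(2/3)} with the default settings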
+ octave_scales = np.array(
+ [2**(i / scales_per_octave) for i in range(scales_per_octave)])
+ anchor_scales = octave_scales * octave_base_scale
+ super(RetinaHead, self).__init__(
+ num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ self.cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.retina_cls = nn.Conv2d(
+ self.feat_channels,
+ self.num_anchors * self.cls_out_channels,
+ 3,
+ padding=1)
+ self.retina_reg = nn.Conv2d(
+ self.feat_channels, self.num_anchors * 4, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.cls_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.retina_cls, std=0.01, bias=bias_cls)
+ normal_init(self.retina_reg, std=0.01)
+
+ def forward_single(self, x):
+ cls_feat = x
+ reg_feat = x
+ for cls_conv in self.cls_convs:
+ cls_feat = cls_conv(cls_feat)
+ for reg_conv in self.reg_convs:
+ reg_feat = reg_conv(reg_feat)
+ cls_score = self.retina_cls(cls_feat)
+ bbox_pred = self.retina_reg(reg_feat)
+ return cls_score, bbox_pred
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_sepbn_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_sepbn_head.py
new file mode 100644
index 000000000..0f0766179
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_sepbn_head.py
@@ -0,0 +1,105 @@
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import normal_init
+
+from ..registry import HEADS
+from ..utils import ConvModule, bias_init_with_prob
+from .anchor_head import AnchorHead
+
+
+@HEADS.register_module
+class RetinaSepBNHead(AnchorHead):
+    """RetinaHead with separate BN.
+
+ In RetinaHead, conv/norm layers are shared across different FPN levels,
+ while in RetinaSepBNHead, conv layers are shared across different FPN
+ levels, but BN layers are separated.
+ """
+
+ def __init__(self,
+ num_classes,
+ num_ins,
+ in_channels,
+ stacked_convs=4,
+ octave_base_scale=4,
+ scales_per_octave=3,
+ conv_cfg=None,
+ norm_cfg=None,
+ **kwargs):
+ self.stacked_convs = stacked_convs
+ self.octave_base_scale = octave_base_scale
+ self.scales_per_octave = scales_per_octave
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.num_ins = num_ins
+ octave_scales = np.array(
+ [2**(i / scales_per_octave) for i in range(scales_per_octave)])
+ anchor_scales = octave_scales * octave_base_scale
+ super(RetinaSepBNHead, self).__init__(
+ num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
+
+ def _init_layers(self):
+ self.relu = nn.ReLU(inplace=True)
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ for i in range(self.num_ins):
+ cls_convs = nn.ModuleList()
+ reg_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels if i == 0 else self.feat_channels
+ cls_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ reg_convs.append(
+ ConvModule(
+ chn,
+ self.feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.cls_convs.append(cls_convs)
+ self.reg_convs.append(reg_convs)
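+        # share conv weights across FPN levels by aliasing the level-0 convs,
+        # while each level keeps its own norm (e.g. BN) layers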
+ for i in range(self.stacked_convs):
+ for j in range(1, self.num_ins):
+ self.cls_convs[j][i].conv = self.cls_convs[0][i].conv
+ self.reg_convs[j][i].conv = self.reg_convs[0][i].conv
+ self.retina_cls = nn.Conv2d(
+ self.feat_channels,
+ self.num_anchors * self.cls_out_channels,
+ 3,
+ padding=1)
+ self.retina_reg = nn.Conv2d(
+ self.feat_channels, self.num_anchors * 4, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.cls_convs[0]:
+ normal_init(m.conv, std=0.01)
+ for m in self.reg_convs[0]:
+ normal_init(m.conv, std=0.01)
+ bias_cls = bias_init_with_prob(0.01)
+ normal_init(self.retina_cls, std=0.01, bias=bias_cls)
+ normal_init(self.retina_reg, std=0.01)
+
+ def forward(self, feats):
+ cls_scores = []
+ bbox_preds = []
+ for i, x in enumerate(feats):
+ cls_feat = feats[i]
+ reg_feat = feats[i]
+ for cls_conv in self.cls_convs[i]:
+ cls_feat = cls_conv(cls_feat)
+ for reg_conv in self.reg_convs[i]:
+ reg_feat = reg_conv(reg_feat)
+ cls_score = self.retina_cls(cls_feat)
+ bbox_pred = self.retina_reg(reg_feat)
+ cls_scores.append(cls_score)
+ bbox_preds.append(bbox_pred)
+ return cls_scores, bbox_preds
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/rpn_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/rpn_head.py
new file mode 100644
index 000000000..f88b949cf
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/rpn_head.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+
+from mmdet.core import delta2bbox
+from mmdet.ops import nms
+from ..registry import HEADS
+from .anchor_head import AnchorHead
+
+
+@HEADS.register_module
+class RPNHead(AnchorHead):
+
+ def __init__(self, in_channels, **kwargs):
+ super(RPNHead, self).__init__(2, in_channels, **kwargs)
+
+ def _init_layers(self):
+ self.rpn_conv = nn.Conv2d(
+ self.in_channels, self.feat_channels, 3, padding=1)
+ self.rpn_cls = nn.Conv2d(self.feat_channels,
+ self.num_anchors * self.cls_out_channels, 1)
+ self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)
+
+ def init_weights(self):
+ normal_init(self.rpn_conv, std=0.01)
+ normal_init(self.rpn_cls, std=0.01)
+ normal_init(self.rpn_reg, std=0.01)
+
+ def forward_single(self, x):
+ x = self.rpn_conv(x)
+ x = F.relu(x, inplace=True)
+ rpn_cls_score = self.rpn_cls(x)
+ rpn_bbox_pred = self.rpn_reg(x)
+ return rpn_cls_score, rpn_bbox_pred
+
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ gt_bboxes,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ losses = super(RPNHead, self).loss(
+ cls_scores,
+ bbox_preds,
+ gt_bboxes,
+ None,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=gt_bboxes_ignore)
+ return dict(
+ loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])
+
+ def get_bboxes_single(self,
+ cls_scores,
+ bbox_preds,
+ mlvl_anchors,
+ img_shape,
+ scale_factor,
+ cfg,
+ rescale=False):
+ mlvl_proposals = []
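+        # per level: keep the top nms_pre scoring anchors, decode them with
+        # delta2bbox, drop boxes smaller than min_bbox_size, apply NMS and
+        # keep nms_post proposals; the levels are merged below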
+ for idx in range(len(cls_scores)):
+ rpn_cls_score = cls_scores[idx]
+ rpn_bbox_pred = bbox_preds[idx]
+ assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
+ rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
+ if self.use_sigmoid_cls:
+ rpn_cls_score = rpn_cls_score.reshape(-1)
+ scores = rpn_cls_score.sigmoid()
+ else:
+ rpn_cls_score = rpn_cls_score.reshape(-1, 2)
+ scores = rpn_cls_score.softmax(dim=1)[:, 1]
+ rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+ anchors = mlvl_anchors[idx]
+ if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
+ _, topk_inds = scores.topk(cfg.nms_pre)
+ rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
+ anchors = anchors[topk_inds, :]
+ scores = scores[topk_inds]
+ proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ if cfg.min_bbox_size > 0:
+ w = proposals[:, 2] - proposals[:, 0] + 1
+ h = proposals[:, 3] - proposals[:, 1] + 1
+ valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
+ (h >= cfg.min_bbox_size)).squeeze()
+ proposals = proposals[valid_inds, :]
+ scores = scores[valid_inds]
+ proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
+ proposals, _ = nms(proposals, cfg.nms_thr)
+ proposals = proposals[:cfg.nms_post, :]
+ mlvl_proposals.append(proposals)
+ proposals = torch.cat(mlvl_proposals, 0)
+ if cfg.nms_across_levels:
+ proposals, _ = nms(proposals, cfg.nms_thr)
+ proposals = proposals[:cfg.max_num, :]
+ else:
+ scores = proposals[:, 4]
+ num = min(cfg.max_num, proposals.shape[0])
+ _, topk_inds = scores.topk(num)
+ proposals = proposals[topk_inds, :]
+ return proposals
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solo_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solo_head.py
new file mode 100644
index 000000000..e6c060726
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solo_head.py
@@ -0,0 +1,433 @@
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+from mmdet.ops import DeformConv, roi_align
+from mmdet.core import multi_apply, bbox2roi, matrix_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, ConvModule
+
+INF = 1e8
+
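+# Compute per-mask centroids from image moments: center_x = m10 / m00 and
+# center_y = m01 / m00, with m00 clamped to avoid division by zero.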
+def center_of_mass(bitmasks):
+ _, h, w = bitmasks.size()
+ ys = torch.arange(0, h, dtype=torch.float32, device=bitmasks.device)
+ xs = torch.arange(0, w, dtype=torch.float32, device=bitmasks.device)
+
+ m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6)
+ m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1)
+ m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1)
+ center_x = m10 / m00
+ center_y = m01 / m00
+ return center_x, center_y
+
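+# Keep only local maxima of the category heatmap: a cell survives if it
+# equals its 2x2 max-pooled neighbourhood, acting as a cheap point NMS.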
+def points_nms(heat, kernel=2):
+ # kernel must be 2
+ hmax = nn.functional.max_pool2d(
+ heat, (kernel, kernel), stride=1, padding=1)
+ keep = (hmax[:, :, :-1, :-1] == heat).float()
+ return heat * keep
+
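+# Per-instance Dice loss: 1 - 2 * sum(x * y) / (sum(x^2) + sum(y^2)), with a
+# small constant added to each denominator term for numerical stability.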
+def dice_loss(input, target):
+ input = input.contiguous().view(input.size()[0], -1)
+ target = target.contiguous().view(target.size()[0], -1).float()
+
+ a = torch.sum(input * target, 1)
+ b = torch.sum(input * input, 1) + 0.001
+ c = torch.sum(target * target, 1) + 0.001
+ d = (2 * a) / (b + c)
+ return 1-d
+
+@HEADS.register_module
+class SOLOHead(nn.Module):
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ seg_feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
+ sigma=0.4,
+ num_grids=None,
+ cate_down_pos=0,
+ with_deform=False,
+ loss_ins=None,
+ loss_cate=None,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(SOLOHead, self).__init__()
+ self.num_classes = num_classes
+ self.seg_num_grids = num_grids
+ self.cate_out_channels = self.num_classes - 1
+ self.in_channels = in_channels
+ self.seg_feat_channels = seg_feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.sigma = sigma
+ self.cate_down_pos = cate_down_pos
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.with_deform = with_deform
+ self.loss_cate = build_loss(loss_cate)
+ self.ins_loss_weight = loss_ins['loss_weight']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self._init_layers()
+
+ def _init_layers(self):
+ norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
+ self.ins_convs = nn.ModuleList()
+ self.cate_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
+ self.ins_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ chn = self.in_channels if i == 0 else self.seg_feat_channels
+ self.cate_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ self.solo_ins_list = nn.ModuleList()
+ for seg_num_grid in self.seg_num_grids:
+ self.solo_ins_list.append(
+ nn.Conv2d(
+ self.seg_feat_channels, seg_num_grid**2, 1))
+
+ self.solo_cate = nn.Conv2d(
+ self.seg_feat_channels, self.cate_out_channels, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.ins_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.cate_convs:
+ normal_init(m.conv, std=0.01)
+ bias_ins = bias_init_with_prob(0.01)
+ for m in self.solo_ins_list:
+ normal_init(m, std=0.01, bias=bias_ins)
+ bias_cate = bias_init_with_prob(0.01)
+ normal_init(self.solo_cate, std=0.01, bias=bias_cate)
+
+ def forward(self, feats, eval=False):
+ new_feats = self.split_feats(feats)
+ featmap_sizes = [featmap.size()[-2:] for featmap in new_feats]
+ upsampled_size = (featmap_sizes[0][0] * 2, featmap_sizes[0][1] * 2)
+ ins_pred, cate_pred = multi_apply(self.forward_single, new_feats,
+ list(range(len(self.seg_num_grids))),
+ eval=eval, upsampled_size=upsampled_size)
+ return ins_pred, cate_pred
+
+ def split_feats(self, feats):
+ return (F.interpolate(feats[0], scale_factor=0.5, mode='bilinear'),
+ feats[1],
+ feats[2],
+ feats[3],
+ F.interpolate(feats[4], size=feats[3].shape[-2:], mode='bilinear'))
+
+ def forward_single(self, x, idx, eval=False, upsampled_size=None):
+ ins_feat = x
+ cate_feat = x
+ # ins branch
+ # concat coord
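+        # CoordConv-style trick: append normalized x/y coordinate maps in
+        # [-1, 1] so the instance branch is aware of absolute position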
+ x_range = torch.linspace(-1, 1, ins_feat.shape[-1], device=ins_feat.device)
+ y_range = torch.linspace(-1, 1, ins_feat.shape[-2], device=ins_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([ins_feat.shape[0], 1, -1, -1])
+ x = x.expand([ins_feat.shape[0], 1, -1, -1])
+ coord_feat = torch.cat([x, y], 1)
+ ins_feat = torch.cat([ins_feat, coord_feat], 1)
+
+ for i, ins_layer in enumerate(self.ins_convs):
+ ins_feat = ins_layer(ins_feat)
+
+ ins_feat = F.interpolate(ins_feat, scale_factor=2, mode='bilinear')
+ ins_pred = self.solo_ins_list[idx](ins_feat)
+
+ # cate branch
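+        # at cate_down_pos the features are resized to the S x S grid of this
+        # level, so the category branch outputs class scores per grid cell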
+ for i, cate_layer in enumerate(self.cate_convs):
+ if i == self.cate_down_pos:
+ seg_num_grid = self.seg_num_grids[idx]
+ cate_feat = F.interpolate(cate_feat, size=seg_num_grid, mode='bilinear')
+ cate_feat = cate_layer(cate_feat)
+
+ cate_pred = self.solo_cate(cate_feat)
+ if eval:
+ ins_pred = F.interpolate(ins_pred.sigmoid(), size=upsampled_size, mode='bilinear')
+ cate_pred = points_nms(cate_pred.sigmoid(), kernel=2).permute(0, 2, 3, 1)
+ return ins_pred, cate_pred
+
+ def loss(self,
+ ins_preds,
+ cate_preds,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in
+ ins_preds]
+ ins_label_list, cate_label_list, ins_ind_label_list = multi_apply(
+ self.solo_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ featmap_sizes=featmap_sizes)
+
+ # ins
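+        # for each FPN level, gather the GT masks and predicted masks of the
+        # positive grid cells (flagged in ins_ind_label) across all images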
+ ins_labels = [torch.cat([ins_labels_level_img[ins_ind_labels_level_img, ...]
+ for ins_labels_level_img, ins_ind_labels_level_img in
+ zip(ins_labels_level, ins_ind_labels_level)], 0)
+ for ins_labels_level, ins_ind_labels_level in zip(zip(*ins_label_list), zip(*ins_ind_label_list))]
+
+ ins_preds = [torch.cat([ins_preds_level_img[ins_ind_labels_level_img, ...]
+ for ins_preds_level_img, ins_ind_labels_level_img in
+ zip(ins_preds_level, ins_ind_labels_level)], 0)
+ for ins_preds_level, ins_ind_labels_level in zip(ins_preds, zip(*ins_ind_label_list))]
+
+
+ ins_ind_labels = [
+ torch.cat([ins_ind_labels_level_img.flatten()
+ for ins_ind_labels_level_img in ins_ind_labels_level])
+ for ins_ind_labels_level in zip(*ins_ind_label_list)
+ ]
+ flatten_ins_ind_labels = torch.cat(ins_ind_labels)
+
+ num_ins = flatten_ins_ind_labels.sum()
+
+ # dice loss
+ loss_ins = []
+ for input, target in zip(ins_preds, ins_labels):
+ if input.size()[0] == 0:
+ continue
+ input = torch.sigmoid(input)
+ loss_ins.append(dice_loss(input, target))
+ loss_ins = torch.cat(loss_ins).mean()
+ loss_ins = loss_ins * self.ins_loss_weight
+
+ # cate
+ cate_labels = [
+ torch.cat([cate_labels_level_img.flatten()
+ for cate_labels_level_img in cate_labels_level])
+ for cate_labels_level in zip(*cate_label_list)
+ ]
+ flatten_cate_labels = torch.cat(cate_labels)
+
+ cate_preds = [
+ cate_pred.permute(0, 2, 3, 1).reshape(-1, self.cate_out_channels)
+ for cate_pred in cate_preds
+ ]
+ flatten_cate_preds = torch.cat(cate_preds)
+
+ loss_cate = self.loss_cate(flatten_cate_preds, flatten_cate_labels, avg_factor=num_ins + 1)
+ return dict(
+ loss_ins=loss_ins,
+ loss_cate=loss_cate)
+
+ def solo_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ gt_masks_raw,
+ featmap_sizes=None):
+
+ device = gt_labels_raw[0].device
+
+ # ins
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) * (
+ gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+
+ ins_label_list = []
+ cate_label_list = []
+ ins_ind_label_list = []
+ for (lower_bound, upper_bound), stride, featmap_size, num_grid \
+ in zip(self.scale_ranges, self.strides, featmap_sizes, self.seg_num_grids):
+
+ ins_label = torch.zeros([num_grid ** 2, featmap_size[0], featmap_size[1]], dtype=torch.uint8, device=device)
+ cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
+ ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)
+
+ hit_indices = ((gt_areas >= lower_bound) & (gt_areas <= upper_bound)).nonzero().flatten()
+ if len(hit_indices) == 0:
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ continue
+ gt_bboxes = gt_bboxes_raw[hit_indices]
+ gt_labels = gt_labels_raw[hit_indices]
+ gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...]
+
+ half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
+ half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma
+
+ # mass center
+ gt_masks_pt = torch.from_numpy(gt_masks).to(device=device)
+ center_ws, center_hs = center_of_mass(gt_masks_pt)
+ valid_mask_flags = gt_masks_pt.sum(dim=-1).sum(dim=-1) > 0
+
+ output_stride = stride / 2
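+            # each valid GT mask is assigned to the grid cells spanned by a
+            # sigma-scaled center region of the instance, clipped to a 3x3
+            # neighbourhood of the center cell; the mask, downscaled by
+            # output_stride, is written at every assigned cell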
+ for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws, valid_mask_flags):
+ if not valid_mask_flag:
+ continue
+ upsampled_size = (featmap_sizes[0][0] * 4, featmap_sizes[0][1] * 4)
+ coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
+ coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))
+
+                # center region box: top, down, left, right
+ top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
+ down_box = min(num_grid - 1, int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
+ left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
+ right_box = min(num_grid - 1, int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))
+
+ top = max(top_box, coord_h-1)
+ down = min(down_box, coord_h+1)
+ left = max(coord_w-1, left_box)
+ right = min(right_box, coord_w+1)
+
+ cate_label[top:(down+1), left:(right+1)] = gt_label
+ # ins
+ seg_mask = mmcv.imrescale(seg_mask, scale=1. / output_stride)
+ seg_mask = torch.from_numpy(seg_mask).to(device=device)
+ for i in range(top, down+1):
+ for j in range(left, right+1):
+ label = int(i * num_grid + j)
+ ins_label[label, :seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
+ ins_ind_label[label] = True
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ return ins_label_list, cate_label_list, ins_ind_label_list
+
+ def get_seg(self, seg_preds, cate_preds, img_metas, cfg, rescale=None):
+ assert len(seg_preds) == len(cate_preds)
+ num_levels = len(cate_preds)
+ featmap_size = seg_preds[0].size()[-2:]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cate_pred_list = [
+ cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
+ ]
+ seg_pred_list = [
+ seg_preds[i][img_id].detach() for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ ori_shape = img_metas[img_id]['ori_shape']
+
+ cate_pred_list = torch.cat(cate_pred_list, dim=0)
+ seg_pred_list = torch.cat(seg_pred_list, dim=0)
+
+ result = self.get_seg_single(cate_pred_list, seg_pred_list,
+ featmap_size, img_shape, ori_shape, scale_factor, cfg, rescale)
+ result_list.append(result)
+ return result_list
+
+ def get_seg_single(self,
+ cate_preds,
+ seg_preds,
+ featmap_size,
+ img_shape,
+ ori_shape,
+ scale_factor,
+ cfg,
+ rescale=False, debug=False):
+ assert len(cate_preds) == len(seg_preds)
+
+ # overall info.
+ h, w, _ = img_shape
+ upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
+
+ # process.
+ inds = (cate_preds > cfg.score_thr)
+ # category scores.
+ cate_scores = cate_preds[inds]
+ if len(cate_scores) == 0:
+ return None
+ # category labels.
+ inds = inds.nonzero()
+ cate_labels = inds[:, 1]
+
+ # strides.
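+        # map each flattened prediction index back to the stride of its FPN
+        # level via the cumulative number of grid cells per level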
+ size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0)
+ strides = cate_scores.new_ones(size_trans[-1])
+ n_stage = len(self.seg_num_grids)
+ strides[:size_trans[0]] *= self.strides[0]
+ for ind_ in range(1, n_stage):
+ strides[size_trans[ind_ - 1]:size_trans[ind_]] *= self.strides[ind_]
+ strides = strides[inds[:, 0]]
+
+ # masks.
+ seg_preds = seg_preds[inds[:, 0]]
+ seg_masks = seg_preds > cfg.mask_thr
+ sum_masks = seg_masks.sum((1, 2)).float()
+
+ # filter.
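+        # drop tiny masks whose foreground area does not exceed the stride of
+        # their level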
+ keep = sum_masks > strides
+ if keep.sum() == 0:
+ return None
+
+ seg_masks = seg_masks[keep, ...]
+ seg_preds = seg_preds[keep, ...]
+ sum_masks = sum_masks[keep]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # maskness.
+ seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
+ cate_scores *= seg_scores
+
+ # sort and keep top nms_pre
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.nms_pre:
+ sort_inds = sort_inds[:cfg.nms_pre]
+ seg_masks = seg_masks[sort_inds, :, :]
+ seg_preds = seg_preds[sort_inds, :, :]
+ sum_masks = sum_masks[sort_inds]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ # Matrix NMS
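+        # Matrix NMS decays the scores of overlapping masks in parallel based
+        # on pairwise mask IoU (soft suppression instead of hard removal)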
+ cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
+ kernel=cfg.kernel, sigma=cfg.sigma, sum_masks=sum_masks)
+
+ # filter.
+ keep = cate_scores >= cfg.update_thr
+ if keep.sum() == 0:
+ return None
+ seg_preds = seg_preds[keep, :, :]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # sort and keep top_k
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.max_per_img:
+ sort_inds = sort_inds[:cfg.max_per_img]
+ seg_preds = seg_preds[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ seg_preds = F.interpolate(seg_preds.unsqueeze(0),
+ size=upsampled_size_out,
+ mode='bilinear')[:, :, :h, :w]
+ seg_masks = F.interpolate(seg_preds,
+ size=ori_shape[:2],
+ mode='bilinear').squeeze(0)
+ seg_masks = seg_masks > cfg.mask_thr
+ return seg_masks, cate_labels, cate_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_head.py
new file mode 100644
index 000000000..9616b99b1
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_head.py
@@ -0,0 +1,483 @@
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+from mmdet.ops import DeformConv, roi_align
+from mmdet.core import multi_apply, matrix_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, ConvModule
+
+INF = 1e8
+
+def center_of_mass(bitmasks):
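+    # Centroid of each binary mask from image moments: x = m10 / m00, y = m01 / m00;
+    # m00 is clamped so empty masks do not cause a division by zero.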
+ _, h, w = bitmasks.size()
+ ys = torch.arange(0, h, dtype=torch.float32, device=bitmasks.device)
+ xs = torch.arange(0, w, dtype=torch.float32, device=bitmasks.device)
+
+ m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6)
+ m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1)
+ m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1)
+ center_x = m10 / m00
+ center_y = m01 / m00
+ return center_x, center_y
+
+def points_nms(heat, kernel=2):
+ # kernel must be 2
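+    # With stride 1 and padding 1 the pooled map is one cell larger than heat;
+    # cropping to [:-1, :-1] realigns it so a cell survives only when it equals
+    # the maximum of its 2x2 neighborhood (a simple point NMS on the heatmap).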
+ hmax = nn.functional.max_pool2d(
+ heat, (kernel, kernel), stride=1, padding=1)
+ keep = (hmax[:, :, :-1, :-1] == heat).float()
+ return heat * keep
+
+def dice_loss(input, target):
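+    # Soft Dice per sample: d = 2 * sum(p * t) / (sum(p^2) + sum(t^2));
+    # the 0.001 terms keep the denominator positive for empty masks, and the
+    # returned value is 1 - d (one loss value per instance).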
+ input = input.contiguous().view(input.size()[0], -1)
+ target = target.contiguous().view(target.size()[0], -1).float()
+
+ a = torch.sum(input * target, 1)
+ b = torch.sum(input * input, 1) + 0.001
+ c = torch.sum(target * target, 1) + 0.001
+ d = (2 * a) / (b + c)
+ return 1-d
+
+@HEADS.register_module
+class SOLOv2Head(nn.Module):
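+    """SOLOv2 instance segmentation head.
+
+    For each FPN level the head predicts per-grid category scores and per-grid
+    dynamic mask kernels; the kernels are later convolved with a shared mask
+    feature map to decode one mask per positive grid cell.
+
+    Example (a minimal construction sketch; the argument values below are
+    illustrative rather than taken from a shipped config, and in practice the
+    head is built from a config file through the HEADS registry):
+        >>> head = SOLOv2Head(
+        >>>     num_classes=81,
+        >>>     in_channels=256,
+        >>>     num_grids=[40, 36, 24, 16, 12],
+        >>>     loss_ins=dict(loss_weight=3.0),
+        >>>     loss_cate=dict(type='FocalLoss', use_sigmoid=True,
+        >>>                    gamma=2.0, alpha=0.25, loss_weight=1.0))
+    """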
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ seg_feat_channels=256,
+ stacked_convs=4,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
+ sigma=0.2,
+ num_grids=None,
+ ins_out_channels=64,
+ loss_ins=None,
+ loss_cate=None,
+ conv_cfg=None,
+ norm_cfg=None,
+ use_dcn_in_tower=False,
+ type_dcn=None):
+ super(SOLOv2Head, self).__init__()
+ self.num_classes = num_classes
+ self.seg_num_grids = num_grids
+ self.cate_out_channels = self.num_classes - 1
+ self.ins_out_channels = ins_out_channels
+ self.in_channels = in_channels
+ self.seg_feat_channels = seg_feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.sigma = sigma
+ self.kernel_out_channels = self.ins_out_channels * 1 * 1
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.loss_cate = build_loss(loss_cate)
+ self.ins_loss_weight = loss_ins['loss_weight']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.use_dcn_in_tower = use_dcn_in_tower
+ self.type_dcn = type_dcn
+ self._init_layers()
+
+ def _init_layers(self):
+ norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
+ self.cate_convs = nn.ModuleList()
+ self.kernel_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
+ if self.use_dcn_in_tower:
+ cfg_conv = dict(type=self.type_dcn)
+ else:
+ cfg_conv = self.conv_cfg
+
+ chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
+ self.kernel_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ chn = self.in_channels if i == 0 else self.seg_feat_channels
+ self.cate_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ self.solo_cate = nn.Conv2d(
+ self.seg_feat_channels, self.cate_out_channels, 3, padding=1)
+
+ self.solo_kernel = nn.Conv2d(
+ self.seg_feat_channels, self.kernel_out_channels, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.cate_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.kernel_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cate = bias_init_with_prob(0.01)
+ normal_init(self.solo_cate, std=0.01, bias=bias_cate)
+ normal_init(self.solo_kernel, std=0.01)
+
+ def forward(self, feats, eval=False):
+ new_feats = self.split_feats(feats)
+ featmap_sizes = [featmap.size()[-2:] for featmap in new_feats]
+ upsampled_size = (featmap_sizes[0][0] * 2, featmap_sizes[0][1] * 2)
+ cate_pred, kernel_pred = multi_apply(self.forward_single, new_feats,
+ list(range(len(self.seg_num_grids))),
+ eval=eval, upsampled_size=upsampled_size)
+ return cate_pred, kernel_pred
+
+ def split_feats(self, feats):
+ return (F.interpolate(feats[0], scale_factor=0.5, mode='bilinear'),
+ feats[1],
+ feats[2],
+ feats[3],
+ F.interpolate(feats[4], size=feats[3].shape[-2:], mode='bilinear'))
+
+ def forward_single(self, x, idx, eval=False, upsampled_size=None):
+ ins_kernel_feat = x
+ # ins branch
+ # concat coord
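+        # CoordConv-style trick: append normalized x/y coordinate maps (in [-1, 1])
+        # so the kernel branch is aware of absolute position.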
+ x_range = torch.linspace(-1, 1, ins_kernel_feat.shape[-1], device=ins_kernel_feat.device)
+ y_range = torch.linspace(-1, 1, ins_kernel_feat.shape[-2], device=ins_kernel_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([ins_kernel_feat.shape[0], 1, -1, -1])
+ x = x.expand([ins_kernel_feat.shape[0], 1, -1, -1])
+ coord_feat = torch.cat([x, y], 1)
+ ins_kernel_feat = torch.cat([ins_kernel_feat, coord_feat], 1)
+
+ # kernel branch
+ kernel_feat = ins_kernel_feat
+ seg_num_grid = self.seg_num_grids[idx]
+ kernel_feat = F.interpolate(kernel_feat, size=seg_num_grid, mode='bilinear')
+
+ cate_feat = kernel_feat[:, :-2, :, :]
+
+ kernel_feat = kernel_feat.contiguous()
+ for i, kernel_layer in enumerate(self.kernel_convs):
+ kernel_feat = kernel_layer(kernel_feat)
+ kernel_pred = self.solo_kernel(kernel_feat)
+
+ # cate branch
+ cate_feat = cate_feat.contiguous()
+ for i, cate_layer in enumerate(self.cate_convs):
+ cate_feat = cate_layer(cate_feat)
+ cate_pred = self.solo_cate(cate_feat)
+
+ if eval:
+ cate_pred = points_nms(cate_pred.sigmoid(), kernel=2).permute(0, 2, 3, 1)
+ return cate_pred, kernel_pred
+
+ def loss(self,
+ cate_preds,
+ kernel_preds,
+ ins_pred,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ mask_feat_size = ins_pred.size()[-2:]
+ ins_label_list, cate_label_list, ins_ind_label_list, grid_order_list = multi_apply(
+ self.solov2_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ mask_feat_size=mask_feat_size)
+
+ # ins
+ ins_labels = [torch.cat([ins_labels_level_img
+ for ins_labels_level_img in ins_labels_level], 0)
+ for ins_labels_level in zip(*ins_label_list)]
+
+ kernel_preds = [[kernel_preds_level_img.view(kernel_preds_level_img.shape[0], -1)[:, grid_orders_level_img]
+ for kernel_preds_level_img, grid_orders_level_img in
+ zip(kernel_preds_level, grid_orders_level)]
+ for kernel_preds_level, grid_orders_level in zip(kernel_preds, zip(*grid_order_list))]
+ # generate masks
+ ins_pred_list = []
+ for b_kernel_pred in kernel_preds:
+ b_mask_pred = []
+ for idx, kernel_pred in enumerate(b_kernel_pred):
+
+ if kernel_pred.size()[-1] == 0:
+ continue
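+                # Dynamic convolution: the kernels predicted for this image's positive
+                # grid cells are reshaped into 1x1 conv filters and applied to its mask
+                # feature map, yielding one mask prediction per positive cell.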
+ cur_ins_pred = ins_pred[idx, ...]
+ H, W = cur_ins_pred.shape[-2:]
+ N, I = kernel_pred.shape
+ cur_ins_pred = cur_ins_pred.unsqueeze(0)
+ kernel_pred = kernel_pred.permute(1, 0).view(I, -1, 1, 1)
+ cur_ins_pred = F.conv2d(cur_ins_pred, kernel_pred, stride=1).view(-1, H, W)
+ b_mask_pred.append(cur_ins_pred)
+ if len(b_mask_pred) == 0:
+ b_mask_pred = None
+ else:
+ b_mask_pred = torch.cat(b_mask_pred, 0)
+ ins_pred_list.append(b_mask_pred)
+
+ ins_ind_labels = [
+ torch.cat([ins_ind_labels_level_img.flatten()
+ for ins_ind_labels_level_img in ins_ind_labels_level])
+ for ins_ind_labels_level in zip(*ins_ind_label_list)
+ ]
+ flatten_ins_ind_labels = torch.cat(ins_ind_labels)
+
+ num_ins = flatten_ins_ind_labels.sum()
+
+ # dice loss
+ loss_ins = []
+ for input, target in zip(ins_pred_list, ins_labels):
+ if input is None:
+ continue
+ input = torch.sigmoid(input)
+ loss_ins.append(dice_loss(input, target))
+ loss_ins = torch.cat(loss_ins).mean()
+ loss_ins = loss_ins * self.ins_loss_weight
+
+ # cate
+ cate_labels = [
+ torch.cat([cate_labels_level_img.flatten()
+ for cate_labels_level_img in cate_labels_level])
+ for cate_labels_level in zip(*cate_label_list)
+ ]
+ flatten_cate_labels = torch.cat(cate_labels)
+
+ cate_preds = [
+ cate_pred.permute(0, 2, 3, 1).reshape(-1, self.cate_out_channels)
+ for cate_pred in cate_preds
+ ]
+ flatten_cate_preds = torch.cat(cate_preds)
+
+ loss_cate = self.loss_cate(flatten_cate_preds, flatten_cate_labels, avg_factor=num_ins + 1)
+ return dict(
+ loss_ins=loss_ins,
+ loss_cate=loss_cate)
+
+ def solov2_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ gt_masks_raw,
+ mask_feat_size):
+
+ device = gt_labels_raw[0].device
+
+ # ins
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) * (
+ gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+
+ ins_label_list = []
+ cate_label_list = []
+ ins_ind_label_list = []
+ grid_order_list = []
+ for (lower_bound, upper_bound), stride, num_grid \
+ in zip(self.scale_ranges, self.strides, self.seg_num_grids):
+
+ hit_indices = ((gt_areas >= lower_bound) & (gt_areas <= upper_bound)).nonzero().flatten()
+ num_ins = len(hit_indices)
+
+ ins_label = []
+ grid_order = []
+ cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
+ ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)
+
+ if num_ins == 0:
+ ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8, device=device)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ grid_order_list.append([])
+ continue
+ gt_bboxes = gt_bboxes_raw[hit_indices]
+ gt_labels = gt_labels_raw[hit_indices]
+ gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...]
+
+ half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
+ half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma
+
+ # mass center
+ gt_masks_pt = torch.from_numpy(gt_masks).to(device=device)
+ center_ws, center_hs = center_of_mass(gt_masks_pt)
+ valid_mask_flags = gt_masks_pt.sum(dim=-1).sum(dim=-1) > 0
+
+ output_stride = 4
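+            # A grid cell is assigned to an instance if it lies in the instance's center
+            # region: a (sigma * w, sigma * h) box around the mask's center of mass,
+            # additionally clipped to at most one cell around the center cell.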
+ for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws, valid_mask_flags):
+ if not valid_mask_flag:
+ continue
+ upsampled_size = (mask_feat_size[0] * 4, mask_feat_size[1] * 4)
+ coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
+ coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))
+
+ # left, top, right, down
+ top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
+ down_box = min(num_grid - 1, int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
+ left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
+ right_box = min(num_grid - 1, int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))
+
+ top = max(top_box, coord_h-1)
+ down = min(down_box, coord_h+1)
+ left = max(coord_w-1, left_box)
+ right = min(right_box, coord_w+1)
+
+ cate_label[top:(down+1), left:(right+1)] = gt_label
+ seg_mask = mmcv.imrescale(seg_mask, scale=1. / output_stride)
+ seg_mask = torch.from_numpy(seg_mask).to(device=device)
+ for i in range(top, down+1):
+ for j in range(left, right+1):
+ label = int(i * num_grid + j)
+
+ cur_ins_label = torch.zeros([mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8,
+ device=device)
+ cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
+ ins_label.append(cur_ins_label)
+ ins_ind_label[label] = True
+ grid_order.append(label)
+ if len(ins_label) == 0:
+ ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8, device=device)
+ else:
+ ins_label = torch.stack(ins_label, 0)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ grid_order_list.append(grid_order)
+ return ins_label_list, cate_label_list, ins_ind_label_list, grid_order_list
+
+ def get_seg(self, cate_preds, kernel_preds, seg_pred, img_metas, cfg, rescale=None):
+ num_levels = len(cate_preds)
+ featmap_size = seg_pred.size()[-2:]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cate_pred_list = [
+ cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
+ ]
+ seg_pred_list = seg_pred[img_id, ...].unsqueeze(0)
+ kernel_pred_list = [
+ kernel_preds[i][img_id].permute(1, 2, 0).view(-1, self.kernel_out_channels).detach()
+ for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ ori_shape = img_metas[img_id]['ori_shape']
+
+ cate_pred_list = torch.cat(cate_pred_list, dim=0)
+ kernel_pred_list = torch.cat(kernel_pred_list, dim=0)
+
+ result = self.get_seg_single(cate_pred_list, seg_pred_list, kernel_pred_list,
+ featmap_size, img_shape, ori_shape, scale_factor, cfg, rescale)
+ result_list.append(result)
+ return result_list
+
+ def get_seg_single(self,
+ cate_preds,
+ seg_preds,
+ kernel_preds,
+ featmap_size,
+ img_shape,
+ ori_shape,
+ scale_factor,
+ cfg,
+ rescale=False, debug=False):
+
+ assert len(cate_preds) == len(kernel_preds)
+
+ # overall info.
+ h, w, _ = img_shape
+ upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
+
+ # process.
+ inds = (cate_preds > cfg.score_thr)
+ cate_scores = cate_preds[inds]
+ if len(cate_scores) == 0:
+ return None
+
+ # cate_labels & kernel_preds
+ inds = inds.nonzero()
+ cate_labels = inds[:, 1]
+ kernel_preds = kernel_preds[inds[:, 0]]
+
+ # trans vector.
+ size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0)
+ strides = kernel_preds.new_ones(size_trans[-1])
+
+ n_stage = len(self.seg_num_grids)
+ strides[:size_trans[0]] *= self.strides[0]
+ for ind_ in range(1, n_stage):
+ strides[size_trans[ind_-1]:size_trans[ind_]] *= self.strides[ind_]
+ strides = strides[inds[:, 0]]
+
+ # mask encoding.
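+        # Decode masks by convolving the shared mask feature with the selected 1x1
+        # dynamic kernels, then apply sigmoid to get per-pixel foreground probabilities.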
+ I, N = kernel_preds.shape
+ kernel_preds = kernel_preds.view(I, N, 1, 1)
+ seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1).squeeze(0).sigmoid()
+ # mask.
+ seg_masks = seg_preds > cfg.mask_thr
+ sum_masks = seg_masks.sum((1, 2)).float()
+
+ # filter.
+ keep = sum_masks > strides
+ if keep.sum() == 0:
+ return None
+
+ seg_masks = seg_masks[keep, ...]
+ seg_preds = seg_preds[keep, ...]
+ sum_masks = sum_masks[keep]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # maskness.
+ seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
+ cate_scores *= seg_scores
+
+ # sort and keep top nms_pre
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.nms_pre:
+ sort_inds = sort_inds[:cfg.nms_pre]
+ seg_masks = seg_masks[sort_inds, :, :]
+ seg_preds = seg_preds[sort_inds, :, :]
+ sum_masks = sum_masks[sort_inds]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ # Matrix NMS
+ cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
+                                 kernel=cfg.kernel, sigma=cfg.sigma, sum_masks=sum_masks)
+
+ # filter.
+ keep = cate_scores >= cfg.update_thr
+ if keep.sum() == 0:
+ return None
+ seg_preds = seg_preds[keep, :, :]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # sort and keep top_k
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.max_per_img:
+ sort_inds = sort_inds[:cfg.max_per_img]
+ seg_preds = seg_preds[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ seg_preds = F.interpolate(seg_preds.unsqueeze(0),
+ size=upsampled_size_out,
+ mode='bilinear')[:, :, :h, :w]
+ seg_masks = F.interpolate(seg_preds,
+ size=ori_shape[:2],
+ mode='bilinear').squeeze(0)
+ seg_masks = seg_masks > cfg.mask_thr
+ return seg_masks, cate_labels, cate_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_light_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_light_head.py
new file mode 100644
index 000000000..46e90a159
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_light_head.py
@@ -0,0 +1,482 @@
+import mmcv
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import normal_init
+from mmdet.ops import DeformConv, roi_align
+from mmdet.core import multi_apply, matrix_nms
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import bias_init_with_prob, ConvModule
+
+INF = 1e8
+
+def center_of_mass(bitmasks):
+ _, h, w = bitmasks.size()
+ ys = torch.arange(0, h, dtype=torch.float32, device=bitmasks.device)
+ xs = torch.arange(0, w, dtype=torch.float32, device=bitmasks.device)
+
+ m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6)
+ m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1)
+ m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1)
+ center_x = m10 / m00
+ center_y = m01 / m00
+ return center_x, center_y
+
+def points_nms(heat, kernel=2):
+ # kernel must be 2
+ hmax = nn.functional.max_pool2d(
+ heat, (kernel, kernel), stride=1, padding=1)
+ keep = (hmax[:, :, :-1, :-1] == heat).float()
+ return heat * keep
+
+def dice_loss(input, target):
+ input = input.contiguous().view(input.size()[0], -1)
+ target = target.contiguous().view(target.size()[0], -1).float()
+
+ a = torch.sum(input * target, 1)
+ b = torch.sum(input * input, 1) + 0.001
+ c = torch.sum(target * target, 1) + 0.001
+ d = (2 * a) / (b + c)
+ return 1-d
+
+@HEADS.register_module
+class SOLOv2LightHead(nn.Module):
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ seg_feat_channels=256,
+ strides=(4, 8, 16, 32, 64),
+ base_edge_list=(16, 32, 64, 128, 256),
+ scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),
+ sigma=0.2,
+ num_grids=None,
+ ins_out_channels=64,
+ stacked_convs=4,
+ loss_ins=None,
+ loss_cate=None,
+ conv_cfg=None,
+ norm_cfg=None,
+ use_dcn_in_tower=False,
+ type_dcn=None):
+ super(SOLOv2LightHead, self).__init__()
+ self.num_classes = num_classes
+ self.seg_num_grids = num_grids
+ self.cate_out_channels = self.num_classes - 1
+ self.ins_out_channels = ins_out_channels
+ self.in_channels = in_channels
+ self.seg_feat_channels = seg_feat_channels
+ self.stacked_convs = stacked_convs
+ self.strides = strides
+ self.sigma = sigma
+ self.kernel_out_channels = self.ins_out_channels * 1 * 1
+ self.base_edge_list = base_edge_list
+ self.scale_ranges = scale_ranges
+ self.loss_cate = build_loss(loss_cate)
+ self.ins_loss_weight = loss_ins['loss_weight']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.use_dcn_in_tower = use_dcn_in_tower
+ self.type_dcn = type_dcn
+ self._init_layers()
+
+ def _init_layers(self):
+ norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
+ self.cate_convs = nn.ModuleList()
+ self.kernel_convs = nn.ModuleList()
+ for i in range(self.stacked_convs):
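+            # Light head: when enabled, deformable conv replaces only the last tower
+            # conv (SOLOv2Head applies it to every tower conv).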
+ if self.use_dcn_in_tower and i == self.stacked_convs - 1:
+ cfg_conv = dict(type=self.type_dcn)
+ else:
+ cfg_conv = self.conv_cfg
+
+ chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
+ self.kernel_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ chn = self.in_channels if i == 0 else self.seg_feat_channels
+ self.cate_convs.append(
+ ConvModule(
+ chn,
+ self.seg_feat_channels,
+ 3,
+ stride=1,
+ padding=1,
+ conv_cfg=cfg_conv,
+ norm_cfg=norm_cfg,
+ bias=norm_cfg is None))
+
+ self.solo_cate = nn.Conv2d(
+ self.seg_feat_channels, self.cate_out_channels, 3, padding=1)
+
+ self.solo_kernel = nn.Conv2d(
+ self.seg_feat_channels, self.kernel_out_channels, 3, padding=1)
+
+ def init_weights(self):
+ for m in self.cate_convs:
+ normal_init(m.conv, std=0.01)
+ for m in self.kernel_convs:
+ normal_init(m.conv, std=0.01)
+ bias_cate = bias_init_with_prob(0.01)
+ normal_init(self.solo_cate, std=0.01, bias=bias_cate)
+ normal_init(self.solo_kernel, std=0.01)
+
+ def forward(self, feats, eval=False):
+ new_feats = self.split_feats(feats)
+ featmap_sizes = [featmap.size()[-2:] for featmap in new_feats]
+ upsampled_size = (featmap_sizes[0][0] * 2, featmap_sizes[0][1] * 2)
+ cate_pred, kernel_pred = multi_apply(self.forward_single, new_feats,
+ list(range(len(self.seg_num_grids))),
+ eval=eval, upsampled_size=upsampled_size)
+ return cate_pred, kernel_pred
+
+ def split_feats(self, feats):
+ return (F.interpolate(feats[0], scale_factor=0.5, mode='bilinear'),
+ feats[1],
+ feats[2],
+ feats[3],
+ F.interpolate(feats[4], size=feats[3].shape[-2:], mode='bilinear'))
+
+ def forward_single(self, x, idx, eval=False, upsampled_size=None):
+ ins_kernel_feat = x
+ # ins branch
+ # concat coord
+ x_range = torch.linspace(-1, 1, ins_kernel_feat.shape[-1], device=ins_kernel_feat.device)
+ y_range = torch.linspace(-1, 1, ins_kernel_feat.shape[-2], device=ins_kernel_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([ins_kernel_feat.shape[0], 1, -1, -1])
+ x = x.expand([ins_kernel_feat.shape[0], 1, -1, -1])
+ coord_feat = torch.cat([x, y], 1)
+ ins_kernel_feat = torch.cat([ins_kernel_feat, coord_feat], 1)
+
+ # kernel branch
+ kernel_feat = ins_kernel_feat
+ seg_num_grid = self.seg_num_grids[idx]
+ kernel_feat = F.interpolate(kernel_feat, size=seg_num_grid, mode='bilinear')
+
+ cate_feat = kernel_feat[:, :-2, :, :]
+
+ kernel_feat = kernel_feat.contiguous()
+ for i, kernel_layer in enumerate(self.kernel_convs):
+ kernel_feat = kernel_layer(kernel_feat)
+ kernel_pred = self.solo_kernel(kernel_feat)
+
+ # cate branch
+ cate_feat = cate_feat.contiguous()
+ for i, cate_layer in enumerate(self.cate_convs):
+ cate_feat = cate_layer(cate_feat)
+ cate_pred = self.solo_cate(cate_feat)
+
+ if eval:
+ cate_pred = points_nms(cate_pred.sigmoid(), kernel=2).permute(0, 2, 3, 1)
+ return cate_pred, kernel_pred
+
+ def loss(self,
+ cate_preds,
+ kernel_preds,
+ ins_pred,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ mask_feat_size = ins_pred.size()[-2:]
+ ins_label_list, cate_label_list, ins_ind_label_list, grid_order_list = multi_apply(
+ self.solov2_target_single,
+ gt_bbox_list,
+ gt_label_list,
+ gt_mask_list,
+ mask_feat_size=mask_feat_size)
+
+ # ins
+ ins_labels = [torch.cat([ins_labels_level_img
+ for ins_labels_level_img in ins_labels_level], 0)
+ for ins_labels_level in zip(*ins_label_list)]
+
+ kernel_preds = [[kernel_preds_level_img.view(kernel_preds_level_img.shape[0], -1)[:, grid_orders_level_img]
+ for kernel_preds_level_img, grid_orders_level_img in
+ zip(kernel_preds_level, grid_orders_level)]
+ for kernel_preds_level, grid_orders_level in zip(kernel_preds, zip(*grid_order_list))]
+ # generate masks
+ ins_pred_list = []
+ for b_kernel_pred in kernel_preds:
+ b_mask_pred = []
+ for idx, kernel_pred in enumerate(b_kernel_pred):
+
+ if kernel_pred.size()[-1] == 0:
+ continue
+ cur_ins_pred = ins_pred[idx, ...]
+ H, W = cur_ins_pred.shape[-2:]
+ N, I = kernel_pred.shape
+ cur_ins_pred = cur_ins_pred.unsqueeze(0)
+ kernel_pred = kernel_pred.permute(1, 0).view(I, -1, 1, 1)
+ cur_ins_pred = F.conv2d(cur_ins_pred, kernel_pred, stride=1).view(-1, H, W)
+ b_mask_pred.append(cur_ins_pred)
+ if len(b_mask_pred) == 0:
+ b_mask_pred = None
+ else:
+ b_mask_pred = torch.cat(b_mask_pred, 0)
+ ins_pred_list.append(b_mask_pred)
+
+ ins_ind_labels = [
+ torch.cat([ins_ind_labels_level_img.flatten()
+ for ins_ind_labels_level_img in ins_ind_labels_level])
+ for ins_ind_labels_level in zip(*ins_ind_label_list)
+ ]
+ flatten_ins_ind_labels = torch.cat(ins_ind_labels)
+
+ num_ins = flatten_ins_ind_labels.sum()
+
+ # dice loss
+ loss_ins = []
+ for input, target in zip(ins_pred_list, ins_labels):
+ if input is None:
+ continue
+ input = torch.sigmoid(input)
+ loss_ins.append(dice_loss(input, target))
+ loss_ins = torch.cat(loss_ins).mean()
+ loss_ins = loss_ins * self.ins_loss_weight
+
+ # cate
+ cate_labels = [
+ torch.cat([cate_labels_level_img.flatten()
+ for cate_labels_level_img in cate_labels_level])
+ for cate_labels_level in zip(*cate_label_list)
+ ]
+ flatten_cate_labels = torch.cat(cate_labels)
+
+ cate_preds = [
+ cate_pred.permute(0, 2, 3, 1).reshape(-1, self.cate_out_channels)
+ for cate_pred in cate_preds
+ ]
+ flatten_cate_preds = torch.cat(cate_preds)
+
+ loss_cate = self.loss_cate(flatten_cate_preds, flatten_cate_labels, avg_factor=num_ins + 1)
+ return dict(
+ loss_ins=loss_ins,
+ loss_cate=loss_cate)
+
+ def solov2_target_single(self,
+ gt_bboxes_raw,
+ gt_labels_raw,
+ gt_masks_raw,
+ mask_feat_size):
+
+ device = gt_labels_raw[0].device
+
+ # ins
+ gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) * (
+ gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
+
+ ins_label_list = []
+ cate_label_list = []
+ ins_ind_label_list = []
+ grid_order_list = []
+ for (lower_bound, upper_bound), stride, num_grid \
+ in zip(self.scale_ranges, self.strides, self.seg_num_grids):
+
+ hit_indices = ((gt_areas >= lower_bound) & (gt_areas <= upper_bound)).nonzero().flatten()
+ num_ins = len(hit_indices)
+
+ ins_label = []
+ grid_order = []
+ cate_label = torch.zeros([num_grid, num_grid], dtype=torch.int64, device=device)
+ ins_ind_label = torch.zeros([num_grid ** 2], dtype=torch.bool, device=device)
+
+ if num_ins == 0:
+ ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8, device=device)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ grid_order_list.append([])
+ continue
+ gt_bboxes = gt_bboxes_raw[hit_indices]
+ gt_labels = gt_labels_raw[hit_indices]
+ gt_masks = gt_masks_raw[hit_indices.cpu().numpy(), ...]
+
+ half_ws = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.sigma
+ half_hs = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.sigma
+
+ # mass center
+ gt_masks_pt = torch.from_numpy(gt_masks).to(device=device)
+ center_ws, center_hs = center_of_mass(gt_masks_pt)
+ valid_mask_flags = gt_masks_pt.sum(dim=-1).sum(dim=-1) > 0
+ output_stride = 4
+ for seg_mask, gt_label, half_h, half_w, center_h, center_w, valid_mask_flag in zip(gt_masks, gt_labels, half_hs, half_ws, center_hs, center_ws, valid_mask_flags):
+ if not valid_mask_flag:
+ continue
+ upsampled_size = (mask_feat_size[0] * 4, mask_feat_size[1] * 4)
+ coord_w = int((center_w / upsampled_size[1]) // (1. / num_grid))
+ coord_h = int((center_h / upsampled_size[0]) // (1. / num_grid))
+
+ # left, top, right, down
+ top_box = max(0, int(((center_h - half_h) / upsampled_size[0]) // (1. / num_grid)))
+ down_box = min(num_grid - 1, int(((center_h + half_h) / upsampled_size[0]) // (1. / num_grid)))
+ left_box = max(0, int(((center_w - half_w) / upsampled_size[1]) // (1. / num_grid)))
+ right_box = min(num_grid - 1, int(((center_w + half_w) / upsampled_size[1]) // (1. / num_grid)))
+
+ top = max(top_box, coord_h-1)
+ down = min(down_box, coord_h+1)
+ left = max(coord_w-1, left_box)
+ right = min(right_box, coord_w+1)
+
+ cate_label[top:(down+1), left:(right+1)] = gt_label
+ seg_mask = mmcv.imrescale(seg_mask, scale=1. / output_stride)
+ seg_mask = torch.from_numpy(seg_mask).to(device=device)
+ for i in range(top, down+1):
+ for j in range(left, right+1):
+ label = int(i * num_grid + j)
+
+ cur_ins_label = torch.zeros([mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8,
+ device=device)
+ cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[1]] = seg_mask
+ ins_label.append(cur_ins_label)
+ ins_ind_label[label] = True
+ grid_order.append(label)
+ if len(ins_label) == 0:
+ ins_label = torch.zeros([0, mask_feat_size[0], mask_feat_size[1]], dtype=torch.uint8, device=device)
+ else:
+ ins_label = torch.stack(ins_label, 0)
+ ins_label_list.append(ins_label)
+ cate_label_list.append(cate_label)
+ ins_ind_label_list.append(ins_ind_label)
+ grid_order_list.append(grid_order)
+ return ins_label_list, cate_label_list, ins_ind_label_list, grid_order_list
+
+ def get_seg(self, cate_preds, kernel_preds, seg_pred, img_metas, cfg, rescale=None):
+ num_levels = len(cate_preds)
+ featmap_size = seg_pred.size()[-2:]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cate_pred_list = [
+ cate_preds[i][img_id].view(-1, self.cate_out_channels).detach() for i in range(num_levels)
+ ]
+ seg_pred_list = seg_pred[img_id, ...].unsqueeze(0)
+ kernel_pred_list = [
+ kernel_preds[i][img_id].permute(1, 2, 0).view(-1, self.kernel_out_channels).detach()
+ for i in range(num_levels)
+ ]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ ori_shape = img_metas[img_id]['ori_shape']
+
+ cate_pred_list = torch.cat(cate_pred_list, dim=0)
+ kernel_pred_list = torch.cat(kernel_pred_list, dim=0)
+
+ result = self.get_seg_single(cate_pred_list, seg_pred_list, kernel_pred_list,
+ featmap_size, img_shape, ori_shape, scale_factor, cfg, rescale)
+ result_list.append(result)
+ return result_list
+
+ def get_seg_single(self,
+ cate_preds,
+ seg_preds,
+ kernel_preds,
+ featmap_size,
+ img_shape,
+ ori_shape,
+ scale_factor,
+ cfg,
+ rescale=False, debug=False):
+
+ assert len(cate_preds) == len(kernel_preds)
+
+ # overall info.
+ h, w, _ = img_shape
+ upsampled_size_out = (featmap_size[0] * 4, featmap_size[1] * 4)
+
+ # process.
+ inds = (cate_preds > cfg.score_thr)
+ cate_scores = cate_preds[inds]
+ if len(cate_scores) == 0:
+ return None
+
+ # cate_labels & kernel_preds
+ inds = inds.nonzero()
+ cate_labels = inds[:, 1]
+ kernel_preds = kernel_preds[inds[:, 0]]
+
+ # trans vector.
+ size_trans = cate_labels.new_tensor(self.seg_num_grids).pow(2).cumsum(0)
+ strides = kernel_preds.new_ones(size_trans[-1])
+
+ n_stage = len(self.seg_num_grids)
+ strides[:size_trans[0]] *= self.strides[0]
+ for ind_ in range(1, n_stage):
+ strides[size_trans[ind_-1]:size_trans[ind_]] *= self.strides[ind_]
+ strides = strides[inds[:, 0]]
+
+ # mask encoding.
+ I, N = kernel_preds.shape
+ kernel_preds = kernel_preds.view(I, N, 1, 1)
+ seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1).squeeze(0).sigmoid()
+ # mask.
+ seg_masks = seg_preds > cfg.mask_thr
+ sum_masks = seg_masks.sum((1, 2)).float()
+
+ # filter.
+ keep = sum_masks > strides
+ if keep.sum() == 0:
+ return None
+
+ seg_masks = seg_masks[keep, ...]
+ seg_preds = seg_preds[keep, ...]
+ sum_masks = sum_masks[keep]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # maskness.
+ seg_scores = (seg_preds * seg_masks.float()).sum((1, 2)) / sum_masks
+ cate_scores *= seg_scores
+
+ # sort and keep top nms_pre
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.nms_pre:
+ sort_inds = sort_inds[:cfg.nms_pre]
+ seg_masks = seg_masks[sort_inds, :, :]
+ seg_preds = seg_preds[sort_inds, :, :]
+ sum_masks = sum_masks[sort_inds]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ # Matrix NMS
+ cate_scores = matrix_nms(seg_masks, cate_labels, cate_scores,
+                                 kernel=cfg.kernel, sigma=cfg.sigma, sum_masks=sum_masks)
+
+ # filter.
+ keep = cate_scores >= cfg.update_thr
+ if keep.sum() == 0:
+ return None
+ seg_preds = seg_preds[keep, :, :]
+ cate_scores = cate_scores[keep]
+ cate_labels = cate_labels[keep]
+
+ # sort and keep top_k
+ sort_inds = torch.argsort(cate_scores, descending=True)
+ if len(sort_inds) > cfg.max_per_img:
+ sort_inds = sort_inds[:cfg.max_per_img]
+ seg_preds = seg_preds[sort_inds, :, :]
+ cate_scores = cate_scores[sort_inds]
+ cate_labels = cate_labels[sort_inds]
+
+ seg_preds = F.interpolate(seg_preds.unsqueeze(0),
+ size=upsampled_size_out,
+ mode='bilinear')[:, :, :h, :w]
+ seg_masks = F.interpolate(seg_preds,
+ size=ori_shape[:2],
+ mode='bilinear').squeeze(0)
+ seg_masks = seg_masks > cfg.mask_thr
+ return seg_masks, cate_labels, cate_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ssd_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ssd_head.py
new file mode 100644
index 000000000..57113679b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ssd_head.py
@@ -0,0 +1,201 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import xavier_init
+
+from mmdet.core import AnchorGenerator, anchor_target, multi_apply
+from ..losses import smooth_l1_loss
+from ..registry import HEADS
+from .anchor_head import AnchorHead
+
+
+# TODO: add loss evaluator for SSD
+@HEADS.register_module
+class SSDHead(AnchorHead):
+
+ def __init__(self,
+ input_size=300,
+ num_classes=81,
+ in_channels=(512, 1024, 512, 256, 256, 256),
+ anchor_strides=(8, 16, 32, 64, 100, 300),
+ basesize_ratio_range=(0.1, 0.9),
+ anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
+ target_means=(.0, .0, .0, .0),
+ target_stds=(1.0, 1.0, 1.0, 1.0)):
+ super(AnchorHead, self).__init__()
+ self.input_size = input_size
+ self.num_classes = num_classes
+ self.in_channels = in_channels
+ self.cls_out_channels = num_classes
+ num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
+ reg_convs = []
+ cls_convs = []
+ for i in range(len(in_channels)):
+ reg_convs.append(
+ nn.Conv2d(
+ in_channels[i],
+ num_anchors[i] * 4,
+ kernel_size=3,
+ padding=1))
+ cls_convs.append(
+ nn.Conv2d(
+ in_channels[i],
+ num_anchors[i] * num_classes,
+ kernel_size=3,
+ padding=1))
+ self.reg_convs = nn.ModuleList(reg_convs)
+ self.cls_convs = nn.ModuleList(cls_convs)
+
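+        # Per-level anchor base sizes roughly follow the SSD scale rule: scales are
+        # spread evenly (in percent of input_size) across basesize_ratio_range, and a
+        # smaller hand-picked scale is prepended for the first feature map below,
+        # depending on the dataset preset (COCO vs. VOC).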
+ min_ratio, max_ratio = basesize_ratio_range
+ min_ratio = int(min_ratio * 100)
+ max_ratio = int(max_ratio * 100)
+ step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
+ min_sizes = []
+ max_sizes = []
+ for r in range(int(min_ratio), int(max_ratio) + 1, step):
+ min_sizes.append(int(input_size * r / 100))
+ max_sizes.append(int(input_size * (r + step) / 100))
+ if input_size == 300:
+ if basesize_ratio_range[0] == 0.15: # SSD300 COCO
+ min_sizes.insert(0, int(input_size * 7 / 100))
+ max_sizes.insert(0, int(input_size * 15 / 100))
+ elif basesize_ratio_range[0] == 0.2: # SSD300 VOC
+ min_sizes.insert(0, int(input_size * 10 / 100))
+ max_sizes.insert(0, int(input_size * 20 / 100))
+ elif input_size == 512:
+ if basesize_ratio_range[0] == 0.1: # SSD512 COCO
+ min_sizes.insert(0, int(input_size * 4 / 100))
+ max_sizes.insert(0, int(input_size * 10 / 100))
+ elif basesize_ratio_range[0] == 0.15: # SSD512 VOC
+ min_sizes.insert(0, int(input_size * 7 / 100))
+ max_sizes.insert(0, int(input_size * 15 / 100))
+ self.anchor_generators = []
+ self.anchor_strides = anchor_strides
+ for k in range(len(anchor_strides)):
+ base_size = min_sizes[k]
+ stride = anchor_strides[k]
+ ctr = ((stride - 1) / 2., (stride - 1) / 2.)
+ scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
+ ratios = [1.]
+ for r in anchor_ratios[k]:
+ ratios += [1 / r, r] # 4 or 6 ratio
+ anchor_generator = AnchorGenerator(
+ base_size, scales, ratios, scale_major=False, ctr=ctr)
+ indices = list(range(len(ratios)))
+ indices.insert(1, len(indices))
+ anchor_generator.base_anchors = torch.index_select(
+ anchor_generator.base_anchors, 0, torch.LongTensor(indices))
+ self.anchor_generators.append(anchor_generator)
+
+ self.target_means = target_means
+ self.target_stds = target_stds
+ self.use_sigmoid_cls = False
+ self.cls_focal_loss = False
+ self.fp16_enabled = False
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ xavier_init(m, distribution='uniform', bias=0)
+
+ def forward(self, feats):
+ cls_scores = []
+ bbox_preds = []
+ for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,
+ self.cls_convs):
+ cls_scores.append(cls_conv(feat))
+ bbox_preds.append(reg_conv(feat))
+ return cls_scores, bbox_preds
+
+ def loss_single(self, cls_score, bbox_pred, labels, label_weights,
+ bbox_targets, bbox_weights, num_total_samples, cfg):
+ loss_cls_all = F.cross_entropy(
+ cls_score, labels, reduction='none') * label_weights
+ pos_inds = (labels > 0).nonzero().view(-1)
+ neg_inds = (labels == 0).nonzero().view(-1)
+
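+        # Online hard negative mining: keep at most neg_pos_ratio negatives per
+        # positive, selecting the negatives with the highest classification loss.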
+ num_pos_samples = pos_inds.size(0)
+ num_neg_samples = cfg.neg_pos_ratio * num_pos_samples
+ if num_neg_samples > neg_inds.size(0):
+ num_neg_samples = neg_inds.size(0)
+ topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)
+ loss_cls_pos = loss_cls_all[pos_inds].sum()
+ loss_cls_neg = topk_loss_cls_neg.sum()
+ loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples
+
+ loss_bbox = smooth_l1_loss(
+ bbox_pred,
+ bbox_targets,
+ bbox_weights,
+ beta=cfg.smoothl1_beta,
+ avg_factor=num_total_samples)
+ return loss_cls[None], loss_bbox
+
+ def loss(self,
+ cls_scores,
+ bbox_preds,
+ gt_bboxes,
+ gt_labels,
+ img_metas,
+ cfg,
+ gt_bboxes_ignore=None):
+ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+ assert len(featmap_sizes) == len(self.anchor_generators)
+
+ device = cls_scores[0].device
+
+ anchor_list, valid_flag_list = self.get_anchors(
+ featmap_sizes, img_metas, device=device)
+ cls_reg_targets = anchor_target(
+ anchor_list,
+ valid_flag_list,
+ gt_bboxes,
+ img_metas,
+ self.target_means,
+ self.target_stds,
+ cfg,
+ gt_bboxes_ignore_list=gt_bboxes_ignore,
+ gt_labels_list=gt_labels,
+ label_channels=1,
+ sampling=False,
+ unmap_outputs=False)
+ if cls_reg_targets is None:
+ return None
+ (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
+ num_total_pos, num_total_neg) = cls_reg_targets
+
+ num_images = len(img_metas)
+ all_cls_scores = torch.cat([
+ s.permute(0, 2, 3, 1).reshape(
+ num_images, -1, self.cls_out_channels) for s in cls_scores
+ ], 1)
+ all_labels = torch.cat(labels_list, -1).view(num_images, -1)
+ all_label_weights = torch.cat(label_weights_list,
+ -1).view(num_images, -1)
+ all_bbox_preds = torch.cat([
+ b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)
+ for b in bbox_preds
+ ], -2)
+ all_bbox_targets = torch.cat(bbox_targets_list,
+ -2).view(num_images, -1, 4)
+ all_bbox_weights = torch.cat(bbox_weights_list,
+ -2).view(num_images, -1, 4)
+
+ # check NaN and Inf
+ assert torch.isfinite(all_cls_scores).all().item(), \
+ 'classification scores become infinite or NaN!'
+ assert torch.isfinite(all_bbox_preds).all().item(), \
+            'bbox predictions become infinite or NaN!'
+
+ losses_cls, losses_bbox = multi_apply(
+ self.loss_single,
+ all_cls_scores,
+ all_bbox_preds,
+ all_labels,
+ all_label_weights,
+ all_bbox_targets,
+ all_bbox_weights,
+ num_total_samples=num_total_pos,
+ cfg=cfg)
+ return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/__init__.py
new file mode 100644
index 000000000..6fb56d63c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/__init__.py
@@ -0,0 +1,6 @@
+from .hrnet import HRNet
+from .resnet import ResNet, make_res_layer
+from .resnext import ResNeXt
+from .ssd_vgg import SSDVGG
+
+__all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/hrnet.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/hrnet.py
new file mode 100644
index 000000000..0f7a082cf
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/hrnet.py
@@ -0,0 +1,524 @@
+import torch.nn as nn
+from mmcv.cnn import constant_init, kaiming_init
+from mmcv.runner import load_checkpoint
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmdet.utils import get_root_logger
+from ..registry import BACKBONES
+from ..utils import build_conv_layer, build_norm_layer
+from .resnet import BasicBlock, Bottleneck
+
+
+class HRModule(nn.Module):
+ """ High-Resolution Module for HRNet. In this module, every branch
+ has 4 BasicBlocks/Bottlenecks. Fusion/Exchange is in this module.
+ """
+
+ def __init__(self,
+ num_branches,
+ blocks,
+ num_blocks,
+ in_channels,
+ num_channels,
+ multiscale_output=True,
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ super(HRModule, self).__init__()
+ self._check_branches(num_branches, num_blocks, in_channels,
+ num_channels)
+
+ self.in_channels = in_channels
+ self.num_branches = num_branches
+
+ self.multiscale_output = multiscale_output
+ self.norm_cfg = norm_cfg
+ self.conv_cfg = conv_cfg
+ self.with_cp = with_cp
+ self.branches = self._make_branches(num_branches, blocks, num_blocks,
+ num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(inplace=False)
+
+ def _check_branches(self, num_branches, num_blocks, in_channels,
+ num_channels):
+ if num_branches != len(num_blocks):
+ error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
+ num_branches, len(num_blocks))
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
+ num_branches, len(num_channels))
+ raise ValueError(error_msg)
+
+ if num_branches != len(in_channels):
+ error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
+ num_branches, len(in_channels))
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self,
+ branch_index,
+ block,
+ num_blocks,
+ num_channels,
+ stride=1):
+ downsample = None
+ if stride != 1 or \
+ self.in_channels[branch_index] != \
+ num_channels[branch_index] * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ self.in_channels[branch_index],
+ num_channels[branch_index] * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, num_channels[branch_index] *
+ block.expansion)[1])
+
+ layers = []
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ self.in_channels[branch_index] = \
+ num_channels[branch_index] * block.expansion
+ for i in range(1, num_blocks[branch_index]):
+ layers.append(
+ block(
+ self.in_channels[branch_index],
+ num_channels[branch_index],
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(
+ self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
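+        # Cross-resolution fusion: for target branch i, lower-resolution inputs (j > i)
+        # are channel-mapped with a 1x1 conv and nearest-upsampled, higher-resolution
+        # inputs (j < i) are downsampled with stride-2 3x3 convs, and j == i passes through.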
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ in_channels = self.in_channels
+ fuse_layers = []
+ num_out_branches = num_branches if self.multiscale_output else 1
+ for i in range(num_out_branches):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False),
+ build_norm_layer(self.norm_cfg, in_channels[i])[1],
+ nn.Upsample(
+ scale_factor=2**(j - i), mode='nearest')))
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv_downsamples = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[i],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[i])[1]))
+ else:
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels[j],
+ in_channels[j],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ in_channels[j])[1],
+ nn.ReLU(inplace=False)))
+ fuse_layer.append(nn.Sequential(*conv_downsamples))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def forward(self, x):
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+ for i in range(len(self.fuse_layers)):
+ y = 0
+ for j in range(self.num_branches):
+ if i == j:
+ y += x[j]
+ else:
+ y += self.fuse_layers[i][j](x[j])
+ x_fuse.append(self.relu(y))
+ return x_fuse
+
+
+@BACKBONES.register_module
+class HRNet(nn.Module):
+ """HRNet backbone.
+
+ High-Resolution Representations for Labeling Pixels and Regions
+ arXiv: https://arxiv.org/abs/1904.04514
+
+ Args:
+ extra (dict): detailed configuration for each stage of HRNet.
+ in_channels (int): Number of input image channels. Normally 3.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmdet.models import HRNet
+ >>> import torch
+ >>> extra = dict(
+ >>> stage1=dict(
+ >>> num_modules=1,
+ >>> num_branches=1,
+ >>> block='BOTTLENECK',
+ >>> num_blocks=(4, ),
+ >>> num_channels=(64, )),
+ >>> stage2=dict(
+ >>> num_modules=1,
+ >>> num_branches=2,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4),
+ >>> num_channels=(32, 64)),
+ >>> stage3=dict(
+ >>> num_modules=4,
+ >>> num_branches=3,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4),
+ >>> num_channels=(32, 64, 128)),
+ >>> stage4=dict(
+ >>> num_modules=3,
+ >>> num_branches=4,
+ >>> block='BASIC',
+ >>> num_blocks=(4, 4, 4, 4),
+ >>> num_channels=(32, 64, 128, 256)))
+ >>> self = HRNet(extra, in_channels=1)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 1, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 32, 8, 8)
+ (1, 64, 4, 4)
+ (1, 128, 2, 2)
+ (1, 256, 1, 1)
+ """
+
+ blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}
+
+ def __init__(self,
+ extra,
+ in_channels=3,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ norm_eval=True,
+ with_cp=False,
+ zero_init_residual=False):
+ super(HRNet, self).__init__()
+ self.extra = extra
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.norm_eval = norm_eval
+ self.with_cp = with_cp
+ self.zero_init_residual = zero_init_residual
+
+ # stem net
+ self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ 64,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.relu = nn.ReLU(inplace=True)
+
+ # stage 1
+ self.stage1_cfg = self.extra['stage1']
+ num_channels = self.stage1_cfg['num_channels'][0]
+ block_type = self.stage1_cfg['block']
+ num_blocks = self.stage1_cfg['num_blocks'][0]
+
+ block = self.blocks_dict[block_type]
+ stage1_out_channels = num_channels * block.expansion
+ self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
+
+ # stage 2
+ self.stage2_cfg = self.extra['stage2']
+ num_channels = self.stage2_cfg['num_channels']
+ block_type = self.stage2_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition1 = self._make_transition_layer([stage1_out_channels],
+ num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(
+ self.stage2_cfg, num_channels)
+
+ # stage 3
+ self.stage3_cfg = self.extra['stage3']
+ num_channels = self.stage3_cfg['num_channels']
+ block_type = self.stage3_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition2 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(
+ self.stage3_cfg, num_channels)
+
+ # stage 4
+ self.stage4_cfg = self.extra['stage4']
+ num_channels = self.stage4_cfg['num_channels']
+ block_type = self.stage4_cfg['block']
+
+ block = self.blocks_dict[block_type]
+ num_channels = [channel * block.expansion for channel in num_channels]
+ self.transition3 = self._make_transition_layer(pre_stage_channels,
+ num_channels)
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg, num_channels)
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ return getattr(self, self.norm2_name)
+
+ def _make_transition_layer(self, num_channels_pre_layer,
+ num_channels_cur_layer):
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg,
+ num_channels_cur_layer[i])[1],
+ nn.ReLU(inplace=True)))
+ else:
+ transition_layers.append(None)
+ else:
+ conv_downsamples = []
+ for j in range(i + 1 - num_branches_pre):
+ in_channels = num_channels_pre_layer[-1]
+ out_channels = num_channels_cur_layer[i] \
+ if j == i - num_branches_pre else in_channels
+ conv_downsamples.append(
+ nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias=False),
+ build_norm_layer(self.norm_cfg, out_channels)[1],
+ nn.ReLU(inplace=True)))
+ transition_layers.append(nn.Sequential(*conv_downsamples))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, inplanes, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ self.conv_cfg,
+ inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(self.norm_cfg, planes * block.expansion)[1])
+
+ layers = []
+ layers.append(
+ block(
+ inplanes,
+ planes,
+ stride,
+ downsample=downsample,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+ inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(
+ block(
+ inplanes,
+ planes,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, in_channels, multiscale_output=True):
+ num_modules = layer_config['num_modules']
+ num_branches = layer_config['num_branches']
+ num_blocks = layer_config['num_blocks']
+ num_channels = layer_config['num_channels']
+ block = self.blocks_dict[layer_config['block']]
+
+ hr_modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used for the last module
+ if not multiscale_output and i == num_modules - 1:
+ reset_multiscale_output = False
+ else:
+ reset_multiscale_output = True
+
+ hr_modules.append(
+ HRModule(
+ num_branches,
+ block,
+ num_blocks,
+ in_channels,
+ num_channels,
+ reset_multiscale_output,
+ with_cp=self.with_cp,
+ norm_cfg=self.norm_cfg,
+ conv_cfg=self.conv_cfg))
+
+ return nn.Sequential(*hr_modules), in_channels
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.norm2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg['num_branches']):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+
+ x_list = []
+ for i in range(self.stage3_cfg['num_branches']):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg['num_branches']):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ return y_list
+
+ def train(self, mode=True):
+ super(HRNet, self).train(mode)
+ if mode and self.norm_eval:
+ for m in self.modules():
+                # trick: eval() only has an effect on BatchNorm layers here
+ if isinstance(m, _BatchNorm):
+ m.eval()
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnet.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnet.py
new file mode 100644
index 000000000..ab6913e82
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnet.py
@@ -0,0 +1,516 @@
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+from mmcv.cnn import constant_init, kaiming_init
+from mmcv.runner import load_checkpoint
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmdet.models.plugins import GeneralizedAttention
+from mmdet.ops import ContextBlock
+from mmdet.utils import get_root_logger
+from ..registry import BACKBONES
+from ..utils import build_conv_layer, build_norm_layer
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self,
+ inplanes,
+ planes,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ dcn=None,
+ gcb=None,
+ gen_attention=None):
+ super(BasicBlock, self).__init__()
+ assert dcn is None, "Not implemented yet."
+ assert gen_attention is None, "Not implemented yet."
+ assert gcb is None, "Not implemented yet."
+
+ self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ inplanes,
+ planes,
+ 3,
+ stride=stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ self.conv2 = build_conv_layer(
+ conv_cfg, planes, planes, 3, padding=1, bias=False)
+ self.add_module(self.norm2_name, norm2)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+ self.dilation = dilation
+ assert not with_cp
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ return getattr(self, self.norm2_name)
+
+ def forward(self, x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self,
+ inplanes,
+ planes,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ dcn=None,
+ gcb=None,
+ gen_attention=None):
+ """Bottleneck block for ResNet.
+ If style is "pytorch", the stride-two layer is the 3x3 conv layer,
+ if it is "caffe", the stride-two layer is the first 1x1 conv layer.
+ """
+ super(Bottleneck, self).__init__()
+ assert style in ['pytorch', 'caffe']
+ assert dcn is None or isinstance(dcn, dict)
+ assert gcb is None or isinstance(gcb, dict)
+ assert gen_attention is None or isinstance(gen_attention, dict)
+
+ self.inplanes = inplanes
+ self.planes = planes
+ self.stride = stride
+ self.dilation = dilation
+ self.style = style
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.dcn = dcn
+ self.with_dcn = dcn is not None
+ self.gcb = gcb
+ self.with_gcb = gcb is not None
+ self.gen_attention = gen_attention
+ self.with_gen_attention = gen_attention is not None
+
+ if self.style == 'pytorch':
+ self.conv1_stride = 1
+ self.conv2_stride = stride
+ else:
+ self.conv1_stride = stride
+ self.conv2_stride = 1
+
+ self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ norm_cfg, planes * self.expansion, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ conv_cfg,
+ inplanes,
+ planes,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ fallback_on_stride = False
+ if self.with_dcn:
+ fallback_on_stride = dcn.pop('fallback_on_stride', False)
+ if not self.with_dcn or fallback_on_stride:
+ self.conv2 = build_conv_layer(
+ conv_cfg,
+ planes,
+ planes,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+ else:
+ assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
+ self.conv2 = build_conv_layer(
+ dcn,
+ planes,
+ planes,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ conv_cfg,
+ planes,
+ planes * self.expansion,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+
+ if self.with_gcb:
+ gcb_inplanes = planes * self.expansion
+ self.context_block = ContextBlock(inplanes=gcb_inplanes, **gcb)
+
+ # gen_attention
+ if self.with_gen_attention:
+ self.gen_attention_block = GeneralizedAttention(
+ planes, **gen_attention)
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ @property
+ def norm2(self):
+ return getattr(self, self.norm2_name)
+
+ @property
+ def norm3(self):
+ return getattr(self, self.norm3_name)
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.norm2(out)
+ out = self.relu(out)
+
+ if self.with_gen_attention:
+ out = self.gen_attention_block(out)
+
+ out = self.conv3(out)
+ out = self.norm3(out)
+
+ if self.with_gcb:
+ out = self.context_block(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def make_res_layer(block,
+ inplanes,
+ planes,
+ blocks,
+ stride=1,
+ dilation=1,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ dcn=None,
+ gcb=None,
+ gen_attention=None,
+ gen_attention_blocks=[]):
+ downsample = None
+ if stride != 1 or inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(norm_cfg, planes * block.expansion)[1],
+ )
+
+ layers = []
+ layers.append(
+ block(
+ inplanes=inplanes,
+ planes=planes,
+ stride=stride,
+ dilation=dilation,
+ downsample=downsample,
+ style=style,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ dcn=dcn,
+ gcb=gcb,
+ gen_attention=gen_attention if
+ (0 in gen_attention_blocks) else None))
+ inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(
+ block(
+ inplanes=inplanes,
+ planes=planes,
+ stride=1,
+ dilation=dilation,
+ style=style,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ dcn=dcn,
+ gcb=gcb,
+ gen_attention=gen_attention if
+ (i in gen_attention_blocks) else None))
+
+ return nn.Sequential(*layers)
+
+
+@BACKBONES.register_module
+class ResNet(nn.Module):
+ """ResNet backbone.
+
+ Args:
+ depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
+ in_channels (int): Number of input image channels. Normally 3.
+ num_stages (int): Resnet stages, normally 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmdet.models import ResNet
+ >>> import torch
+ >>> self = ResNet(depth=18)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 64, 8, 8)
+ (1, 128, 4, 4)
+ (1, 256, 2, 2)
+ (1, 512, 1, 1)
+ """
+
+ arch_settings = {
+ 18: (BasicBlock, (2, 2, 2, 2)),
+ 34: (BasicBlock, (3, 4, 6, 3)),
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self,
+ depth,
+ in_channels=3,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(0, 1, 2, 3),
+ style='pytorch',
+ frozen_stages=-1,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ dcn=None,
+ stage_with_dcn=(False, False, False, False),
+ gcb=None,
+ stage_with_gcb=(False, False, False, False),
+ gen_attention=None,
+ stage_with_gen_attention=((), (), (), ()),
+ with_cp=False,
+ zero_init_residual=True):
+ super(ResNet, self).__init__()
+ if depth not in self.arch_settings:
+ raise KeyError('invalid depth {} for resnet'.format(depth))
+ self.depth = depth
+ self.num_stages = num_stages
+ assert num_stages >= 1 and num_stages <= 4
+ self.strides = strides
+ self.dilations = dilations
+ assert len(strides) == len(dilations) == num_stages
+ self.out_indices = out_indices
+ assert max(out_indices) < num_stages
+ self.style = style
+ self.frozen_stages = frozen_stages
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.with_cp = with_cp
+ self.norm_eval = norm_eval
+ self.dcn = dcn
+ self.stage_with_dcn = stage_with_dcn
+ if dcn is not None:
+ assert len(stage_with_dcn) == num_stages
+ self.gen_attention = gen_attention
+ self.gcb = gcb
+ self.stage_with_gcb = stage_with_gcb
+ if gcb is not None:
+ assert len(stage_with_gcb) == num_stages
+ self.zero_init_residual = zero_init_residual
+ self.block, stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ self.inplanes = 64
+
+ self._make_stem_layer(in_channels)
+
+ self.res_layers = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = strides[i]
+ dilation = dilations[i]
+ dcn = self.dcn if self.stage_with_dcn[i] else None
+ gcb = self.gcb if self.stage_with_gcb[i] else None
+ planes = 64 * 2**i
+ res_layer = make_res_layer(
+ self.block,
+ self.inplanes,
+ planes,
+ num_blocks,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ dcn=dcn,
+ gcb=gcb,
+ gen_attention=gen_attention,
+ gen_attention_blocks=stage_with_gen_attention[i])
+ self.inplanes = planes * self.block.expansion
+ layer_name = 'layer{}'.format(i + 1)
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
+
+ self.feat_dim = self.block.expansion * 64 * 2**(
+ len(self.stage_blocks) - 1)
+
+ @property
+ def norm1(self):
+ return getattr(self, self.norm1_name)
+
+ def _make_stem_layer(self, in_channels):
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ in_channels,
+ 64,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ bias=False)
+ self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
+ self.add_module(self.norm1_name, norm1)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ self.norm1.eval()
+ for m in [self.conv1, self.norm1]:
+ for param in m.parameters():
+ param.requires_grad = False
+
+ for i in range(1, self.frozen_stages + 1):
+ m = getattr(self, 'layer{}'.format(i))
+ m.eval()
+ for param in m.parameters():
+ param.requires_grad = False
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+ constant_init(m, 1)
+
+ if self.dcn is not None:
+ for m in self.modules():
+ if isinstance(m, Bottleneck) and hasattr(
+ m, 'conv2_offset'):
+ constant_init(m.conv2_offset, 0)
+
+ if self.zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ constant_init(m.norm3, 0)
+ elif isinstance(m, BasicBlock):
+ constant_init(m.norm2, 0)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.norm1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super(ResNet, self).train(mode)
+ self._freeze_stages()
+ if mode and self.norm_eval:
+ for m in self.modules():
+ # trick: eval() has an effect on BatchNorm layers only
+ if isinstance(m, _BatchNorm):
+ m.eval()
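A quick way to sanity-check the `ResNet` backbone above in isolation. This is a minimal sketch, assuming this patched `mmdet` package (and the `mmcv` version it targets) is importable in the current environment; the input size, depth, and `frozen_stages` value are arbitrary choices for illustration:

```python
import torch
from mmdet.models import ResNet

# ResNet-50 with the stem and layer1 frozen, BN running stats fixed in train mode.
backbone = ResNet(
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    norm_eval=True)
backbone.init_weights(pretrained=None)
backbone.eval()

with torch.no_grad():
    feats = backbone(torch.rand(1, 3, 64, 64))
print([tuple(f.shape) for f in feats])
# [(1, 256, 16, 16), (1, 512, 8, 8), (1, 1024, 4, 4), (1, 2048, 2, 2)]

# frozen_stages=1 disables gradients for the stem (conv1/bn1) and layer1.
print(all(not p.requires_grad for p in backbone.layer1.parameters()))  # True
```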
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnext.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnext.py
new file mode 100644
index 000000000..0c184abb6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnext.py
@@ -0,0 +1,222 @@
+import math
+
+import torch.nn as nn
+
+from ..registry import BACKBONES
+from ..utils import build_conv_layer, build_norm_layer
+from .resnet import Bottleneck as _Bottleneck
+from .resnet import ResNet
+
+
+class Bottleneck(_Bottleneck):
+
+ def __init__(self, inplanes, planes, groups=1, base_width=4, **kwargs):
+ """Bottleneck block for ResNeXt.
+ If style is "pytorch", the stride-two layer is the 3x3 conv layer,
+ if it is "caffe", the stride-two layer is the first 1x1 conv layer.
+ """
+ super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
+
+ if groups == 1:
+ width = self.planes
+ else:
+ width = math.floor(self.planes * (base_width / 64)) * groups
+
+ self.norm1_name, norm1 = build_norm_layer(
+ self.norm_cfg, width, postfix=1)
+ self.norm2_name, norm2 = build_norm_layer(
+ self.norm_cfg, width, postfix=2)
+ self.norm3_name, norm3 = build_norm_layer(
+ self.norm_cfg, self.planes * self.expansion, postfix=3)
+
+ self.conv1 = build_conv_layer(
+ self.conv_cfg,
+ self.inplanes,
+ width,
+ kernel_size=1,
+ stride=self.conv1_stride,
+ bias=False)
+ self.add_module(self.norm1_name, norm1)
+ fallback_on_stride = False
+ self.with_modulated_dcn = False
+ if self.with_dcn:
+ fallback_on_stride = self.dcn.pop('fallback_on_stride', False)
+ if not self.with_dcn or fallback_on_stride:
+ self.conv2 = build_conv_layer(
+ self.conv_cfg,
+ width,
+ width,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+ else:
+ assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
+ self.conv2 = build_conv_layer(
+ self.dcn,
+ width,
+ width,
+ kernel_size=3,
+ stride=self.conv2_stride,
+ padding=self.dilation,
+ dilation=self.dilation,
+ groups=groups,
+ bias=False)
+
+ self.add_module(self.norm2_name, norm2)
+ self.conv3 = build_conv_layer(
+ self.conv_cfg,
+ width,
+ self.planes * self.expansion,
+ kernel_size=1,
+ bias=False)
+ self.add_module(self.norm3_name, norm3)
+
+
+def make_res_layer(block,
+ inplanes,
+ planes,
+ blocks,
+ stride=1,
+ dilation=1,
+ groups=1,
+ base_width=4,
+ style='pytorch',
+ with_cp=False,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ dcn=None,
+ gcb=None):
+ downsample = None
+ if stride != 1 or inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ build_conv_layer(
+ conv_cfg,
+ inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ build_norm_layer(norm_cfg, planes * block.expansion)[1],
+ )
+
+ layers = []
+ layers.append(
+ block(
+ inplanes=inplanes,
+ planes=planes,
+ stride=stride,
+ dilation=dilation,
+ downsample=downsample,
+ groups=groups,
+ base_width=base_width,
+ style=style,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ dcn=dcn,
+ gcb=gcb))
+ inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(
+ block(
+ inplanes=inplanes,
+ planes=planes,
+ stride=1,
+ dilation=dilation,
+ groups=groups,
+ base_width=base_width,
+ style=style,
+ with_cp=with_cp,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ dcn=dcn,
+ gcb=gcb))
+
+ return nn.Sequential(*layers)
+
+
+@BACKBONES.register_module
+class ResNeXt(ResNet):
+ """ResNeXt backbone.
+
+ Args:
+ depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
+ in_channels (int): Number of input image channels. Normally 3.
+ num_stages (int): Resnet stages, normally 4.
+ groups (int): Group of resnext.
+ base_width (int): Base width of resnext.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ norm_eval (bool): Whether to set norm layers to eval mode, namely,
+ freeze running stats (mean and var). Note: Effect on Batch Norm
+ and its variants only.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ zero_init_residual (bool): whether to use zero init for last norm layer
+ in resblocks to let them behave as identity.
+
+ Example:
+ >>> from mmdet.models import ResNeXt
+ >>> import torch
+ >>> self = ResNeXt(depth=50)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 32, 32)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 256, 8, 8)
+ (1, 512, 4, 4)
+ (1, 1024, 2, 2)
+ (1, 2048, 1, 1)
+ """
+
+ arch_settings = {
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self, groups=1, base_width=4, **kwargs):
+ super(ResNeXt, self).__init__(**kwargs)
+ self.groups = groups
+ self.base_width = base_width
+
+ self.inplanes = 64
+ self.res_layers = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ stride = self.strides[i]
+ dilation = self.dilations[i]
+ dcn = self.dcn if self.stage_with_dcn[i] else None
+ gcb = self.gcb if self.stage_with_gcb[i] else None
+ planes = 64 * 2**i
+ res_layer = make_res_layer(
+ self.block,
+ self.inplanes,
+ planes,
+ num_blocks,
+ stride=stride,
+ dilation=dilation,
+ groups=self.groups,
+ base_width=self.base_width,
+ style=self.style,
+ with_cp=self.with_cp,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ dcn=dcn,
+ gcb=gcb)
+ self.inplanes = planes * self.block.expansion
+ layer_name = 'layer{}'.format(i + 1)
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self._freeze_stages()
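For ResNeXt the usage is the same as above; the only difference is the grouped 3x3 convolution inside each bottleneck. A short sketch of the ResNeXt-50 32x4d variant, under the same import assumptions as before:

```python
import torch
from mmdet.models import ResNeXt

# Each bottleneck's 3x3 conv uses
# width = floor(planes * base_width / 64) * groups, e.g. planes=64 -> 4 * 32 = 128.
model = ResNeXt(depth=50, groups=32, base_width=4)
model.init_weights()
model.eval()
with torch.no_grad():
    outs = model(torch.rand(1, 3, 32, 32))
print([tuple(o.shape) for o in outs])
# [(1, 256, 8, 8), (1, 512, 4, 4), (1, 1024, 2, 2), (1, 2048, 1, 1)]
```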
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/ssd_vgg.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/ssd_vgg.py
new file mode 100644
index 000000000..c7615e2a7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/ssd_vgg.py
@@ -0,0 +1,153 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import VGG, constant_init, kaiming_init, normal_init, xavier_init
+from mmcv.runner import load_checkpoint
+
+from mmdet.utils import get_root_logger
+from ..registry import BACKBONES
+
+
+@BACKBONES.register_module
+class SSDVGG(VGG):
+ """VGG Backbone network for single-shot-detection
+
+ Args:
+ input_size (int): width and height of input, from {300, 512}.
+ depth (int): Depth of vgg, from {11, 13, 16, 19}.
+ out_indices (Sequence[int]): Output from which stages.
+
+ Example:
+ >>> self = SSDVGG(input_size=300, depth=11)
+ >>> self.eval()
+ >>> inputs = torch.rand(1, 3, 300, 300)
+ >>> level_outputs = self.forward(inputs)
+ >>> for level_out in level_outputs:
+ ... print(tuple(level_out.shape))
+ (1, 1024, 19, 19)
+ (1, 512, 10, 10)
+ (1, 256, 5, 5)
+ (1, 256, 3, 3)
+ (1, 256, 1, 1)
+ """
+ extra_setting = {
+ 300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
+ 512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),
+ }
+
+ def __init__(self,
+ input_size,
+ depth,
+ with_last_pool=False,
+ ceil_mode=True,
+ out_indices=(3, 4),
+ out_feature_indices=(22, 34),
+ l2_norm_scale=20.):
+ # TODO: in_channels for mmcv.VGG
+ super(SSDVGG, self).__init__(
+ depth,
+ with_last_pool=with_last_pool,
+ ceil_mode=ceil_mode,
+ out_indices=out_indices)
+ assert input_size in (300, 512)
+ self.input_size = input_size
+
+ self.features.add_module(
+ str(len(self.features)),
+ nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
+ self.features.add_module(
+ str(len(self.features)),
+ nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
+ self.features.add_module(
+ str(len(self.features)), nn.ReLU(inplace=True))
+ self.features.add_module(
+ str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))
+ self.features.add_module(
+ str(len(self.features)), nn.ReLU(inplace=True))
+ self.out_feature_indices = out_feature_indices
+
+ self.inplanes = 1024
+ self.extra = self._make_extra_layers(self.extra_setting[input_size])
+ self.l2_norm = L2Norm(
+ self.features[out_feature_indices[0] - 1].out_channels,
+ l2_norm_scale)
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.features.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ for m in self.extra.modules():
+ if isinstance(m, nn.Conv2d):
+ xavier_init(m, distribution='uniform')
+
+ constant_init(self.l2_norm, self.l2_norm.scale)
+
+ def forward(self, x):
+ outs = []
+ for i, layer in enumerate(self.features):
+ x = layer(x)
+ if i in self.out_feature_indices:
+ outs.append(x)
+ for i, layer in enumerate(self.extra):
+ x = F.relu(layer(x), inplace=True)
+ if i % 2 == 1:
+ outs.append(x)
+ outs[0] = self.l2_norm(outs[0])
+ if len(outs) == 1:
+ return outs[0]
+ else:
+ return tuple(outs)
+
+ def _make_extra_layers(self, outplanes):
+ layers = []
+ kernel_sizes = (1, 3)
+ num_layers = 0
+ outplane = None
+ for i in range(len(outplanes)):
+ if self.inplanes == 'S':
+ self.inplanes = outplane
+ continue
+ k = kernel_sizes[num_layers % 2]
+ if outplanes[i] == 'S':
+ outplane = outplanes[i + 1]
+ conv = nn.Conv2d(
+ self.inplanes, outplane, k, stride=2, padding=1)
+ else:
+ outplane = outplanes[i]
+ conv = nn.Conv2d(
+ self.inplanes, outplane, k, stride=1, padding=0)
+ layers.append(conv)
+ self.inplanes = outplanes[i]
+ num_layers += 1
+ if self.input_size == 512:
+ layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))
+
+ return nn.Sequential(*layers)
+
+
+class L2Norm(nn.Module):
+
+ def __init__(self, n_dims, scale=20., eps=1e-10):
+ super(L2Norm, self).__init__()
+ self.n_dims = n_dims
+ self.weight = nn.Parameter(torch.Tensor(self.n_dims))
+ self.eps = eps
+ self.scale = scale
+
+ def forward(self, x):
+ # compute the normalization in FP32 during FP16 training
+ x_float = x.float()
+ norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps
+ return (self.weight[None, :, None, None].float().expand_as(x_float) *
+ x_float / norm).type_as(x)
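The SSD-style VGG backbone returns one map from the VGG body plus one from every second extra layer. A minimal sketch mirroring the docstring example above (depth=11 is used purely for a quick check; the stock SSD300 configs use depth=16), assuming the patched `mmdet` is importable:

```python
import torch
from mmdet.models.backbones.ssd_vgg import SSDVGG

model = SSDVGG(input_size=300, depth=11)
model.init_weights(pretrained=None)
model.eval()
with torch.no_grad():
    feats = model(torch.rand(1, 3, 300, 300))
print([tuple(f.shape) for f in feats])
# Five maps of decreasing resolution: 19x19, 10x10, 5x5, 3x3, 1x1.
```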
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/__init__.py
new file mode 100644
index 000000000..a668bdb01
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/__init__.py
@@ -0,0 +1,7 @@
+from .bbox_head import BBoxHead
+from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
+from .double_bbox_head import DoubleConvFCBBoxHead
+
+__all__ = [
+ 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/bbox_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/bbox_head.py
new file mode 100644
index 000000000..8ab878a01
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/bbox_head.py
@@ -0,0 +1,282 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.utils import _pair
+
+from mmdet.core import (auto_fp16, bbox_target, delta2bbox, force_fp32,
+ multiclass_nms)
+from ..builder import build_loss
+from ..losses import accuracy
+from ..registry import HEADS
+
+
+@HEADS.register_module
+class BBoxHead(nn.Module):
+ """Simplest RoI head, with only two fc layers for classification and
+ regression respectively"""
+
+ def __init__(self,
+ with_avg_pool=False,
+ with_cls=True,
+ with_reg=True,
+ roi_feat_size=7,
+ in_channels=256,
+ num_classes=81,
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2],
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(
+ type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
+ super(BBoxHead, self).__init__()
+ assert with_cls or with_reg
+ self.with_avg_pool = with_avg_pool
+ self.with_cls = with_cls
+ self.with_reg = with_reg
+ self.roi_feat_size = _pair(roi_feat_size)
+ self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
+ self.in_channels = in_channels
+ self.num_classes = num_classes
+ self.target_means = target_means
+ self.target_stds = target_stds
+ self.reg_class_agnostic = reg_class_agnostic
+ self.fp16_enabled = False
+
+ self.loss_cls = build_loss(loss_cls)
+ self.loss_bbox = build_loss(loss_bbox)
+
+ in_channels = self.in_channels
+ if self.with_avg_pool:
+ self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
+ else:
+ in_channels *= self.roi_feat_area
+ if self.with_cls:
+ self.fc_cls = nn.Linear(in_channels, num_classes)
+ if self.with_reg:
+ out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
+ self.fc_reg = nn.Linear(in_channels, out_dim_reg)
+ self.debug_imgs = None
+
+ def init_weights(self):
+ if self.with_cls:
+ nn.init.normal_(self.fc_cls.weight, 0, 0.01)
+ nn.init.constant_(self.fc_cls.bias, 0)
+ if self.with_reg:
+ nn.init.normal_(self.fc_reg.weight, 0, 0.001)
+ nn.init.constant_(self.fc_reg.bias, 0)
+
+ @auto_fp16()
+ def forward(self, x):
+ if self.with_avg_pool:
+ x = self.avg_pool(x)
+ x = x.view(x.size(0), -1)
+ cls_score = self.fc_cls(x) if self.with_cls else None
+ bbox_pred = self.fc_reg(x) if self.with_reg else None
+ return cls_score, bbox_pred
+
+ def get_target(self, sampling_results, gt_bboxes, gt_labels,
+ rcnn_train_cfg):
+ pos_proposals = [res.pos_bboxes for res in sampling_results]
+ neg_proposals = [res.neg_bboxes for res in sampling_results]
+ pos_gt_bboxes = [res.pos_gt_bboxes for res in sampling_results]
+ pos_gt_labels = [res.pos_gt_labels for res in sampling_results]
+ reg_classes = 1 if self.reg_class_agnostic else self.num_classes
+ cls_reg_targets = bbox_target(
+ pos_proposals,
+ neg_proposals,
+ pos_gt_bboxes,
+ pos_gt_labels,
+ rcnn_train_cfg,
+ reg_classes,
+ target_means=self.target_means,
+ target_stds=self.target_stds)
+ return cls_reg_targets
+
+ @force_fp32(apply_to=('cls_score', 'bbox_pred'))
+ def loss(self,
+ cls_score,
+ bbox_pred,
+ labels,
+ label_weights,
+ bbox_targets,
+ bbox_weights,
+ reduction_override=None):
+ losses = dict()
+ if cls_score is not None:
+ avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)
+ if cls_score.numel() > 0:
+ losses['loss_cls'] = self.loss_cls(
+ cls_score,
+ labels,
+ label_weights,
+ avg_factor=avg_factor,
+ reduction_override=reduction_override)
+ losses['acc'] = accuracy(cls_score, labels)
+ if bbox_pred is not None:
+ pos_inds = labels > 0
+ if pos_inds.any():
+ if self.reg_class_agnostic:
+ pos_bbox_pred = bbox_pred.view(bbox_pred.size(0),
+ 4)[pos_inds]
+ else:
+ pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), -1,
+ 4)[pos_inds,
+ labels[pos_inds]]
+ losses['loss_bbox'] = self.loss_bbox(
+ pos_bbox_pred,
+ bbox_targets[pos_inds],
+ bbox_weights[pos_inds],
+ avg_factor=bbox_targets.size(0),
+ reduction_override=reduction_override)
+ return losses
+
+ @force_fp32(apply_to=('cls_score', 'bbox_pred'))
+ def get_det_bboxes(self,
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=False,
+ cfg=None):
+ if isinstance(cls_score, list):
+ cls_score = sum(cls_score) / float(len(cls_score))
+ scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
+
+ if bbox_pred is not None:
+ bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
+ self.target_stds, img_shape)
+ else:
+ bboxes = rois[:, 1:].clone()
+ if img_shape is not None:
+ bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
+ bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)
+
+ if rescale:
+ if isinstance(scale_factor, float):
+ bboxes /= scale_factor
+ else:
+ scale_factor = torch.from_numpy(scale_factor).to(bboxes.device)
+ bboxes = (bboxes.view(bboxes.size(0), -1, 4) /
+ scale_factor).view(bboxes.size()[0], -1)
+
+ if cfg is None:
+ return bboxes, scores
+ else:
+ det_bboxes, det_labels = multiclass_nms(bboxes, scores,
+ cfg.score_thr, cfg.nms,
+ cfg.max_per_img)
+
+ return det_bboxes, det_labels
+
+ @force_fp32(apply_to=('bbox_preds', ))
+ def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
+ """Refine bboxes during training.
+
+ Args:
+ rois (Tensor): Shape (n*bs, 5), where n is the number of images
+ per GPU and bs is the number of sampled RoIs per image. The
+ first column is the image id; the next 4 columns are x1, y1, x2, y2.
+ labels (Tensor): Shape (n*bs, ).
+ bbox_preds (Tensor): Shape (n*bs, 4) or (n*bs, 4*#class).
+ pos_is_gts (list[Tensor]): Flags indicating if each positive bbox
+ is a gt bbox.
+ img_metas (list[dict]): Meta info of each image.
+
+ Returns:
+ list[Tensor]: Refined bboxes of each image in a mini-batch.
+
+ Example:
+ >>> # xdoctest: +REQUIRES(module:kwarray)
+ >>> import kwarray
+ >>> import numpy as np
+ >>> from mmdet.core.bbox.demodata import random_boxes
+ >>> self = BBoxHead(reg_class_agnostic=True)
+ >>> n_roi = 2
+ >>> n_img = 4
+ >>> scale = 512
+ >>> rng = np.random.RandomState(0)
+ >>> img_metas = [{'img_shape': (scale, scale)}
+ ... for _ in range(n_img)]
+ >>> # Create rois in the expected format
+ >>> roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)
+ >>> img_ids = torch.randint(0, n_img, (n_roi,))
+ >>> img_ids = img_ids.float()
+ >>> rois = torch.cat([img_ids[:, None], roi_boxes], dim=1)
+ >>> # Create other args
+ >>> labels = torch.randint(0, 2, (n_roi,)).long()
+ >>> bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)
+ >>> # For each image, pretend random positive boxes are gts
+ >>> is_label_pos = (labels.numpy() > 0).astype(np.int)
+ >>> lbl_per_img = kwarray.group_items(is_label_pos,
+ ... img_ids.numpy())
+ >>> pos_per_img = [sum(lbl_per_img.get(gid, []))
+ ... for gid in range(n_img)]
+ >>> pos_is_gts = [
+ >>> torch.randint(0, 2, (npos,)).byte().sort(
+ >>> descending=True)[0]
+ >>> for npos in pos_per_img
+ >>> ]
+ >>> bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,
+ >>> pos_is_gts, img_metas)
+ >>> print(bboxes_list)
+ """
+ img_ids = rois[:, 0].long().unique(sorted=True)
+ assert img_ids.numel() <= len(img_metas)
+
+ bboxes_list = []
+ for i in range(len(img_metas)):
+ inds = torch.nonzero(rois[:, 0] == i).squeeze(dim=1)
+ num_rois = inds.numel()
+
+ bboxes_ = rois[inds, 1:]
+ label_ = labels[inds]
+ bbox_pred_ = bbox_preds[inds]
+ img_meta_ = img_metas[i]
+ pos_is_gts_ = pos_is_gts[i]
+
+ bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,
+ img_meta_)
+
+ # filter gt bboxes
+ pos_keep = 1 - pos_is_gts_
+ keep_inds = pos_is_gts_.new_ones(num_rois)
+ keep_inds[:len(pos_is_gts_)] = pos_keep
+
+ bboxes_list.append(bboxes[keep_inds])
+
+ return bboxes_list
+
+ @force_fp32(apply_to=('bbox_pred', ))
+ def regress_by_class(self, rois, label, bbox_pred, img_meta):
+ """Regress the bbox for the predicted class. Used in Cascade R-CNN.
+
+ Args:
+ rois (Tensor): shape (n, 4) or (n, 5)
+ label (Tensor): shape (n, )
+ bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4)
+ img_meta (dict): Image meta info.
+
+ Returns:
+ Tensor: Regressed bboxes, the same shape as input rois.
+ """
+ assert rois.size(1) == 4 or rois.size(1) == 5, repr(rois.shape)
+
+ if not self.reg_class_agnostic:
+ label = label * 4
+ inds = torch.stack((label, label + 1, label + 2, label + 3), 1)
+ bbox_pred = torch.gather(bbox_pred, 1, inds)
+ assert bbox_pred.size(1) == 4
+
+ if rois.size(1) == 4:
+ new_rois = delta2bbox(rois, bbox_pred, self.target_means,
+ self.target_stds, img_meta['img_shape'])
+ else:
+ bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
+ self.target_stds, img_meta['img_shape'])
+ new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)
+
+ return new_rois
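`BBoxHead` itself is just two fully connected layers over flattened RoI features. A minimal sketch of its forward contract, assuming the patched `mmdet` is importable; the number of RoIs and classes below are arbitrary:

```python
import torch
from mmdet.models.bbox_heads import BBoxHead

head = BBoxHead(in_channels=256, roi_feat_size=7, num_classes=81)
head.init_weights()
roi_feats = torch.rand(8, 256, 7, 7)   # 8 sampled RoIs with 256-channel 7x7 features
cls_score, bbox_pred = head(roi_feats)
print(cls_score.shape)   # torch.Size([8, 81])
print(bbox_pred.shape)   # torch.Size([8, 324]); 4 deltas per class (class-specific regression)
```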
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/convfc_bbox_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/convfc_bbox_head.py
new file mode 100644
index 000000000..f0f89778e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/convfc_bbox_head.py
@@ -0,0 +1,187 @@
+import torch.nn as nn
+
+from ..registry import HEADS
+from ..utils import ConvModule
+from .bbox_head import BBoxHead
+
+
+@HEADS.register_module
+class ConvFCBBoxHead(BBoxHead):
+ r"""More general bbox head, with shared conv and fc layers and two optional
+ separated branches.
+
+ /-> cls convs -> cls fcs -> cls
+ shared convs -> shared fcs
+ \-> reg convs -> reg fcs -> reg
+ """ # noqa: W605
+
+ def __init__(self,
+ num_shared_convs=0,
+ num_shared_fcs=0,
+ num_cls_convs=0,
+ num_cls_fcs=0,
+ num_reg_convs=0,
+ num_reg_fcs=0,
+ conv_out_channels=256,
+ fc_out_channels=1024,
+ conv_cfg=None,
+ norm_cfg=None,
+ *args,
+ **kwargs):
+ super(ConvFCBBoxHead, self).__init__(*args, **kwargs)
+ assert (num_shared_convs + num_shared_fcs + num_cls_convs +
+ num_cls_fcs + num_reg_convs + num_reg_fcs > 0)
+ if num_cls_convs > 0 or num_reg_convs > 0:
+ assert num_shared_fcs == 0
+ if not self.with_cls:
+ assert num_cls_convs == 0 and num_cls_fcs == 0
+ if not self.with_reg:
+ assert num_reg_convs == 0 and num_reg_fcs == 0
+ self.num_shared_convs = num_shared_convs
+ self.num_shared_fcs = num_shared_fcs
+ self.num_cls_convs = num_cls_convs
+ self.num_cls_fcs = num_cls_fcs
+ self.num_reg_convs = num_reg_convs
+ self.num_reg_fcs = num_reg_fcs
+ self.conv_out_channels = conv_out_channels
+ self.fc_out_channels = fc_out_channels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ # add shared convs and fcs
+ self.shared_convs, self.shared_fcs, last_layer_dim = \
+ self._add_conv_fc_branch(
+ self.num_shared_convs, self.num_shared_fcs, self.in_channels,
+ True)
+ self.shared_out_channels = last_layer_dim
+
+ # add cls specific branch
+ self.cls_convs, self.cls_fcs, self.cls_last_dim = \
+ self._add_conv_fc_branch(
+ self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)
+
+ # add reg specific branch
+ self.reg_convs, self.reg_fcs, self.reg_last_dim = \
+ self._add_conv_fc_branch(
+ self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)
+
+ if self.num_shared_fcs == 0 and not self.with_avg_pool:
+ if self.num_cls_fcs == 0:
+ self.cls_last_dim *= self.roi_feat_area
+ if self.num_reg_fcs == 0:
+ self.reg_last_dim *= self.roi_feat_area
+
+ self.relu = nn.ReLU(inplace=True)
+ # reconstruct fc_cls and fc_reg since input channels are changed
+ if self.with_cls:
+ self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes)
+ if self.with_reg:
+ out_dim_reg = (4 if self.reg_class_agnostic else 4 *
+ self.num_classes)
+ self.fc_reg = nn.Linear(self.reg_last_dim, out_dim_reg)
+
+ def _add_conv_fc_branch(self,
+ num_branch_convs,
+ num_branch_fcs,
+ in_channels,
+ is_shared=False):
+ """Add shared or separable branch
+
+ convs -> avg pool (optional) -> fcs
+ """
+ last_layer_dim = in_channels
+ # add branch specific conv layers
+ branch_convs = nn.ModuleList()
+ if num_branch_convs > 0:
+ for i in range(num_branch_convs):
+ conv_in_channels = (
+ last_layer_dim if i == 0 else self.conv_out_channels)
+ branch_convs.append(
+ ConvModule(
+ conv_in_channels,
+ self.conv_out_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ last_layer_dim = self.conv_out_channels
+ # add branch specific fc layers
+ branch_fcs = nn.ModuleList()
+ if num_branch_fcs > 0:
+ # for shared branch, only consider self.with_avg_pool
+ # for separated branches, also consider self.num_shared_fcs
+ if (is_shared
+ or self.num_shared_fcs == 0) and not self.with_avg_pool:
+ last_layer_dim *= self.roi_feat_area
+ for i in range(num_branch_fcs):
+ fc_in_channels = (
+ last_layer_dim if i == 0 else self.fc_out_channels)
+ branch_fcs.append(
+ nn.Linear(fc_in_channels, self.fc_out_channels))
+ last_layer_dim = self.fc_out_channels
+ return branch_convs, branch_fcs, last_layer_dim
+
+ def init_weights(self):
+ super(ConvFCBBoxHead, self).init_weights()
+ for module_list in [self.shared_fcs, self.cls_fcs, self.reg_fcs]:
+ for m in module_list.modules():
+ if isinstance(m, nn.Linear):
+ nn.init.xavier_uniform_(m.weight)
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ # shared part
+ if self.num_shared_convs > 0:
+ for conv in self.shared_convs:
+ x = conv(x)
+
+ if self.num_shared_fcs > 0:
+ if self.with_avg_pool:
+ x = self.avg_pool(x)
+
+ x = x.flatten(1)
+
+ for fc in self.shared_fcs:
+ x = self.relu(fc(x))
+ # separate branches
+ x_cls = x
+ x_reg = x
+
+ for conv in self.cls_convs:
+ x_cls = conv(x_cls)
+ if x_cls.dim() > 2:
+ if self.with_avg_pool:
+ x_cls = self.avg_pool(x_cls)
+ x_cls = x_cls.flatten(1)
+ for fc in self.cls_fcs:
+ x_cls = self.relu(fc(x_cls))
+
+ for conv in self.reg_convs:
+ x_reg = conv(x_reg)
+ if x_reg.dim() > 2:
+ if self.with_avg_pool:
+ x_reg = self.avg_pool(x_reg)
+ x_reg = x_reg.flatten(1)
+ for fc in self.reg_fcs:
+ x_reg = self.relu(fc(x_reg))
+
+ cls_score = self.fc_cls(x_cls) if self.with_cls else None
+ bbox_pred = self.fc_reg(x_reg) if self.with_reg else None
+ return cls_score, bbox_pred
+
+
+@HEADS.register_module
+class SharedFCBBoxHead(ConvFCBBoxHead):
+
+ def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):
+ assert num_fcs >= 1
+ super(SharedFCBBoxHead, self).__init__(
+ num_shared_convs=0,
+ num_shared_fcs=num_fcs,
+ num_cls_convs=0,
+ num_cls_fcs=0,
+ num_reg_convs=0,
+ num_reg_fcs=0,
+ fc_out_channels=fc_out_channels,
+ *args,
+ **kwargs)
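The widely used "2fc" head is just `SharedFCBBoxHead` with `num_fcs=2`. A short sketch under the same assumptions as the example above:

```python
import torch
from mmdet.models.bbox_heads import SharedFCBBoxHead

head = SharedFCBBoxHead(
    num_fcs=2,
    in_channels=256,
    fc_out_channels=1024,
    roi_feat_size=7,
    num_classes=81)
head.init_weights()
cls_score, bbox_pred = head(torch.rand(16, 256, 7, 7))
print(cls_score.shape, bbox_pred.shape)  # torch.Size([16, 81]) torch.Size([16, 324])
```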
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/double_bbox_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/double_bbox_head.py
new file mode 100644
index 000000000..c8a0e2699
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/double_bbox_head.py
@@ -0,0 +1,170 @@
+import torch.nn as nn
+from mmcv.cnn.weight_init import normal_init, xavier_init
+
+from ..backbones.resnet import Bottleneck
+from ..registry import HEADS
+from ..utils import ConvModule
+from .bbox_head import BBoxHead
+
+
+class BasicResBlock(nn.Module):
+ """Basic residual block.
+
+ This block is a little different from the block in the ResNet backbone.
+ Here conv2 is a 1x1 conv, whereas both convs in the ResNet BasicBlock are 3x3.
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+ out_channels (int): Channels of the output feature map.
+ conv_cfg (dict): The config dict for convolution layers.
+ norm_cfg (dict): The config dict for normalization layers.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN')):
+ super(BasicResBlock, self).__init__()
+
+ # main path
+ self.conv1 = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size=3,
+ padding=1,
+ bias=False,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+ self.conv2 = ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ bias=False,
+ activation=None,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ # identity path
+ self.conv_identity = ConvModule(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ activation=None)
+
+ self.relu = nn.ReLU(inplace=True)
+
+ def forward(self, x):
+ identity = x
+
+ x = self.conv1(x)
+ x = self.conv2(x)
+
+ identity = self.conv_identity(identity)
+ out = x + identity
+
+ out = self.relu(out)
+ return out
+
+
+@HEADS.register_module
+class DoubleConvFCBBoxHead(BBoxHead):
+ r"""Bbox head used in Double-Head R-CNN
+
+ /-> cls
+ /-> shared convs ->
+ \-> reg
+ roi features
+ /-> cls
+ \-> shared fc ->
+ \-> reg
+ """ # noqa: W605
+
+ def __init__(self,
+ num_convs=0,
+ num_fcs=0,
+ conv_out_channels=1024,
+ fc_out_channels=1024,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ **kwargs):
+ kwargs.setdefault('with_avg_pool', True)
+ super(DoubleConvFCBBoxHead, self).__init__(**kwargs)
+ assert self.with_avg_pool
+ assert num_convs > 0
+ assert num_fcs > 0
+ self.num_convs = num_convs
+ self.num_fcs = num_fcs
+ self.conv_out_channels = conv_out_channels
+ self.fc_out_channels = fc_out_channels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ # increase the channel of input features
+ self.res_block = BasicResBlock(self.in_channels,
+ self.conv_out_channels)
+
+ # add conv heads
+ self.conv_branch = self._add_conv_branch()
+ # add fc heads
+ self.fc_branch = self._add_fc_branch()
+
+ out_dim_reg = 4 if self.reg_class_agnostic else 4 * self.num_classes
+ self.fc_reg = nn.Linear(self.conv_out_channels, out_dim_reg)
+
+ self.fc_cls = nn.Linear(self.fc_out_channels, self.num_classes)
+ self.relu = nn.ReLU(inplace=True)
+
+ def _add_conv_branch(self):
+ """Add the fc branch which consists of a sequential of conv layers"""
+ branch_convs = nn.ModuleList()
+ for i in range(self.num_convs):
+ branch_convs.append(
+ Bottleneck(
+ inplanes=self.conv_out_channels,
+ planes=self.conv_out_channels // 4,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ return branch_convs
+
+ def _add_fc_branch(self):
+ """Add the fc branch which consists of a sequential of fc layers"""
+ branch_fcs = nn.ModuleList()
+ for i in range(self.num_fcs):
+ fc_in_channels = (
+ self.in_channels *
+ self.roi_feat_area if i == 0 else self.fc_out_channels)
+ branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
+ return branch_fcs
+
+ def init_weights(self):
+ normal_init(self.fc_cls, std=0.01)
+ normal_init(self.fc_reg, std=0.001)
+
+ for m in self.fc_branch.modules():
+ if isinstance(m, nn.Linear):
+ xavier_init(m, distribution='uniform')
+
+ def forward(self, x_cls, x_reg):
+ # conv head
+ x_conv = self.res_block(x_reg)
+
+ for conv in self.conv_branch:
+ x_conv = conv(x_conv)
+
+ if self.with_avg_pool:
+ x_conv = self.avg_pool(x_conv)
+
+ x_conv = x_conv.view(x_conv.size(0), -1)
+ bbox_pred = self.fc_reg(x_conv)
+
+ # fc head
+ x_fc = x_cls.view(x_cls.size(0), -1)
+ for fc in self.fc_branch:
+ x_fc = self.relu(fc(x_fc))
+
+ cls_score = self.fc_cls(x_fc)
+
+ return cls_score, bbox_pred
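Unlike the heads above, the Double-Head variant takes two RoI feature tensors: the conv branch regresses boxes and the fc branch classifies. A minimal sketch under the same import assumptions:

```python
import torch
from mmdet.models.bbox_heads import DoubleConvFCBBoxHead

head = DoubleConvFCBBoxHead(
    num_convs=4,
    num_fcs=2,
    in_channels=256,
    conv_out_channels=1024,
    fc_out_channels=1024,
    roi_feat_size=7,
    num_classes=81)
head.init_weights()
x_cls = torch.rand(4, 256, 7, 7)  # RoI features fed to the fc (classification) branch
x_reg = torch.rand(4, 256, 7, 7)  # RoI features fed to the conv (regression) branch
cls_score, bbox_pred = head(x_cls, x_reg)
print(cls_score.shape, bbox_pred.shape)  # torch.Size([4, 81]) torch.Size([4, 324])
```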
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/builder.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/builder.py
new file mode 100644
index 000000000..dc82ab711
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/builder.py
@@ -0,0 +1,43 @@
+from torch import nn
+
+from mmdet.utils import build_from_cfg
+from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
+ ROI_EXTRACTORS, SHARED_HEADS)
+
+
+def build(cfg, registry, default_args=None):
+ if isinstance(cfg, list):
+ modules = [
+ build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+ ]
+ return nn.Sequential(*modules)
+ else:
+ return build_from_cfg(cfg, registry, default_args)
+
+
+def build_backbone(cfg):
+ return build(cfg, BACKBONES)
+
+
+def build_neck(cfg):
+ return build(cfg, NECKS)
+
+
+def build_roi_extractor(cfg):
+ return build(cfg, ROI_EXTRACTORS)
+
+
+def build_shared_head(cfg):
+ return build(cfg, SHARED_HEADS)
+
+
+def build_head(cfg):
+ return build(cfg, HEADS)
+
+
+def build_loss(cfg):
+ return build(cfg, LOSSES)
+
+
+def build_detector(cfg, train_cfg=None, test_cfg=None):
+ return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
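In practice all of these modules are instantiated from config dicts: `build()` looks the `type` key up in the given registry and passes the remaining keys to the constructor. A minimal sketch, assuming the registry pattern of this mmdet version, where the registered class name is the `type` key:

```python
from mmdet.models.builder import build_backbone

backbone_cfg = dict(
    type='ResNet',            # registry key: the class name registered in BACKBONES
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    style='pytorch')
backbone = build_backbone(backbone_cfg)
print(type(backbone).__name__)  # ResNet
```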
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/__init__.py
new file mode 100644
index 000000000..e7aad355d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/__init__.py
@@ -0,0 +1,27 @@
+from .atss import ATSS
+from .base import BaseDetector
+from .cascade_rcnn import CascadeRCNN
+from .double_head_rcnn import DoubleHeadRCNN
+from .fast_rcnn import FastRCNN
+from .faster_rcnn import FasterRCNN
+from .fcos import FCOS
+from .fovea import FOVEA
+from .grid_rcnn import GridRCNN
+from .htc import HybridTaskCascade
+from .mask_rcnn import MaskRCNN
+from .mask_scoring_rcnn import MaskScoringRCNN
+from .reppoints_detector import RepPointsDetector
+from .retinanet import RetinaNet
+from .rpn import RPN
+from .single_stage import SingleStageDetector
+from .single_stage_ins import SingleStageInsDetector
+from .two_stage import TwoStageDetector
+from .solo import SOLO
+from .solov2 import SOLOv2
+
+__all__ = [
+ 'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
+ 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',
+ 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN',
+ 'RepPointsDetector', 'FOVEA', 'SingleStageInsDetector', 'SOLO', 'SOLOv2'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/atss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/atss.py
new file mode 100644
index 000000000..ac22bf928
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/atss.py
@@ -0,0 +1,16 @@
+from ..registry import DETECTORS
+from .single_stage import SingleStageDetector
+
+
+@DETECTORS.register_module
+class ATSS(SingleStageDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg,
+ test_cfg, pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/base.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/base.py
new file mode 100644
index 000000000..82f91bd10
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/base.py
@@ -0,0 +1,193 @@
+from abc import ABCMeta, abstractmethod
+
+import mmcv
+import numpy as np
+import pycocotools.mask as maskUtils
+import torch.nn as nn
+
+from mmdet.core import auto_fp16, get_classes, tensor2imgs
+from mmdet.utils import print_log
+
+
+class BaseDetector(nn.Module, metaclass=ABCMeta):
+ """Base class for detectors"""
+
+ def __init__(self):
+ super(BaseDetector, self).__init__()
+ self.fp16_enabled = False
+
+ @property
+ def with_neck(self):
+ return hasattr(self, 'neck') and self.neck is not None
+
+ @property
+ def with_mask_feat_head(self):
+ return hasattr(self, 'mask_feat_head') and \
+ self.mask_feat_head is not None
+
+ @property
+ def with_shared_head(self):
+ return hasattr(self, 'shared_head') and self.shared_head is not None
+
+ @property
+ def with_bbox(self):
+ return hasattr(self, 'bbox_head') and self.bbox_head is not None
+
+ @property
+ def with_mask(self):
+ return hasattr(self, 'mask_head') and self.mask_head is not None
+
+ @abstractmethod
+ def extract_feat(self, imgs):
+ pass
+
+ def extract_feats(self, imgs):
+ assert isinstance(imgs, list)
+ for img in imgs:
+ yield self.extract_feat(img)
+
+ @abstractmethod
+ def forward_train(self, imgs, img_metas, **kwargs):
+ """
+ Args:
+ imgs (list[Tensor]): list of tensors of shape (1, C, H, W).
+ Typically these should be mean centered and std scaled.
+
+ img_metas (list[dict]): list of image info dict where each dict
+ has:
+ 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmdet/datasets/pipelines/formatting.py:Collect`.
+
+ **kwargs: specific to concrete implementation
+ """
+ pass
+
+ async def async_simple_test(self, img, img_meta, **kwargs):
+ raise NotImplementedError
+
+ @abstractmethod
+ def simple_test(self, img, img_meta, **kwargs):
+ pass
+
+ @abstractmethod
+ def aug_test(self, imgs, img_metas, **kwargs):
+ pass
+
+ def init_weights(self, pretrained=None):
+ if pretrained is not None:
+ print_log('load model from: {}'.format(pretrained), logger='root')
+
+ async def aforward_test(self, *, img, img_meta, **kwargs):
+ for var, name in [(img, 'img'), (img_meta, 'img_meta')]:
+ if not isinstance(var, list):
+ raise TypeError('{} must be a list, but got {}'.format(
+ name, type(var)))
+
+ num_augs = len(img)
+ if num_augs != len(img_meta):
+ raise ValueError(
+ 'num of augmentations ({}) != num of image meta ({})'.format(
+ len(img), len(img_meta)))
+ # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
+ imgs_per_gpu = img[0].size(0)
+ assert imgs_per_gpu == 1
+
+ if num_augs == 1:
+ return await self.async_simple_test(img[0], img_meta[0], **kwargs)
+ else:
+ raise NotImplementedError
+
+ def forward_test(self, imgs, img_metas, **kwargs):
+ """
+ Args:
+ imgs (List[Tensor]): the outer list indicates test-time
+ augmentations and inner Tensor should have a shape NxCxHxW,
+ which contains all images in the batch.
+ img_metas (List[List[dict]]): the outer list indicates test-time
+ augs (multiscale, flip, etc.) and the inner list indicates
+ images in a batch
+ """
+ for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
+ if not isinstance(var, list):
+ raise TypeError('{} must be a list, but got {}'.format(
+ name, type(var)))
+
+ num_augs = len(imgs)
+ if num_augs != len(img_metas):
+ raise ValueError(
+ 'num of augmentations ({}) != num of image meta ({})'.format(
+ len(imgs), len(img_metas)))
+ # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
+ imgs_per_gpu = imgs[0].size(0)
+ assert imgs_per_gpu == 1
+
+ if num_augs == 1:
+ return self.simple_test(imgs[0], img_metas[0], **kwargs)
+ else:
+ return self.aug_test(imgs, img_metas, **kwargs)
+
+ @auto_fp16(apply_to=('img', ))
+ def forward(self, img, img_meta, return_loss=True, **kwargs):
+ """
+ Calls either forward_train or forward_test depending on whether
+ return_loss=True. Note this setting will change the expected inputs.
+ When `return_loss=True`, img and img_meta are single-nested (i.e.
+ Tensor and List[dict]), and when `return_loss=False`, img and img_meta
+ should be double nested (i.e. List[Tensor], List[List[dict]]), with
+ the outer list indicating test time augmentations.
+ """
+ if return_loss:
+ return self.forward_train(img, img_meta, **kwargs)
+ else:
+ return self.forward_test(img, img_meta, **kwargs)
+
+ def show_result(self, data, result, dataset=None, score_thr=0.3):
+ if isinstance(result, tuple):
+ bbox_result, segm_result = result
+ else:
+ bbox_result, segm_result = result, None
+
+ img_tensor = data['img'][0]
+ img_metas = data['img_meta'][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
+ assert len(imgs) == len(img_metas)
+
+ if dataset is None:
+ class_names = self.CLASSES
+ elif isinstance(dataset, str):
+ class_names = get_classes(dataset)
+ elif isinstance(dataset, (list, tuple)):
+ class_names = dataset
+ else:
+ raise TypeError(
+ 'dataset must be a valid dataset name or a sequence'
+ ' of class names, not {}'.format(type(dataset)))
+
+ for img, img_meta in zip(imgs, img_metas):
+ h, w, _ = img_meta['img_shape']
+ img_show = img[:h, :w, :]
+
+ bboxes = np.vstack(bbox_result)
+ # draw segmentation masks
+ if segm_result is not None:
+ segms = mmcv.concat_list(segm_result)
+ inds = np.where(bboxes[:, -1] > score_thr)[0]
+ for i in inds:
+ color_mask = np.random.randint(
+ 0, 256, (1, 3), dtype=np.uint8)
+ mask = maskUtils.decode(segms[i]).astype(np.bool)
+ img_show[mask] = img_show[mask] * 0.5 + color_mask * 0.5
+ # draw bounding boxes
+ labels = [
+ np.full(bbox.shape[0], i, dtype=np.int32)
+ for i, bbox in enumerate(bbox_result)
+ ]
+ labels = np.concatenate(labels)
+ mmcv.imshow_det_bboxes(
+ img_show,
+ bboxes,
+ labels,
+ class_names=class_names,
+ score_thr=score_thr)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/cascade_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/cascade_rcnn.py
new file mode 100644
index 000000000..4ab1e5789
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/cascade_rcnn.py
@@ -0,0 +1,520 @@
+from __future__ import division
+
+import torch
+import torch.nn as nn
+
+from mmdet.core import (bbox2result, bbox2roi, bbox_mapping, build_assigner,
+ build_sampler, merge_aug_bboxes, merge_aug_masks,
+ multiclass_nms)
+from .. import builder
+from ..registry import DETECTORS
+from .base import BaseDetector
+from .test_mixins import RPNTestMixin
+
+
+@DETECTORS.register_module
+class CascadeRCNN(BaseDetector, RPNTestMixin):
+
+ def __init__(self,
+ num_stages,
+ backbone,
+ neck=None,
+ shared_head=None,
+ rpn_head=None,
+ bbox_roi_extractor=None,
+ bbox_head=None,
+ mask_roi_extractor=None,
+ mask_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ assert bbox_roi_extractor is not None
+ assert bbox_head is not None
+ super(CascadeRCNN, self).__init__()
+
+ self.num_stages = num_stages
+ self.backbone = builder.build_backbone(backbone)
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if rpn_head is not None:
+ self.rpn_head = builder.build_head(rpn_head)
+
+ if shared_head is not None:
+ self.shared_head = builder.build_shared_head(shared_head)
+
+ if bbox_head is not None:
+ self.bbox_roi_extractor = nn.ModuleList()
+ self.bbox_head = nn.ModuleList()
+ if not isinstance(bbox_roi_extractor, list):
+ bbox_roi_extractor = [
+ bbox_roi_extractor for _ in range(num_stages)
+ ]
+ if not isinstance(bbox_head, list):
+ bbox_head = [bbox_head for _ in range(num_stages)]
+ assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages
+ for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
+ self.bbox_roi_extractor.append(
+ builder.build_roi_extractor(roi_extractor))
+ self.bbox_head.append(builder.build_head(head))
+
+ if mask_head is not None:
+ self.mask_head = nn.ModuleList()
+ if not isinstance(mask_head, list):
+ mask_head = [mask_head for _ in range(num_stages)]
+ assert len(mask_head) == self.num_stages
+ for head in mask_head:
+ self.mask_head.append(builder.build_head(head))
+ if mask_roi_extractor is not None:
+ self.share_roi_extractor = False
+ self.mask_roi_extractor = nn.ModuleList()
+ if not isinstance(mask_roi_extractor, list):
+ mask_roi_extractor = [
+ mask_roi_extractor for _ in range(num_stages)
+ ]
+ assert len(mask_roi_extractor) == self.num_stages
+ for roi_extractor in mask_roi_extractor:
+ self.mask_roi_extractor.append(
+ builder.build_roi_extractor(roi_extractor))
+ else:
+ self.share_roi_extractor = True
+ self.mask_roi_extractor = self.bbox_roi_extractor
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_rpn(self):
+ return hasattr(self, 'rpn_head') and self.rpn_head is not None
+
+ def init_weights(self, pretrained=None):
+ super(CascadeRCNN, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ if self.with_neck:
+ if isinstance(self.neck, nn.Sequential):
+ for m in self.neck:
+ m.init_weights()
+ else:
+ self.neck.init_weights()
+ if self.with_rpn:
+ self.rpn_head.init_weights()
+ if self.with_shared_head:
+ self.shared_head.init_weights(pretrained=pretrained)
+ for i in range(self.num_stages):
+ if self.with_bbox:
+ self.bbox_roi_extractor[i].init_weights()
+ self.bbox_head[i].init_weights()
+ if self.with_mask:
+ if not self.share_roi_extractor:
+ self.mask_roi_extractor[i].init_weights()
+ self.mask_head[i].init_weights()
+
+ def extract_feat(self, img):
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def forward_dummy(self, img):
+ outs = ()
+ # backbone
+ x = self.extract_feat(img)
+ # rpn
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ outs = outs + (rpn_outs, )
+ proposals = torch.randn(1000, 4).cuda()
+ # bbox heads
+ rois = bbox2roi([proposals])
+ if self.with_bbox:
+ for i in range(self.num_stages):
+ bbox_feats = self.bbox_roi_extractor[i](
+ x[:self.bbox_roi_extractor[i].num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head[i](bbox_feats)
+ outs = outs + (cls_score, bbox_pred)
+ # mask heads
+ if self.with_mask:
+ mask_rois = rois[:100]
+ for i in range(self.num_stages):
+ mask_feats = self.mask_roi_extractor[i](
+ x[:self.mask_roi_extractor[i].num_inputs], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head[i](mask_feats)
+ outs = outs + (mask_pred, )
+ return outs
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ proposals=None):
+ """
+ Args:
+ img (Tensor): of shape (N, C, H, W) encoding input images.
+ Typically these should be mean centered and std scaled.
+
+ img_meta (list[dict]): list of image info dict where each dict has:
+ 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmdet/datasets/pipelines/formatting.py:Collect`.
+
+ gt_bboxes (list[Tensor]): each item is the ground-truth boxes of one
+ image, in [tl_x, tl_y, br_x, br_y] format.
+
+ gt_labels (list[Tensor]): class indices corresponding to each box
+
+ gt_bboxes_ignore (None | list[Tensor]): specify which bounding
+ boxes can be ignored when computing the loss.
+
+ gt_masks (None | Tensor): true segmentation masks for each box
+ used if the architecture supports a segmentation task.
+
+ proposals: override RPN proposals with custom proposals. Use when
+ `with_rpn` is False.
+
+ Returns:
+ dict[str, Tensor]: a dictionary of loss components
+ """
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ for i in range(self.num_stages):
+ self.current_stage = i
+ rcnn_train_cfg = self.train_cfg.rcnn[i]
+ lw = self.train_cfg.stage_loss_weights[i]
+
+ # assign gts and sample proposals
+ sampling_results = []
+ if self.with_bbox or self.with_mask:
+ bbox_assigner = build_assigner(rcnn_train_cfg.assigner)
+ bbox_sampler = build_sampler(
+ rcnn_train_cfg.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+
+ for j in range(num_imgs):
+ assign_result = bbox_assigner.assign(
+ proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],
+ gt_labels[j])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[j],
+ gt_bboxes[j],
+ gt_labels[j],
+ feats=[lvl_feat[j][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ bbox_roi_extractor = self.bbox_roi_extractor[i]
+ bbox_head = self.bbox_head[i]
+
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+
+ if len(rois) == 0:
+                # If there are no predicted and/or ground-truth boxes, we
+                # cannot compute the bbox or mask losses for this stage
+ continue
+
+ bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
+ rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = bbox_head(bbox_feats)
+
+ bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
+ gt_labels, rcnn_train_cfg)
+ loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
+ for name, value in loss_bbox.items():
+ losses['s{}.{}'.format(i, name)] = (
+ value * lw if 'loss' in name else value)
+
+ # mask head forward and loss
+ if self.with_mask:
+ if not self.share_roi_extractor:
+ mask_roi_extractor = self.mask_roi_extractor[i]
+ pos_rois = bbox2roi(
+ [res.pos_bboxes for res in sampling_results])
+ mask_feats = mask_roi_extractor(
+ x[:mask_roi_extractor.num_inputs], pos_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ else:
+ # reuse positive bbox feats
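+                    # build a per-RoI mask (1 for positive, 0 for negative
+                    # samples) so that mask features can be sliced directly
+                    # from the bbox features computed above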
+ pos_inds = []
+ device = bbox_feats.device
+ for res in sampling_results:
+ pos_inds.append(
+ torch.ones(
+ res.pos_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds.append(
+ torch.zeros(
+ res.neg_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds = torch.cat(pos_inds)
+ mask_feats = bbox_feats[pos_inds]
+ mask_head = self.mask_head[i]
+ mask_pred = mask_head(mask_feats)
+ mask_targets = mask_head.get_target(sampling_results, gt_masks,
+ rcnn_train_cfg)
+ pos_labels = torch.cat(
+ [res.pos_gt_labels for res in sampling_results])
+ loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
+ for name, value in loss_mask.items():
+ losses['s{}.{}'.format(i, name)] = (
+ value * lw if 'loss' in name else value)
+
+ # refine bboxes
+ if i < self.num_stages - 1:
+ pos_is_gts = [res.pos_is_gt for res in sampling_results]
+ roi_labels = bbox_targets[0] # bbox_targets is a tuple
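+                # regress the current RoIs (without tracking gradients) to
+                # produce the proposal list for the next cascade stage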
+ with torch.no_grad():
+ proposal_list = bbox_head.refine_bboxes(
+ rois, roi_labels, bbox_pred, pos_is_gts, img_meta)
+
+ return losses
+
+ def simple_test(self, img, img_meta, proposals=None, rescale=False):
+ """Run inference on a single image.
+
+ Args:
+ img (Tensor): must be in shape (N, C, H, W)
+ img_meta (list[dict]): a list with one dictionary element.
+ See `mmdet/datasets/pipelines/formatting.py:Collect` for
+ details of meta dicts.
+            proposals: if specified, overrides RPN proposals
+ rescale (bool): if True returns boxes in original image space
+
+ Returns:
+ dict: results
+ """
+ x = self.extract_feat(img)
+
+ proposal_list = self.simple_test_rpn(
+ x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
+
+ img_shape = img_meta[0]['img_shape']
+ ori_shape = img_meta[0]['ori_shape']
+ scale_factor = img_meta[0]['scale_factor']
+
+ # "ms" in variable names means multi-stage
+ ms_bbox_result = {}
+ ms_segm_result = {}
+ ms_scores = []
+ rcnn_test_cfg = self.test_cfg.rcnn
+
+ rois = bbox2roi(proposal_list)
+ for i in range(self.num_stages):
+ bbox_roi_extractor = self.bbox_roi_extractor[i]
+ bbox_head = self.bbox_head[i]
+
+ bbox_feats = bbox_roi_extractor(
+ x[:len(bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+
+ cls_score, bbox_pred = bbox_head(bbox_feats)
+ ms_scores.append(cls_score)
+
+ if i < self.num_stages - 1:
+ bbox_label = cls_score.argmax(dim=1)
+ rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
+ img_meta[0])
+
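+        # ensemble: average classification scores from all cascade stages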
+ cls_score = sum(ms_scores) / self.num_stages
+ det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=rescale,
+ cfg=rcnn_test_cfg)
+ bbox_result = bbox2result(det_bboxes, det_labels,
+ self.bbox_head[-1].num_classes)
+ ms_bbox_result['ensemble'] = bbox_result
+
+ if self.with_mask:
+ if det_bboxes.shape[0] == 0:
+ mask_classes = self.mask_head[-1].num_classes - 1
+ segm_result = [[] for _ in range(mask_classes)]
+ else:
+ if isinstance(scale_factor, float): # aspect ratio fixed
+ _bboxes = (
+ det_bboxes[:, :4] *
+ scale_factor if rescale else det_bboxes)
+ else:
+ _bboxes = (
+ det_bboxes[:, :4] *
+ torch.from_numpy(scale_factor).to(det_bboxes.device)
+ if rescale else det_bboxes)
+
+ mask_rois = bbox2roi([_bboxes])
+ aug_masks = []
+ for i in range(self.num_stages):
+ mask_roi_extractor = self.mask_roi_extractor[i]
+ mask_feats = mask_roi_extractor(
+ x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head[i](mask_feats)
+ aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+ merged_masks = merge_aug_masks(aug_masks,
+ [img_meta] * self.num_stages,
+ self.test_cfg.rcnn)
+ segm_result = self.mask_head[-1].get_seg_masks(
+ merged_masks, _bboxes, det_labels, rcnn_test_cfg,
+ ori_shape, scale_factor, rescale)
+ ms_segm_result['ensemble'] = segm_result
+
+ if self.with_mask:
+ results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
+ else:
+ results = ms_bbox_result['ensemble']
+
+ return results
+
+ def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
+ """Test with augmentations.
+
+ If rescale is False, then returned bboxes and masks will fit the scale
+ of imgs[0].
+ """
+ # recompute feats to save memory
+ proposal_list = self.aug_test_rpn(
+ self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
+
+ rcnn_test_cfg = self.test_cfg.rcnn
+ aug_bboxes = []
+ aug_scores = []
+ for x, img_meta in zip(self.extract_feats(imgs), img_metas):
+ # only one image in the batch
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+
+ proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
+ scale_factor, flip)
+ # "ms" in variable names means multi-stage
+ ms_scores = []
+
+ rois = bbox2roi([proposals])
+ for i in range(self.num_stages):
+ bbox_roi_extractor = self.bbox_roi_extractor[i]
+ bbox_head = self.bbox_head[i]
+
+ bbox_feats = bbox_roi_extractor(
+ x[:len(bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+
+ cls_score, bbox_pred = bbox_head(bbox_feats)
+ ms_scores.append(cls_score)
+
+ if i < self.num_stages - 1:
+ bbox_label = cls_score.argmax(dim=1)
+ rois = bbox_head.regress_by_class(rois, bbox_label,
+ bbox_pred, img_meta[0])
+
+ cls_score = sum(ms_scores) / float(len(ms_scores))
+ bboxes, scores = self.bbox_head[-1].get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=False,
+ cfg=None)
+ aug_bboxes.append(bboxes)
+ aug_scores.append(scores)
+
+ # after merging, bboxes will be rescaled to the original image size
+ merged_bboxes, merged_scores = merge_aug_bboxes(
+ aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
+ det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
+ rcnn_test_cfg.score_thr,
+ rcnn_test_cfg.nms,
+ rcnn_test_cfg.max_per_img)
+
+ bbox_result = bbox2result(det_bboxes, det_labels,
+ self.bbox_head[-1].num_classes)
+
+ if self.with_mask:
+ if det_bboxes.shape[0] == 0:
+                segm_result = [
+                    [] for _ in range(self.mask_head[-1].num_classes - 1)
+                ]
+ else:
+ aug_masks = []
+ aug_img_metas = []
+ for x, img_meta in zip(self.extract_feats(imgs), img_metas):
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+ _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
+ scale_factor, flip)
+ mask_rois = bbox2roi([_bboxes])
+ for i in range(self.num_stages):
+ mask_feats = self.mask_roi_extractor[i](
+ x[:len(self.mask_roi_extractor[i].featmap_strides
+ )], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head[i](mask_feats)
+ aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+ aug_img_metas.append(img_meta)
+ merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
+ self.test_cfg.rcnn)
+
+ ori_shape = img_metas[0][0]['ori_shape']
+ segm_result = self.mask_head[-1].get_seg_masks(
+ merged_masks,
+ det_bboxes,
+ det_labels,
+ rcnn_test_cfg,
+ ori_shape,
+ scale_factor=1.0,
+ rescale=False)
+ return bbox_result, segm_result
+ else:
+ return bbox_result
+
+ def show_result(self, data, result, **kwargs):
+ if self.with_mask:
+ ms_bbox_result, ms_segm_result = result
+ if isinstance(ms_bbox_result, dict):
+ result = (ms_bbox_result['ensemble'],
+ ms_segm_result['ensemble'])
+ else:
+ if isinstance(result, dict):
+ result = result['ensemble']
+ super(CascadeRCNN, self).show_result(data, result, **kwargs)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/double_head_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/double_head_rcnn.py
new file mode 100644
index 000000000..7a783353f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/double_head_rcnn.py
@@ -0,0 +1,178 @@
+import torch
+
+from mmdet.core import bbox2roi, build_assigner, build_sampler
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class DoubleHeadRCNN(TwoStageDetector):
+
+ def __init__(self, reg_roi_scale_factor, **kwargs):
+ super().__init__(**kwargs)
+ self.reg_roi_scale_factor = reg_roi_scale_factor
+
+ def forward_dummy(self, img):
+ outs = ()
+ # backbone
+ x = self.extract_feat(img)
+ # rpn
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ outs = outs + (rpn_outs, )
+ proposals = torch.randn(1000, 4).cuda()
+ # bbox head
+ rois = bbox2roi([proposals])
+ bbox_cls_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ bbox_reg_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs],
+ rois,
+ roi_scale_factor=self.reg_roi_scale_factor)
+ if self.with_shared_head:
+ bbox_cls_feats = self.shared_head(bbox_cls_feats)
+ bbox_reg_feats = self.shared_head(bbox_reg_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)
+ outs += (cls_score, bbox_pred)
+ return outs
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ proposals=None):
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ # RPN forward and loss
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ # assign gts and sample proposals
+ if self.with_bbox or self.with_mask:
+ bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
+ bbox_sampler = build_sampler(
+ self.train_cfg.rcnn.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+ sampling_results = []
+ for i in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[i],
+ gt_bboxes[i],
+ gt_bboxes_ignore[i],
+ gt_labels[i])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[i],
+ gt_bboxes[i],
+ gt_labels[i],
+ feats=[lvl_feat[i][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ if self.with_bbox:
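+            # Double-Head R-CNN pools two sets of RoI features: one for
+            # classification and one for box regression, with the
+            # regression RoIs rescaled by `reg_roi_scale_factor`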
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+ # TODO: a more flexible way to decide which feature maps to use
+ bbox_cls_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ bbox_reg_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs],
+ rois,
+ roi_scale_factor=self.reg_roi_scale_factor)
+ if self.with_shared_head:
+ bbox_cls_feats = self.shared_head(bbox_cls_feats)
+ bbox_reg_feats = self.shared_head(bbox_reg_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_cls_feats,
+ bbox_reg_feats)
+
+ bbox_targets = self.bbox_head.get_target(sampling_results,
+ gt_bboxes, gt_labels,
+ self.train_cfg.rcnn)
+ loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
+ *bbox_targets)
+ losses.update(loss_bbox)
+
+ # mask head forward and loss
+ if self.with_mask:
+ if not self.share_roi_extractor:
+ pos_rois = bbox2roi(
+ [res.pos_bboxes for res in sampling_results])
+ mask_feats = self.mask_roi_extractor(
+ x[:self.mask_roi_extractor.num_inputs], pos_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ else:
+ pos_inds = []
+ device = bbox_cls_feats.device
+ for res in sampling_results:
+ pos_inds.append(
+ torch.ones(
+ res.pos_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds.append(
+ torch.zeros(
+ res.neg_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds = torch.cat(pos_inds)
+ mask_feats = bbox_cls_feats[pos_inds]
+ mask_pred = self.mask_head(mask_feats)
+
+ mask_targets = self.mask_head.get_target(sampling_results,
+ gt_masks,
+ self.train_cfg.rcnn)
+ pos_labels = torch.cat(
+ [res.pos_gt_labels for res in sampling_results])
+ loss_mask = self.mask_head.loss(mask_pred, mask_targets,
+ pos_labels)
+ losses.update(loss_mask)
+
+ return losses
+
+ def simple_test_bboxes(self,
+ x,
+ img_meta,
+ proposals,
+ rcnn_test_cfg,
+ rescale=False):
+ """Test only det bboxes without augmentation."""
+ rois = bbox2roi(proposals)
+ bbox_cls_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ bbox_reg_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs],
+ rois,
+ roi_scale_factor=self.reg_roi_scale_factor)
+ if self.with_shared_head:
+ bbox_cls_feats = self.shared_head(bbox_cls_feats)
+ bbox_reg_feats = self.shared_head(bbox_reg_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=rescale,
+ cfg=rcnn_test_cfg)
+ return det_bboxes, det_labels
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fast_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fast_rcnn.py
new file mode 100644
index 000000000..8e4231855
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fast_rcnn.py
@@ -0,0 +1,61 @@
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class FastRCNN(TwoStageDetector):
+
+ def __init__(self,
+ backbone,
+ bbox_roi_extractor,
+ bbox_head,
+ train_cfg,
+ test_cfg,
+ neck=None,
+ shared_head=None,
+ mask_roi_extractor=None,
+ mask_head=None,
+ pretrained=None):
+ super(FastRCNN, self).__init__(
+ backbone=backbone,
+ neck=neck,
+ shared_head=shared_head,
+ bbox_roi_extractor=bbox_roi_extractor,
+ bbox_head=bbox_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ mask_roi_extractor=mask_roi_extractor,
+ mask_head=mask_head,
+ pretrained=pretrained)
+
+ def forward_test(self, imgs, img_metas, proposals, **kwargs):
+ """
+ Args:
+ imgs (List[Tensor]): the outer list indicates test-time
+ augmentations and inner Tensor should have a shape NxCxHxW,
+ which contains all images in the batch.
+            img_metas (List[List[dict]]): the outer list indicates test-time
+ augs (multiscale, flip, etc.) and the inner list indicates
+ images in a batch
+            proposals (List[List[Tensor | None]]): predefined proposals for
+ each test-time augmentation and each item.
+ """
+ for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
+ if not isinstance(var, list):
+ raise TypeError('{} must be a list, but got {}'.format(
+ name, type(var)))
+
+ num_augs = len(imgs)
+ if num_augs != len(img_metas):
+ raise ValueError(
+ 'num of augmentations ({}) != num of image meta ({})'.format(
+ len(imgs), len(img_metas)))
+ # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
+ imgs_per_gpu = imgs[0].size(0)
+ assert imgs_per_gpu == 1
+
+ if num_augs == 1:
+ return self.simple_test(imgs[0], img_metas[0], proposals[0],
+ **kwargs)
+ else:
+ return self.aug_test(imgs, img_metas, proposals, **kwargs)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/faster_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/faster_rcnn.py
new file mode 100644
index 000000000..969cd7ccd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/faster_rcnn.py
@@ -0,0 +1,27 @@
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class FasterRCNN(TwoStageDetector):
+
+ def __init__(self,
+ backbone,
+ rpn_head,
+ bbox_roi_extractor,
+ bbox_head,
+ train_cfg,
+ test_cfg,
+ neck=None,
+ shared_head=None,
+ pretrained=None):
+ super(FasterRCNN, self).__init__(
+ backbone=backbone,
+ neck=neck,
+ shared_head=shared_head,
+ rpn_head=rpn_head,
+ bbox_roi_extractor=bbox_roi_extractor,
+ bbox_head=bbox_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fcos.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fcos.py
new file mode 100644
index 000000000..89cc5929a
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fcos.py
@@ -0,0 +1,16 @@
+from ..registry import DETECTORS
+from .single_stage import SingleStageDetector
+
+
+@DETECTORS.register_module
+class FCOS(SingleStageDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
+ test_cfg, pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fovea.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fovea.py
new file mode 100644
index 000000000..0d264bb24
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fovea.py
@@ -0,0 +1,16 @@
+from ..registry import DETECTORS
+from .single_stage import SingleStageDetector
+
+
+@DETECTORS.register_module
+class FOVEA(SingleStageDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg,
+ test_cfg, pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/grid_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/grid_rcnn.py
new file mode 100644
index 000000000..853242c16
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/grid_rcnn.py
@@ -0,0 +1,229 @@
+import torch
+
+from mmdet.core import bbox2result, bbox2roi, build_assigner, build_sampler
+from .. import builder
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class GridRCNN(TwoStageDetector):
+ """Grid R-CNN.
+
+ This detector is the implementation of:
+ - Grid R-CNN (https://arxiv.org/abs/1811.12030)
+ - Grid R-CNN Plus: Faster and Better (https://arxiv.org/abs/1906.05688)
+ """
+
+ def __init__(self,
+ backbone,
+ rpn_head,
+ bbox_roi_extractor,
+ bbox_head,
+ grid_roi_extractor,
+ grid_head,
+ train_cfg,
+ test_cfg,
+ neck=None,
+ shared_head=None,
+ pretrained=None):
+ assert grid_head is not None
+ super(GridRCNN, self).__init__(
+ backbone=backbone,
+ neck=neck,
+ shared_head=shared_head,
+ rpn_head=rpn_head,
+ bbox_roi_extractor=bbox_roi_extractor,
+ bbox_head=bbox_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained)
+
+ if grid_roi_extractor is not None:
+ self.grid_roi_extractor = builder.build_roi_extractor(
+ grid_roi_extractor)
+ self.share_roi_extractor = False
+ else:
+ self.share_roi_extractor = True
+ self.grid_roi_extractor = self.bbox_roi_extractor
+ self.grid_head = builder.build_head(grid_head)
+
+ self.init_extra_weights()
+
+ def init_extra_weights(self):
+ self.grid_head.init_weights()
+ if not self.share_roi_extractor:
+ self.grid_roi_extractor.init_weights()
+
+ def _random_jitter(self, sampling_results, img_metas, amplitude=0.15):
+ """Ramdom jitter positive proposals for training."""
+ for sampling_result, img_meta in zip(sampling_results, img_metas):
+ bboxes = sampling_result.pos_bboxes
+ random_offsets = bboxes.new_empty(bboxes.shape[0], 4).uniform_(
+ -amplitude, amplitude)
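+            # columns 0-1 of the offsets shift the box center by a fraction
+            # of its width/height; columns 2-3 rescale the width/height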
+ # before jittering
+ cxcy = (bboxes[:, 2:4] + bboxes[:, :2]) / 2
+ wh = (bboxes[:, 2:4] - bboxes[:, :2]).abs()
+ # after jittering
+ new_cxcy = cxcy + wh * random_offsets[:, :2]
+ new_wh = wh * (1 + random_offsets[:, 2:])
+ # xywh to xyxy
+ new_x1y1 = (new_cxcy - new_wh / 2)
+ new_x2y2 = (new_cxcy + new_wh / 2)
+ new_bboxes = torch.cat([new_x1y1, new_x2y2], dim=1)
+ # clip bboxes
+ max_shape = img_meta['img_shape']
+ if max_shape is not None:
+ new_bboxes[:, 0::2].clamp_(min=0, max=max_shape[1] - 1)
+ new_bboxes[:, 1::2].clamp_(min=0, max=max_shape[0] - 1)
+
+ sampling_result.pos_bboxes = new_bboxes
+ return sampling_results
+
+ def forward_dummy(self, img):
+ outs = ()
+ # backbone
+ x = self.extract_feat(img)
+ # rpn
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ outs = outs + (rpn_outs, )
+ proposals = torch.randn(1000, 4).cuda()
+ # bbox head
+ rois = bbox2roi([proposals])
+ bbox_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_feats)
+ # grid head
+ grid_rois = rois[:100]
+ grid_feats = self.grid_roi_extractor(
+ x[:self.grid_roi_extractor.num_inputs], grid_rois)
+ if self.with_shared_head:
+ grid_feats = self.shared_head(grid_feats)
+ grid_pred = self.grid_head(grid_feats)
+ return rpn_outs, cls_score, bbox_pred, grid_pred
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ proposals=None):
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ # RPN forward and loss
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ if self.with_bbox:
+ # assign gts and sample proposals
+ bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
+ bbox_sampler = build_sampler(
+ self.train_cfg.rcnn.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+ sampling_results = []
+ for i in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[i],
+ gt_bboxes[i],
+ gt_bboxes_ignore[i],
+ gt_labels[i])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[i],
+ gt_bboxes[i],
+ gt_labels[i],
+ feats=[lvl_feat[i][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+ # TODO: a more flexible way to decide which feature maps to use
+ bbox_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_feats)
+
+ bbox_targets = self.bbox_head.get_target(sampling_results,
+ gt_bboxes, gt_labels,
+ self.train_cfg.rcnn)
+ loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
+ *bbox_targets)
+ losses.update(loss_bbox)
+
+ # Grid head forward and loss
+ sampling_results = self._random_jitter(sampling_results, img_meta)
+ pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])
+ grid_feats = self.grid_roi_extractor(
+ x[:self.grid_roi_extractor.num_inputs], pos_rois)
+ if self.with_shared_head:
+ grid_feats = self.shared_head(grid_feats)
+ # Accelerate training
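+            # limit grid-head training to at most `max_num_grid`
+            # (default 192) positive RoIs to save memory and time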
+ max_sample_num_grid = self.train_cfg.rcnn.get('max_num_grid', 192)
+            sample_idx = torch.randperm(grid_feats.shape[0])[:min(
+                grid_feats.shape[0], max_sample_num_grid)]
+ grid_feats = grid_feats[sample_idx]
+
+ grid_pred = self.grid_head(grid_feats)
+
+ grid_targets = self.grid_head.get_target(sampling_results,
+ self.train_cfg.rcnn)
+ grid_targets = grid_targets[sample_idx]
+
+ loss_grid = self.grid_head.loss(grid_pred, grid_targets)
+ losses.update(loss_grid)
+
+ return losses
+
+ def simple_test(self, img, img_meta, proposals=None, rescale=False):
+ """Test without augmentation."""
+ assert self.with_bbox, "Bbox head must be implemented."
+
+ x = self.extract_feat(img)
+
+ proposal_list = self.simple_test_rpn(
+ x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
+
+ det_bboxes, det_labels = self.simple_test_bboxes(
+ x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=False)
+
+ # pack rois into bboxes
+ grid_rois = bbox2roi([det_bboxes[:, :4]])
+ grid_feats = self.grid_roi_extractor(
+ x[:len(self.grid_roi_extractor.featmap_strides)], grid_rois)
+ if grid_rois.shape[0] != 0:
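+            # the grid head predicts grid-point heatmaps; their fused output
+            # is used to refine the detected box locations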
+ self.grid_head.test_mode = True
+ grid_pred = self.grid_head(grid_feats)
+ det_bboxes = self.grid_head.get_bboxes(det_bboxes,
+ grid_pred['fused'],
+ img_meta)
+ if rescale:
+ det_bboxes[:, :4] /= img_meta[0]['scale_factor']
+ else:
+ det_bboxes = torch.Tensor([])
+
+ bbox_results = bbox2result(det_bboxes, det_labels,
+ self.bbox_head.num_classes)
+
+ return bbox_results
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/htc.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/htc.py
new file mode 100644
index 000000000..a989e17f0
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/htc.py
@@ -0,0 +1,516 @@
+import torch
+import torch.nn.functional as F
+
+from mmdet.core import (bbox2result, bbox2roi, bbox_mapping, build_assigner,
+ build_sampler, merge_aug_bboxes, merge_aug_masks,
+ multiclass_nms)
+from .. import builder
+from ..registry import DETECTORS
+from .cascade_rcnn import CascadeRCNN
+
+
+@DETECTORS.register_module
+class HybridTaskCascade(CascadeRCNN):
+
+ def __init__(self,
+ num_stages,
+ backbone,
+ semantic_roi_extractor=None,
+ semantic_head=None,
+ semantic_fusion=('bbox', 'mask'),
+ interleaved=True,
+ mask_info_flow=True,
+ **kwargs):
+ super(HybridTaskCascade, self).__init__(num_stages, backbone, **kwargs)
+ assert self.with_bbox and self.with_mask
+ assert not self.with_shared_head # shared head not supported
+ if semantic_head is not None:
+ self.semantic_roi_extractor = builder.build_roi_extractor(
+ semantic_roi_extractor)
+ self.semantic_head = builder.build_head(semantic_head)
+
+ self.semantic_fusion = semantic_fusion
+ self.interleaved = interleaved
+ self.mask_info_flow = mask_info_flow
+
+ @property
+ def with_semantic(self):
+ if hasattr(self, 'semantic_head') and self.semantic_head is not None:
+ return True
+ else:
+ return False
+
+ def _bbox_forward_train(self,
+ stage,
+ x,
+ sampling_results,
+ gt_bboxes,
+ gt_labels,
+ rcnn_train_cfg,
+ semantic_feat=None):
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+ bbox_roi_extractor = self.bbox_roi_extractor[stage]
+ bbox_head = self.bbox_head[stage]
+ bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
+ rois)
+ # semantic feature fusion
+ # element-wise sum for original features and pooled semantic features
+ if self.with_semantic and 'bbox' in self.semantic_fusion:
+ bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],
+ rois)
+ if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:
+ bbox_semantic_feat = F.adaptive_avg_pool2d(
+ bbox_semantic_feat, bbox_feats.shape[-2:])
+ bbox_feats += bbox_semantic_feat
+
+ cls_score, bbox_pred = bbox_head(bbox_feats)
+
+ bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
+ gt_labels, rcnn_train_cfg)
+ loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
+ return loss_bbox, rois, bbox_targets, bbox_pred
+
+ def _mask_forward_train(self,
+ stage,
+ x,
+ sampling_results,
+ gt_masks,
+ rcnn_train_cfg,
+ semantic_feat=None):
+ mask_roi_extractor = self.mask_roi_extractor[stage]
+ mask_head = self.mask_head[stage]
+ pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])
+ mask_feats = mask_roi_extractor(x[:mask_roi_extractor.num_inputs],
+ pos_rois)
+
+ # semantic feature fusion
+ # element-wise sum for original features and pooled semantic features
+ if self.with_semantic and 'mask' in self.semantic_fusion:
+ mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],
+ pos_rois)
+ if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
+ mask_semantic_feat = F.adaptive_avg_pool2d(
+ mask_semantic_feat, mask_feats.shape[-2:])
+ mask_feats += mask_semantic_feat
+
+ # mask information flow
+ # forward all previous mask heads to obtain last_feat, and fuse it
+ # with the normal mask feature
+ if self.mask_info_flow:
+ last_feat = None
+ for i in range(stage):
+ last_feat = self.mask_head[i](
+ mask_feats, last_feat, return_logits=False)
+ mask_pred = mask_head(mask_feats, last_feat, return_feat=False)
+ else:
+ mask_pred = mask_head(mask_feats)
+
+ mask_targets = mask_head.get_target(sampling_results, gt_masks,
+ rcnn_train_cfg)
+ pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])
+ loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
+ return loss_mask
+
+ def _bbox_forward_test(self, stage, x, rois, semantic_feat=None):
+ bbox_roi_extractor = self.bbox_roi_extractor[stage]
+ bbox_head = self.bbox_head[stage]
+ bbox_feats = bbox_roi_extractor(
+ x[:len(bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_semantic and 'bbox' in self.semantic_fusion:
+ bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],
+ rois)
+ if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:
+ bbox_semantic_feat = F.adaptive_avg_pool2d(
+ bbox_semantic_feat, bbox_feats.shape[-2:])
+ bbox_feats += bbox_semantic_feat
+ cls_score, bbox_pred = bbox_head(bbox_feats)
+ return cls_score, bbox_pred
+
+ def _mask_forward_test(self, stage, x, bboxes, semantic_feat=None):
+ mask_roi_extractor = self.mask_roi_extractor[stage]
+ mask_head = self.mask_head[stage]
+ mask_rois = bbox2roi([bboxes])
+ mask_feats = mask_roi_extractor(
+ x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_semantic and 'mask' in self.semantic_fusion:
+ mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],
+ mask_rois)
+ if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
+ mask_semantic_feat = F.adaptive_avg_pool2d(
+ mask_semantic_feat, mask_feats.shape[-2:])
+ mask_feats += mask_semantic_feat
+ if self.mask_info_flow:
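+            # mask information flow: run all previous stage heads first,
+            # passing hidden features forward and accumulating predictions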
+ last_feat = None
+ last_pred = None
+ for i in range(stage):
+ mask_pred, last_feat = self.mask_head[i](mask_feats, last_feat)
+ if last_pred is not None:
+ mask_pred = mask_pred + last_pred
+ last_pred = mask_pred
+ mask_pred = mask_head(mask_feats, last_feat, return_feat=False)
+ if last_pred is not None:
+ mask_pred = mask_pred + last_pred
+ else:
+ mask_pred = mask_head(mask_feats)
+ return mask_pred
+
+ def forward_dummy(self, img):
+ outs = ()
+ # backbone
+ x = self.extract_feat(img)
+ # rpn
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ outs = outs + (rpn_outs, )
+ proposals = torch.randn(1000, 4).cuda()
+ # semantic head
+ if self.with_semantic:
+ _, semantic_feat = self.semantic_head(x)
+ else:
+ semantic_feat = None
+ # bbox heads
+ rois = bbox2roi([proposals])
+ for i in range(self.num_stages):
+ cls_score, bbox_pred = self._bbox_forward_test(
+ i, x, rois, semantic_feat=semantic_feat)
+ outs = outs + (cls_score, bbox_pred)
+ # mask heads
+ if self.with_mask:
+ mask_rois = rois[:100]
+ mask_roi_extractor = self.mask_roi_extractor[-1]
+ mask_feats = mask_roi_extractor(
+ x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_semantic and 'mask' in self.semantic_fusion:
+ mask_semantic_feat = self.semantic_roi_extractor(
+ [semantic_feat], mask_rois)
+ mask_feats += mask_semantic_feat
+ last_feat = None
+ for i in range(self.num_stages):
+ mask_head = self.mask_head[i]
+ if self.mask_info_flow:
+ mask_pred, last_feat = mask_head(mask_feats, last_feat)
+ else:
+ mask_pred = mask_head(mask_feats)
+ outs = outs + (mask_pred, )
+ return outs
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ gt_semantic_seg=None,
+ proposals=None):
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ # RPN part, the same as normal two-stage detectors
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ # semantic segmentation part
+ # 2 outputs: segmentation prediction and embedded features
+ if self.with_semantic:
+ semantic_pred, semantic_feat = self.semantic_head(x)
+ loss_seg = self.semantic_head.loss(semantic_pred, gt_semantic_seg)
+ losses['loss_semantic_seg'] = loss_seg
+ else:
+ semantic_feat = None
+
+ for i in range(self.num_stages):
+ self.current_stage = i
+ rcnn_train_cfg = self.train_cfg.rcnn[i]
+ lw = self.train_cfg.stage_loss_weights[i]
+
+ # assign gts and sample proposals
+ sampling_results = []
+ bbox_assigner = build_assigner(rcnn_train_cfg.assigner)
+ bbox_sampler = build_sampler(rcnn_train_cfg.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+
+ for j in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[j],
+ gt_bboxes[j],
+ gt_bboxes_ignore[j],
+ gt_labels[j])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[j],
+ gt_bboxes[j],
+ gt_labels[j],
+ feats=[lvl_feat[j][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ loss_bbox, rois, bbox_targets, bbox_pred = \
+ self._bbox_forward_train(
+ i, x, sampling_results, gt_bboxes, gt_labels,
+ rcnn_train_cfg, semantic_feat)
+ roi_labels = bbox_targets[0]
+
+ for name, value in loss_bbox.items():
+ losses['s{}.{}'.format(i, name)] = (
+ value * lw if 'loss' in name else value)
+
+ # mask head forward and loss
+ if self.with_mask:
+ # interleaved execution: use regressed bboxes by the box branch
+ # to train the mask branch
+ if self.interleaved:
+ pos_is_gts = [res.pos_is_gt for res in sampling_results]
+ with torch.no_grad():
+ proposal_list = self.bbox_head[i].refine_bboxes(
+ rois, roi_labels, bbox_pred, pos_is_gts, img_meta)
+                    # re-assign targets and re-sample RoIs using the
+                    # refined proposals
+ sampling_results = []
+ for j in range(num_imgs):
+ assign_result = bbox_assigner.assign(
+ proposal_list[j], gt_bboxes[j],
+ gt_bboxes_ignore[j], gt_labels[j])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[j],
+ gt_bboxes[j],
+ gt_labels[j],
+ feats=[lvl_feat[j][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+ loss_mask = self._mask_forward_train(i, x, sampling_results,
+ gt_masks, rcnn_train_cfg,
+ semantic_feat)
+ for name, value in loss_mask.items():
+ losses['s{}.{}'.format(i, name)] = (
+ value * lw if 'loss' in name else value)
+
+ # refine bboxes (same as Cascade R-CNN)
+ if i < self.num_stages - 1 and not self.interleaved:
+ pos_is_gts = [res.pos_is_gt for res in sampling_results]
+ with torch.no_grad():
+ proposal_list = self.bbox_head[i].refine_bboxes(
+ rois, roi_labels, bbox_pred, pos_is_gts, img_meta)
+
+ return losses
+
+ def simple_test(self, img, img_meta, proposals=None, rescale=False):
+ x = self.extract_feat(img)
+ proposal_list = self.simple_test_rpn(
+ x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
+
+ if self.with_semantic:
+ _, semantic_feat = self.semantic_head(x)
+ else:
+ semantic_feat = None
+
+ img_shape = img_meta[0]['img_shape']
+ ori_shape = img_meta[0]['ori_shape']
+ scale_factor = img_meta[0]['scale_factor']
+
+ # "ms" in variable names means multi-stage
+ ms_bbox_result = {}
+ ms_segm_result = {}
+ ms_scores = []
+ rcnn_test_cfg = self.test_cfg.rcnn
+
+ rois = bbox2roi(proposal_list)
+ for i in range(self.num_stages):
+ bbox_head = self.bbox_head[i]
+ cls_score, bbox_pred = self._bbox_forward_test(
+ i, x, rois, semantic_feat=semantic_feat)
+ ms_scores.append(cls_score)
+
+ if i < self.num_stages - 1:
+ bbox_label = cls_score.argmax(dim=1)
+ rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
+ img_meta[0])
+
+ cls_score = sum(ms_scores) / float(len(ms_scores))
+ det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=rescale,
+ cfg=rcnn_test_cfg)
+ bbox_result = bbox2result(det_bboxes, det_labels,
+ self.bbox_head[-1].num_classes)
+ ms_bbox_result['ensemble'] = bbox_result
+
+ if self.with_mask:
+ if det_bboxes.shape[0] == 0:
+ mask_classes = self.mask_head[-1].num_classes - 1
+ segm_result = [[] for _ in range(mask_classes)]
+ else:
+ _bboxes = (
+ det_bboxes[:, :4] *
+ scale_factor if rescale else det_bboxes)
+
+ mask_rois = bbox2roi([_bboxes])
+ aug_masks = []
+ mask_roi_extractor = self.mask_roi_extractor[-1]
+ mask_feats = mask_roi_extractor(
+ x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_semantic and 'mask' in self.semantic_fusion:
+ mask_semantic_feat = self.semantic_roi_extractor(
+ [semantic_feat], mask_rois)
+ mask_feats += mask_semantic_feat
+ last_feat = None
+ for i in range(self.num_stages):
+ mask_head = self.mask_head[i]
+ if self.mask_info_flow:
+ mask_pred, last_feat = mask_head(mask_feats, last_feat)
+ else:
+ mask_pred = mask_head(mask_feats)
+ aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+ merged_masks = merge_aug_masks(aug_masks,
+ [img_meta] * self.num_stages,
+ self.test_cfg.rcnn)
+ segm_result = self.mask_head[-1].get_seg_masks(
+ merged_masks, _bboxes, det_labels, rcnn_test_cfg,
+ ori_shape, scale_factor, rescale)
+ ms_segm_result['ensemble'] = segm_result
+
+ if self.with_mask:
+ results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
+ else:
+ results = ms_bbox_result['ensemble']
+
+ return results
+
+ def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
+ """Test with augmentations.
+
+ If rescale is False, then returned bboxes and masks will fit the scale
+ of imgs[0].
+ """
+ if self.with_semantic:
+ semantic_feats = [
+ self.semantic_head(feat)[1]
+ for feat in self.extract_feats(imgs)
+ ]
+ else:
+ semantic_feats = [None] * len(img_metas)
+
+ # recompute feats to save memory
+ proposal_list = self.aug_test_rpn(
+ self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
+
+ rcnn_test_cfg = self.test_cfg.rcnn
+ aug_bboxes = []
+ aug_scores = []
+ for x, img_meta, semantic in zip(
+ self.extract_feats(imgs), img_metas, semantic_feats):
+ # only one image in the batch
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+
+ proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
+ scale_factor, flip)
+ # "ms" in variable names means multi-stage
+ ms_scores = []
+
+ rois = bbox2roi([proposals])
+ for i in range(self.num_stages):
+ bbox_head = self.bbox_head[i]
+ cls_score, bbox_pred = self._bbox_forward_test(
+ i, x, rois, semantic_feat=semantic)
+ ms_scores.append(cls_score)
+
+ if i < self.num_stages - 1:
+ bbox_label = cls_score.argmax(dim=1)
+ rois = bbox_head.regress_by_class(rois, bbox_label,
+ bbox_pred, img_meta[0])
+
+ cls_score = sum(ms_scores) / float(len(ms_scores))
+ bboxes, scores = self.bbox_head[-1].get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=False,
+ cfg=None)
+ aug_bboxes.append(bboxes)
+ aug_scores.append(scores)
+
+ # after merging, bboxes will be rescaled to the original image size
+ merged_bboxes, merged_scores = merge_aug_bboxes(
+ aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
+ det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
+ rcnn_test_cfg.score_thr,
+ rcnn_test_cfg.nms,
+ rcnn_test_cfg.max_per_img)
+
+ bbox_result = bbox2result(det_bboxes, det_labels,
+ self.bbox_head[-1].num_classes)
+
+ if self.with_mask:
+ if det_bboxes.shape[0] == 0:
+                segm_result = [
+                    [] for _ in range(self.mask_head[-1].num_classes - 1)
+                ]
+ else:
+ aug_masks = []
+ aug_img_metas = []
+ for x, img_meta, semantic in zip(
+ self.extract_feats(imgs), img_metas, semantic_feats):
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+ _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
+ scale_factor, flip)
+ mask_rois = bbox2roi([_bboxes])
+ mask_feats = self.mask_roi_extractor[-1](
+ x[:len(self.mask_roi_extractor[-1].featmap_strides)],
+ mask_rois)
+ if self.with_semantic:
+ semantic_feat = semantic
+ mask_semantic_feat = self.semantic_roi_extractor(
+ [semantic_feat], mask_rois)
+ if mask_semantic_feat.shape[-2:] != mask_feats.shape[
+ -2:]:
+ mask_semantic_feat = F.adaptive_avg_pool2d(
+ mask_semantic_feat, mask_feats.shape[-2:])
+ mask_feats += mask_semantic_feat
+ last_feat = None
+ for i in range(self.num_stages):
+ mask_head = self.mask_head[i]
+ if self.mask_info_flow:
+ mask_pred, last_feat = mask_head(
+ mask_feats, last_feat)
+ else:
+ mask_pred = mask_head(mask_feats)
+ aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+ aug_img_metas.append(img_meta)
+ merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
+ self.test_cfg.rcnn)
+
+ ori_shape = img_metas[0][0]['ori_shape']
+ segm_result = self.mask_head[-1].get_seg_masks(
+ merged_masks,
+ det_bboxes,
+ det_labels,
+ rcnn_test_cfg,
+ ori_shape,
+ scale_factor=1.0,
+ rescale=False)
+ return bbox_result, segm_result
+ else:
+ return bbox_result
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_rcnn.py
new file mode 100644
index 000000000..becfdad53
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_rcnn.py
@@ -0,0 +1,31 @@
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class MaskRCNN(TwoStageDetector):
+
+ def __init__(self,
+ backbone,
+ rpn_head,
+ bbox_roi_extractor,
+ bbox_head,
+ mask_roi_extractor,
+ mask_head,
+ train_cfg,
+ test_cfg,
+ neck=None,
+ shared_head=None,
+ pretrained=None):
+ super(MaskRCNN, self).__init__(
+ backbone=backbone,
+ neck=neck,
+ shared_head=shared_head,
+ rpn_head=rpn_head,
+ bbox_roi_extractor=bbox_roi_extractor,
+ bbox_head=bbox_head,
+ mask_roi_extractor=mask_roi_extractor,
+ mask_head=mask_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_scoring_rcnn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_scoring_rcnn.py
new file mode 100644
index 000000000..f184c453b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_scoring_rcnn.py
@@ -0,0 +1,200 @@
+import torch
+
+from mmdet.core import bbox2roi, build_assigner, build_sampler
+from .. import builder
+from ..registry import DETECTORS
+from .two_stage import TwoStageDetector
+
+
+@DETECTORS.register_module
+class MaskScoringRCNN(TwoStageDetector):
+ """Mask Scoring RCNN.
+
+ https://arxiv.org/abs/1903.00241
+ """
+
+ def __init__(self,
+ backbone,
+ rpn_head,
+ bbox_roi_extractor,
+ bbox_head,
+ mask_roi_extractor,
+ mask_head,
+ train_cfg,
+ test_cfg,
+ neck=None,
+ shared_head=None,
+ mask_iou_head=None,
+ pretrained=None):
+ super(MaskScoringRCNN, self).__init__(
+ backbone=backbone,
+ neck=neck,
+ shared_head=shared_head,
+ rpn_head=rpn_head,
+ bbox_roi_extractor=bbox_roi_extractor,
+ bbox_head=bbox_head,
+ mask_roi_extractor=mask_roi_extractor,
+ mask_head=mask_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ pretrained=pretrained)
+
+ self.mask_iou_head = builder.build_head(mask_iou_head)
+ self.mask_iou_head.init_weights()
+
+ def forward_dummy(self, img):
+ raise NotImplementedError
+
+ # TODO: refactor forward_train in two stage to reduce code redundancy
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ proposals=None):
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ # RPN forward and loss
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ # assign gts and sample proposals
+ if self.with_bbox or self.with_mask:
+ bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
+ bbox_sampler = build_sampler(
+ self.train_cfg.rcnn.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+ sampling_results = []
+ for i in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[i],
+ gt_bboxes[i],
+ gt_bboxes_ignore[i],
+ gt_labels[i])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[i],
+ gt_bboxes[i],
+ gt_labels[i],
+ feats=[lvl_feat[i][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ if self.with_bbox:
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+ # TODO: a more flexible way to decide which feature maps to use
+ bbox_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_feats)
+
+ bbox_targets = self.bbox_head.get_target(sampling_results,
+ gt_bboxes, gt_labels,
+ self.train_cfg.rcnn)
+ loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
+ *bbox_targets)
+ losses.update(loss_bbox)
+
+ # mask head forward and loss
+ if self.with_mask:
+ if not self.share_roi_extractor:
+ pos_rois = bbox2roi(
+ [res.pos_bboxes for res in sampling_results])
+ mask_feats = self.mask_roi_extractor(
+ x[:self.mask_roi_extractor.num_inputs], pos_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ else:
+ pos_inds = []
+ device = bbox_feats.device
+ for res in sampling_results:
+ pos_inds.append(
+ torch.ones(
+ res.pos_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds.append(
+ torch.zeros(
+ res.neg_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds = torch.cat(pos_inds)
+ mask_feats = bbox_feats[pos_inds]
+ mask_pred = self.mask_head(mask_feats)
+
+ mask_targets = self.mask_head.get_target(sampling_results,
+ gt_masks,
+ self.train_cfg.rcnn)
+ pos_labels = torch.cat(
+ [res.pos_gt_labels for res in sampling_results])
+ loss_mask = self.mask_head.loss(mask_pred, mask_targets,
+ pos_labels)
+ losses.update(loss_mask)
+
+ # mask iou head forward and loss
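+            # the MaskIoU head predicts the IoU between each predicted mask
+            # and its ground-truth mask, supervised by `mask_iou_targets`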
+ pos_mask_pred = mask_pred[range(mask_pred.size(0)), pos_labels]
+ mask_iou_pred = self.mask_iou_head(mask_feats, pos_mask_pred)
+ pos_mask_iou_pred = mask_iou_pred[range(mask_iou_pred.size(0)),
+ pos_labels]
+ mask_iou_targets = self.mask_iou_head.get_target(
+ sampling_results, gt_masks, pos_mask_pred, mask_targets,
+ self.train_cfg.rcnn)
+ loss_mask_iou = self.mask_iou_head.loss(pos_mask_iou_pred,
+ mask_iou_targets)
+ losses.update(loss_mask_iou)
+ return losses
+
+ def simple_test_mask(self,
+ x,
+ img_meta,
+ det_bboxes,
+ det_labels,
+ rescale=False):
+ # image shape of the first image in the batch (only one)
+ ori_shape = img_meta[0]['ori_shape']
+ scale_factor = img_meta[0]['scale_factor']
+
+ if det_bboxes.shape[0] == 0:
+ segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
+ mask_scores = [[] for _ in range(self.mask_head.num_classes - 1)]
+ else:
+ # if det_bboxes is rescaled to the original image size, we need to
+ # rescale it back to the testing scale to obtain RoIs.
+ _bboxes = (
+ det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
+ mask_rois = bbox2roi([_bboxes])
+ mask_feats = self.mask_roi_extractor(
+ x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head(mask_feats)
+ segm_result = self.mask_head.get_seg_masks(mask_pred, _bboxes,
+ det_labels,
+ self.test_cfg.rcnn,
+ ori_shape, scale_factor,
+ rescale)
+ # get mask scores with mask iou head
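+            # combine the predicted mask IoU with the classification score
+            # to obtain the final mask score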
+ mask_iou_pred = self.mask_iou_head(
+ mask_feats, mask_pred[range(det_labels.size(0)),
+ det_labels + 1])
+ mask_scores = self.mask_iou_head.get_mask_scores(
+ mask_iou_pred, det_bboxes, det_labels)
+ return segm_result, mask_scores
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/reppoints_detector.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/reppoints_detector.py
new file mode 100644
index 000000000..53d698f1f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/reppoints_detector.py
@@ -0,0 +1,81 @@
+import torch
+
+from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms
+from ..registry import DETECTORS
+from .single_stage import SingleStageDetector
+
+
+@DETECTORS.register_module
+class RepPointsDetector(SingleStageDetector):
+ """RepPoints: Point Set Representation for Object Detection.
+
+ This detector is the implementation of:
+ - RepPoints detector (https://arxiv.org/pdf/1904.11490)
+ """
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(RepPointsDetector,
+ self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg,
+ pretrained)
+
+ def merge_aug_results(self, aug_bboxes, aug_scores, img_metas):
+ """Merge augmented detection bboxes and scores.
+
+ Args:
+ aug_bboxes (list[Tensor]): shape (n, 4*#class)
+ aug_scores (list[Tensor] or None): shape (n, #class)
+            img_metas (list[list[dict]]): meta info of each augmented image.
+
+ Returns:
+ tuple: (bboxes, scores)
+ """
+ recovered_bboxes = []
+ for bboxes, img_info in zip(aug_bboxes, img_metas):
+ img_shape = img_info[0]['img_shape']
+ scale_factor = img_info[0]['scale_factor']
+ flip = img_info[0]['flip']
+ bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
+ recovered_bboxes.append(bboxes)
+ bboxes = torch.cat(recovered_bboxes, dim=0)
+ if aug_scores is None:
+ return bboxes
+ else:
+ scores = torch.cat(aug_scores, dim=0)
+ return bboxes, scores
+
+ def aug_test(self, imgs, img_metas, rescale=False):
+ # recompute feats to save memory
+ feats = self.extract_feats(imgs)
+
+ aug_bboxes = []
+ aug_scores = []
+ for x, img_meta in zip(feats, img_metas):
+ # only one image in the batch
+ outs = self.bbox_head(x)
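+            # the two trailing False flags disable rescaling and NMS so that
+            # per-augmentation boxes/scores are merged before a single NMS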
+ bbox_inputs = outs + (img_meta, self.test_cfg, False, False)
+ det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0]
+ aug_bboxes.append(det_bboxes)
+ aug_scores.append(det_scores)
+
+ # after merging, bboxes will be rescaled to the original image size
+ merged_bboxes, merged_scores = self.merge_aug_results(
+ aug_bboxes, aug_scores, img_metas)
+ det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
+ self.test_cfg.score_thr,
+ self.test_cfg.nms,
+ self.test_cfg.max_per_img)
+
+ if rescale:
+ _det_bboxes = det_bboxes
+ else:
+ _det_bboxes = det_bboxes.clone()
+ _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
+ bbox_results = bbox2result(_det_bboxes, det_labels,
+ self.bbox_head.num_classes)
+ return bbox_results
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/retinanet.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/retinanet.py
new file mode 100644
index 000000000..7c93d7419
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/retinanet.py
@@ -0,0 +1,16 @@
+from ..registry import DETECTORS
+from .single_stage import SingleStageDetector
+
+
+@DETECTORS.register_module
+class RetinaNet(SingleStageDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
+ test_cfg, pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/rpn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/rpn.py
new file mode 100644
index 000000000..fafee4fc2
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/rpn.py
@@ -0,0 +1,97 @@
+import mmcv
+
+from mmdet.core import bbox_mapping, tensor2imgs
+from .. import builder
+from ..registry import DETECTORS
+from .base import BaseDetector
+from .test_mixins import RPNTestMixin
+
+
+@DETECTORS.register_module
+class RPN(BaseDetector, RPNTestMixin):
+
+ def __init__(self,
+ backbone,
+ neck,
+ rpn_head,
+ train_cfg,
+ test_cfg,
+ pretrained=None):
+ super(RPN, self).__init__()
+ self.backbone = builder.build_backbone(backbone)
+ self.neck = builder.build_neck(neck) if neck is not None else None
+ self.rpn_head = builder.build_head(rpn_head)
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+ self.init_weights(pretrained=pretrained)
+
+ def init_weights(self, pretrained=None):
+ super(RPN, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ if self.with_neck:
+ self.neck.init_weights()
+ self.rpn_head.init_weights()
+
+ def extract_feat(self, img):
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def forward_dummy(self, img):
+ x = self.extract_feat(img)
+ rpn_outs = self.rpn_head(x)
+ return rpn_outs
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes=None,
+ gt_bboxes_ignore=None):
+ if self.train_cfg.rpn.get('debug', False):
+ self.rpn_head.debug_imgs = tensor2imgs(img)
+
+ x = self.extract_feat(img)
+ rpn_outs = self.rpn_head(x)
+
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
+ losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ return losses
+
+ def simple_test(self, img, img_meta, rescale=False):
+ x = self.extract_feat(img)
+ proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn)
+ if rescale:
+ for proposals, meta in zip(proposal_list, img_meta):
+ proposals[:, :4] /= meta['scale_factor']
+ # TODO: remove this restriction
+ return proposal_list[0].cpu().numpy()
+
+ def aug_test(self, imgs, img_metas, rescale=False):
+ proposal_list = self.aug_test_rpn(
+ self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
+ if not rescale:
+ for proposals, img_meta in zip(proposal_list, img_metas[0]):
+ img_shape = img_meta['img_shape']
+ scale_factor = img_meta['scale_factor']
+ flip = img_meta['flip']
+ proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,
+ scale_factor, flip)
+ # TODO: remove this restriction
+ return proposal_list[0].cpu().numpy()
+
+ def show_result(self, data, result, dataset=None, top_k=20):
+ """Show RPN proposals on the image.
+
+ Although we assume batch size is 1, this method supports arbitrary
+ batch size.
+ """
+ img_tensor = data['img'][0]
+ img_metas = data['img_meta'][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
+ assert len(imgs) == len(img_metas)
+ for img, img_meta in zip(imgs, img_metas):
+ h, w, _ = img_meta['img_shape']
+ img_show = img[:h, :w, :]
+ mmcv.imshow_bboxes(img_show, result, top_k=top_k)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage.py
new file mode 100644
index 000000000..b25af7b82
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage.py
@@ -0,0 +1,86 @@
+import torch.nn as nn
+
+from mmdet.core import bbox2result
+from .. import builder
+from ..registry import DETECTORS
+from .base import BaseDetector
+
+
+@DETECTORS.register_module
+class SingleStageDetector(BaseDetector):
+ """Base class for single-stage detectors.
+
+ Single-stage detectors directly and densely predict bounding boxes on the
+ output features of the backbone+neck.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ bbox_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(SingleStageDetector, self).__init__()
+ self.backbone = builder.build_backbone(backbone)
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+ self.bbox_head = builder.build_head(bbox_head)
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+ self.init_weights(pretrained=pretrained)
+
+ def init_weights(self, pretrained=None):
+ super(SingleStageDetector, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ if self.with_neck:
+ if isinstance(self.neck, nn.Sequential):
+ for m in self.neck:
+ m.init_weights()
+ else:
+ self.neck.init_weights()
+ self.bbox_head.init_weights()
+
+ def extract_feat(self, img):
+ """Directly extract features from the backbone+neck
+ """
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def forward_dummy(self, img):
+ """Used for computing network flops.
+
+        See `mmdetection/tools/get_flops.py`
+ """
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x)
+ return outs
+
+ def forward_train(self,
+ img,
+ img_metas,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None):
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x)
+ loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
+ losses = self.bbox_head.loss(
+ *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ return losses
+
+ def simple_test(self, img, img_meta, rescale=False):
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x)
+ bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
+ bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
+ bbox_results = [
+ bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
+ for det_bboxes, det_labels in bbox_list
+ ]
+ return bbox_results[0]
+
+ def aug_test(self, imgs, img_metas, rescale=False):
+ raise NotImplementedError
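+
+# Note on the test path above (summary, assuming the mmdet-1.x `bbox2result`
+# helper): `simple_test` runs the head, decodes detections with `get_bboxes`
+# and returns one (n, 5) array of [x1, y1, x2, y2, score] per foreground class
+# for the single test image.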
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_ins.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_ins.py
new file mode 100644
index 000000000..773d5d22e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_ins.py
@@ -0,0 +1,96 @@
+import torch.nn as nn
+
+from mmdet.core import bbox2result
+from .. import builder
+from ..registry import DETECTORS
+from .base import BaseDetector
+
+
+@DETECTORS.register_module
+class SingleStageInsDetector(BaseDetector):
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ bbox_head=None,
+ mask_feat_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(SingleStageInsDetector, self).__init__()
+ self.backbone = builder.build_backbone(backbone)
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+ if mask_feat_head is not None:
+ self.mask_feat_head = builder.build_head(mask_feat_head)
+
+ self.bbox_head = builder.build_head(bbox_head)
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+ self.init_weights(pretrained=pretrained)
+
+ def init_weights(self, pretrained=None):
+ super(SingleStageInsDetector, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ if self.with_neck:
+ if isinstance(self.neck, nn.Sequential):
+ for m in self.neck:
+ m.init_weights()
+ else:
+ self.neck.init_weights()
+ if self.with_mask_feat_head:
+ if isinstance(self.mask_feat_head, nn.Sequential):
+ for m in self.mask_feat_head:
+ m.init_weights()
+ else:
+ self.mask_feat_head.init_weights()
+ self.bbox_head.init_weights()
+
+ def extract_feat(self, img):
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def forward_dummy(self, img):
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x)
+ return outs
+
+ def forward_train(self,
+ img,
+ img_metas,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None):
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x)
+
+ if self.with_mask_feat_head:
+ mask_feat_pred = self.mask_feat_head(
+ x[self.mask_feat_head.
+ start_level:self.mask_feat_head.end_level + 1])
+ loss_inputs = outs + (mask_feat_pred, gt_bboxes, gt_labels, gt_masks, img_metas, self.train_cfg)
+ else:
+ loss_inputs = outs + (gt_bboxes, gt_labels, gt_masks, img_metas, self.train_cfg)
+ losses = self.bbox_head.loss(
+ *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ return losses
+
+ def simple_test(self, img, img_meta, rescale=False):
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x, eval=True)
+
+ if self.with_mask_feat_head:
+ mask_feat_pred = self.mask_feat_head(
+ x[self.mask_feat_head.
+ start_level:self.mask_feat_head.end_level + 1])
+ seg_inputs = outs + (mask_feat_pred, img_meta, self.test_cfg, rescale)
+ else:
+ seg_inputs = outs + (img_meta, self.test_cfg, rescale)
+ seg_result = self.bbox_head.get_seg(*seg_inputs)
+ return seg_result
+
+ def aug_test(self, imgs, img_metas, rescale=False):
+ raise NotImplementedError
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solo.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solo.py
new file mode 100644
index 000000000..cd0df7486
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solo.py
@@ -0,0 +1,16 @@
+from .single_stage_ins import SingleStageInsDetector
+from ..registry import DETECTORS
+
+
+@DETECTORS.register_module
+class SOLO(SingleStageInsDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(SOLO, self).__init__(backbone, neck, bbox_head, None, train_cfg,
+ test_cfg, pretrained)
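+
+# Note: SOLO passes mask_feat_head=None, so SingleStageInsDetector skips the
+# shared mask-feature branch and the SOLO head alone predicts both category
+# scores and instance masks; SOLOv2 (solov2.py) supplies the extra mask
+# feature head instead.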
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solov2.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solov2.py
new file mode 100644
index 000000000..02dac9646
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solov2.py
@@ -0,0 +1,17 @@
+from .single_stage_ins import SingleStageInsDetector
+from ..registry import DETECTORS
+
+
+@DETECTORS.register_module
+class SOLOv2(SingleStageInsDetector):
+
+ def __init__(self,
+ backbone,
+ neck,
+ bbox_head,
+ mask_feat_head,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(SOLOv2, self).__init__(backbone, neck, bbox_head, mask_feat_head, train_cfg,
+ test_cfg, pretrained)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/test_mixins.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/test_mixins.py
new file mode 100644
index 000000000..84a96d167
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/test_mixins.py
@@ -0,0 +1,266 @@
+import logging
+import sys
+
+import torch
+
+from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_bboxes,
+ merge_aug_masks, merge_aug_proposals, multiclass_nms)
+
+logger = logging.getLogger(__name__)
+
+if sys.version_info >= (3, 7):
+ from mmdet.utils.contextmanagers import completed
+
+
+class RPNTestMixin(object):
+
+ if sys.version_info >= (3, 7):
+
+ async def async_test_rpn(self, x, img_meta, rpn_test_cfg):
+ sleep_interval = rpn_test_cfg.pop("async_sleep_interval", 0.025)
+ async with completed(
+ __name__, "rpn_head_forward",
+ sleep_interval=sleep_interval):
+ rpn_outs = self.rpn_head(x)
+
+ proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
+
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ return proposal_list
+
+ def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
+ rpn_outs = self.rpn_head(x)
+ proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ return proposal_list
+
+ def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
+ imgs_per_gpu = len(img_metas[0])
+ aug_proposals = [[] for _ in range(imgs_per_gpu)]
+ for x, img_meta in zip(feats, img_metas):
+ proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg)
+ for i, proposals in enumerate(proposal_list):
+ aug_proposals[i].append(proposals)
+ # reorganize the order of 'img_metas' to match the dimensions
+ # of 'aug_proposals'
+ aug_img_metas = []
+ for i in range(imgs_per_gpu):
+ aug_img_meta = []
+ for j in range(len(img_metas)):
+ aug_img_meta.append(img_metas[j][i])
+ aug_img_metas.append(aug_img_meta)
+ # after merging, proposals will be rescaled to the original image size
+ merged_proposals = [
+ merge_aug_proposals(proposals, aug_img_meta, rpn_test_cfg)
+ for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas)
+ ]
+ return merged_proposals
+
+
+class BBoxTestMixin(object):
+
+ if sys.version_info >= (3, 7):
+
+ async def async_test_bboxes(self,
+ x,
+ img_meta,
+ proposals,
+ rcnn_test_cfg,
+ rescale=False,
+ bbox_semaphore=None,
+ global_lock=None):
+ """Async test only det bboxes without augmentation."""
+ rois = bbox2roi(proposals)
+ roi_feats = self.bbox_roi_extractor(
+ x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_shared_head:
+ roi_feats = self.shared_head(roi_feats)
+ sleep_interval = rcnn_test_cfg.get("async_sleep_interval", 0.017)
+
+ async with completed(
+ __name__, "bbox_head_forward",
+ sleep_interval=sleep_interval):
+ cls_score, bbox_pred = self.bbox_head(roi_feats)
+
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=rescale,
+ cfg=rcnn_test_cfg)
+ return det_bboxes, det_labels
+
+ def simple_test_bboxes(self,
+ x,
+ img_meta,
+ proposals,
+ rcnn_test_cfg,
+ rescale=False):
+ """Test only det bboxes without augmentation."""
+ rois = bbox2roi(proposals)
+ roi_feats = self.bbox_roi_extractor(
+ x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_shared_head:
+ roi_feats = self.shared_head(roi_feats)
+ cls_score, bbox_pred = self.bbox_head(roi_feats)
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=rescale,
+ cfg=rcnn_test_cfg)
+ return det_bboxes, det_labels
+
+ def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
+ aug_bboxes = []
+ aug_scores = []
+ for x, img_meta in zip(feats, img_metas):
+ # only one image in the batch
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+ # TODO more flexible
+ proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
+ scale_factor, flip)
+ rois = bbox2roi([proposals])
+ # recompute feature maps to save GPU memory
+ roi_feats = self.bbox_roi_extractor(
+ x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
+ if self.with_shared_head:
+ roi_feats = self.shared_head(roi_feats)
+ cls_score, bbox_pred = self.bbox_head(roi_feats)
+ bboxes, scores = self.bbox_head.get_det_bboxes(
+ rois,
+ cls_score,
+ bbox_pred,
+ img_shape,
+ scale_factor,
+ rescale=False,
+ cfg=None)
+ aug_bboxes.append(bboxes)
+ aug_scores.append(scores)
+ # after merging, bboxes will be rescaled to the original image size
+ merged_bboxes, merged_scores = merge_aug_bboxes(
+ aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
+ det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
+ rcnn_test_cfg.score_thr,
+ rcnn_test_cfg.nms,
+ rcnn_test_cfg.max_per_img)
+ return det_bboxes, det_labels
+
+
+class MaskTestMixin(object):
+
+ if sys.version_info >= (3, 7):
+
+ async def async_test_mask(self,
+ x,
+ img_meta,
+ det_bboxes,
+ det_labels,
+ rescale=False,
+ mask_test_cfg=None):
+ # image shape of the first image in the batch (only one)
+ ori_shape = img_meta[0]['ori_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ if det_bboxes.shape[0] == 0:
+ segm_result = [[]
+ for _ in range(self.mask_head.num_classes - 1)]
+ else:
+ _bboxes = (
+ det_bboxes[:, :4] *
+ scale_factor if rescale else det_bboxes)
+ mask_rois = bbox2roi([_bboxes])
+ mask_feats = self.mask_roi_extractor(
+ x[:len(self.mask_roi_extractor.featmap_strides)],
+ mask_rois)
+
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ if mask_test_cfg and mask_test_cfg.get('async_sleep_interval'):
+ sleep_interval = mask_test_cfg['async_sleep_interval']
+ else:
+ sleep_interval = 0.035
+ async with completed(
+ __name__,
+ "mask_head_forward",
+ sleep_interval=sleep_interval):
+ mask_pred = self.mask_head(mask_feats)
+ segm_result = self.mask_head.get_seg_masks(
+ mask_pred, _bboxes, det_labels, self.test_cfg.rcnn,
+ ori_shape, scale_factor, rescale)
+ return segm_result
+
+ def simple_test_mask(self,
+ x,
+ img_meta,
+ det_bboxes,
+ det_labels,
+ rescale=False):
+ # image shape of the first image in the batch (only one)
+ ori_shape = img_meta[0]['ori_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ if det_bboxes.shape[0] == 0:
+ segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
+ else:
+ # if det_bboxes is rescaled to the original image size, we need to
+ # rescale it back to the testing scale to obtain RoIs.
+ if rescale and not isinstance(scale_factor, float):
+ scale_factor = torch.from_numpy(scale_factor).to(
+ det_bboxes.device)
+ _bboxes = (
+ det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
+ mask_rois = bbox2roi([_bboxes])
+ mask_feats = self.mask_roi_extractor(
+ x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head(mask_feats)
+ segm_result = self.mask_head.get_seg_masks(mask_pred, _bboxes,
+ det_labels,
+ self.test_cfg.rcnn,
+ ori_shape, scale_factor,
+ rescale)
+ return segm_result
+
+ def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
+ if det_bboxes.shape[0] == 0:
+ segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
+ else:
+ aug_masks = []
+ for x, img_meta in zip(feats, img_metas):
+ img_shape = img_meta[0]['img_shape']
+ scale_factor = img_meta[0]['scale_factor']
+ flip = img_meta[0]['flip']
+ _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
+ scale_factor, flip)
+ mask_rois = bbox2roi([_bboxes])
+ mask_feats = self.mask_roi_extractor(
+ x[:len(self.mask_roi_extractor.featmap_strides)],
+ mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head(mask_feats)
+ # convert to numpy array to save memory
+ aug_masks.append(mask_pred.sigmoid().cpu().numpy())
+ merged_masks = merge_aug_masks(aug_masks, img_metas,
+ self.test_cfg.rcnn)
+
+ ori_shape = img_metas[0][0]['ori_shape']
+ segm_result = self.mask_head.get_seg_masks(
+ merged_masks,
+ det_bboxes,
+ det_labels,
+ self.test_cfg.rcnn,
+ ori_shape,
+ scale_factor=1.0,
+ rescale=False)
+ return segm_result
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/two_stage.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/two_stage.py
new file mode 100644
index 000000000..962e0cb51
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/two_stage.py
@@ -0,0 +1,346 @@
+import torch
+import torch.nn as nn
+
+from mmdet.core import bbox2result, bbox2roi, build_assigner, build_sampler
+from .. import builder
+from ..registry import DETECTORS
+from .base import BaseDetector
+from .test_mixins import BBoxTestMixin, MaskTestMixin, RPNTestMixin
+
+
+@DETECTORS.register_module
+class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
+ MaskTestMixin):
+ """Base class for two-stage detectors.
+
+    Two-stage detectors typically consist of a region proposal network and a
+    task-specific regression head.
+ """
+
+ def __init__(self,
+ backbone,
+ neck=None,
+ shared_head=None,
+ rpn_head=None,
+ bbox_roi_extractor=None,
+ bbox_head=None,
+ mask_roi_extractor=None,
+ mask_head=None,
+ train_cfg=None,
+ test_cfg=None,
+ pretrained=None):
+ super(TwoStageDetector, self).__init__()
+ self.backbone = builder.build_backbone(backbone)
+
+ if neck is not None:
+ self.neck = builder.build_neck(neck)
+
+ if shared_head is not None:
+ self.shared_head = builder.build_shared_head(shared_head)
+
+ if rpn_head is not None:
+ self.rpn_head = builder.build_head(rpn_head)
+
+ if bbox_head is not None:
+ self.bbox_roi_extractor = builder.build_roi_extractor(
+ bbox_roi_extractor)
+ self.bbox_head = builder.build_head(bbox_head)
+
+ if mask_head is not None:
+ if mask_roi_extractor is not None:
+ self.mask_roi_extractor = builder.build_roi_extractor(
+ mask_roi_extractor)
+ self.share_roi_extractor = False
+ else:
+ self.share_roi_extractor = True
+ self.mask_roi_extractor = self.bbox_roi_extractor
+ self.mask_head = builder.build_head(mask_head)
+
+ self.train_cfg = train_cfg
+ self.test_cfg = test_cfg
+
+ self.init_weights(pretrained=pretrained)
+
+ @property
+ def with_rpn(self):
+ return hasattr(self, 'rpn_head') and self.rpn_head is not None
+
+ def init_weights(self, pretrained=None):
+ super(TwoStageDetector, self).init_weights(pretrained)
+ self.backbone.init_weights(pretrained=pretrained)
+ if self.with_neck:
+ if isinstance(self.neck, nn.Sequential):
+ for m in self.neck:
+ m.init_weights()
+ else:
+ self.neck.init_weights()
+ if self.with_shared_head:
+ self.shared_head.init_weights(pretrained=pretrained)
+ if self.with_rpn:
+ self.rpn_head.init_weights()
+ if self.with_bbox:
+ self.bbox_roi_extractor.init_weights()
+ self.bbox_head.init_weights()
+ if self.with_mask:
+ self.mask_head.init_weights()
+ if not self.share_roi_extractor:
+ self.mask_roi_extractor.init_weights()
+
+ def extract_feat(self, img):
+ """Directly extract features from the backbone+neck
+ """
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def forward_dummy(self, img):
+ """Used for computing network flops.
+
+        See `mmdetection/tools/get_flops.py`
+ """
+ outs = ()
+ # backbone
+ x = self.extract_feat(img)
+ # rpn
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ outs = outs + (rpn_outs, )
+ proposals = torch.randn(1000, 4).cuda()
+ # bbox head
+ rois = bbox2roi([proposals])
+ if self.with_bbox:
+ bbox_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_feats)
+ outs = outs + (cls_score, bbox_pred)
+ # mask head
+ if self.with_mask:
+ mask_rois = rois[:100]
+ mask_feats = self.mask_roi_extractor(
+ x[:self.mask_roi_extractor.num_inputs], mask_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ mask_pred = self.mask_head(mask_feats)
+ outs = outs + (mask_pred, )
+ return outs
+
+ def forward_train(self,
+ img,
+ img_meta,
+ gt_bboxes,
+ gt_labels,
+ gt_bboxes_ignore=None,
+ gt_masks=None,
+ proposals=None):
+ """
+ Args:
+ img (Tensor): of shape (N, C, H, W) encoding input images.
+ Typically these should be mean centered and std scaled.
+
+ img_meta (list[dict]): list of image info dict where each dict has:
+ 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ `mmdet/datasets/pipelines/formatting.py:Collect`.
+
+            gt_bboxes (list[Tensor]): ground-truth boxes for each image in
+                [tl_x, tl_y, br_x, br_y] format.
+
+ gt_labels (list[Tensor]): class indices corresponding to each box
+
+ gt_bboxes_ignore (None | list[Tensor]): specify which bounding
+ boxes can be ignored when computing the loss.
+
+            gt_masks (None | Tensor): ground-truth segmentation masks for each
+                box, used if the architecture supports a segmentation task.
+
+            proposals: override RPN proposals with custom proposals. Use when
+ `with_rpn` is False.
+
+ Returns:
+ dict[str, Tensor]: a dictionary of loss components
+ """
+ x = self.extract_feat(img)
+
+ losses = dict()
+
+ # RPN forward and loss
+ if self.with_rpn:
+ rpn_outs = self.rpn_head(x)
+ rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
+ self.train_cfg.rpn)
+ rpn_losses = self.rpn_head.loss(
+ *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ losses.update(rpn_losses)
+
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
+ proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
+ else:
+ proposal_list = proposals
+
+ # assign gts and sample proposals
+ if self.with_bbox or self.with_mask:
+ bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
+ bbox_sampler = build_sampler(
+ self.train_cfg.rcnn.sampler, context=self)
+ num_imgs = img.size(0)
+ if gt_bboxes_ignore is None:
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+ sampling_results = []
+ for i in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[i],
+ gt_bboxes[i],
+ gt_bboxes_ignore[i],
+ gt_labels[i])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[i],
+ gt_bboxes[i],
+ gt_labels[i],
+ feats=[lvl_feat[i][None] for lvl_feat in x])
+ sampling_results.append(sampling_result)
+
+ # bbox head forward and loss
+ if self.with_bbox:
+ rois = bbox2roi([res.bboxes for res in sampling_results])
+ # TODO: a more flexible way to decide which feature maps to use
+ bbox_feats = self.bbox_roi_extractor(
+ x[:self.bbox_roi_extractor.num_inputs], rois)
+ if self.with_shared_head:
+ bbox_feats = self.shared_head(bbox_feats)
+ cls_score, bbox_pred = self.bbox_head(bbox_feats)
+
+ bbox_targets = self.bbox_head.get_target(sampling_results,
+ gt_bboxes, gt_labels,
+ self.train_cfg.rcnn)
+ loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
+ *bbox_targets)
+ losses.update(loss_bbox)
+
+ # mask head forward and loss
+ if self.with_mask:
+ if not self.share_roi_extractor:
+ pos_rois = bbox2roi(
+ [res.pos_bboxes for res in sampling_results])
+ mask_feats = self.mask_roi_extractor(
+ x[:self.mask_roi_extractor.num_inputs], pos_rois)
+ if self.with_shared_head:
+ mask_feats = self.shared_head(mask_feats)
+ else:
+ pos_inds = []
+ device = bbox_feats.device
+ for res in sampling_results:
+ pos_inds.append(
+ torch.ones(
+ res.pos_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds.append(
+ torch.zeros(
+ res.neg_bboxes.shape[0],
+ device=device,
+ dtype=torch.uint8))
+ pos_inds = torch.cat(pos_inds)
+ mask_feats = bbox_feats[pos_inds]
+
+ if mask_feats.shape[0] > 0:
+ mask_pred = self.mask_head(mask_feats)
+ mask_targets = self.mask_head.get_target(
+ sampling_results, gt_masks, self.train_cfg.rcnn)
+ pos_labels = torch.cat(
+ [res.pos_gt_labels for res in sampling_results])
+ loss_mask = self.mask_head.loss(mask_pred, mask_targets,
+ pos_labels)
+ losses.update(loss_mask)
+
+ return losses
+
+ async def async_simple_test(self,
+ img,
+ img_meta,
+ proposals=None,
+ rescale=False):
+ """Async test without augmentation."""
+ assert self.with_bbox, "Bbox head must be implemented."
+ x = self.extract_feat(img)
+
+ if proposals is None:
+ proposal_list = await self.async_test_rpn(x, img_meta,
+ self.test_cfg.rpn)
+ else:
+ proposal_list = proposals
+
+ det_bboxes, det_labels = await self.async_test_bboxes(
+ x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)
+ bbox_results = bbox2result(det_bboxes, det_labels,
+ self.bbox_head.num_classes)
+
+ if not self.with_mask:
+ return bbox_results
+ else:
+ segm_results = await self.async_test_mask(
+ x,
+ img_meta,
+ det_bboxes,
+ det_labels,
+ rescale=rescale,
+ mask_test_cfg=self.test_cfg.get('mask'))
+ return bbox_results, segm_results
+
+ def simple_test(self, img, img_meta, proposals=None, rescale=False):
+ """Test without augmentation."""
+ assert self.with_bbox, "Bbox head must be implemented."
+
+ x = self.extract_feat(img)
+
+ if proposals is None:
+ proposal_list = self.simple_test_rpn(x, img_meta,
+ self.test_cfg.rpn)
+ else:
+ proposal_list = proposals
+
+ det_bboxes, det_labels = self.simple_test_bboxes(
+ x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)
+ bbox_results = bbox2result(det_bboxes, det_labels,
+ self.bbox_head.num_classes)
+
+ if not self.with_mask:
+ return bbox_results
+ else:
+ segm_results = self.simple_test_mask(
+ x, img_meta, det_bboxes, det_labels, rescale=rescale)
+ return bbox_results, segm_results
+
+ def aug_test(self, imgs, img_metas, rescale=False):
+ """Test with augmentations.
+
+ If rescale is False, then returned bboxes and masks will fit the scale
+ of imgs[0].
+ """
+ # recompute feats to save memory
+ proposal_list = self.aug_test_rpn(
+ self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
+ det_bboxes, det_labels = self.aug_test_bboxes(
+ self.extract_feats(imgs), img_metas, proposal_list,
+ self.test_cfg.rcnn)
+
+ if rescale:
+ _det_bboxes = det_bboxes
+ else:
+ _det_bboxes = det_bboxes.clone()
+ _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
+ bbox_results = bbox2result(_det_bboxes, det_labels,
+ self.bbox_head.num_classes)
+
+ # det_bboxes always keep the original scale
+ if self.with_mask:
+ segm_results = self.aug_test_mask(
+ self.extract_feats(imgs), img_metas, det_bboxes, det_labels)
+ return bbox_results, segm_results
+ else:
+ return bbox_results
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/__init__.py
new file mode 100644
index 000000000..07731d710
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/__init__.py
@@ -0,0 +1,20 @@
+from .accuracy import Accuracy, accuracy
+from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss
+from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
+ cross_entropy, mask_cross_entropy)
+from .focal_loss import FocalLoss, sigmoid_focal_loss
+from .ghm_loss import GHMC, GHMR
+from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss,
+ iou_loss)
+from .mse_loss import MSELoss, mse_loss
+from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss
+from .utils import reduce_loss, weight_reduce_loss, weighted_loss
+
+__all__ = [
+ 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
+ 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
+ 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',
+ 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',
+ 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss',
+ 'weight_reduce_loss', 'weighted_loss'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/accuracy.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/accuracy.py
new file mode 100644
index 000000000..20d0ad8cd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/accuracy.py
@@ -0,0 +1,31 @@
+import torch.nn as nn
+
+
+def accuracy(pred, target, topk=1):
+ assert isinstance(topk, (int, tuple))
+ if isinstance(topk, int):
+ topk = (topk, )
+ return_single = True
+ else:
+ return_single = False
+
+ maxk = max(topk)
+ _, pred_label = pred.topk(maxk, dim=1)
+ pred_label = pred_label.t()
+ correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / pred.size(0)))
+ return res[0] if return_single else res
+
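+# Minimal doctest-style sketch of `accuracy` above (illustrative values, not
+# part of the original test suite): top-k accuracy is the percentage of rows
+# whose target index appears among the k highest scores.
+#
+# >>> import torch
+# >>> pred = torch.tensor([[0.1, 0.8, 0.1], [0.5, 0.1, 0.4]])
+# >>> target = torch.tensor([1, 2])
+# >>> accuracy(pred, target)                # top-1: only the first row matches
+# tensor([50.])
+# >>> accuracy(pred, target, topk=(1, 2))   # top-2 also covers the second row
+# [tensor([50.]), tensor([100.])]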
+
+class Accuracy(nn.Module):
+
+ def __init__(self, topk=(1, )):
+ super().__init__()
+ self.topk = topk
+
+ def forward(self, pred, target):
+ return accuracy(pred, target, self.topk)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/balanced_l1_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/balanced_l1_loss.py
new file mode 100644
index 000000000..fab60dbc6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/balanced_l1_loss.py
@@ -0,0 +1,69 @@
+import numpy as np
+import torch
+import torch.nn as nn
+
+from ..registry import LOSSES
+from .utils import weighted_loss
+
+
+@weighted_loss
+def balanced_l1_loss(pred,
+ target,
+ beta=1.0,
+ alpha=0.5,
+ gamma=1.5,
+ reduction='mean'):
+ assert beta > 0
+ assert pred.size() == target.size() and target.numel() > 0
+
+ diff = torch.abs(pred - target)
+ b = np.e**(gamma / alpha) - 1
+ loss = torch.where(
+ diff < beta, alpha / b *
+ (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
+ gamma * diff + gamma / b - alpha * beta)
+
+ return loss
+
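+# Note on the constant b above: b = e**(gamma / alpha) - 1 is chosen so that
+# the logarithmic branch (|diff| < beta) and the linear branch (slope gamma)
+# meet with the same value at |diff| = beta, keeping the loss continuous.
+# Compared with plain smooth L1, the log branch boosts the gradient
+# contribution of inliers, which is the point of the balanced L1 loss
+# (Libra R-CNN).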
+
+@LOSSES.register_module
+class BalancedL1Loss(nn.Module):
+ """Balanced L1 Loss
+
+ arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
+ """
+
+ def __init__(self,
+ alpha=0.5,
+ gamma=1.5,
+ beta=1.0,
+ reduction='mean',
+ loss_weight=1.0):
+ super(BalancedL1Loss, self).__init__()
+ self.alpha = alpha
+ self.gamma = gamma
+ self.beta = beta
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss_bbox = self.loss_weight * balanced_l1_loss(
+ pred,
+ target,
+ weight,
+ alpha=self.alpha,
+ gamma=self.gamma,
+ beta=self.beta,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss_bbox
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/cross_entropy_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/cross_entropy_loss.py
new file mode 100644
index 000000000..dd9d4776f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/cross_entropy_loss.py
@@ -0,0 +1,103 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..registry import LOSSES
+from .utils import weight_reduce_loss
+
+
+def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
+ # element-wise losses
+ loss = F.cross_entropy(pred, label, reduction='none')
+
+ # apply weights and do the reduction
+ if weight is not None:
+ weight = weight.float()
+ loss = weight_reduce_loss(
+ loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
+
+ return loss
+
+
+def _expand_binary_labels(labels, label_weights, label_channels):
+ bin_labels = labels.new_full((labels.size(0), label_channels), 0)
+ inds = torch.nonzero(labels >= 1).squeeze()
+ if inds.numel() > 0:
+ bin_labels[inds, labels[inds] - 1] = 1
+ if label_weights is None:
+ bin_label_weights = None
+ else:
+ bin_label_weights = label_weights.view(-1, 1).expand(
+ label_weights.size(0), label_channels)
+ return bin_labels, bin_label_weights
+
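+# Sketch of the expansion above (illustrative, follows the mmdet-1.x
+# convention that label 0 is background and foreground classes start at 1):
+#   labels = [0, 2, 1], label_channels = 3
+#   -> bin_labels = [[0, 0, 0],   # background row stays all-zero
+#                    [0, 1, 0],   # class 2 -> channel index 1
+#                    [1, 0, 0]]   # class 1 -> channel index 0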
+
+def binary_cross_entropy(pred,
+ label,
+ weight=None,
+ reduction='mean',
+ avg_factor=None):
+ if pred.dim() != label.dim():
+ label, weight = _expand_binary_labels(label, weight, pred.size(-1))
+
+ # weighted element-wise losses
+ if weight is not None:
+ weight = weight.float()
+ loss = F.binary_cross_entropy_with_logits(
+ pred, label.float(), weight, reduction='none')
+ # do the reduction for the weighted loss
+ loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor)
+
+ return loss
+
+
+def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None):
+ # TODO: handle these two reserved arguments
+ assert reduction == 'mean' and avg_factor is None
+ num_rois = pred.size()[0]
+ inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
+ pred_slice = pred[inds, label].squeeze(1)
+ return F.binary_cross_entropy_with_logits(
+ pred_slice, target, reduction='mean')[None]
+
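+# Note on `mask_cross_entropy` above: `pred` holds one mask channel per class;
+# for each RoI the channel of its ground-truth label is sliced out and scored
+# with binary cross-entropy (with logits) against the binary target mask,
+# averaged over pixels and RoIs (hence the reserved 'mean'/None arguments).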
+
+@LOSSES.register_module
+class CrossEntropyLoss(nn.Module):
+
+ def __init__(self,
+ use_sigmoid=False,
+ use_mask=False,
+ reduction='mean',
+ loss_weight=1.0):
+ super(CrossEntropyLoss, self).__init__()
+ assert (use_sigmoid is False) or (use_mask is False)
+ self.use_sigmoid = use_sigmoid
+ self.use_mask = use_mask
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ if self.use_sigmoid:
+ self.cls_criterion = binary_cross_entropy
+ elif self.use_mask:
+ self.cls_criterion = mask_cross_entropy
+ else:
+ self.cls_criterion = cross_entropy
+
+ def forward(self,
+ cls_score,
+ label,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss_cls = self.loss_weight * self.cls_criterion(
+ cls_score,
+ label,
+ weight,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss_cls
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/focal_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/focal_loss.py
new file mode 100644
index 000000000..6b28e1257
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/focal_loss.py
@@ -0,0 +1,82 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss
+from ..registry import LOSSES
+from .utils import weight_reduce_loss
+
+
+# This method is only for debugging
+def py_sigmoid_focal_loss(pred,
+ target,
+ weight=None,
+ gamma=2.0,
+ alpha=0.25,
+ reduction='mean',
+ avg_factor=None):
+ pred_sigmoid = pred.sigmoid()
+ target = target.type_as(pred)
+ pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
+ focal_weight = (alpha * target + (1 - alpha) *
+ (1 - target)) * pt.pow(gamma)
+ loss = F.binary_cross_entropy_with_logits(
+ pred, target, reduction='none') * focal_weight
+ loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
+ return loss
+
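+# Illustrative arithmetic for the pure-Python focal loss above (defaults
+# gamma=2.0, alpha=0.25): for a positive target predicted with sigmoid
+# probability p = 0.9, the code's pt = 1 - p = 0.1, so
+#   loss = alpha * pt**gamma * (-log(p)) = 0.25 * 0.01 * 0.105 ~= 2.6e-4,
+# while a hard positive with p = 0.1 keeps roughly 0.25 * 0.81 * 2.30 ~= 0.47:
+# well-classified examples are strongly down-weighted.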
+
+def sigmoid_focal_loss(pred,
+ target,
+ weight=None,
+ gamma=2.0,
+ alpha=0.25,
+ reduction='mean',
+ avg_factor=None):
+ # Function.apply does not accept keyword arguments, so the decorator
+ # "weighted_loss" is not applicable
+ loss = _sigmoid_focal_loss(pred, target, gamma, alpha)
+ # TODO: find a proper way to handle the shape of weight
+ if weight is not None:
+ weight = weight.view(-1, 1)
+ loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
+ return loss
+
+
+@LOSSES.register_module
+class FocalLoss(nn.Module):
+
+ def __init__(self,
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ reduction='mean',
+ loss_weight=1.0):
+ super(FocalLoss, self).__init__()
+ assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
+ self.use_sigmoid = use_sigmoid
+ self.gamma = gamma
+ self.alpha = alpha
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ if self.use_sigmoid:
+ loss_cls = self.loss_weight * sigmoid_focal_loss(
+ pred,
+ target,
+ weight,
+ gamma=self.gamma,
+ alpha=self.alpha,
+ reduction=reduction,
+ avg_factor=avg_factor)
+ else:
+ raise NotImplementedError
+ return loss_cls
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/ghm_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/ghm_loss.py
new file mode 100644
index 000000000..e62b9904f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/ghm_loss.py
@@ -0,0 +1,171 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..registry import LOSSES
+
+
+def _expand_binary_labels(labels, label_weights, label_channels):
+ bin_labels = labels.new_full((labels.size(0), label_channels), 0)
+ inds = torch.nonzero(labels >= 1).squeeze()
+ if inds.numel() > 0:
+ bin_labels[inds, labels[inds] - 1] = 1
+ bin_label_weights = label_weights.view(-1, 1).expand(
+ label_weights.size(0), label_channels)
+ return bin_labels, bin_label_weights
+
+
+# TODO: code refactoring to make it consistent with other losses
+@LOSSES.register_module
+class GHMC(nn.Module):
+ """GHM Classification Loss.
+
+    Details of the method can be found in the paper
+ "Gradient Harmonized Single-stage Detector".
+ https://arxiv.org/abs/1811.05181
+
+ Args:
+ bins (int): Number of the unit regions for distribution calculation.
+ momentum (float): The parameter for moving average.
+ use_sigmoid (bool): Can only be true for BCE based loss now.
+ loss_weight (float): The weight of the total GHM-C loss.
+ """
+
+ def __init__(self, bins=10, momentum=0, use_sigmoid=True, loss_weight=1.0):
+ super(GHMC, self).__init__()
+ self.bins = bins
+ self.momentum = momentum
+ edges = torch.arange(bins + 1).float() / bins
+ self.register_buffer('edges', edges)
+ self.edges[-1] += 1e-6
+ if momentum > 0:
+ acc_sum = torch.zeros(bins)
+ self.register_buffer('acc_sum', acc_sum)
+ self.use_sigmoid = use_sigmoid
+ if not self.use_sigmoid:
+ raise NotImplementedError
+ self.loss_weight = loss_weight
+
+ def forward(self, pred, target, label_weight, *args, **kwargs):
+ """Calculate the GHM-C loss.
+
+ Args:
+ pred (float tensor of size [batch_num, class_num]):
+ The direct prediction of classification fc layer.
+ target (float tensor of size [batch_num, class_num]):
+ Binary class target for each sample.
+ label_weight (float tensor of size [batch_num, class_num]):
+ the value is 1 if the sample is valid and 0 if ignored.
+ Returns:
+ The gradient harmonized loss.
+ """
+ # the target should be binary class label
+ if pred.dim() != target.dim():
+ target, label_weight = _expand_binary_labels(
+ target, label_weight, pred.size(-1))
+ target, label_weight = target.float(), label_weight.float()
+ edges = self.edges
+ mmt = self.momentum
+ weights = torch.zeros_like(pred)
+
+ # gradient length
+ g = torch.abs(pred.sigmoid().detach() - target)
+
+ valid = label_weight > 0
+ tot = max(valid.float().sum().item(), 1.0)
+ n = 0 # n valid bins
+ for i in range(self.bins):
+ inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
+ num_in_bin = inds.sum().item()
+ if num_in_bin > 0:
+ if mmt > 0:
+ self.acc_sum[i] = mmt * self.acc_sum[i] \
+ + (1 - mmt) * num_in_bin
+ weights[inds] = tot / self.acc_sum[i]
+ else:
+ weights[inds] = tot / num_in_bin
+ n += 1
+ if n > 0:
+ weights = weights / n
+
+ loss = F.binary_cross_entropy_with_logits(
+ pred, target, weights, reduction='sum') / tot
+ return loss * self.loss_weight
+
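+# Worked example of the GHM-C binning above (illustrative, bins=2,
+# momentum=0): if the valid gradient norms are g = [0.1, 0.2, 0.9], then
+# tot = 3, bin [0, 0.5) holds two samples (weight 3/2 each) and bin [0.5, 1]
+# holds one (weight 3/1); with n = 2 non-empty bins the final weights are
+# [0.75, 0.75, 1.5], so densely populated gradient regions (typically easy
+# examples) are down-weighted relative to rare, harder ones.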
+
+# TODO: code refactoring to make it consistent with other losses
+@LOSSES.register_module
+class GHMR(nn.Module):
+ """GHM Regression Loss.
+
+    Details of the method can be found in the paper
+ "Gradient Harmonized Single-stage Detector"
+ https://arxiv.org/abs/1811.05181
+
+ Args:
+ mu (float): The parameter for the Authentic Smooth L1 loss.
+ bins (int): Number of the unit regions for distribution calculation.
+ momentum (float): The parameter for moving average.
+ loss_weight (float): The weight of the total GHM-R loss.
+ """
+
+ def __init__(self, mu=0.02, bins=10, momentum=0, loss_weight=1.0):
+ super(GHMR, self).__init__()
+ self.mu = mu
+ self.bins = bins
+ edges = torch.arange(bins + 1).float() / bins
+ self.register_buffer('edges', edges)
+ self.edges[-1] = 1e3
+ self.momentum = momentum
+ if momentum > 0:
+ acc_sum = torch.zeros(bins)
+ self.register_buffer('acc_sum', acc_sum)
+ self.loss_weight = loss_weight
+
+ # TODO: support reduction parameter
+ def forward(self, pred, target, label_weight, avg_factor=None):
+ """Calculate the GHM-R loss.
+
+ Args:
+ pred (float tensor of size [batch_num, 4 (* class_num)]):
+ The prediction of box regression layer. Channel number can be 4
+ or 4 * class_num depending on whether it is class-agnostic.
+ target (float tensor of size [batch_num, 4 (* class_num)]):
+ The target regression values with the same size of pred.
+ label_weight (float tensor of size [batch_num, 4 (* class_num)]):
+ The weight of each sample, 0 if ignored.
+ Returns:
+ The gradient harmonized loss.
+ """
+ mu = self.mu
+ edges = self.edges
+ mmt = self.momentum
+
+ # ASL1 loss
+ diff = pred - target
+ loss = torch.sqrt(diff * diff + mu * mu) - mu
+
+ # gradient length
+ g = torch.abs(diff / torch.sqrt(mu * mu + diff * diff)).detach()
+ weights = torch.zeros_like(g)
+
+ valid = label_weight > 0
+ tot = max(label_weight.float().sum().item(), 1.0)
+ n = 0 # n: valid bins
+ for i in range(self.bins):
+ inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
+ num_in_bin = inds.sum().item()
+ if num_in_bin > 0:
+ n += 1
+ if mmt > 0:
+ self.acc_sum[i] = mmt * self.acc_sum[i] \
+ + (1 - mmt) * num_in_bin
+ weights[inds] = tot / self.acc_sum[i]
+ else:
+ weights[inds] = tot / num_in_bin
+ if n > 0:
+ weights /= n
+
+ loss = loss * weights
+ loss = loss.sum() / tot
+ return loss * self.loss_weight
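+
+# Note on GHM-R above: the ASL1 loss sqrt(d^2 + mu^2) - mu has gradient
+# magnitude |d| / sqrt(d^2 + mu^2), which always lies in [0, 1), so the same
+# bin-counting reweighting as GHM-C carries over to regression.  Illustrative
+# values with the default mu = 0.02: a residual d = 0.02 gives a loss of
+# about 0.0083 and a gradient norm of about 0.71.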
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/iou_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/iou_loss.py
new file mode 100644
index 000000000..c19c1d1d6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/iou_loss.py
@@ -0,0 +1,212 @@
+import torch
+import torch.nn as nn
+
+from mmdet.core import bbox_overlaps
+from ..registry import LOSSES
+from .utils import weighted_loss
+
+
+@weighted_loss
+def iou_loss(pred, target, eps=1e-6):
+ """IoU loss.
+
+ Computing the IoU loss between a set of predicted bboxes and target bboxes.
+ The loss is calculated as negative log of IoU.
+
+ Args:
+ pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
+ shape (n, 4).
+ target (Tensor): Corresponding gt bboxes, shape (n, 4).
+ eps (float): Eps to avoid log(0).
+
+ Return:
+ Tensor: Loss tensor.
+ """
+ ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
+ loss = -ious.log()
+ return loss
+
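+# Quick sanity check for the IoU loss above (illustrative): a perfect overlap
+# (IoU = 1) gives -log(1) = 0, IoU = 0.5 gives -log(0.5) ~= 0.693, and the
+# loss grows without bound as the IoU approaches eps.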
+
+@weighted_loss
+def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):
+ """Improving Object Localization with Fitness NMS and Bounded IoU Loss,
+ https://arxiv.org/abs/1711.00164.
+
+ Args:
+ pred (tensor): Predicted bboxes.
+ target (tensor): Target bboxes.
+ beta (float): beta parameter in smoothl1.
+ eps (float): eps to avoid NaN.
+ """
+ pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5
+ pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5
+ pred_w = pred[:, 2] - pred[:, 0] + 1
+ pred_h = pred[:, 3] - pred[:, 1] + 1
+ with torch.no_grad():
+ target_ctrx = (target[:, 0] + target[:, 2]) * 0.5
+ target_ctry = (target[:, 1] + target[:, 3]) * 0.5
+ target_w = target[:, 2] - target[:, 0] + 1
+ target_h = target[:, 3] - target[:, 1] + 1
+
+ dx = target_ctrx - pred_ctrx
+ dy = target_ctry - pred_ctry
+
+ loss_dx = 1 - torch.max(
+ (target_w - 2 * dx.abs()) /
+ (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx))
+ loss_dy = 1 - torch.max(
+ (target_h - 2 * dy.abs()) /
+ (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy))
+ loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w /
+ (target_w + eps))
+ loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h /
+ (target_h + eps))
+ loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh],
+ dim=-1).view(loss_dx.size(0), -1)
+
+ loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta,
+ loss_comb - 0.5 * beta)
+ return loss
+
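+# Note on the bounded IoU loss above: instead of a single IoU term it builds
+# four per-coordinate terms (center x/y offsets and width/height ratios), each
+# equal to 1 minus a bounded IoU-like score in [0, 1], and then applies the
+# usual smooth-L1 shaping with `beta` (quadratic for small violations, linear
+# for large ones).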
+
+@weighted_loss
+def giou_loss(pred, target, eps=1e-7):
+ """
+ Generalized Intersection over Union: A Metric and A Loss for
+ Bounding Box Regression
+ https://arxiv.org/abs/1902.09630
+
+ code refer to:
+ https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py#L36
+
+ Args:
+ pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
+ shape (n, 4).
+ target (Tensor): Corresponding gt bboxes, shape (n, 4).
+        eps (float): Eps to avoid division by zero.
+
+ Return:
+ Tensor: Loss tensor.
+ """
+ # overlap
+ lt = torch.max(pred[:, :2], target[:, :2])
+ rb = torch.min(pred[:, 2:], target[:, 2:])
+ wh = (rb - lt + 1).clamp(min=0)
+ overlap = wh[:, 0] * wh[:, 1]
+
+ # union
+ ap = (pred[:, 2] - pred[:, 0] + 1) * (pred[:, 3] - pred[:, 1] + 1)
+ ag = (target[:, 2] - target[:, 0] + 1) * (target[:, 3] - target[:, 1] + 1)
+ union = ap + ag - overlap + eps
+
+ # IoU
+ ious = overlap / union
+
+ # enclose area
+ enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
+ enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
+ enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0)
+ enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps
+
+ # GIoU
+ gious = ious - (enclose_area - union) / enclose_area
+ loss = 1 - gious
+ return loss
+
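+# Illustrative GIoU arithmetic (using the +1 pixel convention of this file):
+# pred = [0, 0, 9, 9] and target = [20, 0, 29, 9] are disjoint 10x10 boxes, so
+# overlap = 0, union = 200 and the smallest enclosing box has area 300.
+# GIoU = 0 - (300 - 200) / 300 = -1/3, hence loss = 1 - GIoU ~= 1.33; unlike
+# plain IoU, the loss still provides a gradient when the boxes do not overlap.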
+
+@LOSSES.register_module
+class IoULoss(nn.Module):
+
+ def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
+ super(IoULoss, self).__init__()
+ self.eps = eps
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ if weight is not None and not torch.any(weight > 0):
+ return (pred * weight).sum() # 0
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss = self.loss_weight * iou_loss(
+ pred,
+ target,
+ weight,
+ eps=self.eps,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss
+
+
+@LOSSES.register_module
+class BoundedIoULoss(nn.Module):
+
+ def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0):
+ super(BoundedIoULoss, self).__init__()
+ self.beta = beta
+ self.eps = eps
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ if weight is not None and not torch.any(weight > 0):
+ return (pred * weight).sum() # 0
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss = self.loss_weight * bounded_iou_loss(
+ pred,
+ target,
+ weight,
+ beta=self.beta,
+ eps=self.eps,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss
+
+
+@LOSSES.register_module
+class GIoULoss(nn.Module):
+
+ def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):
+ super(GIoULoss, self).__init__()
+ self.eps = eps
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ if weight is not None and not torch.any(weight > 0):
+ return (pred * weight).sum() # 0
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss = self.loss_weight * giou_loss(
+ pred,
+ target,
+ weight,
+ eps=self.eps,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/mse_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/mse_loss.py
new file mode 100644
index 000000000..a868b2be9
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/mse_loss.py
@@ -0,0 +1,25 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..registry import LOSSES
+from .utils import weighted_loss
+
+mse_loss = weighted_loss(F.mse_loss)
+
+
+@LOSSES.register_module
+class MSELoss(nn.Module):
+
+ def __init__(self, reduction='mean', loss_weight=1.0):
+ super().__init__()
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self, pred, target, weight=None, avg_factor=None):
+ loss = self.loss_weight * mse_loss(
+ pred,
+ target,
+ weight,
+ reduction=self.reduction,
+ avg_factor=avg_factor)
+ return loss
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/smooth_l1_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/smooth_l1_loss.py
new file mode 100644
index 000000000..bc340730b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/smooth_l1_loss.py
@@ -0,0 +1,45 @@
+import torch
+import torch.nn as nn
+
+from ..registry import LOSSES
+from .utils import weighted_loss
+
+
+@weighted_loss
+def smooth_l1_loss(pred, target, beta=1.0):
+ assert beta > 0
+ assert pred.size() == target.size() and target.numel() > 0
+ diff = torch.abs(pred - target)
+ loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
+ diff - 0.5 * beta)
+ return loss
+
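+# Quick values for the smooth L1 loss above with the default beta = 1.0
+# (illustrative): |diff| = 0.5 falls on the quadratic branch and gives
+# 0.5 * 0.25 = 0.125, while |diff| = 2.0 falls on the linear branch and gives
+# 2.0 - 0.5 = 1.5; the two branches meet with equal value and slope at
+# |diff| = beta.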
+
+@LOSSES.register_module
+class SmoothL1Loss(nn.Module):
+
+ def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):
+ super(SmoothL1Loss, self).__init__()
+ self.beta = beta
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss_bbox = self.loss_weight * smooth_l1_loss(
+ pred,
+ target,
+ weight,
+ beta=self.beta,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss_bbox
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/utils.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/utils.py
new file mode 100644
index 000000000..3361c6cad
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/utils.py
@@ -0,0 +1,98 @@
+import functools
+
+import torch.nn.functional as F
+
+
+def reduce_loss(loss, reduction):
+ """Reduce loss as specified.
+
+ Args:
+ loss (Tensor): Elementwise loss tensor.
+ reduction (str): Options are "none", "mean" and "sum".
+
+ Return:
+ Tensor: Reduced loss tensor.
+ """
+ reduction_enum = F._Reduction.get_enum(reduction)
+ # none: 0, elementwise_mean:1, sum: 2
+ if reduction_enum == 0:
+ return loss
+ elif reduction_enum == 1:
+ return loss.mean()
+ elif reduction_enum == 2:
+ return loss.sum()
+
+
+def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
+ """Apply element-wise weight and reduce loss.
+
+ Args:
+ loss (Tensor): Element-wise loss.
+ weight (Tensor): Element-wise weights.
+ reduction (str): Same as built-in losses of PyTorch.
+        avg_factor (float): Average factor when computing the mean of losses.
+
+ Returns:
+ Tensor: Processed loss values.
+ """
+ # if weight is specified, apply element-wise weight
+ if weight is not None:
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ loss = reduce_loss(loss, reduction)
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ loss = loss.sum() / avg_factor
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+ return loss
+
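+# Minimal sketch of the reduction rules above (illustrative values):
+#   loss = [1., 2., 3.], weight = [1., 0., 1.], reduction='mean'
+#   - without avg_factor: (1 + 0 + 3) / 3 = 1.333...
+#   - with avg_factor=2:  (1 + 0 + 3) / 2 = 2.0
+# avg_factor thus replaces the element count as the normalizer, e.g. to
+# average a loss over the number of positive samples.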
+
+def weighted_loss(loss_func):
+ """Create a weighted version of a given loss function.
+
+ To use this decorator, the loss function must have the signature like
+ `loss_func(pred, target, **kwargs)`. The function only needs to compute
+ element-wise loss without any reduction. This decorator will add weight
+ and reduction arguments to the function. The decorated function will have
+ the signature like `loss_func(pred, target, weight=None, reduction='mean',
+ avg_factor=None, **kwargs)`.
+
+ :Example:
+
+ >>> import torch
+ >>> @weighted_loss
+ >>> def l1_loss(pred, target):
+ >>> return (pred - target).abs()
+
+ >>> pred = torch.Tensor([0, 2, 3])
+ >>> target = torch.Tensor([1, 1, 1])
+ >>> weight = torch.Tensor([1, 0, 1])
+
+ >>> l1_loss(pred, target)
+ tensor(1.3333)
+ >>> l1_loss(pred, target, weight)
+ tensor(1.)
+ >>> l1_loss(pred, target, reduction='none')
+ tensor([1., 1., 2.])
+ >>> l1_loss(pred, target, weight, avg_factor=2)
+ tensor(1.5000)
+ """
+
+ @functools.wraps(loss_func)
+ def wrapper(pred,
+ target,
+ weight=None,
+ reduction='mean',
+ avg_factor=None,
+ **kwargs):
+ # get element-wise loss
+ loss = loss_func(pred, target, **kwargs)
+ loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
+ return loss
+
+ return wrapper
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/__init__.py
new file mode 100644
index 000000000..0cae03ac7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/__init__.py
@@ -0,0 +1,11 @@
+from .fcn_mask_head import FCNMaskHead
+from .fused_semantic_head import FusedSemanticHead
+from .grid_head import GridHead
+from .htc_mask_head import HTCMaskHead
+from .maskiou_head import MaskIoUHead
+from .mask_feat_head import MaskFeatHead
+
+__all__ = [
+ 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
+ 'MaskIoUHead', 'MaskFeatHead'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fcn_mask_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fcn_mask_head.py
new file mode 100644
index 000000000..6d11cfffc
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fcn_mask_head.py
@@ -0,0 +1,191 @@
+import mmcv
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+import torch.nn as nn
+from torch.nn.modules.utils import _pair
+
+from mmdet.core import auto_fp16, force_fp32, mask_target
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule
+
+
+@HEADS.register_module
+class FCNMaskHead(nn.Module):
+
+ def __init__(self,
+ num_convs=4,
+ roi_feat_size=14,
+ in_channels=256,
+ conv_kernel_size=3,
+ conv_out_channels=256,
+ upsample_method='deconv',
+ upsample_ratio=2,
+ num_classes=81,
+ class_agnostic=False,
+ conv_cfg=None,
+ norm_cfg=None,
+ loss_mask=dict(
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)):
+ super(FCNMaskHead, self).__init__()
+ if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
+ raise ValueError(
+ 'Invalid upsample method {}, accepted methods '
+ 'are "deconv", "nearest", "bilinear"'.format(upsample_method))
+ self.num_convs = num_convs
+ # WARN: roi_feat_size is reserved and not used
+ self.roi_feat_size = _pair(roi_feat_size)
+ self.in_channels = in_channels
+ self.conv_kernel_size = conv_kernel_size
+ self.conv_out_channels = conv_out_channels
+ self.upsample_method = upsample_method
+ self.upsample_ratio = upsample_ratio
+ self.num_classes = num_classes
+ self.class_agnostic = class_agnostic
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.fp16_enabled = False
+ self.loss_mask = build_loss(loss_mask)
+
+ self.convs = nn.ModuleList()
+ for i in range(self.num_convs):
+ in_channels = (
+ self.in_channels if i == 0 else self.conv_out_channels)
+ padding = (self.conv_kernel_size - 1) // 2
+ self.convs.append(
+ ConvModule(
+ in_channels,
+ self.conv_out_channels,
+ self.conv_kernel_size,
+ padding=padding,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+ upsample_in_channels = (
+ self.conv_out_channels if self.num_convs > 0 else in_channels)
+ if self.upsample_method is None:
+ self.upsample = None
+ elif self.upsample_method == 'deconv':
+ self.upsample = nn.ConvTranspose2d(
+ upsample_in_channels,
+ self.conv_out_channels,
+ self.upsample_ratio,
+ stride=self.upsample_ratio)
+ else:
+ self.upsample = nn.Upsample(
+ scale_factor=self.upsample_ratio, mode=self.upsample_method)
+
+ out_channels = 1 if self.class_agnostic else self.num_classes
+ logits_in_channel = (
+ self.conv_out_channels
+ if self.upsample_method == 'deconv' else upsample_in_channels)
+ self.conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1)
+ self.relu = nn.ReLU(inplace=True)
+ self.debug_imgs = None
+
+ def init_weights(self):
+ for m in [self.upsample, self.conv_logits]:
+ if m is None:
+ continue
+ nn.init.kaiming_normal_(
+ m.weight, mode='fan_out', nonlinearity='relu')
+ nn.init.constant_(m.bias, 0)
+
+ @auto_fp16()
+ def forward(self, x):
+ for conv in self.convs:
+ x = conv(x)
+ if self.upsample is not None:
+ x = self.upsample(x)
+ if self.upsample_method == 'deconv':
+ x = self.relu(x)
+ mask_pred = self.conv_logits(x)
+ return mask_pred
+
+ def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
+ pos_proposals = [res.pos_bboxes for res in sampling_results]
+ pos_assigned_gt_inds = [
+ res.pos_assigned_gt_inds for res in sampling_results
+ ]
+ mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
+ gt_masks, rcnn_train_cfg)
+ return mask_targets
+
+ @force_fp32(apply_to=('mask_pred', ))
+ def loss(self, mask_pred, mask_targets, labels):
+ loss = dict()
+ if self.class_agnostic:
+ loss_mask = self.loss_mask(mask_pred, mask_targets,
+ torch.zeros_like(labels))
+ else:
+ loss_mask = self.loss_mask(mask_pred, mask_targets, labels)
+ loss['loss_mask'] = loss_mask
+ return loss
+
+ def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
+ ori_shape, scale_factor, rescale):
+ """Get segmentation masks from mask_pred and bboxes.
+
+ Args:
+            mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
+                For single-scale testing, mask_pred is the direct output of
+                the model (a Tensor); for multi-scale testing it is converted
+                to a numpy array outside of this method.
+            det_bboxes (Tensor): shape (n, 4/5)
+            det_labels (Tensor): shape (n, )
+            rcnn_test_cfg (dict): rcnn testing config
+            ori_shape: original image size
+            scale_factor: scale factor between the tested and original image
+            rescale (bool): whether to return masks in the original image scale
+
+ Returns:
+ list[list]: encoded masks
+ """
+ if isinstance(mask_pred, torch.Tensor):
+ mask_pred = mask_pred.sigmoid().cpu().numpy()
+ assert isinstance(mask_pred, np.ndarray)
+        # when mixed precision training is enabled, mask_pred may be a
+        # float16 numpy array
+ mask_pred = mask_pred.astype(np.float32)
+
+ cls_segms = [[] for _ in range(self.num_classes - 1)]
+ bboxes = det_bboxes.cpu().numpy()[:, :4]
+ labels = det_labels.cpu().numpy() + 1
+
+ if rescale:
+ img_h, img_w = ori_shape[:2]
+ else:
+ img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
+ img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
+ scale_factor = 1.0
+
+ for i in range(bboxes.shape[0]):
+ if not isinstance(scale_factor, (float, np.ndarray)):
+ scale_factor = scale_factor.cpu().numpy()
+ bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
+ label = labels[i]
+ w = max(bbox[2] - bbox[0] + 1, 1)
+ h = max(bbox[3] - bbox[1] + 1, 1)
+
+ if not self.class_agnostic:
+ mask_pred_ = mask_pred[i, label, :, :]
+ else:
+ mask_pred_ = mask_pred[i, 0, :, :]
+
+ bbox_mask = mmcv.imresize(mask_pred_, (w, h))
+ bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
+ np.uint8)
+
+ if rcnn_test_cfg.get('crop_mask', False):
+ im_mask = bbox_mask
+ else:
+ im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
+ im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
+
+ if rcnn_test_cfg.get('rle_mask_encode', True):
+ rle = mask_util.encode(
+ np.array(im_mask[:, :, np.newaxis], order='F'))[0]
+ cls_segms[label - 1].append(rle)
+ else:
+ cls_segms[label - 1].append(im_mask)
+
+ return cls_segms
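+
+
+# Illustrative smoke test (not part of the upstream SOLO code): a minimal
+# sketch of how FCNMaskHead consumes pooled RoI features. The shapes and the
+# expected 28x28 output assume the usual defaults (256 conv channels and a
+# 2x deconv upsample); the __main__ guard keeps this inert on import.
+if __name__ == '__main__':
+    head = FCNMaskHead(num_convs=4, in_channels=256, num_classes=81)
+    head.init_weights()
+    rois = torch.rand(8, 256, 14, 14)  # 8 RoIs of pooled backbone features
+    mask_pred = head(rois)
+    # four 3x3 convs keep 14x14, the deconv doubles it to 28x28
+    print(tuple(mask_pred.shape))  # expected (8, 81, 28, 28)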
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fused_semantic_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fused_semantic_head.py
new file mode 100644
index 000000000..80dab0516
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fused_semantic_head.py
@@ -0,0 +1,106 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import kaiming_init
+
+from mmdet.core import auto_fp16, force_fp32
+from ..registry import HEADS
+from ..utils import ConvModule
+
+
+@HEADS.register_module
+class FusedSemanticHead(nn.Module):
+ r"""Multi-level fused semantic segmentation head.
+
+ in_1 -> 1x1 conv ---
+ |
+ in_2 -> 1x1 conv -- |
+ ||
+ in_3 -> 1x1 conv - ||
+ ||| /-> 1x1 conv (mask prediction)
+ in_4 -> 1x1 conv -----> 3x3 convs (*4)
+ | \-> 1x1 conv (feature)
+ in_5 -> 1x1 conv ---
+ """ # noqa: W605
+
+ def __init__(self,
+ num_ins,
+ fusion_level,
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=183,
+ ignore_label=255,
+ loss_weight=0.2,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(FusedSemanticHead, self).__init__()
+ self.num_ins = num_ins
+ self.fusion_level = fusion_level
+ self.num_convs = num_convs
+ self.in_channels = in_channels
+ self.conv_out_channels = conv_out_channels
+ self.num_classes = num_classes
+ self.ignore_label = ignore_label
+ self.loss_weight = loss_weight
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.fp16_enabled = False
+
+ self.lateral_convs = nn.ModuleList()
+ for i in range(self.num_ins):
+ self.lateral_convs.append(
+ ConvModule(
+ self.in_channels,
+ self.in_channels,
+ 1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=False))
+
+ self.convs = nn.ModuleList()
+ for i in range(self.num_convs):
+ in_channels = self.in_channels if i == 0 else conv_out_channels
+ self.convs.append(
+ ConvModule(
+ in_channels,
+ conv_out_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg))
+ self.conv_embedding = ConvModule(
+ conv_out_channels,
+ conv_out_channels,
+ 1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+ self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1)
+
+ self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label)
+
+ def init_weights(self):
+ kaiming_init(self.conv_logits)
+
+ @auto_fp16()
+ def forward(self, feats):
+ x = self.lateral_convs[self.fusion_level](feats[self.fusion_level])
+ fused_size = tuple(x.shape[-2:])
+ for i, feat in enumerate(feats):
+ if i != self.fusion_level:
+ feat = F.interpolate(
+ feat, size=fused_size, mode='bilinear', align_corners=True)
+ x += self.lateral_convs[i](feat)
+
+ for i in range(self.num_convs):
+ x = self.convs[i](x)
+
+ mask_pred = self.conv_logits(x)
+ x = self.conv_embedding(x)
+ return mask_pred, x
+
+ @force_fp32(apply_to=('mask_pred', ))
+ def loss(self, mask_pred, labels):
+ labels = labels.squeeze(1).long()
+ loss_semantic_seg = self.criterion(mask_pred, labels)
+ loss_semantic_seg *= self.loss_weight
+ return loss_semantic_seg
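+
+
+# Illustrative smoke test (not part of the upstream code): every input level
+# is resized to the fusion_level resolution, summed and refined, so the head
+# returns a semantic prediction plus a feature map at that resolution. The
+# level count and shapes below are assumptions; inert on import.
+if __name__ == '__main__':
+    import torch
+    head = FusedSemanticHead(num_ins=5, fusion_level=1, num_classes=183)
+    head.init_weights()
+    feats = [torch.rand(2, 256, 64 // 2 ** i, 64 // 2 ** i) for i in range(5)]
+    mask_pred, sem_feat = head(feats)
+    print(tuple(mask_pred.shape), tuple(sem_feat.shape))
+    # expected (2, 183, 32, 32) and (2, 256, 32, 32): level 1 resolution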
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/grid_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/grid_head.py
new file mode 100644
index 000000000..72065309b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/grid_head.py
@@ -0,0 +1,361 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import kaiming_init, normal_init
+
+from ..builder import build_loss
+from ..registry import HEADS
+from ..utils import ConvModule
+
+
+@HEADS.register_module
+class GridHead(nn.Module):
+
+ def __init__(self,
+ grid_points=9,
+ num_convs=8,
+ roi_feat_size=14,
+ in_channels=256,
+ conv_kernel_size=3,
+ point_feat_channels=64,
+ deconv_kernel_size=4,
+ class_agnostic=False,
+ loss_grid=dict(
+ type='CrossEntropyLoss', use_sigmoid=True,
+ loss_weight=15),
+ conv_cfg=None,
+ norm_cfg=dict(type='GN', num_groups=36)):
+ super(GridHead, self).__init__()
+ self.grid_points = grid_points
+ self.num_convs = num_convs
+ self.roi_feat_size = roi_feat_size
+ self.in_channels = in_channels
+ self.conv_kernel_size = conv_kernel_size
+ self.point_feat_channels = point_feat_channels
+ self.conv_out_channels = self.point_feat_channels * self.grid_points
+ self.class_agnostic = class_agnostic
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ if isinstance(norm_cfg, dict) and norm_cfg['type'] == 'GN':
+ assert self.conv_out_channels % norm_cfg['num_groups'] == 0
+
+ assert self.grid_points >= 4
+ self.grid_size = int(np.sqrt(self.grid_points))
+ if self.grid_size * self.grid_size != self.grid_points:
+ raise ValueError('grid_points must be a square number')
+
+ # the predicted heatmap is half of whole_map_size
+ if not isinstance(self.roi_feat_size, int):
+            raise ValueError('Only square RoIs are supported in Grid R-CNN')
+ self.whole_map_size = self.roi_feat_size * 4
+
+ # compute point-wise sub-regions
+ self.sub_regions = self.calc_sub_regions()
+
+ self.convs = []
+ for i in range(self.num_convs):
+ in_channels = (
+ self.in_channels if i == 0 else self.conv_out_channels)
+ stride = 2 if i == 0 else 1
+ padding = (self.conv_kernel_size - 1) // 2
+ self.convs.append(
+ ConvModule(
+ in_channels,
+ self.conv_out_channels,
+ self.conv_kernel_size,
+ stride=stride,
+ padding=padding,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ bias=True))
+ self.convs = nn.Sequential(*self.convs)
+
+ self.deconv1 = nn.ConvTranspose2d(
+ self.conv_out_channels,
+ self.conv_out_channels,
+ kernel_size=deconv_kernel_size,
+ stride=2,
+ padding=(deconv_kernel_size - 2) // 2,
+ groups=grid_points)
+ self.norm1 = nn.GroupNorm(grid_points, self.conv_out_channels)
+ self.deconv2 = nn.ConvTranspose2d(
+ self.conv_out_channels,
+ grid_points,
+ kernel_size=deconv_kernel_size,
+ stride=2,
+ padding=(deconv_kernel_size - 2) // 2,
+ groups=grid_points)
+
+ # find the 4-neighbor of each grid point
+ self.neighbor_points = []
+ grid_size = self.grid_size
+ for i in range(grid_size): # i-th column
+ for j in range(grid_size): # j-th row
+ neighbors = []
+ if i > 0: # left: (i - 1, j)
+ neighbors.append((i - 1) * grid_size + j)
+ if j > 0: # up: (i, j - 1)
+ neighbors.append(i * grid_size + j - 1)
+ if j < grid_size - 1: # down: (i, j + 1)
+ neighbors.append(i * grid_size + j + 1)
+ if i < grid_size - 1: # right: (i + 1, j)
+ neighbors.append((i + 1) * grid_size + j)
+ self.neighbor_points.append(tuple(neighbors))
+ # total edges in the grid
+ self.num_edges = sum([len(p) for p in self.neighbor_points])
+
+ self.forder_trans = nn.ModuleList() # first-order feature transition
+ self.sorder_trans = nn.ModuleList() # second-order feature transition
+ for neighbors in self.neighbor_points:
+ fo_trans = nn.ModuleList()
+ so_trans = nn.ModuleList()
+ for _ in range(len(neighbors)):
+ # each transition module consists of a 5x5 depth-wise conv and
+ # 1x1 conv.
+ fo_trans.append(
+ nn.Sequential(
+ nn.Conv2d(
+ self.point_feat_channels,
+ self.point_feat_channels,
+ 5,
+ stride=1,
+ padding=2,
+ groups=self.point_feat_channels),
+ nn.Conv2d(self.point_feat_channels,
+ self.point_feat_channels, 1)))
+ so_trans.append(
+ nn.Sequential(
+ nn.Conv2d(
+ self.point_feat_channels,
+ self.point_feat_channels,
+ 5,
+ 1,
+ 2,
+ groups=self.point_feat_channels),
+ nn.Conv2d(self.point_feat_channels,
+ self.point_feat_channels, 1)))
+ self.forder_trans.append(fo_trans)
+ self.sorder_trans.append(so_trans)
+
+ self.loss_grid = build_loss(loss_grid)
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+ # TODO: compare mode = "fan_in" or "fan_out"
+ kaiming_init(m)
+ for m in self.modules():
+ if isinstance(m, nn.ConvTranspose2d):
+ normal_init(m, std=0.001)
+ nn.init.constant_(self.deconv2.bias, -np.log(0.99 / 0.01))
+
+ def forward(self, x):
+ assert x.shape[-1] == x.shape[-2] == self.roi_feat_size
+ # RoI feature transformation, downsample 2x
+ x = self.convs(x)
+
+ c = self.point_feat_channels
+ # first-order fusion
+ x_fo = [None for _ in range(self.grid_points)]
+ for i, points in enumerate(self.neighbor_points):
+ x_fo[i] = x[:, i * c:(i + 1) * c]
+ for j, point_idx in enumerate(points):
+ x_fo[i] = x_fo[i] + self.forder_trans[i][j](
+ x[:, point_idx * c:(point_idx + 1) * c])
+
+ # second-order fusion
+ x_so = [None for _ in range(self.grid_points)]
+ for i, points in enumerate(self.neighbor_points):
+ x_so[i] = x[:, i * c:(i + 1) * c]
+ for j, point_idx in enumerate(points):
+ x_so[i] = x_so[i] + self.sorder_trans[i][j](x_fo[point_idx])
+
+ # predicted heatmap with fused features
+ x2 = torch.cat(x_so, dim=1)
+ x2 = self.deconv1(x2)
+ x2 = F.relu(self.norm1(x2), inplace=True)
+ heatmap = self.deconv2(x2)
+
+ # predicted heatmap with original features (applicable during training)
+ if self.training:
+ x1 = x
+ x1 = self.deconv1(x1)
+ x1 = F.relu(self.norm1(x1), inplace=True)
+ heatmap_unfused = self.deconv2(x1)
+ else:
+ heatmap_unfused = heatmap
+
+ return dict(fused=heatmap, unfused=heatmap_unfused)
+
+ def calc_sub_regions(self):
+ """Compute point specific representation regions.
+
+ See Grid R-CNN Plus (https://arxiv.org/abs/1906.05688) for details.
+ """
+ # to make it consistent with the original implementation, half_size
+ # is computed as 2 * quarter_size, which is smaller
+ half_size = self.whole_map_size // 4 * 2
+ sub_regions = []
+ for i in range(self.grid_points):
+ x_idx = i // self.grid_size
+ y_idx = i % self.grid_size
+ if x_idx == 0:
+ sub_x1 = 0
+ elif x_idx == self.grid_size - 1:
+ sub_x1 = half_size
+ else:
+ ratio = x_idx / (self.grid_size - 1) - 0.25
+ sub_x1 = max(int(ratio * self.whole_map_size), 0)
+
+ if y_idx == 0:
+ sub_y1 = 0
+ elif y_idx == self.grid_size - 1:
+ sub_y1 = half_size
+ else:
+ ratio = y_idx / (self.grid_size - 1) - 0.25
+ sub_y1 = max(int(ratio * self.whole_map_size), 0)
+ sub_regions.append(
+ (sub_x1, sub_y1, sub_x1 + half_size, sub_y1 + half_size))
+ return sub_regions
+
+ def get_target(self, sampling_results, rcnn_train_cfg):
+ # mix all samples (across images) together.
+ pos_bboxes = torch.cat([res.pos_bboxes for res in sampling_results],
+ dim=0).cpu()
+ pos_gt_bboxes = torch.cat(
+ [res.pos_gt_bboxes for res in sampling_results], dim=0).cpu()
+ assert pos_bboxes.shape == pos_gt_bboxes.shape
+
+ # expand pos_bboxes to 2x of original size
+ x1 = pos_bboxes[:, 0] - (pos_bboxes[:, 2] - pos_bboxes[:, 0]) / 2
+ y1 = pos_bboxes[:, 1] - (pos_bboxes[:, 3] - pos_bboxes[:, 1]) / 2
+ x2 = pos_bboxes[:, 2] + (pos_bboxes[:, 2] - pos_bboxes[:, 0]) / 2
+ y2 = pos_bboxes[:, 3] + (pos_bboxes[:, 3] - pos_bboxes[:, 1]) / 2
+ pos_bboxes = torch.stack([x1, y1, x2, y2], dim=-1)
+ pos_bbox_ws = (pos_bboxes[:, 2] - pos_bboxes[:, 0]).unsqueeze(-1)
+ pos_bbox_hs = (pos_bboxes[:, 3] - pos_bboxes[:, 1]).unsqueeze(-1)
+
+ num_rois = pos_bboxes.shape[0]
+ map_size = self.whole_map_size
+ # this is not the final target shape
+ targets = torch.zeros((num_rois, self.grid_points, map_size, map_size),
+ dtype=torch.float)
+
+ # pre-compute interpolation factors for all grid points.
+ # the first item is the factor of x-dim, and the second is y-dim.
+ # for a 9-point grid, factors are like (1, 0), (0.5, 0.5), (0, 1)
+ factors = []
+ for j in range(self.grid_points):
+ x_idx = j // self.grid_size
+ y_idx = j % self.grid_size
+ factors.append((1 - x_idx / (self.grid_size - 1),
+ 1 - y_idx / (self.grid_size - 1)))
+
+ radius = rcnn_train_cfg.pos_radius
+ radius2 = radius**2
+ for i in range(num_rois):
+ # ignore small bboxes
+ if (pos_bbox_ws[i] <= self.grid_size
+ or pos_bbox_hs[i] <= self.grid_size):
+ continue
+ # for each grid point, mark a small circle as positive
+ for j in range(self.grid_points):
+ factor_x, factor_y = factors[j]
+ gridpoint_x = factor_x * pos_gt_bboxes[i, 0] + (
+ 1 - factor_x) * pos_gt_bboxes[i, 2]
+ gridpoint_y = factor_y * pos_gt_bboxes[i, 1] + (
+ 1 - factor_y) * pos_gt_bboxes[i, 3]
+
+ cx = int((gridpoint_x - pos_bboxes[i, 0]) / pos_bbox_ws[i] *
+ map_size)
+ cy = int((gridpoint_y - pos_bboxes[i, 1]) / pos_bbox_hs[i] *
+ map_size)
+
+ for x in range(cx - radius, cx + radius + 1):
+ for y in range(cy - radius, cy + radius + 1):
+ if x >= 0 and x < map_size and y >= 0 and y < map_size:
+ if (x - cx)**2 + (y - cy)**2 <= radius2:
+ targets[i, j, y, x] = 1
+ # reduce the target heatmap size by a half
+ # proposed in Grid R-CNN Plus (https://arxiv.org/abs/1906.05688).
+ sub_targets = []
+ for i in range(self.grid_points):
+ sub_x1, sub_y1, sub_x2, sub_y2 = self.sub_regions[i]
+ sub_targets.append(targets[:, [i], sub_y1:sub_y2, sub_x1:sub_x2])
+ sub_targets = torch.cat(sub_targets, dim=1)
+ sub_targets = sub_targets.cuda()
+ return sub_targets
+
+ def loss(self, grid_pred, grid_targets):
+ loss_fused = self.loss_grid(grid_pred['fused'], grid_targets)
+ loss_unfused = self.loss_grid(grid_pred['unfused'], grid_targets)
+ loss_grid = loss_fused + loss_unfused
+ return dict(loss_grid=loss_grid)
+
+ def get_bboxes(self, det_bboxes, grid_pred, img_meta):
+ # TODO: refactoring
+ assert det_bboxes.shape[0] == grid_pred.shape[0]
+ det_bboxes = det_bboxes.cpu()
+ cls_scores = det_bboxes[:, [4]]
+ det_bboxes = det_bboxes[:, :4]
+ grid_pred = grid_pred.sigmoid().cpu()
+
+ R, c, h, w = grid_pred.shape
+ half_size = self.whole_map_size // 4 * 2
+ assert h == w == half_size
+ assert c == self.grid_points
+
+ # find the point with max scores in the half-sized heatmap
+ grid_pred = grid_pred.view(R * c, h * w)
+ pred_scores, pred_position = grid_pred.max(dim=1)
+ xs = pred_position % w
+ ys = pred_position // w
+
+ # get the position in the whole heatmap instead of half-sized heatmap
+ for i in range(self.grid_points):
+ xs[i::self.grid_points] += self.sub_regions[i][0]
+ ys[i::self.grid_points] += self.sub_regions[i][1]
+
+ # reshape to (num_rois, grid_points)
+ pred_scores, xs, ys = tuple(
+ map(lambda x: x.view(R, c), [pred_scores, xs, ys]))
+
+ # get expanded pos_bboxes
+ widths = (det_bboxes[:, 2] - det_bboxes[:, 0]).unsqueeze(-1)
+ heights = (det_bboxes[:, 3] - det_bboxes[:, 1]).unsqueeze(-1)
+ x1 = (det_bboxes[:, 0, None] - widths / 2)
+ y1 = (det_bboxes[:, 1, None] - heights / 2)
+ # map the grid point to the absolute coordinates
+ abs_xs = (xs.float() + 0.5) / w * widths + x1
+ abs_ys = (ys.float() + 0.5) / h * heights + y1
+
+ # get the grid points indices that fall on the bbox boundaries
+ x1_inds = [i for i in range(self.grid_size)]
+ y1_inds = [i * self.grid_size for i in range(self.grid_size)]
+ x2_inds = [
+ self.grid_points - self.grid_size + i
+ for i in range(self.grid_size)
+ ]
+ y2_inds = [(i + 1) * self.grid_size - 1 for i in range(self.grid_size)]
+
+ # voting of all grid points on some boundary
+ bboxes_x1 = (abs_xs[:, x1_inds] * pred_scores[:, x1_inds]).sum(
+ dim=1, keepdim=True) / (
+ pred_scores[:, x1_inds].sum(dim=1, keepdim=True))
+ bboxes_y1 = (abs_ys[:, y1_inds] * pred_scores[:, y1_inds]).sum(
+ dim=1, keepdim=True) / (
+ pred_scores[:, y1_inds].sum(dim=1, keepdim=True))
+ bboxes_x2 = (abs_xs[:, x2_inds] * pred_scores[:, x2_inds]).sum(
+ dim=1, keepdim=True) / (
+ pred_scores[:, x2_inds].sum(dim=1, keepdim=True))
+ bboxes_y2 = (abs_ys[:, y2_inds] * pred_scores[:, y2_inds]).sum(
+ dim=1, keepdim=True) / (
+ pred_scores[:, y2_inds].sum(dim=1, keepdim=True))
+
+ bbox_res = torch.cat(
+ [bboxes_x1, bboxes_y1, bboxes_x2, bboxes_y2, cls_scores], dim=1)
+ bbox_res[:, [0, 2]].clamp_(min=0, max=img_meta[0]['img_shape'][1] - 1)
+ bbox_res[:, [1, 3]].clamp_(min=0, max=img_meta[0]['img_shape'][0] - 1)
+
+ return bbox_res
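+
+
+# Illustrative smoke test (not part of the upstream code): with the default
+# 9-point grid and roi_feat_size=14, the convs downsample the RoI to 7x7 and
+# the two group-wise deconvs bring it to 28x28, i.e. half of whole_map_size.
+# Shapes are assumptions; inert on import.
+if __name__ == '__main__':
+    head = GridHead()
+    head.init_weights()
+    head.eval()
+    rois = torch.rand(2, 256, 14, 14)
+    out = head(rois)
+    print(tuple(out['fused'].shape))  # expected (2, 9, 28, 28)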
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/htc_mask_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/htc_mask_head.py
new file mode 100644
index 000000000..7c8125543
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/htc_mask_head.py
@@ -0,0 +1,38 @@
+from ..registry import HEADS
+from ..utils import ConvModule
+from .fcn_mask_head import FCNMaskHead
+
+
+@HEADS.register_module
+class HTCMaskHead(FCNMaskHead):
+
+ def __init__(self, *args, **kwargs):
+ super(HTCMaskHead, self).__init__(*args, **kwargs)
+ self.conv_res = ConvModule(
+ self.conv_out_channels,
+ self.conv_out_channels,
+ 1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+
+ def init_weights(self):
+ super(HTCMaskHead, self).init_weights()
+ self.conv_res.init_weights()
+
+ def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
+ if res_feat is not None:
+ res_feat = self.conv_res(res_feat)
+ x = x + res_feat
+ for conv in self.convs:
+ x = conv(x)
+ res_feat = x
+ outs = []
+ if return_logits:
+ x = self.upsample(x)
+ if self.upsample_method == 'deconv':
+ x = self.relu(x)
+ mask_pred = self.conv_logits(x)
+ outs.append(mask_pred)
+ if return_feat:
+ outs.append(res_feat)
+ return outs if len(outs) > 1 else outs[0]
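+
+
+# Illustrative smoke test (not part of the upstream code): HTCMaskHead adds a
+# 1x1 conv so the mask feature from the previous cascade stage can be fused
+# into the current one and handed on. Shapes assume the usual FCNMaskHead
+# defaults (256 channels, 2x deconv); inert on import.
+if __name__ == '__main__':
+    import torch
+    head = HTCMaskHead(num_convs=4, in_channels=256, num_classes=81)
+    head.init_weights()
+    x = torch.rand(4, 256, 14, 14)
+    prev_feat = torch.rand(4, 256, 14, 14)  # feature from the previous stage
+    mask_pred, res_feat = head(x, res_feat=prev_feat)
+    print(tuple(mask_pred.shape), tuple(res_feat.shape))
+    # expected (4, 81, 28, 28) and (4, 256, 14, 14)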
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/mask_feat_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/mask_feat_head.py
new file mode 100644
index 000000000..980b4ad8f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/mask_feat_head.py
@@ -0,0 +1,119 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import xavier_init, normal_init
+
+from ..registry import HEADS
+from ..builder import build_loss
+from ..utils import ConvModule
+
+import torch
+import numpy as np
+
+
+@HEADS.register_module
+class MaskFeatHead(nn.Module):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ start_level,
+ end_level,
+ num_classes,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(MaskFeatHead, self).__init__()
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.start_level = start_level
+ self.end_level = end_level
+ assert start_level >= 0 and end_level >= start_level
+ self.num_classes = num_classes
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ self.convs_all_levels = nn.ModuleList()
+ for i in range(self.start_level, self.end_level + 1):
+ convs_per_level = nn.Sequential()
+ if i == 0:
+ one_conv = ConvModule(
+ self.in_channels,
+ self.out_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ convs_per_level.add_module('conv' + str(i), one_conv)
+ self.convs_all_levels.append(convs_per_level)
+ continue
+
+ for j in range(i):
+ if j == 0:
+                    chn = self.in_channels + 2 if i == 3 else self.in_channels
+ one_conv = ConvModule(
+ chn,
+ self.out_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ convs_per_level.add_module('conv' + str(j), one_conv)
+ one_upsample = nn.Upsample(
+ scale_factor=2, mode='bilinear', align_corners=False)
+ convs_per_level.add_module(
+ 'upsample' + str(j), one_upsample)
+ continue
+
+ one_conv = ConvModule(
+ self.out_channels,
+ self.out_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ inplace=False)
+ convs_per_level.add_module('conv' + str(j), one_conv)
+ one_upsample = nn.Upsample(
+ scale_factor=2,
+ mode='bilinear',
+ align_corners=False)
+ convs_per_level.add_module('upsample' + str(j), one_upsample)
+
+ self.convs_all_levels.append(convs_per_level)
+
+ self.conv_pred = nn.Sequential(
+ ConvModule(
+ self.out_channels,
+ self.num_classes,
+ 1,
+ padding=0,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg),
+ )
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ normal_init(m, std=0.01)
+
+ def forward(self, inputs):
+ assert len(inputs) == (self.end_level - self.start_level + 1)
+
+ feature_add_all_level = self.convs_all_levels[0](inputs[0])
+ for i in range(1, len(inputs)):
+ input_p = inputs[i]
+ if i == 3:
+ input_feat = input_p
+ x_range = torch.linspace(-1, 1, input_feat.shape[-1], device=input_feat.device)
+ y_range = torch.linspace(-1, 1, input_feat.shape[-2], device=input_feat.device)
+ y, x = torch.meshgrid(y_range, x_range)
+ y = y.expand([input_feat.shape[0], 1, -1, -1])
+ x = x.expand([input_feat.shape[0], 1, -1, -1])
+ coord_feat = torch.cat([x, y], 1)
+ input_p = torch.cat([input_p, coord_feat], 1)
+
+ feature_add_all_level += self.convs_all_levels[i](input_p)
+
+ feature_pred = self.conv_pred(feature_add_all_level)
+ return feature_pred
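+
+
+# Illustrative smoke test (not part of the upstream code): each level is
+# convolved and repeatedly upsampled to the resolution of the first level,
+# the results are summed, and the coarsest level (i == 3) additionally gets
+# normalized x/y coordinate maps, a CoordConv-style cue. Channel and level
+# choices are assumptions; inert on import.
+if __name__ == '__main__':
+    head = MaskFeatHead(in_channels=256, out_channels=128,
+                        start_level=0, end_level=3, num_classes=256)
+    head.init_weights()
+    feats = [torch.rand(2, 256, 64 // 2 ** i, 64 // 2 ** i) for i in range(4)]
+    mask_feat = head(feats)
+    print(tuple(mask_feat.shape))  # expected (2, 256, 64, 64)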
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/maskiou_head.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/maskiou_head.py
new file mode 100644
index 000000000..d509f177f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/maskiou_head.py
@@ -0,0 +1,190 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import kaiming_init, normal_init
+from torch.nn.modules.utils import _pair
+
+from mmdet.core import force_fp32
+from ..builder import build_loss
+from ..registry import HEADS
+
+
+@HEADS.register_module
+class MaskIoUHead(nn.Module):
+ """Mask IoU Head.
+
+ This head predicts the IoU of predicted masks and corresponding gt masks.
+ """
+
+ def __init__(self,
+ num_convs=4,
+ num_fcs=2,
+ roi_feat_size=14,
+ in_channels=256,
+ conv_out_channels=256,
+ fc_out_channels=1024,
+ num_classes=81,
+ loss_iou=dict(type='MSELoss', loss_weight=0.5)):
+ super(MaskIoUHead, self).__init__()
+ self.in_channels = in_channels
+ self.conv_out_channels = conv_out_channels
+ self.fc_out_channels = fc_out_channels
+ self.num_classes = num_classes
+ self.fp16_enabled = False
+
+ self.convs = nn.ModuleList()
+ for i in range(num_convs):
+ if i == 0:
+ # concatenation of mask feature and mask prediction
+ in_channels = self.in_channels + 1
+ else:
+ in_channels = self.conv_out_channels
+ stride = 2 if i == num_convs - 1 else 1
+ self.convs.append(
+ nn.Conv2d(
+ in_channels,
+ self.conv_out_channels,
+ 3,
+ stride=stride,
+ padding=1))
+
+ roi_feat_size = _pair(roi_feat_size)
+ pooled_area = (roi_feat_size[0] // 2) * (roi_feat_size[1] // 2)
+ self.fcs = nn.ModuleList()
+ for i in range(num_fcs):
+ in_channels = (
+ self.conv_out_channels *
+ pooled_area if i == 0 else self.fc_out_channels)
+ self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))
+
+ self.fc_mask_iou = nn.Linear(self.fc_out_channels, self.num_classes)
+ self.relu = nn.ReLU()
+ self.max_pool = nn.MaxPool2d(2, 2)
+ self.loss_iou = build_loss(loss_iou)
+
+ def init_weights(self):
+ for conv in self.convs:
+ kaiming_init(conv)
+ for fc in self.fcs:
+ kaiming_init(
+ fc,
+ a=1,
+ mode='fan_in',
+ nonlinearity='leaky_relu',
+ distribution='uniform')
+ normal_init(self.fc_mask_iou, std=0.01)
+
+ def forward(self, mask_feat, mask_pred):
+ mask_pred = mask_pred.sigmoid()
+ mask_pred_pooled = self.max_pool(mask_pred.unsqueeze(1))
+
+ x = torch.cat((mask_feat, mask_pred_pooled), 1)
+
+ for conv in self.convs:
+ x = self.relu(conv(x))
+ x = x.view(x.size(0), -1)
+ for fc in self.fcs:
+ x = self.relu(fc(x))
+ mask_iou = self.fc_mask_iou(x)
+ return mask_iou
+
+ @force_fp32(apply_to=('mask_iou_pred', ))
+ def loss(self, mask_iou_pred, mask_iou_targets):
+ pos_inds = mask_iou_targets > 0
+ if pos_inds.sum() > 0:
+ loss_mask_iou = self.loss_iou(mask_iou_pred[pos_inds],
+ mask_iou_targets[pos_inds])
+ else:
+ loss_mask_iou = mask_iou_pred * 0
+ return dict(loss_mask_iou=loss_mask_iou)
+
+ @force_fp32(apply_to=('mask_pred', ))
+ def get_target(self, sampling_results, gt_masks, mask_pred, mask_targets,
+ rcnn_train_cfg):
+ """Compute target of mask IoU.
+
+        The mask IoU target is the IoU between the predicted mask (inside a
+        bbox) and the gt mask of the corresponding instance (the whole
+        instance). The intersection area is computed inside the bbox; the gt
+        mask area is computed in two steps: first compute the gt area inside
+        the bbox, then divide it by the ratio of the gt area inside the bbox
+        to the gt area of the whole instance.
+
+ Args:
+ sampling_results (list[:obj:`SamplingResult`]): sampling results.
+ gt_masks (list[ndarray]): Gt masks (the whole instance) of each
+                image, binary maps with the same shape as the input image.
+ mask_pred (Tensor): Predicted masks of each positive proposal,
+ shape (num_pos, h, w).
+ mask_targets (Tensor): Gt mask of each positive proposal,
+ binary map of the shape (num_pos, h, w).
+ rcnn_train_cfg (dict): Training config for R-CNN part.
+
+ Returns:
+ Tensor: mask iou target (length == num positive).
+ """
+ pos_proposals = [res.pos_bboxes for res in sampling_results]
+ pos_assigned_gt_inds = [
+ res.pos_assigned_gt_inds for res in sampling_results
+ ]
+
+ # compute the area ratio of gt areas inside the proposals and
+ # the whole instance
+ area_ratios = map(self._get_area_ratio, pos_proposals,
+ pos_assigned_gt_inds, gt_masks)
+ area_ratios = torch.cat(list(area_ratios))
+ assert mask_targets.size(0) == area_ratios.size(0)
+
+ mask_pred = (mask_pred > rcnn_train_cfg.mask_thr_binary).float()
+ mask_pred_areas = mask_pred.sum((-1, -2))
+
+ # mask_pred and mask_targets are binary maps
+ overlap_areas = (mask_pred * mask_targets).sum((-1, -2))
+
+ # compute the mask area of the whole instance
+ gt_full_areas = mask_targets.sum((-1, -2)) / (area_ratios + 1e-7)
+
+ mask_iou_targets = overlap_areas / (
+ mask_pred_areas + gt_full_areas - overlap_areas)
+ return mask_iou_targets
+
+ def _get_area_ratio(self, pos_proposals, pos_assigned_gt_inds, gt_masks):
+ """Compute area ratio of the gt mask inside the proposal and the gt
+ mask of the corresponding instance"""
+ num_pos = pos_proposals.size(0)
+ if num_pos > 0:
+ area_ratios = []
+ proposals_np = pos_proposals.cpu().numpy()
+ pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
+ # compute mask areas of gt instances (batch processing for speedup)
+ gt_instance_mask_area = gt_masks.sum((-1, -2))
+ for i in range(num_pos):
+ gt_mask = gt_masks[pos_assigned_gt_inds[i]]
+
+ # crop the gt mask inside the proposal
+ x1, y1, x2, y2 = proposals_np[i, :].astype(np.int32)
+ gt_mask_in_proposal = gt_mask[y1:y2 + 1, x1:x2 + 1]
+
+ ratio = gt_mask_in_proposal.sum() / (
+ gt_instance_mask_area[pos_assigned_gt_inds[i]] + 1e-7)
+ area_ratios.append(ratio)
+ area_ratios = torch.from_numpy(np.stack(area_ratios)).float().to(
+ pos_proposals.device)
+ else:
+ area_ratios = pos_proposals.new_zeros((0, ))
+ return area_ratios
+
+ @force_fp32(apply_to=('mask_iou_pred', ))
+ def get_mask_scores(self, mask_iou_pred, det_bboxes, det_labels):
+ """Get the mask scores.
+
+ mask_score = bbox_score * mask_iou
+ """
+ inds = range(det_labels.size(0))
+ mask_scores = mask_iou_pred[inds, det_labels + 1] * det_bboxes[inds,
+ -1]
+ mask_scores = mask_scores.cpu().numpy()
+ det_labels = det_labels.cpu().numpy()
+ return [
+ mask_scores[det_labels == i] for i in range(self.num_classes - 1)
+ ]
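+
+
+# Illustrative smoke test (not part of the upstream code): the mask prediction
+# is expected at twice the RoI feature resolution, so the 2x max-pool lines it
+# up with mask_feat before the two are concatenated. Shapes are assumptions;
+# inert on import.
+if __name__ == '__main__':
+    head = MaskIoUHead()
+    head.init_weights()
+    mask_feat = torch.rand(4, 256, 14, 14)
+    mask_pred = torch.rand(4, 28, 28)
+    mask_iou = head(mask_feat, mask_pred)
+    print(tuple(mask_iou.shape))  # expected (4, 81): one IoU score per class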
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/__init__.py
new file mode 100644
index 000000000..fa5740443
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/__init__.py
@@ -0,0 +1,6 @@
+from .bfp import BFP
+from .fpn import FPN
+from .hrfpn import HRFPN
+from .nas_fpn import NASFPN
+
+__all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/bfp.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/bfp.py
new file mode 100644
index 000000000..03aee106d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/bfp.py
@@ -0,0 +1,102 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import xavier_init
+
+from ..plugins import NonLocal2D
+from ..registry import NECKS
+from ..utils import ConvModule
+
+
+@NECKS.register_module
+class BFP(nn.Module):
+ """BFP (Balanced Feature Pyrmamids)
+
+ BFP takes multi-level features as inputs and gather them into a single one,
+ then refine the gathered feature and scatter the refined results to
+ multi-level features. This module is used in Libra R-CNN (CVPR 2019), see
+ https://arxiv.org/pdf/1904.02701.pdf for details.
+
+ Args:
+ in_channels (int): Number of input channels (feature maps of all levels
+ should have the same channels).
+ num_levels (int): Number of input feature levels.
+ conv_cfg (dict): The config dict for convolution layers.
+ norm_cfg (dict): The config dict for normalization layers.
+ refine_level (int): Index of integration and refine level of BSF in
+ multi-level features from bottom to top.
+ refine_type (str): Type of the refine op, currently support
+ [None, 'conv', 'non_local'].
+ """
+
+ def __init__(self,
+ in_channels,
+ num_levels,
+ refine_level=2,
+ refine_type=None,
+ conv_cfg=None,
+ norm_cfg=None):
+ super(BFP, self).__init__()
+ assert refine_type in [None, 'conv', 'non_local']
+
+ self.in_channels = in_channels
+ self.num_levels = num_levels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ self.refine_level = refine_level
+ self.refine_type = refine_type
+ assert 0 <= self.refine_level < self.num_levels
+
+ if self.refine_type == 'conv':
+ self.refine = ConvModule(
+ self.in_channels,
+ self.in_channels,
+ 3,
+ padding=1,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+ elif self.refine_type == 'non_local':
+ self.refine = NonLocal2D(
+ self.in_channels,
+ reduction=1,
+ use_scale=False,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg)
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ xavier_init(m, distribution='uniform')
+
+ def forward(self, inputs):
+ assert len(inputs) == self.num_levels
+
+ # step 1: gather multi-level features by resize and average
+ feats = []
+ gather_size = inputs[self.refine_level].size()[2:]
+ for i in range(self.num_levels):
+ if i < self.refine_level:
+ gathered = F.adaptive_max_pool2d(
+ inputs[i], output_size=gather_size)
+ else:
+ gathered = F.interpolate(
+ inputs[i], size=gather_size, mode='nearest')
+ feats.append(gathered)
+
+ bsf = sum(feats) / len(feats)
+
+ # step 2: refine gathered features
+ if self.refine_type is not None:
+ bsf = self.refine(bsf)
+
+ # step 3: scatter refined features to multi-levels by a residual path
+ outs = []
+ for i in range(self.num_levels):
+ out_size = inputs[i].size()[2:]
+ if i < self.refine_level:
+ residual = F.interpolate(bsf, size=out_size, mode='nearest')
+ else:
+ residual = F.adaptive_max_pool2d(bsf, output_size=out_size)
+ outs.append(residual + inputs[i])
+
+ return tuple(outs)
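+
+
+# Illustrative smoke test (not part of the upstream code): the five levels are
+# gathered at refine_level, averaged, refined by a 3x3 conv and scattered back
+# as residuals, so every output keeps its input resolution. Level count and
+# shapes are assumptions; inert on import.
+if __name__ == '__main__':
+    import torch
+    neck = BFP(in_channels=256, num_levels=5, refine_level=2,
+               refine_type='conv')
+    neck.init_weights()
+    feats = [torch.rand(2, 256, 64 // 2 ** i, 64 // 2 ** i) for i in range(5)]
+    outs = neck(feats)
+    print([tuple(o.shape) for o in outs])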
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/fpn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/fpn.py
new file mode 100644
index 000000000..77dd409c4
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/fpn.py
@@ -0,0 +1,141 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import xavier_init
+
+from mmdet.core import auto_fp16
+from ..registry import NECKS
+from ..utils import ConvModule
+
+
+@NECKS.register_module
+class FPN(nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_outs,
+ start_level=0,
+ end_level=-1,
+ add_extra_convs=False,
+ extra_convs_on_inputs=True,
+ relu_before_extra_convs=False,
+ no_norm_on_lateral=False,
+ conv_cfg=None,
+ norm_cfg=None,
+ activation=None):
+ super(FPN, self).__init__()
+ assert isinstance(in_channels, list)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.num_ins = len(in_channels)
+ self.num_outs = num_outs
+ self.activation = activation
+ self.relu_before_extra_convs = relu_before_extra_convs
+ self.no_norm_on_lateral = no_norm_on_lateral
+ self.fp16_enabled = False
+
+ if end_level == -1:
+ self.backbone_end_level = self.num_ins
+ assert num_outs >= self.num_ins - start_level
+ else:
+ # if end_level < inputs, no extra level is allowed
+ self.backbone_end_level = end_level
+ assert end_level <= len(in_channels)
+ assert num_outs == end_level - start_level
+ self.start_level = start_level
+ self.end_level = end_level
+ self.add_extra_convs = add_extra_convs
+ self.extra_convs_on_inputs = extra_convs_on_inputs
+
+ self.lateral_convs = nn.ModuleList()
+ self.fpn_convs = nn.ModuleList()
+
+ for i in range(self.start_level, self.backbone_end_level):
+ l_conv = ConvModule(
+ in_channels[i],
+ out_channels,
+ 1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
+ activation=self.activation,
+ inplace=False)
+ fpn_conv = ConvModule(
+ out_channels,
+ out_channels,
+ 3,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ activation=self.activation,
+ inplace=False)
+
+ self.lateral_convs.append(l_conv)
+ self.fpn_convs.append(fpn_conv)
+
+ # add extra conv layers (e.g., RetinaNet)
+ extra_levels = num_outs - self.backbone_end_level + self.start_level
+ if add_extra_convs and extra_levels >= 1:
+ for i in range(extra_levels):
+ if i == 0 and self.extra_convs_on_inputs:
+ in_channels = self.in_channels[self.backbone_end_level - 1]
+ else:
+ in_channels = out_channels
+ extra_fpn_conv = ConvModule(
+ in_channels,
+ out_channels,
+ 3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ activation=self.activation,
+ inplace=False)
+ self.fpn_convs.append(extra_fpn_conv)
+
+ # default init_weights for conv(msra) and norm in ConvModule
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ xavier_init(m, distribution='uniform')
+
+ @auto_fp16()
+ def forward(self, inputs):
+ assert len(inputs) == len(self.in_channels)
+
+ # build laterals
+ laterals = [
+ lateral_conv(inputs[i + self.start_level])
+ for i, lateral_conv in enumerate(self.lateral_convs)
+ ]
+
+ # build top-down path
+ used_backbone_levels = len(laterals)
+ for i in range(used_backbone_levels - 1, 0, -1):
+ laterals[i - 1] += F.interpolate(
+ laterals[i], scale_factor=2, mode='nearest')
+
+ # build outputs
+ # part 1: from original levels
+ outs = [
+ self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
+ ]
+ # part 2: add extra levels
+ if self.num_outs > len(outs):
+ # use max pool to get more levels on top of outputs
+ # (e.g., Faster R-CNN, Mask R-CNN)
+ if not self.add_extra_convs:
+ for i in range(self.num_outs - used_backbone_levels):
+ outs.append(F.max_pool2d(outs[-1], 1, stride=2))
+ # add conv layers on top of original feature maps (RetinaNet)
+ else:
+ if self.extra_convs_on_inputs:
+ orig = inputs[self.backbone_end_level - 1]
+ outs.append(self.fpn_convs[used_backbone_levels](orig))
+ else:
+ outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
+ for i in range(used_backbone_levels + 1, self.num_outs):
+ if self.relu_before_extra_convs:
+ outs.append(self.fpn_convs[i](F.relu(outs[-1])))
+ else:
+ outs.append(self.fpn_convs[i](outs[-1]))
+ return tuple(outs)
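+
+
+# Illustrative smoke test (not part of the upstream code): with num_outs=5 and
+# add_extra_convs=False the fifth level comes from max-pooling the last FPN
+# output. The ResNet-style channel list and shapes are assumptions; inert on
+# import.
+if __name__ == '__main__':
+    import torch
+    neck = FPN(in_channels=[256, 512, 1024, 2048], out_channels=256,
+               num_outs=5)
+    neck.init_weights()
+    feats = [torch.rand(2, c, 64 // 2 ** i, 64 // 2 ** i)
+             for i, c in enumerate([256, 512, 1024, 2048])]
+    outs = neck(feats)
+    print([tuple(o.shape) for o in outs])
+    # expected spatial sizes: 64, 32, 16, 8 from the pyramid plus 4 on top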
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/hrfpn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/hrfpn.py
new file mode 100644
index 000000000..33155f057
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/hrfpn.py
@@ -0,0 +1,100 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn.weight_init import caffe2_xavier_init
+from torch.utils.checkpoint import checkpoint
+
+from ..registry import NECKS
+from ..utils import ConvModule
+
+
+@NECKS.register_module
+class HRFPN(nn.Module):
+ """HRFPN (High Resolution Feature Pyrmamids)
+
+ arXiv: https://arxiv.org/abs/1904.04514
+
+ Args:
+ in_channels (list): number of channels for each branch.
+ out_channels (int): output channels of feature pyramids.
+ num_outs (int): number of output stages.
+        pooling_type (str): pooling type used to generate feature pyramids,
+            either 'MAX' or 'AVG'.
+ conv_cfg (dict): dictionary to construct and config conv layer.
+ norm_cfg (dict): dictionary to construct and config norm layer.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ stride (int): stride of 3x3 convolutional layers
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_outs=5,
+ pooling_type='AVG',
+ conv_cfg=None,
+ norm_cfg=None,
+ with_cp=False,
+ stride=1):
+ super(HRFPN, self).__init__()
+ assert isinstance(in_channels, list)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.num_ins = len(in_channels)
+ self.num_outs = num_outs
+ self.with_cp = with_cp
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+
+ self.reduction_conv = ConvModule(
+ sum(in_channels),
+ out_channels,
+ kernel_size=1,
+ conv_cfg=self.conv_cfg,
+ activation=None)
+
+ self.fpn_convs = nn.ModuleList()
+ for i in range(self.num_outs):
+ self.fpn_convs.append(
+ ConvModule(
+ out_channels,
+ out_channels,
+ kernel_size=3,
+ padding=1,
+ stride=stride,
+ conv_cfg=self.conv_cfg,
+ activation=None))
+
+ if pooling_type == 'MAX':
+ self.pooling = F.max_pool2d
+ else:
+ self.pooling = F.avg_pool2d
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ caffe2_xavier_init(m)
+
+ def forward(self, inputs):
+ assert len(inputs) == self.num_ins
+ outs = [inputs[0]]
+ for i in range(1, self.num_ins):
+ outs.append(
+ F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
+ out = torch.cat(outs, dim=1)
+ if out.requires_grad and self.with_cp:
+ out = checkpoint(self.reduction_conv, out)
+ else:
+ out = self.reduction_conv(out)
+ outs = [out]
+ for i in range(1, self.num_outs):
+ outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
+ outputs = []
+
+ for i in range(self.num_outs):
+ if outs[i].requires_grad and self.with_cp:
+ tmp_out = checkpoint(self.fpn_convs[i], outs[i])
+ else:
+ tmp_out = self.fpn_convs[i](outs[i])
+ outputs.append(tmp_out)
+ return tuple(outputs)
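+
+
+# Illustrative smoke test (not part of the upstream code): branch i is assumed
+# to be 2**i times coarser than branch 0, so all branches can be upsampled to
+# the first one, concatenated, reduced to out_channels and then pooled into
+# num_outs pyramid levels. Channels and shapes are assumptions; inert on
+# import.
+if __name__ == '__main__':
+    neck = HRFPN(in_channels=[32, 64, 128, 256], out_channels=256, num_outs=5)
+    neck.init_weights()
+    feats = [torch.rand(2, c, 64 // 2 ** i, 64 // 2 ** i)
+             for i, c in enumerate([32, 64, 128, 256])]
+    outs = neck(feats)
+    print([tuple(o.shape) for o in outs])
+    # expected spatial sizes: 64, 32, 16, 8, 4 with 256 channels each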
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/nas_fpn.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/nas_fpn.py
new file mode 100644
index 000000000..b0a689837
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/nas_fpn.py
@@ -0,0 +1,186 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import caffe2_xavier_init
+
+from ..registry import NECKS
+from ..utils import ConvModule
+
+
+class MergingCell(nn.Module):
+
+ def __init__(self, channels=256, with_conv=True, norm_cfg=None):
+ super(MergingCell, self).__init__()
+ self.with_conv = with_conv
+ if self.with_conv:
+ self.conv_out = ConvModule(
+ channels,
+ channels,
+ 3,
+ padding=1,
+ norm_cfg=norm_cfg,
+ order=('act', 'conv', 'norm'))
+
+ def _binary_op(self, x1, x2):
+ raise NotImplementedError
+
+ def _resize(self, x, size):
+ if x.shape[-2:] == size:
+ return x
+ elif x.shape[-2:] < size:
+ return F.interpolate(x, size=size, mode='nearest')
+ else:
+ assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
+ kernel_size = x.shape[-1] // size[-1]
+ x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
+ return x
+
+ def forward(self, x1, x2, out_size):
+ assert x1.shape[:2] == x2.shape[:2]
+ assert len(out_size) == 2
+
+ x1 = self._resize(x1, out_size)
+ x2 = self._resize(x2, out_size)
+
+ x = self._binary_op(x1, x2)
+ if self.with_conv:
+ x = self.conv_out(x)
+ return x
+
+
+class SumCell(MergingCell):
+
+ def _binary_op(self, x1, x2):
+ return x1 + x2
+
+
+class GPCell(MergingCell):
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
+
+ def _binary_op(self, x1, x2):
+ x2_att = self.global_pool(x2).sigmoid()
+ return x2 + x2_att * x1
+
+
+@NECKS.register_module
+class NASFPN(nn.Module):
+ """NAS-FPN.
+
+ NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object
+ Detection. (https://arxiv.org/abs/1904.07392)
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_outs,
+ stack_times,
+ start_level=0,
+ end_level=-1,
+ add_extra_convs=False,
+ norm_cfg=None):
+ super(NASFPN, self).__init__()
+ assert isinstance(in_channels, list)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.num_ins = len(in_channels) # num of input feature levels
+ self.num_outs = num_outs # num of output feature levels
+ self.stack_times = stack_times
+ self.norm_cfg = norm_cfg
+
+ if end_level == -1:
+ self.backbone_end_level = self.num_ins
+ assert num_outs >= self.num_ins - start_level
+ else:
+ # if end_level < inputs, no extra level is allowed
+ self.backbone_end_level = end_level
+ assert end_level <= len(in_channels)
+ assert num_outs == end_level - start_level
+ self.start_level = start_level
+ self.end_level = end_level
+ self.add_extra_convs = add_extra_convs
+
+ # add lateral connections
+ self.lateral_convs = nn.ModuleList()
+ for i in range(self.start_level, self.backbone_end_level):
+ l_conv = ConvModule(
+ in_channels[i],
+ out_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ activation=None)
+ self.lateral_convs.append(l_conv)
+
+ # add extra downsample layers (stride-2 pooling or conv)
+ extra_levels = num_outs - self.backbone_end_level + self.start_level
+ self.extra_downsamples = nn.ModuleList()
+ for i in range(extra_levels):
+ extra_conv = ConvModule(
+ out_channels,
+ out_channels,
+ 1,
+ norm_cfg=norm_cfg,
+ activation=None)
+ self.extra_downsamples.append(
+ nn.Sequential(extra_conv, nn.MaxPool2d(2, 2)))
+
+ # add NAS FPN connections
+ self.fpn_stages = nn.ModuleList()
+ for _ in range(self.stack_times):
+ stage = nn.ModuleDict()
+ # gp(p6, p4) -> p4_1
+ stage['gp_64_4'] = GPCell(out_channels, norm_cfg=norm_cfg)
+ # sum(p4_1, p4) -> p4_2
+ stage['sum_44_4'] = SumCell(out_channels, norm_cfg=norm_cfg)
+ # sum(p4_2, p3) -> p3_out
+ stage['sum_43_3'] = SumCell(out_channels, norm_cfg=norm_cfg)
+ # sum(p3_out, p4_2) -> p4_out
+ stage['sum_34_4'] = SumCell(out_channels, norm_cfg=norm_cfg)
+ # sum(p5, gp(p4_out, p3_out)) -> p5_out
+ stage['gp_43_5'] = GPCell(with_conv=False)
+ stage['sum_55_5'] = SumCell(out_channels, norm_cfg=norm_cfg)
+ # sum(p7, gp(p5_out, p4_2)) -> p7_out
+ stage['gp_54_7'] = GPCell(with_conv=False)
+ stage['sum_77_7'] = SumCell(out_channels, norm_cfg=norm_cfg)
+ # gp(p7_out, p5_out) -> p6_out
+ stage['gp_75_6'] = GPCell(out_channels, norm_cfg=norm_cfg)
+ self.fpn_stages.append(stage)
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ caffe2_xavier_init(m)
+
+ def forward(self, inputs):
+ # build P3-P5
+ feats = [
+ lateral_conv(inputs[i + self.start_level])
+ for i, lateral_conv in enumerate(self.lateral_convs)
+ ]
+ # build P6-P7 on top of P5
+ for downsample in self.extra_downsamples:
+ feats.append(downsample(feats[-1]))
+
+ p3, p4, p5, p6, p7 = feats
+
+ for stage in self.fpn_stages:
+ # gp(p6, p4) -> p4_1
+ p4_1 = stage['gp_64_4'](p6, p4, out_size=p4.shape[-2:])
+ # sum(p4_1, p4) -> p4_2
+ p4_2 = stage['sum_44_4'](p4_1, p4, out_size=p4.shape[-2:])
+ # sum(p4_2, p3) -> p3_out
+ p3 = stage['sum_43_3'](p4_2, p3, out_size=p3.shape[-2:])
+ # sum(p3_out, p4_2) -> p4_out
+ p4 = stage['sum_34_4'](p3, p4_2, out_size=p4.shape[-2:])
+ # sum(p5, gp(p4_out, p3_out)) -> p5_out
+ p5_tmp = stage['gp_43_5'](p4, p3, out_size=p5.shape[-2:])
+ p5 = stage['sum_55_5'](p5, p5_tmp, out_size=p5.shape[-2:])
+ # sum(p7, gp(p5_out, p4_2)) -> p7_out
+ p7_tmp = stage['gp_54_7'](p5, p4_2, out_size=p7.shape[-2:])
+ p7 = stage['sum_77_7'](p7, p7_tmp, out_size=p7.shape[-2:])
+ # gp(p7_out, p5_out) -> p6_out
+ p6 = stage['gp_75_6'](p7, p5, out_size=p6.shape[-2:])
+
+ return p3, p4, p5, p6, p7
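+
+
+# Illustrative smoke test (not part of the upstream code): three backbone
+# levels (P3-P5) plus two stride-2 extra downsamples give the five levels the
+# merging cells expect, and each stacked stage rewires them with sum and
+# global-pooling cells. Channels, stack_times and shapes are assumptions;
+# inert on import.
+if __name__ == '__main__':
+    import torch
+    neck = NASFPN(in_channels=[512, 1024, 2048], out_channels=256,
+                  num_outs=5, stack_times=3)
+    neck.init_weights()
+    feats = [torch.rand(2, c, 32 // 2 ** i, 32 // 2 ** i)
+             for i, c in enumerate([512, 1024, 2048])]
+    outs = neck(feats)
+    print([tuple(o.shape) for o in outs])
+    # expected spatial sizes: 32, 16, 8, 4, 2 with 256 channels each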
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/__init__.py
new file mode 100644
index 000000000..0ff85f2f5
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/__init__.py
@@ -0,0 +1,4 @@
+from .generalized_attention import GeneralizedAttention
+from .non_local import NonLocal2D
+
+__all__ = ['NonLocal2D', 'GeneralizedAttention']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/generalized_attention.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/generalized_attention.py
new file mode 100644
index 000000000..86e5b1e9d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/generalized_attention.py
@@ -0,0 +1,383 @@
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import kaiming_init
+
+
+class GeneralizedAttention(nn.Module):
+ """GeneralizedAttention module.
+
+ See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
+    (https://arxiv.org/abs/1904.05873) for details.
+
+ Args:
+ in_dim (int): Channels of the input feature map.
+ spatial_range (int): The spatial range.
+ -1 indicates no spatial range constraint.
+ num_heads (int): The head number of empirical_attention module.
+ position_embedding_dim (int): The position embedding dimension.
+ position_magnitude (int): A multiplier acting on coord difference.
+ kv_stride (int): The feature stride acting on key/value feature map.
+ q_stride (int): The feature stride acting on query feature map.
+ attention_type (str): A binary indicator string for indicating which
+ items in generalized empirical_attention module are used.
+ '1000' indicates 'query and key content' (appr - appr) item,
+ '0100' indicates 'query content and relative position'
+ (appr - position) item,
+ '0010' indicates 'key content only' (bias - appr) item,
+ '0001' indicates 'relative position only' (bias - position) item.
+ """
+
+ def __init__(self,
+ in_dim,
+ spatial_range=-1,
+ num_heads=9,
+ position_embedding_dim=-1,
+ position_magnitude=1,
+ kv_stride=2,
+ q_stride=1,
+ attention_type='1111'):
+
+ super(GeneralizedAttention, self).__init__()
+
+ # hard range means local range for non-local operation
+ self.position_embedding_dim = (
+ position_embedding_dim if position_embedding_dim > 0 else in_dim)
+
+ self.position_magnitude = position_magnitude
+ self.num_heads = num_heads
+ self.channel_in = in_dim
+ self.spatial_range = spatial_range
+ self.kv_stride = kv_stride
+ self.q_stride = q_stride
+ self.attention_type = [bool(int(_)) for _ in attention_type]
+ self.qk_embed_dim = in_dim // num_heads
+ out_c = self.qk_embed_dim * num_heads
+
+ if self.attention_type[0] or self.attention_type[1]:
+ self.query_conv = nn.Conv2d(
+ in_channels=in_dim,
+ out_channels=out_c,
+ kernel_size=1,
+ bias=False)
+ self.query_conv.kaiming_init = True
+
+ if self.attention_type[0] or self.attention_type[2]:
+ self.key_conv = nn.Conv2d(
+ in_channels=in_dim,
+ out_channels=out_c,
+ kernel_size=1,
+ bias=False)
+ self.key_conv.kaiming_init = True
+
+ self.v_dim = in_dim // num_heads
+ self.value_conv = nn.Conv2d(
+ in_channels=in_dim,
+ out_channels=self.v_dim * num_heads,
+ kernel_size=1,
+ bias=False)
+ self.value_conv.kaiming_init = True
+
+ if self.attention_type[1] or self.attention_type[3]:
+ self.appr_geom_fc_x = nn.Linear(
+ self.position_embedding_dim // 2, out_c, bias=False)
+ self.appr_geom_fc_x.kaiming_init = True
+
+ self.appr_geom_fc_y = nn.Linear(
+ self.position_embedding_dim // 2, out_c, bias=False)
+ self.appr_geom_fc_y.kaiming_init = True
+
+ if self.attention_type[2]:
+ stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+ appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+ self.appr_bias = nn.Parameter(appr_bias_value)
+
+ if self.attention_type[3]:
+ stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+ geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+ self.geom_bias = nn.Parameter(geom_bias_value)
+
+ self.proj_conv = nn.Conv2d(
+ in_channels=self.v_dim * num_heads,
+ out_channels=in_dim,
+ kernel_size=1,
+ bias=True)
+ self.proj_conv.kaiming_init = True
+ self.gamma = nn.Parameter(torch.zeros(1))
+
+ if self.spatial_range >= 0:
+ # only works when non local is after 3*3 conv
+ if in_dim == 256:
+ max_len = 84
+ elif in_dim == 512:
+ max_len = 42
+
+ max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
+ local_constraint_map = np.ones(
+                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
+ for iy in range(max_len):
+ for ix in range(max_len):
+ local_constraint_map[
+ iy, ix,
+ max((iy - self.spatial_range) //
+ self.kv_stride, 0):min((iy + self.spatial_range +
+ 1) // self.kv_stride +
+ 1, max_len),
+ max((ix - self.spatial_range) //
+ self.kv_stride, 0):min((ix + self.spatial_range +
+ 1) // self.kv_stride +
+ 1, max_len)] = 0
+
+ self.local_constraint_map = nn.Parameter(
+ torch.from_numpy(local_constraint_map).byte(),
+ requires_grad=False)
+
+ if self.q_stride > 1:
+ self.q_downsample = nn.AvgPool2d(
+ kernel_size=1, stride=self.q_stride)
+ else:
+ self.q_downsample = None
+
+ if self.kv_stride > 1:
+ self.kv_downsample = nn.AvgPool2d(
+ kernel_size=1, stride=self.kv_stride)
+ else:
+ self.kv_downsample = None
+
+ self.init_weights()
+
+ def get_position_embedding(self,
+ h,
+ w,
+ h_kv,
+ w_kv,
+ q_stride,
+ kv_stride,
+ device,
+ feat_dim,
+ wave_length=1000):
+ h_idxs = torch.linspace(0, h - 1, h).cuda(device)
+ h_idxs = h_idxs.view((h, 1)) * q_stride
+
+ w_idxs = torch.linspace(0, w - 1, w).cuda(device)
+ w_idxs = w_idxs.view((w, 1)) * q_stride
+
+ h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).cuda(device)
+ h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride
+
+ w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).cuda(device)
+ w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride
+
+ # (h, h_kv, 1)
+ h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
+ h_diff *= self.position_magnitude
+
+ # (w, w_kv, 1)
+ w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
+ w_diff *= self.position_magnitude
+
+ feat_range = torch.arange(0, feat_dim / 4).cuda(device)
+
+ dim_mat = torch.Tensor([wave_length]).cuda(device)
+ dim_mat = dim_mat**((4. / feat_dim) * feat_range)
+ dim_mat = dim_mat.view((1, 1, -1))
+
+ embedding_x = torch.cat(
+ ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)
+
+ embedding_y = torch.cat(
+ ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)
+
+ return embedding_x, embedding_y
+
+ def forward(self, x_input):
+ num_heads = self.num_heads
+
+ # use empirical_attention
+ if self.q_downsample is not None:
+ x_q = self.q_downsample(x_input)
+ else:
+ x_q = x_input
+ n, _, h, w = x_q.shape
+
+ if self.kv_downsample is not None:
+ x_kv = self.kv_downsample(x_input)
+ else:
+ x_kv = x_input
+ _, _, h_kv, w_kv = x_kv.shape
+
+ if self.attention_type[0] or self.attention_type[1]:
+ proj_query = self.query_conv(x_q).view(
+ (n, num_heads, self.qk_embed_dim, h * w))
+ proj_query = proj_query.permute(0, 1, 3, 2)
+
+ if self.attention_type[0] or self.attention_type[2]:
+ proj_key = self.key_conv(x_kv).view(
+ (n, num_heads, self.qk_embed_dim, h_kv * w_kv))
+
+ if self.attention_type[1] or self.attention_type[3]:
+ position_embed_x, position_embed_y = self.get_position_embedding(
+ h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
+ x_input.device, self.position_embedding_dim)
+ # (n, num_heads, w, w_kv, dim)
+ position_feat_x = self.appr_geom_fc_x(position_embed_x).\
+ view(1, w, w_kv, num_heads, self.qk_embed_dim).\
+ permute(0, 3, 1, 2, 4).\
+ repeat(n, 1, 1, 1, 1)
+
+ # (n, num_heads, h, h_kv, dim)
+ position_feat_y = self.appr_geom_fc_y(position_embed_y).\
+ view(1, h, h_kv, num_heads, self.qk_embed_dim).\
+ permute(0, 3, 1, 2, 4).\
+ repeat(n, 1, 1, 1, 1)
+
+ position_feat_x /= math.sqrt(2)
+ position_feat_y /= math.sqrt(2)
+
+ # accelerate for saliency only
+ if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim).\
+ repeat(n, 1, 1, 1)
+
+ energy = torch.matmul(appr_bias, proj_key).\
+ view(n, num_heads, 1, h_kv * w_kv)
+
+ h = 1
+ w = 1
+ else:
+            # (n, num_heads, h*w, h_kv*w_kv), query before key
+ if not self.attention_type[0]:
+ energy = torch.zeros(
+ n,
+ num_heads,
+ h,
+ w,
+ h_kv,
+ w_kv,
+ dtype=x_input.dtype,
+ device=x_input.device)
+
+ # attention_type[0]: appr - appr
+ # attention_type[1]: appr - position
+ # attention_type[2]: bias - appr
+ # attention_type[3]: bias - position
+ if self.attention_type[0] or self.attention_type[2]:
+ if self.attention_type[0] and self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim)
+ energy = torch.matmul(proj_query + appr_bias, proj_key).\
+ view(n, num_heads, h, w, h_kv, w_kv)
+
+ elif self.attention_type[0]:
+ energy = torch.matmul(proj_query, proj_key).\
+ view(n, num_heads, h, w, h_kv, w_kv)
+
+ elif self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim).\
+ repeat(n, 1, 1, 1)
+
+ energy += torch.matmul(appr_bias, proj_key).\
+ view(n, num_heads, 1, 1, h_kv, w_kv)
+
+ if self.attention_type[1] or self.attention_type[3]:
+ if self.attention_type[1] and self.attention_type[3]:
+ geom_bias = self.geom_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim)
+
+ proj_query_reshape = (proj_query + geom_bias).\
+ view(n, num_heads, h, w, self.qk_embed_dim)
+
+ energy_x = torch.matmul(
+ proj_query_reshape.permute(0, 1, 3, 2, 4),
+ position_feat_x.permute(0, 1, 2, 4, 3))
+ energy_x = energy_x.\
+ permute(0, 1, 3, 2, 4).unsqueeze(4)
+
+ energy_y = torch.matmul(
+ proj_query_reshape,
+ position_feat_y.permute(0, 1, 2, 4, 3))
+ energy_y = energy_y.unsqueeze(5)
+
+ energy += energy_x + energy_y
+
+ elif self.attention_type[1]:
+ proj_query_reshape = proj_query.\
+ view(n, num_heads, h, w, self.qk_embed_dim)
+ proj_query_reshape = proj_query_reshape.\
+ permute(0, 1, 3, 2, 4)
+ position_feat_x_reshape = position_feat_x.\
+ permute(0, 1, 2, 4, 3)
+ position_feat_y_reshape = position_feat_y.\
+ permute(0, 1, 2, 4, 3)
+
+ energy_x = torch.matmul(proj_query_reshape,
+ position_feat_x_reshape)
+ energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)
+
+ energy_y = torch.matmul(proj_query_reshape,
+ position_feat_y_reshape)
+ energy_y = energy_y.unsqueeze(5)
+
+ energy += energy_x + energy_y
+
+ elif self.attention_type[3]:
+ geom_bias = self.geom_bias.\
+ view(1, num_heads, self.qk_embed_dim, 1).\
+ repeat(n, 1, 1, 1)
+
+ position_feat_x_reshape = position_feat_x.\
+ view(n, num_heads, w*w_kv, self.qk_embed_dim)
+
+ position_feat_y_reshape = position_feat_y.\
+ view(n, num_heads, h * h_kv, self.qk_embed_dim)
+
+ energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
+ energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)
+
+ energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
+ energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)
+
+ energy += energy_x + energy_y
+
+ energy = energy.view(n, num_heads, h * w, h_kv * w_kv)
+
+ if self.spatial_range >= 0:
+ cur_local_constraint_map = \
+ self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
+ contiguous().\
+ view(1, 1, h*w, h_kv*w_kv)
+
+ energy = energy.masked_fill_(cur_local_constraint_map,
+ float('-inf'))
+
+ attention = F.softmax(energy, 3)
+
+ proj_value = self.value_conv(x_kv)
+ proj_value_reshape = proj_value.\
+ view((n, num_heads, self.v_dim, h_kv * w_kv)).\
+ permute(0, 1, 3, 2)
+
+ out = torch.matmul(attention, proj_value_reshape).\
+ permute(0, 1, 3, 2).\
+ contiguous().\
+ view(n, self.v_dim * self.num_heads, h, w)
+
+ out = self.proj_conv(out)
+ out = self.gamma * out + x_input
+ return out
+
+ def init_weights(self):
+ for m in self.modules():
+ if hasattr(m, 'kaiming_init') and m.kaiming_init:
+ kaiming_init(
+ m,
+ mode='fan_in',
+ nonlinearity='leaky_relu',
+ bias=0,
+ distribution='uniform',
+ a=1)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/non_local.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/non_local.py
new file mode 100644
index 000000000..2e89c2fdc
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/non_local.py
@@ -0,0 +1,114 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import constant_init, normal_init
+
+from ..utils import ConvModule
+
+
+class NonLocal2D(nn.Module):
+ """Non-local module.
+
+ See https://arxiv.org/abs/1711.07971 for details.
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+ reduction (int): Channel reduction ratio.
+        use_scale (bool): Whether to scale pairwise_weight by
+            1/sqrt(inter_channels).
+ conv_cfg (dict): The config dict for convolution layers.
+ (only applicable to conv_out)
+ norm_cfg (dict): The config dict for normalization layers.
+ (only applicable to conv_out)
+ mode (str): Options are `embedded_gaussian` and `dot_product`.
+ """
+
+ def __init__(self,
+ in_channels,
+ reduction=2,
+ use_scale=True,
+ conv_cfg=None,
+ norm_cfg=None,
+ mode='embedded_gaussian'):
+ super(NonLocal2D, self).__init__()
+ self.in_channels = in_channels
+ self.reduction = reduction
+ self.use_scale = use_scale
+ self.inter_channels = in_channels // reduction
+ self.mode = mode
+ assert mode in ['embedded_gaussian', 'dot_product']
+
+        # g, theta and phi are essentially plain `nn.Conv2d` layers; ConvModule
+        # is used so that a conv/norm config can be plugged in later if needed.
+ self.g = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ activation=None)
+ self.theta = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ activation=None)
+ self.phi = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ activation=None)
+ self.conv_out = ConvModule(
+ self.inter_channels,
+ self.in_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ activation=None)
+
+ self.init_weights()
+
+ def init_weights(self, std=0.01, zeros_init=True):
+ for m in [self.g, self.theta, self.phi]:
+ normal_init(m.conv, std=std)
+ if zeros_init:
+ constant_init(self.conv_out.conv, 0)
+ else:
+ normal_init(self.conv_out.conv, std=std)
+
+ def embedded_gaussian(self, theta_x, phi_x):
+ # pairwise_weight: [N, HxW, HxW]
+ pairwise_weight = torch.matmul(theta_x, phi_x)
+ if self.use_scale:
+ # theta_x.shape[-1] is `self.inter_channels`
+ pairwise_weight /= theta_x.shape[-1]**0.5
+ pairwise_weight = pairwise_weight.softmax(dim=-1)
+ return pairwise_weight
+
+ def dot_product(self, theta_x, phi_x):
+ # pairwise_weight: [N, HxW, HxW]
+ pairwise_weight = torch.matmul(theta_x, phi_x)
+ pairwise_weight /= pairwise_weight.shape[-1]
+ return pairwise_weight
+
+ def forward(self, x):
+ n, _, h, w = x.shape
+
+ # g_x: [N, HxW, C]
+ g_x = self.g(x).view(n, self.inter_channels, -1)
+ g_x = g_x.permute(0, 2, 1)
+
+ # theta_x: [N, HxW, C]
+ theta_x = self.theta(x).view(n, self.inter_channels, -1)
+ theta_x = theta_x.permute(0, 2, 1)
+
+ # phi_x: [N, C, HxW]
+ phi_x = self.phi(x).view(n, self.inter_channels, -1)
+
+ pairwise_func = getattr(self, self.mode)
+ # pairwise_weight: [N, HxW, HxW]
+ pairwise_weight = pairwise_func(theta_x, phi_x)
+
+ # y: [N, HxW, C]
+ y = torch.matmul(pairwise_weight, g_x)
+ # y: [N, C, H, W]
+ y = y.permute(0, 2, 1).reshape(n, self.inter_channels, h, w)
+
+ output = x + self.conv_out(y)
+
+ return output
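+
+
+if __name__ == '__main__':
+    # Illustrative smoke test, not part of the original module (run as a
+    # module with `python -m`, assuming mmdet and its compiled ops are
+    # installed): the non-local block is a residual refinement, so the
+    # output keeps the shape of the input feature map.
+    x = torch.rand(2, 16, 20, 20)
+    block = NonLocal2D(in_channels=16, reduction=2, mode='embedded_gaussian')
+    out = block(x)
+    assert out.shape == x.shape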
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/registry.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/registry.py
new file mode 100644
index 000000000..78ef24815
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/registry.py
@@ -0,0 +1,9 @@
+from mmdet.utils import Registry
+
+BACKBONES = Registry('backbone')
+NECKS = Registry('neck')
+ROI_EXTRACTORS = Registry('roi_extractor')
+SHARED_HEADS = Registry('shared_head')
+HEADS = Registry('head')
+LOSSES = Registry('loss')
+DETECTORS = Registry('detector')
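+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module: components
+    # register themselves with one of the registries above via the
+    # decorator and are later looked up by the class name given in the
+    # config files (assuming the Registry helper exposes `get()`).
+    @DETECTORS.register_module
+    class DummyDetector(object):
+        pass
+
+    assert DETECTORS.get('DummyDetector') is DummyDetector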
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/__init__.py
new file mode 100644
index 000000000..9161708ce
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/__init__.py
@@ -0,0 +1,3 @@
+from .single_level import SingleRoIExtractor
+
+__all__ = ['SingleRoIExtractor']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/single_level.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/single_level.py
new file mode 100644
index 000000000..6620d1d86
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/single_level.py
@@ -0,0 +1,107 @@
+from __future__ import division
+
+import torch
+import torch.nn as nn
+
+from mmdet import ops
+from mmdet.core import force_fp32
+from ..registry import ROI_EXTRACTORS
+
+
+@ROI_EXTRACTORS.register_module
+class SingleRoIExtractor(nn.Module):
+ """Extract RoI features from a single level feature map.
+
+    If there are multiple input feature levels, each RoI is mapped to a level
+ according to its scale.
+
+ Args:
+ roi_layer (dict): Specify RoI layer type and arguments.
+ out_channels (int): Output channels of RoI layers.
+        featmap_strides (list[int]): Strides of input feature maps.
+ finest_scale (int): Scale threshold of mapping to level 0.
+ """
+
+ def __init__(self,
+ roi_layer,
+ out_channels,
+ featmap_strides,
+ finest_scale=56):
+ super(SingleRoIExtractor, self).__init__()
+ self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
+ self.out_channels = out_channels
+ self.featmap_strides = featmap_strides
+ self.finest_scale = finest_scale
+ self.fp16_enabled = False
+
+ @property
+ def num_inputs(self):
+        """int: Number of input feature map levels."""
+ return len(self.featmap_strides)
+
+ def init_weights(self):
+ pass
+
+ def build_roi_layers(self, layer_cfg, featmap_strides):
+ cfg = layer_cfg.copy()
+ layer_type = cfg.pop('type')
+ assert hasattr(ops, layer_type)
+ layer_cls = getattr(ops, layer_type)
+ roi_layers = nn.ModuleList(
+ [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
+ return roi_layers
+
+ def map_roi_levels(self, rois, num_levels):
+ """Map rois to corresponding feature levels by scales.
+
+ - scale < finest_scale * 2: level 0
+ - finest_scale * 2 <= scale < finest_scale * 4: level 1
+ - finest_scale * 4 <= scale < finest_scale * 8: level 2
+ - scale >= finest_scale * 8: level 3
+
+ Args:
+ rois (Tensor): Input RoIs, shape (k, 5).
+ num_levels (int): Total level number.
+
+ Returns:
+ Tensor: Level index (0-based) of each RoI, shape (k, )
+ """
+ scale = torch.sqrt(
+ (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
+ target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
+ target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
+ return target_lvls
+
+ def roi_rescale(self, rois, scale_factor):
+ cx = (rois[:, 1] + rois[:, 3]) * 0.5
+ cy = (rois[:, 2] + rois[:, 4]) * 0.5
+ w = rois[:, 3] - rois[:, 1] + 1
+ h = rois[:, 4] - rois[:, 2] + 1
+ new_w = w * scale_factor
+ new_h = h * scale_factor
+ x1 = cx - new_w * 0.5 + 0.5
+ x2 = cx + new_w * 0.5 - 0.5
+ y1 = cy - new_h * 0.5 + 0.5
+ y2 = cy + new_h * 0.5 - 0.5
+ new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1)
+ return new_rois
+
+ @force_fp32(apply_to=('feats', ), out_fp16=True)
+ def forward(self, feats, rois, roi_scale_factor=None):
+ if len(feats) == 1:
+ return self.roi_layers[0](feats[0], rois)
+
+ out_size = self.roi_layers[0].out_size
+ num_levels = len(feats)
+ target_lvls = self.map_roi_levels(rois, num_levels)
+ roi_feats = feats[0].new_zeros(
+ rois.size(0), self.out_channels, *out_size)
+ if roi_scale_factor is not None:
+ rois = self.roi_rescale(rois, roi_scale_factor)
+ for i in range(num_levels):
+ inds = target_lvls == i
+ if inds.any():
+ rois_ = rois[inds, :]
+ roi_feats_t = self.roi_layers[i](feats[i], rois_)
+ roi_feats[inds] = roi_feats_t
+ return roi_feats
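+
+
+if __name__ == '__main__':
+    # Illustrative check of the scale-to-level mapping documented in
+    # `map_roi_levels`, not part of the original module (the RoIAlign
+    # config below is only an assumed, typical example; running it needs
+    # mmdet with its compiled ops installed). With finest_scale=56, RoIs
+    # whose sqrt(area) is below 112 map to level 0, 112-224 to level 1,
+    # 224-448 to level 2 and larger RoIs to level 3.
+    extractor = SingleRoIExtractor(
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32])
+    rois = torch.tensor([
+        [0., 0., 0., 99., 99.],    # scale ~100 -> level 0
+        [0., 0., 0., 149., 149.],  # scale ~150 -> level 1
+        [0., 0., 0., 299., 299.],  # scale ~300 -> level 2
+        [0., 0., 0., 599., 599.],  # scale ~600 -> level 3
+    ])
+    print(extractor.map_roi_levels(rois, num_levels=4))  # tensor([0, 1, 2, 3])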
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/__init__.py
new file mode 100644
index 000000000..bbe70145b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/__init__.py
@@ -0,0 +1,3 @@
+from .res_layer import ResLayer
+
+__all__ = ['ResLayer']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/res_layer.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/res_layer.py
new file mode 100644
index 000000000..e1a1ba0d7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/res_layer.py
@@ -0,0 +1,71 @@
+import torch.nn as nn
+from mmcv.cnn import constant_init, kaiming_init
+from mmcv.runner import load_checkpoint
+
+from mmdet.core import auto_fp16
+from mmdet.utils import get_root_logger
+from ..backbones import ResNet, make_res_layer
+from ..registry import SHARED_HEADS
+
+
+@SHARED_HEADS.register_module
+class ResLayer(nn.Module):
+
+ def __init__(self,
+ depth,
+ stage=3,
+ stride=2,
+ dilation=1,
+ style='pytorch',
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ with_cp=False,
+ dcn=None):
+ super(ResLayer, self).__init__()
+ self.norm_eval = norm_eval
+ self.norm_cfg = norm_cfg
+ self.stage = stage
+ self.fp16_enabled = False
+ block, stage_blocks = ResNet.arch_settings[depth]
+ stage_block = stage_blocks[stage]
+ planes = 64 * 2**stage
+ inplanes = 64 * 2**(stage - 1) * block.expansion
+
+ res_layer = make_res_layer(
+ block,
+ inplanes,
+ planes,
+ stage_block,
+ stride=stride,
+ dilation=dilation,
+ style=style,
+ with_cp=with_cp,
+ norm_cfg=self.norm_cfg,
+ dcn=dcn)
+ self.add_module('layer{}'.format(stage + 1), res_layer)
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = get_root_logger()
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ @auto_fp16()
+ def forward(self, x):
+ res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
+ out = res_layer(x)
+ return out
+
+ def train(self, mode=True):
+ super(ResLayer, self).train(mode)
+ if self.norm_eval:
+ for m in self.modules():
+ if isinstance(m, nn.BatchNorm2d):
+ m.eval()
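+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module (assumes mmdet
+    # and its compiled ops are installed): the shared head re-uses ResNet
+    # stage 4 as the per-RoI head, mapping e.g. (num_rois, 1024, 14, 14)
+    # RoI features to (num_rois, 2048, 7, 7) with the default stride of 2.
+    import torch
+    head = ResLayer(depth=50, stage=3, stride=2)
+    head.init_weights()
+    out = head(torch.rand(4, 1024, 14, 14))
+    assert out.shape == (4, 2048, 7, 7)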
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/__init__.py
new file mode 100644
index 000000000..3db40920d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/__init__.py
@@ -0,0 +1,12 @@
+from .conv_module import ConvModule, build_conv_layer
+from .conv_ws import ConvWS2d, conv_ws_2d
+from .norm import build_norm_layer
+from .scale import Scale
+from .weight_init import (bias_init_with_prob, kaiming_init, normal_init,
+ uniform_init, xavier_init)
+
+__all__ = [
+ 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',
+ 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',
+ 'kaiming_init', 'bias_init_with_prob', 'Scale'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_module.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_module.py
new file mode 100644
index 000000000..3be32c3a4
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_module.py
@@ -0,0 +1,167 @@
+import warnings
+
+import torch.nn as nn
+from mmcv.cnn import constant_init, kaiming_init
+
+from mmdet.ops import DeformConvPack, ModulatedDeformConvPack
+from .conv_ws import ConvWS2d
+from .norm import build_norm_layer
+
+conv_cfg = {
+ 'Conv': nn.Conv2d,
+ 'ConvWS': ConvWS2d,
+ 'DCN': DeformConvPack,
+ 'DCNv2': ModulatedDeformConvPack,
+ # TODO: octave conv
+}
+
+
+def build_conv_layer(cfg, *args, **kwargs):
+ """ Build convolution layer
+
+ Args:
+ cfg (None or dict): cfg should contain:
+ type (str): identify conv layer type.
+ layer args: args needed to instantiate a conv layer.
+
+ Returns:
+ layer (nn.Module): created conv layer
+ """
+ if cfg is None:
+ cfg_ = dict(type='Conv')
+ else:
+ assert isinstance(cfg, dict) and 'type' in cfg
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in conv_cfg:
+        raise KeyError('Unrecognized conv type {}'.format(layer_type))
+ else:
+ conv_layer = conv_cfg[layer_type]
+
+ layer = conv_layer(*args, **kwargs, **cfg_)
+
+ return layer
+
+
+class ConvModule(nn.Module):
+ """A conv block that contains conv/norm/activation layers.
+
+ Args:
+ in_channels (int): Same as nn.Conv2d.
+ out_channels (int): Same as nn.Conv2d.
+ kernel_size (int or tuple[int]): Same as nn.Conv2d.
+ stride (int or tuple[int]): Same as nn.Conv2d.
+ padding (int or tuple[int]): Same as nn.Conv2d.
+ dilation (int or tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+ bias (bool or str): If specified as `auto`, it will be decided by the
+ norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
+ False.
+ conv_cfg (dict): Config dict for convolution layer.
+ norm_cfg (dict): Config dict for normalization layer.
+        activation (str or None): Activation type, 'relu' by default.
+ inplace (bool): Whether to use inplace mode for activation.
+ order (tuple[str]): The order of conv/norm/activation layers. It is a
+ sequence of "conv", "norm" and "act". Examples are
+ ("conv", "norm", "act") and ("act", "conv", "norm").
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias='auto',
+ conv_cfg=None,
+ norm_cfg=None,
+ activation='relu',
+ inplace=True,
+ order=('conv', 'norm', 'act')):
+ super(ConvModule, self).__init__()
+ assert conv_cfg is None or isinstance(conv_cfg, dict)
+ assert norm_cfg is None or isinstance(norm_cfg, dict)
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.activation = activation
+ self.inplace = inplace
+ self.order = order
+ assert isinstance(self.order, tuple) and len(self.order) == 3
+ assert set(order) == set(['conv', 'norm', 'act'])
+
+ self.with_norm = norm_cfg is not None
+ self.with_activation = activation is not None
+ # if the conv layer is before a norm layer, bias is unnecessary.
+ if bias == 'auto':
+ bias = False if self.with_norm else True
+ self.with_bias = bias
+
+ if self.with_norm and self.with_bias:
+ warnings.warn('ConvModule has norm and bias at the same time')
+
+ # build convolution layer
+ self.conv = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups,
+ bias=bias)
+ # export the attributes of self.conv to a higher level for convenience
+ self.in_channels = self.conv.in_channels
+ self.out_channels = self.conv.out_channels
+ self.kernel_size = self.conv.kernel_size
+ self.stride = self.conv.stride
+ self.padding = self.conv.padding
+ self.dilation = self.conv.dilation
+ self.transposed = self.conv.transposed
+ self.output_padding = self.conv.output_padding
+ self.groups = self.conv.groups
+
+ # build normalization layers
+ if self.with_norm:
+ # norm layer is after conv layer
+ if order.index('norm') > order.index('conv'):
+ norm_channels = out_channels
+ else:
+ norm_channels = in_channels
+ self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
+ self.add_module(self.norm_name, norm)
+
+ # build activation layer
+ if self.with_activation:
+ # TODO: introduce `act_cfg` and supports more activation layers
+ if self.activation not in ['relu']:
+ raise ValueError('{} is currently not supported.'.format(
+ self.activation))
+ if self.activation == 'relu':
+ self.activate = nn.ReLU(inplace=inplace)
+
+ # Use msra init by default
+ self.init_weights()
+
+ @property
+ def norm(self):
+ return getattr(self, self.norm_name)
+
+ def init_weights(self):
+ nonlinearity = 'relu' if self.activation is None else self.activation
+ kaiming_init(self.conv, nonlinearity=nonlinearity)
+ if self.with_norm:
+ constant_init(self.norm, 1, bias=0)
+
+ def forward(self, x, activate=True, norm=True):
+ for layer in self.order:
+ if layer == 'conv':
+ x = self.conv(x)
+ elif layer == 'norm' and norm and self.with_norm:
+ x = self.norm(x)
+ elif layer == 'act' and activate and self.with_activation:
+ x = self.activate(x)
+ return x
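+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module (assumes mmdet
+    # and its compiled ops are installed): a 3x3 conv + BN + ReLU block in
+    # the default ("conv", "norm", "act") order. With a norm layer present,
+    # bias='auto' resolves to False.
+    import torch
+    m = ConvModule(
+        in_channels=3,
+        out_channels=8,
+        kernel_size=3,
+        padding=1,
+        norm_cfg=dict(type='BN'),
+        activation='relu')
+    y = m(torch.rand(2, 3, 32, 32))
+    assert y.shape == (2, 8, 32, 32)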
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_ws.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_ws.py
new file mode 100644
index 000000000..5ccd735fd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_ws.py
@@ -0,0 +1,46 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def conv_ws_2d(input,
+ weight,
+ bias=None,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ eps=1e-5):
+ c_in = weight.size(0)
+ weight_flat = weight.view(c_in, -1)
+ mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
+ std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
+ weight = (weight - mean) / (std + eps)
+ return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
+
+
+class ConvWS2d(nn.Conv2d):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True,
+ eps=1e-5):
+ super(ConvWS2d, self).__init__(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups,
+ bias=bias)
+ self.eps = eps
+
+ def forward(self, x):
+ return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
+ self.dilation, self.groups, self.eps)
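+
+
+if __name__ == '__main__':
+    # Illustrative smoke test, not part of the original module: ConvWS2d is
+    # a drop-in replacement for nn.Conv2d that standardizes each output
+    # filter (zero mean, unit std) via conv_ws_2d before convolving.
+    import torch
+    conv = ConvWS2d(3, 8, kernel_size=3, padding=1)
+    y = conv(torch.rand(2, 3, 16, 16))
+    assert y.shape == (2, 8, 16, 16)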
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/norm.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/norm.py
new file mode 100644
index 000000000..d5687cbd9
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/norm.py
@@ -0,0 +1,55 @@
+import torch.nn as nn
+
+norm_cfg = {
+ # format: layer_type: (abbreviation, module)
+ 'BN': ('bn', nn.BatchNorm2d),
+ 'SyncBN': ('bn', nn.SyncBatchNorm),
+ 'GN': ('gn', nn.GroupNorm),
+ # and potentially 'SN'
+}
+
+
+def build_norm_layer(cfg, num_features, postfix=''):
+ """ Build normalization layer
+
+ Args:
+ cfg (dict): cfg should contain:
+ type (str): identify norm layer type.
+ layer args: args needed to instantiate a norm layer.
+            requires_grad (bool): [optional] whether to stop gradient updates
+ num_features (int): number of channels from input.
+        postfix (int, str): appended to the norm abbreviation to
+            create the layer name.
+
+ Returns:
+ name (str): abbreviation + postfix
+ layer (nn.Module): created norm layer
+ """
+ assert isinstance(cfg, dict) and 'type' in cfg
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in norm_cfg:
+ raise KeyError('Unrecognized norm type {}'.format(layer_type))
+ else:
+ abbr, norm_layer = norm_cfg[layer_type]
+ if norm_layer is None:
+ raise NotImplementedError
+
+ assert isinstance(postfix, (int, str))
+ name = abbr + str(postfix)
+
+ requires_grad = cfg_.pop('requires_grad', True)
+ cfg_.setdefault('eps', 1e-5)
+ if layer_type != 'GN':
+ layer = norm_layer(num_features, **cfg_)
+ if layer_type == 'SyncBN':
+ layer._specify_ddp_gpu_num(1)
+ else:
+ assert 'num_groups' in cfg_
+ layer = norm_layer(num_channels=num_features, **cfg_)
+
+ for param in layer.parameters():
+ param.requires_grad = requires_grad
+
+ return name, layer
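+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module: build a BN and
+    # a GN layer from config dicts. The returned name is the abbreviation
+    # plus the postfix, e.g. 'bn1' or 'gn'.
+    name, bn = build_norm_layer(dict(type='BN'), num_features=64, postfix=1)
+    assert name == 'bn1' and isinstance(bn, nn.BatchNorm2d)
+    name, gn = build_norm_layer(
+        dict(type='GN', num_groups=32), num_features=64)
+    assert name == 'gn' and isinstance(gn, nn.GroupNorm)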
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/scale.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/scale.py
new file mode 100644
index 000000000..2461af8a6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/scale.py
@@ -0,0 +1,15 @@
+import torch
+import torch.nn as nn
+
+
+class Scale(nn.Module):
+ """
+ A learnable scale parameter
+ """
+
+ def __init__(self, scale=1.0):
+ super(Scale, self).__init__()
+ self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
+
+ def forward(self, x):
+ return x * self.scale
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/weight_init.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/weight_init.py
new file mode 100644
index 000000000..17d49880f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/weight_init.py
@@ -0,0 +1,46 @@
+import numpy as np
+import torch.nn as nn
+
+
+def xavier_init(module, gain=1, bias=0, distribution='normal'):
+ assert distribution in ['uniform', 'normal']
+ if distribution == 'uniform':
+ nn.init.xavier_uniform_(module.weight, gain=gain)
+ else:
+ nn.init.xavier_normal_(module.weight, gain=gain)
+ if hasattr(module, 'bias'):
+ nn.init.constant_(module.bias, bias)
+
+
+def normal_init(module, mean=0, std=1, bias=0):
+ nn.init.normal_(module.weight, mean, std)
+ if hasattr(module, 'bias'):
+ nn.init.constant_(module.bias, bias)
+
+
+def uniform_init(module, a=0, b=1, bias=0):
+ nn.init.uniform_(module.weight, a, b)
+ if hasattr(module, 'bias'):
+ nn.init.constant_(module.bias, bias)
+
+
+def kaiming_init(module,
+ mode='fan_out',
+ nonlinearity='relu',
+ bias=0,
+ distribution='normal'):
+ assert distribution in ['uniform', 'normal']
+ if distribution == 'uniform':
+ nn.init.kaiming_uniform_(
+ module.weight, mode=mode, nonlinearity=nonlinearity)
+ else:
+ nn.init.kaiming_normal_(
+ module.weight, mode=mode, nonlinearity=nonlinearity)
+ if hasattr(module, 'bias'):
+ nn.init.constant_(module.bias, bias)
+
+
+def bias_init_with_prob(prior_prob):
+    """Initialize conv/fc bias value according to a given probability."""
+ bias_init = float(-np.log((1 - prior_prob) / prior_prob))
+ return bias_init
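+
+
+if __name__ == '__main__':
+    # Illustrative check, not part of the original module: the returned
+    # bias is the inverse sigmoid (logit) of the prior probability, so
+    # sigmoid(bias_init_with_prob(p)) == p; for the common focal-loss
+    # prior p = 0.01 this is roughly -4.595.
+    p = 0.01
+    b = bias_init_with_prob(p)
+    assert abs(1. / (1. + np.exp(-b)) - p) < 1e-6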
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/__init__.py
new file mode 100644
index 000000000..5c6a1f37c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/__init__.py
@@ -0,0 +1,21 @@
+from .context_block import ContextBlock
+from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling,
+ DeformRoIPoolingPack, ModulatedDeformConv,
+ ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack,
+ deform_conv, deform_roi_pooling, modulated_deform_conv)
+from .masked_conv import MaskedConv2d
+from .nms import nms, soft_nms
+from .roi_align import RoIAlign, roi_align
+from .roi_pool import RoIPool, roi_pool
+from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
+from .utils import get_compiler_version, get_compiling_cuda_version
+
+__all__ = [
+ 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
+ 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
+ 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
+ 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
+ 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss',
+ 'MaskedConv2d', 'ContextBlock', 'get_compiler_version',
+ 'get_compiling_cuda_version'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/context_block.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/context_block.py
new file mode 100644
index 000000000..be9092c48
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/context_block.py
@@ -0,0 +1,104 @@
+import torch
+from mmcv.cnn import constant_init, kaiming_init
+from torch import nn
+
+
+def last_zero_init(m):
+ if isinstance(m, nn.Sequential):
+ constant_init(m[-1], val=0)
+ else:
+ constant_init(m, val=0)
+
+
+class ContextBlock(nn.Module):
+
+ def __init__(self,
+ inplanes,
+ ratio,
+ pooling_type='att',
+ fusion_types=('channel_add', )):
+ super(ContextBlock, self).__init__()
+ assert pooling_type in ['avg', 'att']
+ assert isinstance(fusion_types, (list, tuple))
+ valid_fusion_types = ['channel_add', 'channel_mul']
+ assert all([f in valid_fusion_types for f in fusion_types])
+ assert len(fusion_types) > 0, 'at least one fusion should be used'
+ self.inplanes = inplanes
+ self.ratio = ratio
+ self.planes = int(inplanes * ratio)
+ self.pooling_type = pooling_type
+ self.fusion_types = fusion_types
+ if pooling_type == 'att':
+ self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
+ self.softmax = nn.Softmax(dim=2)
+ else:
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
+ if 'channel_add' in fusion_types:
+ self.channel_add_conv = nn.Sequential(
+ nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
+ nn.LayerNorm([self.planes, 1, 1]),
+ nn.ReLU(inplace=True), # yapf: disable
+ nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
+ else:
+ self.channel_add_conv = None
+ if 'channel_mul' in fusion_types:
+ self.channel_mul_conv = nn.Sequential(
+ nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
+ nn.LayerNorm([self.planes, 1, 1]),
+ nn.ReLU(inplace=True), # yapf: disable
+ nn.Conv2d(self.planes, self.inplanes, kernel_size=1))
+ else:
+ self.channel_mul_conv = None
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ if self.pooling_type == 'att':
+ kaiming_init(self.conv_mask, mode='fan_in')
+ self.conv_mask.inited = True
+
+ if self.channel_add_conv is not None:
+ last_zero_init(self.channel_add_conv)
+ if self.channel_mul_conv is not None:
+ last_zero_init(self.channel_mul_conv)
+
+ def spatial_pool(self, x):
+ batch, channel, height, width = x.size()
+ if self.pooling_type == 'att':
+ input_x = x
+ # [N, C, H * W]
+ input_x = input_x.view(batch, channel, height * width)
+ # [N, 1, C, H * W]
+ input_x = input_x.unsqueeze(1)
+ # [N, 1, H, W]
+ context_mask = self.conv_mask(x)
+ # [N, 1, H * W]
+ context_mask = context_mask.view(batch, 1, height * width)
+ # [N, 1, H * W]
+ context_mask = self.softmax(context_mask)
+ # [N, 1, H * W, 1]
+ context_mask = context_mask.unsqueeze(-1)
+ # [N, 1, C, 1]
+ context = torch.matmul(input_x, context_mask)
+ # [N, C, 1, 1]
+ context = context.view(batch, channel, 1, 1)
+ else:
+ # [N, C, 1, 1]
+ context = self.avg_pool(x)
+
+ return context
+
+ def forward(self, x):
+ # [N, C, 1, 1]
+ context = self.spatial_pool(x)
+
+ out = x
+ if self.channel_mul_conv is not None:
+ # [N, C, 1, 1]
+ channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
+ out = out * channel_mul_term
+ if self.channel_add_conv is not None:
+ # [N, C, 1, 1]
+ channel_add_term = self.channel_add_conv(context)
+ out = out + channel_add_term
+
+ return out
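+
+
+if __name__ == '__main__':
+    # Illustrative smoke test, not part of the original module: the GCNet
+    # context block preserves the spatial shape and, because the last conv
+    # of the fusion branch is zero-initialized, it starts out as an
+    # identity mapping.
+    x = torch.rand(2, 64, 10, 10)
+    block = ContextBlock(inplanes=64, ratio=1. / 4)
+    out = block(x)
+    assert out.shape == x.shape and torch.allclose(out, x)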
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/__init__.py
new file mode 100644
index 000000000..79594c90b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/__init__.py
@@ -0,0 +1,12 @@
+from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv,
+ ModulatedDeformConvPack, deform_conv,
+ modulated_deform_conv)
+from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,
+ ModulatedDeformRoIPoolingPack, deform_roi_pooling)
+
+__all__ = [
+ 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',
+ 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
+ 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',
+ 'deform_roi_pooling'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_conv.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_conv.py
new file mode 100644
index 000000000..5ba5a5e8f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_conv.py
@@ -0,0 +1,431 @@
+import math
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair, _single
+
+from mmdet.utils import print_log
+from . import deform_conv_cuda
+
+
+class DeformConvFunction(Function):
+
+ @staticmethod
+ def forward(ctx,
+ input,
+ offset,
+ weight,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ deformable_groups=1,
+ im2col_step=64):
+ if input is not None and input.dim() != 4:
+ raise ValueError(
+ 'Expected 4D tensor as input, got {}D tensor instead.'.format(
+ input.dim()))
+ ctx.stride = _pair(stride)
+ ctx.padding = _pair(padding)
+ ctx.dilation = _pair(dilation)
+ ctx.groups = groups
+ ctx.deformable_groups = deformable_groups
+ ctx.im2col_step = im2col_step
+
+ ctx.save_for_backward(input, offset, weight)
+
+ output = input.new_empty(
+ DeformConvFunction._output_size(input, weight, ctx.padding,
+ ctx.dilation, ctx.stride))
+
+ ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones
+
+ if not input.is_cuda:
+ raise NotImplementedError
+ else:
+ cur_im2col_step = min(ctx.im2col_step, input.shape[0])
+ assert (input.shape[0] %
+ cur_im2col_step) == 0, 'im2col step must divide batchsize'
+ deform_conv_cuda.deform_conv_forward_cuda(
+ input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],
+ weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0],
+ ctx.padding[1], ctx.padding[0], ctx.dilation[1],
+ ctx.dilation[0], ctx.groups, ctx.deformable_groups,
+ cur_im2col_step)
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ input, offset, weight = ctx.saved_tensors
+
+ grad_input = grad_offset = grad_weight = None
+
+ if not grad_output.is_cuda:
+ raise NotImplementedError
+ else:
+ cur_im2col_step = min(ctx.im2col_step, input.shape[0])
+ assert (input.shape[0] %
+ cur_im2col_step) == 0, 'im2col step must divide batchsize'
+
+ if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
+ grad_input = torch.zeros_like(input)
+ grad_offset = torch.zeros_like(offset)
+ deform_conv_cuda.deform_conv_backward_input_cuda(
+ input, offset, grad_output, grad_input,
+ grad_offset, weight, ctx.bufs_[0], weight.size(3),
+ weight.size(2), ctx.stride[1], ctx.stride[0],
+ ctx.padding[1], ctx.padding[0], ctx.dilation[1],
+ ctx.dilation[0], ctx.groups, ctx.deformable_groups,
+ cur_im2col_step)
+
+ if ctx.needs_input_grad[2]:
+ grad_weight = torch.zeros_like(weight)
+ deform_conv_cuda.deform_conv_backward_parameters_cuda(
+ input, offset, grad_output,
+ grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
+ weight.size(2), ctx.stride[1], ctx.stride[0],
+ ctx.padding[1], ctx.padding[0], ctx.dilation[1],
+ ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1,
+ cur_im2col_step)
+
+ return (grad_input, grad_offset, grad_weight, None, None, None, None,
+ None)
+
+ @staticmethod
+ def _output_size(input, weight, padding, dilation, stride):
+ channels = weight.size(0)
+ output_size = (input.size(0), channels)
+ for d in range(input.dim() - 2):
+ in_size = input.size(d + 2)
+ pad = padding[d]
+ kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
+ stride_ = stride[d]
+ output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
+ if not all(map(lambda s: s > 0, output_size)):
+ raise ValueError(
+ 'convolution input is too small (output would be {})'.format(
+ 'x'.join(map(str, output_size))))
+ return output_size
+
+
+class ModulatedDeformConvFunction(Function):
+
+ @staticmethod
+ def forward(ctx,
+ input,
+ offset,
+ mask,
+ weight,
+ bias=None,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ deformable_groups=1):
+ ctx.stride = stride
+ ctx.padding = padding
+ ctx.dilation = dilation
+ ctx.groups = groups
+ ctx.deformable_groups = deformable_groups
+ ctx.with_bias = bias is not None
+ if not ctx.with_bias:
+ bias = input.new_empty(1) # fake tensor
+ if not input.is_cuda:
+ raise NotImplementedError
+ if weight.requires_grad or mask.requires_grad or offset.requires_grad \
+ or input.requires_grad:
+ ctx.save_for_backward(input, offset, mask, weight, bias)
+ output = input.new_empty(
+ ModulatedDeformConvFunction._infer_shape(ctx, input, weight))
+ ctx._bufs = [input.new_empty(0), input.new_empty(0)]
+ deform_conv_cuda.modulated_deform_conv_cuda_forward(
+ input, weight, bias, ctx._bufs[0], offset, mask, output,
+ ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,
+ ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
+ ctx.groups, ctx.deformable_groups, ctx.with_bias)
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ if not grad_output.is_cuda:
+ raise NotImplementedError
+ input, offset, mask, weight, bias = ctx.saved_tensors
+ grad_input = torch.zeros_like(input)
+ grad_offset = torch.zeros_like(offset)
+ grad_mask = torch.zeros_like(mask)
+ grad_weight = torch.zeros_like(weight)
+ grad_bias = torch.zeros_like(bias)
+ deform_conv_cuda.modulated_deform_conv_cuda_backward(
+ input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],
+ grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
+ grad_output, weight.shape[2], weight.shape[3], ctx.stride,
+ ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
+ ctx.groups, ctx.deformable_groups, ctx.with_bias)
+ if not ctx.with_bias:
+ grad_bias = None
+
+ return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
+ None, None, None, None, None)
+
+ @staticmethod
+ def _infer_shape(ctx, input, weight):
+ n = input.size(0)
+ channels_out = weight.size(0)
+ height, width = input.shape[2:4]
+ kernel_h, kernel_w = weight.shape[2:4]
+ height_out = (height + 2 * ctx.padding -
+ (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1
+ width_out = (width + 2 * ctx.padding -
+ (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1
+ return n, channels_out, height_out, width_out
+
+
+deform_conv = DeformConvFunction.apply
+modulated_deform_conv = ModulatedDeformConvFunction.apply
+
+
+class DeformConv(nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ deformable_groups=1,
+ bias=False):
+ super(DeformConv, self).__init__()
+
+ assert not bias
+        assert in_channels % groups == 0, \
+            'in_channels {} is not divisible by groups {}'.format(
+                in_channels, groups)
+        assert out_channels % groups == 0, \
+            'out_channels {} is not divisible by groups {}'.format(
+                out_channels, groups)
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.kernel_size = _pair(kernel_size)
+ self.stride = _pair(stride)
+ self.padding = _pair(padding)
+ self.dilation = _pair(dilation)
+ self.groups = groups
+ self.deformable_groups = deformable_groups
+ # enable compatibility with nn.Conv2d
+ self.transposed = False
+ self.output_padding = _single(0)
+
+ self.weight = nn.Parameter(
+ torch.Tensor(out_channels, in_channels // self.groups,
+ *self.kernel_size))
+
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ n = self.in_channels
+ for k in self.kernel_size:
+ n *= k
+ stdv = 1. / math.sqrt(n)
+ self.weight.data.uniform_(-stdv, stdv)
+
+ def forward(self, x, offset):
+ return deform_conv(x, offset, self.weight, self.stride, self.padding,
+ self.dilation, self.groups, self.deformable_groups)
+
+
+class DeformConvPack(DeformConv):
+    """A Deformable Conv Encapsulation that acts as a normal Conv layer.
+
+ Args:
+ in_channels (int): Same as nn.Conv2d.
+ out_channels (int): Same as nn.Conv2d.
+ kernel_size (int or tuple[int]): Same as nn.Conv2d.
+ stride (int or tuple[int]): Same as nn.Conv2d.
+ padding (int or tuple[int]): Same as nn.Conv2d.
+ dilation (int or tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+        bias (bool): Whether to add a learnable bias. Only ``False`` is
+            supported here, since the underlying DeformConv does not use a
+            bias term.
+ """
+
+ _version = 2
+
+ def __init__(self, *args, **kwargs):
+ super(DeformConvPack, self).__init__(*args, **kwargs)
+
+ self.conv_offset = nn.Conv2d(
+ self.in_channels,
+ self.deformable_groups * 2 * self.kernel_size[0] *
+ self.kernel_size[1],
+ kernel_size=self.kernel_size,
+ stride=_pair(self.stride),
+ padding=_pair(self.padding),
+ bias=True)
+ self.init_offset()
+
+ def init_offset(self):
+ self.conv_offset.weight.data.zero_()
+ self.conv_offset.bias.data.zero_()
+
+ def forward(self, x):
+ offset = self.conv_offset(x)
+ return deform_conv(x, offset, self.weight, self.stride, self.padding,
+ self.dilation, self.groups, self.deformable_groups)
+
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ version = local_metadata.get('version', None)
+
+ if version is None or version < 2:
+ # the key is different in early versions
+ # In version < 2, DeformConvPack loads previous benchmark models.
+ if (prefix + 'conv_offset.weight' not in state_dict
+ and prefix[:-1] + '_offset.weight' in state_dict):
+ state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
+ prefix[:-1] + '_offset.weight')
+ if (prefix + 'conv_offset.bias' not in state_dict
+ and prefix[:-1] + '_offset.bias' in state_dict):
+ state_dict[prefix +
+ 'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
+ '_offset.bias')
+
+ if version is not None and version > 1:
+ print_log(
+ 'DeformConvPack {} is upgraded to version 2.'.format(
+ prefix.rstrip('.')),
+ logger='root')
+
+ super()._load_from_state_dict(state_dict, prefix, local_metadata,
+ strict, missing_keys, unexpected_keys,
+ error_msgs)
+
+
+class ModulatedDeformConv(nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ deformable_groups=1,
+ bias=True):
+ super(ModulatedDeformConv, self).__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.kernel_size = _pair(kernel_size)
+ self.stride = stride
+ self.padding = padding
+ self.dilation = dilation
+ self.groups = groups
+ self.deformable_groups = deformable_groups
+ self.with_bias = bias
+ # enable compatibility with nn.Conv2d
+ self.transposed = False
+ self.output_padding = _single(0)
+
+ self.weight = nn.Parameter(
+ torch.Tensor(out_channels, in_channels // groups,
+ *self.kernel_size))
+ if bias:
+ self.bias = nn.Parameter(torch.Tensor(out_channels))
+ else:
+ self.register_parameter('bias', None)
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ n = self.in_channels
+ for k in self.kernel_size:
+ n *= k
+ stdv = 1. / math.sqrt(n)
+ self.weight.data.uniform_(-stdv, stdv)
+ if self.bias is not None:
+ self.bias.data.zero_()
+
+ def forward(self, x, offset, mask):
+ return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
+ self.stride, self.padding, self.dilation,
+ self.groups, self.deformable_groups)
+
+
+class ModulatedDeformConvPack(ModulatedDeformConv):
+    """A ModulatedDeformable Conv Encapsulation that acts as a normal Conv layer.
+
+ Args:
+ in_channels (int): Same as nn.Conv2d.
+ out_channels (int): Same as nn.Conv2d.
+ kernel_size (int or tuple[int]): Same as nn.Conv2d.
+ stride (int or tuple[int]): Same as nn.Conv2d.
+ padding (int or tuple[int]): Same as nn.Conv2d.
+ dilation (int or tuple[int]): Same as nn.Conv2d.
+ groups (int): Same as nn.Conv2d.
+        bias (bool): Whether to add a learnable bias to the output.
+            Default: True.
+ """
+
+ _version = 2
+
+ def __init__(self, *args, **kwargs):
+ super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)
+
+ self.conv_offset = nn.Conv2d(
+ self.in_channels,
+ self.deformable_groups * 3 * self.kernel_size[0] *
+ self.kernel_size[1],
+ kernel_size=self.kernel_size,
+ stride=_pair(self.stride),
+ padding=_pair(self.padding),
+ bias=True)
+ self.init_offset()
+
+ def init_offset(self):
+ self.conv_offset.weight.data.zero_()
+ self.conv_offset.bias.data.zero_()
+
+ def forward(self, x):
+ out = self.conv_offset(x)
+ o1, o2, mask = torch.chunk(out, 3, dim=1)
+ offset = torch.cat((o1, o2), dim=1)
+ mask = torch.sigmoid(mask)
+ return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
+ self.stride, self.padding, self.dilation,
+ self.groups, self.deformable_groups)
+
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ version = local_metadata.get('version', None)
+
+ if version is None or version < 2:
+ # the key is different in early versions
+ # In version < 2, ModulatedDeformConvPack
+ # loads previous benchmark models.
+ if (prefix + 'conv_offset.weight' not in state_dict
+ and prefix[:-1] + '_offset.weight' in state_dict):
+ state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
+ prefix[:-1] + '_offset.weight')
+ if (prefix + 'conv_offset.bias' not in state_dict
+ and prefix[:-1] + '_offset.bias' in state_dict):
+ state_dict[prefix +
+ 'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
+ '_offset.bias')
+
+ if version is not None and version > 1:
+ print_log(
+ 'ModulatedDeformConvPack {} is upgraded to version 2.'.format(
+ prefix.rstrip('.')),
+ logger='root')
+
+ super()._load_from_state_dict(state_dict, prefix, local_metadata,
+ strict, missing_keys, unexpected_keys,
+ error_msgs)
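+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module (CUDA only,
+    # since the op raises NotImplementedError on CPU): DeformConvPack
+    # predicts its own sampling offsets through `conv_offset`, so it can be
+    # used as a drop-in replacement for a regular 3x3 nn.Conv2d.
+    if torch.cuda.is_available():
+        conv = DeformConvPack(16, 32, kernel_size=3, padding=1).cuda()
+        y = conv(torch.rand(2, 16, 20, 20).cuda())
+        assert y.shape == (2, 32, 20, 20)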
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_pool.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_pool.py
new file mode 100644
index 000000000..99a4a3618
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_pool.py
@@ -0,0 +1,252 @@
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from . import deform_pool_cuda
+
+
+class DeformRoIPoolingFunction(Function):
+
+ @staticmethod
+ def forward(ctx,
+ data,
+ rois,
+ offset,
+ spatial_scale,
+ out_size,
+ out_channels,
+ no_trans,
+ group_size=1,
+ part_size=None,
+ sample_per_part=4,
+ trans_std=.0):
+        # TODO: support non-square RoIs
+ out_h, out_w = _pair(out_size)
+ assert isinstance(out_h, int) and isinstance(out_w, int)
+ assert out_h == out_w
+ out_size = out_h # out_h and out_w must be equal
+
+ ctx.spatial_scale = spatial_scale
+ ctx.out_size = out_size
+ ctx.out_channels = out_channels
+ ctx.no_trans = no_trans
+ ctx.group_size = group_size
+ ctx.part_size = out_size if part_size is None else part_size
+ ctx.sample_per_part = sample_per_part
+ ctx.trans_std = trans_std
+
+ assert 0.0 <= ctx.trans_std <= 1.0
+ if not data.is_cuda:
+ raise NotImplementedError
+
+ n = rois.shape[0]
+ output = data.new_empty(n, out_channels, out_size, out_size)
+ output_count = data.new_empty(n, out_channels, out_size, out_size)
+ deform_pool_cuda.deform_psroi_pooling_cuda_forward(
+ data, rois, offset, output, output_count, ctx.no_trans,
+ ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,
+ ctx.part_size, ctx.sample_per_part, ctx.trans_std)
+
+ if data.requires_grad or rois.requires_grad or offset.requires_grad:
+ ctx.save_for_backward(data, rois, offset)
+ ctx.output_count = output_count
+
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ if not grad_output.is_cuda:
+ raise NotImplementedError
+
+ data, rois, offset = ctx.saved_tensors
+ output_count = ctx.output_count
+ grad_input = torch.zeros_like(data)
+ grad_rois = None
+ grad_offset = torch.zeros_like(offset)
+
+ deform_pool_cuda.deform_psroi_pooling_cuda_backward(
+ grad_output, data, rois, offset, output_count, grad_input,
+ grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,
+ ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,
+ ctx.trans_std)
+ return (grad_input, grad_rois, grad_offset, None, None, None, None,
+ None, None, None, None)
+
+
+deform_roi_pooling = DeformRoIPoolingFunction.apply
+
+
+class DeformRoIPooling(nn.Module):
+
+ def __init__(self,
+ spatial_scale,
+ out_size,
+ out_channels,
+ no_trans,
+ group_size=1,
+ part_size=None,
+ sample_per_part=4,
+ trans_std=.0):
+ super(DeformRoIPooling, self).__init__()
+ self.spatial_scale = spatial_scale
+ self.out_size = _pair(out_size)
+ self.out_channels = out_channels
+ self.no_trans = no_trans
+ self.group_size = group_size
+ self.part_size = out_size if part_size is None else part_size
+ self.sample_per_part = sample_per_part
+ self.trans_std = trans_std
+
+ def forward(self, data, rois, offset):
+ if self.no_trans:
+ offset = data.new_empty(0)
+ return deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels,
+ self.no_trans, self.group_size,
+ self.part_size, self.sample_per_part,
+ self.trans_std)
+
+
+class DeformRoIPoolingPack(DeformRoIPooling):
+
+ def __init__(self,
+ spatial_scale,
+ out_size,
+ out_channels,
+ no_trans,
+ group_size=1,
+ part_size=None,
+ sample_per_part=4,
+ trans_std=.0,
+ num_offset_fcs=3,
+ deform_fc_channels=1024):
+ super(DeformRoIPoolingPack,
+ self).__init__(spatial_scale, out_size, out_channels, no_trans,
+ group_size, part_size, sample_per_part, trans_std)
+
+ self.num_offset_fcs = num_offset_fcs
+ self.deform_fc_channels = deform_fc_channels
+
+ if not no_trans:
+ seq = []
+ ic = self.out_size[0] * self.out_size[1] * self.out_channels
+ for i in range(self.num_offset_fcs):
+ if i < self.num_offset_fcs - 1:
+ oc = self.deform_fc_channels
+ else:
+ oc = self.out_size[0] * self.out_size[1] * 2
+ seq.append(nn.Linear(ic, oc))
+ ic = oc
+ if i < self.num_offset_fcs - 1:
+ seq.append(nn.ReLU(inplace=True))
+ self.offset_fc = nn.Sequential(*seq)
+ self.offset_fc[-1].weight.data.zero_()
+ self.offset_fc[-1].bias.data.zero_()
+
+ def forward(self, data, rois):
+ assert data.size(1) == self.out_channels
+ if self.no_trans:
+ offset = data.new_empty(0)
+ return deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels,
+ self.no_trans, self.group_size,
+ self.part_size, self.sample_per_part,
+ self.trans_std)
+ else:
+ n = rois.shape[0]
+ offset = data.new_empty(0)
+ x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels, True,
+ self.group_size, self.part_size,
+ self.sample_per_part, self.trans_std)
+ offset = self.offset_fc(x.view(n, -1))
+ offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
+ return deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels,
+ self.no_trans, self.group_size,
+ self.part_size, self.sample_per_part,
+ self.trans_std)
+
+
+class ModulatedDeformRoIPoolingPack(DeformRoIPooling):
+
+ def __init__(self,
+ spatial_scale,
+ out_size,
+ out_channels,
+ no_trans,
+ group_size=1,
+ part_size=None,
+ sample_per_part=4,
+ trans_std=.0,
+ num_offset_fcs=3,
+ num_mask_fcs=2,
+ deform_fc_channels=1024):
+ super(ModulatedDeformRoIPoolingPack,
+ self).__init__(spatial_scale, out_size, out_channels, no_trans,
+ group_size, part_size, sample_per_part, trans_std)
+
+ self.num_offset_fcs = num_offset_fcs
+ self.num_mask_fcs = num_mask_fcs
+ self.deform_fc_channels = deform_fc_channels
+
+ if not no_trans:
+ offset_fc_seq = []
+ ic = self.out_size[0] * self.out_size[1] * self.out_channels
+ for i in range(self.num_offset_fcs):
+ if i < self.num_offset_fcs - 1:
+ oc = self.deform_fc_channels
+ else:
+ oc = self.out_size[0] * self.out_size[1] * 2
+ offset_fc_seq.append(nn.Linear(ic, oc))
+ ic = oc
+ if i < self.num_offset_fcs - 1:
+ offset_fc_seq.append(nn.ReLU(inplace=True))
+ self.offset_fc = nn.Sequential(*offset_fc_seq)
+ self.offset_fc[-1].weight.data.zero_()
+ self.offset_fc[-1].bias.data.zero_()
+
+ mask_fc_seq = []
+ ic = self.out_size[0] * self.out_size[1] * self.out_channels
+ for i in range(self.num_mask_fcs):
+ if i < self.num_mask_fcs - 1:
+ oc = self.deform_fc_channels
+ else:
+ oc = self.out_size[0] * self.out_size[1]
+ mask_fc_seq.append(nn.Linear(ic, oc))
+ ic = oc
+ if i < self.num_mask_fcs - 1:
+ mask_fc_seq.append(nn.ReLU(inplace=True))
+ else:
+ mask_fc_seq.append(nn.Sigmoid())
+ self.mask_fc = nn.Sequential(*mask_fc_seq)
+ self.mask_fc[-2].weight.data.zero_()
+ self.mask_fc[-2].bias.data.zero_()
+
+ def forward(self, data, rois):
+ assert data.size(1) == self.out_channels
+ if self.no_trans:
+ offset = data.new_empty(0)
+ return deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels,
+ self.no_trans, self.group_size,
+ self.part_size, self.sample_per_part,
+ self.trans_std)
+ else:
+ n = rois.shape[0]
+ offset = data.new_empty(0)
+ x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
+ self.out_size, self.out_channels, True,
+ self.group_size, self.part_size,
+ self.sample_per_part, self.trans_std)
+ offset = self.offset_fc(x.view(n, -1))
+ offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
+ mask = self.mask_fc(x.view(n, -1))
+ mask = mask.view(n, 1, self.out_size[0], self.out_size[1])
+ return deform_roi_pooling(
+ data, rois, offset, self.spatial_scale, self.out_size,
+ self.out_channels, self.no_trans, self.group_size,
+ self.part_size, self.sample_per_part, self.trans_std) * mask
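+
+
+if __name__ == '__main__':
+    # Illustrative sketch, not part of the original module (CUDA only):
+    # modulated deformable RoI pooling over a single feature map, with rois
+    # given as (batch_idx, x1, y1, x2, y2) and out_channels equal to the
+    # channel count of the input features.
+    if torch.cuda.is_available():
+        pool = ModulatedDeformRoIPoolingPack(
+            spatial_scale=1. / 16,
+            out_size=7,
+            out_channels=16,
+            no_trans=False).cuda()
+        feats = torch.rand(1, 16, 32, 32).cuda()
+        rois = torch.tensor([[0., 0., 0., 100., 100.]], device='cuda')
+        out = pool(feats, rois)
+        assert out.shape == (1, 16, 7, 7)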
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda.cpp
new file mode 100644
index 000000000..ffe740dba
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda.cpp
@@ -0,0 +1,701 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
+
+#include <torch/extension.h>
+#include <ATen/DeviceGuard.h>
+
+#include <cmath>
+#include <vector>
+
+void deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor data_col);
+
+void deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,
+ const int channels, const int height, const int width,
+ const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor grad_im);
+
+void deformable_col2im_coord(
+ const at::Tensor data_col, const at::Tensor data_im,
+ const at::Tensor data_offset, const int channels, const int height,
+ const int width, const int ksize_h, const int ksize_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int parallel_imgs,
+ const int deformable_group, at::Tensor grad_offset);
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset,
+ const at::Tensor data_mask, const int batch_size, const int channels,
+ const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int deformable_group,
+ at::Tensor data_col);
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset,
+ const at::Tensor data_mask, const int batch_size, const int channels,
+ const int height_im, const int width_im, const int height_col,
+    const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int deformable_group,
+ at::Tensor grad_im);
+
+void modulated_deformable_col2im_coord_cuda(
+ const at::Tensor data_col, const at::Tensor data_im,
+ const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im,
+ const int width_im, const int height_col, const int width_col,
+    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w, const int dilation_h,
+ const int dilation_w, const int deformable_group, at::Tensor grad_offset,
+ at::Tensor grad_mask);
+
+void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
+ at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
+ int padW, int dilationH, int dilationW, int group,
+ int deformable_group) {
+ TORCH_CHECK(weight.ndimension() == 4,
+ "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
+ "but got: %s",
+ weight.ndimension());
+
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+
+ TORCH_CHECK(kW > 0 && kH > 0,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH,
+ kW);
+
+ TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
+ "kernel size should be consistent with weight, ",
+ "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
+ kW, weight.size(2), weight.size(3));
+
+ TORCH_CHECK(dW > 0 && dH > 0,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+
+ TORCH_CHECK(
+ dilationW > 0 && dilationH > 0,
+ "dilation should be greater than 0, but got dilationH: %d dilationW: %d",
+ dilationH, dilationW);
+
+ int ndim = input.ndimension();
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
+ ndim);
+
+ long nInputPlane = weight.size(1) * group;
+ long inputHeight = input.size(dimh);
+ long inputWidth = input.size(dimw);
+ long nOutputPlane = weight.size(0);
+ long outputHeight =
+ (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ long outputWidth =
+ (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+
+ TORCH_CHECK(nInputPlane % deformable_group == 0,
+ "input channels must divide deformable group size");
+
+ if (outputWidth < 1 || outputHeight < 1)
+ AT_ERROR(
+ "Given input size: (%ld x %ld x %ld). "
+ "Calculated output size: (%ld x %ld x %ld). Output size is too small",
+ nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,
+ outputWidth);
+
+ TORCH_CHECK(input.size(1) == nInputPlane,
+ "invalid number of input planes, expected: %d, but got: %d",
+ nInputPlane, input.size(1));
+
+ TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
+ "input image is smaller than kernel");
+
+ TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
+ "invalid spatial size of offset, expected height: %d width: %d, but "
+ "got height: %d width: %d",
+ outputHeight, outputWidth, offset.size(2), offset.size(3));
+
+ TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
+ "invalid number of channels of offset");
+
+ if (gradOutput != NULL) {
+ TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
+ "invalid number of gradOutput planes, expected: %d, but got: %d",
+ nOutputPlane, gradOutput->size(dimf));
+
+ TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
+ gradOutput->size(dimw) == outputWidth),
+ "invalid size of gradOutput, expected height: %d width: %d , but "
+ "got height: %d width: %d",
+ outputHeight, outputWidth, gradOutput->size(dimh),
+ gradOutput->size(dimw));
+ }
+}
+
+int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
+ at::Tensor offset, at::Tensor output,
+ at::Tensor columns, at::Tensor ones, int kW,
+ int kH, int dW, int dH, int padW, int padH,
+ int dilationW, int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ // todo: resize columns to include im2col: done
+ // todo: add im2col_step as input
+ // todo: add new output buffer and transpose it to output (or directly
+ // transpose output) todo: possibly change data indexing because of
+ // parallel_imgs
+
+ shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW,
+ dilationH, dilationW, group, deformable_group);
+ at::DeviceGuard guard(input.device());
+
+ input = input.contiguous();
+ offset = offset.contiguous();
+ weight = weight.contiguous();
+
+ int batch = 1;
+ if (input.ndimension() == 3) {
+ // Force batch
+ batch = 0;
+ input.unsqueeze_(0);
+ offset.unsqueeze_(0);
+ }
+
+ // todo: assert batchsize dividable by im2col_step
+
+ long batchSize = input.size(0);
+ long nInputPlane = input.size(1);
+ long inputHeight = input.size(2);
+ long inputWidth = input.size(3);
+
+ long nOutputPlane = weight.size(0);
+
+ long outputWidth =
+ (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight =
+ (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+
+ output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
+ outputHeight, outputWidth});
+ columns = at::zeros(
+ {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
+ input.options());
+
+ if (ones.ndimension() != 2 ||
+ ones.size(0) * ones.size(1) < outputHeight * outputWidth) {
+ ones = at::ones({outputHeight, outputWidth}, input.options());
+ }
+
+ input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,
+ inputHeight, inputWidth});
+ offset =
+ offset.view({batchSize / im2col_step, im2col_step,
+ deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ at::Tensor output_buffer =
+ at::zeros({batchSize / im2col_step, nOutputPlane,
+ im2col_step * outputHeight, outputWidth},
+ output.options());
+
+ output_buffer = output_buffer.view(
+ {output_buffer.size(0), group, output_buffer.size(1) / group,
+ output_buffer.size(2), output_buffer.size(3)});
+
+ for (int elt = 0; elt < batchSize / im2col_step; elt++) {
+ deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight,
+ inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
+ dilationW, im2col_step, deformable_group, columns);
+
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+ weight = weight.view({group, weight.size(0) / group, weight.size(1),
+ weight.size(2), weight.size(3)});
+
+ for (int g = 0; g < group; g++) {
+ output_buffer[elt][g] = output_buffer[elt][g]
+ .flatten(1)
+ .addmm_(weight[g].flatten(1), columns[g])
+ .view_as(output_buffer[elt][g]);
+ }
+ }
+
+ output_buffer = output_buffer.view(
+ {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2),
+ output_buffer.size(3), output_buffer.size(4)});
+
+ output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane,
+ im2col_step, outputHeight, outputWidth});
+ output_buffer.transpose_(1, 2);
+ output.copy_(output_buffer);
+ output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth});
+
+ input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
+ offset = offset.view(
+ {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ if (batch == 0) {
+ output = output.view({nOutputPlane, outputHeight, outputWidth});
+ input = input.view({nInputPlane, inputHeight, inputWidth});
+ offset = offset.view({offset.size(1), offset.size(2), offset.size(3)});
+ }
+
+ return 1;
+}
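+
+// Shape summary for the forward entry point above, as implied by the checks and
+// view() calls (a reading aid, not an authoritative spec):
+//   input  : (N, nInputPlane, inputHeight, inputWidth)
+//   offset : (N, deformable_group * 2 * kH * kW, outputHeight, outputWidth)
+//   weight : (nOutputPlane, nInputPlane / group, kH, kW)
+//   output : (N, nOutputPlane, outputHeight, outputWidth)
+// N is additionally assumed to be divisible by im2col_step (see the todo above).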
+
+int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
+ at::Tensor gradOutput, at::Tensor gradInput,
+ at::Tensor gradOffset, at::Tensor weight,
+ at::Tensor columns, int kW, int kH, int dW,
+ int dH, int padW, int padH, int dilationW,
+ int dilationH, int group,
+ int deformable_group, int im2col_step) {
+ shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW,
+ dilationH, dilationW, group, deformable_group);
+ at::DeviceGuard guard(input.device());
+
+ input = input.contiguous();
+ offset = offset.contiguous();
+ gradOutput = gradOutput.contiguous();
+ weight = weight.contiguous();
+
+ int batch = 1;
+
+ if (input.ndimension() == 3) {
+ // Force batch
+ batch = 0;
+ input = input.view({1, input.size(0), input.size(1), input.size(2)});
+ offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)});
+ gradOutput = gradOutput.view(
+ {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
+ }
+
+ long batchSize = input.size(0);
+ long nInputPlane = input.size(1);
+ long inputHeight = input.size(2);
+ long inputWidth = input.size(3);
+
+ long nOutputPlane = weight.size(0);
+
+ long outputWidth =
+ (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight =
+ (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+ gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
+ columns = at::zeros(
+ {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
+ input.options());
+
+ // change order of grad output
+ gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step,
+ nOutputPlane, outputHeight, outputWidth});
+ gradOutput.transpose_(1, 2);
+
+ gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane,
+ inputHeight, inputWidth});
+ input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,
+ inputHeight, inputWidth});
+ gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step,
+ deformable_group * 2 * kH * kW, outputHeight,
+ outputWidth});
+ offset =
+ offset.view({batchSize / im2col_step, im2col_step,
+ deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ for (int elt = 0; elt < batchSize / im2col_step; elt++) {
+ // divide into groups
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+ weight = weight.view({group, weight.size(0) / group, weight.size(1),
+ weight.size(2), weight.size(3)});
+ gradOutput = gradOutput.view(
+ {gradOutput.size(0), group, gradOutput.size(1) / group,
+ gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)});
+
+ for (int g = 0; g < group; g++) {
+ columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),
+ gradOutput[elt][g].flatten(1), 0.0f, 1.0f);
+ }
+
+ columns =
+ columns.view({columns.size(0) * columns.size(1), columns.size(2)});
+ gradOutput = gradOutput.view(
+ {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2),
+ gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)});
+
+ deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane,
+ inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
+ dilationH, dilationW, im2col_step, deformable_group,
+ gradOffset[elt]);
+
+ deformable_col2im(columns, offset[elt], nInputPlane, inputHeight,
+ inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
+ dilationW, im2col_step, deformable_group, gradInput[elt]);
+ }
+
+ gradOutput.transpose_(1, 2);
+ gradOutput =
+ gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth});
+
+ gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
+ input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
+ gradOffset = gradOffset.view(
+ {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+ offset = offset.view(
+ {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ if (batch == 0) {
+ gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth});
+ input = input.view({nInputPlane, inputHeight, inputWidth});
+ gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth});
+ offset = offset.view({offset.size(1), offset.size(2), offset.size(3)});
+ gradOffset =
+ gradOffset.view({offset.size(1), offset.size(2), offset.size(3)});
+ }
+
+ return 1;
+}
+
+int deform_conv_backward_parameters_cuda(
+ at::Tensor input, at::Tensor offset, at::Tensor gradOutput,
+ at::Tensor gradWeight, // at::Tensor gradBias,
+ at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,
+ int padW, int padH, int dilationW, int dilationH, int group,
+ int deformable_group, float scale, int im2col_step) {
+ // todo: transpose and reshape outGrad
+ // todo: reshape columns
+ // todo: add im2col_step as input
+
+ shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH,
+ padW, dilationH, dilationW, group, deformable_group);
+ at::DeviceGuard guard(input.device());
+
+ input = input.contiguous();
+ offset = offset.contiguous();
+ gradOutput = gradOutput.contiguous();
+
+ int batch = 1;
+
+ if (input.ndimension() == 3) {
+ // Force batch
+ batch = 0;
+ input = input.view(
+ at::IntList({1, input.size(0), input.size(1), input.size(2)}));
+ gradOutput = gradOutput.view(
+ {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});
+ }
+
+ long batchSize = input.size(0);
+ long nInputPlane = input.size(1);
+ long inputHeight = input.size(2);
+ long inputWidth = input.size(3);
+
+ long nOutputPlane = gradWeight.size(0);
+
+ long outputWidth =
+ (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight =
+ (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+
+ columns = at::zeros(
+ {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
+ input.options());
+
+ gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step,
+ nOutputPlane, outputHeight, outputWidth});
+ gradOutput.transpose_(1, 2);
+
+ at::Tensor gradOutputBuffer = at::zeros_like(gradOutput);
+ gradOutputBuffer =
+ gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step,
+ outputHeight, outputWidth});
+ gradOutputBuffer.copy_(gradOutput);
+ gradOutputBuffer =
+ gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane,
+ im2col_step * outputHeight, outputWidth});
+
+ gradOutput.transpose_(1, 2);
+ gradOutput =
+ gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth});
+
+ input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,
+ inputHeight, inputWidth});
+ offset =
+ offset.view({batchSize / im2col_step, im2col_step,
+ deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ for (int elt = 0; elt < batchSize / im2col_step; elt++) {
+ deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight,
+ inputWidth, kH, kW, padH, padW, dH, dW, dilationH,
+ dilationW, im2col_step, deformable_group, columns);
+
+ // divide into group
+ gradOutputBuffer = gradOutputBuffer.view(
+ {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group,
+ gradOutputBuffer.size(2), gradOutputBuffer.size(3)});
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+ gradWeight =
+ gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1),
+ gradWeight.size(2), gradWeight.size(3)});
+
+ for (int g = 0; g < group; g++) {
+ gradWeight[g] = gradWeight[g]
+ .flatten(1)
+ .addmm_(gradOutputBuffer[elt][g].flatten(1),
+ columns[g].transpose(1, 0), 1.0, scale)
+ .view_as(gradWeight[g]);
+ }
+ gradOutputBuffer = gradOutputBuffer.view(
+ {gradOutputBuffer.size(0),
+ gradOutputBuffer.size(1) * gradOutputBuffer.size(2),
+ gradOutputBuffer.size(3), gradOutputBuffer.size(4)});
+ columns =
+ columns.view({columns.size(0) * columns.size(1), columns.size(2)});
+ gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1),
+ gradWeight.size(2), gradWeight.size(3),
+ gradWeight.size(4)});
+ }
+
+ input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
+ offset = offset.view(
+ {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});
+
+ if (batch == 0) {
+ gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth});
+ input = input.view({nInputPlane, inputHeight, inputWidth});
+ }
+
+ return 1;
+}
+
+void modulated_deform_conv_cuda_forward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,
+ int kernel_h, int kernel_w, const int stride_h, const int stride_w,
+ const int pad_h, const int pad_w, const int dilation_h,
+ const int dilation_w, const int group, const int deformable_group,
+ const bool with_bias) {
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+ at::DeviceGuard guard(input.device());
+
+ const int batch = input.size(0);
+ const int channels = input.size(1);
+ const int height = input.size(2);
+ const int width = input.size(3);
+
+ const int channels_out = weight.size(0);
+ const int channels_kernel = weight.size(1);
+ const int kernel_h_ = weight.size(2);
+ const int kernel_w_ = weight.size(3);
+
+ if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
+ AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).",
+ kernel_h, kernel_w, kernel_h_, kernel_w_);
+ if (channels != channels_kernel * group)
+ AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).",
+ channels, channels_kernel * group);
+
+ const int height_out =
+ (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_out =
+ (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+
+ if (ones.ndimension() != 2 ||
+ ones.size(0) * ones.size(1) < height_out * width_out) {
+ // Resize plane and fill with ones...
+ ones = at::ones({height_out, width_out}, input.options());
+ }
+
+ // resize output
+ output = output.view({batch, channels_out, height_out, width_out}).zero_();
+ // resize temporary columns
+ columns =
+ at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out},
+ input.options());
+
+ output = output.view({output.size(0), group, output.size(1) / group,
+ output.size(2), output.size(3)});
+
+ for (int b = 0; b < batch; b++) {
+ modulated_deformable_im2col_cuda(
+ input[b], offset[b], mask[b], 1, channels, height, width, height_out,
+ width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, deformable_group, columns);
+
+ // divide into group
+ weight = weight.view({group, weight.size(0) / group, weight.size(1),
+ weight.size(2), weight.size(3)});
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+
+ for (int g = 0; g < group; g++) {
+ output[b][g] = output[b][g]
+ .flatten(1)
+ .addmm_(weight[g].flatten(1), columns[g])
+ .view_as(output[b][g]);
+ }
+
+ weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
+ weight.size(3), weight.size(4)});
+ columns =
+ columns.view({columns.size(0) * columns.size(1), columns.size(2)});
+ }
+
+ output = output.view({output.size(0), output.size(1) * output.size(2),
+ output.size(3), output.size(4)});
+
+ if (with_bias) {
+ output += bias.view({1, bias.size(0), 1, 1});
+ }
+}
+
+void modulated_deform_conv_cuda_backward(
+ at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,
+ at::Tensor offset, at::Tensor mask, at::Tensor columns,
+ at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,
+ at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,
+ int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
+ int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
+ const bool with_bias) {
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+ at::DeviceGuard guard(input.device());
+
+ const int batch = input.size(0);
+ const int channels = input.size(1);
+ const int height = input.size(2);
+ const int width = input.size(3);
+
+ const int channels_kernel = weight.size(1);
+ const int kernel_h_ = weight.size(2);
+ const int kernel_w_ = weight.size(3);
+ if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
+ AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).",
+ kernel_h, kernel_w, kernel_h_, kernel_w_);
+ if (channels != channels_kernel * group)
+ AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).",
+ channels, channels_kernel * group);
+
+ const int height_out =
+ (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_out =
+ (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+
+ if (ones.ndimension() != 2 ||
+ ones.size(0) * ones.size(1) < height_out * width_out) {
+ // Resize plane and fill with ones...
+ ones = at::ones({height_out, width_out}, input.options());
+ }
+
+ grad_input = grad_input.view({batch, channels, height, width});
+ columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out},
+ input.options());
+
+ grad_output =
+ grad_output.view({grad_output.size(0), group, grad_output.size(1) / group,
+ grad_output.size(2), grad_output.size(3)});
+
+ for (int b = 0; b < batch; b++) {
+ // divide into groups
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+ weight = weight.view({group, weight.size(0) / group, weight.size(1),
+ weight.size(2), weight.size(3)});
+
+ for (int g = 0; g < group; g++) {
+ columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),
+ grad_output[b][g].flatten(1), 0.0f, 1.0f);
+ }
+
+ columns =
+ columns.view({columns.size(0) * columns.size(1), columns.size(2)});
+ weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),
+ weight.size(3), weight.size(4)});
+
+ // gradient w.r.t. input coordinate data
+ modulated_deformable_col2im_coord_cuda(
+ columns, input[b], offset[b], mask[b], 1, channels, height, width,
+ height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
+ stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],
+ grad_mask[b]);
+ // gradient w.r.t. input data
+ modulated_deformable_col2im_cuda(
+ columns, offset[b], mask[b], 1, channels, height, width, height_out,
+ width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, deformable_group, grad_input[b]);
+
+ // gradient w.r.t. weight, dWeight should accumulate across the batch and
+ // group
+ modulated_deformable_im2col_cuda(
+ input[b], offset[b], mask[b], 1, channels, height, width, height_out,
+ width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, deformable_group, columns);
+
+ columns = columns.view({group, columns.size(0) / group, columns.size(1)});
+ grad_weight = grad_weight.view({group, grad_weight.size(0) / group,
+ grad_weight.size(1), grad_weight.size(2),
+ grad_weight.size(3)});
+ if (with_bias)
+ grad_bias = grad_bias.view({group, grad_bias.size(0) / group});
+
+ for (int g = 0; g < group; g++) {
+ grad_weight[g] =
+ grad_weight[g]
+ .flatten(1)
+ .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1))
+ .view_as(grad_weight[g]);
+ if (with_bias) {
+ grad_bias[g] =
+ grad_bias[g]
+ .view({-1, 1})
+ .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1}))
+ .view(-1);
+ }
+ }
+
+ columns =
+ columns.view({columns.size(0) * columns.size(1), columns.size(2)});
+ grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1),
+ grad_weight.size(2), grad_weight.size(3),
+ grad_weight.size(4)});
+ if (with_bias)
+ grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)});
+ }
+ grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1),
+ grad_output.size(2), grad_output.size(3),
+ grad_output.size(4)});
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("deform_conv_forward_cuda", &deform_conv_forward_cuda,
+ "deform forward (CUDA)");
+ m.def("deform_conv_backward_input_cuda", &deform_conv_backward_input_cuda,
+ "deform_conv_backward_input (CUDA)");
+ m.def("deform_conv_backward_parameters_cuda",
+ &deform_conv_backward_parameters_cuda,
+ "deform_conv_backward_parameters (CUDA)");
+ m.def("modulated_deform_conv_cuda_forward",
+ &modulated_deform_conv_cuda_forward,
+ "modulated deform conv forward (CUDA)");
+ m.def("modulated_deform_conv_cuda_backward",
+ &modulated_deform_conv_cuda_backward,
+ "modulated deform conv backward (CUDA)");
+}
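+
+// Usage sketch (an assumption for illustration, not part of the original file):
+// once this translation unit is built as a torch C++ extension, the bindings
+// above are reachable from Python roughly as
+//   ext = torch.utils.cpp_extension.load(name="deform_conv_cuda", sources=[...])
+//   ext.deform_conv_forward_cuda(input, weight, offset, output, columns, ones,
+//                                kW, kH, dW, dH, padW, padH, dilationW,
+//                                dilationH, group, deformable_group, im2col_step)
+// mmdet's dcn Python wrappers are expected to drive these calls from a custom
+// autograd Function.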
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
new file mode 100644
index 000000000..e7a26f2e8
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
@@ -0,0 +1,867 @@
+/*!
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
+ *
+ * COPYRIGHT
+ *
+ * All contributions by the University of California:
+ * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
+ * All rights reserved.
+ *
+ * All other contributions:
+ * Copyright (c) 2014-2017, the respective contributors
+ * All rights reserved.
+ *
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ *
+ * LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer ********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.cuh
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+
+using namespace at;
+
+#define CUDA_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+ i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+
+inline int GET_BLOCKS(const int N)
+{
+ return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
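+
+// Illustrative launch math (not from the original source): for N = 200000
+// elements, GET_BLOCKS(N) = (200000 + 1023) / 1024 = 196 blocks of 1024
+// threads. If N ever exceeded kMaxGridNum * CUDA_NUM_THREADS, the grid would be
+// clamped to kMaxGridNum and CUDA_KERNEL_LOOP strides each thread over the
+// remaining indices.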
+
+template <typename scalar_t>
+__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+ const int height, const int width, scalar_t h, scalar_t w)
+{
+
+ int h_low = floor(h);
+ int w_low = floor(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ scalar_t lh = h - h_low;
+ scalar_t lw = w - w_low;
+ scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0)
+ v1 = bottom_data[h_low * data_width + w_low];
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = bottom_data[h_low * data_width + w_high];
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = bottom_data[h_high * data_width + w_low];
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = bottom_data[h_high * data_width + w_high];
+
+ scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
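+
+// Example of the bilinear weights above (illustrative): for a sample point
+// (h, w) = (1.25, 2.5), h_low = 1, w_low = 2, lh = 0.25, lw = 0.5, so the four
+// corner weights are w1 = 0.375, w2 = 0.375, w3 = 0.125, w4 = 0.125 and sum to
+// 1. Corners that fall outside the feature map contribute 0.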
+
+template <typename scalar_t>
+__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int h, const int w, const int height, const int width)
+{
+
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename scalar_t>
+__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int height, const int width, const scalar_t *im_data,
+ const int data_width, const int bp_dir)
+{
+
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+
+ if (bp_dir == 0)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+ else if (bp_dir == 1)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+
+ return weight;
+}
+
+template <typename scalar_t>
+__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int channel_per_deformable_group,
+ const int batch_size, const int num_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *data_col)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ // index index of output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+ scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+ const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i)
+ {
+ for (int j = 0; j < kernel_w; ++j)
+ {
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ scalar_t val = static_cast<scalar_t>(0);
+ const scalar_t h_im = h_in + i * dilation_h + offset_h;
+ const scalar_t w_im = w_in + j * dilation_w + offset_w;
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ {
+ //const scalar_t map_h = i * dilation_h + offset_h;
+ //const scalar_t map_w = j * dilation_w + offset_w;
+ //const int cur_height = height - h_in;
+ //const int cur_width = width - w_in;
+ //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+ val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+ }
+ *data_col_ptr = val;
+ data_col_ptr += batch_size * height_col * width_col;
+ }
+ }
+ }
+}
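+
+// Note on the layout produced by the kernel above (derived from its indexing,
+// stated here as a reading aid): data_col is effectively a
+// (channels * kernel_h * kernel_w) x (batch_size * height_col * width_col)
+// matrix, which matches the `columns` buffer allocated in deform_conv_cuda.cpp
+// and is consumed by the grouped addmm_ calls in the forward pass.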
+
+void deformable_im2col(
+ const at::Tensor data_im, const at::Tensor data_offset, const int channels,
+ const int height, const int width, const int ksize_h, const int ksize_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w, const int parallel_imgs,
+ const int deformable_group, at::Tensor data_col)
+{
+ // num_axes should be smaller than block size
+ // todo: check parallel_imgs is correctly passed in
+ int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+ int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+ int num_kernels = channels * height_col * width_col * parallel_imgs;
+ int channel_per_deformable_group = channels / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
+ const scalar_t *data_im_ = data_im.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ scalar_t *data_col_ = data_col.data<scalar_t>();
+
+ deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+ channel_per_deformable_group, parallel_imgs, channels, deformable_group,
+ height_col, width_col, data_col_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in deformable_im2col: %s\n", cudaGetErrorString(err));
+ }
+}
+
+template <typename scalar_t>
+__global__ void deformable_col2im_gpu_kernel(
+ const int n, const scalar_t *data_col, const scalar_t *data_offset,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_im)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) *
+ 2 * kernel_h * kernel_w * height_col * width_col;
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const scalar_t cur_top_grad = data_col[index];
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++)
+ {
+ for (int dx = -2; dx <= 2; dx++)
+ {
+ if (cur_h + dy >= 0 && cur_h + dy < height &&
+ cur_w + dx >= 0 && cur_w + dx < width &&
+ abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1)
+ {
+ int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ scalar_t weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+void deformable_col2im(
+ const at::Tensor data_col, const at::Tensor data_offset, const int channels,
+ const int height, const int width, const int ksize_h,
+ const int ksize_w, const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group,
+ at::Tensor grad_im)
+{
+
+ // todo: make sure parallel_imgs is passed in correctly
+ int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+ int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+ int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;
+ int channel_per_deformable_group = channels / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data<scalar_t>();
+
+ deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
+ ksize_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ parallel_imgs, deformable_group, height_col, width_col, grad_im_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in deformable_col2im: %s\n", cudaGetErrorString(err));
+ }
+}
+
+template <typename scalar_t>
+__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col,
+ const scalar_t *data_im, const scalar_t *data_offset,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int offset_channels, const int deformable_group,
+ const int height_col, const int width_col, scalar_t *grad_offset)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ scalar_t val = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
+ const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *
+ batch_size * width_col * height_col;
+ const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) *
+ channel_per_deformable_group / kernel_h / kernel_w * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+ {
+ const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ scalar_t inv_h = h_in + i * dilation_h + offset_h;
+ scalar_t inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ {
+ inv_h = inv_w = -2;
+ }
+ const scalar_t weight = get_coordinate_weight(
+ inv_h, inv_w,
+ height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+ val += weight * data_col_ptr[col_pos];
+ cnt += 1;
+ }
+
+ grad_offset[index] = val;
+ }
+}
+
+void deformable_col2im_coord(
+ const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset,
+ const int channels, const int height, const int width, const int ksize_h,
+ const int ksize_w, const int pad_h, const int pad_w, const int stride_h,
+ const int stride_w, const int dilation_h, const int dilation_w,
+ const int parallel_imgs, const int deformable_group, at::Tensor grad_offset)
+{
+
+ int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+ int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+ int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;
+ int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_im_ = data_im.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
+
+ deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
+ ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group,
+ height_col, width_col, grad_offset_);
+ }));
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
+ const int height, const int width, scalar_t h, scalar_t w)
+{
+ int h_low = floor(h);
+ int w_low = floor(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ scalar_t lh = h - h_low;
+ scalar_t lw = w - w_low;
+ scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0)
+ v1 = bottom_data[h_low * data_width + w_low];
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = bottom_data[h_low * data_width + w_high];
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = bottom_data[h_high * data_width + w_low];
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = bottom_data[h_high * data_width + w_high];
+
+ scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int h, const int w, const int height, const int width)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename scalar_t>
+__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,
+ const int height, const int width, const scalar_t *im_data,
+ const int data_width, const int bp_dir)
+{
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
+ {
+ //empty
+ return 0;
+ }
+
+ int argmax_h_low = floor(argmax_h);
+ int argmax_w_low = floor(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ scalar_t weight = 0;
+
+ if (bp_dir == 0)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+ else if (bp_dir == 1)
+ {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+
+ return weight;
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
+ const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int num_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *data_col)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ // index index of output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+
+ scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+ const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+ const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i)
+ {
+ for (int j = 0; j < kernel_w; ++j)
+ {
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t val = static_cast<scalar_t>(0);
+ const scalar_t h_im = h_in + i * dilation_h + offset_h;
+ const scalar_t w_im = w_in + j * dilation_w + offset_w;
+ //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ {
+ //const float map_h = i * dilation_h + offset_h;
+ //const float map_w = j * dilation_w + offset_w;
+ //const int cur_height = height - h_in;
+ //const int cur_width = width - w_in;
+ //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+ val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+ }
+ *data_col_ptr = val * mask;
+ data_col_ptr += batch_size * height_col * width_col;
+ //data_col_ptr += height_col * width_col;
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_im)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+ const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const scalar_t cur_top_grad = data_col[index] * mask;
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++)
+ {
+ for (int dx = -2; dx <= 2; dx++)
+ {
+ if (cur_h + dy >= 0 && cur_h + dy < height &&
+ cur_w + dx >= 0 && cur_w + dx < width &&
+ abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1)
+ {
+ int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
+ const scalar_t *data_col, const scalar_t *data_im,
+ const scalar_t *data_offset, const scalar_t *data_mask,
+ const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group,
+ const int batch_size, const int offset_channels, const int deformable_group,
+ const int height_col, const int width_col,
+ scalar_t *grad_offset, scalar_t *grad_mask)
+{
+ CUDA_KERNEL_LOOP(index, n)
+ {
+ scalar_t val = 0, mval = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
+ const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;
+ const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
+ const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+ const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
+ {
+ const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
+ const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
+ const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];
+ const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];
+ const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];
+ scalar_t inv_h = h_in + i * dilation_h + offset_h;
+ scalar_t inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ {
+ inv_h = inv_w = -2;
+ }
+ else
+ {
+ mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
+ }
+ const scalar_t weight = dmcn_get_coordinate_weight(
+ inv_h, inv_w,
+ height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+ val += weight * data_col_ptr[col_pos] * mask;
+ cnt += 1;
+ }
+ // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
+ grad_offset[index] = val;
+ if (offset_c % 2 == 0)
+ // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
+ grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
+ }
+}
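+
+// Reading aid (not in the original source): offset channels come in (h, w)
+// pairs, so there are 2 * kernel_h * kernel_w offset channels but only
+// kernel_h * kernel_w mask channels per deformable group. That is why the
+// kernel above writes grad_mask only when offset_c is even: one mask gradient
+// per (h, w) offset pair, accumulated in mval via dmcn_im2col_bilinear.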
+
+void modulated_deformable_im2col_cuda(
+ const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor data_col)
+{
+ // num_axes should be smaller than block size
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
+ const scalar_t *data_im_ = data_im.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data<scalar_t>();
+ scalar_t *data_col_ = data_col.data<scalar_t>();
+
+ modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, channels, deformable_group, height_col, width_col, data_col_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_cuda(
+ const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group, at::Tensor grad_im)
+{
+
+ const int channel_per_deformable_group = channels / deformable_group;
+ const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data<scalar_t>();
+ scalar_t *grad_im_ = grad_im.data<scalar_t>();
+
+ modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, deformable_group, height_col, width_col, grad_im_);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void modulated_deformable_col2im_coord_cuda(
+ const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,
+ const int batch_size, const int channels, const int height_im, const int width_im,
+ const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int deformable_group,
+ at::Tensor grad_offset, at::Tensor grad_mask)
+{
+ const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+ const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
+ const scalar_t *data_col_ = data_col.data<scalar_t>();
+ const scalar_t *data_im_ = data_im.data<scalar_t>();
+ const scalar_t *data_offset_ = data_offset.data<scalar_t>();
+ const scalar_t *data_mask_ = data_mask.data<scalar_t>();
+ scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
+ scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
+
+ modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+ num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
+ kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+ dilation_h, dilation_w, channel_per_deformable_group,
+ batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+ grad_offset_, grad_mask_);
+ }));
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+ }
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda.cpp
new file mode 100644
index 000000000..f6f087b88
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda.cpp
@@ -0,0 +1,90 @@
+// modify from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
+
+// based on
+// author: Charles Shang
+// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
+
+#include <torch/extension.h>
+#include <ATen/DeviceGuard.h>
+
+#include <cmath>
+#include <vector>
+
+void DeformablePSROIPoolForward(
+ const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
+ at::Tensor out, at::Tensor top_count, const int batch, const int channels,
+ const int height, const int width, const int num_bbox,
+ const int channels_trans, const int no_trans, const float spatial_scale,
+ const int output_dim, const int group_size, const int pooled_size,
+ const int part_size, const int sample_per_part, const float trans_std);
+
+void DeformablePSROIPoolBackwardAcc(
+ const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
+ const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
+ at::Tensor trans_grad, const int batch, const int channels,
+ const int height, const int width, const int num_bbox,
+ const int channels_trans, const int no_trans, const float spatial_scale,
+ const int output_dim, const int group_size, const int pooled_size,
+ const int part_size, const int sample_per_part, const float trans_std);
+
+void deform_psroi_pooling_cuda_forward(
+ at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
+ at::Tensor top_count, const int no_trans, const float spatial_scale,
+ const int output_dim, const int group_size, const int pooled_size,
+ const int part_size, const int sample_per_part, const float trans_std) {
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ at::DeviceGuard guard(input.device());
+
+ const int batch = input.size(0);
+ const int channels = input.size(1);
+ const int height = input.size(2);
+ const int width = input.size(3);
+ const int channels_trans = no_trans ? 2 : trans.size(1);
+
+ const int num_bbox = bbox.size(0);
+ if (num_bbox != out.size(0))
+    AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
+ out.size(0), num_bbox);
+
+ DeformablePSROIPoolForward(
+ input, bbox, trans, out, top_count, batch, channels, height, width,
+ num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
+ pooled_size, part_size, sample_per_part, trans_std);
+}
+
+void deform_psroi_pooling_cuda_backward(
+ at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
+ at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
+ const int no_trans, const float spatial_scale, const int output_dim,
+ const int group_size, const int pooled_size, const int part_size,
+ const int sample_per_part, const float trans_std) {
+ TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
+ TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+ at::DeviceGuard guard(input.device());
+
+ const int batch = input.size(0);
+ const int channels = input.size(1);
+ const int height = input.size(2);
+ const int width = input.size(3);
+ const int channels_trans = no_trans ? 2 : trans.size(1);
+
+ const int num_bbox = bbox.size(0);
+ if (num_bbox != out_grad.size(0))
+    AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
+ out_grad.size(0), num_bbox);
+
+ DeformablePSROIPoolBackwardAcc(
+ out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
+ channels, height, width, num_bbox, channels_trans, no_trans,
+ spatial_scale, output_dim, group_size, pooled_size, part_size,
+ sample_per_part, trans_std);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
+ "deform psroi pooling forward(CUDA)");
+ m.def("deform_psroi_pooling_cuda_backward",
+ &deform_psroi_pooling_cuda_backward,
+ "deform psroi pooling backward(CUDA)");
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu
new file mode 100644
index 000000000..05b00d4be
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu
@@ -0,0 +1,364 @@
+/*!
+ * Copyright (c) 2017 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file deformable_psroi_pooling.cu
+ * \brief
+ * \author Yi Li, Guodong Zhang, Jifeng Dai
+*/
+/***************** Adapted by Charles Shang *********************/
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu
+
+#include <ATen/ATen.h>
+#include <THC/THCAtomics.cuh>
+#include <stdio.h>
+#include <math.h>
+#include <algorithm>
+
+using namespace at;
+
+#define CUDA_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
+ i < (n); \
+ i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
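+// Number of 1-D thread blocks needed so that N elements are covered with
+// CUDA_NUM_THREADS threads per block (ceiling division).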
+inline int GET_BLOCKS(const int N)
+{
+ return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
+}
+
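+// Bilinear interpolation of a single (x, y) location in a height x width map.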
+template <typename scalar_t>
+__device__ scalar_t bilinear_interp(
+ const scalar_t *data,
+ const scalar_t x,
+ const scalar_t y,
+ const int width,
+ const int height)
+{
+ int x1 = floor(x);
+ int x2 = ceil(x);
+ int y1 = floor(y);
+ int y2 = ceil(y);
+ scalar_t dist_x = (scalar_t)(x - x1);
+ scalar_t dist_y = (scalar_t)(y - y1);
+ scalar_t value11 = data[y1 * width + x1];
+ scalar_t value12 = data[y2 * width + x1];
+ scalar_t value21 = data[y1 * width + x2];
+ scalar_t value22 = data[y2 * width + x2];
+ scalar_t value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22;
+ return value;
+}
+
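+// Deformable position-sensitive RoI pooling, forward pass. Each output element
+// (n, ctop, ph, pw) averages sample_per_part x sample_per_part bilinear samples
+// taken inside its bin, after shifting the bin by the learned offsets in
+// bottom_trans (scaled by trans_std). top_count records how many samples fell
+// inside the feature map and is reused by the backward pass.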
+template <typename scalar_t>
+__global__ void DeformablePSROIPoolForwardKernel(
+ const int count,
+ const scalar_t *bottom_data,
+ const scalar_t spatial_scale,
+ const int channels,
+ const int height, const int width,
+ const int pooled_height, const int pooled_width,
+ const scalar_t *bottom_rois, const scalar_t *bottom_trans,
+ const int no_trans,
+ const scalar_t trans_std,
+ const int sample_per_part,
+ const int output_dim,
+ const int group_size,
+ const int part_size,
+ const int num_classes,
+ const int channels_each_class,
+ scalar_t *top_data,
+ scalar_t *top_count)
+{
+ CUDA_KERNEL_LOOP(index, count)
+ {
+ // The output is in order (n, ctop, ph, pw)
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int ctop = (index / pooled_width / pooled_height) % output_dim;
+ int n = index / pooled_width / pooled_height / output_dim;
+
+ // [start, end) interval for spatial sampling
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
+ int roi_batch_ind = offset_bottom_rois[0];
+ scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
+ scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
+ scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
+ scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
+
+ // Force too small ROIs to be 1x1
+ scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
+ scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1);
+
+ // Compute w and h at bottom
+ scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height);
+ scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width);
+
+ scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part);
+ scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part);
+
+ int part_h = floor((scalar_t)(ph) / pooled_height * part_size);
+ int part_w = floor((scalar_t)(pw) / pooled_width * part_size);
+ int class_id = ctop / channels_each_class;
+ scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;
+ scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;
+
+ scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w;
+ wstart += trans_x * roi_width;
+ scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h;
+ hstart += trans_y * roi_height;
+
+ scalar_t sum = 0;
+ int count = 0;
+ int gw = floor((scalar_t)(pw)*group_size / pooled_width);
+ int gh = floor((scalar_t)(ph)*group_size / pooled_height);
+ gw = min(max(gw, 0), group_size - 1);
+ gh = min(max(gh, 0), group_size - 1);
+
+ const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;
+ for (int ih = 0; ih < sample_per_part; ih++)
+ {
+ for (int iw = 0; iw < sample_per_part; iw++)
+ {
+ scalar_t w = wstart + iw * sub_bin_size_w;
+ scalar_t h = hstart + ih * sub_bin_size_h;
+ // bilinear interpolation
+ if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
+ {
+ continue;
+ }
+ w = min(max(w, 0.), width - 1.);
+ h = min(max(h, 0.), height - 1.);
+ int c = (ctop * group_size + gh) * group_size + gw;
+ scalar_t val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height);
+ sum += val;
+ count++;
+ }
+ }
+ top_data[index] = count == 0 ? (scalar_t)(0) : sum / count;
+ top_count[index] = count;
+ }
+}
+
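+// Backward pass: the incoming gradient of each output bin is split evenly over
+// the samples that contributed to it. Gradients w.r.t. the feature map are
+// scattered to the four bilinear neighbours with atomicAdd, and gradients
+// w.r.t. the learned offsets are accumulated into bottom_trans_diff.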
+template <typename scalar_t>
+__global__ void DeformablePSROIPoolBackwardAccKernel(
+ const int count,
+ const scalar_t *top_diff,
+ const scalar_t *top_count,
+ const int num_rois,
+ const scalar_t spatial_scale,
+ const int channels,
+ const int height, const int width,
+ const int pooled_height, const int pooled_width,
+ const int output_dim,
+ scalar_t *bottom_data_diff, scalar_t *bottom_trans_diff,
+ const scalar_t *bottom_data,
+ const scalar_t *bottom_rois,
+ const scalar_t *bottom_trans,
+ const int no_trans,
+ const scalar_t trans_std,
+ const int sample_per_part,
+ const int group_size,
+ const int part_size,
+ const int num_classes,
+ const int channels_each_class)
+{
+ CUDA_KERNEL_LOOP(index, count)
+ {
+ // The output is in order (n, ctop, ph, pw)
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int ctop = (index / pooled_width / pooled_height) % output_dim;
+ int n = index / pooled_width / pooled_height / output_dim;
+
+ // [start, end) interval for spatial sampling
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
+ int roi_batch_ind = offset_bottom_rois[0];
+ scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
+ scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
+ scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
+ scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
+
+ // Force too small ROIs to be 1x1
+ scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
+ scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1);
+
+ // Compute w and h at bottom
+ scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height);
+ scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width);
+
+ scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part);
+ scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part);
+
+ int part_h = floor((scalar_t)(ph) / pooled_height * part_size);
+ int part_w = floor((scalar_t)(pw) / pooled_width * part_size);
+ int class_id = ctop / channels_each_class;
+ scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;
+ scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;
+
+ scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w;
+ wstart += trans_x * roi_width;
+ scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h;
+ hstart += trans_y * roi_height;
+
+ if (top_count[index] <= 0)
+ {
+ continue;
+ }
+ scalar_t diff_val = top_diff[index] / top_count[index];
+ const scalar_t *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;
+ scalar_t *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;
+ int gw = floor((scalar_t)(pw)*group_size / pooled_width);
+ int gh = floor((scalar_t)(ph)*group_size / pooled_height);
+ gw = min(max(gw, 0), group_size - 1);
+ gh = min(max(gh, 0), group_size - 1);
+
+ for (int ih = 0; ih < sample_per_part; ih++)
+ {
+ for (int iw = 0; iw < sample_per_part; iw++)
+ {
+ scalar_t w = wstart + iw * sub_bin_size_w;
+ scalar_t h = hstart + ih * sub_bin_size_h;
+ // bilinear interpolation
+ if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
+ {
+ continue;
+ }
+ w = min(max(w, 0.), width - 1.);
+ h = min(max(h, 0.), height - 1.);
+ int c = (ctop * group_size + gh) * group_size + gw;
+ // backward on feature
+ int x0 = floor(w);
+ int x1 = ceil(w);
+ int y0 = floor(h);
+ int y1 = ceil(h);
+ scalar_t dist_x = w - x0, dist_y = h - y0;
+ scalar_t q00 = (1 - dist_x) * (1 - dist_y);
+ scalar_t q01 = (1 - dist_x) * dist_y;
+ scalar_t q10 = dist_x * (1 - dist_y);
+ scalar_t q11 = dist_x * dist_y;
+ int bottom_index_base = c * height * width;
+ atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);
+ atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);
+ atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);
+ atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);
+
+ if (no_trans)
+ {
+ continue;
+ }
+ scalar_t U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];
+ scalar_t U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];
+ scalar_t U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];
+ scalar_t U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];
+ scalar_t diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;
+ diff_x *= roi_width;
+ scalar_t diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;
+ diff_y *= roi_height;
+
+ atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);
+ atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);
+ }
+ }
+ }
+}
+
+void DeformablePSROIPoolForward(const at::Tensor data,
+ const at::Tensor bbox,
+ const at::Tensor trans,
+ at::Tensor out,
+ at::Tensor top_count,
+ const int batch,
+ const int channels,
+ const int height,
+ const int width,
+ const int num_bbox,
+ const int channels_trans,
+ const int no_trans,
+ const float spatial_scale,
+ const int output_dim,
+ const int group_size,
+ const int pooled_size,
+ const int part_size,
+ const int sample_per_part,
+ const float trans_std)
+{
+ const int pooled_height = pooled_size;
+ const int pooled_width = pooled_size;
+ const int count = num_bbox * output_dim * pooled_height * pooled_width;
+ const int num_classes = no_trans ? 1 : channels_trans / 2;
+ const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
+        const scalar_t *bottom_data = data.data<scalar_t>();
+        const scalar_t *bottom_rois = bbox.data<scalar_t>();
+        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
+        scalar_t *top_data = out.data<scalar_t>();
+        scalar_t *top_count_data = top_count.data<scalar_t>();
+
+        DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
+ count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,
+ bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part, output_dim,
+ group_size, part_size, num_classes, channels_each_class, top_data, top_count_data);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
+ }
+}
+
+void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
+ const at::Tensor data,
+ const at::Tensor bbox,
+ const at::Tensor trans,
+ const at::Tensor top_count,
+ at::Tensor in_grad,
+ at::Tensor trans_grad,
+ const int batch,
+ const int channels,
+ const int height,
+ const int width,
+ const int num_bbox,
+ const int channels_trans,
+ const int no_trans,
+ const float spatial_scale,
+ const int output_dim,
+ const int group_size,
+ const int pooled_size,
+ const int part_size,
+ const int sample_per_part,
+ const float trans_std)
+{
+ // LOG(INFO) << "DeformablePSROIPoolBackward";
+ const int num_rois = num_bbox;
+ const int pooled_height = pooled_size;
+ const int pooled_width = pooled_size;
+ const int count = num_bbox * output_dim * pooled_height * pooled_width;
+ const int num_classes = no_trans ? 1 : channels_trans / 2;
+ const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
+        const scalar_t *top_diff = out_grad.data<scalar_t>();
+        const scalar_t *bottom_data = data.data<scalar_t>();
+        const scalar_t *bottom_rois = bbox.data<scalar_t>();
+        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
+        scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
+        scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
+        const scalar_t *top_count_data = top_count.data<scalar_t>();
+
+        DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
+ count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,
+ pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff,
+ bottom_data, bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part,
+ group_size, part_size, num_classes, channels_each_class);
+ }));
+
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+    printf("error in DeformablePSROIPoolBackwardAcc: %s\n", cudaGetErrorString(err));
+ }
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/__init__.py
new file mode 100644
index 000000000..f537ace08
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/__init__.py
@@ -0,0 +1,3 @@
+from .masked_conv import MaskedConv2d, masked_conv2d
+
+__all__ = ['masked_conv2d', 'MaskedConv2d']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/masked_conv.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/masked_conv.py
new file mode 100644
index 000000000..7d84f503c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/masked_conv.py
@@ -0,0 +1,89 @@
+import math
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from . import masked_conv2d_cuda
+
+
+class MaskedConv2dFunction(Function):
+
+ @staticmethod
+ def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
+ assert mask.dim() == 3 and mask.size(0) == 1
+ assert features.dim() == 4 and features.size(0) == 1
+ assert features.size()[2:] == mask.size()[1:]
+ pad_h, pad_w = _pair(padding)
+ stride_h, stride_w = _pair(stride)
+ if stride_h != 1 or stride_w != 1:
+            raise ValueError(
+                'Only stride 1 is currently supported in masked_conv2d.')
+ if not features.is_cuda:
+ raise NotImplementedError
+
+ out_channel, in_channel, kernel_h, kernel_w = weight.size()
+
+ batch_size = features.size(0)
+ out_h = int(
+ math.floor((features.size(2) + 2 * pad_h -
+ (kernel_h - 1) - 1) / stride_h + 1))
+        out_w = int(
+            math.floor((features.size(3) + 2 * pad_w -
+                        (kernel_w - 1) - 1) / stride_w + 1))
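+        # Convolution is evaluated only at spatial positions where the mask is
+        # non-zero: an im2col buffer is built for those positions, multiplied
+        # by the flattened weights, and scattered back into a dense output map.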
+ mask_inds = torch.nonzero(mask[0] > 0)
+ output = features.new_zeros(batch_size, out_channel, out_h, out_w)
+ if mask_inds.numel() > 0:
+ mask_h_idx = mask_inds[:, 0].contiguous()
+ mask_w_idx = mask_inds[:, 1].contiguous()
+ data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
+ mask_inds.size(0))
+ masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx,
+ mask_w_idx, kernel_h,
+ kernel_w, pad_h, pad_w,
+ data_col)
+
+ masked_output = torch.addmm(1, bias[:, None], 1,
+ weight.view(out_channel, -1), data_col)
+ masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx,
+ mask_w_idx, out_h, out_w,
+ out_channel, output)
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+        return (None, ) * 6
+
+
+masked_conv2d = MaskedConv2dFunction.apply
+
+
+class MaskedConv2d(nn.Conv2d):
+ """A MaskedConv2d which inherits the official Conv2d.
+
+    The masked forward does not implement a backward function and currently
+    only supports a stride of 1.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True):
+ super(MaskedConv2d,
+ self).__init__(in_channels, out_channels, kernel_size, stride,
+ padding, dilation, groups, bias)
+
+ def forward(self, input, mask=None):
+ if mask is None: # fallback to the normal Conv2d
+ return super(MaskedConv2d, self).forward(input)
+ else:
+ return masked_conv2d(input, mask, self.weight, self.bias,
+ self.padding)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp
new file mode 100644
index 000000000..6e495abe3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp
@@ -0,0 +1,74 @@
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+int MaskedIm2colForwardLaucher(const at::Tensor im, const int height,
+ const int width, const int channels,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, const int mask_cnt,
+ at::Tensor col);
+
+int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
+ const int width, const int channels,
+ const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, const int mask_cnt,
+ at::Tensor im);
+
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+ TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+ CHECK_CUDA(x); \
+ CHECK_CONTIGUOUS(x)
+
+int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, const int kernel_h,
+ const int kernel_w, const int pad_h,
+ const int pad_w, at::Tensor col) {
+ CHECK_INPUT(im);
+ CHECK_INPUT(mask_h_idx);
+ CHECK_INPUT(mask_w_idx);
+ CHECK_INPUT(col);
+ // im: (n, ic, h, w), kernel size (kh, kw)
+ // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
+
+ int channels = im.size(1);
+ int height = im.size(2);
+ int width = im.size(3);
+ int mask_cnt = mask_h_idx.size(0);
+
+ MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w,
+ pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt,
+ col);
+
+ return 1;
+}
+
+int masked_col2im_forward_cuda(const at::Tensor col,
+ const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, int height,
+ int width, int channels, at::Tensor im) {
+ CHECK_INPUT(col);
+ CHECK_INPUT(mask_h_idx);
+ CHECK_INPUT(mask_w_idx);
+ CHECK_INPUT(im);
+ // im: (n, ic, h, w), kernel size (kh, kw)
+  // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
+
+ int mask_cnt = mask_h_idx.size(0);
+
+ MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx,
+ mask_w_idx, mask_cnt, im);
+
+ return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("masked_im2col_forward", &masked_im2col_forward_cuda,
+ "masked_im2col forward (CUDA)");
+ m.def("masked_col2im_forward", &masked_col2im_forward_cuda,
+ "masked_col2im forward (CUDA)");
+}
\ No newline at end of file
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
new file mode 100644
index 000000000..0f66eb71b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
@@ -0,0 +1,114 @@
+#include <ATen/ATen.h>
+#include <THC/THC.h>
+#include <THC/THCAtomics.cuh>
+
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+ i += blockDim.x * gridDim.x)
+
+#define THREADS_PER_BLOCK 1024
+
+inline int GET_BLOCKS(const int N) {
+ int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+ int max_block_num = 65000;
+  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
+}
+
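+// Builds im2col columns only for the masked spatial positions: for each
+// (channel, masked position) pair, the kernel_h x kernel_w patch around that
+// position is copied into one column of data_col (zero-filled at the borders).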
+template <typename scalar_t>
+__global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
+ const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int64_t *mask_h_idx,
+ const int64_t *mask_w_idx,
+ const int mask_cnt, scalar_t *data_col) {
+ // mask_cnt * channels
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int m_index = index % mask_cnt;
+ const int h_col = mask_h_idx[m_index];
+ const int w_col = mask_w_idx[m_index];
+ const int c_im = index / mask_cnt;
+ const int c_col = c_im * kernel_h * kernel_w;
+ const int h_offset = h_col - pad_h;
+ const int w_offset = w_col - pad_w;
+ scalar_t *data_col_ptr = data_col + c_col * mask_cnt + m_index;
+ for (int i = 0; i < kernel_h; ++i) {
+ int h_im = h_offset + i;
+ for (int j = 0; j < kernel_w; ++j) {
+ int w_im = w_offset + j;
+ if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+ *data_col_ptr =
+ (scalar_t)data_im[(c_im * height + h_im) * width + w_im];
+ } else {
+ *data_col_ptr = 0.0;
+ }
+ data_col_ptr += mask_cnt;
+ }
+ }
+ }
+}
+
+int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
+ const int width, const int channels,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, const int mask_cnt,
+ at::Tensor top_data) {
+ const int output_size = mask_cnt * channels;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
+        const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
+        scalar_t *top_data_ = top_data.data<scalar_t>();
+        MaskedIm2colForward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, bottom_data_, height, width, kernel_h, kernel_w,
+ pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
+
+template <typename scalar_t>
+__global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
+ const int height, const int width,
+ const int channels,
+ const int64_t *mask_h_idx,
+ const int64_t *mask_w_idx,
+ const int mask_cnt, scalar_t *data_im) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int m_index = index % mask_cnt;
+ const int h_im = mask_h_idx[m_index];
+ const int w_im = mask_w_idx[m_index];
+ const int c_im = index / mask_cnt;
+ // compute the start and end of the output
+ data_im[(c_im * height + h_im) * width + w_im] = data_col[index];
+ }
+}
+
+int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
+ const int width, const int channels,
+ const at::Tensor mask_h_idx,
+ const at::Tensor mask_w_idx, const int mask_cnt,
+ at::Tensor top_data) {
+ const int output_size = mask_cnt * channels;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
+        const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
+        scalar_t *top_data_ = top_data.data<scalar_t>();
+
+        MaskedCol2imForward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, bottom_data_, height, width, channels, mask_h_idx_,
+ mask_w_idx_, mask_cnt, top_data_);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/__init__.py
new file mode 100644
index 000000000..c4407041a
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/__init__.py
@@ -0,0 +1,3 @@
+from .nms_wrapper import nms, soft_nms
+
+__all__ = ['nms', 'soft_nms']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py
new file mode 100644
index 000000000..b82e49345
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py
@@ -0,0 +1,102 @@
+import numpy as np
+import torch
+
+from . import nms_cpu, nms_cuda
+from .soft_nms_cpu import soft_nms_cpu
+
+
+def nms(dets, iou_thr, device_id=None):
+ """Dispatch to either CPU or GPU NMS implementations.
+
+    The input can be either a torch tensor or a numpy array. GPU NMS will be
+    used if the input is a GPU tensor or, for numpy inputs, if `device_id` is
+    specified; otherwise CPU NMS will be used. The return values have the same
+    type as the inputs.
+
+ Arguments:
+ dets (torch.Tensor or np.ndarray): bboxes with scores.
+ iou_thr (float): IoU threshold for NMS.
+ device_id (int, optional): when `dets` is a numpy array, if `device_id`
+ is None, then cpu nms is used, otherwise gpu_nms will be used.
+
+ Returns:
+        tuple: kept bboxes and indices, which are always of the same data
+            type as the input.
+
+ Example:
+ >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
+ >>> [49.3, 32.9, 51.0, 35.3, 0.9],
+ >>> [49.2, 31.8, 51.0, 35.4, 0.5],
+ >>> [35.1, 11.5, 39.1, 15.7, 0.5],
+ >>> [35.6, 11.8, 39.3, 14.2, 0.5],
+ >>> [35.3, 11.5, 39.9, 14.5, 0.4],
+ >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32)
+ >>> iou_thr = 0.7
+        >>> suppressed, inds = nms(dets, iou_thr)
+        >>> assert len(inds) == len(suppressed) == 3
+ """
+ # convert dets (tensor or numpy array) to tensor
+ if isinstance(dets, torch.Tensor):
+ is_numpy = False
+ dets_th = dets
+ elif isinstance(dets, np.ndarray):
+ is_numpy = True
+ device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
+ dets_th = torch.from_numpy(dets).to(device)
+ else:
+ raise TypeError(
+ 'dets must be either a Tensor or numpy array, but got {}'.format(
+ type(dets)))
+
+ # execute cpu or cuda nms
+ if dets_th.shape[0] == 0:
+ inds = dets_th.new_zeros(0, dtype=torch.long)
+ else:
+ if dets_th.is_cuda:
+ inds = nms_cuda.nms(dets_th, iou_thr)
+ else:
+ inds = nms_cpu.nms(dets_th, iou_thr)
+
+ if is_numpy:
+ inds = inds.cpu().numpy()
+ return dets[inds, :], inds
+
+
+def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
+ """
+ Example:
+ >>> dets = np.array([[4., 3., 5., 3., 0.9],
+ >>> [4., 3., 5., 4., 0.9],
+ >>> [3., 1., 3., 1., 0.5],
+ >>> [3., 1., 3., 1., 0.5],
+ >>> [3., 1., 3., 1., 0.4],
+ >>> [3., 1., 3., 1., 0.0]], dtype=np.float32)
+ >>> iou_thr = 0.7
+        >>> suppressed, inds = soft_nms(dets, iou_thr, sigma=0.5)
+        >>> assert len(inds) == len(suppressed) == 3
+ """
+ if isinstance(dets, torch.Tensor):
+ is_tensor = True
+ dets_np = dets.detach().cpu().numpy()
+ elif isinstance(dets, np.ndarray):
+ is_tensor = False
+ dets_np = dets
+ else:
+ raise TypeError(
+ 'dets must be either a Tensor or numpy array, but got {}'.format(
+ type(dets)))
+
+ method_codes = {'linear': 1, 'gaussian': 2}
+ if method not in method_codes:
+ raise ValueError('Invalid method for SoftNMS: {}'.format(method))
+ new_dets, inds = soft_nms_cpu(
+ dets_np,
+ iou_thr,
+ method=method_codes[method],
+ sigma=sigma,
+ min_score=min_score)
+
+ if is_tensor:
+ return dets.new_tensor(new_dets), dets.new_tensor(
+ inds, dtype=torch.long)
+ else:
+ return new_dets.astype(np.float32), inds.astype(np.int64)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cpu.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cpu.cpp
new file mode 100644
index 000000000..f7cffb490
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cpu.cpp
@@ -0,0 +1,71 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#include <torch/extension.h>
+
+template <typename scalar_t>
+at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
+ AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
+
+ if (dets.numel() == 0) {
+ return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
+ }
+
+ auto x1_t = dets.select(1, 0).contiguous();
+ auto y1_t = dets.select(1, 1).contiguous();
+ auto x2_t = dets.select(1, 2).contiguous();
+ auto y2_t = dets.select(1, 3).contiguous();
+ auto scores = dets.select(1, 4).contiguous();
+
+ at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
+
+ auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
+
+ auto ndets = dets.size(0);
+ at::Tensor suppressed_t =
+ at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
+
+  auto suppressed = suppressed_t.data<uint8_t>();
+  auto order = order_t.data<int64_t>();
+  auto x1 = x1_t.data<scalar_t>();
+  auto y1 = y1_t.data<scalar_t>();
+  auto x2 = x2_t.data<scalar_t>();
+  auto y2 = y2_t.data<scalar_t>();
+  auto areas = areas_t.data<scalar_t>();
+
+ for (int64_t _i = 0; _i < ndets; _i++) {
+ auto i = order[_i];
+ if (suppressed[i] == 1) continue;
+ auto ix1 = x1[i];
+ auto iy1 = y1[i];
+ auto ix2 = x2[i];
+ auto iy2 = y2[i];
+ auto iarea = areas[i];
+
+ for (int64_t _j = _i + 1; _j < ndets; _j++) {
+ auto j = order[_j];
+ if (suppressed[j] == 1) continue;
+ auto xx1 = std::max(ix1, x1[j]);
+ auto yy1 = std::max(iy1, y1[j]);
+ auto xx2 = std::min(ix2, x2[j]);
+ auto yy2 = std::min(iy2, y2[j]);
+
+      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
+      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
+ auto inter = w * h;
+ auto ovr = inter / (iarea + areas[j] - inter);
+ if (ovr >= threshold) suppressed[j] = 1;
+ }
+ }
+ return at::nonzero(suppressed_t == 0).squeeze(1);
+}
+
+at::Tensor nms(const at::Tensor& dets, const float threshold) {
+ at::Tensor result;
+ AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
+    result = nms_cpu_kernel<scalar_t>(dets, threshold);
+ });
+ return result;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("nms", &nms, "non-maximum suppression");
+}
\ No newline at end of file
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cuda.cpp
new file mode 100644
index 000000000..2ac6cd23f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cuda.cpp
@@ -0,0 +1,17 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#include <torch/extension.h>
+
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
+
+at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
+
+at::Tensor nms(const at::Tensor& dets, const float threshold) {
+ CHECK_CUDA(dets);
+ if (dets.numel() == 0)
+ return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
+ return nms_cuda(dets, threshold);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("nms", &nms, "non-maximum suppression");
+}
\ No newline at end of file
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_kernel.cu
new file mode 100644
index 000000000..ada9bea25
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_kernel.cu
@@ -0,0 +1,139 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/DeviceGuard.h>
+
+#include <THC/THC.h>
+#include <THC/THCDeviceUtils.cuh>
+
+#include <vector>
+#include <iostream>
+
+int const threadsPerBlock = sizeof(unsigned long long) * 8;
+
+__device__ inline float devIoU(float const * const a, float const * const b) {
+ float left = max(a[0], b[0]), right = min(a[2], b[2]);
+ float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+ float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+ float interS = width * height;
+ float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+ float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+ return interS / (Sa + Sb - interS);
+}
+
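+// Each thread block compares one tile of up to threadsPerBlock (64) boxes
+// against another tile and writes a 64-bit mask whose bits mark which boxes of
+// the column tile overlap the row box above the IoU threshold. The host later
+// combines these masks to decide which boxes to keep.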
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+ const float *dev_boxes, unsigned long long *dev_mask) {
+ const int row_start = blockIdx.y;
+ const int col_start = blockIdx.x;
+
+ // if (row_start > col_start) return;
+
+ const int row_size =
+ min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+ const int col_size =
+ min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+ __shared__ float block_boxes[threadsPerBlock * 5];
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 5 + 0] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
+ block_boxes[threadIdx.x * 5 + 1] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
+ block_boxes[threadIdx.x * 5 + 2] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
+ block_boxes[threadIdx.x * 5 + 3] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
+ block_boxes[threadIdx.x * 5 + 4] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+ const float *cur_box = dev_boxes + cur_box_idx * 5;
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+}
+
+// boxes is a N x 5 tensor
+at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
+
+ // Ensure CUDA uses the input tensor device.
+ at::DeviceGuard guard(boxes.device());
+
+ using scalar_t = float;
+ AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
+ auto scores = boxes.select(1, 4);
+ auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
+ auto boxes_sorted = boxes.index_select(0, order_t);
+
+ int boxes_num = boxes.size(0);
+
+ const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
+
+  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
+
+ THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
+
+ unsigned long long* mask_dev = NULL;
+ //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
+ // boxes_num * col_blocks * sizeof(unsigned long long)));
+
+ mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
+
+ dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
+ THCCeilDiv(boxes_num, threadsPerBlock));
+ dim3 threads(threadsPerBlock);
+  nms_kernel<<<blocks, threads>>>(boxes_num,
+ nms_overlap_thresh,
+ boxes_dev,
+ mask_dev);
+
+  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
+ THCudaCheck(cudaMemcpyAsync(
+ &mask_host[0],
+ mask_dev,
+ sizeof(unsigned long long) * boxes_num * col_blocks,
+ cudaMemcpyDeviceToHost,
+ at::cuda::getCurrentCUDAStream()
+ ));
+
+  std::vector<unsigned long long> remv(col_blocks);
+ memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
+
+ at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
+  int64_t* keep_out = keep.data<int64_t>();
+
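+  // Walk boxes in descending score order; keep a box only if no previously
+  // kept box has marked it as suppressed, then OR its mask into remv so the
+  // boxes it suppresses are skipped in later iterations.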
+ int num_to_keep = 0;
+ for (int i = 0; i < boxes_num; i++) {
+ int nblock = i / threadsPerBlock;
+ int inblock = i % threadsPerBlock;
+
+ if (!(remv[nblock] & (1ULL << inblock))) {
+ keep_out[num_to_keep++] = i;
+ unsigned long long *p = &mask_host[0] + i * col_blocks;
+ for (int j = nblock; j < col_blocks; j++) {
+ remv[j] |= p[j];
+ }
+ }
+ }
+
+ THCudaFree(state, mask_dev);
+ // TODO improve this part
+ return std::get<0>(order_t.index({
+ keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
+ order_t.device(), keep.scalar_type())
+ }).sort(0, false));
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/soft_nms_cpu.pyx b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/soft_nms_cpu.pyx
new file mode 100644
index 000000000..97f53f18d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/soft_nms_cpu.pyx
@@ -0,0 +1,127 @@
+# ----------------------------------------------------------
+# Soft-NMS: Improving Object Detection With One Line of Code
+# Copyright (c) University of Maryland, College Park
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Navaneeth Bodla and Bharat Singh
+# Modified by Kai Chen
+# ----------------------------------------------------------
+
+# cython: language_level=3, boundscheck=False
+
+import numpy as np
+cimport numpy as np
+
+
+cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
+ return a if a >= b else b
+
+cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
+ return a if a <= b else b
+
+
+def soft_nms_cpu(
+ np.ndarray[float, ndim=2] boxes_in,
+ float iou_thr,
+ unsigned int method=1,
+ float sigma=0.5,
+ float min_score=0.001,
+):
+ boxes = boxes_in.copy()
+ cdef int N = boxes.shape[0]
+ cdef float iw, ih, box_area
+ cdef float ua
+ cdef int pos = 0
+ cdef float maxscore = 0
+ cdef int maxpos = 0
+ cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
+ inds = np.arange(N)
+
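+    # Each iteration moves the highest-scoring remaining box to position i,
+    # then decays the scores of the boxes that overlap it (linearly, with a
+    # Gaussian, or by hard suppression). Boxes whose score drops below
+    # min_score are swapped to the tail and N is shrunk, so boxes[:N] always
+    # holds the surviving detections.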
+ for i in range(N):
+ maxscore = boxes[i, 4]
+ maxpos = i
+
+ tx1 = boxes[i, 0]
+ ty1 = boxes[i, 1]
+ tx2 = boxes[i, 2]
+ ty2 = boxes[i, 3]
+ ts = boxes[i, 4]
+ ti = inds[i]
+
+ pos = i + 1
+ # get max box
+ while pos < N:
+ if maxscore < boxes[pos, 4]:
+ maxscore = boxes[pos, 4]
+ maxpos = pos
+ pos = pos + 1
+
+ # add max box as a detection
+ boxes[i, 0] = boxes[maxpos, 0]
+ boxes[i, 1] = boxes[maxpos, 1]
+ boxes[i, 2] = boxes[maxpos, 2]
+ boxes[i, 3] = boxes[maxpos, 3]
+ boxes[i, 4] = boxes[maxpos, 4]
+ inds[i] = inds[maxpos]
+
+ # swap ith box with position of max box
+ boxes[maxpos, 0] = tx1
+ boxes[maxpos, 1] = ty1
+ boxes[maxpos, 2] = tx2
+ boxes[maxpos, 3] = ty2
+ boxes[maxpos, 4] = ts
+ inds[maxpos] = ti
+
+ tx1 = boxes[i, 0]
+ ty1 = boxes[i, 1]
+ tx2 = boxes[i, 2]
+ ty2 = boxes[i, 3]
+ ts = boxes[i, 4]
+
+ pos = i + 1
+ # NMS iterations, note that N changes if detection boxes fall below
+ # threshold
+ while pos < N:
+ x1 = boxes[pos, 0]
+ y1 = boxes[pos, 1]
+ x2 = boxes[pos, 2]
+ y2 = boxes[pos, 3]
+ s = boxes[pos, 4]
+
+ area = (x2 - x1 + 1) * (y2 - y1 + 1)
+ iw = (min(tx2, x2) - max(tx1, x1) + 1)
+ if iw > 0:
+ ih = (min(ty2, y2) - max(ty1, y1) + 1)
+ if ih > 0:
+ ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
+ ov = iw * ih / ua # iou between max box and detection box
+
+ if method == 1: # linear
+ if ov > iou_thr:
+ weight = 1 - ov
+ else:
+ weight = 1
+ elif method == 2: # gaussian
+ weight = np.exp(-(ov * ov) / sigma)
+ else: # original NMS
+ if ov > iou_thr:
+ weight = 0
+ else:
+ weight = 1
+
+ boxes[pos, 4] = weight * boxes[pos, 4]
+
+ # if box score falls below threshold, discard the box by
+ # swapping with last box update N
+ if boxes[pos, 4] < min_score:
+ boxes[pos, 0] = boxes[N-1, 0]
+ boxes[pos, 1] = boxes[N-1, 1]
+ boxes[pos, 2] = boxes[N-1, 2]
+ boxes[pos, 3] = boxes[N-1, 3]
+ boxes[pos, 4] = boxes[N-1, 4]
+ inds[pos] = inds[N - 1]
+ N = N - 1
+ pos = pos - 1
+
+ pos = pos + 1
+
+ return boxes[:N], inds[:N]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/__init__.py
new file mode 100644
index 000000000..6da98298f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/__init__.py
@@ -0,0 +1,3 @@
+from .roi_align import RoIAlign, roi_align
+
+__all__ = ['roi_align', 'RoIAlign']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/gradcheck.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/gradcheck.py
new file mode 100644
index 000000000..136456b39
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/gradcheck.py
@@ -0,0 +1,30 @@
+import os.path as osp
+import sys
+
+import numpy as np
+import torch
+from torch.autograd import gradcheck
+
+sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+from roi_align import RoIAlign # noqa: E402, isort:skip
+
+feat_size = 15
+spatial_scale = 1.0 / 8
+img_size = feat_size / spatial_scale
+num_imgs = 2
+num_rois = 20
+
+batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
+rois = np.random.rand(num_rois, 4) * img_size * 0.5
+rois[:, 2:] += img_size * 0.5
+rois = np.hstack((batch_ind, rois))
+
+feat = torch.randn(
+ num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
+rois = torch.from_numpy(rois).float().cuda()
+inputs = (feat, rois)
+print('Gradcheck for roi align...')
+test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
+print(test)
+test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
+print(test)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/roi_align.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/roi_align.py
new file mode 100644
index 000000000..a4cf24459
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/roi_align.py
@@ -0,0 +1,87 @@
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from . import roi_align_cuda
+
+
+class RoIAlignFunction(Function):
+
+ @staticmethod
+ def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
+ out_h, out_w = _pair(out_size)
+ assert isinstance(out_h, int) and isinstance(out_w, int)
+ ctx.spatial_scale = spatial_scale
+ ctx.sample_num = sample_num
+ ctx.save_for_backward(rois)
+ ctx.feature_size = features.size()
+
+ batch_size, num_channels, data_height, data_width = features.size()
+ num_rois = rois.size(0)
+
+ output = features.new_zeros(num_rois, num_channels, out_h, out_w)
+ if features.is_cuda:
+ roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,
+ sample_num, output)
+ else:
+ raise NotImplementedError
+
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ feature_size = ctx.feature_size
+ spatial_scale = ctx.spatial_scale
+ sample_num = ctx.sample_num
+ rois = ctx.saved_tensors[0]
+ assert (feature_size is not None and grad_output.is_cuda)
+
+ batch_size, num_channels, data_height, data_width = feature_size
+ out_w = grad_output.size(3)
+ out_h = grad_output.size(2)
+
+ grad_input = grad_rois = None
+ if ctx.needs_input_grad[0]:
+ grad_input = rois.new_zeros(batch_size, num_channels, data_height,
+ data_width)
+ roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
+ out_w, spatial_scale, sample_num,
+ grad_input)
+
+ return grad_input, grad_rois, None, None, None
+
+
+roi_align = RoIAlignFunction.apply
+
+
+class RoIAlign(nn.Module):
+
+ def __init__(self,
+ out_size,
+ spatial_scale,
+ sample_num=0,
+ use_torchvision=False):
+ super(RoIAlign, self).__init__()
+
+ self.out_size = _pair(out_size)
+ self.spatial_scale = float(spatial_scale)
+ self.sample_num = int(sample_num)
+ self.use_torchvision = use_torchvision
+
+ def forward(self, features, rois):
+ if self.use_torchvision:
+ from torchvision.ops import roi_align as tv_roi_align
+ return tv_roi_align(features, rois, self.out_size,
+ self.spatial_scale, self.sample_num)
+ else:
+ return roi_align(features, rois, self.out_size, self.spatial_scale,
+ self.sample_num)
+
+ def __repr__(self):
+ format_str = self.__class__.__name__
+ format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format(
+ self.out_size, self.spatial_scale, self.sample_num)
+ format_str += ', use_torchvision={})'.format(self.use_torchvision)
+ return format_str
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_cuda.cpp
new file mode 100644
index 000000000..66a557252
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_cuda.cpp
@@ -0,0 +1,87 @@
+#include <torch/extension.h>
+
+#include <ATen/ATen.h>
+
+#include <cmath>
+#include <vector>
+
+int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
+ const float spatial_scale, const int sample_num,
+ const int channels, const int height,
+ const int width, const int num_rois,
+ const int pooled_height, const int pooled_width,
+ at::Tensor output);
+
+int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
+ const float spatial_scale, const int sample_num,
+ const int channels, const int height,
+ const int width, const int num_rois,
+ const int pooled_height, const int pooled_width,
+ at::Tensor bottom_grad);
+
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+ TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+ CHECK_CUDA(x); \
+ CHECK_CONTIGUOUS(x)
+
+int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
+ int pooled_height, int pooled_width,
+ float spatial_scale, int sample_num,
+ at::Tensor output) {
+ CHECK_INPUT(features);
+ CHECK_INPUT(rois);
+ CHECK_INPUT(output);
+
+ // Number of ROIs
+ int num_rois = rois.size(0);
+ int size_rois = rois.size(1);
+
+ if (size_rois != 5) {
+ printf("wrong roi size\n");
+ return 0;
+ }
+
+ int num_channels = features.size(1);
+ int data_height = features.size(2);
+ int data_width = features.size(3);
+
+ ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num,
+ num_channels, data_height, data_width, num_rois,
+ pooled_height, pooled_width, output);
+
+ return 1;
+}
+
+int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
+ int pooled_height, int pooled_width,
+ float spatial_scale, int sample_num,
+ at::Tensor bottom_grad) {
+ CHECK_INPUT(top_grad);
+ CHECK_INPUT(rois);
+ CHECK_INPUT(bottom_grad);
+
+ // Number of ROIs
+ int num_rois = rois.size(0);
+ int size_rois = rois.size(1);
+ if (size_rois != 5) {
+ printf("wrong roi size\n");
+ return 0;
+ }
+
+ int num_channels = bottom_grad.size(1);
+ int data_height = bottom_grad.size(2);
+ int data_width = bottom_grad.size(3);
+
+ ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num,
+ num_channels, data_height, data_width, num_rois,
+ pooled_height, pooled_width, bottom_grad);
+
+ return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
+ m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_kernel.cu
new file mode 100644
index 000000000..038fc23e0
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_kernel.cu
@@ -0,0 +1,283 @@
+#include <ATen/ATen.h>
+#include <THC/THC.h>
+#include <THC/THCAtomics.cuh>
+
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+ i += blockDim.x * gridDim.x)
+
+#define THREADS_PER_BLOCK 1024
+
+inline int GET_BLOCKS(const int N) {
+ int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+ int max_block_num = 65000;
+  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
+}
+
+template <typename scalar_t>
+__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
+ const int height, const int width,
+ scalar_t y, scalar_t x) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ return 0;
+ }
+
+ if (y <= 0) y = 0;
+ if (x <= 0) x = 0;
+
+ int y_low = (int)y;
+ int x_low = (int)x;
+ int y_high;
+ int x_high;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (scalar_t)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (scalar_t)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ scalar_t ly = y - y_low;
+ scalar_t lx = x - x_low;
+ scalar_t hy = 1. - ly;
+ scalar_t hx = 1. - lx;
+ // do bilinear interpolation
+ scalar_t lt = bottom_data[y_low * width + x_low];
+ scalar_t rt = bottom_data[y_low * width + x_high];
+ scalar_t lb = bottom_data[y_high * width + x_low];
+ scalar_t rb = bottom_data[y_high * width + x_high];
+ scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb);
+
+ return val;
+}
+
+template <typename scalar_t>
+__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data,
+ const scalar_t *bottom_rois,
+ const scalar_t spatial_scale,
+ const int sample_num, const int channels,
+ const int height, const int width,
+ const int pooled_height, const int pooled_width,
+ scalar_t *top_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the aligned output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
+ int roi_batch_ind = offset_bottom_rois[0];
+ scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;
+ scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;
+ scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;
+ scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;
+
+ // Force malformed ROIs to be 1x1
+ scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);
+ scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);
+
+ scalar_t bin_size_h = roi_height / pooled_height;
+ scalar_t bin_size_w = roi_width / pooled_width;
+
+ const scalar_t *offset_bottom_data =
+ bottom_data + (roi_batch_ind * channels + c) * height * width;
+
+ int sample_num_h = (sample_num > 0)
+ ? sample_num
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int sample_num_w =
+ (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
+
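+    // Average sample_num_h x sample_num_w bilinear samples taken at regularly
+    // spaced points inside this output bin.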
+ scalar_t output_val = 0;
+ for (int iy = 0; iy < sample_num_h; iy++) {
+ const scalar_t y = roi_start_h + ph * bin_size_h +
+ (scalar_t)(iy + scalar_t(.5f)) * bin_size_h /
+ (scalar_t)(sample_num_h);
+ for (int ix = 0; ix < sample_num_w; ix++) {
+ const scalar_t x = roi_start_w + pw * bin_size_w +
+ (scalar_t)(ix + scalar_t(.5f)) * bin_size_w /
+ (scalar_t)(sample_num_w);
+ scalar_t val = bilinear_interpolate(offset_bottom_data,
+ height, width, y, x);
+ output_val += val;
+ }
+ }
+ output_val /= (sample_num_h * sample_num_w);
+ top_data[index] = output_val;
+ }
+}
+
+int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
+ const float spatial_scale, const int sample_num,
+ const int channels, const int height,
+ const int width, const int num_rois,
+ const int pooled_height, const int pooled_width,
+ at::Tensor output) {
+ const int output_size = num_rois * pooled_height * pooled_width * channels;
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ features.scalar_type(), "ROIAlignLaucherForward", ([&] {
+        const scalar_t *bottom_data = features.data<scalar_t>();
+        const scalar_t *rois_data = rois.data<scalar_t>();
+        scalar_t *top_data = output.data<scalar_t>();
+
+        ROIAlignForward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, bottom_data, rois_data, scalar_t(spatial_scale),
+ sample_num, channels, height, width, pooled_height,
+ pooled_width, top_data);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
+
+template <typename scalar_t>
+__device__ void bilinear_interpolate_gradient(const int height, const int width,
+ scalar_t y, scalar_t x,
+ scalar_t &w1, scalar_t &w2,
+ scalar_t &w3, scalar_t &w4,
+ int &x_low, int &x_high,
+ int &y_low, int &y_high) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ w1 = w2 = w3 = w4 = 0.;
+ x_low = x_high = y_low = y_high = -1;
+ return;
+ }
+
+ if (y <= 0) y = 0;
+ if (x <= 0) x = 0;
+
+ y_low = (int)y;
+ x_low = (int)x;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (scalar_t)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (scalar_t)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ scalar_t ly = y - y_low;
+ scalar_t lx = x - x_low;
+ scalar_t hy = 1. - ly;
+ scalar_t hx = 1. - lx;
+
+ w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ return;
+}
+
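+// Backward pass: each output gradient is divided by the number of samples in
+// its bin and scattered to the four bilinear neighbours of every sample with
+// atomicAdd, using the same weights as the forward interpolation.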
+template <typename scalar_t>
+__global__ void ROIAlignBackward(
+ const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
+ const scalar_t spatial_scale, const int sample_num, const int channels,
+ const int height, const int width, const int pooled_height,
+ const int pooled_width, scalar_t *bottom_diff) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the aligned output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
+ int roi_batch_ind = offset_bottom_rois[0];
+ scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;
+ scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;
+ scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;
+ scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;
+
+ // Force malformed ROIs to be 1x1
+ scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);
+ scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);
+
+ scalar_t bin_size_h = roi_height / pooled_height;
+ scalar_t bin_size_w = roi_width / pooled_width;
+
+ scalar_t *offset_bottom_diff =
+ bottom_diff + (roi_batch_ind * channels + c) * height * width;
+ int offset_top = (n * channels + c) * pooled_height * pooled_width +
+ ph * pooled_width + pw;
+ scalar_t offset_top_diff = top_diff[offset_top];
+
+ int sample_num_h = (sample_num > 0)
+ ? sample_num
+ : ceil(roi_height / pooled_height); // e.g., = 2
+ int sample_num_w =
+ (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
+
+ const scalar_t count = (scalar_t)(sample_num_h * sample_num_w);
+
+ for (int iy = 0; iy < sample_num_h; iy++) {
+ const scalar_t y =
+ roi_start_h + ph * bin_size_h +
+ (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h);
+ for (int ix = 0; ix < sample_num_w; ix++) {
+ const scalar_t x =
+ roi_start_w + pw * bin_size_w +
+ (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w);
+ scalar_t w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+
+ bilinear_interpolate_gradient(
+ height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high);
+ scalar_t g1 = offset_top_diff * w1 / count;
+ scalar_t g2 = offset_top_diff * w2 / count;
+ scalar_t g3 = offset_top_diff * w3 / count;
+ scalar_t g4 = offset_top_diff * w4 / count;
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
+ atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
+ atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
+ atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
+ }
+ }
+ }
+ }
+}
+
+int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
+ const float spatial_scale, const int sample_num,
+ const int channels, const int height,
+ const int width, const int num_rois,
+ const int pooled_height, const int pooled_width,
+ at::Tensor bottom_grad) {
+ const int output_size = num_rois * pooled_height * pooled_width * channels;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
+        const scalar_t *top_diff = top_grad.data<scalar_t>();
+        const scalar_t *rois_data = rois.data<scalar_t>();
+        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+ if (sizeof(scalar_t) == sizeof(double)) {
+ fprintf(stderr, "double is not supported\n");
+ exit(-1);
+ }
+
+        ROIAlignBackward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, top_diff, rois_data, spatial_scale, sample_num,
+ channels, height, width, pooled_height, pooled_width,
+ bottom_diff);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
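
Note: the backward kernel above spreads each output gradient over the four integer neighbours of every sampling point. A minimal pure-Python sketch of that weighting, illustrative only and not part of the extension, is:

    def bilinear_weights(y, x, height, width):
        # mirrors bilinear_interpolate_gradient: clamp the sample point,
        # pick the low/high neighbours and return the four corner weights
        if y < -1.0 or y > height or x < -1.0 or x > width:
            return (0.0, 0.0, 0.0, 0.0), (-1, -1, -1, -1)
        y, x = max(y, 0.0), max(x, 0.0)
        y_low, x_low = int(y), int(x)
        if y_low >= height - 1:
            y_high = y_low = height - 1
            y = float(y_low)
        else:
            y_high = y_low + 1
        if x_low >= width - 1:
            x_high = x_low = width - 1
            x = float(x_low)
        else:
            x_high = x_low + 1
        ly, lx = y - y_low, x - x_low
        hy, hx = 1.0 - ly, 1.0 - lx
        return (hy * hx, hy * lx, ly * hx, ly * lx), (y_low, x_low, y_high, x_high)

    # a sample in the middle of a cell contributes 0.25 to each corner
    assert bilinear_weights(0.5, 0.5, 4, 4)[0] == (0.25, 0.25, 0.25, 0.25)
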
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/__init__.py
new file mode 100644
index 000000000..9f0474e59
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/__init__.py
@@ -0,0 +1,3 @@
+from .roi_pool import RoIPool, roi_pool
+
+__all__ = ['roi_pool', 'RoIPool']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/gradcheck.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/gradcheck.py
new file mode 100644
index 000000000..d11af7902
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/gradcheck.py
@@ -0,0 +1,16 @@
+import os.path as osp
+import sys
+
+import torch
+from torch.autograd import gradcheck
+
+sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+from roi_pool import RoIPool # noqa: E402, isort:skip
+
+feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
+rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
+ [1, 67, 40, 110, 120]]).cuda()
+inputs = (feat, rois)
+print('Gradcheck for roi pooling...')
+test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
+print(test)
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/roi_pool.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/roi_pool.py
new file mode 100644
index 000000000..26d900f78
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/roi_pool.py
@@ -0,0 +1,75 @@
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from . import roi_pool_cuda
+
+
+class RoIPoolFunction(Function):
+
+ @staticmethod
+ def forward(ctx, features, rois, out_size, spatial_scale):
+ assert features.is_cuda
+ out_h, out_w = _pair(out_size)
+ assert isinstance(out_h, int) and isinstance(out_w, int)
+ ctx.save_for_backward(rois)
+ num_channels = features.size(1)
+ num_rois = rois.size(0)
+ out_size = (num_rois, num_channels, out_h, out_w)
+ output = features.new_zeros(out_size)
+ argmax = features.new_zeros(out_size, dtype=torch.int)
+ roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
+ output, argmax)
+ ctx.spatial_scale = spatial_scale
+ ctx.feature_size = features.size()
+ ctx.argmax = argmax
+
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ assert grad_output.is_cuda
+ spatial_scale = ctx.spatial_scale
+ feature_size = ctx.feature_size
+ argmax = ctx.argmax
+ rois = ctx.saved_tensors[0]
+ assert feature_size is not None
+
+ grad_input = grad_rois = None
+ if ctx.needs_input_grad[0]:
+ grad_input = grad_output.new_zeros(feature_size)
+ roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax,
+ spatial_scale, grad_input)
+
+ return grad_input, grad_rois, None, None
+
+
+roi_pool = RoIPoolFunction.apply
+
+
+class RoIPool(nn.Module):
+
+ def __init__(self, out_size, spatial_scale, use_torchvision=False):
+ super(RoIPool, self).__init__()
+
+ self.out_size = _pair(out_size)
+ self.spatial_scale = float(spatial_scale)
+ self.use_torchvision = use_torchvision
+
+ def forward(self, features, rois):
+ if self.use_torchvision:
+ from torchvision.ops import roi_pool as tv_roi_pool
+ return tv_roi_pool(features, rois, self.out_size,
+ self.spatial_scale)
+ else:
+ return roi_pool(features, rois, self.out_size, self.spatial_scale)
+
+ def __repr__(self):
+ format_str = self.__class__.__name__
+ format_str += '(out_size={}, spatial_scale={}'.format(
+ self.out_size, self.spatial_scale)
+ format_str += ', use_torchvision={})'.format(self.use_torchvision)
+ return format_str
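
A short usage sketch for the RoIPool module above. Shapes and ROI values are illustrative; rois follow the [batch_idx, x1, y1, x2, y2] convention, and either a CUDA build of the extension or use_torchvision=True is assumed:

    import torch
    from mmdet.ops.roi_pool import RoIPool

    feats = torch.randn(2, 256, 32, 32, device='cuda')
    rois = torch.tensor([[0., 4., 4., 20., 20.],
                         [1., 0., 0., 15., 31.]], device='cuda')
    pool = RoIPool(out_size=7, spatial_scale=1.0 / 8)
    out = pool(feats, rois)  # -> (2, 256, 7, 7)
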
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
new file mode 100644
index 000000000..740c6fdcf
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
@@ -0,0 +1,86 @@
+#include <torch/extension.h>
+
+#include <cmath>
+#include <vector>
+
+int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
+ const float spatial_scale, const int channels,
+ const int height, const int width, const int num_rois,
+ const int pooled_h, const int pooled_w,
+ at::Tensor output, at::Tensor argmax);
+
+int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
+ const at::Tensor argmax, const float spatial_scale,
+ const int batch_size, const int channels,
+ const int height, const int width,
+ const int num_rois, const int pooled_h,
+ const int pooled_w, at::Tensor bottom_grad);
+
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+ TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+ CHECK_CUDA(x); \
+ CHECK_CONTIGUOUS(x)
+
+int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
+ int pooled_height, int pooled_width,
+ float spatial_scale, at::Tensor output,
+ at::Tensor argmax) {
+ CHECK_INPUT(features);
+ CHECK_INPUT(rois);
+ CHECK_INPUT(output);
+ CHECK_INPUT(argmax);
+
+ // Number of ROIs
+ int num_rois = rois.size(0);
+ int size_rois = rois.size(1);
+
+ if (size_rois != 5) {
+ printf("wrong roi size\n");
+ return 0;
+ }
+
+ int channels = features.size(1);
+ int height = features.size(2);
+ int width = features.size(3);
+
+ ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
+ num_rois, pooled_height, pooled_width, output, argmax);
+
+ return 1;
+}
+
+int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
+ at::Tensor argmax, float spatial_scale,
+ at::Tensor bottom_grad) {
+ CHECK_INPUT(top_grad);
+ CHECK_INPUT(rois);
+ CHECK_INPUT(argmax);
+ CHECK_INPUT(bottom_grad);
+
+ int pooled_height = top_grad.size(2);
+ int pooled_width = top_grad.size(3);
+ int num_rois = rois.size(0);
+ int size_rois = rois.size(1);
+
+ if (size_rois != 5) {
+ printf("wrong roi size\n");
+ return 0;
+ }
+ int batch_size = bottom_grad.size(0);
+ int channels = bottom_grad.size(1);
+ int height = bottom_grad.size(2);
+ int width = bottom_grad.size(3);
+
+ ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
+ channels, height, width, num_rois, pooled_height,
+ pooled_width, bottom_grad);
+
+ return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
+ m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_kernel.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
new file mode 100644
index 000000000..82a70beaa
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
@@ -0,0 +1,157 @@
+#include <ATen/ATen.h>
+#include <THC/THC.h>
+#include <THC/THCAtomics.cuh>
+
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+ i += blockDim.x * gridDim.x)
+
+#define THREADS_PER_BLOCK 1024
+
+inline int GET_BLOCKS(const int N) {
+ int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+ int max_block_num = 65000;
+  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
+}
+
+template <typename scalar_t>
+__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
+ const scalar_t *rois,
+ const scalar_t spatial_scale, const int channels,
+ const int height, const int width,
+ const int pooled_h, const int pooled_w,
+ scalar_t *top_data, int *argmax_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_w;
+ int ph = (index / pooled_w) % pooled_h;
+ int c = (index / pooled_w / pooled_h) % channels;
+ int n = index / pooled_w / pooled_h / channels;
+
+ const scalar_t *offset_rois = rois + n * 5;
+ int roi_batch_ind = offset_rois[0];
+ // calculate the roi region on feature maps
+ scalar_t roi_x1 = offset_rois[1] * spatial_scale;
+ scalar_t roi_y1 = offset_rois[2] * spatial_scale;
+ scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
+ scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;
+
+ // force malformed rois to be 1x1
+ scalar_t roi_w = roi_x2 - roi_x1;
+ scalar_t roi_h = roi_y2 - roi_y1;
+ if (roi_w <= 0 || roi_h <= 0) continue;
+
+    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
+    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);
+
+ // the corresponding bin region
+    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
+    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
+    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
+    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);
+
+ // add roi offsets and clip to input boundaries
+ bin_x1 = min(max(bin_x1, 0), width);
+ bin_y1 = min(max(bin_y1, 0), height);
+ bin_x2 = min(max(bin_x2, 0), width);
+ bin_y2 = min(max(bin_y2, 0), height);
+ bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
+
+ // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
+ int max_idx = -1;
+ bottom_data += (roi_batch_ind * channels + c) * height * width;
+
+ // Define an empty pooling region to be zero
+    scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
+ : bottom_data[bin_y1 * width + bin_x1] - 1;
+
+ for (int h = bin_y1; h < bin_y2; ++h) {
+ for (int w = bin_x1; w < bin_x2; ++w) {
+ int offset = h * width + w;
+ if (bottom_data[offset] > max_val) {
+ max_val = bottom_data[offset];
+ max_idx = offset;
+ }
+ }
+ }
+ top_data[index] = max_val;
+ if (argmax_data != NULL) argmax_data[index] = max_idx;
+ }
+}
+
+int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
+ const float spatial_scale, const int channels,
+ const int height, const int width, const int num_rois,
+ const int pooled_h, const int pooled_w,
+ at::Tensor output, at::Tensor argmax) {
+ const int output_size = num_rois * channels * pooled_h * pooled_w;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ features.scalar_type(), "ROIPoolLaucherForward", ([&] {
+        const scalar_t *bottom_data = features.data<scalar_t>();
+        const scalar_t *rois_data = rois.data<scalar_t>();
+        scalar_t *top_data = output.data<scalar_t>();
+        int *argmax_data = argmax.data<int>();
+
+        ROIPoolForward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, bottom_data, rois_data, scalar_t(spatial_scale),
+ channels, height, width, pooled_h, pooled_w, top_data,
+ argmax_data);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
+
+template <typename scalar_t>
+__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
+ const scalar_t *rois, const int *argmax_data,
+ const scalar_t spatial_scale,
+ const int channels, const int height,
+ const int width, const int pooled_h,
+ const int pooled_w, scalar_t *bottom_diff) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int pw = index % pooled_w;
+ int ph = (index / pooled_w) % pooled_h;
+ int c = (index / pooled_w / pooled_h) % channels;
+ int n = index / pooled_w / pooled_h / channels;
+
+ int roi_batch_ind = rois[n * 5];
+ int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
+ ph * pooled_w + pw];
+
+ atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width +
+ bottom_index,
+ top_diff[index]);
+ }
+}
+
+int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
+ const at::Tensor argmax, const float spatial_scale,
+ const int batch_size, const int channels,
+ const int height, const int width,
+ const int num_rois, const int pooled_h,
+ const int pooled_w, at::Tensor bottom_grad) {
+ const int output_size = num_rois * pooled_h * pooled_w * channels;
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ top_grad.scalar_type(), "ROIPoolLaucherBackward", ([&] {
+        const scalar_t *top_diff = top_grad.data<scalar_t>();
+        const scalar_t *rois_data = rois.data<scalar_t>();
+        const int *argmax_data = argmax.data<int>();
+        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+
+ if (sizeof(scalar_t) == sizeof(double)) {
+ fprintf(stderr, "double is not supported\n");
+ exit(-1);
+ }
+
+        ROIPoolBackward<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+ output_size, top_diff, rois_data, argmax_data,
+ scalar_t(spatial_scale), channels, height, width, pooled_h,
+ pooled_w, bottom_diff);
+ }));
+ THCudaCheck(cudaGetLastError());
+ return 1;
+}
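
For readers without a CUDA setup, a hedged NumPy sketch of what the forward kernel computes for a single ROI and channel (the max over each bin plus the flat argmax consumed by the backward pass); the function name and arguments are illustrative:

    import numpy as np

    def roi_max_pool_channel(feat, x1, y1, x2, y2, pooled_h, pooled_w):
        # feat: (H, W) array; ROI corners already scaled to feature coords
        h, w = feat.shape
        bin_h, bin_w = (y2 - y1) / pooled_h, (x2 - x1) / pooled_w
        out = np.zeros((pooled_h, pooled_w), dtype=feat.dtype)
        argmax = -np.ones((pooled_h, pooled_w), dtype=np.int64)
        for ph in range(pooled_h):
            for pw in range(pooled_w):
                ys = min(max(int(np.floor(ph * bin_h + y1)), 0), h)
                ye = min(max(int(np.ceil((ph + 1) * bin_h + y1)), 0), h)
                xs = min(max(int(np.floor(pw * bin_w + x1)), 0), w)
                xe = min(max(int(np.ceil((pw + 1) * bin_w + x1)), 0), w)
                if ye <= ys or xe <= xs:
                    continue  # empty bin: output stays 0, argmax stays -1
                window = feat[ys:ye, xs:xe]
                idx = int(np.argmax(window))
                out[ph, pw] = window.flat[idx]
                r, c = divmod(idx, xe - xs)
                argmax[ph, pw] = (ys + r) * w + (xs + c)
        return out, argmax
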
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/__init__.py
new file mode 100644
index 000000000..218032945
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/__init__.py
@@ -0,0 +1,3 @@
+from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
+
+__all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py
new file mode 100644
index 000000000..8298f433f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py
@@ -0,0 +1,54 @@
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+from . import sigmoid_focal_loss_cuda
+
+
+class SigmoidFocalLossFunction(Function):
+
+ @staticmethod
+ def forward(ctx, input, target, gamma=2.0, alpha=0.25):
+ ctx.save_for_backward(input, target)
+ num_classes = input.shape[1]
+ ctx.num_classes = num_classes
+ ctx.gamma = gamma
+ ctx.alpha = alpha
+
+ loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
+ gamma, alpha)
+ return loss
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, d_loss):
+ input, target = ctx.saved_tensors
+ num_classes = ctx.num_classes
+ gamma = ctx.gamma
+ alpha = ctx.alpha
+ d_loss = d_loss.contiguous()
+ d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
+ num_classes, gamma, alpha)
+ return d_input, None, None, None, None
+
+
+sigmoid_focal_loss = SigmoidFocalLossFunction.apply
+
+
+# TODO: remove this module
+class SigmoidFocalLoss(nn.Module):
+
+ def __init__(self, gamma, alpha):
+ super(SigmoidFocalLoss, self).__init__()
+ self.gamma = gamma
+ self.alpha = alpha
+
+ def forward(self, logits, targets):
+ assert logits.is_cuda
+ loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha)
+ return loss.sum()
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format(
+ self.gamma, self.alpha)
+ return tmpstr
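
A usage sketch for the loss module above. Targets follow the 1..num_classes convention with 0 reserved for background, and a CUDA build of the extension is assumed:

    import torch
    from mmdet.ops.sigmoid_focal_loss import SigmoidFocalLoss

    logits = torch.randn(8, 80, device='cuda', requires_grad=True)
    targets = torch.randint(0, 81, (8,), device='cuda')  # 0 = background
    criterion = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
    loss = criterion(logits, targets)
    loss.backward()
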
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp
new file mode 100644
index 000000000..8330c9b45
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp
@@ -0,0 +1,45 @@
+// modify from
+// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
+#include <torch/extension.h>
+
+at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const int num_classes,
+ const float gamma, const float alpha);
+
+at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const at::Tensor &d_losses,
+ const int num_classes,
+ const float gamma, const float alpha);
+
+// Interface for Python
+at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const int num_classes, const float gamma,
+ const float alpha) {
+ if (logits.type().is_cuda()) {
+ return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
+ alpha);
+ }
+ AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
+}
+
+at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const at::Tensor &d_losses,
+ const int num_classes, const float gamma,
+ const float alpha) {
+ if (logits.is_cuda()) {
+ return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
+ num_classes, gamma, alpha);
+ }
+ AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("forward", &SigmoidFocalLoss_forward,
+ "SigmoidFocalLoss forward (CUDA)");
+ m.def("backward", &SigmoidFocalLoss_backward,
+ "SigmoidFocalLoss backward (CUDA)");
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
new file mode 100644
index 000000000..0e152d38f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
@@ -0,0 +1,171 @@
+// modified from
+// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu
+
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This file is modified from
+// https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu
+// Cheng-Yang Fu
+// cyfu@cs.unc.edu
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <THC/THC.h>
+#include <THC/THCAtomics.cuh>
+#include <THC/THCDeviceUtils.cuh>
+
+#include <cfloat>
+
+// TODO make it in a common file
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+ i += blockDim.x * gridDim.x)
+
+template <typename scalar_t>
+__global__ void SigmoidFocalLossForward(const int nthreads,
+ const scalar_t *logits,
+ const int64_t *targets,
+ const int num_classes,
+ const float gamma, const float alpha,
+ const int num, scalar_t *losses) {
+ CUDA_1D_KERNEL_LOOP(i, nthreads) {
+ int n = i / num_classes;
+ int d = i % num_classes; // current class[0~79];
+ int t = targets[n]; // target class [1~80];
+
+ // Decide it is positive or negative case.
+ scalar_t c1 = (t == (d + 1));
+ scalar_t c2 = (t >= 0 & t != (d + 1));
+
+ scalar_t zn = (1.0 - alpha);
+ scalar_t zp = (alpha);
+
+ // p = 1. / 1. + expf(-x); p = sigmoid(x)
+ scalar_t p = 1. / (1. + expf(-logits[i]));
+
+ // (1-p)**gamma * log(p) where
+ scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));
+
+ // p**gamma * log(1-p)
+ scalar_t term2 =
+ powf(p, gamma) *
+ (-1. * logits[i] * (logits[i] >= 0) -
+ logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0))));
+
+ losses[i] = 0.0;
+ losses[i] += -c1 * term1 * zp;
+ losses[i] += -c2 * term2 * zn;
+
+ } // CUDA_1D_KERNEL_LOOP
+} // SigmoidFocalLossForward
+
+template <typename scalar_t>
+__global__ void SigmoidFocalLossBackward(
+ const int nthreads, const scalar_t *logits, const int64_t *targets,
+ const scalar_t *d_losses, const int num_classes, const float gamma,
+ const float alpha, const int num, scalar_t *d_logits) {
+ CUDA_1D_KERNEL_LOOP(i, nthreads) {
+ int n = i / num_classes;
+ int d = i % num_classes; // current class[0~79];
+ int t = targets[n]; // target class [1~80], 0 is background;
+
+ // Decide it is positive or negative case.
+ scalar_t c1 = (t == (d + 1));
+ scalar_t c2 = (t >= 0 & t != (d + 1));
+
+ scalar_t zn = (1.0 - alpha);
+ scalar_t zp = (alpha);
+ // p = 1. / 1. + expf(-x); p = sigmoid(x)
+ scalar_t p = 1. / (1. + expf(-logits[i]));
+
+ // (1-p)**g * (1 - p - g*p*log(p)
+ scalar_t term1 =
+ powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN))));
+
+ // (p**g) * (g*(1-p)*log(1-p) - p)
+ scalar_t term2 =
+ powf(p, gamma) *
+ ((-1. * logits[i] * (logits[i] >= 0) -
+ logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) *
+ (1. - p) * gamma -
+ p);
+ d_logits[i] = 0.0;
+ d_logits[i] += -c1 * term1 * zp;
+ d_logits[i] += -c2 * term2 * zn;
+ d_logits[i] = d_logits[i] * d_losses[i];
+
+ } // CUDA_1D_KERNEL_LOOP
+} // SigmoidFocalLossBackward
+
+at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const int num_classes,
+ const float gamma, const float alpha) {
+ AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
+ AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
+ AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
+
+ const int num_samples = logits.size(0);
+
+ auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
+ auto losses_size = num_samples * logits.size(1);
+
+ dim3 grid(
+ std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096));
+ dim3 block(512);
+
+ if (losses.numel() == 0) {
+ THCudaCheck(cudaGetLastError());
+ return losses;
+ }
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ logits.scalar_type(), "SigmoidFocalLoss_forward", [&] {
+        SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
+            losses_size, logits.contiguous().data<scalar_t>(),
+            targets.contiguous().data<int64_t>(), num_classes, gamma, alpha,
+            num_samples, losses.data<scalar_t>());
+ });
+ THCudaCheck(cudaGetLastError());
+ return losses;
+}
+
+at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
+ const at::Tensor &targets,
+ const at::Tensor &d_losses,
+ const int num_classes,
+ const float gamma,
+ const float alpha) {
+ AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
+ AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
+ AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor");
+
+ AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
+
+ const int num_samples = logits.size(0);
+ AT_ASSERTM(logits.size(1) == num_classes,
+ "logits.size(1) should be num_classes");
+
+ auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
+ auto d_logits_size = num_samples * logits.size(1);
+
+ dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512),
+ (int64_t)4096));
+ dim3 block(512);
+
+ if (d_logits.numel() == 0) {
+ THCudaCheck(cudaGetLastError());
+ return d_logits;
+ }
+
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ logits.scalar_type(), "SigmoidFocalLoss_backward", [&] {
+        SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(
+            d_logits_size, logits.contiguous().data<scalar_t>(),
+            targets.contiguous().data<int64_t>(),
+            d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,
+            num_samples, d_logits.data<scalar_t>());
+ });
+
+ THCudaCheck(cudaGetLastError());
+ return d_logits;
+}
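
As a sanity reference for the forward kernel, a hedged pure-PyTorch sketch of the same per-element focal-loss term; it matches up to the kernel's numerically stable log(1 - sigmoid(x)) formulation, with targets in 1..C and 0 as background. The function name is illustrative:

    import torch

    def sigmoid_focal_loss_ref(logits, targets, gamma=2.0, alpha=0.25):
        n, c = logits.shape
        class_range = torch.arange(1, c + 1, device=logits.device).view(1, -1)
        t = targets.view(-1, 1)
        p = torch.sigmoid(logits)
        pos = (t == class_range).float()                # c1 in the kernel
        neg = ((t >= 0) & (t != class_range)).float()   # c2 in the kernel
        term1 = (1 - p) ** gamma * torch.log(p.clamp(min=1e-38))
        term2 = p ** gamma * torch.log((1 - p).clamp(min=1e-38))
        return -pos * term1 * alpha - neg * term2 * (1 - alpha)
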
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/__init__.py
new file mode 100644
index 000000000..0244c0f54
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/__init__.py
@@ -0,0 +1,7 @@
+# from . import compiling_info
+from .compiling_info import get_compiler_version, get_compiling_cuda_version
+
+# get_compiler_version = compiling_info.get_compiler_version
+# get_compiling_cuda_version = compiling_info.get_compiling_cuda_version
+
+__all__ = ['get_compiler_version', 'get_compiling_cuda_version']
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/src/compiling_info.cpp b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/src/compiling_info.cpp
new file mode 100644
index 000000000..fd62aabcf
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/src/compiling_info.cpp
@@ -0,0 +1,56 @@
+// modified from
+// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
+#include <cuda_runtime_api.h>
+#include <torch/extension.h>
+
+#ifdef WITH_CUDA
+int get_cudart_version() { return CUDART_VERSION; }
+#endif
+
+std::string get_compiling_cuda_version() {
+#ifdef WITH_CUDA
+ std::ostringstream oss;
+
+ // copied from
+ // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
+ auto printCudaStyleVersion = [&](int v) {
+ oss << (v / 1000) << "." << (v / 10 % 100);
+ if (v % 10 != 0) {
+ oss << "." << (v % 10);
+ }
+ };
+ printCudaStyleVersion(get_cudart_version());
+ return oss.str();
+#else
+ return std::string("not available");
+#endif
+}
+
+// similar to
+// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
+std::string get_compiler_version() {
+ std::ostringstream ss;
+#if defined(__GNUC__)
+#ifndef __clang__
+ { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
+#endif
+#endif
+
+#if defined(__clang_major__)
+ {
+ ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
+ << __clang_patchlevel__;
+ }
+#endif
+
+#if defined(_MSC_VER)
+ { ss << "MSVC " << _MSC_FULL_VER; }
+#endif
+ return ss.str();
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
+ m.def("get_compiling_cuda_version", &get_compiling_cuda_version,
+ "get_compiling_cuda_version");
+}
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/__init__.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/__init__.py
new file mode 100644
index 000000000..537a34a13
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/__init__.py
@@ -0,0 +1,8 @@
+from .flops_counter import get_model_complexity_info
+from .logger import get_root_logger, print_log
+from .registry import Registry, build_from_cfg
+
+__all__ = [
+ 'Registry', 'build_from_cfg', 'get_model_complexity_info',
+ 'get_root_logger', 'print_log'
+]
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/contextmanagers.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/contextmanagers.py
new file mode 100644
index 000000000..0363f0145
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/contextmanagers.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+import asyncio
+import contextlib
+import logging
+import os
+import time
+from typing import List
+
+import torch
+
+logger = logging.getLogger(__name__)
+
+DEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False))
+
+
+@contextlib.asynccontextmanager
+async def completed(trace_name='',
+ name='',
+ sleep_interval=0.05,
+ streams: List[torch.cuda.Stream] = None):
+ """
+ Async context manager that waits for work to complete on
+ given CUDA streams.
+
+ """
+ if not torch.cuda.is_available():
+ yield
+ return
+
+ stream_before_context_switch = torch.cuda.current_stream()
+ if not streams:
+ streams = [stream_before_context_switch]
+ else:
+ streams = [s if s else stream_before_context_switch for s in streams]
+
+ end_events = [
+ torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams
+ ]
+
+ if DEBUG_COMPLETED_TIME:
+ start = torch.cuda.Event(enable_timing=True)
+ stream_before_context_switch.record_event(start)
+
+ cpu_start = time.monotonic()
+ logger.debug('%s %s starting, streams: %s', trace_name, name, streams)
+ grad_enabled_before = torch.is_grad_enabled()
+ try:
+ yield
+ finally:
+ current_stream = torch.cuda.current_stream()
+ assert current_stream == stream_before_context_switch
+
+ if DEBUG_COMPLETED_TIME:
+ cpu_end = time.monotonic()
+ for i, stream in enumerate(streams):
+ event = end_events[i]
+ stream.record_event(event)
+
+ grad_enabled_after = torch.is_grad_enabled()
+
+ # observed change of torch.is_grad_enabled() during concurrent run of
+ # async_test_bboxes code
+ assert (grad_enabled_before == grad_enabled_after
+ ), 'Unexpected is_grad_enabled() value change'
+
+ are_done = [e.query() for e in end_events]
+ logger.debug('%s %s completed: %s streams: %s', trace_name, name,
+ are_done, streams)
+ with torch.cuda.stream(stream_before_context_switch):
+ while not all(are_done):
+ await asyncio.sleep(sleep_interval)
+ are_done = [e.query() for e in end_events]
+ logger.debug(
+ '%s %s completed: %s streams: %s',
+ trace_name,
+ name,
+ are_done,
+ streams,
+ )
+
+ current_stream = torch.cuda.current_stream()
+ assert current_stream == stream_before_context_switch
+
+ if DEBUG_COMPLETED_TIME:
+ cpu_time = (cpu_end - cpu_start) * 1000
+ stream_times_ms = ''
+ for i, stream in enumerate(streams):
+ elapsed_time = start.elapsed_time(end_events[i])
+ stream_times_ms += ' {} {:.2f} ms'.format(stream, elapsed_time)
+ logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time,
+ stream_times_ms)
+
+
+@contextlib.asynccontextmanager
+async def concurrent(streamqueue: asyncio.Queue,
+ trace_name='concurrent',
+ name='stream'):
+ """Run code concurrently in different streams.
+
+ :param streamqueue: asyncio.Queue instance.
+
+ Queue tasks define the pool of streams used for concurrent execution.
+
+ """
+ if not torch.cuda.is_available():
+ yield
+ return
+
+ initial_stream = torch.cuda.current_stream()
+
+ with torch.cuda.stream(initial_stream):
+ stream = await streamqueue.get()
+ assert isinstance(stream, torch.cuda.Stream)
+
+ try:
+ with torch.cuda.stream(stream):
+ logger.debug('%s %s is starting, stream: %s', trace_name, name,
+ stream)
+ yield
+ current = torch.cuda.current_stream()
+ assert current == stream
+ logger.debug('%s %s has finished, stream: %s', trace_name,
+ name, stream)
+ finally:
+ streamqueue.task_done()
+ streamqueue.put_nowait(stream)
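
A hedged sketch of how the two context managers above are intended to be combined: concurrent draws a CUDA stream from a shared pool, and completed waits, without blocking the event loop, for the work queued on it. The model and batch objects are illustrative:

    import asyncio
    import torch
    from mmdet.utils.contextmanagers import completed, concurrent

    async def infer_async(model, batches):
        streamqueue = asyncio.Queue()
        for _ in range(2):  # small pool of CUDA streams
            streamqueue.put_nowait(torch.cuda.Stream())
        outputs = []
        for batch in batches:
            async with concurrent(streamqueue):
                async with completed('infer', 'forward'):
                    outputs.append(model(batch))
        return outputs

    # results = asyncio.run(infer_async(model, batches))
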
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/flops_counter.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/flops_counter.py
new file mode 100644
index 000000000..df2163fd7
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/flops_counter.py
@@ -0,0 +1,444 @@
+# Modified from flops-counter.pytorch by Vladislav Sovrasov
+# original repo: https://github.com/sovrasov/flops-counter.pytorch
+
+# MIT License
+
+# Copyright (c) 2018 Vladislav Sovrasov
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import sys
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin
+from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd,
+ _AvgPoolNd, _MaxPoolNd)
+
+
+def get_model_complexity_info(model,
+ input_res,
+ print_per_layer_stat=True,
+ as_strings=True,
+ input_constructor=None,
+ ost=sys.stdout):
+ assert type(input_res) is tuple
+ assert len(input_res) >= 2
+ flops_model = add_flops_counting_methods(model)
+ flops_model.eval().start_flops_count()
+ if input_constructor:
+ input = input_constructor(input_res)
+ _ = flops_model(**input)
+ else:
+ batch = torch.ones(()).new_empty(
+ (1, *input_res),
+ dtype=next(flops_model.parameters()).dtype,
+ device=next(flops_model.parameters()).device)
+ flops_model(batch)
+
+ if print_per_layer_stat:
+ print_model_with_flops(flops_model, ost=ost)
+ flops_count = flops_model.compute_average_flops_cost()
+ params_count = get_model_parameters_number(flops_model)
+ flops_model.stop_flops_count()
+
+ if as_strings:
+ return flops_to_string(flops_count), params_to_string(params_count)
+
+ return flops_count, params_count
+
+
+def flops_to_string(flops, units='GMac', precision=2):
+ if units is None:
+ if flops // 10**9 > 0:
+ return str(round(flops / 10.**9, precision)) + ' GMac'
+ elif flops // 10**6 > 0:
+ return str(round(flops / 10.**6, precision)) + ' MMac'
+ elif flops // 10**3 > 0:
+ return str(round(flops / 10.**3, precision)) + ' KMac'
+ else:
+ return str(flops) + ' Mac'
+ else:
+ if units == 'GMac':
+ return str(round(flops / 10.**9, precision)) + ' ' + units
+ elif units == 'MMac':
+ return str(round(flops / 10.**6, precision)) + ' ' + units
+ elif units == 'KMac':
+ return str(round(flops / 10.**3, precision)) + ' ' + units
+ else:
+ return str(flops) + ' Mac'
+
+
+def params_to_string(params_num):
+ """converting number to string
+
+ :param float params_num: number
+ :returns str: number
+
+ >>> params_to_string(1e9)
+ '1000.0 M'
+ >>> params_to_string(2e5)
+ '200.0 k'
+ >>> params_to_string(3e-9)
+ '3e-09'
+ """
+ if params_num // 10**6 > 0:
+ return str(round(params_num / 10**6, 2)) + ' M'
+ elif params_num // 10**3:
+ return str(round(params_num / 10**3, 2)) + ' k'
+ else:
+ return str(params_num)
+
+
+def print_model_with_flops(model, units='GMac', precision=3, ost=sys.stdout):
+ total_flops = model.compute_average_flops_cost()
+
+ def accumulate_flops(self):
+ if is_supported_instance(self):
+ return self.__flops__ / model.__batch_counter__
+ else:
+ sum = 0
+ for m in self.children():
+ sum += m.accumulate_flops()
+ return sum
+
+ def flops_repr(self):
+ accumulated_flops_cost = self.accumulate_flops()
+ return ', '.join([
+ flops_to_string(
+ accumulated_flops_cost, units=units, precision=precision),
+ '{:.3%} MACs'.format(accumulated_flops_cost / total_flops),
+ self.original_extra_repr()
+ ])
+
+ def add_extra_repr(m):
+ m.accumulate_flops = accumulate_flops.__get__(m)
+ flops_extra_repr = flops_repr.__get__(m)
+ if m.extra_repr != flops_extra_repr:
+ m.original_extra_repr = m.extra_repr
+ m.extra_repr = flops_extra_repr
+ assert m.extra_repr != m.original_extra_repr
+
+ def del_extra_repr(m):
+ if hasattr(m, 'original_extra_repr'):
+ m.extra_repr = m.original_extra_repr
+ del m.original_extra_repr
+ if hasattr(m, 'accumulate_flops'):
+ del m.accumulate_flops
+
+ model.apply(add_extra_repr)
+ print(model, file=ost)
+ model.apply(del_extra_repr)
+
+
+def get_model_parameters_number(model):
+ params_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
+ return params_num
+
+
+def add_flops_counting_methods(net_main_module):
+ # adding additional methods to the existing module object,
+ # this is done this way so that each function has access to self object
+ net_main_module.start_flops_count = start_flops_count.__get__(
+ net_main_module)
+ net_main_module.stop_flops_count = stop_flops_count.__get__(
+ net_main_module)
+ net_main_module.reset_flops_count = reset_flops_count.__get__(
+ net_main_module)
+ net_main_module.compute_average_flops_cost = \
+ compute_average_flops_cost.__get__(net_main_module)
+
+ net_main_module.reset_flops_count()
+
+ # Adding variables necessary for masked flops computation
+ net_main_module.apply(add_flops_mask_variable_or_reset)
+
+ return net_main_module
+
+
+def compute_average_flops_cost(self):
+ """
+ A method that will be available after add_flops_counting_methods() is
+ called on a desired net object.
+ Returns current mean flops consumption per image.
+ """
+
+ batches_count = self.__batch_counter__
+ flops_sum = 0
+ for module in self.modules():
+ if is_supported_instance(module):
+ flops_sum += module.__flops__
+
+ return flops_sum / batches_count
+
+
+def start_flops_count(self):
+ """
+ A method that will be available after add_flops_counting_methods() is
+ called on a desired net object.
+ Activates the computation of mean flops consumption per image.
+ Call it before you run the network.
+ """
+ add_batch_counter_hook_function(self)
+ self.apply(add_flops_counter_hook_function)
+
+
+def stop_flops_count(self):
+ """
+ A method that will be available after add_flops_counting_methods() is
+ called on a desired net object.
+ Stops computing the mean flops consumption per image.
+ Call whenever you want to pause the computation.
+ """
+ remove_batch_counter_hook_function(self)
+ self.apply(remove_flops_counter_hook_function)
+
+
+def reset_flops_count(self):
+ """
+ A method that will be available after add_flops_counting_methods() is
+ called on a desired net object.
+ Resets statistics computed so far.
+ """
+ add_batch_counter_variables_or_reset(self)
+ self.apply(add_flops_counter_variable_or_reset)
+
+
+def add_flops_mask(module, mask):
+
+ def add_flops_mask_func(module):
+ if isinstance(module, torch.nn.Conv2d):
+ module.__mask__ = mask
+
+ module.apply(add_flops_mask_func)
+
+
+def remove_flops_mask(module):
+ module.apply(add_flops_mask_variable_or_reset)
+
+
+def is_supported_instance(module):
+ for mod in hook_mapping:
+ if issubclass(type(module), mod):
+ return True
+ return False
+
+
+def empty_flops_counter_hook(module, input, output):
+ module.__flops__ += 0
+
+
+def upsample_flops_counter_hook(module, input, output):
+ output_size = output[0]
+ batch_size = output_size.shape[0]
+ output_elements_count = batch_size
+ for val in output_size.shape[1:]:
+ output_elements_count *= val
+ module.__flops__ += int(output_elements_count)
+
+
+def relu_flops_counter_hook(module, input, output):
+ active_elements_count = output.numel()
+ module.__flops__ += int(active_elements_count)
+
+
+def linear_flops_counter_hook(module, input, output):
+ input = input[0]
+ batch_size = input.shape[0]
+ module.__flops__ += int(batch_size * input.shape[1] * output.shape[1])
+
+
+def pool_flops_counter_hook(module, input, output):
+ input = input[0]
+ module.__flops__ += int(np.prod(input.shape))
+
+
+def bn_flops_counter_hook(module, input, output):
+ input = input[0]
+
+ batch_flops = np.prod(input.shape)
+ if module.affine:
+ batch_flops *= 2
+ module.__flops__ += int(batch_flops)
+
+
+def gn_flops_counter_hook(module, input, output):
+ elems = np.prod(input[0].shape)
+    # there is no precise FLOPs estimate for computing mean and variance;
+    # we count 2 * elems for the statistics (half muladds for the means,
+    # half for the variances) plus elems for applying the normalization
+    batch_flops = 3 * elems
+ if module.affine:
+ batch_flops += elems
+ module.__flops__ += int(batch_flops)
+
+
+def deconv_flops_counter_hook(conv_module, input, output):
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+
+ batch_size = input.shape[0]
+ input_height, input_width = input.shape[2:]
+
+ kernel_height, kernel_width = conv_module.kernel_size
+ in_channels = conv_module.in_channels
+ out_channels = conv_module.out_channels
+ groups = conv_module.groups
+
+ filters_per_channel = out_channels // groups
+ conv_per_position_flops = (
+ kernel_height * kernel_width * in_channels * filters_per_channel)
+
+ active_elements_count = batch_size * input_height * input_width
+ overall_conv_flops = conv_per_position_flops * active_elements_count
+ bias_flops = 0
+ if conv_module.bias is not None:
+ output_height, output_width = output.shape[2:]
+        bias_flops = out_channels * batch_size * output_height * output_width
+ overall_flops = overall_conv_flops + bias_flops
+
+ conv_module.__flops__ += int(overall_flops)
+
+
+def conv_flops_counter_hook(conv_module, input, output):
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+
+ batch_size = input.shape[0]
+ output_dims = list(output.shape[2:])
+
+ kernel_dims = list(conv_module.kernel_size)
+ in_channels = conv_module.in_channels
+ out_channels = conv_module.out_channels
+ groups = conv_module.groups
+
+ filters_per_channel = out_channels // groups
+ conv_per_position_flops = np.prod(
+ kernel_dims) * in_channels * filters_per_channel
+
+ active_elements_count = batch_size * np.prod(output_dims)
+
+ if conv_module.__mask__ is not None:
+ # (b, 1, h, w)
+ output_height, output_width = output.shape[2:]
+ flops_mask = conv_module.__mask__.expand(batch_size, 1, output_height,
+ output_width)
+ active_elements_count = flops_mask.sum()
+
+ overall_conv_flops = conv_per_position_flops * active_elements_count
+
+ bias_flops = 0
+
+ if conv_module.bias is not None:
+
+ bias_flops = out_channels * active_elements_count
+
+ overall_flops = overall_conv_flops + bias_flops
+
+ conv_module.__flops__ += int(overall_flops)
+
+
+hook_mapping = {
+ # conv
+ _ConvNd: conv_flops_counter_hook,
+ # deconv
+ _ConvTransposeMixin: deconv_flops_counter_hook,
+ # fc
+ nn.Linear: linear_flops_counter_hook,
+ # pooling
+ _AvgPoolNd: pool_flops_counter_hook,
+ _MaxPoolNd: pool_flops_counter_hook,
+ _AdaptiveAvgPoolNd: pool_flops_counter_hook,
+ _AdaptiveMaxPoolNd: pool_flops_counter_hook,
+ # activation
+ nn.ReLU: relu_flops_counter_hook,
+ nn.PReLU: relu_flops_counter_hook,
+ nn.ELU: relu_flops_counter_hook,
+ nn.LeakyReLU: relu_flops_counter_hook,
+ nn.ReLU6: relu_flops_counter_hook,
+ # normalization
+ _BatchNorm: bn_flops_counter_hook,
+ nn.GroupNorm: gn_flops_counter_hook,
+ # upsample
+ nn.Upsample: upsample_flops_counter_hook,
+}
+
+
+def batch_counter_hook(module, input, output):
+ batch_size = 1
+ if len(input) > 0:
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+ batch_size = len(input)
+ else:
+ print('Warning! No positional inputs found for a module, '
+ 'assuming batch size is 1.')
+ module.__batch_counter__ += batch_size
+
+
+def add_batch_counter_variables_or_reset(module):
+ module.__batch_counter__ = 0
+
+
+def add_batch_counter_hook_function(module):
+ if hasattr(module, '__batch_counter_handle__'):
+ return
+
+ handle = module.register_forward_hook(batch_counter_hook)
+ module.__batch_counter_handle__ = handle
+
+
+def remove_batch_counter_hook_function(module):
+ if hasattr(module, '__batch_counter_handle__'):
+ module.__batch_counter_handle__.remove()
+ del module.__batch_counter_handle__
+
+
+def add_flops_counter_variable_or_reset(module):
+ if is_supported_instance(module):
+ module.__flops__ = 0
+
+
+def add_flops_counter_hook_function(module):
+ if is_supported_instance(module):
+ if hasattr(module, '__flops_handle__'):
+ return
+
+ for mod_type, counter_hook in hook_mapping.items():
+ if issubclass(type(module), mod_type):
+ handle = module.register_forward_hook(counter_hook)
+ break
+
+ module.__flops_handle__ = handle
+
+
+def remove_flops_counter_hook_function(module):
+ if is_supported_instance(module):
+ if hasattr(module, '__flops_handle__'):
+ module.__flops_handle__.remove()
+ del module.__flops_handle__
+
+
+# --- Masked flops counting
+# Also being run in the initialization
+def add_flops_mask_variable_or_reset(module):
+ if is_supported_instance(module):
+ module.__mask__ = None
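
A quick usage sketch for get_model_complexity_info; the toy model and the rough numbers in the comment are illustrative:

    import torch.nn as nn
    from mmdet.utils import get_model_complexity_info

    model = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(16, 32, 3, padding=1))
    flops, params = get_model_complexity_info(model, (3, 224, 224),
                                              print_per_layer_stat=False)
    print(flops, params)  # roughly '0.26 GMac' and '5.09 k'
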
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/logger.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/logger.py
new file mode 100644
index 000000000..3e6a1396b
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/logger.py
@@ -0,0 +1,66 @@
+import logging
+
+from mmcv.runner import get_dist_info
+
+
+def get_root_logger(log_file=None, log_level=logging.INFO):
+ """Get the root logger.
+
+ The logger will be initialized if it has not been initialized. By default a
+ StreamHandler will be added. If `log_file` is specified, a FileHandler will
+ also be added. The name of the root logger is the top-level package name,
+ e.g., "mmdet".
+
+ Args:
+ log_file (str | None): The log filename. If specified, a FileHandler
+ will be added to the root logger.
+ log_level (int): The root logger level. Note that only the process of
+ rank 0 is affected, while other processes will set the level to
+ "Error" and be silent most of the time.
+
+ Returns:
+ logging.Logger: The root logger.
+ """
+ logger = logging.getLogger(__name__.split('.')[0]) # i.e., mmdet
+ # if the logger has been initialized, just return it
+ if logger.hasHandlers():
+ return logger
+
+ format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ logging.basicConfig(format=format_str, level=log_level)
+ rank, _ = get_dist_info()
+ if rank != 0:
+ logger.setLevel('ERROR')
+ elif log_file is not None:
+ file_handler = logging.FileHandler(log_file, 'w')
+ file_handler.setFormatter(logging.Formatter(format_str))
+ file_handler.setLevel(log_level)
+ logger.addHandler(file_handler)
+
+ return logger
+
+
+def print_log(msg, logger=None, level=logging.INFO):
+ """Print a log message.
+
+ Args:
+ msg (str): The message to be logged.
+ logger (logging.Logger | str | None): The logger to be used. Some
+ special loggers are:
+ - "root": the root logger obtained with `get_root_logger()`.
+ - "silent": no message will be printed.
+ - None: The `print()` method will be used to print log messages.
+ level (int): Logging level. Only available when `logger` is a Logger
+ object or "root".
+ """
+ if logger is None:
+ print(msg)
+ elif logger == 'root':
+ _logger = get_root_logger()
+ _logger.log(level, msg)
+ elif isinstance(logger, logging.Logger):
+ logger.log(level, msg)
+ elif logger != 'silent':
+ raise TypeError(
+ 'logger should be either a logging.Logger object, "root", '
+ '"silent" or None, but got {}'.format(logger))
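
Typical usage of the two helpers above (the log file name is illustrative):

    from mmdet.utils import get_root_logger, print_log

    logger = get_root_logger(log_file='train.log')  # rank 0 also logs to file
    logger.info('starting training')
    print_log('mAP: 0.366', logger='root')  # routed through the root logger
    print_log('plain message')              # falls back to print()
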
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/profiling.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/profiling.py
new file mode 100644
index 000000000..58b1c87dd
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/profiling.py
@@ -0,0 +1,41 @@
+import contextlib
+import sys
+import time
+
+import torch
+
+if sys.version_info >= (3, 7):
+
+ @contextlib.contextmanager
+ def profile_time(trace_name,
+ name,
+ enabled=True,
+ stream=None,
+ end_stream=None):
+ """Print time spent by CPU and GPU.
+
+ Useful as a temporary context manager to find sweet spots of
+ code suitable for async implementation.
+
+ """
+ if (not enabled) or not torch.cuda.is_available():
+ yield
+ return
+ stream = stream if stream else torch.cuda.current_stream()
+ end_stream = end_stream if end_stream else stream
+ start = torch.cuda.Event(enable_timing=True)
+ end = torch.cuda.Event(enable_timing=True)
+ stream.record_event(start)
+ try:
+ cpu_start = time.monotonic()
+ yield
+ finally:
+ cpu_end = time.monotonic()
+ end_stream.record_event(end)
+ end.synchronize()
+ cpu_time = (cpu_end - cpu_start) * 1000
+ gpu_time = start.elapsed_time(end)
+ msg = "{} {} cpu_time {:.2f} ms ".format(trace_name, name,
+ cpu_time)
+ msg += "gpu_time {:.2f} ms stream {}".format(gpu_time, stream)
+ print(msg, end_stream)
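
A usage sketch for profile_time; Python >= 3.7 and an available GPU are assumed, otherwise the context manager simply yields:

    import torch
    from mmdet.utils.profiling import profile_time

    x = torch.randn(1024, 1024, device='cuda')
    with profile_time('matmul', 'fp32'):
        y = x @ x  # prints cpu_time / gpu_time for this block
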
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/registry.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/registry.py
new file mode 100644
index 000000000..4ad9f876c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/registry.py
@@ -0,0 +1,79 @@
+import inspect
+from functools import partial
+
+import mmcv
+
+
+class Registry(object):
+
+ def __init__(self, name):
+ self._name = name
+ self._module_dict = dict()
+
+ def __repr__(self):
+ format_str = self.__class__.__name__ + '(name={}, items={})'.format(
+ self._name, list(self._module_dict.keys()))
+ return format_str
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def module_dict(self):
+ return self._module_dict
+
+ def get(self, key):
+ return self._module_dict.get(key, None)
+
+ def _register_module(self, module_class, force=False):
+ """Register a module.
+
+ Args:
+ module (:obj:`nn.Module`): Module to be registered.
+            module_class (type): Class to be registered.
+            force (bool): Whether to override an existing class with the
+                same name.
+ if not inspect.isclass(module_class):
+ raise TypeError('module must be a class, but got {}'.format(
+ type(module_class)))
+ module_name = module_class.__name__
+ if not force and module_name in self._module_dict:
+ raise KeyError('{} is already registered in {}'.format(
+ module_name, self.name))
+ self._module_dict[module_name] = module_class
+
+ def register_module(self, cls=None, force=False):
+ if cls is None:
+ return partial(self.register_module, force=force)
+ self._register_module(cls, force=force)
+ return cls
+
+
+def build_from_cfg(cfg, registry, default_args=None):
+ """Build a module from config dict.
+
+ Args:
+ cfg (dict): Config dict. It should at least contain the key "type".
+ registry (:obj:`Registry`): The registry to search the type from.
+ default_args (dict, optional): Default initialization arguments.
+
+ Returns:
+ obj: The constructed object.
+ """
+ assert isinstance(cfg, dict) and 'type' in cfg
+ assert isinstance(default_args, dict) or default_args is None
+ args = cfg.copy()
+ obj_type = args.pop('type')
+ if mmcv.is_str(obj_type):
+ obj_cls = registry.get(obj_type)
+ if obj_cls is None:
+ raise KeyError('{} is not in the {} registry'.format(
+ obj_type, registry.name))
+ elif inspect.isclass(obj_type):
+ obj_cls = obj_type
+ else:
+ raise TypeError('type must be a str or valid type, but got {}'.format(
+ type(obj_type)))
+ if default_args is not None:
+ for name, value in default_args.items():
+ args.setdefault(name, value)
+ return obj_cls(**args)
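
A minimal sketch of how the Registry and build_from_cfg pair is used; the ToyDetector class is illustrative:

    from mmdet.utils import Registry, build_from_cfg

    DETECTORS = Registry('detector')

    @DETECTORS.register_module
    class ToyDetector(object):
        def __init__(self, depth=50):
            self.depth = depth

    model = build_from_cfg(dict(type='ToyDetector', depth=101), DETECTORS)
    assert model.depth == 101
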
diff --git a/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_mixins.py b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_mixins.py
new file mode 100644
index 000000000..5585ac652
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_mixins.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+"""
+This module defines the :class:`NiceRepr` mixin class, which defines a
+``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__``
+method, which you must define. This means you only have to overload one
+function instead of two. Furthermore, if the object defines a ``__len__``
+method, then the ``__nice__`` method defaults to something sensible, otherwise
+it is treated as abstract and raises ``NotImplementedError``.
+
+To use simply have your object inherit from :class:`NiceRepr`
+(multi-inheritance should be ok).
+
+This code was copied from the ubelt library: https://github.com/Erotemic/ubelt
+
+Example:
+ >>> # Objects that define __nice__ have a default __str__ and __repr__
+ >>> class Student(NiceRepr):
+ ... def __init__(self, name):
+ ... self.name = name
+ ... def __nice__(self):
+ ... return self.name
+ >>> s1 = Student('Alice')
+ >>> s2 = Student('Bob')
+ >>> print('s1 = {}'.format(s1))
+ >>> print('s2 = {}'.format(s2))
+    s1 = <Student(Alice)>
+    s2 = <Student(Bob)>
+
+Example:
+ >>> # Objects that define __len__ have a default __nice__
+ >>> class Group(NiceRepr):
+ ... def __init__(self, data):
+ ... self.data = data
+ ... def __len__(self):
+ ... return len(self.data)
+ >>> g = Group([1, 2, 3])
+ >>> print('g = {}'.format(g))
+    g = <Group(3)>
+
+"""
+import warnings
+
+
+class NiceRepr(object):
+ """
+ Inherit from this class and define ``__nice__`` to "nicely" print your
+ objects.
+
+ Defines ``__str__`` and ``__repr__`` in terms of ``__nice__`` function
+ Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``.
+ If the inheriting class has a ``__len__``, method then the default
+ ``__nice__`` method will return its length.
+
+ Example:
+ >>> class Foo(NiceRepr):
+ ... def __nice__(self):
+ ... return 'info'
+ >>> foo = Foo()
+        >>> assert str(foo) == '<Foo(info)>'
+        >>> assert repr(foo).startswith('<Foo(info) at ')
+        >>> class Bar(NiceRepr):
+ ... pass
+ >>> bar = Bar()
+ >>> import pytest
+ >>> with pytest.warns(None) as record:
+        >>>     assert 'object at' in str(bar)
+        >>>     assert 'object at' in repr(bar)
+
+ Example:
+ >>> class Baz(NiceRepr):
+ ... def __len__(self):
+ ... return 5
+ >>> baz = Baz()
+        >>> assert str(baz) == '<Baz(5)>'
+ """
+
+ def __nice__(self):
+ if hasattr(self, '__len__'):
+ # It is a common pattern for objects to use __len__ in __nice__
+ # As a convenience we define a default __nice__ for these objects
+ return str(len(self))
+ else:
+ # In all other cases force the subclass to overload __nice__
+ raise NotImplementedError(
+ 'Define the __nice__ method for {!r}'.format(self.__class__))
+
+ def __repr__(self):
+ try:
+ nice = self.__nice__()
+ classname = self.__class__.__name__
+ return '<{0}({1}) at {2}>'.format(classname, nice, hex(id(self)))
+ except NotImplementedError as ex:
+ warnings.warn(str(ex), category=RuntimeWarning)
+ return object.__repr__(self)
+
+ def __str__(self):
+ try:
+ classname = self.__class__.__name__
+ nice = self.__nice__()
+ return '<{0}({1})>'.format(classname, nice)
+ except NotImplementedError as ex:
+ warnings.warn(str(ex), category=RuntimeWarning)
+ return object.__repr__(self)
diff --git a/cv/instance_segmentation/SOLO/pytorch/pytest.ini b/cv/instance_segmentation/SOLO/pytorch/pytest.ini
new file mode 100644
index 000000000..9796e871e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+addopts = --xdoctest --xdoctest-style=auto
+norecursedirs = .git ignore build __pycache__ data docker docs .eggs
+
+filterwarnings= default
+ ignore:.*No cfgstr given in Cacher constructor or call.*:Warning
+ ignore:.*Define the __nice__ method for.*:Warning
diff --git a/cv/instance_segmentation/SOLO/pytorch/requirements.txt b/cv/instance_segmentation/SOLO/pytorch/requirements.txt
new file mode 100644
index 000000000..52ee8f552
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/requirements.txt
@@ -0,0 +1,4 @@
+-r requirements/runtime.txt
+-r requirements/optional.txt
+-r requirements/tests.txt
+-r requirements/build.txt
diff --git a/cv/instance_segmentation/SOLO/pytorch/requirements/build.txt b/cv/instance_segmentation/SOLO/pytorch/requirements/build.txt
new file mode 100644
index 000000000..a24ea0c6f
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/requirements/build.txt
@@ -0,0 +1,4 @@
+# These must be installed before building mmdetection
+cython
+numpy
+torch>=1.1
diff --git a/cv/instance_segmentation/SOLO/pytorch/requirements/optional.txt b/cv/instance_segmentation/SOLO/pytorch/requirements/optional.txt
new file mode 100644
index 000000000..eb36729e0
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/requirements/optional.txt
@@ -0,0 +1,2 @@
+albumentations>=0.3.2
+imagecorruptions
diff --git a/cv/instance_segmentation/SOLO/pytorch/requirements/runtime.txt b/cv/instance_segmentation/SOLO/pytorch/requirements/runtime.txt
new file mode 100644
index 000000000..0d0178788
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/requirements/runtime.txt
@@ -0,0 +1,10 @@
+matplotlib
+mmcv==0.2.16
+numpy
+scipy
+# need older pillow until torchvision is fixed
+Pillow<=6.2.2
+six
+terminaltables
+torch>=1.1
+torchvision
diff --git a/cv/instance_segmentation/SOLO/pytorch/requirements/tests.txt b/cv/instance_segmentation/SOLO/pytorch/requirements/tests.txt
new file mode 100644
index 000000000..d45e54096
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/requirements/tests.txt
@@ -0,0 +1,11 @@
+asynctest
+codecov
+flake8
+isort
+pytest
+pytest-cov
+pytest-runner
+xdoctest >= 0.10.0
+yapf
+# Note: used for kwarray.group_items, this may be ported to mmcv in the future.
+kwarray
diff --git a/cv/instance_segmentation/SOLO/pytorch/setup.py b/cv/instance_segmentation/SOLO/pytorch/setup.py
new file mode 100644
index 000000000..aee4ddbf6
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/setup.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import platform
+import subprocess
+import time
+from setuptools import Extension, dist, find_packages, setup
+
+import torch
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+dist.Distribution().fetch_build_eggs(['Cython', 'numpy>=1.11.1'])
+import numpy as np # noqa: E402, isort:skip
+from Cython.Build import cythonize # noqa: E402, isort:skip
+
+
+def readme():
+ with open('README.md', encoding='utf-8') as f:
+ content = f.read()
+ return content
+
+
+MAJOR = 1
+MINOR = 0
+PATCH = 0
+SUFFIX = ''
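+# assemble the short version string, e.g. '1.0.0', from the components above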
+if PATCH != '':
+ SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
+else:
+ SHORT_VERSION = '{}.{}{}'.format(MAJOR, MINOR, SUFFIX)
+
+version_file = 'mmdet/version.py'
+
+
+def get_git_hash():
+
+ def _minimal_ext_cmd(cmd):
+ # construct minimal environment
+ env = {}
+ for k in ['SYSTEMROOT', 'PATH', 'HOME']:
+ v = os.environ.get(k)
+ if v is not None:
+ env[k] = v
+ # LANGUAGE is used on win32
+ env['LANGUAGE'] = 'C'
+ env['LANG'] = 'C'
+ env['LC_ALL'] = 'C'
+ out = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
+ return out
+
+ try:
+ out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
+ sha = out.strip().decode('ascii')
+ except OSError:
+ sha = 'unknown'
+
+ return sha
+
+
+def get_hash():
+ if os.path.exists('.git'):
+ sha = get_git_hash()[:7]
+ elif os.path.exists(version_file):
+ try:
+ from mmdet.version import __version__
+ sha = __version__.split('+')[-1]
+ except ImportError:
+ raise ImportError('Unable to get git version')
+ else:
+ sha = 'unknown'
+
+ return sha
+
+
+def write_version_py():
+ content = """# GENERATED VERSION FILE
+# TIME: {}
+
+__version__ = '{}'
+short_version = '{}'
+"""
+ sha = get_hash()
+ VERSION = SHORT_VERSION + '+' + sha
+
+ with open(version_file, 'w') as f:
+ f.write(content.format(time.asctime(), VERSION, SHORT_VERSION))
+
+
+def get_version():
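+    # execute the generated version file and read __version__ from its namespace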
+ with open(version_file, 'r') as f:
+ exec(compile(f.read(), version_file, 'exec'))
+ return locals()['__version__']
+
+
+def make_cuda_ext(name, module, sources):
+
+ define_macros = []
+
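+    # FORCE_CUDA=1 allows compiling the CUDA extensions on machines
+    # without a visible GPU (e.g. when building docker images)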
+ if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
+ define_macros += [("WITH_CUDA", None)]
+ else:
+ raise EnvironmentError('CUDA is required to compile MMDetection!')
+
+ return CUDAExtension(
+ name='{}.{}'.format(module, name),
+ sources=[os.path.join(*module.split('.'), p) for p in sources],
+ define_macros=define_macros,
+ extra_compile_args={
+ 'cxx': [],
+ 'nvcc': [
+ '-D__CUDA_NO_HALF_OPERATORS__',
+ '-D__CUDA_NO_HALF_CONVERSIONS__',
+ '-D__CUDA_NO_HALF2_OPERATORS__',
+ ]
+ })
+
+
+def make_cython_ext(name, module, sources):
+ extra_compile_args = None
+ if platform.system() != 'Windows':
+ extra_compile_args = {
+ 'cxx': ['-Wno-unused-function', '-Wno-write-strings']
+ }
+
+ extension = Extension(
+ '{}.{}'.format(module, name),
+ [os.path.join(*module.split('.'), p) for p in sources],
+ include_dirs=[np.get_include()],
+ language='c++',
+ extra_compile_args=extra_compile_args)
+ extension, = cythonize(extension)
+ return extension
+
+
+def parse_requirements(fname='requirements.txt', with_version=True):
+ """
+    Parse the package dependencies listed in a requirements file, keeping
+    version specifiers only when ``with_version`` is True.
+
+    Args:
+        fname (str): path to requirements file
+        with_version (bool, default=True): if True, include version specs
+
+ Returns:
+ List[str]: list of requirements items
+
+ CommandLine:
+ python -c "import setup; print(setup.parse_requirements())"
+ """
+ import sys
+ from os.path import exists
+ import re
+ require_fpath = fname
+
+ def parse_line(line):
+ """
+ Parse information from a line in a requirements text file
+ """
+ if line.startswith('-r '):
+ # Allow specifying requirements in other files
+ target = line.split(' ')[1]
+ for info in parse_require_file(target):
+ yield info
+ else:
+ info = {'line': line}
+ if line.startswith('-e '):
+ info['package'] = line.split('#egg=')[1]
+ else:
+ # Remove versioning from the package
+ pat = '(' + '|'.join(['>=', '==', '>']) + ')'
+ parts = re.split(pat, line, maxsplit=1)
+ parts = [p.strip() for p in parts]
+
+ info['package'] = parts[0]
+ if len(parts) > 1:
+ op, rest = parts[1:]
+ if ';' in rest:
+ # Handle platform specific dependencies
+ # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
+ version, platform_deps = map(str.strip,
+ rest.split(';'))
+ info['platform_deps'] = platform_deps
+ else:
+ version = rest # NOQA
+ info['version'] = (op, version)
+ yield info
+
+ def parse_require_file(fpath):
+ with open(fpath, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ if line and not line.startswith('#'):
+ for info in parse_line(line):
+ yield info
+
+ def gen_packages_items():
+ if exists(require_fpath):
+ for info in parse_require_file(require_fpath):
+ parts = [info['package']]
+ if with_version and 'version' in info:
+ parts.extend(info['version'])
+ if not sys.version.startswith('3.4'):
+                        # platform_deps markers appear to be broken on Python 3.4
+ platform_deps = info.get('platform_deps')
+ if platform_deps is not None:
+ parts.append(';' + platform_deps)
+ item = ''.join(parts)
+ yield item
+
+ packages = list(gen_packages_items())
+ return packages
+
+
+if __name__ == '__main__':
+ write_version_py()
+ setup(
+ name='mmdet',
+ version=get_version(),
+ description='Open MMLab Detection Toolbox and Benchmark',
+ long_description=readme(),
+ author='OpenMMLab',
+ author_email='chenkaidev@gmail.com',
+ keywords='computer vision, object detection',
+ url='https://github.com/open-mmlab/mmdetection',
+ packages=find_packages(exclude=('configs', 'tools', 'demo')),
+ package_data={'mmdet.ops': ['*/*.so']},
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ ],
+ license='Apache License 2.0',
+ setup_requires=parse_requirements('requirements/build.txt'),
+ tests_require=parse_requirements('requirements/tests.txt'),
+ install_requires=parse_requirements('requirements/runtime.txt'),
+ extras_require={
+ 'all': parse_requirements('requirements.txt'),
+ 'tests': parse_requirements('requirements/tests.txt'),
+ 'build': parse_requirements('requirements/build.txt'),
+ 'optional': parse_requirements('requirements/optional.txt'),
+ },
+ ext_modules=[
+ make_cuda_ext(
+ name='compiling_info',
+ module='mmdet.ops.utils',
+ sources=['src/compiling_info.cpp']),
+ make_cython_ext(
+ name='soft_nms_cpu',
+ module='mmdet.ops.nms',
+ sources=['src/soft_nms_cpu.pyx']),
+ make_cuda_ext(
+ name='nms_cpu',
+ module='mmdet.ops.nms',
+ sources=['src/nms_cpu.cpp']),
+ make_cuda_ext(
+ name='nms_cuda',
+ module='mmdet.ops.nms',
+ sources=['src/nms_cuda.cpp', 'src/nms_kernel.cu']),
+ make_cuda_ext(
+ name='roi_align_cuda',
+ module='mmdet.ops.roi_align',
+ sources=['src/roi_align_cuda.cpp', 'src/roi_align_kernel.cu']),
+ make_cuda_ext(
+ name='roi_pool_cuda',
+ module='mmdet.ops.roi_pool',
+ sources=['src/roi_pool_cuda.cpp', 'src/roi_pool_kernel.cu']),
+ make_cuda_ext(
+ name='deform_conv_cuda',
+ module='mmdet.ops.dcn',
+ sources=[
+ 'src/deform_conv_cuda.cpp',
+ 'src/deform_conv_cuda_kernel.cu'
+ ]),
+ make_cuda_ext(
+ name='deform_pool_cuda',
+ module='mmdet.ops.dcn',
+ sources=[
+ 'src/deform_pool_cuda.cpp',
+ 'src/deform_pool_cuda_kernel.cu'
+ ]),
+ make_cuda_ext(
+ name='sigmoid_focal_loss_cuda',
+ module='mmdet.ops.sigmoid_focal_loss',
+ sources=[
+ 'src/sigmoid_focal_loss.cpp',
+ 'src/sigmoid_focal_loss_cuda.cu'
+ ]),
+ make_cuda_ext(
+ name='masked_conv2d_cuda',
+ module='mmdet.ops.masked_conv',
+ sources=[
+ 'src/masked_conv2d_cuda.cpp', 'src/masked_conv2d_kernel.cu'
+ ]),
+ ],
+ cmdclass={'build_ext': BuildExtension},
+ zip_safe=False)
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/async_benchmark.py b/cv/instance_segmentation/SOLO/pytorch/tests/async_benchmark.py
new file mode 100644
index 000000000..0017783d3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/async_benchmark.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+
+import asyncio
+import os
+import shutil
+import urllib
+
+import mmcv
+import torch
+
+from mmdet.apis import (async_inference_detector, inference_detector,
+ init_detector, show_result)
+from mmdet.utils.contextmanagers import concurrent
+from mmdet.utils.profiling import profile_time
+
+
+async def main():
+ """
+
+ Benchmark between async and synchronous inference interfaces.
+
+ Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x:
+
+ async sync
+
+ 7981.79 ms 9660.82 ms
+ 8074.52 ms 9660.94 ms
+ 7976.44 ms 9406.83 ms
+
+ Async variant takes about 0.83-0.85 of the time of the synchronous
+ interface.
+
+ """
+ project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+
+ config_file = os.path.join(project_dir, 'configs/mask_rcnn_r50_fpn_1x.py')
+ checkpoint_file = os.path.join(
+ project_dir, 'checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth')
+
+ if not os.path.exists(checkpoint_file):
+ url = ('https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection'
+ '/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth')
+ print('Downloading {} ...'.format(url))
+ local_filename, _ = urllib.request.urlretrieve(url)
+ os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
+ shutil.move(local_filename, checkpoint_file)
+ print('Saved as {}'.format(checkpoint_file))
+ else:
+ print('Using existing checkpoint {}'.format(checkpoint_file))
+
+ device = 'cuda:0'
+ model = init_detector(
+ config_file, checkpoint=checkpoint_file, device=device)
+
+ # queue is used for concurrent inference of multiple images
+ streamqueue = asyncio.Queue()
+ # queue size defines concurrency level
+ streamqueue_size = 4
+
+ for _ in range(streamqueue_size):
+ streamqueue.put_nowait(torch.cuda.Stream(device=device))
+
+ # test a single image and show the results
+ img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg'))
+
+ # warmup
+ await async_inference_detector(model, img)
+
+ async def detect(img):
+ async with concurrent(streamqueue):
+ return await async_inference_detector(model, img)
+
+ num_of_images = 20
+ with profile_time('benchmark', 'async'):
+ tasks = [
+ asyncio.create_task(detect(img)) for _ in range(num_of_images)
+ ]
+ async_results = await asyncio.gather(*tasks)
+
+ with torch.cuda.stream(torch.cuda.default_stream()):
+ with profile_time('benchmark', 'sync'):
+ sync_results = [
+ inference_detector(model, img) for _ in range(num_of_images)
+ ]
+
+ result_dir = os.path.join(project_dir, 'demo')
+ show_result(
+ img,
+ async_results[0],
+ model.CLASSES,
+ score_thr=0.5,
+ show=False,
+ out_file=os.path.join(result_dir, 'result_async.jpg'))
+ show_result(
+ img,
+ sync_results[0],
+ model.CLASSES,
+ score_thr=0.5,
+ show=False,
+ out_file=os.path.join(result_dir, 'result_sync.jpg'))
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_assigner.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_assigner.py
new file mode 100644
index 000000000..5348eaba3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_assigner.py
@@ -0,0 +1,277 @@
+"""
+Tests the Assigner objects.
+
+CommandLine:
+ pytest tests/test_assigner.py
+ xdoctest tests/test_assigner.py zero
+"""
+import torch
+
+from mmdet.core import MaxIoUAssigner
+from mmdet.core.bbox.assigners import ApproxMaxIoUAssigner, PointAssigner
+
+
+def test_max_iou_assigner():
+ self = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_labels = torch.LongTensor([2, 3])
+ assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
+ assert len(assign_result.gt_inds) == 4
+ assert len(assign_result.labels) == 4
+
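+    # gt_inds holds 1-based gt indices; 0 marks a background box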
+ expected_gt_inds = torch.LongTensor([1, 0, 2, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_max_iou_assigner_with_ignore():
+ self = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_bboxes_ignore = torch.Tensor([
+ [30, 30, 40, 40],
+ ])
+ assign_result = self.assign(
+ bboxes, gt_bboxes, gt_bboxes_ignore=gt_bboxes_ignore)
+
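+    # -1 marks the box that overlaps the ignored region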
+ expected_gt_inds = torch.LongTensor([1, 0, 2, -1])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_max_iou_assigner_with_empty_gt():
+ """
+ Test corner case where an image might have no true detections
+ """
+ self = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([])
+ assign_result = self.assign(bboxes, gt_bboxes)
+
+ expected_gt_inds = torch.LongTensor([0, 0, 0, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_max_iou_assigner_with_empty_boxes():
+ """
+    Test corner case where a network might predict no boxes
+ """
+ self = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.empty((0, 4))
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_labels = torch.LongTensor([2, 3])
+
+ # Test with gt_labels
+ assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
+ assert len(assign_result.gt_inds) == 0
+ assert tuple(assign_result.labels.shape) == (0, )
+
+ # Test without gt_labels
+ assign_result = self.assign(bboxes, gt_bboxes, gt_labels=None)
+ assert len(assign_result.gt_inds) == 0
+ assert assign_result.labels is None
+
+
+def test_max_iou_assigner_with_empty_boxes_and_gt():
+ """
+    Test corner case where a network might predict no boxes and no gt
+ """
+ self = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.empty((0, 4))
+ gt_bboxes = torch.empty((0, 4))
+ assign_result = self.assign(bboxes, gt_bboxes)
+ assert len(assign_result.gt_inds) == 0
+
+
+def test_point_assigner():
+ self = PointAssigner()
+ points = torch.FloatTensor([ # [x, y, stride]
+ [0, 0, 1],
+ [10, 10, 1],
+ [5, 5, 1],
+ [32, 32, 1],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ assign_result = self.assign(points, gt_bboxes)
+ expected_gt_inds = torch.LongTensor([1, 2, 1, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_point_assigner_with_empty_gt():
+ """
+ Test corner case where an image might have no true detections
+ """
+ self = PointAssigner()
+ points = torch.FloatTensor([ # [x, y, stride]
+ [0, 0, 1],
+ [10, 10, 1],
+ [5, 5, 1],
+ [32, 32, 1],
+ ])
+ gt_bboxes = torch.FloatTensor([])
+ assign_result = self.assign(points, gt_bboxes)
+
+ expected_gt_inds = torch.LongTensor([0, 0, 0, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_point_assigner_with_empty_boxes_and_gt():
+ """
+    Test corner case where a network might predict no points and there is no gt
+ """
+ self = PointAssigner()
+ points = torch.FloatTensor([])
+ gt_bboxes = torch.FloatTensor([])
+ assign_result = self.assign(points, gt_bboxes)
+ assert len(assign_result.gt_inds) == 0
+
+
+def test_approx_iou_assigner():
+ self = ApproxMaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ approxs_per_octave = 1
+ approxs = bboxes
+ squares = bboxes
+ assign_result = self.assign(approxs, squares, approxs_per_octave,
+ gt_bboxes)
+
+ expected_gt_inds = torch.LongTensor([1, 0, 2, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_approx_iou_assigner_with_empty_gt():
+ """
+ Test corner case where an image might have no true detections
+ """
+ self = ApproxMaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([])
+ approxs_per_octave = 1
+ approxs = bboxes
+ squares = bboxes
+ assign_result = self.assign(approxs, squares, approxs_per_octave,
+ gt_bboxes)
+
+ expected_gt_inds = torch.LongTensor([0, 0, 0, 0])
+ assert torch.all(assign_result.gt_inds == expected_gt_inds)
+
+
+def test_approx_iou_assigner_with_empty_boxes():
+ """
+    Test corner case where a network might predict no boxes
+ """
+ self = ApproxMaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.empty((0, 4))
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ approxs_per_octave = 1
+ approxs = bboxes
+ squares = bboxes
+ assign_result = self.assign(approxs, squares, approxs_per_octave,
+ gt_bboxes)
+ assert len(assign_result.gt_inds) == 0
+
+
+def test_approx_iou_assigner_with_empty_boxes_and_gt():
+ """
+    Test corner case where a network might predict no boxes and no gt
+ """
+ self = ApproxMaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ )
+ bboxes = torch.empty((0, 4))
+ gt_bboxes = torch.empty((0, 4))
+ approxs_per_octave = 1
+ approxs = bboxes
+ squares = bboxes
+ assign_result = self.assign(approxs, squares, approxs_per_octave,
+ gt_bboxes)
+ assert len(assign_result.gt_inds) == 0
+
+
+def test_random_assign_result():
+ """
+ Test random instantiation of assign result to catch corner cases
+ """
+ from mmdet.core.bbox.assigners.assign_result import AssignResult
+ AssignResult.random()
+
+ AssignResult.random(num_gts=0, num_preds=0)
+ AssignResult.random(num_gts=0, num_preds=3)
+ AssignResult.random(num_gts=3, num_preds=3)
+ AssignResult.random(num_gts=0, num_preds=3)
+ AssignResult.random(num_gts=7, num_preds=7)
+ AssignResult.random(num_gts=7, num_preds=64)
+ AssignResult.random(num_gts=24, num_preds=3)
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_async.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_async.py
new file mode 100644
index 000000000..68ecde33d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_async.py
@@ -0,0 +1,78 @@
+"""Tests for async interface."""
+
+import asyncio
+import os
+import sys
+
+import asynctest
+import mmcv
+import torch
+
+from mmdet.apis import async_inference_detector, init_detector
+
+if sys.version_info >= (3, 7):
+ from mmdet.utils.contextmanagers import concurrent
+
+
+class AsyncTestCase(asynctest.TestCase):
+ use_default_loop = False
+ forbid_get_event_loop = True
+
+ TEST_TIMEOUT = int(os.getenv("ASYNCIO_TEST_TIMEOUT", "30"))
+
+ def _run_test_method(self, method):
+ result = method()
+ if asyncio.iscoroutine(result):
+ self.loop.run_until_complete(
+ asyncio.wait_for(result, timeout=self.TEST_TIMEOUT))
+
+
+class MaskRCNNDetector:
+
+ def __init__(self,
+ model_config,
+ checkpoint=None,
+ streamqueue_size=3,
+ device="cuda:0"):
+
+ self.streamqueue_size = streamqueue_size
+ self.device = device
+ # build the model and load checkpoint
+ self.model = init_detector(
+ model_config, checkpoint=None, device=self.device)
+ self.streamqueue = None
+
+ async def init(self):
+ self.streamqueue = asyncio.Queue()
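+        # one CUDA stream per queue slot bounds the number of concurrent requests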
+ for _ in range(self.streamqueue_size):
+ stream = torch.cuda.Stream(device=self.device)
+ self.streamqueue.put_nowait(stream)
+
+ if sys.version_info >= (3, 7):
+
+ async def apredict(self, img):
+ if isinstance(img, str):
+ img = mmcv.imread(img)
+ async with concurrent(self.streamqueue):
+ result = await async_inference_detector(self.model, img)
+ return result
+
+
+class AsyncInferenceTestCase(AsyncTestCase):
+
+ if sys.version_info >= (3, 7):
+
+ async def test_simple_inference(self):
+ if not torch.cuda.is_available():
+ import pytest
+
+ pytest.skip("test requires GPU and torch+cuda")
+
+            root_dir = os.path.dirname(os.path.dirname(__file__))
+ model_config = os.path.join(root_dir,
+ "configs/mask_rcnn_r50_fpn_1x.py")
+ detector = MaskRCNNDetector(model_config)
+ await detector.init()
+ img_path = os.path.join(root_dir, "demo/demo.jpg")
+ bboxes, _ = await detector.apredict(img_path)
+ self.assertTrue(bboxes)
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_config.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_config.py
new file mode 100644
index 000000000..ebc399ff3
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_config.py
@@ -0,0 +1,172 @@
+from os.path import dirname, exists, join
+
+
+def _get_config_directory():
+ """ Find the predefined detector config directory """
+ try:
+ # Assume we are running in the source mmdetection repo
+ repo_dpath = dirname(dirname(__file__))
+ except NameError:
+ # For IPython development when this __file__ is not defined
+ import mmdet
+ repo_dpath = dirname(dirname(mmdet.__file__))
+ config_dpath = join(repo_dpath, 'configs')
+ if not exists(config_dpath):
+ raise Exception('Cannot find config path')
+ return config_dpath
+
+
+def test_config_build_detector():
+ """
+ Test that all detection models defined in the configs can be initialized.
+ """
+ from xdoctest.utils import import_module_from_path
+ from mmdet.models import build_detector
+
+ config_dpath = _get_config_directory()
+ print('Found config_dpath = {!r}'.format(config_dpath))
+
+ # import glob
+ # config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
+ # config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+ # Only tests a representative subset of configurations
+
+ config_names = [
+ # 'dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py',
+ # 'dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py',
+ # 'dcn/faster_rcnn_dpool_r50_fpn_1x.py',
+ 'dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py',
+ # 'dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py',
+ # 'dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py',
+ # 'dcn/faster_rcnn_mdpool_r50_fpn_1x.py',
+ # 'dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py',
+ # 'dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py',
+ # ---
+ # 'htc/htc_x101_32x4d_fpn_20e_16gpu.py',
+ 'htc/htc_without_semantic_r50_fpn_1x.py',
+ # 'htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py',
+ # 'htc/htc_x101_64x4d_fpn_20e_16gpu.py',
+ # 'htc/htc_r50_fpn_1x.py',
+ # 'htc/htc_r101_fpn_20e.py',
+ # 'htc/htc_r50_fpn_20e.py',
+ # ---
+ 'cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py',
+ # 'cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py',
+ # ---
+ # 'scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py',
+ # 'scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py',
+ # ---
+ # 'grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py',
+ 'grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py',
+ # ---
+ 'double_heads/dh_faster_rcnn_r50_fpn_1x.py',
+ # ---
+ 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py',
+ # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py',
+ # 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py',
+ # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py',
+ # ---
+ # 'ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py',
+ # 'ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py',
+ # 'ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py',
+ # ---
+ # 'guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py',
+ # 'guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py',
+ # 'guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py',
+ # 'guided_anchoring/ga_fast_r50_caffe_fpn_1x.py',
+ # 'guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py',
+ # 'guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py',
+ # 'guided_anchoring/ga_faster_r50_caffe_fpn_1x.py',
+ 'guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py',
+ # ---
+ 'foveabox/fovea_r50_fpn_4gpu_1x.py',
+ # 'foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py',
+ # 'foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py',
+ # 'foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py',
+ 'foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py',
+ # ---
+ # 'hrnet/cascade_rcnn_hrnetv2p_w32_20e.py',
+ # 'hrnet/mask_rcnn_hrnetv2p_w32_1x.py',
+ # 'hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py',
+ # 'hrnet/htc_hrnetv2p_w32_20e.py',
+ # 'hrnet/faster_rcnn_hrnetv2p_w18_1x.py',
+ # 'hrnet/mask_rcnn_hrnetv2p_w18_1x.py',
+ # 'hrnet/faster_rcnn_hrnetv2p_w32_1x.py',
+ # 'hrnet/faster_rcnn_hrnetv2p_w40_1x.py',
+ 'hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py',
+ # ---
+ # 'gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py',
+ # 'gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py',
+ 'gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py',
+ # 'gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py',
+ # ---
+ # 'wider_face/ssd300_wider_face.py',
+ # ---
+ 'pascal_voc/ssd300_voc.py',
+ 'pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py',
+ 'pascal_voc/ssd512_voc.py',
+ # ---
+ # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py',
+ # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py',
+ # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py',
+ # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py',
+ 'gcnet/mask_rcnn_r50_fpn_sbn_1x.py',
+ # ---
+ 'gn/mask_rcnn_r50_fpn_gn_contrib_2x.py',
+ # 'gn/mask_rcnn_r50_fpn_gn_2x.py',
+ # 'gn/mask_rcnn_r101_fpn_gn_2x.py',
+ # ---
+ # 'reppoints/reppoints_moment_x101_dcn_fpn_2x.py',
+ 'reppoints/reppoints_moment_r50_fpn_2x.py',
+ # 'reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py',
+ 'reppoints/reppoints_partial_minmax_r50_fpn_1x.py',
+ 'reppoints/bbox_r50_grid_center_fpn_1x.py',
+ # 'reppoints/reppoints_moment_r101_dcn_fpn_2x.py',
+ # 'reppoints/reppoints_moment_r101_fpn_2x_mt.py',
+ # 'reppoints/reppoints_moment_r50_fpn_2x_mt.py',
+ 'reppoints/reppoints_minmax_r50_fpn_1x.py',
+ # 'reppoints/reppoints_moment_r50_fpn_1x.py',
+ # 'reppoints/reppoints_moment_r101_fpn_2x.py',
+ # 'reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py',
+ 'reppoints/bbox_r50_grid_fpn_1x.py',
+ # ---
+ # 'fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py',
+ # 'fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py',
+ 'fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py',
+ # ---
+ 'albu_example/mask_rcnn_r50_fpn_1x.py',
+ # ---
+ 'libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py',
+ # 'libra_rcnn/libra_retinanet_r50_fpn_1x.py',
+ # 'libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py',
+ # 'libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py',
+ # 'libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py',
+ # ---
+ # 'ghm/retinanet_ghm_r50_fpn_1x.py',
+ # ---
+ # 'fp16/retinanet_r50_fpn_fp16_1x.py',
+ 'fp16/mask_rcnn_r50_fpn_fp16_1x.py',
+ 'fp16/faster_rcnn_r50_fpn_fp16_1x.py'
+ ]
+
+ print('Using {} config files'.format(len(config_names)))
+
+ for config_fname in config_names:
+ config_fpath = join(config_dpath, config_fname)
+ config_mod = import_module_from_path(config_fpath)
+
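+        # accessing these attributes fails fast if a config omits them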
+ config_mod.model
+ config_mod.train_cfg
+ config_mod.test_cfg
+ print('Building detector, config_fpath = {!r}'.format(config_fpath))
+
+ # Remove pretrained keys to allow for testing in an offline environment
+ if 'pretrained' in config_mod.model:
+ config_mod.model['pretrained'] = None
+
+ detector = build_detector(
+ config_mod.model,
+ train_cfg=config_mod.train_cfg,
+ test_cfg=config_mod.test_cfg)
+ assert detector is not None
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_forward.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_forward.py
new file mode 100644
index 000000000..5ba56bf24
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_forward.py
@@ -0,0 +1,388 @@
+"""
+pytest tests/test_forward.py
+"""
+import copy
+from os.path import dirname, exists, join
+
+import numpy as np
+import torch
+
+
+def _get_config_directory():
+ """ Find the predefined detector config directory """
+ try:
+ # Assume we are running in the source mmdetection repo
+ repo_dpath = dirname(dirname(__file__))
+ except NameError:
+ # For IPython development when this __file__ is not defined
+ import mmdet
+ repo_dpath = dirname(dirname(mmdet.__file__))
+ config_dpath = join(repo_dpath, 'configs')
+ if not exists(config_dpath):
+ raise Exception('Cannot find config path')
+ return config_dpath
+
+
+def _get_config_module(fname):
+ """
+ Load a configuration as a python module
+ """
+ from xdoctest.utils import import_module_from_path
+ config_dpath = _get_config_directory()
+ config_fpath = join(config_dpath, fname)
+ config_mod = import_module_from_path(config_fpath)
+ return config_mod
+
+
+def _get_detector_cfg(fname):
+ """
+ Grab configs necessary to create a detector. These are deep copied to allow
+ for safe modification of parameters without influencing other tests.
+ """
+ import mmcv
+ config = _get_config_module(fname)
+ model = copy.deepcopy(config.model)
+ train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
+ test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
+ return model, train_cfg, test_cfg
+
+
+def test_ssd300_forward():
+ model, train_cfg, test_cfg = _get_detector_cfg('ssd300_coco.py')
+ model['pretrained'] = None
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (1, 3, 300, 300)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ img_list = [g[None, :] for g in imgs]
+ batch_results = []
+ for one_img, one_meta in zip(img_list, img_metas):
+ result = detector.forward([one_img], [[one_meta]],
+ return_loss=False)
+ batch_results.append(result)
+
+
+def test_rpn_forward():
+ model, train_cfg, test_cfg = _get_detector_cfg('rpn_r50_fpn_1x.py')
+ model['pretrained'] = None
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (1, 3, 224, 224)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ gt_bboxes = mm_inputs['gt_bboxes']
+ losses = detector.forward(
+ imgs, img_metas, gt_bboxes=gt_bboxes, return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ img_list = [g[None, :] for g in imgs]
+ batch_results = []
+ for one_img, one_meta in zip(img_list, img_metas):
+ result = detector.forward([one_img], [[one_meta]],
+ return_loss=False)
+ batch_results.append(result)
+
+
+def test_retina_ghm_forward():
+ model, train_cfg, test_cfg = _get_detector_cfg(
+ 'ghm/retinanet_ghm_r50_fpn_1x.py')
+ model['pretrained'] = None
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (3, 3, 224, 224)
+ mm_inputs = _demo_mm_inputs(input_shape)
+
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+
+ # Test forward train
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ img_list = [g[None, :] for g in imgs]
+ batch_results = []
+ for one_img, one_meta in zip(img_list, img_metas):
+ result = detector.forward([one_img], [[one_meta]],
+ return_loss=False)
+ batch_results.append(result)
+
+ if torch.cuda.is_available():
+ detector = detector.cuda()
+ imgs = imgs.cuda()
+ # Test forward train
+ gt_bboxes = [b.cuda() for b in mm_inputs['gt_bboxes']]
+ gt_labels = [g.cuda() for g in mm_inputs['gt_labels']]
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+
+ # Test forward test
+ with torch.no_grad():
+ img_list = [g[None, :] for g in imgs]
+ batch_results = []
+ for one_img, one_meta in zip(img_list, img_metas):
+ result = detector.forward([one_img], [[one_meta]],
+ return_loss=False)
+ batch_results.append(result)
+
+
+def test_cascade_forward():
+ try:
+ from torchvision import _C as C # NOQA
+ except ImportError:
+ import pytest
+        pytest.skip('requires torchvision on cpu')
+
+ model, train_cfg, test_cfg = _get_detector_cfg(
+ 'cascade_rcnn_r50_fpn_1x.py')
+ model['pretrained'] = None
+ # torchvision roi align supports CPU
+ model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (1, 3, 256, 256)
+
+ # Test forward train with a non-empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+ # Test forward train with an empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+
+def test_faster_rcnn_forward():
+ try:
+ from torchvision import _C as C # NOQA
+ except ImportError:
+ import pytest
+        pytest.skip('requires torchvision on cpu')
+
+ model, train_cfg, test_cfg = _get_detector_cfg('faster_rcnn_r50_fpn_1x.py')
+ model['pretrained'] = None
+ # torchvision roi align supports CPU
+ model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (1, 3, 256, 256)
+
+ # Test forward train with a non-empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+ # Test forward train with an empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+
+def test_faster_rcnn_ohem_forward():
+ try:
+ from torchvision import _C as C # NOQA
+ except ImportError:
+ import pytest
+        pytest.skip('requires torchvision on cpu')
+
+ model, train_cfg, test_cfg = _get_detector_cfg(
+ 'faster_rcnn_ohem_r50_fpn_1x.py')
+ model['pretrained'] = None
+ # torchvision roi align supports CPU
+ model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True
+
+ from mmdet.models import build_detector
+ detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+
+ input_shape = (1, 3, 256, 256)
+
+ # Test forward train with a non-empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+ # Test forward train with an empty truth batch
+ mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
+ imgs = mm_inputs.pop('imgs')
+ img_metas = mm_inputs.pop('img_metas')
+ gt_bboxes = mm_inputs['gt_bboxes']
+ gt_labels = mm_inputs['gt_labels']
+ losses = detector.forward(
+ imgs,
+ img_metas,
+ gt_bboxes=gt_bboxes,
+ gt_labels=gt_labels,
+ return_loss=True)
+ assert isinstance(losses, dict)
+ from mmdet.apis.train import parse_losses
+ total_loss = float(parse_losses(losses)[0].item())
+ assert total_loss > 0
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
+ num_items=None, num_classes=10): # yapf: disable
+ """
+ Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple):
+ input batch dimensions
+
+ num_items (None | List[int]):
+ specifies the number of boxes in each batch item
+
+ num_classes (int):
+ number of different labels a box might have
+ """
+ (N, C, H, W) = input_shape
+
+ rng = np.random.RandomState(0)
+
+ imgs = rng.rand(*input_shape)
+
+ img_metas = [{
+ 'img_shape': (H, W, C),
+ 'ori_shape': (H, W, C),
+ 'pad_shape': (H, W, C),
+ 'filename': '.png',
+ 'scale_factor': 1.0,
+ 'flip': False,
+ } for _ in range(N)]
+
+ gt_bboxes = []
+ gt_labels = []
+
+ for batch_idx in range(N):
+ if num_items is None:
+ num_boxes = rng.randint(1, 10)
+ else:
+ num_boxes = num_items[batch_idx]
+
+ cx, cy, bw, bh = rng.rand(num_boxes, 4).T
+
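+        # convert sampled (cx, cy, w, h) fractions into clipped pixel corners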
+ tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
+ tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
+ br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
+ br_y = ((cy * H) + (H * bh / 2)).clip(0, H)
+
+ boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
+ class_idxs = rng.randint(1, num_classes, size=num_boxes)
+
+ gt_bboxes.append(torch.FloatTensor(boxes))
+ gt_labels.append(torch.LongTensor(class_idxs))
+
+ mm_inputs = {
+ 'imgs': torch.FloatTensor(imgs),
+ 'img_metas': img_metas,
+ 'gt_bboxes': gt_bboxes,
+ 'gt_labels': gt_labels,
+ 'gt_bboxes_ignore': None,
+ }
+ return mm_inputs
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_heads.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_heads.py
new file mode 100644
index 000000000..b1e4ceebf
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_heads.py
@@ -0,0 +1,340 @@
+import mmcv
+import torch
+
+from mmdet.core import build_assigner, build_sampler
+from mmdet.models.anchor_heads import AnchorHead
+from mmdet.models.bbox_heads import BBoxHead
+
+
+def test_anchor_head_loss():
+ """
+ Tests anchor head loss when truth is empty and non-empty
+ """
+ self = AnchorHead(num_classes=4, in_channels=1)
+ s = 256
+ img_metas = [{
+ 'img_shape': (s, s, 3),
+ 'scale_factor': 1,
+ 'pad_shape': (s, s, 3)
+ }]
+
+ cfg = mmcv.Config({
+ 'assigner': {
+ 'type': 'MaxIoUAssigner',
+ 'pos_iou_thr': 0.7,
+ 'neg_iou_thr': 0.3,
+ 'min_pos_iou': 0.3,
+ 'ignore_iof_thr': -1
+ },
+ 'sampler': {
+ 'type': 'RandomSampler',
+ 'num': 256,
+ 'pos_fraction': 0.5,
+ 'neg_pos_ub': -1,
+ 'add_gt_as_proposals': False
+ },
+ 'allowed_border': 0,
+ 'pos_weight': -1,
+ 'debug': False
+ })
+
+    # The anchor head expects multiple levels of features per image
+ feat = [
+ torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))
+ for i in range(len(self.anchor_generators))
+ ]
+ cls_scores, bbox_preds = self.forward(feat)
+
+ # Test that empty ground truth encourages the network to predict background
+ gt_bboxes = [torch.empty((0, 4))]
+ gt_labels = [torch.LongTensor([])]
+
+ gt_bboxes_ignore = None
+ empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
+ img_metas, cfg, gt_bboxes_ignore)
+ # When there is no truth, the cls loss should be nonzero but there should
+ # be no box loss.
+ empty_cls_loss = sum(empty_gt_losses['loss_cls'])
+ empty_box_loss = sum(empty_gt_losses['loss_bbox'])
+ assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
+ assert empty_box_loss.item() == 0, (
+ 'there should be no box loss when there are no true boxes')
+
+ # When truth is non-empty then both cls and box loss should be nonzero for
+ # random inputs
+ gt_bboxes = [
+ torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
+ ]
+ gt_labels = [torch.LongTensor([2])]
+ one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,
+ img_metas, cfg, gt_bboxes_ignore)
+ onegt_cls_loss = sum(one_gt_losses['loss_cls'])
+ onegt_box_loss = sum(one_gt_losses['loss_bbox'])
+ assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'
+ assert onegt_box_loss.item() > 0, 'box loss should be non-zero'
+
+
+def test_bbox_head_loss():
+ """
+ Tests bbox head loss when truth is empty and non-empty
+ """
+ self = BBoxHead(in_channels=8, roi_feat_size=3)
+
+ num_imgs = 1
+ feat = torch.rand(1, 1, 3, 3)
+
+ # Dummy proposals
+ proposal_list = [
+ torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),
+ ]
+
+ target_cfg = mmcv.Config({'pos_weight': 1})
+
+ def _dummy_bbox_sampling(proposal_list, gt_bboxes, gt_labels):
+ """
+ Create sample results that can be passed to BBoxHead.get_target
+ """
+ assign_config = {
+ 'type': 'MaxIoUAssigner',
+ 'pos_iou_thr': 0.5,
+ 'neg_iou_thr': 0.5,
+ 'min_pos_iou': 0.5,
+ 'ignore_iof_thr': -1
+ }
+ sampler_config = {
+ 'type': 'RandomSampler',
+ 'num': 512,
+ 'pos_fraction': 0.25,
+ 'neg_pos_ub': -1,
+ 'add_gt_as_proposals': True
+ }
+ bbox_assigner = build_assigner(assign_config)
+ bbox_sampler = build_sampler(sampler_config)
+ gt_bboxes_ignore = [None for _ in range(num_imgs)]
+ sampling_results = []
+ for i in range(num_imgs):
+ assign_result = bbox_assigner.assign(proposal_list[i],
+ gt_bboxes[i],
+ gt_bboxes_ignore[i],
+ gt_labels[i])
+ sampling_result = bbox_sampler.sample(
+ assign_result,
+ proposal_list[i],
+ gt_bboxes[i],
+ gt_labels[i],
+ feats=feat)
+ sampling_results.append(sampling_result)
+ return sampling_results
+
+ # Test bbox loss when truth is empty
+ gt_bboxes = [torch.empty((0, 4))]
+ gt_labels = [torch.LongTensor([])]
+
+ sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
+ gt_labels)
+
+ bbox_targets = self.get_target(sampling_results, gt_bboxes, gt_labels,
+ target_cfg)
+ labels, label_weights, bbox_targets, bbox_weights = bbox_targets
+
+ # Create dummy features "extracted" for each sampled bbox
+ num_sampled = sum(len(res.bboxes) for res in sampling_results)
+ dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
+ cls_scores, bbox_preds = self.forward(dummy_feats)
+
+ losses = self.loss(cls_scores, bbox_preds, labels, label_weights,
+ bbox_targets, bbox_weights)
+ assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
+ assert losses.get('loss_bbox', 0) == 0, 'empty gt loss should be zero'
+
+ # Test bbox loss when truth is non-empty
+ gt_bboxes = [
+ torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
+ ]
+ gt_labels = [torch.LongTensor([2])]
+
+ sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,
+ gt_labels)
+
+ bbox_targets = self.get_target(sampling_results, gt_bboxes, gt_labels,
+ target_cfg)
+ labels, label_weights, bbox_targets, bbox_weights = bbox_targets
+
+ # Create dummy features "extracted" for each sampled bbox
+ num_sampled = sum(len(res.bboxes) for res in sampling_results)
+ dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)
+ cls_scores, bbox_preds = self.forward(dummy_feats)
+
+ losses = self.loss(cls_scores, bbox_preds, labels, label_weights,
+ bbox_targets, bbox_weights)
+ assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'
+ assert losses.get('loss_bbox', 0) > 0, 'box-loss should be non-zero'
+
+
+def test_refine_boxes():
+ """
+ Mirrors the doctest in
+ ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` but checks for
+ multiple values of n_roi / n_img.
+ """
+ self = BBoxHead(reg_class_agnostic=True)
+
+ test_settings = [
+
+        # Corner case: fewer rois than images
+ {
+ 'n_roi': 2,
+ 'n_img': 4,
+ 'rng': 34285940
+ },
+
+ # Corner case: no images
+ {
+ 'n_roi': 0,
+ 'n_img': 0,
+ 'rng': 52925222
+ },
+
+ # Corner cases: few images / rois
+ {
+ 'n_roi': 1,
+ 'n_img': 1,
+ 'rng': 1200281
+ },
+ {
+ 'n_roi': 2,
+ 'n_img': 1,
+ 'rng': 1200282
+ },
+ {
+ 'n_roi': 2,
+ 'n_img': 2,
+ 'rng': 1200283
+ },
+ {
+ 'n_roi': 1,
+ 'n_img': 2,
+ 'rng': 1200284
+ },
+
+ # Corner case: no rois few images
+ {
+ 'n_roi': 0,
+ 'n_img': 1,
+ 'rng': 23955860
+ },
+ {
+ 'n_roi': 0,
+ 'n_img': 2,
+ 'rng': 25830516
+ },
+
+ # Corner case: no rois many images
+ {
+ 'n_roi': 0,
+ 'n_img': 10,
+ 'rng': 671346
+ },
+ {
+ 'n_roi': 0,
+ 'n_img': 20,
+ 'rng': 699807
+ },
+
+ # Corner case: similar num rois and images
+ {
+ 'n_roi': 20,
+ 'n_img': 20,
+ 'rng': 1200238
+ },
+ {
+ 'n_roi': 10,
+ 'n_img': 20,
+ 'rng': 1200238
+ },
+ {
+ 'n_roi': 5,
+ 'n_img': 5,
+ 'rng': 1200238
+ },
+
+ # ----------------------------------
+ # Common case: more rois than images
+ {
+ 'n_roi': 100,
+ 'n_img': 1,
+ 'rng': 337156
+ },
+ {
+ 'n_roi': 150,
+ 'n_img': 2,
+ 'rng': 275898
+ },
+ {
+ 'n_roi': 500,
+ 'n_img': 5,
+ 'rng': 4903221
+ },
+ ]
+
+ for demokw in test_settings:
+ try:
+ n_roi = demokw['n_roi']
+ n_img = demokw['n_img']
+ rng = demokw['rng']
+
+ print('Test refine_boxes case: {!r}'.format(demokw))
+ tup = _demodata_refine_boxes(n_roi, n_img, rng=rng)
+ rois, labels, bbox_preds, pos_is_gts, img_metas = tup
+ bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,
+ pos_is_gts, img_metas)
+ assert len(bboxes_list) == n_img
+ assert sum(map(len, bboxes_list)) <= n_roi
+ assert all(b.shape[1] == 4 for b in bboxes_list)
+ except Exception:
+ print('Test failed with demokw={!r}'.format(demokw))
+ raise
+
+
+def _demodata_refine_boxes(n_roi, n_img, rng=0):
+ """
+ Create random test data for the
+ ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` method
+ """
+ import numpy as np
+ from mmdet.core.bbox.demodata import random_boxes
+ from mmdet.core.bbox.demodata import ensure_rng
+ try:
+ import kwarray
+ except ImportError:
+ import pytest
+ pytest.skip('kwarray is required for this test')
+ scale = 512
+ rng = ensure_rng(rng)
+ img_metas = [{'img_shape': (scale, scale)} for _ in range(n_img)]
+ # Create rois in the expected format
+ roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)
+ if n_img == 0:
+ assert n_roi == 0, 'cannot have any rois if there are no images'
+ img_ids = torch.empty((0, ), dtype=torch.long)
+ roi_boxes = torch.empty((0, 4), dtype=torch.float32)
+ else:
+ img_ids = rng.randint(0, n_img, (n_roi, ))
+ img_ids = torch.from_numpy(img_ids)
+ rois = torch.cat([img_ids[:, None].float(), roi_boxes], dim=1)
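+    # each roi row is (batch_index, x1, y1, x2, y2)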
+ # Create other args
+ labels = rng.randint(0, 2, (n_roi, ))
+ labels = torch.from_numpy(labels).long()
+ bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)
+ # For each image, pretend random positive boxes are gts
+    is_label_pos = (labels.numpy() > 0).astype(int)
+ lbl_per_img = kwarray.group_items(is_label_pos, img_ids.numpy())
+ pos_per_img = [sum(lbl_per_img.get(gid, [])) for gid in range(n_img)]
+ # randomly generate with numpy then sort with torch
+ _pos_is_gts = [
+ rng.randint(0, 2, (npos, )).astype(np.uint8) for npos in pos_per_img
+ ]
+ pos_is_gts = [
+ torch.from_numpy(p).sort(descending=True)[0] for p in _pos_is_gts
+ ]
+ return rois, labels, bbox_preds, pos_is_gts, img_metas
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_nms.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_nms.py
new file mode 100644
index 000000000..6861f1e59
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_nms.py
@@ -0,0 +1,70 @@
+"""
+CommandLine:
+ pytest tests/test_nms.py
+"""
+import numpy as np
+import torch
+
+from mmdet.ops.nms.nms_wrapper import nms
+
+
+def test_nms_device_and_dtypes_cpu():
+ """
+ CommandLine:
+ xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu
+ """
+ iou_thr = 0.7
+ base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
+ [49.3, 32.9, 51.0, 35.3, 0.9],
+ [35.3, 11.5, 39.9, 14.5, 0.4],
+ [35.2, 11.7, 39.7, 15.7, 0.3]])
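+    # the two top-scoring boxes overlap heavily, so 3 detections should remain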
+
+ # CPU can handle float32 and float64
+ dets = base_dets.astype(np.float32)
+    suppressed, inds = nms(dets, iou_thr)
+    assert dets.dtype == suppressed.dtype
+    assert len(inds) == len(suppressed) == 3
+
+    dets = torch.FloatTensor(base_dets)
+    suppressed, inds = nms(dets, iou_thr)
+    assert dets.dtype == suppressed.dtype
+    assert len(inds) == len(suppressed) == 3
+
+    dets = base_dets.astype(np.float64)
+    suppressed, inds = nms(dets, iou_thr)
+    assert dets.dtype == suppressed.dtype
+    assert len(inds) == len(suppressed) == 3
+
+    dets = torch.DoubleTensor(base_dets)
+    suppressed, inds = nms(dets, iou_thr)
+    assert dets.dtype == suppressed.dtype
+    assert len(inds) == len(suppressed) == 3
+
+
+def test_nms_device_and_dtypes_gpu():
+ """
+ CommandLine:
+ xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_gpu
+ """
+ if not torch.cuda.is_available():
+ import pytest
+ pytest.skip('test requires GPU and torch+cuda')
+
+ iou_thr = 0.7
+ base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
+ [49.3, 32.9, 51.0, 35.3, 0.9],
+ [35.3, 11.5, 39.9, 14.5, 0.4],
+ [35.2, 11.7, 39.7, 15.7, 0.3]])
+
+ for device_id in range(torch.cuda.device_count()):
+ print('Run NMS on device_id = {!r}'.format(device_id))
+ # GPU can handle float32 but not float64
+ dets = base_dets.astype(np.float32)
+        suppressed, inds = nms(dets, iou_thr, device_id)
+        assert dets.dtype == suppressed.dtype
+        assert len(inds) == len(suppressed) == 3
+
+        dets = torch.FloatTensor(base_dets).to(device_id)
+        suppressed, inds = nms(dets, iou_thr)
+        assert dets.dtype == suppressed.dtype
+        assert len(inds) == len(suppressed) == 3
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_sampler.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_sampler.py
new file mode 100644
index 000000000..c75360268
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_sampler.py
@@ -0,0 +1,249 @@
+import torch
+
+from mmdet.core import MaxIoUAssigner
+from mmdet.core.bbox.samplers import OHEMSampler, RandomSampler
+
+
+def test_random_sampler():
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_labels = torch.LongTensor([1, 2])
+ gt_bboxes_ignore = torch.Tensor([
+ [30, 30, 40, 40],
+ ])
+ assign_result = assigner.assign(
+ bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=gt_bboxes_ignore,
+ gt_labels=gt_labels)
+
+ sampler = RandomSampler(
+ num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)
+
+ sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def test_random_sampler_empty_gt():
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.empty(0, 4)
+ gt_labels = torch.empty(0, ).long()
+ assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
+
+ sampler = RandomSampler(
+ num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)
+
+ sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def test_random_sampler_empty_pred():
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.empty(0, 4)
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_labels = torch.LongTensor([1, 2])
+ assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
+
+ sampler = RandomSampler(
+ num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)
+
+ sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def _context_for_ohem():
+ try:
+ from test_forward import _get_detector_cfg
+ except ImportError:
+ # Hack: grab testing utils from test_forward to make a context for ohem
+ import sys
+ from os.path import dirname
+ sys.path.insert(0, dirname(__file__))
+ from test_forward import _get_detector_cfg
+ model, train_cfg, test_cfg = _get_detector_cfg(
+ 'faster_rcnn_ohem_r50_fpn_1x.py')
+ model['pretrained'] = None
+ # torchvision roi align supports CPU
+ model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True
+ from mmdet.models import build_detector
+ context = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)
+ return context
+
+
+def test_ohem_sampler():
+
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 9],
+ [0, 10, 10, 19],
+ ])
+ gt_labels = torch.LongTensor([1, 2])
+ gt_bboxes_ignore = torch.Tensor([
+ [30, 30, 40, 40],
+ ])
+ assign_result = assigner.assign(
+ bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=gt_bboxes_ignore,
+ gt_labels=gt_labels)
+
+ context = _context_for_ohem()
+
+ sampler = OHEMSampler(
+ num=10,
+ pos_fraction=0.5,
+ context=context,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True)
+
+ feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]
+ sample_result = sampler.sample(
+ assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def test_ohem_sampler_empty_gt():
+
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_bboxes = torch.empty(0, 4)
+ gt_labels = torch.LongTensor([])
+ gt_bboxes_ignore = torch.Tensor([])
+ assign_result = assigner.assign(
+ bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=gt_bboxes_ignore,
+ gt_labels=gt_labels)
+
+ context = _context_for_ohem()
+
+ sampler = OHEMSampler(
+ num=10,
+ pos_fraction=0.5,
+ context=context,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True)
+
+ feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]
+
+ sample_result = sampler.sample(
+ assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def test_ohem_sampler_empty_pred():
+ assigner = MaxIoUAssigner(
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ ignore_iof_thr=0.5,
+ ignore_wrt_candidates=False,
+ )
+ bboxes = torch.empty(0, 4)
+ gt_bboxes = torch.FloatTensor([
+ [0, 0, 10, 10],
+ [10, 10, 20, 20],
+ [5, 5, 15, 15],
+ [32, 32, 38, 42],
+ ])
+ gt_labels = torch.LongTensor([1, 2, 2, 3])
+ gt_bboxes_ignore = torch.Tensor([])
+ assign_result = assigner.assign(
+ bboxes,
+ gt_bboxes,
+ gt_bboxes_ignore=gt_bboxes_ignore,
+ gt_labels=gt_labels)
+
+ context = _context_for_ohem()
+
+ sampler = OHEMSampler(
+ num=10,
+ pos_fraction=0.5,
+ context=context,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True)
+
+ feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]
+
+ sample_result = sampler.sample(
+ assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)
+
+ assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)
+ assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)
+
+
+def test_random_sample_result():
+ from mmdet.core.bbox.samplers.sampling_result import SamplingResult
+ SamplingResult.random(num_gts=0, num_preds=0)
+ SamplingResult.random(num_gts=0, num_preds=3)
+ SamplingResult.random(num_gts=3, num_preds=3)
+ SamplingResult.random(num_gts=0, num_preds=3)
+ SamplingResult.random(num_gts=7, num_preds=7)
+ SamplingResult.random(num_gts=7, num_preds=64)
+ SamplingResult.random(num_gts=24, num_preds=3)
+
+ for i in range(3):
+ SamplingResult.random(rng=i)
diff --git a/cv/instance_segmentation/SOLO/pytorch/tests/test_utils.py b/cv/instance_segmentation/SOLO/pytorch/tests/test_utils.py
new file mode 100644
index 000000000..cdefd2df2
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tests/test_utils.py
@@ -0,0 +1,9 @@
+import numpy.testing as npt
+
+from mmdet.utils.flops_counter import params_to_string
+
+
+def test_params_to_string():
+ npt.assert_equal(params_to_string(1e9), '1000.0 M')
+ npt.assert_equal(params_to_string(2e5), '200.0 k')
+ npt.assert_equal(params_to_string(3e-9), '3e-09')
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/analyze_logs.py b/cv/instance_segmentation/SOLO/pytorch/tools/analyze_logs.py
new file mode 100644
index 000000000..2810c98f1
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/analyze_logs.py
@@ -0,0 +1,178 @@
+import argparse
+import json
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+
+
+def cal_train_time(log_dicts, args):
+ for i, log_dict in enumerate(log_dicts):
+ print('{}Analyze train time of {}{}'.format('-' * 5, args.json_logs[i],
+ '-' * 5))
+ all_times = []
+ for epoch in log_dict.keys():
+ if args.include_outliers:
+ all_times.append(log_dict[epoch]['time'])
+ else:
+ all_times.append(log_dict[epoch]['time'][1:])
+ all_times = np.array(all_times)
+ epoch_ave_time = all_times.mean(-1)
+ slowest_epoch = epoch_ave_time.argmax()
+ fastest_epoch = epoch_ave_time.argmin()
+ std_over_epoch = epoch_ave_time.std()
+ print('slowest epoch {}, average time is {:.4f}'.format(
+ slowest_epoch + 1, epoch_ave_time[slowest_epoch]))
+ print('fastest epoch {}, average time is {:.4f}'.format(
+ fastest_epoch + 1, epoch_ave_time[fastest_epoch]))
+ print('time std over epochs is {:.4f}'.format(std_over_epoch))
+ print('average iter time: {:.4f} s/iter'.format(np.mean(all_times)))
+ print()
+
+
+def plot_curve(log_dicts, args):
+ if args.backend is not None:
+ plt.switch_backend(args.backend)
+ sns.set_style(args.style)
+ # if legend is None, use {filename}_{key} as legend
+ legend = args.legend
+ if legend is None:
+ legend = []
+ for json_log in args.json_logs:
+ for metric in args.keys:
+ legend.append('{}_{}'.format(json_log, metric))
+ assert len(legend) == (len(args.json_logs) * len(args.keys))
+ metrics = args.keys
+
+ num_metrics = len(metrics)
+ for i, log_dict in enumerate(log_dicts):
+ epochs = list(log_dict.keys())
+ for j, metric in enumerate(metrics):
+ print('plot curve of {}, metric is {}'.format(
+ args.json_logs[i], metric))
+ if metric not in log_dict[epochs[0]]:
+ raise KeyError('{} does not contain metric {}'.format(
+ args.json_logs[i], metric))
+
+ if 'mAP' in metric:
+ xs = np.arange(1, max(epochs) + 1)
+ ys = []
+ for epoch in epochs:
+ ys += log_dict[epoch][metric]
+ ax = plt.gca()
+ ax.set_xticks(xs)
+ plt.xlabel('epoch')
+ plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
+ else:
+ xs = []
+ ys = []
+ num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1]
+ for epoch in epochs:
+ iters = log_dict[epoch]['iter']
+ if log_dict[epoch]['mode'][-1] == 'val':
+ iters = iters[:-1]
+ xs.append(
+ np.array(iters) + (epoch - 1) * num_iters_per_epoch)
+ ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
+ xs = np.concatenate(xs)
+ ys = np.concatenate(ys)
+ plt.xlabel('iter')
+ plt.plot(
+ xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
+ plt.legend()
+ if args.title is not None:
+ plt.title(args.title)
+ if args.out is None:
+ plt.show()
+ else:
+ print('save curve to: {}'.format(args.out))
+ plt.savefig(args.out)
+ plt.cla()
+
+
+def add_plot_parser(subparsers):
+ parser_plt = subparsers.add_parser(
+ 'plot_curve', help='parser for plotting curves')
+ parser_plt.add_argument(
+ 'json_logs',
+ type=str,
+ nargs='+',
+ help='path of train log in json format')
+ parser_plt.add_argument(
+ '--keys',
+ type=str,
+ nargs='+',
+ default=['bbox_mAP'],
+ help='the metric that you want to plot')
+ parser_plt.add_argument('--title', type=str, help='title of figure')
+ parser_plt.add_argument(
+ '--legend',
+ type=str,
+ nargs='+',
+ default=None,
+ help='legend of each plot')
+ parser_plt.add_argument(
+ '--backend', type=str, default=None, help='backend of plt')
+ parser_plt.add_argument(
+ '--style', type=str, default='dark', help='style of plt')
+ parser_plt.add_argument('--out', type=str, default=None)
+
+
+def add_time_parser(subparsers):
+ parser_time = subparsers.add_parser(
+ 'cal_train_time',
+ help='parser for computing the average time per training iteration')
+ parser_time.add_argument(
+ 'json_logs',
+ type=str,
+ nargs='+',
+ help='path of train log in json format')
+ parser_time.add_argument(
+ '--include-outliers',
+ action='store_true',
+ help='include the first value of every epoch when computing '
+ 'the average time')
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Analyze Json Log')
+ # currently only support plot curve and calculate average train time
+ subparsers = parser.add_subparsers(dest='task', help='task parser')
+ add_plot_parser(subparsers)
+ add_time_parser(subparsers)
+ args = parser.parse_args()
+ return args
+
+
+def load_json_logs(json_logs):
+    # load and convert json_logs to log_dicts; in each log_dict the key is the
+    # epoch and the value is a sub dict whose keys are the logged metrics
+    # (e.g. memory, bbox_mAP) and whose values are lists over all iterations
+ log_dicts = [dict() for _ in json_logs]
+ for json_log, log_dict in zip(json_logs, log_dicts):
+ with open(json_log, 'r') as log_file:
+ for l in log_file:
+ log = json.loads(l.strip())
+ epoch = log.pop('epoch')
+ if epoch not in log_dict:
+ log_dict[epoch] = defaultdict(list)
+ for k, v in log.items():
+ log_dict[epoch][k].append(v)
+ return log_dicts
+
+
+def main():
+ args = parse_args()
+
+ json_logs = args.json_logs
+ for json_log in json_logs:
+ assert json_log.endswith('.json')
+
+ log_dicts = load_json_logs(json_logs)
+
+ eval(args.task)(log_dicts, args)
+
+
+if __name__ == '__main__':
+ main()
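A minimal, hypothetical sketch of the structure load_json_logs() builds; the log path and metric values below are invented for illustration only.

    # Each line of a training log is assumed to be a JSON record such as
    # {"mode": "train", "epoch": 1, "iter": 50, "lr": 0.01, "loss": 0.9, "time": 0.4}
    log_dicts = load_json_logs(['work_dirs/solo_r50/20230309_162538.log.json'])  # placeholder path
    first_epoch = sorted(log_dicts[0].keys())[0]
    # per-epoch sub dict: metric name -> list of per-iteration values
    print(log_dicts[0][first_epoch]['loss'])   # e.g. [0.9, 0.85, ...]
    print(log_dicts[0][first_epoch]['time'])   # the values consumed by cal_train_time()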
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/coco_error_analysis.py b/cv/instance_segmentation/SOLO/pytorch/tools/coco_error_analysis.py
new file mode 100644
index 000000000..6aeadadb9
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/coco_error_analysis.py
@@ -0,0 +1,174 @@
+import copy
+import os
+from argparse import ArgumentParser
+from multiprocessing import Pool
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+
+def makeplot(rs, ps, outDir, class_name, iou_type):
+ cs = np.vstack([
+ np.ones((2, 3)),
+ np.array([.31, .51, .74]),
+ np.array([.75, .31, .30]),
+ np.array([.36, .90, .38]),
+ np.array([.50, .39, .64]),
+ np.array([1, .6, 0])
+ ])
+ areaNames = ['allarea', 'small', 'medium', 'large']
+ types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
+ for i in range(len(areaNames)):
+ area_ps = ps[..., i, 0]
+        figure_title = iou_type + '-' + class_name + '-' + areaNames[i]
+ aps = [ps_.mean() for ps_ in area_ps]
+ ps_curve = [
+ ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps
+ ]
+ ps_curve.insert(0, np.zeros(ps_curve[0].shape))
+ fig = plt.figure()
+ ax = plt.subplot(111)
+ for k in range(len(types)):
+ ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)
+ ax.fill_between(
+ rs,
+ ps_curve[k],
+ ps_curve[k + 1],
+ color=cs[k],
+ label=str('[{:.3f}'.format(aps[k]) + ']' + types[k]))
+ plt.xlabel('recall')
+ plt.ylabel('precision')
+ plt.xlim(0, 1.)
+ plt.ylim(0, 1.)
+        plt.title(figure_title)
+ plt.legend()
+ # plt.show()
+        fig.savefig(outDir + '/{}.png'.format(figure_title))
+ plt.close(fig)
+
+
+def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type):
+ nm = cocoGt.loadCats(catId)[0]
+ print('--------------analyzing {}-{}---------------'.format(
+ k + 1, nm['name']))
+ ps_ = {}
+ dt = copy.deepcopy(cocoDt)
+ nm = cocoGt.loadCats(catId)[0]
+ imgIds = cocoGt.getImgIds()
+ dt_anns = dt.dataset['annotations']
+ select_dt_anns = []
+ for ann in dt_anns:
+ if ann['category_id'] == catId:
+ select_dt_anns.append(ann)
+ dt.dataset['annotations'] = select_dt_anns
+ dt.createIndex()
+ # compute precision but ignore superclass confusion
+ gt = copy.deepcopy(cocoGt)
+ child_catIds = gt.getCatIds(supNms=[nm['supercategory']])
+ for idx, ann in enumerate(gt.dataset['annotations']):
+ if (ann['category_id'] in child_catIds
+ and ann['category_id'] != catId):
+ gt.dataset['annotations'][idx]['ignore'] = 1
+ gt.dataset['annotations'][idx]['iscrowd'] = 1
+ gt.dataset['annotations'][idx]['category_id'] = catId
+ cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+ cocoEval.params.imgIds = imgIds
+ cocoEval.params.maxDets = [100]
+ cocoEval.params.iouThrs = [.1]
+ cocoEval.params.useCats = 1
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]
+ ps_['ps_supercategory'] = ps_supercategory
+ # compute precision but ignore any class confusion
+ gt = copy.deepcopy(cocoGt)
+ for idx, ann in enumerate(gt.dataset['annotations']):
+ if ann['category_id'] != catId:
+ gt.dataset['annotations'][idx]['ignore'] = 1
+ gt.dataset['annotations'][idx]['iscrowd'] = 1
+ gt.dataset['annotations'][idx]['category_id'] = catId
+ cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+ cocoEval.params.imgIds = imgIds
+ cocoEval.params.maxDets = [100]
+ cocoEval.params.iouThrs = [.1]
+ cocoEval.params.useCats = 1
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]
+ ps_['ps_allcategory'] = ps_allcategory
+ return k, ps_
+
+
+def analyze_results(res_file, ann_file, res_types, out_dir):
+ for res_type in res_types:
+ assert res_type in ['bbox', 'segm']
+
+ directory = os.path.dirname(out_dir + '/')
+ if not os.path.exists(directory):
+ print('-------------create {}-----------------'.format(out_dir))
+ os.makedirs(directory)
+
+ cocoGt = COCO(ann_file)
+ cocoDt = cocoGt.loadRes(res_file)
+ imgIds = cocoGt.getImgIds()
+ for res_type in res_types:
+ res_out_dir = out_dir + '/' + res_type + '/'
+ res_directory = os.path.dirname(res_out_dir)
+ if not os.path.exists(res_directory):
+ print(
+ '-------------create {}-----------------'.format(res_out_dir))
+ os.makedirs(res_directory)
+ iou_type = res_type
+ cocoEval = COCOeval(
+ copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
+ cocoEval.params.imgIds = imgIds
+ cocoEval.params.iouThrs = [.75, .5, .1]
+ cocoEval.params.maxDets = [100]
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ ps = cocoEval.eval['precision']
+ ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
+ catIds = cocoGt.getCatIds()
+ recThrs = cocoEval.params.recThrs
+ with Pool(processes=48) as pool:
+ args = [(k, cocoDt, cocoGt, catId, iou_type)
+ for k, catId in enumerate(catIds)]
+ analyze_results = pool.starmap(analyze_individual_category, args)
+ for k, catId in enumerate(catIds):
+ nm = cocoGt.loadCats(catId)[0]
+ print('--------------saving {}-{}---------------'.format(
+ k + 1, nm['name']))
+ analyze_result = analyze_results[k]
+ assert k == analyze_result[0]
+ ps_supercategory = analyze_result[1]['ps_supercategory']
+ ps_allcategory = analyze_result[1]['ps_allcategory']
+ # compute precision but ignore superclass confusion
+ ps[3, :, k, :, :] = ps_supercategory
+ # compute precision but ignore any class confusion
+ ps[4, :, k, :, :] = ps_allcategory
+ # fill in background and false negative errors and plot
+ ps[ps == -1] = 0
+ ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
+ ps[6, :, k, :, :] = 1.0
+ makeplot(recThrs, ps[:, :, k], res_out_dir, nm['name'], iou_type)
+ makeplot(recThrs, ps, res_out_dir, 'allclass', iou_type)
+
+
+def main():
+ parser = ArgumentParser(description='COCO Error Analysis Tool')
+ parser.add_argument('result', help='result file (json format) path')
+ parser.add_argument('out_dir', help='dir to save analyze result images')
+ parser.add_argument(
+ '--ann',
+ default='data/coco/annotations/instances_val2017.json',
+ help='annotation file path')
+ parser.add_argument(
+ '--types', type=str, nargs='+', default=['bbox'], help='result types')
+ args = parser.parse_args()
+ analyze_results(args.result, args.ann, args.types, out_dir=args.out_dir)
+
+
+if __name__ == '__main__':
+ main()
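For reference, a hedged sketch of how the seven stacked precision slices assembled in analyze_results() map onto the legend labels used in makeplot(); the file paths in the call are placeholders.

    # ps[0] -> 'C75'  precision at IoU=0.75
    # ps[1] -> 'C50'  precision at IoU=0.50
    # ps[2] -> 'Loc'  precision at IoU=0.10 (localization errors forgiven)
    # ps[3] -> 'Sim'  supercategory confusions ignored
    # ps[4] -> 'Oth'  all class confusions ignored
    # ps[5] -> 'BG'   background false positives removed (ps[4] > 0)
    # ps[6] -> 'FN'   remaining gap up to 1.0 is missed detections
    analyze_results('solo_results.segm.json',                      # placeholder result file
                    'data/coco/annotations/instances_val2017.json',
                    res_types=['segm'],
                    out_dir='coco_error_analysis')                 # placeholder output dir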
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/coco_eval.py b/cv/instance_segmentation/SOLO/pytorch/tools/coco_eval.py
new file mode 100644
index 000000000..bc3c96b3c
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/coco_eval.py
@@ -0,0 +1,30 @@
+from argparse import ArgumentParser
+
+from mmdet.core import coco_eval
+
+
+def main():
+ parser = ArgumentParser(description='COCO Evaluation')
+ parser.add_argument('result', help='result file path')
+ parser.add_argument('--ann', help='annotation file path')
+ parser.add_argument(
+ '--types',
+ type=str,
+ nargs='+',
+ choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
+ default=['bbox'],
+ help='result types')
+ parser.add_argument(
+ '--max-dets',
+ type=int,
+ nargs='+',
+ default=[100, 300, 1000],
+ help='proposal numbers, only used for recall evaluation')
+ parser.add_argument(
+ '--classwise', action='store_true', help='whether eval class wise ap')
+ args = parser.parse_args()
+ coco_eval(args.result, args.types, args.ann, args.max_dets, args.classwise)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/collect_env.py b/cv/instance_segmentation/SOLO/pytorch/tools/collect_env.py
new file mode 100644
index 000000000..81d6c7aaa
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/collect_env.py
@@ -0,0 +1,64 @@
+import os.path as osp
+import subprocess
+import sys
+from collections import defaultdict
+
+import cv2
+import mmcv
+import torch
+import torchvision
+
+import mmdet
+from mmdet.ops import get_compiler_version, get_compiling_cuda_version
+
+
+def collect_env():
+ env_info = {}
+ env_info['sys.platform'] = sys.platform
+ env_info['Python'] = sys.version.replace('\n', '')
+
+ cuda_available = torch.cuda.is_available()
+ env_info['CUDA available'] = cuda_available
+
+ if cuda_available:
+ from torch.utils.cpp_extension import CUDA_HOME
+ env_info['CUDA_HOME'] = CUDA_HOME
+
+ if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
+ try:
+ nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
+ nvcc = subprocess.check_output(
+ '"{}" -V | tail -n1'.format(nvcc), shell=True)
+ nvcc = nvcc.decode('utf-8').strip()
+ except subprocess.SubprocessError:
+ nvcc = 'Not Available'
+ env_info['NVCC'] = nvcc
+
+ devices = defaultdict(list)
+ for k in range(torch.cuda.device_count()):
+ devices[torch.cuda.get_device_name(k)].append(str(k))
+ for name, devids in devices.items():
+ env_info['GPU ' + ','.join(devids)] = name
+
+ gcc = subprocess.check_output('gcc --version | head -n1', shell=True)
+ gcc = gcc.decode('utf-8').strip()
+ env_info['GCC'] = gcc
+
+ env_info['PyTorch'] = torch.__version__
+ env_info['PyTorch compiling details'] = torch.__config__.show()
+
+ env_info['TorchVision'] = torchvision.__version__
+
+ env_info['OpenCV'] = cv2.__version__
+
+ env_info['MMCV'] = mmcv.__version__
+ env_info['MMDetection'] = mmdet.__version__
+ env_info['MMDetection Compiler'] = get_compiler_version()
+ env_info['MMDetection CUDA Compiler'] = get_compiling_cuda_version()
+
+ for name, val in env_info.items():
+ print('{}: {}'.format(name, val))
+
+
+if __name__ == "__main__":
+ collect_env()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/convert_datasets/pascal_voc.py b/cv/instance_segmentation/SOLO/pytorch/tools/convert_datasets/pascal_voc.py
new file mode 100644
index 000000000..029eeb0a9
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/convert_datasets/pascal_voc.py
@@ -0,0 +1,141 @@
+import argparse
+import os.path as osp
+import xml.etree.ElementTree as ET
+
+import mmcv
+import numpy as np
+
+from mmdet.core import voc_classes
+
+label_ids = {name: i + 1 for i, name in enumerate(voc_classes())}
+
+
+def parse_xml(args):
+ xml_path, img_path = args
+ tree = ET.parse(xml_path)
+ root = tree.getroot()
+ size = root.find('size')
+ w = int(size.find('width').text)
+ h = int(size.find('height').text)
+ bboxes = []
+ labels = []
+ bboxes_ignore = []
+ labels_ignore = []
+ for obj in root.findall('object'):
+ name = obj.find('name').text
+ label = label_ids[name]
+ difficult = int(obj.find('difficult').text)
+ bnd_box = obj.find('bndbox')
+ bbox = [
+ int(bnd_box.find('xmin').text),
+ int(bnd_box.find('ymin').text),
+ int(bnd_box.find('xmax').text),
+ int(bnd_box.find('ymax').text)
+ ]
+ if difficult:
+ bboxes_ignore.append(bbox)
+ labels_ignore.append(label)
+ else:
+ bboxes.append(bbox)
+ labels.append(label)
+ if not bboxes:
+ bboxes = np.zeros((0, 4))
+ labels = np.zeros((0, ))
+ else:
+ bboxes = np.array(bboxes, ndmin=2) - 1
+ labels = np.array(labels)
+ if not bboxes_ignore:
+ bboxes_ignore = np.zeros((0, 4))
+ labels_ignore = np.zeros((0, ))
+ else:
+ bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
+ labels_ignore = np.array(labels_ignore)
+ annotation = {
+ 'filename': img_path,
+ 'width': w,
+ 'height': h,
+ 'ann': {
+ 'bboxes': bboxes.astype(np.float32),
+ 'labels': labels.astype(np.int64),
+ 'bboxes_ignore': bboxes_ignore.astype(np.float32),
+ 'labels_ignore': labels_ignore.astype(np.int64)
+ }
+ }
+ return annotation
+
+
+def cvt_annotations(devkit_path, years, split, out_file):
+ if not isinstance(years, list):
+ years = [years]
+ annotations = []
+ for year in years:
+ filelist = osp.join(devkit_path,
+ 'VOC{}/ImageSets/Main/{}.txt'.format(year, split))
+ if not osp.isfile(filelist):
+ print('filelist does not exist: {}, skip voc{} {}'.format(
+ filelist, year, split))
+ return
+ img_names = mmcv.list_from_file(filelist)
+ xml_paths = [
+ osp.join(devkit_path,
+ 'VOC{}/Annotations/{}.xml'.format(year, img_name))
+ for img_name in img_names
+ ]
+ img_paths = [
+ 'VOC{}/JPEGImages/{}.jpg'.format(year, img_name)
+ for img_name in img_names
+ ]
+ part_annotations = mmcv.track_progress(parse_xml,
+ list(zip(xml_paths, img_paths)))
+ annotations.extend(part_annotations)
+ mmcv.dump(annotations, out_file)
+ return annotations
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Convert PASCAL VOC annotations to mmdetection format')
+ parser.add_argument('devkit_path', help='pascal voc devkit path')
+ parser.add_argument('-o', '--out-dir', help='output path')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+ devkit_path = args.devkit_path
+ out_dir = args.out_dir if args.out_dir else devkit_path
+ mmcv.mkdir_or_exist(out_dir)
+
+ years = []
+ if osp.isdir(osp.join(devkit_path, 'VOC2007')):
+ years.append('2007')
+ if osp.isdir(osp.join(devkit_path, 'VOC2012')):
+ years.append('2012')
+ if '2007' in years and '2012' in years:
+ years.append(['2007', '2012'])
+ if not years:
+ raise IOError('The devkit path {} contains neither "VOC2007" nor '
+ '"VOC2012" subfolder'.format(devkit_path))
+ for year in years:
+ if year == '2007':
+ prefix = 'voc07'
+ elif year == '2012':
+ prefix = 'voc12'
+ elif year == ['2007', '2012']:
+ prefix = 'voc0712'
+ for split in ['train', 'val', 'trainval']:
+ dataset_name = prefix + '_' + split
+ print('processing {} ...'.format(dataset_name))
+ cvt_annotations(devkit_path, year, split,
+ osp.join(out_dir, dataset_name + '.pkl'))
+ if not isinstance(year, list):
+ dataset_name = prefix + '_test'
+ print('processing {} ...'.format(dataset_name))
+ cvt_annotations(devkit_path, year, 'test',
+ osp.join(out_dir, dataset_name + '.pkl'))
+ print('Done!')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/detectron2pytorch.py b/cv/instance_segmentation/SOLO/pytorch/tools/detectron2pytorch.py
new file mode 100644
index 000000000..0a90ad172
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/detectron2pytorch.py
@@ -0,0 +1,88 @@
+import argparse
+from collections import OrderedDict
+
+import mmcv
+import torch
+
+arch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)}
+
+
+def convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names):
+ # detectron replace bn with affine channel layer
+ state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name +
+ '_b'])
+ state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name +
+ '_s'])
+ bn_size = state_dict[torch_name + '.weight'].size()
+ state_dict[torch_name + '.running_mean'] = torch.zeros(bn_size)
+ state_dict[torch_name + '.running_var'] = torch.ones(bn_size)
+ converted_names.add(caffe_name + '_b')
+ converted_names.add(caffe_name + '_s')
+
+
+def convert_conv_fc(blobs, state_dict, caffe_name, torch_name,
+ converted_names):
+ state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name +
+ '_w'])
+ converted_names.add(caffe_name + '_w')
+ if caffe_name + '_b' in blobs:
+ state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name +
+ '_b'])
+ converted_names.add(caffe_name + '_b')
+
+
+def convert(src, dst, depth):
+ """Convert keys in detectron pretrained ResNet models to pytorch style."""
+ # load arch_settings
+ if depth not in arch_settings:
+ raise ValueError('Only support ResNet-50 and ResNet-101 currently')
+ block_nums = arch_settings[depth]
+ # load caffe model
+ caffe_model = mmcv.load(src, encoding='latin1')
+ blobs = caffe_model['blobs'] if 'blobs' in caffe_model else caffe_model
+ # convert to pytorch style
+ state_dict = OrderedDict()
+ converted_names = set()
+ convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names)
+ convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names)
+ for i in range(1, len(block_nums) + 1):
+ for j in range(block_nums[i - 1]):
+ if j == 0:
+ convert_conv_fc(blobs, state_dict,
+ 'res{}_{}_branch1'.format(i + 1, j),
+ 'layer{}.{}.downsample.0'.format(i, j),
+ converted_names)
+ convert_bn(blobs, state_dict,
+ 'res{}_{}_branch1_bn'.format(i + 1, j),
+ 'layer{}.{}.downsample.1'.format(i, j),
+ converted_names)
+ for k, letter in enumerate(['a', 'b', 'c']):
+ convert_conv_fc(blobs, state_dict,
+ 'res{}_{}_branch2{}'.format(i + 1, j, letter),
+ 'layer{}.{}.conv{}'.format(i, j, k + 1),
+ converted_names)
+ convert_bn(blobs, state_dict,
+ 'res{}_{}_branch2{}_bn'.format(i + 1, j, letter),
+ 'layer{}.{}.bn{}'.format(i, j,
+ k + 1), converted_names)
+ # check if all layers are converted
+ for key in blobs:
+ if key not in converted_names:
+ print('Not Convert: {}'.format(key))
+ # save checkpoint
+ checkpoint = dict()
+ checkpoint['state_dict'] = state_dict
+ torch.save(checkpoint, dst)
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Convert model keys')
+ parser.add_argument('src', help='src detectron model path')
+ parser.add_argument('dst', help='save path')
+ parser.add_argument('depth', type=int, help='ResNet model depth')
+ args = parser.parse_args()
+ convert(args.src, args.dst, args.depth)
+
+
+if __name__ == '__main__':
+ main()
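A small runnable sketch of the Caffe-to-PyTorch key mapping that convert() loops over, shown for the first residual stage only; no weights are involved and the names follow the formatting used in the code above.

    i, j = 1, 0   # stage 1 (caffe 'res2'), first block
    print('res{}_{}_branch1'.format(i + 1, j), '->',
          'layer{}.{}.downsample.0'.format(i, j))
    for k, letter in enumerate(['a', 'b', 'c']):
        print('res{}_{}_branch2{}'.format(i + 1, j, letter), '->',
              'layer{}.{}.conv{}'.format(i, j, k + 1))
    # res2_0_branch2a -> layer1.0.conv1, res2_0_branch2b -> layer1.0.conv2, ...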
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/dist_test.sh b/cv/instance_segmentation/SOLO/pytorch/tools/dist_test.sh
new file mode 100644
index 000000000..efab6ea27
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/dist_test.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+PYTHON=${PYTHON:-"python"}
+
+CONFIG=$1
+CHECKPOINT=$2
+GPUS=$3
+PORT=${PORT:-29500}
+
+$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+ $(dirname "$0")/test_ins.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/dist_train.sh b/cv/instance_segmentation/SOLO/pytorch/tools/dist_train.sh
new file mode 100644
index 000000000..0b8adf711
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/dist_train.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CONFIG=$1
+GPUS=$2
+PORT=${PORT:-29500}
+
+python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
+ $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/get_flops.py b/cv/instance_segmentation/SOLO/pytorch/tools/get_flops.py
new file mode 100644
index 000000000..6c9cb2340
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/get_flops.py
@@ -0,0 +1,55 @@
+import argparse
+
+from mmcv import Config
+
+from mmdet.models import build_detector
+from mmdet.utils import get_model_complexity_info
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a detector')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument(
+ '--shape',
+ type=int,
+ nargs='+',
+ default=[1280, 800],
+ help='input image size')
+ args = parser.parse_args()
+ return args
+
+
+def main():
+
+ args = parse_args()
+
+ if len(args.shape) == 1:
+ input_shape = (3, args.shape[0], args.shape[0])
+ elif len(args.shape) == 2:
+ input_shape = (3, ) + tuple(args.shape)
+ else:
+ raise ValueError('invalid input shape')
+
+ cfg = Config.fromfile(args.config)
+ model = build_detector(
+ cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda()
+ model.eval()
+
+ if hasattr(model, 'forward_dummy'):
+ model.forward = model.forward_dummy
+ else:
+ raise NotImplementedError(
+            'FLOPs counter is currently not supported with {}'.
+ format(model.__class__.__name__))
+
+ flops, params = get_model_complexity_info(model, input_shape)
+ split_line = '=' * 30
+ print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
+ split_line, input_shape, flops, params))
+ print('!!!Please be cautious if you use the results in papers. '
+ 'You may need to check if all ops are supported and verify that the '
+ 'flops computation is correct.')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/publish_model.py b/cv/instance_segmentation/SOLO/pytorch/tools/publish_model.py
new file mode 100644
index 000000000..a049f1767
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/publish_model.py
@@ -0,0 +1,35 @@
+import argparse
+import subprocess
+
+import torch
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Process a checkpoint to be published')
+ parser.add_argument('in_file', help='input checkpoint filename')
+ parser.add_argument('out_file', help='output checkpoint filename')
+ args = parser.parse_args()
+ return args
+
+
+def process_checkpoint(in_file, out_file):
+ checkpoint = torch.load(in_file, map_location='cpu')
+ # remove optimizer for smaller file size
+ if 'optimizer' in checkpoint:
+ del checkpoint['optimizer']
+ # if it is necessary to remove some sensitive data in checkpoint['meta'],
+ # add the code here.
+ torch.save(checkpoint, out_file)
+ sha = subprocess.check_output(['sha256sum', out_file]).decode()
+    # str.rstrip() strips a set of characters, not a suffix, so drop '.pth' explicitly
+    out_file_name = out_file[:-4] if out_file.endswith('.pth') else out_file
+    final_file = out_file_name + '-{}.pth'.format(sha[:8])
+    subprocess.Popen(['mv', out_file, final_file])
+
+
+def main():
+ args = parse_args()
+ process_checkpoint(args.in_file, args.out_file)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/robustness_eval.py b/cv/instance_segmentation/SOLO/pytorch/tools/robustness_eval.py
new file mode 100644
index 000000000..a07aa0159
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/robustness_eval.py
@@ -0,0 +1,256 @@
+import os.path as osp
+from argparse import ArgumentParser
+
+import mmcv
+import numpy as np
+
+
+def print_coco_results(results):
+
+ def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100):
+ iStr = ' {:<18} {} @[ IoU={:<9} | \
+ area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
+
+ titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
+ typeStr = '(AP)' if ap == 1 else '(AR)'
+ iouStr = '{:0.2f}:{:0.2f}'.format(.5, .95) \
+ if iouThr is None else '{:0.2f}'.format(iouThr)
+ print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, result))
+
+ stats = np.zeros((12, ))
+ stats[0] = _print(results[0], 1)
+ stats[1] = _print(results[1], 1, iouThr=.5)
+ stats[2] = _print(results[2], 1, iouThr=.75)
+ stats[3] = _print(results[3], 1, areaRng='small')
+ stats[4] = _print(results[4], 1, areaRng='medium')
+ stats[5] = _print(results[5], 1, areaRng='large')
+ stats[6] = _print(results[6], 0, maxDets=1)
+ stats[7] = _print(results[7], 0, maxDets=10)
+ stats[8] = _print(results[8], 0)
+ stats[9] = _print(results[9], 0, areaRng='small')
+ stats[10] = _print(results[10], 0, areaRng='medium')
+ stats[11] = _print(results[11], 0, areaRng='large')
+
+
+def get_coco_style_results(filename,
+ task='bbox',
+ metric=None,
+ prints='mPC',
+ aggregate='benchmark'):
+
+ assert aggregate in ['benchmark', 'all']
+
+ if prints == 'all':
+ prints = ['P', 'mPC', 'rPC']
+ elif isinstance(prints, str):
+ prints = [prints]
+ for p in prints:
+ assert p in ['P', 'mPC', 'rPC']
+
+ if metric is None:
+ metrics = [
+ 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',
+ 'ARs', 'ARm', 'ARl'
+ ]
+ elif isinstance(metric, list):
+ metrics = metric
+ else:
+ metrics = [metric]
+
+ for metric_name in metrics:
+ assert metric_name in [
+ 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',
+ 'ARs', 'ARm', 'ARl'
+ ]
+
+ eval_output = mmcv.load(filename)
+
+ num_distortions = len(list(eval_output.keys()))
+ results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32')
+
+ for corr_i, distortion in enumerate(eval_output):
+ for severity in eval_output[distortion]:
+ for metric_j, metric_name in enumerate(metrics):
+ mAP = eval_output[distortion][severity][task][metric_name]
+ results[corr_i, severity, metric_j] = mAP
+
+ P = results[0, 0, :]
+ if aggregate == 'benchmark':
+ mPC = np.mean(results[:15, 1:, :], axis=(0, 1))
+ else:
+ mPC = np.mean(results[:, 1:, :], axis=(0, 1))
+ rPC = mPC / P
+
+ print('\nmodel: {}'.format(osp.basename(filename)))
+ if metric is None:
+ if 'P' in prints:
+ print('Performance on Clean Data [P] ({})'.format(task))
+ print_coco_results(P)
+ if 'mPC' in prints:
+ print('Mean Performance under Corruption [mPC] ({})'.format(task))
+ print_coco_results(mPC)
+ if 'rPC' in prints:
+            print('Relative Performance under Corruption [rPC] ({})'.format(
+ task))
+ print_coco_results(rPC)
+ else:
+ if 'P' in prints:
+ print('Performance on Clean Data [P] ({})'.format(task))
+ for metric_i, metric_name in enumerate(metrics):
+ print('{:5} = {:0.3f}'.format(metric_name, P[metric_i]))
+ if 'mPC' in prints:
+ print('Mean Performance under Corruption [mPC] ({})'.format(task))
+ for metric_i, metric_name in enumerate(metrics):
+ print('{:5} = {:0.3f}'.format(metric_name, mPC[metric_i]))
+ if 'rPC' in prints:
+ print('Relative Performance under Corruption [rPC] ({})'.format(
+ task))
+ for metric_i, metric_name in enumerate(metrics):
+ print('{:5} => {:0.1f} %'.format(metric_name,
+ rPC[metric_i] * 100))
+
+ return results
+
+
+def get_voc_style_results(filename, prints='mPC', aggregate='benchmark'):
+
+ assert aggregate in ['benchmark', 'all']
+
+ if prints == 'all':
+ prints = ['P', 'mPC', 'rPC']
+ elif isinstance(prints, str):
+ prints = [prints]
+ for p in prints:
+ assert p in ['P', 'mPC', 'rPC']
+
+ eval_output = mmcv.load(filename)
+
+ num_distortions = len(list(eval_output.keys()))
+ results = np.zeros((num_distortions, 6, 20), dtype='float32')
+
+ for i, distortion in enumerate(eval_output):
+ for severity in eval_output[distortion]:
+ mAP = [
+ eval_output[distortion][severity][j]['ap']
+ for j in range(len(eval_output[distortion][severity]))
+ ]
+ results[i, severity, :] = mAP
+
+ P = results[0, 0, :]
+ if aggregate == 'benchmark':
+ mPC = np.mean(results[:15, 1:, :], axis=(0, 1))
+ else:
+ mPC = np.mean(results[:, 1:, :], axis=(0, 1))
+ rPC = mPC / P
+
+ print('\nmodel: {}'.format(osp.basename(filename)))
+ if 'P' in prints:
+ print('{:48} = {:0.3f}'.format('Performance on Clean Data [P] in AP50',
+ np.mean(P)))
+ if 'mPC' in prints:
+ print('{:48} = {:0.3f}'.format(
+ 'Mean Performance under Corruption [mPC] in AP50', np.mean(mPC)))
+ if 'rPC' in prints:
+ print('{:48} = {:0.1f}'.format(
+            'Relative Performance under Corruption [rPC] in %',
+ np.mean(rPC) * 100))
+
+ return np.mean(results, axis=2, keepdims=True)
+
+
+def get_results(filename,
+ dataset='coco',
+ task='bbox',
+ metric=None,
+ prints='mPC',
+ aggregate='benchmark'):
+ assert dataset in ['coco', 'voc', 'cityscapes']
+
+ if dataset in ['coco', 'cityscapes']:
+ results = get_coco_style_results(
+ filename,
+ task=task,
+ metric=metric,
+ prints=prints,
+ aggregate=aggregate)
+ elif dataset == 'voc':
+ if task != 'bbox':
+ print('Only bbox analysis is supported for Pascal VOC')
+ print('Will report bbox results\n')
+ if metric not in [None, ['AP'], ['AP50']]:
+ print('Only the AP50 metric is supported for Pascal VOC')
+ print('Will report AP50 metric\n')
+ results = get_voc_style_results(
+ filename, prints=prints, aggregate=aggregate)
+
+ return results
+
+
+def get_distortions_from_file(filename):
+
+ eval_output = mmcv.load(filename)
+
+ return get_distortions_from_results(eval_output)
+
+
+def get_distortions_from_results(eval_output):
+ distortions = []
+ for i, distortion in enumerate(eval_output):
+ distortions.append(distortion.replace("_", " "))
+ return distortions
+
+
+def main():
+ parser = ArgumentParser(description='Corruption Result Analysis')
+ parser.add_argument('filename', help='result file path')
+ parser.add_argument(
+ '--dataset',
+ type=str,
+ choices=['coco', 'voc', 'cityscapes'],
+ default='coco',
+ help='dataset type')
+ parser.add_argument(
+ '--task',
+ type=str,
+ nargs='+',
+ choices=['bbox', 'segm'],
+ default=['bbox'],
+ help='task to report')
+ parser.add_argument(
+ '--metric',
+ nargs='+',
+ choices=[
+ None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10',
+ 'AR100', 'ARs', 'ARm', 'ARl'
+ ],
+ default=None,
+ help='metric to report')
+ parser.add_argument(
+ '--prints',
+ type=str,
+ nargs='+',
+ choices=['P', 'mPC', 'rPC'],
+ default='mPC',
+ help='corruption benchmark metric to print')
+ parser.add_argument(
+ '--aggregate',
+ type=str,
+ choices=['all', 'benchmark'],
+ default='benchmark',
+ help='aggregate all results or only those \
+ for benchmark corruptions')
+
+ args = parser.parse_args()
+
+ for task in args.task:
+ get_results(
+ args.filename,
+ dataset=args.dataset,
+ task=task,
+ metric=args.metric,
+ prints=args.prints,
+ aggregate=args.aggregate)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/slurm_test.sh b/cv/instance_segmentation/SOLO/pytorch/tools/slurm_test.sh
new file mode 100644
index 000000000..8950bc816
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/slurm_test.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+CHECKPOINT=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+PY_ARGS=${@:5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/slurm_train.sh b/cv/instance_segmentation/SOLO/pytorch/tools/slurm_train.sh
new file mode 100644
index 000000000..45474c46a
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/slurm_train.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+WORK_DIR=$4
+GPUS=${5:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+PY_ARGS=${PY_ARGS:-"--validate"}
+
+srun -p ${PARTITION} \
+ --job-name=${JOB_NAME} \
+ --gres=gpu:${GPUS_PER_NODE} \
+ --ntasks=${GPUS} \
+ --ntasks-per-node=${GPUS_PER_NODE} \
+ --cpus-per-task=${CPUS_PER_TASK} \
+ --kill-on-bad-exit=1 \
+ ${SRUN_ARGS} \
+ python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/test.py b/cv/instance_segmentation/SOLO/pytorch/tools/test.py
new file mode 100644
index 000000000..b39cf13ab
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/test.py
@@ -0,0 +1,282 @@
+import argparse
+import os
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+
+from mmdet.core import coco_eval, results2json, wrap_fp16_model
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.models import build_detector
+
+
+def single_gpu_test(model, data_loader, show=False):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=not show, **data)
+ results.append(result)
+
+ if show:
+ model.module.show_result(data, result)
+
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+    This method tests the model with multiple gpus and collects the results
+    under two different modes: gpu and cpu. When 'gpu_collect=True', results
+    are encoded as gpu tensors and collected via gpu communication. In cpu
+    mode, each gpu saves its results to 'tmpdir' and the rank 0 worker
+    collects them.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ results.append(result)
+
+ if rank == 0:
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ tmpdir = tempfile.mkdtemp()
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ rank, world_size = get_dist_info()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+ part_send[:shape_tensor[0]] = part_tensor
+ part_recv_list = [
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
+ ]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_list.append(
+ pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='MMDet test detector')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--out', help='output result file')
+ parser.add_argument(
+ '--json_out',
+ help='output result file name without extension',
+ type=str)
+ parser.add_argument(
+ '--eval',
+ type=str,
+ nargs='+',
+ choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
+ help='eval types')
+ parser.add_argument('--show', action='store_true', help='show results')
+ parser.add_argument(
+ '--gpu_collect',
+ action='store_true',
+ help='whether to use gpu to collect results')
+ parser.add_argument('--tmpdir', help='tmp dir for writing some results')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def main():
+ args = parse_args()
+
+ assert args.out or args.show or args.json_out, \
+ ('Please specify at least one operation (save or show the results) '
+ 'with the argument "--out" or "--show" or "--json_out"')
+
+ if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+ raise ValueError('The output file must be a pkl file.')
+
+ if args.json_out is not None and args.json_out.endswith('.json'):
+ args.json_out = args.json_out[:-5]
+
+ cfg = mmcv.Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # build the dataloader
+ # TODO: support multiple images per gpu (only minor changes are needed)
+ dataset = build_dataset(cfg.data.test)
+ data_loader = build_dataloader(
+ dataset,
+ imgs_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=distributed,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+ checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+    # old versions did not save class info in checkpoints, this workaround is
+ # for backward compatibility
+ if 'CLASSES' in checkpoint['meta']:
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ else:
+ model.CLASSES = dataset.CLASSES
+
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[0])
+ outputs = single_gpu_test(model, data_loader, args.show)
+ else:
+ model = MMDistributedDataParallel(model.cuda())
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+ args.gpu_collect)
+
+ rank, _ = get_dist_info()
+ if args.out and rank == 0:
+ print('\nwriting results to {}'.format(args.out))
+ mmcv.dump(outputs, args.out)
+ eval_types = args.eval
+ if eval_types:
+            print('Starting to evaluate {}'.format(' and '.join(eval_types)))
+ if eval_types == ['proposal_fast']:
+ result_file = args.out
+ coco_eval(result_file, eval_types, dataset.coco)
+ else:
+ if not isinstance(outputs[0], dict):
+ result_files = results2json(dataset, outputs, args.out)
+ coco_eval(result_files, eval_types, dataset.coco)
+ else:
+ for name in outputs[0]:
+ print('\nEvaluating {}'.format(name))
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.out + '.{}'.format(name)
+ result_files = results2json(dataset, outputs_,
+ result_file)
+ coco_eval(result_files, eval_types, dataset.coco)
+
+ # Save predictions in the COCO json format
+ if args.json_out and rank == 0:
+ if not isinstance(outputs[0], dict):
+ results2json(dataset, outputs, args.json_out)
+ else:
+ for name in outputs[0]:
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.json_out + '.{}'.format(name)
+ results2json(dataset, outputs_, result_file)
+
+
+if __name__ == '__main__':
+ main()
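A standalone sketch (single process, CPU tensors) of the tmpdir handshake used by collect_results_cpu(): rank 0 packs its temporary directory into a fixed-size uint8 tensor, which dist.broadcast() would then share with every rank; the path below is a made-up stand-in for tempfile.mkdtemp().

    import torch

    MAX_LEN = 512
    dir_tensor = torch.full((MAX_LEN, ), 32, dtype=torch.uint8)   # 32 == ASCII space padding
    tmpdir = '/tmp/tmp_solo_eval'                                 # placeholder for mkdtemp()
    encoded = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8)
    dir_tensor[:len(encoded)] = encoded
    # in the real code: dist.broadcast(dir_tensor, 0), with the tensors on 'cuda'
    decoded = dir_tensor.numpy().tobytes().decode().rstrip()
    assert decoded == tmpdir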
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/test_ins.py b/cv/instance_segmentation/SOLO/pytorch/tools/test_ins.py
new file mode 100644
index 000000000..66843fb1d
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/test_ins.py
@@ -0,0 +1,257 @@
+import argparse
+import os
+import os.path as osp
+import shutil
+import tempfile
+
+import mmcv
+import torch
+import torch.nn.functional as F
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import init_dist, get_dist_info, load_checkpoint
+
+from mmdet.core import coco_eval, results2json, results2json_segm, wrap_fp16_model, tensor2imgs, get_classes
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.models import build_detector
+import time
+import numpy as np
+import pycocotools.mask as mask_util
+
+
+def get_masks(result, num_classes=80):
+ for cur_result in result:
+ masks = [[] for _ in range(num_classes)]
+ if cur_result is None:
+ return masks
+ seg_pred = cur_result[0].cpu().numpy().astype(np.uint8)
+        cate_label = cur_result[1].cpu().numpy().astype(int)
+        cate_score = cur_result[2].cpu().numpy().astype(float)
+ num_ins = seg_pred.shape[0]
+ for idx in range(num_ins):
+ cur_mask = seg_pred[idx, ...]
+ rle = mask_util.encode(
+ np.array(cur_mask[:, :, np.newaxis], order='F'))[0]
+ rst = (rle, cate_score[idx])
+ masks[cate_label[idx]].append(rst)
+
+ return masks
+
+
+def single_gpu_test(model, data_loader, show=False, verbose=True):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+
+ num_classes = len(dataset.CLASSES)
+
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ seg_result = model(return_loss=False, rescale=not show, **data)
+
+ result = get_masks(seg_result, num_classes=num_classes)
+ results.append(result)
+
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ num_classes = len(dataset.CLASSES)
+
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ seg_result = model(return_loss=False, rescale=True, **data)
+
+ result = get_masks(seg_result, num_classes=num_classes)
+ results.append(result)
+
+ if rank == 0:
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ results = collect_results(results, len(dataset), tmpdir)
+
+ return results
+
+
+def collect_results(result_part, size, tmpdir=None):
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ tmpdir = tempfile.mkdtemp()
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='MMDet test detector')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--out', help='output result file')
+ parser.add_argument(
+ '--json_out',
+ help='output result file name without extension',
+ type=str)
+ parser.add_argument(
+ '--eval',
+ type=str,
+ nargs='+',
+ choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
+ help='eval types')
+ parser.add_argument('--show', action='store_true', help='show results')
+ parser.add_argument('--tmpdir', help='tmp dir for writing some results')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def main():
+ args = parse_args()
+
+ assert args.out or args.show or args.json_out, \
+ ('Please specify at least one operation (save or show the results) '
+ 'with the argument "--out" or "--show" or "--json_out"')
+
+ if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+ raise ValueError('The output file must be a pkl file.')
+
+ if args.json_out is not None and args.json_out.endswith('.json'):
+ args.json_out = args.json_out[:-5]
+
+ cfg = mmcv.Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # build the dataloader
+ # TODO: support multiple images per gpu (only minor changes are needed)
+ dataset = build_dataset(cfg.data.test)
+ data_loader = build_dataloader(
+ dataset,
+ imgs_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=distributed,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+
+ while not osp.isfile(args.checkpoint):
+ print('Waiting for {} to exist...'.format(args.checkpoint))
+ time.sleep(60)
+
+ checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+    # old versions did not save class info in checkpoints, this workaround is
+ # for backward compatibility
+ if 'CLASSES' in checkpoint['meta']:
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ else:
+ model.CLASSES = dataset.CLASSES
+
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[0])
+ outputs = single_gpu_test(model, data_loader)
+ else:
+ model = MMDistributedDataParallel(model.cuda())
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir)
+
+ rank, _ = get_dist_info()
+ if args.out and rank == 0:
+ print('\nwriting results to {}'.format(args.out))
+ mmcv.dump(outputs, args.out)
+ eval_types = args.eval
+ if eval_types:
+            print('Starting to evaluate {}'.format(' and '.join(eval_types)))
+ if eval_types == ['proposal_fast']:
+ result_file = args.out
+ coco_eval(result_file, eval_types, dataset.coco)
+ else:
+ if not isinstance(outputs[0], dict):
+ result_files = results2json_segm(dataset, outputs, args.out)
+ coco_eval(result_files, eval_types, dataset.coco)
+ else:
+ for name in outputs[0]:
+ print('\nEvaluating {}'.format(name))
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.out + '.{}'.format(name)
+ result_files = results2json(dataset, outputs_,
+ result_file)
+ coco_eval(result_files, eval_types, dataset.coco)
+
+ # Save predictions in the COCO json format
+ if args.json_out and rank == 0:
+ if not isinstance(outputs[0], dict):
+ results2json(dataset, outputs, args.json_out)
+ else:
+ for name in outputs[0]:
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.json_out + '.{}'.format(name)
+ results2json(dataset, outputs_, result_file)
+
+
+if __name__ == '__main__':
+ main()
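A toy sketch of the per-class (RLE, score) structure that get_masks() produces for results2json_segm; the mask, category id and score are fabricated, and only pycocotools is exercised.

    import numpy as np
    import pycocotools.mask as mask_util

    masks = [[] for _ in range(80)]                      # one bucket per COCO class
    toy_mask = np.zeros((4, 6), dtype=np.uint8)
    toy_mask[1:3, 2:5] = 1                               # fabricated 2x3 instance
    rle = mask_util.encode(np.array(toy_mask[:, :, np.newaxis], order='F'))[0]
    masks[17].append((rle, 0.91))                        # (RLE, confidence) under class 17
    assert mask_util.decode(rle).sum() == toy_mask.sum()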
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/test_ins_vis.py b/cv/instance_segmentation/SOLO/pytorch/tools/test_ins_vis.py
new file mode 100644
index 000000000..e4490d25e
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/test_ins_vis.py
@@ -0,0 +1,296 @@
+import argparse
+import os
+import os.path as osp
+import shutil
+import tempfile
+from scipy import ndimage
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import init_dist, get_dist_info, load_checkpoint
+
+from mmdet.core import coco_eval, results2json, wrap_fp16_model, tensor2imgs, get_classes
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.models import build_detector
+import cv2
+
+import numpy as np
+import matplotlib.cm as cm
+
+def vis_seg(data, result, img_norm_cfg, data_id, colors, score_thr, save_dir):
+ img_tensor = data['img'][0]
+ img_metas = data['img_meta'][0].data[0]
+ imgs = tensor2imgs(img_tensor, **img_norm_cfg)
+ assert len(imgs) == len(img_metas)
+ class_names = get_classes('coco')
+
+ for img, img_meta, cur_result in zip(imgs, img_metas, result):
+ if cur_result is None:
+ continue
+ h, w, _ = img_meta['img_shape']
+ img_show = img[:h, :w, :]
+
+ seg_label = cur_result[0]
+ seg_label = seg_label.cpu().numpy().astype(np.uint8)
+ cate_label = cur_result[1]
+ cate_label = cate_label.cpu().numpy()
+ score = cur_result[2].cpu().numpy()
+
+ vis_inds = score > score_thr
+ seg_label = seg_label[vis_inds]
+ num_mask = seg_label.shape[0]
+ cate_label = cate_label[vis_inds]
+ cate_score = score[vis_inds]
+
+ mask_density = []
+ for idx in range(num_mask):
+ cur_mask = seg_label[idx, :, :]
+ cur_mask = mmcv.imresize(cur_mask, (w, h))
+ cur_mask = (cur_mask > 0.5).astype(np.int32)
+ mask_density.append(cur_mask.sum())
+ orders = np.argsort(mask_density)
+ seg_label = seg_label[orders]
+ cate_label = cate_label[orders]
+ cate_score = cate_score[orders]
+
+ seg_show = img_show.copy()
+ for idx in range(num_mask):
+ idx = -(idx+1)
+ cur_mask = seg_label[idx, :,:]
+ cur_mask = mmcv.imresize(cur_mask, (w, h))
+ cur_mask = (cur_mask > 0.5).astype(np.uint8)
+ if cur_mask.sum() == 0:
+ continue
+ color_mask = np.random.randint(
+ 0, 256, (1, 3), dtype=np.uint8)
+            cur_mask_bool = cur_mask.astype(bool)
+ seg_show[cur_mask_bool] = img_show[cur_mask_bool] * 0.5 + color_mask * 0.5
+
+ cur_cate = cate_label[idx]
+ cur_score = cate_score[idx]
+
+ label_text = class_names[cur_cate]
+ #label_text += '|{:.02f}'.format(cur_score)
+ # center
+ center_y, center_x = ndimage.measurements.center_of_mass(cur_mask)
+ vis_pos = (max(int(center_x) - 10, 0), int(center_y))
+ cv2.putText(seg_show, label_text, vis_pos,
+                        cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 255, 255))  # white text
+ mmcv.imwrite(seg_show, '{}/{}.jpg'.format(save_dir, data_id))
+
+
+def single_gpu_test(model, data_loader, args, cfg=None, verbose=True):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+
+ class_num = 1000 # ins
+ colors = [(np.random.random((1, 3)) * 255).tolist()[0] for i in range(class_num)]
+
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ seg_result = model(return_loss=False, rescale=True, **data)
+ result = None
+ results.append(result)
+
+ if verbose:
+ vis_seg(data, seg_result, cfg.img_norm_cfg, data_id=i, colors=colors, score_thr=args.score_thr, save_dir=args.save_dir)
+
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ results.append(result)
+
+ if rank == 0:
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ results = collect_results(results, len(dataset), tmpdir)
+
+ return results
+
+
+def collect_results(result_part, size, tmpdir=None):
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ tmpdir = tempfile.mkdtemp()
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='MMDet test detector')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--out', help='output result file')
+ parser.add_argument(
+ '--json_out',
+ help='output result file name without extension',
+ type=str)
+ parser.add_argument(
+ '--eval',
+ type=str,
+ nargs='+',
+ choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
+ help='eval types')
+ parser.add_argument('--show', action='store_true', help='show results')
+ parser.add_argument('--score_thr', type=float, default=0.3, help='score threshold for visualization')
+ parser.add_argument('--tmpdir', help='tmp dir for writing some results')
+ parser.add_argument('--save_dir', help='dir for saving visualized images')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def main():
+ args = parse_args()
+
+ assert args.out or args.show or args.json_out, \
+ ('Please specify at least one operation (save or show the results) '
+ 'with the argument "--out" or "--show" or "--json_out"')
+
+ if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+ raise ValueError('The output file must be a pkl file.')
+
+ if args.json_out is not None and args.json_out.endswith('.json'):
+ args.json_out = args.json_out[:-5]
+
+ cfg = mmcv.Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # build the dataloader
+ # TODO: support multiple images per gpu (only minor changes are needed)
+ dataset = build_dataset(cfg.data.test)
+ data_loader = build_dataloader(
+ dataset,
+ imgs_per_gpu=1,
+ workers_per_gpu=cfg.data.workers_per_gpu,
+ dist=distributed,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+ checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+ # old versions did not save class info in checkpoints; this workaround is
+ # for backward compatibility
+ if 'CLASSES' in checkpoint['meta']:
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ else:
+ model.CLASSES = dataset.CLASSES
+
+ # visualization here only supports single-GPU, non-distributed testing
+ assert not distributed
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[0])
+ outputs = single_gpu_test(model, data_loader, args, cfg=cfg)
+ else:
+ model = MMDistributedDataParallel(model.cuda())
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir)
+
+ rank, _ = get_dist_info()
+ if args.out and rank == 0:
+ print('\nwriting results to {}'.format(args.out))
+ mmcv.dump(outputs, args.out)
+ eval_types = args.eval
+ if eval_types:
+ print('Starting to evaluate {}'.format(' and '.join(eval_types)))
+ if eval_types == ['proposal_fast']:
+ result_file = args.out
+ coco_eval(result_file, eval_types, dataset.coco)
+ else:
+ if not isinstance(outputs[0], dict):
+ result_files = results2json(dataset, outputs, args.out)
+ coco_eval(result_files, eval_types, dataset.coco)
+ else:
+ for name in outputs[0]:
+ print('\nEvaluating {}'.format(name))
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.out + '.{}'.format(name)
+ result_files = results2json(dataset, outputs_,
+ result_file)
+ coco_eval(result_files, eval_types, dataset.coco)
+
+ # Save predictions in the COCO json format
+ if args.json_out and rank == 0:
+ if not isinstance(outputs[0], dict):
+ results2json(dataset, outputs, args.json_out)
+ else:
+ for name in outputs[0]:
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.json_out + '.{}'.format(name)
+ results2json(dataset, outputs_, result_file)
+
+
+if __name__ == '__main__':
+ main()
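
The alpha-blended instance overlay implemented in `vis_seg` above can be exercised in isolation. A minimal sketch, assuming an HxWx3 `uint8` BGR image and a single soft mask; the `overlay_mask` helper name is illustrative, not part of this patch:

```python
import cv2
import numpy as np


def overlay_mask(img, mask, alpha=0.5):
    """Blend a random instance color into `img` wherever `mask` is set.

    `img` is an HxWx3 uint8 image, `mask` an arbitrary-size soft mask;
    the mask is resized to the image and thresholded at 0.5, mirroring
    the per-instance loop in vis_seg.
    """
    h, w = img.shape[:2]
    mask = cv2.resize(mask.astype(np.float32), (w, h)) > 0.5
    color = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
    out = img.copy()
    # blend the original pixels with the instance color
    out[mask] = (img[mask] * (1 - alpha) + color * alpha).astype(np.uint8)
    return out
```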
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/test_robustness.py b/cv/instance_segmentation/SOLO/pytorch/tools/test_robustness.py
new file mode 100644
index 000000000..2271f4c06
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/test_robustness.py
@@ -0,0 +1,453 @@
+import argparse
+import copy
+import os
+import os.path as osp
+import shutil
+import tempfile
+
+import mmcv
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from robustness_eval import get_results
+
+from mmdet import datasets
+from mmdet.apis import set_random_seed
+from mmdet.core import (eval_map, fast_eval_recall, results2json,
+ wrap_fp16_model)
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.models import build_detector
+
+
+def coco_eval_with_return(result_files,
+ result_types,
+ coco,
+ max_dets=(100, 300, 1000)):
+ for res_type in result_types:
+ assert res_type in [
+ 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
+ ]
+
+ if mmcv.is_str(coco):
+ coco = COCO(coco)
+ assert isinstance(coco, COCO)
+
+ if result_types == ['proposal_fast']:
+ ar = fast_eval_recall(result_files, coco, np.array(max_dets))
+ for i, num in enumerate(max_dets):
+ print('AR@{}\t= {:.4f}'.format(num, ar[i]))
+ return
+
+ eval_results = {}
+ for res_type in result_types:
+ result_file = result_files[res_type]
+ assert result_file.endswith('.json')
+
+ coco_dets = coco.loadRes(result_file)
+ img_ids = coco.getImgIds()
+ iou_type = 'bbox' if res_type == 'proposal' else res_type
+ cocoEval = COCOeval(coco, coco_dets, iou_type)
+ cocoEval.params.imgIds = img_ids
+ if res_type == 'proposal':
+ cocoEval.params.useCats = 0
+ cocoEval.params.maxDets = list(max_dets)
+ cocoEval.evaluate()
+ cocoEval.accumulate()
+ cocoEval.summarize()
+ if res_type == 'segm' or res_type == 'bbox':
+ metric_names = [
+ 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10',
+ 'AR100', 'ARs', 'ARm', 'ARl'
+ ]
+ eval_results[res_type] = {
+ metric_names[i]: cocoEval.stats[i]
+ for i in range(len(metric_names))
+ }
+ else:
+ eval_results[res_type] = cocoEval.stats
+
+ return eval_results
+
+
+def voc_eval_with_return(result_file,
+ dataset,
+ iou_thr=0.5,
+ logger='print',
+ only_ap=True):
+ det_results = mmcv.load(result_file)
+ annotations = [dataset.get_ann_info(i) for i in range(len(dataset))]
+ if hasattr(dataset, 'year') and dataset.year == 2007:
+ dataset_name = 'voc07'
+ else:
+ dataset_name = dataset.CLASSES
+ mean_ap, eval_results = eval_map(
+ det_results,
+ annotations,
+ scale_ranges=None,
+ iou_thr=iou_thr,
+ dataset=dataset_name,
+ logger=logger)
+
+ if only_ap:
+ eval_results = [{
+ 'ap': eval_results[i]['ap']
+ } for i in range(len(eval_results))]
+
+ return mean_ap, eval_results
+
+
+def single_gpu_test(model, data_loader, show=False):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=not show, **data)
+ results.append(result)
+
+ if show:
+ model.module.show_result(data, result, dataset.img_norm_cfg)
+
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None):
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, rescale=True, **data)
+ results.append(result)
+
+ if rank == 0:
+ batch_size = data['img'][0].size(0)
+ for _ in range(batch_size * world_size):
+ prog_bar.update()
+
+ # collect results from all ranks
+ results = collect_results(results, len(dataset), tmpdir)
+
+ return results
+
+
+def collect_results(result_part, size, tmpdir=None):
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ tmpdir = tempfile.mkdtemp()
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
+ part_list.append(mmcv.load(part_file))
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='MMDet test detector')
+ parser.add_argument('config', help='test config file path')
+ parser.add_argument('checkpoint', help='checkpoint file')
+ parser.add_argument('--out', help='output result file')
+ parser.add_argument(
+ '--corruptions',
+ type=str,
+ nargs='+',
+ default='benchmark',
+ choices=[
+ 'all', 'benchmark', 'noise', 'blur', 'weather', 'digital',
+ 'holdout', 'None', 'gaussian_noise', 'shot_noise', 'impulse_noise',
+ 'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur', 'snow',
+ 'frost', 'fog', 'brightness', 'contrast', 'elastic_transform',
+ 'pixelate', 'jpeg_compression', 'speckle_noise', 'gaussian_blur',
+ 'spatter', 'saturate'
+ ],
+ help='corruptions')
+ parser.add_argument(
+ '--severities',
+ type=int,
+ nargs='+',
+ default=[0, 1, 2, 3, 4, 5],
+ help='corruption severity levels')
+ parser.add_argument(
+ '--eval',
+ type=str,
+ nargs='+',
+ choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
+ help='eval types')
+ parser.add_argument(
+ '--iou-thr',
+ type=float,
+ default=0.5,
+ help='IoU threshold for pascal voc evaluation')
+ parser.add_argument(
+ '--summaries',
+ type=bool,
+ default=False,
+ help='Print summaries for every corruption and severity')
+ parser.add_argument(
+ '--workers', type=int, default=32, help='workers per gpu')
+ parser.add_argument('--show', action='store_true', help='show results')
+ parser.add_argument('--tmpdir', help='tmp dir for writing some results')
+ parser.add_argument('--seed', type=int, default=None, help='random seed')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ parser.add_argument(
+ '--final-prints',
+ type=str,
+ nargs='+',
+ choices=['P', 'mPC', 'rPC'],
+ default='mPC',
+ help='corruption benchmark metric to print at the end')
+ parser.add_argument(
+ '--final-prints-aggregate',
+ type=str,
+ choices=['all', 'benchmark'],
+ default='benchmark',
+ help='aggregate all results or only those for benchmark corruptions')
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+ return args
+
+
+def main():
+ args = parse_args()
+
+ assert args.out or args.show, \
+ ('Please specify at least one operation (save or show the results) '
+ 'with the argument "--out" or "--show"')
+
+ if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+ raise ValueError('The output file must be a pkl file.')
+
+ cfg = mmcv.Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ cfg.model.pretrained = None
+ cfg.data.test.test_mode = True
+ if args.workers == 0:
+ args.workers = cfg.data.workers_per_gpu
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # set random seeds
+ if args.seed is not None:
+ set_random_seed(args.seed)
+
+ if 'all' in args.corruptions:
+ corruptions = [
+ 'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',
+ 'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',
+ 'brightness', 'contrast', 'elastic_transform', 'pixelate',
+ 'jpeg_compression', 'speckle_noise', 'gaussian_blur', 'spatter',
+ 'saturate'
+ ]
+ elif 'benchmark' in args.corruptions:
+ corruptions = [
+ 'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',
+ 'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',
+ 'brightness', 'contrast', 'elastic_transform', 'pixelate',
+ 'jpeg_compression'
+ ]
+ elif 'noise' in args.corruptions:
+ corruptions = ['gaussian_noise', 'shot_noise', 'impulse_noise']
+ elif 'blur' in args.corruptions:
+ corruptions = [
+ 'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur'
+ ]
+ elif 'weather' in args.corruptions:
+ corruptions = ['snow', 'frost', 'fog', 'brightness']
+ elif 'digital' in args.corruptions:
+ corruptions = [
+ 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'
+ ]
+ elif 'holdout' in args.corruptions:
+ corruptions = ['speckle_noise', 'gaussian_blur', 'spatter', 'saturate']
+ elif 'None' in args.corruptions:
+ corruptions = ['None']
+ args.severities = [0]
+ else:
+ corruptions = args.corruptions
+
+ aggregated_results = {}
+ for corr_i, corruption in enumerate(corruptions):
+ aggregated_results[corruption] = {}
+ for sev_i, corruption_severity in enumerate(args.severities):
+ # evaluate severity 0 (= no corruption) only once
+ if corr_i > 0 and corruption_severity == 0:
+ aggregated_results[corruption][0] = \
+ aggregated_results[corruptions[0]][0]
+ continue
+
+ test_data_cfg = copy.deepcopy(cfg.data.test)
+ # assign corruption and severity
+ if corruption_severity > 0:
+ corruption_trans = dict(
+ type='Corrupt',
+ corruption=corruption,
+ severity=corruption_severity)
+ # TODO: hard coded "1", we assume that the first step is
+ # loading images, which needs to be fixed in the future
+ test_data_cfg['pipeline'].insert(1, corruption_trans)
+
+ # print info
+ print('\nTesting {} at severity {}'.format(corruption,
+ corruption_severity))
+
+ # build the dataloader
+ # TODO: support multiple images per gpu
+ # (only minor changes are needed)
+ dataset = build_dataset(test_data_cfg)
+ data_loader = build_dataloader(
+ dataset,
+ imgs_per_gpu=1,
+ workers_per_gpu=args.workers,
+ dist=distributed,
+ shuffle=False)
+
+ # build the model and load checkpoint
+ model = build_detector(
+ cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+ fp16_cfg = cfg.get('fp16', None)
+ if fp16_cfg is not None:
+ wrap_fp16_model(model)
+ checkpoint = load_checkpoint(
+ model, args.checkpoint, map_location='cpu')
+ # old versions did not save class info in checkpoints,
+ # this workaround is for backward compatibility
+ if 'CLASSES' in checkpoint['meta']:
+ model.CLASSES = checkpoint['meta']['CLASSES']
+ else:
+ model.CLASSES = dataset.CLASSES
+
+ if not distributed:
+ model = MMDataParallel(model, device_ids=[0])
+ outputs = single_gpu_test(model, data_loader, args.show)
+ else:
+ model = MMDistributedDataParallel(model.cuda())
+ outputs = multi_gpu_test(model, data_loader, args.tmpdir)
+
+ rank, _ = get_dist_info()
+ if args.out and rank == 0:
+ eval_results_filename = (
+ osp.splitext(args.out)[0] + '_results' +
+ osp.splitext(args.out)[1])
+ mmcv.dump(outputs, args.out)
+ eval_types = args.eval
+ if cfg.dataset_type == 'VOCDataset':
+ if eval_types:
+ for eval_type in eval_types:
+ if eval_type == 'bbox':
+ test_dataset = mmcv.runner.obj_from_dict(
+ cfg.data.test, datasets)
+ logger = 'print' if args.summaries else None
+ mean_ap, eval_results = \
+ voc_eval_with_return(
+ args.out, test_dataset,
+ args.iou_thr, logger)
+ aggregated_results[corruption][
+ corruption_severity] = eval_results
+ else:
+ print('\nOnly "bbox" evaluation '
+ 'is supported for pascal voc')
+ else:
+ if eval_types:
+ print('Starting to evaluate {}'.format(
+ ' and '.join(eval_types)))
+ if eval_types == ['proposal_fast']:
+ result_file = args.out
+ else:
+ if not isinstance(outputs[0], dict):
+ result_files = results2json(
+ dataset, outputs, args.out)
+ else:
+ for name in outputs[0]:
+ print('\nEvaluating {}'.format(name))
+ outputs_ = [out[name] for out in outputs]
+ result_file = args.out + '.{}'.format(name)
+ result_files = results2json(
+ dataset, outputs_, result_file)
+ eval_results = coco_eval_with_return(
+ result_files, eval_types, dataset.coco)
+ aggregated_results[corruption][
+ corruption_severity] = eval_results
+ else:
+ print('\nNo task was selected for evaluation;'
+ '\nUse --eval to select a task')
+
+ # save results after each evaluation
+ mmcv.dump(aggregated_results, eval_results_filename)
+
+ # print final results
+ print('\nAggregated results:')
+ prints = args.final_prints
+ aggregate = args.final_prints_aggregate
+
+ if cfg.dataset_type == 'VOCDataset':
+ get_results(
+ eval_results_filename,
+ dataset='voc',
+ prints=prints,
+ aggregate=aggregate)
+ else:
+ get_results(
+ eval_results_filename,
+ dataset='coco',
+ prints=prints,
+ aggregate=aggregate)
+
+
+if __name__ == '__main__':
+ main()
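
The `aggregated_results` dict dumped above is keyed as corruption -> severity -> metrics, with severity 0 holding the clean run. A rough sketch of how a P / mPC / rPC summary (the metrics requested via `--final-prints`) can be derived from such a dict; the exact layout and the `'bbox'` key are assumptions about what `coco_eval_with_return` stores, not a copy of `robustness_eval.get_results`:

```python
import numpy as np


def summarize_robustness(aggregated_results, task='bbox', metric='AP'):
    """P / mPC / rPC from a {corruption: {severity: {task: {metric: v}}}} dict.

    P   = clean performance (severity 0),
    mPC = mean performance over corruptions at severities > 0,
    rPC = mPC / P (relative performance under corruption).
    """
    clean = [sev_res[0][task][metric]
             for sev_res in aggregated_results.values() if 0 in sev_res]
    corrupted = [res[task][metric]
                 for sev_res in aggregated_results.values()
                 for sev, res in sev_res.items() if sev > 0]
    p = float(np.mean(clean)) if clean else float('nan')
    mpc = float(np.mean(corrupted)) if corrupted else float('nan')
    return {'P': p, 'mPC': mpc, 'rPC': mpc / p}
```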
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/train.py b/cv/instance_segmentation/SOLO/pytorch/tools/train.py
new file mode 100644
index 000000000..7f89795d5
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/train.py
@@ -0,0 +1,125 @@
+from __future__ import division
+import argparse
+import os
+import os.path as osp
+import time
+
+import mmcv
+import torch
+from mmcv import Config
+from mmcv.runner import init_dist
+
+from mmdet import __version__
+from mmdet.apis import set_random_seed, train_detector
+from mmdet.datasets import build_dataset
+from mmdet.models import build_detector
+from mmdet.utils import get_root_logger
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a detector')
+ parser.add_argument('config', help='train config file path')
+ parser.add_argument('--work_dir', help='the dir to save logs and models')
+ parser.add_argument(
+ '--resume_from', help='the checkpoint file to resume from')
+ parser.add_argument(
+ '--validate',
+ action='store_true',
+ help='whether to evaluate the checkpoint during training')
+ parser.add_argument(
+ '--gpus',
+ type=int,
+ default=1,
+ help='number of gpus to use '
+ '(only applicable to non-distributed training)')
+ parser.add_argument('--seed', type=int, default=None, help='random seed')
+ parser.add_argument(
+ '--deterministic',
+ action='store_true',
+ help='whether to set deterministic options for CUDNN backend.')
+ parser.add_argument(
+ '--launcher',
+ choices=['none', 'pytorch', 'slurm', 'mpi'],
+ default='none',
+ help='job launcher')
+ parser.add_argument('--local_rank', type=int, default=0)
+ parser.add_argument(
+ '--autoscale-lr',
+ action='store_true',
+ help='automatically scale lr with the number of gpus')
+ args = parser.parse_args()
+ if 'LOCAL_RANK' not in os.environ:
+ os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+ return args
+
+
+def main():
+ args = parse_args()
+
+ cfg = Config.fromfile(args.config)
+ # set cudnn_benchmark
+ if cfg.get('cudnn_benchmark', False):
+ torch.backends.cudnn.benchmark = True
+ # update configs according to CLI args
+ if args.work_dir is not None:
+ cfg.work_dir = args.work_dir
+ if args.resume_from is not None:
+ cfg.resume_from = args.resume_from
+ cfg.gpus = args.gpus
+
+ if args.autoscale_lr:
+ # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
+ cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8
+
+ # init distributed env first, since logger depends on the dist info.
+ if args.launcher == 'none':
+ distributed = False
+ else:
+ distributed = True
+ init_dist(args.launcher, **cfg.dist_params)
+
+ # create work_dir
+ mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+ # init the logger before other steps
+ timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+ log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
+ logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
+
+ # log some basic info
+ logger.info('Distributed training: {}'.format(distributed))
+ logger.info('MMDetection Version: {}'.format(__version__))
+ logger.info('Config:\n{}'.format(cfg.text))
+
+ # set random seeds
+ if args.seed is not None:
+ logger.info('Set random seed to {}, deterministic: {}'.format(
+ args.seed, args.deterministic))
+ set_random_seed(args.seed, deterministic=args.deterministic)
+
+ model = build_detector(
+ cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
+
+ datasets = [build_dataset(cfg.data.train)]
+ if len(cfg.workflow) == 2:
+ datasets.append(build_dataset(cfg.data.val))
+ if cfg.checkpoint_config is not None:
+ # save mmdet version, config file content and class names in
+ # checkpoints as meta data
+ cfg.checkpoint_config.meta = dict(
+ mmdet_version=__version__,
+ config=cfg.text,
+ CLASSES=datasets[0].CLASSES)
+ # add an attribute for visualization convenience
+ model.CLASSES = datasets[0].CLASSES
+ train_detector(
+ model,
+ datasets,
+ cfg,
+ distributed=distributed,
+ validate=args.validate,
+ timestamp=timestamp)
+
+
+if __name__ == '__main__':
+ main()
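
`--autoscale-lr` above applies the linear scaling rule against the 8-GPU baseline the configs assume; a standalone sketch of the same arithmetic (the helper name is illustrative):

```python
def autoscale_lr(base_lr, gpus, base_gpus=8):
    """Linear scaling rule (https://arxiv.org/abs/1706.02677): scale the
    learning rate proportionally to the number of GPUs, relative to the
    8-GPU setup the configs were tuned for."""
    return base_lr * gpus / base_gpus


# e.g. a config tuned for lr=0.01 on 8 GPUs, run on 4 GPUs:
# autoscale_lr(0.01, 4) -> 0.005
```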
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/upgrade_model_version.py b/cv/instance_segmentation/SOLO/pytorch/tools/upgrade_model_version.py
new file mode 100644
index 000000000..00bcdf44a
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/upgrade_model_version.py
@@ -0,0 +1,42 @@
+import argparse
+import re
+from collections import OrderedDict
+
+import torch
+
+
+def convert(in_file, out_file):
+ """Convert keys in checkpoints.
+
+ There can be some breaking changes during the development of mmdetection,
+ and this tool is used for upgrading checkpoints trained with old versions
+ to the latest one.
+ """
+ checkpoint = torch.load(in_file)
+ in_state_dict = checkpoint.pop('state_dict')
+ out_state_dict = OrderedDict()
+ for key, val in in_state_dict.items():
+ # Use ConvModule instead of nn.Conv2d in RetinaNet
+ # cls_convs.0.weight -> cls_convs.0.conv.weight
+ m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
+ if m is not None:
+ param = m.groups()[1]
+ new_key = key.replace(param, 'conv.{}'.format(param))
+ out_state_dict[new_key] = val
+ continue
+
+ out_state_dict[key] = val
+ checkpoint['state_dict'] = out_state_dict
+ torch.save(checkpoint, out_file)
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Upgrade model version')
+ parser.add_argument('in_file', help='input checkpoint file')
+ parser.add_argument('out_file', help='output checkpoint file')
+ args = parser.parse_args()
+ convert(args.in_file, args.out_file)
+
+
+if __name__ == '__main__':
+ main()
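
For reference, the key rewrite performed by `convert()` above boils down to the following; a minimal sketch with the dots escaped in the pattern (the sample keys are illustrative):

```python
import re


def upgrade_key(key):
    """cls_convs.0.weight -> cls_convs.0.conv.weight (ConvModule layout)."""
    m = re.search(r'(cls_convs|reg_convs)\.\d\.(weight|bias)', key)
    if m is None:
        return key
    param = m.group(2)
    return key.replace(param, 'conv.{}'.format(param))


assert upgrade_key('bbox_head.cls_convs.0.weight') == \
    'bbox_head.cls_convs.0.conv.weight'
assert upgrade_key('backbone.layer1.0.conv1.weight') == \
    'backbone.layer1.0.conv1.weight'  # non-matching keys are untouched
```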
diff --git a/cv/instance_segmentation/SOLO/pytorch/tools/voc_eval.py b/cv/instance_segmentation/SOLO/pytorch/tools/voc_eval.py
new file mode 100644
index 000000000..be0bde6db
--- /dev/null
+++ b/cv/instance_segmentation/SOLO/pytorch/tools/voc_eval.py
@@ -0,0 +1,47 @@
+from argparse import ArgumentParser
+
+import mmcv
+
+from mmdet import datasets
+from mmdet.core import eval_map
+
+
+def voc_eval(result_file, dataset, iou_thr=0.5, nproc=4):
+ det_results = mmcv.load(result_file)
+ annotations = [dataset.get_ann_info(i) for i in range(len(dataset))]
+ if hasattr(dataset, 'year') and dataset.year == 2007:
+ dataset_name = 'voc07'
+ else:
+ dataset_name = dataset.CLASSES
+ eval_map(
+ det_results,
+ annotations,
+ scale_ranges=None,
+ iou_thr=iou_thr,
+ dataset=dataset_name,
+ logger='print',
+ nproc=nproc)
+
+
+def main():
+ parser = ArgumentParser(description='VOC Evaluation')
+ parser.add_argument('result', help='result file path')
+ parser.add_argument('config', help='config file path')
+ parser.add_argument(
+ '--iou-thr',
+ type=float,
+ default=0.5,
+ help='IoU threshold for evaluation')
+ parser.add_argument(
+ '--nproc',
+ type=int,
+ default=4,
+ help='Processes to be used for computing mAP')
+ args = parser.parse_args()
+ cfg = mmcv.Config.fromfile(args.config)
+ test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
+ voc_eval(args.result, test_dataset, args.iou_thr, args.nproc)
+
+
+if __name__ == '__main__':
+ main()
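
`eval_map` reduces per-class detections to an average precision; a minimal numpy sketch of the area-under-PR-curve AP commonly used for VOC-style evaluation (this mirrors the standard computation, not necessarily this repo's exact implementation; inputs are assumed sorted by descending detection score):

```python
import numpy as np


def average_precision(recall, precision):
    """Area under the precision-recall curve ('area' mode).

    `recall` and `precision` are 1-D arrays ordered by descending
    detection score.
    """
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    # make precision monotonically non-increasing from right to left
    for i in range(mpre.size - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])
    # sum rectangle areas where recall changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))
```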
--
Gitee
From 9bac65c93777edb31d76c69a9c4b4b08fe2809ff Mon Sep 17 00:00:00 2001
From: "li.ding"
Date: Fri, 10 Mar 2023 20:06:13 +0800
Subject: [PATCH 2/4] Add pytorch SOLO
---
cv/instance_segmentation/SOLO.zip | Bin 0 -> 604461 bytes
.../SOLO/pytorch/.github/CODE_OF_CONDUCT.md | 76 -
.../SOLO/pytorch/.github/CONTRIBUTING.md | 53 -
.../pytorch/.github/ISSUE_TEMPLATE/config.yml | 1 -
.../.github/ISSUE_TEMPLATE/error-report.md | 41 -
.../.github/ISSUE_TEMPLATE/feature_request.md | 22 -
.../ISSUE_TEMPLATE/general_questions.md | 10 -
.../SOLO/pytorch/.gitignore | 15 +-
.../SOLO/pytorch/.gitmodules | 3 -
.../SOLO/pytorch/.isort.cfg | 8 -
.../SOLO/pytorch/.pre-commit-config.yaml | 21 -
.../SOLO/pytorch/.style.yapf | 4 -
.../SOLO/pytorch/.travis.yml | 43 -
cv/instance_segmentation/SOLO/pytorch/LICENSE | 228 +-
.../SOLO/pytorch/README.md | 43 +-
.../datasets/coco_instance.py} | 81 +-
.../pytorch/configs/_base_/default_runtime.py | 27 +
.../configs/_base_/schedules/schedule_1x.py | 11 +
.../SOLO/pytorch/configs/solo/README.md | 54 +
...ecoupled_solo_light_dcn_r50_fpn_8gpu_3x.py | 136 -
...> decoupled_solo_light_r50_fpn_3x_coco.py} | 110 +-
.../solo/decoupled_solo_r101_fpn_8gpu_3x.py | 130 -
.../solo/decoupled_solo_r50_fpn_1x_coco.py | 28 +
.../solo/decoupled_solo_r50_fpn_3x_coco.py | 25 +
.../solo/decoupled_solo_r50_fpn_8gpu_1x.py | 126 -
.../solo/decoupled_solo_r50_fpn_8gpu_3x.py | 130 -
.../SOLO/pytorch/configs/solo/metafile.yml | 115 +
.../configs/solo/solo_r101_fpn_8gpu_3x.py | 130 -
.../configs/solo/solo_r50_fpn_1x_coco.py | 53 +
.../configs/solo/solo_r50_fpn_3x_coco.py | 28 +
.../configs/solo/solo_r50_fpn_8gpu_3x.py | 130 -
.../SOLO/pytorch/docker/Dockerfile | 25 +
.../SOLO/pytorch/docker/serve/Dockerfile | 49 +
.../pytorch/docker/serve/config.properties | 5 +
.../SOLO/pytorch/docker/serve/entrypoint.sh | 12 +
.../SOLO/pytorch/mmcv/__init__.py | 11 +
.../SOLO/pytorch/mmcv/cnn/__init__.py | 41 +
.../SOLO/pytorch/mmcv/cnn/alexnet.py | 61 +
.../SOLO/pytorch/mmcv/cnn/bricks/__init__.py | 35 +
.../pytorch/mmcv/cnn/bricks/activation.py | 92 +
.../ops => mmcv/cnn/bricks}/context_block.py | 39 +-
.../SOLO/pytorch/mmcv/cnn/bricks/conv.py | 44 +
.../cnn/bricks/conv2d_adaptive_padding.py | 62 +
.../utils => mmcv/cnn/bricks}/conv_module.py | 179 +-
.../SOLO/pytorch/mmcv/cnn/bricks/conv_ws.py | 148 +
.../bricks/depthwise_separable_conv_module.py | 96 +
.../SOLO/pytorch/mmcv/cnn/bricks/drop.py | 65 +
.../cnn/bricks}/generalized_attention.py | 81 +-
.../SOLO/pytorch/mmcv/cnn/bricks/hsigmoid.py | 34 +
.../SOLO/pytorch/mmcv/cnn/bricks/hswish.py | 29 +
.../SOLO/pytorch/mmcv/cnn/bricks/non_local.py | 306 ++
.../SOLO/pytorch/mmcv/cnn/bricks/norm.py | 144 +
.../SOLO/pytorch/mmcv/cnn/bricks/padding.py | 36 +
.../SOLO/pytorch/mmcv/cnn/bricks/plugin.py | 88 +
.../SOLO/pytorch/mmcv/cnn/bricks/registry.py | 16 +
.../models/utils => mmcv/cnn/bricks}/scale.py | 10 +-
.../SOLO/pytorch/mmcv/cnn/bricks/swish.py | 25 +
.../pytorch/mmcv/cnn/bricks/transformer.py | 595 ++++
.../SOLO/pytorch/mmcv/cnn/bricks/upsample.py | 84 +
.../SOLO/pytorch/mmcv/cnn/bricks/wrappers.py | 180 ++
.../SOLO/pytorch/mmcv/cnn/builder.py | 30 +
.../SOLO/pytorch/mmcv/cnn/resnet.py | 316 ++
.../SOLO/pytorch/mmcv/cnn/utils/__init__.py | 19 +
.../cnn}/utils/flops_counter.py | 481 ++--
.../pytorch/mmcv/cnn/utils/fuse_conv_bn.py | 59 +
.../SOLO/pytorch/mmcv/cnn/utils/sync_bn.py | 59 +
.../pytorch/mmcv/cnn/utils/weight_init.py | 684 +++++
.../SOLO/pytorch/mmcv/cnn/vgg.py | 175 ++
.../SOLO/pytorch/mmcv/engine/__init__.py | 8 +
.../SOLO/pytorch/mmcv/engine/test.py | 202 ++
.../SOLO/pytorch/mmcv/fileio/__init__.py | 11 +
.../SOLO/pytorch/mmcv/fileio/file_client.py | 1148 ++++++++
.../pytorch/mmcv/fileio/handlers/__init__.py | 7 +
.../SOLO/pytorch/mmcv/fileio/handlers/base.py | 30 +
.../mmcv/fileio/handlers/json_handler.py | 36 +
.../mmcv/fileio/handlers/pickle_handler.py | 28 +
.../mmcv/fileio/handlers/yaml_handler.py | 24 +
.../SOLO/pytorch/mmcv/fileio/io.py | 151 +
.../SOLO/pytorch/mmcv/fileio/parse.py | 97 +
.../SOLO/pytorch/mmcv/image/__init__.py | 28 +
.../SOLO/pytorch/mmcv/image/colorspace.py | 306 ++
.../SOLO/pytorch/mmcv/image/geometric.py | 728 +++++
.../SOLO/pytorch/mmcv/image/io.py | 258 ++
.../SOLO/pytorch/mmcv/image/misc.py | 44 +
.../SOLO/pytorch/mmcv/image/photometric.py | 428 +++
.../pytorch/mmcv/model_zoo/deprecated.json | 6 +
.../SOLO/pytorch/mmcv/model_zoo/mmcls.json | 31 +
.../pytorch/mmcv/model_zoo/open_mmlab.json | 50 +
.../SOLO/pytorch/mmcv/ops/__init__.py | 13 +
.../SOLO/pytorch/mmcv/ops/csrc/README.md | 169 ++
.../csrc/common/cuda/common_cuda_helper.hpp | 112 +
.../ops/csrc/common/cuda/nms_cuda_kernel.cuh | 74 +
.../ops/csrc/common/cuda/nms_rotated_cuda.cuh | 135 +
.../common/cuda/roi_align_cuda_kernel.cuh | 212 ++
.../csrc/common/cuda/roi_pool_cuda_kernel.cuh | 93 +
.../cuda/sigmoid_focal_loss_cuda_kernel.cuh | 71 +
.../cuda/softmax_focal_loss_cuda_kernel.cuh | 72 +
.../csrc/common/cuda/sync_bn_cuda_kernel.cuh | 331 +++
.../ops/csrc/common/pytorch_cpp_helper.hpp | 24 +
.../ops/csrc/common/pytorch_cuda_helper.hpp | 19 +
.../ops/csrc/pytorch/cuda/focal_loss_cuda.cu | 111 +
.../mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu | 53 +
.../ops/csrc/pytorch/cuda/roi_align_cuda.cu | 58 +
.../ops/csrc/pytorch/cuda/roi_pool_cuda.cu | 50 +
.../ops/csrc/pytorch/cuda/sync_bn_cuda.cu | 110 +
.../mmcv/ops/csrc/pytorch/focal_loss.cpp | 131 +
.../ops/csrc/pytorch/info.cpp} | 22 +-
.../pytorch/mmcv/ops/csrc/pytorch/nms.cpp | 261 ++
.../pytorch/mmcv/ops/csrc/pytorch/pybind.cpp | 131 +
.../mmcv/ops/csrc/pytorch/roi_align.cpp | 130 +
.../mmcv/ops/csrc/pytorch/roi_align_cpu.cpp | 431 +++
.../mmcv/ops/csrc/pytorch/roi_pool.cpp | 67 +
.../pytorch/mmcv/ops/csrc/pytorch/sync_bn.cpp | 159 ++
.../pytorch/mmcv/ops/deprecated_wrappers.py | 43 +
.../SOLO/pytorch/mmcv/ops/focal_loss.py | 212 ++
.../SOLO/pytorch/mmcv/ops/info.py | 36 +
.../SOLO/pytorch/mmcv/ops/nms.py | 417 +++
.../SOLO/pytorch/mmcv/ops/point_sample.py | 336 +++
.../SOLO/pytorch/mmcv/ops/roi_align.py | 223 ++
.../SOLO/pytorch/mmcv/ops/roi_pool.py | 86 +
.../SOLO/pytorch/mmcv/ops/sync_bn.py | 279 ++
.../SOLO/pytorch/mmcv/parallel/__init__.py | 13 +
.../SOLO/pytorch/mmcv/parallel/_functions.py | 79 +
.../SOLO/pytorch/mmcv/parallel/collate.py | 84 +
.../pytorch/mmcv/parallel/data_container.py | 89 +
.../pytorch/mmcv/parallel/data_parallel.py | 89 +
.../SOLO/pytorch/mmcv/parallel/distributed.py | 112 +
.../mmcv/parallel/distributed_deprecated.py | 70 +
.../SOLO/pytorch/mmcv/parallel/registry.py | 8 +
.../pytorch/mmcv/parallel/scatter_gather.py | 59 +
.../SOLO/pytorch/mmcv/parallel/utils.py | 20 +
.../SOLO/pytorch/mmcv/runner/__init__.py | 47 +
.../SOLO/pytorch/mmcv/runner/base_module.py | 195 ++
.../SOLO/pytorch/mmcv/runner/base_runner.py | 542 ++++
.../SOLO/pytorch/mmcv/runner/builder.py | 24 +
.../SOLO/pytorch/mmcv/runner/checkpoint.py | 707 +++++
.../mmcv/runner/default_constructor.py | 44 +
.../SOLO/pytorch/mmcv/runner/dist_utils.py | 164 ++
.../pytorch/mmcv/runner/epoch_based_runner.py | 187 ++
.../runner/fp16_utils.py} | 312 +-
.../pytorch/mmcv/runner/hooks/__init__.py | 29 +
.../pytorch/mmcv/runner/hooks/checkpoint.py | 167 ++
.../SOLO/pytorch/mmcv/runner/hooks/closure.py | 11 +
.../SOLO/pytorch/mmcv/runner/hooks/ema.py | 89 +
.../pytorch/mmcv/runner/hooks/evaluation.py | 509 ++++
.../SOLO/pytorch/mmcv/runner/hooks/hook.py | 92 +
.../pytorch/mmcv/runner/hooks/iter_timer.py | 18 +
.../mmcv/runner/hooks/logger/__init__.py | 15 +
.../pytorch/mmcv/runner/hooks/logger/base.py | 166 ++
.../mmcv/runner/hooks/logger/dvclive.py | 58 +
.../mmcv/runner/hooks/logger/mlflow.py | 78 +
.../mmcv/runner/hooks/logger/neptune.py | 82 +
.../pytorch/mmcv/runner/hooks/logger/pavi.py | 117 +
.../mmcv/runner/hooks/logger/tensorboard.py | 57 +
.../pytorch/mmcv/runner/hooks/logger/text.py | 256 ++
.../pytorch/mmcv/runner/hooks/logger/wandb.py | 56 +
.../pytorch/mmcv/runner/hooks/lr_updater.py | 670 +++++
.../SOLO/pytorch/mmcv/runner/hooks/memory.py | 25 +
.../mmcv/runner/hooks/momentum_updater.py | 493 ++++
.../pytorch/mmcv/runner/hooks/optimizer.py | 508 ++++
.../pytorch/mmcv/runner/hooks/profiler.py | 180 ++
.../pytorch/mmcv/runner/hooks/sampler_seed.py | 20 +
.../pytorch/mmcv/runner/hooks/sync_buffer.py | 22 +
.../pytorch/mmcv/runner/iter_based_runner.py | 273 ++
.../SOLO/pytorch/mmcv/runner/log_buffer.py | 41 +
.../pytorch/mmcv/runner/optimizer/__init__.py | 9 +
.../pytorch/mmcv/runner/optimizer/builder.py | 44 +
.../runner/optimizer/default_constructor.py | 247 ++
.../SOLO/pytorch/mmcv/runner/priority.py | 60 +
.../SOLO/pytorch/mmcv/runner/utils.py | 93 +
.../SOLO/pytorch/mmcv/utils/__init__.py | 69 +
.../SOLO/pytorch/mmcv/utils/config.py | 688 +++++
.../collect_env.py => mmcv/utils/env.py} | 81 +-
.../SOLO/pytorch/mmcv/utils/ext_loader.py | 71 +
.../SOLO/pytorch/mmcv/utils/logging.py | 110 +
.../SOLO/pytorch/mmcv/utils/misc.py | 377 +++
.../SOLO/pytorch/mmcv/utils/parrots_jit.py | 41 +
.../pytorch/mmcv/utils/parrots_wrapper.py | 107 +
.../SOLO/pytorch/mmcv/utils/path.py | 101 +
.../SOLO/pytorch/mmcv/utils/progressbar.py | 208 ++
.../SOLO/pytorch/mmcv/utils/registry.py | 315 ++
.../SOLO/pytorch/mmcv/utils/testing.py | 140 +
.../SOLO/pytorch/mmcv/utils/timer.py | 118 +
.../SOLO/pytorch/mmcv/utils/trace.py | 23 +
.../SOLO/pytorch/mmcv/utils/version_utils.py | 90 +
.../SOLO/pytorch/mmcv/version.py | 35 +
.../SOLO/pytorch/mmdet/__init__.py | 26 +
.../SOLO/pytorch/mmdet/apis/__init__.py | 11 +-
.../SOLO/pytorch/mmdet/apis/inference.py | 363 ++-
.../pytorch/{tools => mmdet/apis}/test.py | 221 +-
.../SOLO/pytorch/mmdet/apis/train.py | 417 ++-
.../SOLO/pytorch/mmdet/core/__init__.py | 5 +-
.../pytorch/mmdet/core/anchor/__init__.py | 20 +-
.../mmdet/core/anchor/anchor_generator.py | 858 +++++-
.../mmdet/core/anchor/anchor_target.py | 188 --
.../SOLO/pytorch/mmdet/core/anchor/builder.py | 19 +
.../mmdet/core/anchor/guided_anchor_target.py | 287 --
.../mmdet/core/anchor/point_generator.py | 235 +-
.../pytorch/mmdet/core/anchor/point_target.py | 165 --
.../SOLO/pytorch/mmdet/core/anchor/utils.py | 72 +
.../SOLO/pytorch/mmdet/core/bbox/__init__.py | 38 +-
.../mmdet/core/bbox/assign_sampling.py | 33 -
.../mmdet/core/bbox/assigners/__init__.py | 13 +-
.../bbox/assigners/approx_max_iou_assigner.py | 49 +-
.../core/bbox/assigners/assign_result.py | 70 +-
.../core/bbox/assigners/atss_assigner.py | 101 +-
.../core/bbox/assigners/base_assigner.py | 4 +-
.../bbox/assigners/center_region_assigner.py | 336 +++
.../core/bbox/assigners/grid_assigner.py | 156 +
.../core/bbox/assigners/hungarian_assigner.py | 146 +
.../bbox/assigners/mask_hungarian_assigner.py | 132 +
.../core/bbox/assigners/max_iou_assigner.py | 79 +-
.../core/bbox/assigners/point_assigner.py | 20 +-
.../core/bbox/assigners/region_assigner.py | 222 ++
.../core/bbox/assigners/sim_ota_assigner.py | 257 ++
.../bbox/assigners/task_aligned_assigner.py | 151 +
.../core/bbox/assigners/uniform_assigner.py | 135 +
.../pytorch/mmdet/core/bbox/bbox_target.py | 73 -
.../SOLO/pytorch/mmdet/core/bbox/builder.py | 21 +
.../pytorch/mmdet/core/bbox/coder/__init__.py | 15 +
.../mmdet/core/bbox/coder/base_bbox_coder.py | 18 +
.../core/bbox/coder/bucketing_bbox_coder.py | 351 +++
.../core/bbox/coder/delta_xywh_bbox_coder.py | 392 +++
.../bbox/coder/distance_point_bbox_coder.py | 63 +
.../coder/legacy_delta_xywh_bbox_coder.py | 216 ++
.../core/bbox/coder/pseudo_bbox_coder.py | 19 +
.../mmdet/core/bbox/coder/tblr_bbox_coder.py | 206 ++
.../mmdet/core/bbox/coder/yolo_bbox_coder.py | 83 +
.../SOLO/pytorch/mmdet/core/bbox/demodata.py | 29 +-
.../SOLO/pytorch/mmdet/core/bbox/geometry.py | 88 -
.../core/bbox/iou_calculators/__init__.py | 5 +
.../core/bbox/iou_calculators/builder.py | 9 +
.../bbox/iou_calculators/iou2d_calculator.py | 261 ++
.../mmdet/core/bbox/match_costs/__init__.py | 9 +
.../mmdet/core/bbox/match_costs/builder.py | 9 +
.../mmdet/core/bbox/match_costs/match_cost.py | 359 +++
.../mmdet/core/bbox/samplers/__init__.py | 7 +-
.../mmdet/core/bbox/samplers/base_sampler.py | 4 +
.../core/bbox/samplers/combined_sampler.py | 7 +-
.../samplers/instance_balanced_pos_sampler.py | 19 +-
.../bbox/samplers/iou_balanced_neg_sampler.py | 27 +-
.../core/bbox/samplers/mask_pseudo_sampler.py | 44 +
.../bbox/samplers/mask_sampling_result.py | 60 +
.../mmdet/core/bbox/samplers/ohem_sampler.py | 68 +-
.../core/bbox/samplers/pseudo_sampler.py | 22 +-
.../core/bbox/samplers/random_sampler.py | 54 +-
.../core/bbox/samplers/sampling_result.py | 49 +-
.../core/bbox/samplers/score_hlr_sampler.py | 265 ++
.../pytorch/mmdet/core/bbox/transforms.py | 341 ++-
.../mmdet/core/data_structures/__init__.py | 5 +
.../core/data_structures/general_data.py | 326 +++
.../core/data_structures/instance_data.py | 188 ++
.../pytorch/mmdet/core/evaluation/__init__.py | 23 +-
.../mmdet/core/evaluation/bbox_overlaps.py | 40 +-
.../mmdet/core/evaluation/class_names.py | 224 +-
.../mmdet/core/evaluation/coco_utils.py | 250 --
.../mmdet/core/evaluation/eval_hooks.py | 276 +-
.../pytorch/mmdet/core/evaluation/mean_ap.py | 425 ++-
.../mmdet/core/evaluation/panoptic_utils.py | 6 +
.../pytorch/mmdet/core/evaluation/recall.py | 64 +-
.../pytorch/mmdet/core/export/__init__.py | 12 +
.../mmdet/core/export/model_wrappers.py | 183 ++
.../pytorch/mmdet/core/export/onnx_helper.py | 223 ++
.../pytorch/mmdet/core/export/pytorch2onnx.py | 159 ++
.../SOLO/pytorch/mmdet/core/fp16/__init__.py | 4 -
.../SOLO/pytorch/mmdet/core/fp16/hooks.py | 127 -
.../SOLO/pytorch/mmdet/core/fp16/utils.py | 23 -
.../SOLO/pytorch/mmdet/core/hook/__init__.py | 17 +
.../pytorch/mmdet/core/hook/checkloss_hook.py | 24 +
.../SOLO/pytorch/mmdet/core/hook/ema.py | 130 +
.../mmdet/core/hook/memory_profiler_hook.py | 55 +
.../mmdet/core/hook/set_epoch_info_hook.py | 15 +
.../pytorch/mmdet/core/hook/sync_norm_hook.py | 52 +
.../mmdet/core/hook/sync_random_size_hook.py | 72 +
.../mmdet/core/hook/wandblogger_hook.py | 587 ++++
.../mmdet/core/hook/yolox_lrupdater_hook.py | 67 +
.../mmdet/core/hook/yolox_mode_switch_hook.py | 52 +
.../SOLO/pytorch/mmdet/core/mask/__init__.py | 9 +-
.../pytorch/mmdet/core/mask/mask_target.py | 124 +-
.../pytorch/mmdet/core/mask/structures.py | 1102 +++++++
.../SOLO/pytorch/mmdet/core/mask/utils.py | 63 +-
.../pytorch/mmdet/core/optimizers/__init__.py | 9 +
.../pytorch/mmdet/core/optimizers/builder.py | 33 +
.../layer_decay_optimizer_constructor.py | 154 +
.../mmdet/core/post_processing/__init__.py | 7 +-
.../mmdet/core/post_processing/bbox_nms.py | 193 +-
.../mmdet/core/post_processing/matrix_nms.py | 186 +-
.../mmdet/core/post_processing/merge_augs.py | 77 +-
.../SOLO/pytorch/mmdet/core/utils/__init__.py | 14 +-
.../pytorch/mmdet/core/utils/dist_utils.py | 163 +-
.../SOLO/pytorch/mmdet/core/utils/misc.py | 205 +-
.../mmdet/core/visualization/__init__.py | 9 +
.../pytorch/mmdet/core/visualization/image.py | 559 ++++
.../mmdet/core/visualization/palette.py | 63 +
.../SOLO/pytorch/mmdet/datasets/__init__.py | 23 +-
.../mmdet/datasets/api_wrappers/__init__.py | 7 +
.../mmdet/datasets/api_wrappers/coco_api.py | 47 +
.../api_wrappers/panoptic_evaluation.py | 228 ++
.../SOLO/pytorch/mmdet/datasets/builder.py | 184 +-
.../SOLO/pytorch/mmdet/datasets/cityscapes.py | 9 -
.../SOLO/pytorch/mmdet/datasets/coco.py | 605 +++-
.../SOLO/pytorch/mmdet/datasets/custom.py | 322 ++-
.../mmdet/datasets/dataset_wrappers.py | 411 ++-
.../pytorch/mmdet/datasets/loader/__init__.py | 4 -
.../mmdet/datasets/loader/build_loader.py | 70 -
.../mmdet/datasets/pipelines/__init__.py | 28 +-
.../mmdet/datasets/pipelines/compose.py | 30 +-
.../mmdet/datasets/pipelines/formating.py | 197 +-
.../mmdet/datasets/pipelines/formatting.py | 392 +++
.../mmdet/datasets/pipelines/instaboost.py | 91 -
.../mmdet/datasets/pipelines/loading.py | 549 +++-
.../mmdet/datasets/pipelines/test_aug.py | 38 -
.../mmdet/datasets/pipelines/test_time_aug.py | 121 +
.../mmdet/datasets/pipelines/transforms.py | 2541 +++++++++++++++--
.../SOLO/pytorch/mmdet/datasets/registry.py | 4 -
.../mmdet/datasets/samplers/__init__.py | 10 +
.../datasets/samplers/class_aware_sampler.py | 176 ++
.../datasets/samplers/distributed_sampler.py | 54 +
.../sampler.py => samplers/group_sampler.py} | 48 +-
.../datasets/samplers/infinite_sampler.py | 186 ++
.../SOLO/pytorch/mmdet/datasets/utils.py | 164 ++
.../SOLO/pytorch/mmdet/datasets/voc.py | 20 -
.../SOLO/pytorch/mmdet/datasets/wider_face.py | 42 -
.../SOLO/pytorch/mmdet/datasets/xml_style.py | 86 -
.../SOLO/pytorch/mmdet/models/__init__.py | 15 +-
.../mmdet/models/anchor_heads/__init__.py | 25 -
.../mmdet/models/anchor_heads/anchor_head.py | 330 ---
.../mmdet/models/anchor_heads/atss_head.py | 487 ----
.../anchor_heads/decoupled_solo_head.py | 484 ----
.../anchor_heads/decoupled_solo_light_head.py | 479 ----
.../mmdet/models/anchor_heads/fcos_head.py | 408 ---
.../mmdet/models/anchor_heads/fovea_head.py | 387 ---
.../anchor_heads/free_anchor_retina_head.py | 188 --
.../models/anchor_heads/ga_retina_head.py | 107 -
.../mmdet/models/anchor_heads/ga_rpn_head.py | 127 -
.../models/anchor_heads/guided_anchor_head.py | 621 ----
.../models/anchor_heads/reppoints_head.py | 596 ----
.../mmdet/models/anchor_heads/retina_head.py | 103 -
.../models/anchor_heads/retina_sepbn_head.py | 105 -
.../mmdet/models/anchor_heads/rpn_head.py | 104 -
.../mmdet/models/anchor_heads/solo_head.py | 433 ---
.../mmdet/models/anchor_heads/solov2_head.py | 483 ----
.../models/anchor_heads/solov2_light_head.py | 482 ----
.../mmdet/models/anchor_heads/ssd_head.py | 201 --
.../mmdet/models/backbones/__init__.py | 10 +-
.../pytorch/mmdet/models/backbones/hrnet.py | 524 ----
.../pytorch/mmdet/models/backbones/resnet.py | 536 ++--
.../pytorch/mmdet/models/backbones/resnext.py | 222 --
.../pytorch/mmdet/models/backbones/ssd_vgg.py | 153 -
.../mmdet/models/bbox_heads/__init__.py | 7 -
.../mmdet/models/bbox_heads/bbox_head.py | 282 --
.../models/bbox_heads/convfc_bbox_head.py | 187 --
.../models/bbox_heads/double_bbox_head.py | 170 --
.../SOLO/pytorch/mmdet/models/builder.py | 54 +-
.../mmdet/models/dense_heads/__init__.py | 3 +
.../mmdet/models/dense_heads/anchor_head.py | 542 ++++
.../models/dense_heads/base_dense_head.py | 526 ++++
.../models/dense_heads/base_mask_head.py | 116 +
.../models/dense_heads/dense_test_mixins.py | 206 ++
.../mmdet/models/dense_heads/solo_head.py | 1177 ++++++++
.../mmdet/models/detectors/__init__.py | 29 +-
.../pytorch/mmdet/models/detectors/atss.py | 16 -
.../pytorch/mmdet/models/detectors/base.py | 381 ++-
.../mmdet/models/detectors/cascade_rcnn.py | 520 ----
.../models/detectors/double_head_rcnn.py | 178 --
.../mmdet/models/detectors/fast_rcnn.py | 61 -
.../mmdet/models/detectors/faster_rcnn.py | 27 -
.../pytorch/mmdet/models/detectors/fcos.py | 16 -
.../pytorch/mmdet/models/detectors/fovea.py | 16 -
.../mmdet/models/detectors/grid_rcnn.py | 229 --
.../pytorch/mmdet/models/detectors/htc.py | 516 ----
.../mmdet/models/detectors/mask_rcnn.py | 31 -
.../models/detectors/mask_scoring_rcnn.py | 200 --
.../models/detectors/reppoints_detector.py | 81 -
.../mmdet/models/detectors/retinanet.py | 16 -
.../pytorch/mmdet/models/detectors/rpn.py | 97 -
.../mmdet/models/detectors/single_stage.py | 86 -
.../models/detectors/single_stage_ins.py | 96 -
.../detectors/single_stage_instance_seg.py | 343 +++
.../pytorch/mmdet/models/detectors/solo.py | 30 +-
.../pytorch/mmdet/models/detectors/solov2.py | 17 -
.../mmdet/models/detectors/test_mixins.py | 266 --
.../mmdet/models/detectors/two_stage.py | 346 ---
.../pytorch/mmdet/models/losses/__init__.py | 23 +-
.../pytorch/mmdet/models/losses/accuracy.py | 62 +-
.../mmdet/models/losses/balanced_l1_loss.py | 69 -
.../mmdet/models/losses/cross_entropy_loss.py | 103 -
.../pytorch/mmdet/models/losses/dice_loss.py | 146 +
.../pytorch/mmdet/models/losses/focal_loss.py | 178 +-
.../pytorch/mmdet/models/losses/ghm_loss.py | 171 --
.../pytorch/mmdet/models/losses/iou_loss.py | 330 ++-
.../pytorch/mmdet/models/losses/mse_loss.py | 25 -
.../mmdet/models/losses/smooth_l1_loss.py | 45 -
.../SOLO/pytorch/mmdet/models/losses/utils.py | 11 +-
.../mmdet/models/mask_heads/__init__.py | 11 -
.../mmdet/models/mask_heads/fcn_mask_head.py | 191 --
.../models/mask_heads/fused_semantic_head.py | 106 -
.../mmdet/models/mask_heads/grid_head.py | 361 ---
.../mmdet/models/mask_heads/htc_mask_head.py | 38 -
.../mmdet/models/mask_heads/mask_feat_head.py | 119 -
.../mmdet/models/mask_heads/maskiou_head.py | 190 --
.../pytorch/mmdet/models/necks/__init__.py | 9 +-
.../SOLO/pytorch/mmdet/models/necks/bfp.py | 102 -
.../SOLO/pytorch/mmdet/models/necks/fpn.py | 119 +-
.../SOLO/pytorch/mmdet/models/necks/hrfpn.py | 100 -
.../pytorch/mmdet/models/necks/nas_fpn.py | 186 --
.../pytorch/mmdet/models/plugins/__init__.py | 4 -
.../pytorch/mmdet/models/plugins/non_local.py | 114 -
.../SOLO/pytorch/mmdet/models/registry.py | 9 -
.../mmdet/models/roi_extractors/__init__.py | 3 -
.../models/roi_extractors/single_level.py | 107 -
.../mmdet/models/shared_heads/__init__.py | 3 -
.../mmdet/models/shared_heads/res_layer.py | 71 -
.../pytorch/mmdet/models/utils/__init__.py | 15 +-
.../pytorch/mmdet/models/utils/conv_ws.py | 46 -
.../SOLO/pytorch/mmdet/models/utils/norm.py | 55 -
.../pytorch/mmdet/models/utils/res_layer.py | 190 ++
.../pytorch/mmdet/models/utils/weight_init.py | 46 -
.../SOLO/pytorch/mmdet/ops/__init__.py | 21 -
.../SOLO/pytorch/mmdet/ops/dcn/__init__.py | 12 -
.../SOLO/pytorch/mmdet/ops/dcn/deform_conv.py | 431 ---
.../SOLO/pytorch/mmdet/ops/dcn/deform_pool.py | 252 --
.../mmdet/ops/dcn/src/deform_conv_cuda.cpp | 701 -----
.../ops/dcn/src/deform_conv_cuda_kernel.cu | 867 ------
.../mmdet/ops/dcn/src/deform_pool_cuda.cpp | 90 -
.../ops/dcn/src/deform_pool_cuda_kernel.cu | 364 ---
.../pytorch/mmdet/ops/masked_conv/__init__.py | 3 -
.../mmdet/ops/masked_conv/masked_conv.py | 89 -
.../masked_conv/src/masked_conv2d_cuda.cpp | 74 -
.../masked_conv/src/masked_conv2d_kernel.cu | 114 -
.../SOLO/pytorch/mmdet/ops/nms/__init__.py | 3 -
.../SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py | 102 -
.../pytorch/mmdet/ops/nms/src/nms_cpu.cpp | 71 -
.../pytorch/mmdet/ops/nms/src/nms_cuda.cpp | 17 -
.../pytorch/mmdet/ops/nms/src/nms_kernel.cu | 139 -
.../mmdet/ops/nms/src/soft_nms_cpu.pyx | 127 -
.../pytorch/mmdet/ops/roi_align/__init__.py | 3 -
.../pytorch/mmdet/ops/roi_align/gradcheck.py | 30 -
.../pytorch/mmdet/ops/roi_align/roi_align.py | 87 -
.../ops/roi_align/src/roi_align_cuda.cpp | 87 -
.../ops/roi_align/src/roi_align_kernel.cu | 283 --
.../pytorch/mmdet/ops/roi_pool/__init__.py | 3 -
.../pytorch/mmdet/ops/roi_pool/gradcheck.py | 16 -
.../pytorch/mmdet/ops/roi_pool/roi_pool.py | 75 -
.../mmdet/ops/roi_pool/src/roi_pool_cuda.cpp | 86 -
.../mmdet/ops/roi_pool/src/roi_pool_kernel.cu | 157 -
.../mmdet/ops/sigmoid_focal_loss/__init__.py | 3 -
.../sigmoid_focal_loss/sigmoid_focal_loss.py | 54 -
.../src/sigmoid_focal_loss.cpp | 45 -
.../src/sigmoid_focal_loss_cuda.cu | 171 --
.../SOLO/pytorch/mmdet/ops/utils/__init__.py | 7 -
.../SOLO/pytorch/mmdet/utils/__init__.py | 19 +-
.../SOLO/pytorch/mmdet/utils/collect_env.py | 17 +
.../SOLO/pytorch/mmdet/utils/compat_config.py | 139 +
.../pytorch/mmdet/utils/contextmanagers.py | 12 +-
.../SOLO/pytorch/mmdet/utils/logger.py | 97 +-
.../SOLO/pytorch/mmdet/utils/memory.py | 213 ++
.../SOLO/pytorch/mmdet/utils/misc.py | 76 +
.../SOLO/pytorch/mmdet/utils/profiling.py | 11 +-
.../SOLO/pytorch/mmdet/utils/registry.py | 79 -
.../pytorch/mmdet/utils/replace_cfg_vals.py | 70 +
.../SOLO/pytorch/mmdet/utils/setup_env.py | 53 +
.../SOLO/pytorch/mmdet/utils/split_batch.py | 45 +
.../pytorch/mmdet/utils/util_distribution.py | 74 +
.../SOLO/pytorch/mmdet/utils/util_mixins.py | 26 +-
.../SOLO/pytorch/mmdet/utils/util_random.py | 34 +
.../SOLO/pytorch/mmdet/version.py | 19 +
.../SOLO/pytorch/requirements.txt | 11 +-
.../SOLO/pytorch/requirements/build.txt | 4 -
.../SOLO/pytorch/requirements/optional.txt | 2 -
.../SOLO/pytorch/requirements/runtime.txt | 10 -
.../SOLO/pytorch/requirements/tests.txt | 11 -
.../SOLO/pytorch/setup.py | 489 ++--
.../SOLO/pytorch/tests/async_benchmark.py | 104 -
.../SOLO/pytorch/tests/test_assigner.py | 277 --
.../SOLO/pytorch/tests/test_async.py | 78 -
.../SOLO/pytorch/tests/test_config.py | 172 --
.../SOLO/pytorch/tests/test_forward.py | 388 ---
.../SOLO/pytorch/tests/test_heads.py | 340 ---
.../SOLO/pytorch/tests/test_nms.py | 70 -
.../SOLO/pytorch/tests/test_sampler.py | 249 --
.../SOLO/pytorch/tests/test_utils.py | 9 -
.../SOLO/pytorch/tools/analyze_logs.py | 178 --
.../SOLO/pytorch/tools/coco_error_analysis.py | 174 --
.../SOLO/pytorch/tools/coco_eval.py | 30 -
.../tools/convert_datasets/pascal_voc.py | 141 -
.../SOLO/pytorch/tools/detectron2pytorch.py | 88 -
.../SOLO/pytorch/tools/dist_test.sh | 11 -
.../SOLO/pytorch/tools/dist_train.sh | 8 -
.../SOLO/pytorch/tools/get_flops.py | 55 -
.../SOLO/pytorch/tools/publish_model.py | 35 -
.../SOLO/pytorch/tools/robustness_eval.py | 256 --
.../SOLO/pytorch/tools/slurm_test.sh | 23 -
.../SOLO/pytorch/tools/slurm_train.sh | 23 -
.../SOLO/pytorch/tools/test_ins.py | 257 --
.../SOLO/pytorch/tools/test_ins_vis.py | 296 --
.../SOLO/pytorch/tools/test_robustness.py | 453 ---
.../SOLO/pytorch/tools/train.py | 125 -
.../pytorch/tools/upgrade_model_version.py | 42 -
.../SOLO/pytorch/tools/voc_eval.py | 47 -
.../SOLO/pytorch/train.py | 243 ++
.../SOLO/pytorch/train.sh | 17 +
.../SOLO/pytorch/train_dist.sh | 33 +
503 files changed, 46490 insertions(+), 27464 deletions(-)
create mode 100644 cv/instance_segmentation/SOLO.zip
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/CODE_OF_CONDUCT.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/CONTRIBUTING.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/config.yml
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/error-report.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/feature_request.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.github/ISSUE_TEMPLATE/general_questions.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.gitmodules
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.isort.cfg
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.pre-commit-config.yaml
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.style.yapf
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/.travis.yml
rename cv/instance_segmentation/SOLO/pytorch/configs/{solo/solo_r50_fpn_8gpu_1x.py => _base_/datasets/coco_instance.py} (44%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/_base_/default_runtime.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/_base_/schedules/schedule_1x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/README.md
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_light_dcn_r50_fpn_8gpu_3x.py
rename cv/instance_segmentation/SOLO/pytorch/configs/solo/{decoupled_solo_light_r50_fpn_8gpu_3x.py => decoupled_solo_light_r50_fpn_3x_coco.py} (36%)
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r101_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_1x_coco.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_3x_coco.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_1x.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/metafile.yml
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r101_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_1x_coco.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_3x_coco.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/configs/solo/solo_r50_fpn_8gpu_3x.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/docker/Dockerfile
create mode 100644 cv/instance_segmentation/SOLO/pytorch/docker/serve/Dockerfile
create mode 100644 cv/instance_segmentation/SOLO/pytorch/docker/serve/config.properties
create mode 100644 cv/instance_segmentation/SOLO/pytorch/docker/serve/entrypoint.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/alexnet.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/activation.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/ops => mmcv/cnn/bricks}/context_block.py (69%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/conv.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/conv2d_adaptive_padding.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/models/utils => mmcv/cnn/bricks}/conv_module.py (35%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/conv_ws.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/depthwise_separable_conv_module.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/drop.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/models/plugins => mmcv/cnn/bricks}/generalized_attention.py (85%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/hsigmoid.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/hswish.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/non_local.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/norm.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/padding.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/plugin.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/registry.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/models/utils => mmcv/cnn/bricks}/scale.py (47%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/swish.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/transformer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/upsample.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/bricks/wrappers.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/resnet.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/utils/__init__.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet => mmcv/cnn}/utils/flops_counter.py (38%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/utils/fuse_conv_bn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/utils/sync_bn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/utils/weight_init.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/cnn/vgg.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/engine/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/engine/test.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/file_client.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/handlers/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/handlers/base.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/handlers/json_handler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/handlers/pickle_handler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/handlers/yaml_handler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/io.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/fileio/parse.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/colorspace.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/geometric.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/io.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/misc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/image/photometric.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/model_zoo/deprecated.json
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/model_zoo/mmcls.json
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/model_zoo/open_mmlab.json
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/README.md
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.cu
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/focal_loss.cpp
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/ops/utils/src/compiling_info.cpp => mmcv/ops/csrc/pytorch/info.cpp} (79%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/nms.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/pybind.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/roi_align.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/roi_pool.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/csrc/pytorch/sync_bn.cpp
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/deprecated_wrappers.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/focal_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/info.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/nms.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/point_sample.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/roi_align.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/roi_pool.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/ops/sync_bn.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/_functions.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/collate.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/data_container.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/data_parallel.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/distributed.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/distributed_deprecated.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/scatter_gather.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/parallel/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/base_module.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/base_runner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/checkpoint.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/default_constructor.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/dist_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/epoch_based_runner.py
rename cv/instance_segmentation/SOLO/pytorch/{mmdet/core/fp16/decorators.py => mmcv/runner/fp16_utils.py} (33%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/checkpoint.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/closure.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/ema.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/evaluation.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/iter_timer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/base.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/dvclive.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/mlflow.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/neptune.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/pavi.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/tensorboard.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/text.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/logger/wandb.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/lr_updater.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/memory.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/momentum_updater.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/optimizer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/profiler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/sampler_seed.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/hooks/sync_buffer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/iter_based_runner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/log_buffer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/optimizer/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/optimizer/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/optimizer/default_constructor.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/priority.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/runner/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/config.py
rename cv/instance_segmentation/SOLO/pytorch/{tools/collect_env.py => mmcv/utils/env.py} (32%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/ext_loader.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/logging.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/misc.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/parrots_jit.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/parrots_wrapper.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/path.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/progressbar.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/testing.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/timer.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/trace.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/utils/version_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmcv/version.py
rename cv/instance_segmentation/SOLO/pytorch/{tools => mmdet/apis}/test.py (44%)
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/anchor_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/builder.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/guided_anchor_target.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/point_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/anchor/utils.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assign_sampling.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/center_region_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/grid_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/hungarian_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/mask_hungarian_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/region_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/sim_ota_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/task_aligned_assigner.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/assigners/uniform_assigner.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/bbox_target.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/base_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/bucketing_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/distance_point_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/pseudo_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/tblr_bbox_coder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/coder/yolo_bbox_coder.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/geometry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/iou_calculators/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/iou_calculators/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/iou_calculators/iou2d_calculator.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/match_costs/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/match_costs/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/match_costs/match_cost.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/mask_pseudo_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/mask_sampling_result.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/bbox/samplers/score_hlr_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/data_structures/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/data_structures/general_data.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/data_structures/instance_data.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/coco_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/evaluation/panoptic_utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/export/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/export/model_wrappers.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/export/onnx_helper.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/export/pytorch2onnx.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/hooks.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/fp16/utils.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/checkloss_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/ema.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/memory_profiler_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/set_epoch_info_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/sync_norm_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/sync_random_size_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/wandblogger_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/yolox_lrupdater_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/hook/yolox_mode_switch_hook.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/mask/structures.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/optimizers/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/optimizers/builder.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/optimizers/layer_decay_optimizer_constructor.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/visualization/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/visualization/image.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/core/visualization/palette.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/api_wrappers/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/api_wrappers/coco_api.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/api_wrappers/panoptic_evaluation.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/cityscapes.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/loader/build_loader.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/formatting.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/instaboost.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_aug.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/pipelines/test_time_aug.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/samplers/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/samplers/class_aware_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/samplers/distributed_sampler.py
rename cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/{loader/sampler.py => samplers/group_sampler.py} (79%)
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/samplers/infinite_sampler.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/utils.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/voc.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/wider_face.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/datasets/xml_style.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/anchor_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/atss_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/decoupled_solo_light_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fcos_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/fovea_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/free_anchor_retina_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_retina_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ga_rpn_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/guided_anchor_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/reppoints_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/retina_sepbn_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/rpn_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solo_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/solov2_light_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/anchor_heads/ssd_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/hrnet.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/resnext.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/backbones/ssd_vgg.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/bbox_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/convfc_bbox_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/bbox_heads/double_bbox_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/anchor_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/base_dense_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/base_mask_head.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/dense_test_mixins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/dense_heads/solo_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/atss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/cascade_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/double_head_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fast_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/faster_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fcos.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/fovea.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/grid_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/htc.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/mask_scoring_rcnn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/reppoints_detector.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/retinanet.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/rpn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_ins.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/single_stage_instance_seg.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/solov2.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/test_mixins.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/detectors/two_stage.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/balanced_l1_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/cross_entropy_loss.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/dice_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/ghm_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/mse_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/losses/smooth_l1_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fcn_mask_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/fused_semantic_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/grid_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/htc_mask_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/mask_feat_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/mask_heads/maskiou_head.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/bfp.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/hrfpn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/necks/nas_fpn.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/plugins/non_local.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/registry.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/roi_extractors/single_level.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/shared_heads/res_layer.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/conv_ws.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/norm.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/res_layer.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/models/utils/weight_init.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_conv.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/deform_pool.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/masked_conv.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/nms_wrapper.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cpu.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/nms_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/nms/src/soft_nms_cpu.pyx
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/gradcheck.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/roi_align.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_align/src/roi_align_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/gradcheck.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/roi_pool.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/__init__.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/ops/utils/__init__.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/collect_env.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/compat_config.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/memory.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/misc.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/registry.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/replace_cfg_vals.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/setup_env.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/split_batch.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_distribution.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/utils/util_random.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/mmdet/version.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/build.txt
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/optional.txt
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/runtime.txt
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/requirements/tests.txt
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/async_benchmark.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_assigner.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_async.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_config.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_forward.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_heads.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_nms.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_sampler.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tests/test_utils.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/analyze_logs.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/coco_error_analysis.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/coco_eval.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/convert_datasets/pascal_voc.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/detectron2pytorch.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/dist_test.sh
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/dist_train.sh
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/get_flops.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/publish_model.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/robustness_eval.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/slurm_test.sh
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/slurm_train.sh
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_ins.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_ins_vis.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/test_robustness.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/train.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/upgrade_model_version.py
delete mode 100644 cv/instance_segmentation/SOLO/pytorch/tools/voc_eval.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/train.py
create mode 100644 cv/instance_segmentation/SOLO/pytorch/train.sh
create mode 100644 cv/instance_segmentation/SOLO/pytorch/train_dist.sh
diff --git a/cv/instance_segmentation/SOLO.zip b/cv/instance_segmentation/SOLO.zip
new file mode 100644
index 0000000000000000000000000000000000000000..bf78bb57d9197c7157c70878cc67ebac9c6c3b2a
GIT binary patch
literal 604461