From 0ada209919c56198f9049651cb3e7723290148b9 Mon Sep 17 00:00:00 2001
From: wang-shihao21 <wangshihao21@huawei.com>
Date: Thu, 5 Sep 2024 21:12:55 +0800
Subject: [PATCH] =?UTF-8?q?mxDriving=E4=BB=A3=E7=A0=81=E4=BB=93=E7=9B=AE?=
 =?UTF-8?q?=E5=BD=95=E7=BB=93=E6=9E=84=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                                |   6 +-
 README.md                                     |  27 +-
 bind/pybind.cpp                               |   7 +-
 docs/api/README.md                            | 914 +++++++++---------
 include/csrc/pybind.h                         |   8 +-
 .../BEVFormer/.gitignore                      |   1 -
 .../BEVFormer/LICENSE                         |   0
 .../BEVFormer/README.md                       |   0
 .../BEVFormer/README_ORI.md                   |   0
 .../BEVFormer/docs/getting_started.md         |   0
 .../BEVFormer/docs/install.md                 |   0
 .../BEVFormer/docs/prepare_dataset.md         |   0
 .../BEVFormer/mmcv_need/base_runner.py        |   0
 .../BEVFormer/mmcv_need/distributed.py        |   0
 .../BEVFormer/mmcv_need/epoch_based_runner.py |   0
 .../mmcv_need/modulated_deform_conv.py        |   0
 .../BEVFormer/mmcv_need/optimizer.py          |   0
 .../mmcv_need/points_in_polygons_npu.cpp      |   0
 .../BEVFormer/mmcv_need/runtime.txt           |   0
 .../BEVFormer/mmdet3d_need/__init__.py        |   0
 .../mmdet3d_need/nuscenes_dataset.py          |   0
 .../BEVFormer/mmdet3d_need/runtime.txt        |   0
 .../BEVFormer/mmdet_need/__init__.py          |   0
 .../BEVFormer/mmdet_need/resnet.py            |   0
 .../BEVFormer/projects/__init__.py            |   0
 .../configs/_base_/datasets/coco_instance.py  |   0
 .../_base_/datasets/kitti-3d-3class.py        |   0
 .../configs/_base_/datasets/kitti-3d-car.py   |   0
 .../configs/_base_/datasets/lyft-3d.py        |   0
 .../configs/_base_/datasets/nuim_instance.py  |   0
 .../configs/_base_/datasets/nus-3d.py         |   0
 .../configs/_base_/datasets/nus-mono3d.py     |   0
 .../_base_/datasets/range100_lyft-3d.py       |   0
 .../_base_/datasets/s3dis-3d-5class.py        |   0
 .../_base_/datasets/s3dis_seg-3d-13class.py   |   0
 .../_base_/datasets/scannet-3d-18class.py     |   0
 .../_base_/datasets/scannet_seg-3d-20class.py |   0
 .../_base_/datasets/sunrgbd-3d-10class.py     |   0
 .../_base_/datasets/waymoD5-3d-3class.py      |   0
 .../configs/_base_/datasets/waymoD5-3d-car.py |   0
 .../configs/_base_/default_runtime.py         |   0
 .../projects/configs/_base_/models/3dssd.py   |   0
 .../models/cascade_mask_rcnn_r50_fpn.py       |   0
 .../centerpoint_01voxel_second_secfpn_nus.py  |   0
 .../centerpoint_02pillar_second_secfpn_nus.py |   0
 .../projects/configs/_base_/models/fcos3d.py  |   0
 .../configs/_base_/models/groupfree3d.py      |   0
 .../projects/configs/_base_/models/h3dnet.py  |   0
 .../_base_/models/hv_pointpillars_fpn_lyft.py |   0
 .../_base_/models/hv_pointpillars_fpn_nus.py  |   0
 .../hv_pointpillars_fpn_range100_lyft.py      |   0
 .../models/hv_pointpillars_secfpn_kitti.py    |   0
 .../models/hv_pointpillars_secfpn_waymo.py    |   0
 .../_base_/models/hv_second_secfpn_kitti.py   |   0
 .../_base_/models/hv_second_secfpn_waymo.py   |   0
 .../configs/_base_/models/imvotenet_image.py  |   0
 .../_base_/models/mask_rcnn_r50_fpn.py        |   0
 .../configs/_base_/models/paconv_cuda_ssg.py  |   0
 .../configs/_base_/models/paconv_ssg.py       |   0
 .../projects/configs/_base_/models/parta2.py  |   0
 .../configs/_base_/models/pointnet2_msg.py    |   0
 .../configs/_base_/models/pointnet2_ssg.py    |   0
 .../projects/configs/_base_/models/votenet.py |   0
 .../configs/_base_/schedules/cosine.py        |   0
 .../configs/_base_/schedules/cyclic_20e.py    |   0
 .../configs/_base_/schedules/cyclic_40e.py    |   0
 .../_base_/schedules/mmdet_schedule_1x.py     |   0
 .../configs/_base_/schedules/schedule_2x.py   |   0
 .../configs/_base_/schedules/schedule_3x.py   |   0
 .../_base_/schedules/seg_cosine_150e.py       |   0
 .../_base_/schedules/seg_cosine_200e.py       |   0
 .../_base_/schedules/seg_cosine_50e.py        |   0
 .../configs/bevformer/bevformer_base.py       |   0
 .../configs/bevformer/bevformer_small.py      |   0
 .../configs/bevformer/bevformer_tiny.py       |   0
 .../bevformer_fp16/bevformer_tiny_fp16.py     |   0
 .../bevformerv2/bevformerv2-r50-t1-24ep.py    |   0
 .../bevformerv2/bevformerv2-r50-t1-48ep.py    |   0
 .../bevformerv2-r50-t1-base-24ep.py           |   0
 .../bevformerv2-r50-t1-base-48ep.py           |   0
 .../bevformerv2/bevformerv2-r50-t2-24ep.py    |   0
 .../bevformerv2/bevformerv2-r50-t2-48ep.py    |   0
 .../bevformerv2/bevformerv2-r50-t8-24ep.py    |   0
 .../configs/datasets/custom_lyft-3d.py        |   0
 .../configs/datasets/custom_nus-3d.py         |   0
 .../configs/datasets/custom_waymo-3d.py       |   0
 .../projects/mmdet3d_plugin/__init__.py       |   2 +
 .../mmdet3d_plugin/bevformer/__init__.py      |   0
 .../mmdet3d_plugin/bevformer/apis/__init__.py |   0
 .../bevformer/apis/mmdet_train.py             |   0
 .../mmdet3d_plugin/bevformer/apis/test.py     |   0
 .../mmdet3d_plugin/bevformer/apis/train.py    |   0
 .../bevformer/dense_heads/__init__.py         |   0
 .../bevformer/dense_heads/bev_head.py         |   0
 .../bevformer/dense_heads/bevformer_head.py   |   0
 .../bevformer/detectors/__init__.py           |   0
 .../bevformer/detectors/bevformer.py          |   0
 .../bevformer/detectors/bevformerV2.py        |   0
 .../bevformer/detectors/bevformer_fp16.py     |   0
 .../bevformer/hooks/__init__.py               |   0
 .../bevformer/hooks/custom_hooks.py           |   0
 .../bevformer/modules/__init__.py             |   0
 .../modules/custom_base_transformer_layer.py  |   0
 .../bevformer/modules/decoder.py              |   0
 .../bevformer/modules/encoder.py              |   0
 .../bevformer/modules/group_attention.py      |   0
 .../multi_scale_deformable_attn_function.py   |   0
 .../modules/spatial_cross_attention.py        |  35 +-
 .../modules/temporal_self_attention.py        |   0
 .../bevformer/modules/transformer.py          |   0
 .../bevformer/modules/transformerV2.py        |   0
 .../bevformer/runner/__init__.py              |   0
 .../bevformer/runner/epoch_based_runner.py    |   0
 .../core/bbox/assigners/__init__.py           |   0
 .../bbox/assigners/hungarian_assigner_3d.py   |   0
 .../core/bbox/coders/__init__.py              |   0
 .../core/bbox/coders/nms_free_coder.py        |   0
 .../core/bbox/match_costs/__init__.py         |   0
 .../core/bbox/match_costs/match_cost.py       |   0
 .../projects/mmdet3d_plugin/core/bbox/util.py |   0
 .../core/evaluation/__init__.py               |   0
 .../core/evaluation/eval_hooks.py             |   0
 .../core/evaluation/kitti2waymo.py            |   0
 .../mmdet3d_plugin/datasets/__init__.py       |   0
 .../mmdet3d_plugin/datasets/builder.py        |   0
 .../datasets/nuscenes_dataset.py              |   0
 .../datasets/nuscenes_dataset_v2.py           |   0
 .../datasets/nuscenes_mono_dataset.py         |   0
 .../mmdet3d_plugin/datasets/nuscnes_eval.py   |   0
 .../datasets/pipelines/__init__.py            |   0
 .../datasets/pipelines/augmentation.py        |   0
 .../datasets/pipelines/dd3d_mapper.py         |   0
 .../datasets/pipelines/formating.py           |   0
 .../datasets/pipelines/loading.py             |   0
 .../datasets/pipelines/transform_3d.py        |   0
 .../datasets/samplers/__init__.py             |   0
 .../datasets/samplers/distributed_sampler.py  |   0
 .../datasets/samplers/group_sampler.py        |   0
 .../datasets/samplers/sampler.py              |   0
 .../projects/mmdet3d_plugin/dd3d/__init__.py  |   0
 .../mmdet3d_plugin/dd3d/datasets/__init__.py  |   0
 .../mmdet3d_plugin/dd3d/datasets/nuscenes.py  |   0
 .../dd3d/datasets/transform_utils.py          |   0
 .../mmdet3d_plugin/dd3d/layers/iou_loss.py    |   0
 .../dd3d/layers/normalization.py              |   0
 .../dd3d/layers/smooth_l1_loss.py             |   0
 .../mmdet3d_plugin/dd3d/modeling/__init__.py  |   0
 .../mmdet3d_plugin/dd3d/modeling/core.py      |   0
 .../dd3d/modeling/disentangled_box3d_loss.py  |   0
 .../mmdet3d_plugin/dd3d/modeling/fcos2d.py    |   0
 .../mmdet3d_plugin/dd3d/modeling/fcos3d.py    |   0
 .../dd3d/modeling/nuscenes_dd3d.py            |   0
 .../dd3d/modeling/prepare_targets.py          |   0
 .../dd3d/structures/__init__.py               |   0
 .../mmdet3d_plugin/dd3d/structures/boxes3d.py |   0
 .../dd3d/structures/image_list.py             |   0
 .../mmdet3d_plugin/dd3d/structures/pose.py    |   0
 .../dd3d/structures/transform3d.py            |   0
 .../mmdet3d_plugin/dd3d/utils/comm.py         |   0
 .../mmdet3d_plugin/dd3d/utils/geometry.py     |   0
 .../mmdet3d_plugin/dd3d/utils/tasks.py        |   0
 .../mmdet3d_plugin/dd3d/utils/tensor2d.py     |   0
 .../dd3d/utils/visualization.py               |   0
 .../models/backbones/__init__.py              |   0
 .../mmdet3d_plugin/models/backbones/vovnet.py |   0
 .../mmdet3d_plugin/models/hooks/__init__.py   |   0
 .../mmdet3d_plugin/models/hooks/hooks.py      |   2 +
 .../mmdet3d_plugin/models/opt/__init__.py     |   0
 .../mmdet3d_plugin/models/opt/adamw.py        |   0
 .../mmdet3d_plugin/models/utils/__init__.py   |   0
 .../mmdet3d_plugin/models/utils/bricks.py     |   0
 .../mmdet3d_plugin/models/utils/grid_mask.py  |   2 +
 .../models/utils/position_embedding.py        |   0
 .../mmdet3d_plugin/models/utils/visual.py     |   0
 .../BEVFormer/public_address_statement.md     |   0
 .../BEVFormer/requirements.txt                |   0
 model_examples/BEVFormer/test/env_npu.sh      |  59 ++
 .../BEVFormer/test/train_full_8p_base_fp32.sh |  97 ++
 .../test/train_performance_8p_base_fp32.sh    |  92 ++
 .../BEVFormer/tools/__init__.py               |   0
 .../tools/analysis_tools/__init__.py          |   0
 .../tools/analysis_tools/analyze_logs.py      |   0
 .../tools/analysis_tools/benchmark.py         |   0
 .../tools/analysis_tools/get_params.py        |   0
 .../BEVFormer/tools/analysis_tools/visual.py  |   0
 .../BEVFormer/tools/create_data.py            |   0
 .../tools/data_converter/__init__.py          |   0
 .../data_converter/create_gt_database.py      |   0
 .../tools/data_converter/indoor_converter.py  |   0
 .../tools/data_converter/kitti_converter.py   |   0
 .../tools/data_converter/kitti_data_utils.py  |   0
 .../tools/data_converter/lyft_converter.py    |   0
 .../tools/data_converter/lyft_data_fixer.py   |   0
 .../tools/data_converter/nuimage_converter.py |   0
 .../data_converter/nuscenes_converter.py      |   0
 .../tools/data_converter/s3dis_data_utils.py  |   0
 .../data_converter/scannet_data_utils.py      |   0
 .../data_converter/sunrgbd_data_utils.py      |   0
 .../tools/data_converter/waymo_converter.py   |   0
 .../BEVFormer/tools/dist_test.sh              |   0
 .../BEVFormer/tools/dist_train.sh             |   0
 .../BEVFormer/tools/fp16/dist_train.sh        |   0
 .../BEVFormer/tools/fp16/train.py             |   0
 .../BEVFormer/tools/misc/browse_dataset.py    |   0
 .../BEVFormer/tools/misc/fuse_conv_bn.py      |   0
 .../BEVFormer/tools/misc/print_config.py      |   0
 .../BEVFormer/tools/misc/visualize_results.py |   0
 .../convert_votenet_checkpoints.py            |   0
 .../tools/model_converters/publish_model.py   |   0
 .../tools/model_converters/regnet2mmdet.py    |   0
 .../BEVFormer/tools/test.py                   |   0
 .../BEVFormer/tools/train.py                  |   0
 mx_driving/common/__init__.py                 |  17 -
 mx_driving/common/ops/csrc/functions.h        |  75 +-
 mx_driving/common/ops/csrc/pybind.cpp         |  70 +-
 mx_driving/data/CMakeLists.txt                |   7 +
 mx_driving/data/__init__.py                   |   3 +
 .../{motion => data}/components/README.md     |   0
 mx_driving/{motion => data/ops}/__init__.py   |   0
 .../{common => data}/ops/csrc/PointsInBox.cpp |   0
 .../ops/csrc/PointsInBoxAll.cpp               |   0
 mx_driving/data/ops/csrc/README.md            |   6 +
 .../ops/csrc/RoipointPool3dForward.cpp        |   0
 .../vision => data}/ops/csrc/functions.h      |   7 +-
 mx_driving/data/ops/csrc/pybind.cpp           |  15 +
 .../ops/kernels/CMakeLists.txt                |   0
 mx_driving/data/ops/kernels/README.md         |  13 +
 .../ops/kernels/framework/CMakeLists.txt      |   0
 .../ops/kernels/op_host/CMakeLists.txt        |   0
 mx_driving/data/ops/kernels/op_host/common.h  |  28 +
 .../ops/kernels/op_host/points_in_box.cpp     |   0
 .../ops/kernels/op_host/points_in_box_all.cpp |   0
 .../op_host/points_in_box_all_tiling.h        |   0
 .../kernels/op_host/points_in_box_tiling.h    |   0
 .../op_host/roipoint_pool3d_forward.cpp       |   0
 .../op_host/roipoint_pool3d_forward_tiling.h  |   0
 .../ops/kernels/op_kernel/CMakeLists.txt      |   0
 .../ops/kernels/op_kernel/points_in_box.cpp   |   0
 .../kernels/op_kernel/points_in_box_all.cpp   |   0
 .../op_kernel/roipoint_pool3d_forward.cpp     |   0
 .../{common => data}/ops/npu_points_in_box.py |   0
 .../ops/npu_points_in_box_all.py              |   0
 .../ops/npu_roipoint_pool3d.py                |   0
 .../{motion => detection}/CMakeLists.txt      |   0
 mx_driving/detection/__init__.py              |   5 +
 .../fused => detection}/components/README.md  |   0
 .../{perception => detection/ops}/__init__.py |   0
 .../ops/boxes_overlap_bev.py                  |   0
 .../ops/csrc/BoxesOverlapBev.cpp              |   0
 .../{common => detection}/ops/csrc/Nms3d.cpp  |   0
 .../ops/csrc/Nms3dNormal.cpp                  |   0
 .../{motion => detection}/ops/csrc/README.md  |   0
 .../ops/csrc/RotatedIou.cpp                   |   0
 .../ops/csrc/RotatedOverlaps.cpp              |   0
 .../fused => detection}/ops/csrc/functions.h  |  26 +-
 .../vision => detection}/ops/csrc/pybind.cpp  |  14 +-
 .../ops/kernels/CMakeLists.txt                |   0
 .../ops/kernels/README.md                     |   0
 .../ops/kernels/op_host/CMakeLists.txt        |   0
 .../ops/kernels/op_host/boxes_overlap_bev.cpp |   0
 .../op_host/boxes_overlap_bev_tiling.h        |   0
 .../op_host/gather_nms3d_mask_tiling.cpp      |   0
 .../op_host/gather_nms3d_mask_tiling.h        |   0
 .../ops/kernels/op_host/nms3d.cpp             |   0
 .../kernels/op_host/nms3d_normal_tiling.cpp   |   0
 .../ops/kernels/op_host/nms3d_normal_tiling.h |  24 +
 .../ops/kernels/op_host/nms3d_tiling.h        |   0
 .../ops/kernels/op_kernel/CMakeLists.txt      |   0
 .../kernels/op_kernel/boxes_overlap_bev.cpp   |   0
 .../kernels/op_kernel/gather_nms3d_mask.cpp   |   0
 .../ops/kernels/op_kernel/nms3d.cpp           |   0
 .../ops/kernels/op_kernel/nms3d_normal.cpp    |   0
 .../{common => detection}/ops/nms3d_normal.py |   0
 .../{common => detection}/ops/npu_nms3d.py    |   0
 .../{common => detection}/ops/rotated_iou.py  |   0
 .../ops/rotated_overlaps.py                   |   0
 mx_driving/fused/CMakeLists.txt               |   3 +
 mx_driving/fused/__init__.py                  |   5 +
 .../point => fused}/components/README.md      |   0
 .../{perception => }/fused/ops/__init__.py    |   0
 .../{common => fused}/ops/csrc/AddRelu.cpp    |   0
 .../ops/csrc/DeformableAggregation.cpp        |   0
 .../ops/csrc/FusedBiasLeakyRelu.cpp           |   0
 .../{common => fused}/ops/csrc/MaxPool2d.cpp  |   0
 .../csrc/MultiScaleDeformableAttnFunction.cpp |   0
 .../{perception => }/fused/ops/csrc/README.md |   0
 mx_driving/fused/ops/csrc/functions.h         |  57 ++
 mx_driving/fused/ops/csrc/pybind.cpp          |  25 +
 .../ops/fused_bias_leaky_relu.py              |   0
 .../fused/ops/kernels/CMakeLists.txt          |   0
 .../fused/ops/kernels/README.md               |   0
 .../ops/kernels/op_host/CMakeLists.txt        |   0
 .../ops/kernels/op_host/add_relu.cpp          |   0
 .../ops/kernels/op_host/add_relu_tiling.h     |   0
 mx_driving/fused/ops/kernels/op_host/common.h |  28 +
 .../op_host/deformable_aggregation.cpp        |   0
 .../op_host/deformable_aggregation_grad.cpp   |   0
 .../deformable_aggregation_grad_tiling.h      |   0
 .../op_host/deformable_aggregation_tiling.h   |   0
 .../kernels/op_host/fused_bias_leaky_relu.cpp |   0
 .../op_host/fused_bias_leaky_relu_tiling.h    |   0
 .../ops/kernels/op_host/max_pool2d.cpp        |   0
 .../ops/kernels/op_host/max_pool2d.h          |   0
 .../op_host/multi_scale_deformable_attn.cpp   |   0
 .../multi_scale_deformable_attn_grad.cpp      |   0
 .../multi_scale_deformable_attn_grad_tiling.h |   0
 ...lti_scale_deformable_attn_grad_tiling_v2.h |   0
 .../multi_scale_deformable_attn_grad_v2.cpp   |   0
 .../multi_scale_deformable_attn_tiling.h      |   0
 .../ops/kernels/op_kernel/CMakeLists.txt      |   0
 .../ops/kernels/op_kernel/add_relu.cpp        |   0
 .../fused/ops/kernels/op_kernel/common.h      |   0
 .../op_kernel/deformable_aggregation.cpp      |   0
 .../op_kernel/deformable_aggregation_grad.cpp |   0
 .../op_kernel/fused_bias_leaky_relu.cpp       |   0
 .../kernels/op_kernel/fused_bias_leaky_relu.h |   0
 .../ops/kernels/op_kernel/max_pool2d.cpp      |   0
 .../op_kernel/ms_deform_attn_generic.h        |   0
 .../op_kernel/ms_deform_attn_grad_generic.h   |   0
 .../ms_deform_attn_grad_generic_v2.h          |   0
 .../op_kernel/ms_deform_attn_grad_high_perf.h |   0
 .../ms_deform_attn_grad_high_perf_v2.h        |   0
 .../op_kernel/ms_deform_attn_high_perf.h      |   0
 .../op_kernel/multi_scale_deformable_attn.cpp |   0
 .../multi_scale_deformable_attn_grad.cpp      |   0
 .../multi_scale_deformable_attn_grad_v2.cpp   |   0
 .../{common => fused}/ops/npu_add_relu.py     |   0
 .../ops/npu_deformable_aggregation.py         |   0
 .../{common => fused}/ops/npu_max_pool2d.py   |   1 +
 ...pu_multi_scale_deformable_attn_function.py |   0
 mx_driving/motion/ops/csrc/pybind.cpp         |   5 -
 .../ops/kernels/op_kernel/CMakeLists.txt      |   0
 mx_driving/perception/CMakeLists.txt          |   9 -
 mx_driving/perception/fused/__init__.py       |   2 -
 .../perception/fused/ops/csrc/pybind.cpp      |  13 -
 mx_driving/perception/point/__init__.py       |   2 -
 mx_driving/perception/vision/__init__.py      |   1 -
 mx_driving/perception/vision/ops/__init__.py  |   0
 .../perception/vision/ops/csrc/README.md      |   2 -
 .../perception/vision/ops/kernels/README.md   |   2 -
 mx_driving/point/CMakeLists.txt               |   3 +
 mx_driving/point/__init__.py                  |   8 +
 .../vision => point}/components/README.md     |   0
 .../{perception => }/point/ops/__init__.py    |   0
 .../fused => point}/ops/bev_pool.py           |   0
 .../fused => point}/ops/bev_pool_v2.py        |   0
 .../fused => point}/ops/csrc/BEVPool.cpp      |   0
 .../ops/csrc/BEVPoolBackward.cpp              |   0
 .../fused => point}/ops/csrc/BEVPoolV2.cpp    |   0
 .../ops/csrc/BEVPoolV2Backward.cpp            |   0
 .../ops/csrc/DynamicScatter.cpp               |   0
 .../ops/csrc/DynamicVoxelization.cpp          |   0
 .../ops/csrc/FurthestPointSampling.cpp        |   0
 .../csrc/FurthestPointSamplingWithDist.cpp    |   0
 .../point/ops/csrc/GroupPoints.cpp            |   0
 .../point/ops/csrc/HardVoxelize.cpp           |   0
 .../point/ops/csrc/PointToVoxel.cpp           |   0
 .../{perception => }/point/ops/csrc/README.md |   0
 .../point/ops/csrc/UniqueVoxel.cpp            |   0
 .../point/ops/csrc/VecPoolBackward.cpp        |   0
 .../ops/csrc/VoxelPoolingTrain.cpp            |   0
 .../point/ops/csrc/VoxelToPoint.cpp           |   0
 .../point/ops/csrc/functions.h                |  35 +
 .../point/ops/csrc/pybind.cpp                 |  25 +-
 .../ops/furthest_point_sampling.py            |   0
 .../ops/furthest_point_sampling_with_dist.py  |   0
 .../point/ops/group_points.py                 |   0
 .../ops/kernels/CMakeLists.txt                |   0
 .../point/ops/kernels/README.md               |   0
 .../ops/kernels/op_host/CMakeLists.txt        |   0
 .../ops/kernels/op_host/bev_pool.cpp          |   0
 .../ops/kernels/op_host/bev_pool_tiling.h     |   0
 .../ops/kernels/op_host/dynamic_scatter.cpp   |   0
 .../kernels/op_host/dynamic_scatter_grad.cpp  |   0
 .../op_host/dynamic_scatter_grad_tiling.h     |   0
 .../kernels/op_host/dynamic_scatter_tiling.h  |   0
 .../kernels/op_host/dynamic_voxelization.cpp  |   0
 .../op_host/dynamic_voxelization_tiling.h     |   0
 .../op_host/furthest_point_sampling.cpp       |   0
 .../op_host/furthest_point_sampling_tiling.h  |   0
 .../furthest_point_sampling_with_dist.cpp     |   0
 ...furthest_point_sampling_with_dist_tiling.h |   0
 .../ops/kernels/op_host/group_points.cpp      |   0
 .../ops/kernels/op_host/group_points_grad.cpp |   0
 .../op_host/group_points_grad_tiling.h        |   0
 .../ops/kernels/op_host/group_points_tiling.h |   0
 .../ops/kernels/op_host/hard_voxelize.cpp     |   0
 .../kernels/op_host/hard_voxelize_tiling.h    |   0
 .../ops/kernels/op_host/point_to_voxel.cpp    |   0
 .../kernels/op_host/point_to_voxel_tiling.h   |   0
 .../ops/kernels/op_host/unique_voxel.cpp      |   0
 .../ops/kernels/op_host/unique_voxel_tiling.h |   0
 .../ops/kernels/op_host/vec_pool_grad.cpp     |   0
 .../kernels/op_host/vec_pool_grad_tiling.h    |   0
 .../kernels/op_host/voxel_pooling_train.cpp   |   0
 .../op_host/voxel_pooling_train_grad.cpp      |   0
 .../op_host/voxel_pooling_train_grad_tiling.h |   0
 .../op_host/voxel_pooling_train_tiling.h      |   0
 .../ops/kernels/op_kernel/CMakeLists.txt      |   4 +
 .../ops/kernels/op_kernel/bev_pool.cpp        |   0
 .../ops/kernels/op_kernel/bev_pool.h          |   0
 .../ops/kernels/op_kernel/bev_pool_grad.cpp   |   0
 .../ops/kernels/op_kernel/bev_pool_v2.cpp     |   0
 .../ops/kernels/op_kernel/bev_pool_v2.h       |   0
 .../kernels/op_kernel/bev_pool_v2_grad.cpp    |   0
 .../point/ops/kernels/op_kernel/common.h      |  46 +
 .../ops/kernels/op_kernel/dynamic_scatter.cpp |   0
 .../kernels/op_kernel/dynamic_scatter_base.h  |   0
 .../op_kernel/dynamic_scatter_grad.cpp        |   0
 .../op_kernel/dynamic_scatter_grad_base.h     |   0
 .../op_kernel/dynamic_scatter_grad_max.h      |   0
 .../op_kernel/dynamic_scatter_grad_mean.h     |   0
 .../op_kernel/dynamic_scatter_grad_sum.h      |   0
 .../kernels/op_kernel/dynamic_scatter_max.h   |   0
 .../kernels/op_kernel/dynamic_scatter_mean.h  |   0
 .../kernels/op_kernel/dynamic_scatter_sum.h   |   0
 .../op_kernel/dynamic_voxelization.cpp        |   0
 .../op_kernel/furthest_point_sampling.cpp     |   0
 .../op_kernel/furthest_point_sampling.h       |   0
 .../furthest_point_sampling_with_dist.cpp     |   0
 .../ops/kernels/op_kernel/group_points.cpp    |   0
 .../kernels/op_kernel/group_points_grad.cpp   |   0
 .../ops/kernels/op_kernel/hard_voxelize.cpp   |   0
 .../ops/kernels/op_kernel/point_to_voxel.cpp  |   0
 .../ops/kernels/op_kernel/unique_voxel.cpp    |   0
 .../ops/kernels/op_kernel/vec_pool_grad.cpp   |   0
 .../kernels/op_kernel/voxel_pooling_train.cpp |   0
 .../op_kernel/voxel_pooling_train_grad.cpp    |   0
 .../ops/kernels/op_kernel/voxel_to_point.cpp  |   0
 .../ops/npu_dynamic_scatter.py                |   0
 .../ops/voxel_pooling_train.py                |   0
 .../{common => point}/ops/voxelization.py     |   0
 setup.py                                      |   2 +-
 tests/torch/test_add_relu.py                  |  10 +-
 tests/torch/test_bev_pool.py                  |   2 +-
 tests/torch/test_bev_pool_v2.py               |   2 +-
 tests/torch/test_boxes_overlap_bev.py         |   4 +-
 tests/torch/test_deformable_aggregation.py    |   4 +-
 .../torch/test_deformable_aggregation_grad.py |   4 +-
 .../test_furthest_point_sample_with_dist.py   |   4 +-
 tests/torch/test_furthest_point_sampling.py   |   4 +-
 tests/torch/test_fused_bias_leaky_relu.py     |  10 +-
 tests/torch/test_group_points.py              |   2 +-
 ...st_multi_scale_deformable_attn_function.py |   4 +-
 tests/torch/test_npu_dyn_voxelization.py      |   4 +-
 tests/torch/test_npu_dynamic_scatter.py       |   6 +-
 tests/torch/test_npu_max_pool2d.py            |   4 +-
 tests/torch/test_npu_nms3d.py                 |   4 +-
 tests/torch/test_npu_nms3d_normal.py          |  16 +-
 tests/torch/test_points_in_box.py             |  12 +-
 tests/torch/test_points_in_box_all.py         |  12 +-
 tests/torch/test_roipoint_pool3d.py           |   2 +-
 tests/torch/test_rotated_iou.py               |   4 +-
 tests/torch/test_voxel_pooling_train.py       |   4 +-
 454 files changed, 1200 insertions(+), 758 deletions(-)
 rename {examples => model_examples}/BEVFormer/.gitignore (99%)
 rename {examples => model_examples}/BEVFormer/LICENSE (100%)
 rename {examples => model_examples}/BEVFormer/README.md (100%)
 rename {examples => model_examples}/BEVFormer/README_ORI.md (100%)
 rename {examples => model_examples}/BEVFormer/docs/getting_started.md (100%)
 rename {examples => model_examples}/BEVFormer/docs/install.md (100%)
 rename {examples => model_examples}/BEVFormer/docs/prepare_dataset.md (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/base_runner.py (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/distributed.py (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/epoch_based_runner.py (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/modulated_deform_conv.py (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/optimizer.py (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/points_in_polygons_npu.cpp (100%)
 rename {examples => model_examples}/BEVFormer/mmcv_need/runtime.txt (100%)
 rename {examples => model_examples}/BEVFormer/mmdet3d_need/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/mmdet3d_need/nuscenes_dataset.py (100%)
 rename {examples => model_examples}/BEVFormer/mmdet3d_need/runtime.txt (100%)
 rename {examples => model_examples}/BEVFormer/mmdet_need/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/mmdet_need/resnet.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/coco_instance.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/kitti-3d-3class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/kitti-3d-car.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/lyft-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/nuim_instance.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/nus-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/nus-mono3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/range100_lyft-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/s3dis-3d-5class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/scannet-3d-18class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/scannet_seg-3d-20class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/sunrgbd-3d-10class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-3class.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-car.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/default_runtime.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/3dssd.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/fcos3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/groupfree3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/h3dnet.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_kitti.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_waymo.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/imvotenet_image.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/mask_rcnn_r50_fpn.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/paconv_cuda_ssg.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/paconv_ssg.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/parta2.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/pointnet2_msg.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/pointnet2_ssg.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/models/votenet.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/cosine.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/cyclic_20e.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/cyclic_40e.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/mmdet_schedule_1x.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/schedule_2x.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/schedule_3x.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/seg_cosine_150e.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/seg_cosine_200e.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/_base_/schedules/seg_cosine_50e.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformer/bevformer_base.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformer/bevformer_small.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformer/bevformer_tiny.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformer_fp16/bevformer_tiny_fp16.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-24ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-48ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-24ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-48ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-24ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-48ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t8-24ep.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/datasets/custom_lyft-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/datasets/custom_nus-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/configs/datasets/custom_waymo-3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/__init__.py (83%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/mmdet_train.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/test.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/train.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bev_head.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bevformer_head.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformerV2.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/custom_base_transformer_layer.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/decoder.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/encoder.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/group_attention.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/multi_scale_deformable_attn_function.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py (95%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformer.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformerV2.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/bbox/util.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/evaluation/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/builder.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/nuscnes_eval.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/augmentation.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/formating.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/loading.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/transform_3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/image_list.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/pose.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/transform3d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/comm.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/geometry.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tasks.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tensor2d.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/visualization.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/backbones/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/backbones/vovnet.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/hooks/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py (81%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/opt/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/opt/adamw.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/utils/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/utils/bricks.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py (97%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/utils/position_embedding.py (100%)
 rename {examples => model_examples}/BEVFormer/projects/mmdet3d_plugin/models/utils/visual.py (100%)
 rename {examples => model_examples}/BEVFormer/public_address_statement.md (100%)
 rename {examples => model_examples}/BEVFormer/requirements.txt (100%)
 create mode 100644 model_examples/BEVFormer/test/env_npu.sh
 create mode 100644 model_examples/BEVFormer/test/train_full_8p_base_fp32.sh
 create mode 100644 model_examples/BEVFormer/test/train_performance_8p_base_fp32.sh
 rename {examples => model_examples}/BEVFormer/tools/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/analysis_tools/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/analysis_tools/analyze_logs.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/analysis_tools/benchmark.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/analysis_tools/get_params.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/analysis_tools/visual.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/create_data.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/__init__.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/create_gt_database.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/indoor_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/kitti_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/kitti_data_utils.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/lyft_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/lyft_data_fixer.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/nuimage_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/nuscenes_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/s3dis_data_utils.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/scannet_data_utils.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/sunrgbd_data_utils.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/data_converter/waymo_converter.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/dist_test.sh (100%)
 rename {examples => model_examples}/BEVFormer/tools/dist_train.sh (100%)
 rename {examples => model_examples}/BEVFormer/tools/fp16/dist_train.sh (100%)
 rename {examples => model_examples}/BEVFormer/tools/fp16/train.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/misc/browse_dataset.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/misc/fuse_conv_bn.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/misc/print_config.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/misc/visualize_results.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/model_converters/convert_votenet_checkpoints.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/model_converters/publish_model.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/model_converters/regnet2mmdet.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/test.py (100%)
 rename {examples => model_examples}/BEVFormer/tools/train.py (100%)
 create mode 100644 mx_driving/data/CMakeLists.txt
 create mode 100644 mx_driving/data/__init__.py
 rename mx_driving/{motion => data}/components/README.md (100%)
 rename mx_driving/{motion => data/ops}/__init__.py (100%)
 rename mx_driving/{common => data}/ops/csrc/PointsInBox.cpp (100%)
 rename mx_driving/{common => data}/ops/csrc/PointsInBoxAll.cpp (100%)
 create mode 100644 mx_driving/data/ops/csrc/README.md
 rename mx_driving/{common => data}/ops/csrc/RoipointPool3dForward.cpp (100%)
 rename mx_driving/{perception/vision => data}/ops/csrc/functions.h (69%)
 create mode 100644 mx_driving/data/ops/csrc/pybind.cpp
 rename mx_driving/{motion => data}/ops/kernels/CMakeLists.txt (100%)
 create mode 100644 mx_driving/data/ops/kernels/README.md
 rename mx_driving/{motion => data}/ops/kernels/framework/CMakeLists.txt (100%)
 rename mx_driving/{motion => data}/ops/kernels/op_host/CMakeLists.txt (100%)
 create mode 100644 mx_driving/data/ops/kernels/op_host/common.h
 rename mx_driving/{common => data}/ops/kernels/op_host/points_in_box.cpp (100%)
 rename mx_driving/{common => data}/ops/kernels/op_host/points_in_box_all.cpp (100%)
 rename mx_driving/{common => data}/ops/kernels/op_host/points_in_box_all_tiling.h (100%)
 rename mx_driving/{common => data}/ops/kernels/op_host/points_in_box_tiling.h (100%)
 rename mx_driving/{common => data}/ops/kernels/op_host/roipoint_pool3d_forward.cpp (100%)
 rename mx_driving/{common => data}/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h (100%)
 rename mx_driving/{perception/fused => data}/ops/kernels/op_kernel/CMakeLists.txt (100%)
 rename mx_driving/{common => data}/ops/kernels/op_kernel/points_in_box.cpp (100%)
 rename mx_driving/{common => data}/ops/kernels/op_kernel/points_in_box_all.cpp (100%)
 rename mx_driving/{common => data}/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp (100%)
 rename mx_driving/{common => data}/ops/npu_points_in_box.py (100%)
 rename mx_driving/{common => data}/ops/npu_points_in_box_all.py (100%)
 rename mx_driving/{common => data}/ops/npu_roipoint_pool3d.py (100%)
 rename mx_driving/{motion => detection}/CMakeLists.txt (100%)
 create mode 100644 mx_driving/detection/__init__.py
 rename mx_driving/{perception/fused => detection}/components/README.md (100%)
 rename mx_driving/{perception => detection/ops}/__init__.py (100%)
 rename mx_driving/{perception/vision => detection}/ops/boxes_overlap_bev.py (100%)
 rename mx_driving/{perception/vision => detection}/ops/csrc/BoxesOverlapBev.cpp (100%)
 rename mx_driving/{common => detection}/ops/csrc/Nms3d.cpp (100%)
 rename mx_driving/{common => detection}/ops/csrc/Nms3dNormal.cpp (100%)
 rename mx_driving/{motion => detection}/ops/csrc/README.md (100%)
 rename mx_driving/{common => detection}/ops/csrc/RotatedIou.cpp (100%)
 rename mx_driving/{common => detection}/ops/csrc/RotatedOverlaps.cpp (100%)
 rename mx_driving/{perception/fused => detection}/ops/csrc/functions.h (34%)
 rename mx_driving/{perception/vision => detection}/ops/csrc/pybind.cpp (37%)
 rename mx_driving/{perception/point => detection}/ops/kernels/CMakeLists.txt (100%)
 rename mx_driving/{motion => detection}/ops/kernels/README.md (100%)
 rename mx_driving/{perception/fused => detection}/ops/kernels/op_host/CMakeLists.txt (100%)
 rename mx_driving/{perception/vision => detection}/ops/kernels/op_host/boxes_overlap_bev.cpp (100%)
 rename mx_driving/{perception/vision => detection}/ops/kernels/op_host/boxes_overlap_bev_tiling.h (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_host/gather_nms3d_mask_tiling.h (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_host/nms3d.cpp (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_host/nms3d_normal_tiling.cpp (100%)
 create mode 100644 mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.h
 rename mx_driving/{common => detection}/ops/kernels/op_host/nms3d_tiling.h (100%)
 rename mx_driving/{perception/point => detection}/ops/kernels/op_kernel/CMakeLists.txt (100%)
 rename mx_driving/{perception/vision => detection}/ops/kernels/op_kernel/boxes_overlap_bev.cpp (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_kernel/gather_nms3d_mask.cpp (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_kernel/nms3d.cpp (100%)
 rename mx_driving/{common => detection}/ops/kernels/op_kernel/nms3d_normal.cpp (100%)
 rename mx_driving/{common => detection}/ops/nms3d_normal.py (100%)
 rename mx_driving/{common => detection}/ops/npu_nms3d.py (100%)
 rename mx_driving/{common => detection}/ops/rotated_iou.py (100%)
 rename mx_driving/{common => detection}/ops/rotated_overlaps.py (100%)
 create mode 100644 mx_driving/fused/CMakeLists.txt
 create mode 100644 mx_driving/fused/__init__.py
 rename mx_driving/{perception/point => fused}/components/README.md (100%)
 rename mx_driving/{perception => }/fused/ops/__init__.py (100%)
 rename mx_driving/{common => fused}/ops/csrc/AddRelu.cpp (100%)
 rename mx_driving/{common => fused}/ops/csrc/DeformableAggregation.cpp (100%)
 rename mx_driving/{common => fused}/ops/csrc/FusedBiasLeakyRelu.cpp (100%)
 rename mx_driving/{common => fused}/ops/csrc/MaxPool2d.cpp (100%)
 rename mx_driving/{common => fused}/ops/csrc/MultiScaleDeformableAttnFunction.cpp (100%)
 rename mx_driving/{perception => }/fused/ops/csrc/README.md (100%)
 create mode 100644 mx_driving/fused/ops/csrc/functions.h
 create mode 100644 mx_driving/fused/ops/csrc/pybind.cpp
 rename mx_driving/{common => fused}/ops/fused_bias_leaky_relu.py (100%)
 rename mx_driving/{perception => }/fused/ops/kernels/CMakeLists.txt (100%)
 rename mx_driving/{perception => }/fused/ops/kernels/README.md (100%)
 rename mx_driving/{perception/point => fused}/ops/kernels/op_host/CMakeLists.txt (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/add_relu.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/add_relu_tiling.h (100%)
 create mode 100644 mx_driving/fused/ops/kernels/op_host/common.h
 rename mx_driving/{common => fused}/ops/kernels/op_host/deformable_aggregation.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/deformable_aggregation_grad.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/deformable_aggregation_grad_tiling.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/deformable_aggregation_tiling.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/fused_bias_leaky_relu.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/fused_bias_leaky_relu_tiling.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/max_pool2d.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/max_pool2d.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h (100%)
 rename mx_driving/{perception/vision => fused}/ops/kernels/op_kernel/CMakeLists.txt (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/add_relu.cpp (100%)
 rename mx_driving/{perception => }/fused/ops/kernels/op_kernel/common.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/deformable_aggregation.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/deformable_aggregation_grad.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/fused_bias_leaky_relu.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/fused_bias_leaky_relu.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/max_pool2d.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_generic.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/ms_deform_attn_high_perf.h (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp (100%)
 rename mx_driving/{common => fused}/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp (100%)
 rename mx_driving/{common => fused}/ops/npu_add_relu.py (100%)
 rename mx_driving/{common => fused}/ops/npu_deformable_aggregation.py (100%)
 rename mx_driving/{common => fused}/ops/npu_max_pool2d.py (99%)
 rename mx_driving/{common => fused}/ops/npu_multi_scale_deformable_attn_function.py (100%)
 delete mode 100644 mx_driving/motion/ops/csrc/pybind.cpp
 delete mode 100644 mx_driving/motion/ops/kernels/op_kernel/CMakeLists.txt
 delete mode 100644 mx_driving/perception/CMakeLists.txt
 delete mode 100644 mx_driving/perception/fused/__init__.py
 delete mode 100644 mx_driving/perception/fused/ops/csrc/pybind.cpp
 delete mode 100644 mx_driving/perception/point/__init__.py
 delete mode 100644 mx_driving/perception/vision/__init__.py
 delete mode 100644 mx_driving/perception/vision/ops/__init__.py
 delete mode 100644 mx_driving/perception/vision/ops/csrc/README.md
 delete mode 100644 mx_driving/perception/vision/ops/kernels/README.md
 create mode 100644 mx_driving/point/CMakeLists.txt
 create mode 100644 mx_driving/point/__init__.py
 rename mx_driving/{perception/vision => point}/components/README.md (100%)
 rename mx_driving/{perception => }/point/ops/__init__.py (100%)
 rename mx_driving/{perception/fused => point}/ops/bev_pool.py (100%)
 rename mx_driving/{perception/fused => point}/ops/bev_pool_v2.py (100%)
 rename mx_driving/{perception/fused => point}/ops/csrc/BEVPool.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/csrc/BEVPoolBackward.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/csrc/BEVPoolV2.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/csrc/BEVPoolV2Backward.cpp (100%)
 rename mx_driving/{common => point}/ops/csrc/DynamicScatter.cpp (100%)
 rename mx_driving/{common => point}/ops/csrc/DynamicVoxelization.cpp (100%)
 rename mx_driving/{common => point}/ops/csrc/FurthestPointSampling.cpp (100%)
 rename mx_driving/{common => point}/ops/csrc/FurthestPointSamplingWithDist.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/GroupPoints.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/HardVoxelize.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/PointToVoxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/README.md (100%)
 rename mx_driving/{perception => }/point/ops/csrc/UniqueVoxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/VecPoolBackward.cpp (100%)
 rename mx_driving/{common => point}/ops/csrc/VoxelPoolingTrain.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/VoxelToPoint.cpp (100%)
 rename mx_driving/{perception => }/point/ops/csrc/functions.h (43%)
 rename mx_driving/{perception => }/point/ops/csrc/pybind.cpp (31%)
 rename mx_driving/{common => point}/ops/furthest_point_sampling.py (100%)
 rename mx_driving/{common => point}/ops/furthest_point_sampling_with_dist.py (100%)
 rename mx_driving/{perception => }/point/ops/group_points.py (100%)
 rename mx_driving/{perception/vision => point}/ops/kernels/CMakeLists.txt (100%)
 rename mx_driving/{perception => }/point/ops/kernels/README.md (100%)
 rename mx_driving/{perception/vision => point}/ops/kernels/op_host/CMakeLists.txt (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_host/bev_pool.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_host/bev_pool_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_scatter.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_scatter_grad.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_scatter_grad_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_scatter_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_voxelization.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/dynamic_voxelization_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/furthest_point_sampling.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/furthest_point_sampling_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/group_points.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/group_points_grad.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/group_points_grad_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/group_points_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/hard_voxelize.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/hard_voxelize_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/point_to_voxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/point_to_voxel_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/unique_voxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/unique_voxel_tiling.h (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/vec_pool_grad.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_host/vec_pool_grad_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/voxel_pooling_train.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/voxel_pooling_train_grad.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_host/voxel_pooling_train_tiling.h (100%)
 create mode 100644 mx_driving/point/ops/kernels/op_kernel/CMakeLists.txt
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool.h (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool_grad.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool_v2.cpp (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool_v2.h (100%)
 rename mx_driving/{perception/fused => point}/ops/kernels/op_kernel/bev_pool_v2_grad.cpp (100%)
 create mode 100644 mx_driving/point/ops/kernels/op_kernel/common.h
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_base.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_grad.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_grad_base.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_grad_max.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_max.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_mean.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_scatter_sum.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/dynamic_voxelization.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/furthest_point_sampling.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/furthest_point_sampling.h (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/group_points.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/group_points_grad.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/hard_voxelize.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/point_to_voxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/unique_voxel.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/vec_pool_grad.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/voxel_pooling_train.cpp (100%)
 rename mx_driving/{common => point}/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp (100%)
 rename mx_driving/{perception => }/point/ops/kernels/op_kernel/voxel_to_point.cpp (100%)
 rename mx_driving/{common => point}/ops/npu_dynamic_scatter.py (100%)
 rename mx_driving/{common => point}/ops/voxel_pooling_train.py (100%)
 rename mx_driving/{common => point}/ops/voxelization.py (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb306ada..bec89142 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,8 +8,10 @@ include(cmake/intf.cmake)
 
 set(MX_DRIVING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mx_driving)
 add_subdirectory(${MX_DRIVING_DIR}/common)
-add_subdirectory(${MX_DRIVING_DIR}/motion)
-add_subdirectory(${MX_DRIVING_DIR}/perception)
+add_subdirectory(${MX_DRIVING_DIR}/data)
+add_subdirectory(${MX_DRIVING_DIR}/fused)
+add_subdirectory(${MX_DRIVING_DIR}/point)
+add_subdirectory(${MX_DRIVING_DIR}/detection)
 add_subdirectory(${MX_DRIVING_DIR}/spconv)
 
 opbuild(OPS_SRC ${ASCEND_HOST_SRC} OUT_DIR ${ASCEND_AUTOGEN_PATH})
diff --git a/README.md b/README.md
index 2a25559e..2101f702 100644
--- a/README.md
+++ b/README.md
@@ -87,22 +87,27 @@ export LD_LIBRARY_PATH=xxx/site-packages/mx_driving/packages/vendors/customize/o
 │  │  ├── CMakeLists.txt
 │  │  ├── components            # 通用组件
 │  │  └── ops                   # 通用算子
-│  ├── motion                   # 运动模块
+│  ├── data                     # 数据预处理模块
 │  │  ├── __init__.py
 │  │  ├── CMakeLists.txt   
-│  │  ├── components            # 运动组件
-│  │  └── ops                   # 运动算子
-│  ├── perception               # 感知模块
-│  |   ├── __init__.py
-│  |   ├── CMakeLists.txt
-│  |   ├── fused                 # 融合模块
-│  |   ├── point                 # 点云模块
-│  |   └── vision                # 视觉模块
-│  └── spconv                    # 稀疏卷积模块
+│  │  └── ops                   # 数据预处理算子
+│  ├── detection                # 目标检测模块
+│  │  ├── __init__.py
+│  │  ├── CMakeLists.txt   
+│  │  └── ops                   # 目标检测算子
+│  ├── point                    # 点云模块
+│  │  ├── __init__.py
+│  │  ├── CMakeLists.txt   
+│  │  └── ops                   # 点云算子
+│  ├── fused                    # 融合模块
+│  │  ├── __init__.py
+│  │  ├── CMakeLists.txt   
+│  │  └── ops                   # 融合算子
+│  └── spconv                   # 稀疏卷积模块
 │     ├── __init__.py
 │     ├── CMakeLists.txt
 |     └── ops                   # 稀疏卷积算子
-├── examples                    # 自动驾驶模型示例
+├── model_examples              # 自动驾驶模型示例
 │  └── BEVFormer                # BEVFormer模型示例
 ├── bind                        # torch 绑定
 ├── ci                          # ci脚本
diff --git a/bind/pybind.cpp b/bind/pybind.cpp
index 13f43d75..ade78570 100644
--- a/bind/pybind.cpp
+++ b/bind/pybind.cpp
@@ -3,8 +3,9 @@
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
     init_common(m);
-    init_perception_fused(m);
-    init_perception_point(m);
-    init_perception_vision(m);
+    init_fused(m);
+    init_point(m);
+    init_data(m);
+    init_detection(m);
     init_spconv(m);
 }
diff --git a/docs/api/README.md b/docs/api/README.md
index d29c7113..a581aa0b 100644
--- a/docs/api/README.md
+++ b/docs/api/README.md
@@ -1,5 +1,5 @@
 > Note: 以prototype标注的接口，表示该接口为预发布接口，可能会有变动，不建议在生产环境中使用。
-# Common 算子
+# 通用算子
 ## scatter_max
 ### 接口原型
 ```python
@@ -45,108 +45,188 @@ tensor([[0, 2, 2, 2, 2, 2, 2, 0],
         [3, 3, 3, 3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1, 1, 1, 1]])
 ```
-## npu_rotated_overlaps
+## knn
 ### 接口原型
 ```python
-mx_driving.common.npu_rotated_overlaps(Tensor self, Tensor query_boxes, bool trans=False) -> Tensor
+mx_driving.common.knn(int k, Tensor xyz, Tensor center_xyz, bool Transposed) -> Tensor
 ```
 ### 功能描述
-计算旋转框的重叠面积。
+对center_xyz中的每个点找到xyz中对应batch中的距离最近的k个点，并且返回此k个点的索引值。
 ### 参数说明
-- `self(Tensor)`：边界框张量，数据类型为`float32, float16`，形状为`[B, N, 5]`。
-- `query_boxes(Tensor)`：查询框张量，数据类型为`float32, float16`，形状为`[B, M, 5]`。
-- `trans(bool)`：是否进行坐标变换。默认值为`False`。值为`True`时，表示`xyxyt`, 值为`False`时，表示`xywht`。
+- `xyz(Tensor)`：点数据，表示(x, y, z)三维坐标，数据类型为`float32`。shape为`[B, N, 3]`(当Transposed=False)或`[B, 3, N]`(当Transposed=True)。其中`B`为batch size，`N`为点的数量。
+- `center_xyz(Tensor)`：点数据，表示(x, y, z)三维坐标，数据类型为`float32`。shape为`[B, npoint, 3]`(当Transposed=False)或`[B, 3, npoint]`(当Transposed=True)。其中`B`为batch size，`npoint`为点的数量。
+- `k(int)`：采样点的数量。
+- `Transposed(bool)`：输入是否需要进行转置。
 ### 返回值
-- `Tensor`：重叠面积张量，数据类型为`float32, float16`，形状为`[B, N, M]`。
+- `idx(Tensor)`：采样后的索引数据，数据类型为`int32`。shape为`[B, k, npoint]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-import numpy as np
-from mx_driving.common import npu_rotated_overlaps
-a = np.random.uniform(0, 1, (1, 3, 5)).astype(np.float16)
-b = np.random.uniform(0, 1, (1, 2, 5)).astype(np.float16)
-box1 = torch.from_numpy(a).npu()
-box2 = torch.from_numpy(b).npu()
-output = npu_rotated_overlaps(box1, box2, True)
-print(output)
+from mx_driving.common import knn
+xyz = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu()
+center_xyz = torch.tensor([[[1, 2, 3]], [[1, 2, 3]]], dtype=torch.float32).npu()
+idx = knn(2, xyz, center_xyz, False)
+print(idx)
 ```
 ```text
-tensor([[[0.0000, 0.1562, 0.0000],
-         [0.1562, 0.3713, 0.0611],
-         [0.0000, 0.0611, 0.0000]]], dtype=torch.float16)
+tensor([[0, 0], [1, 1]], dtype=torch.int32)
 ```
-## npu_rotated_iou
+### 算子约束
+1. k必须>0且<100
+2. xyz中的每个batch中的任意一个点到center_xyz对应batch中的任意一个点的距离必须在1e10f以内
+3. xyz和center_xyz的shape必须是3维，当Transposed=True时，xyz和center_xyz的shape的dim的第1位必须是3；当Transposed=False时，xyz和center_xyz的shape的dim的第2位必须是3
+4. 距离相同时索引存在不稳定排序问题,遇到距离精度通过但索引精度错误时，复用不稳定排序的CCB结论
+
+## scatter_mean
 ### 接口原型
 ```python
-mx_driving.common.npu_rotated_iou(Tensor self, Tensor query_boxes, bool trans=False, int mode=0, bool is_cross=True, float v_threshold=0.0, float e_threshold=0.0) -> Tensor
+mx_driving.common.scatter_mean(Tensor src, Tensor indices, int dim=0, Tensor out=None, int dim_size=None) -> Tensor
 ```
 ### 功能描述
-计算旋转框的IoU。
+将输入张量`src`中的元素按照`indices`中的索引在指定的`dim`维进行分组，并计算每组的平均值，返回平均值。
 ### 参数说明
-- `self(Tensor)`：边界框张量，数据类型为`float32, float16`，形状为`[B, N, 5]`。
-- `query_boxes(Tensor)`：查询框张量，数据类型为`float32, float16`，形状为`[B, M, 5]`。
-- `trans(bool)`：是否进行坐标变换。默认值为`False`。值为`True`时，表示`xyxyt`, 值为`False`时，表示`xywht`，其中`t`为角度制。
-- `is_cross(bool)`：值为`True`时，则对两组边界框中每个边界框之间进行计算。值为`False`时，只对对齐的边界框之间进行计算。
-- `mode(int)`：计算IoU的模式。默认值为`0`。值为`0`时，表示计算`IoU`，值为`1`时，表示计算`IoF`。
-- `v_threshold(float)`：顶点判断的容忍阈值。
-- `e_threshold(float)`：边相交判断的容忍阈值。
+- `src`：源张量，数据类型为`float32`。
+- `indices`：索引张量，数据类型为`int32`，且
+  - `indices`的维度必须小于等于`src`的维度，
+  - `indices`每一维的长度均必须与`src`长度相同。
+  - `indices`的取值必须为非负的有效索引值，参数`out`或`dim_size`不为`None`时，`indices`的取值应该为输出张量在`dim`维的有效索引值。
+- `out`：被更新张量，数据类型为`float32`，可选入参，默认为`None`，输入`out`不为`None`时，`out`中的元素参与平均值的计算，且
+  - `out`的维度必须与`src`的维度相同。
+  - `out`除第`dim`维外其余维的长度必须与`src`相同。
+- `dim`：指定的维度，表示按照哪个维度进行分组平均计算，数据类型为`int32`，可选入参，默认取值为`0`，`dim`取值不超过`indices`的维度。
+- `dim_size`：输出张量在`dim`维的长度，数据类型为`int32`，可选入参，默认为`None`，`dim_size`的取值必须为非负的有效长度值，该参数仅在输入`out`为`None`时生效。
 ### 返回值
-- `Tensor`：IoU张量，数据类型为`float32, float16`，`is_cross`为`True`时形状为`[B, N, M]，反之则为`[B, N]`。
+- `out`：求平均后的张量，数据类型为`float32`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
+
 ```python
 import torch, torch_npu
-import numpy as np
-from mx_driving.common import npu_rotated_iou
-a = np.random.uniform(0, 1, (2, 2, 5)).astype(np.float16)
-b = np.random.uniform(0, 1, (2, 3, 5)).astype(np.float16)
-box1 = torch.from_numpy(a).npu()
-box2 = torch.from_numpy(b).npu()
-iou = npu_rotated_iou(box1, box2, False, 0, True, 1e-5, 1e-5)
-print(iou)
+from mx_driving.common import scatter_mean
+src = torch.randn(4, 5, 6).to(torch.float)
+indices = torch.randint(5, (4, 5)).to(torch.int32)
+dim = 0
+out = scatter_mean(src.npu(), indices.npu(), None, dim)
+print(out)
 ```
-```text
-tensor([[[3.3325e-01, 1.0162e-01],
-         [1.0162e-01, 1.0000e+00]],
+### 其他说明
+- 该算子对尾块较大的场景较为亲和，对尾块很小的场景不亲和，其中，尾块表示`src`后`N`维的大小，`N = src.dim() - indices.dim()`。
 
-        [[0.0000e+00, 0.0000e+00],
-         [0.0000e+00, 5.9605e-08]]], dtype=torch.float16)
+## three_interpolate
+### 接口原型
+```python
+mx_driving.common.three_interpolate(features: torch.Tensor, indices: torch.Tensor,
+                weight: torch.Tensor) -> torch.Tensor:
 ```
-## npu_dynamic_scatter
+### 功能描述
+对三维数据进行加权最近邻线性插值处理
+### 参数说明
+- `features`：需要被插值的特征，数据类型为`float32|float16`，维度为（B, C, M）。
+- `indices`：获取目标特征计算的索引，数据类型为`int32`，维度为（B, N, 3），
+  - `indices`的元素值需小于`features`的第三维度，即值在[0, M)。
+- `weight`：获取目标特征计算的权重，数据类型为`float32|float16`，维度为（B, N, 3）。
+  - `weight`数据类型与`features`须一致。
+- `features`，`indices`，`weight`三个参数的每个维度须小于10000。
+- `features`，`indices`，`weight`三个参数的大小请勿超过2^24。
+### 返回值
+- `output`：目标特征张量，数据类型为`float32|float16`，维度为（B, C, N）。
+### 支持的型号
+- Atlas A2 训练系列产品
+### 调用示例
+```python
+import torch
+from mx_driving.common import three_interpolate
+features = torch.tensor(
+            [[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
+            [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
+            [2.6732, 2.8677, 2.6436, 2.6732, 2.6732, 2.6732],
+            [0.0124, 7.0150, 7.0199, 0.0124, 0.0124, 0.0124],
+            [0.3207, 0.0000, 0.3411, 0.3207, 0.3207, 0.3207]],
+            [[0.0000, 0.9544, 2.4532, 0.0000, 0.0000, 0.0000],
+            [0.5346, 1.9176, 1.4715, 0.5346, 0.5346, 0.5346],
+            [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
+            [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
+            [0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]]],
+            ).npu()
+idx = torch.tensor(
+            [[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
+            [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]],
+            ).int().npu()
+weight = torch.tensor(
+            [[[3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [1.0000e+00, 5.8155e-08, 2.2373e-08],
+              [1.0000e+00, 1.7737e-08, 1.7356e-08],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01]],
+             [[3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [1.0000e+00, 1.3651e-08, 7.7312e-09],
+              [1.0000e+00, 1.7148e-08, 1.4070e-08],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01],
+              [3.3333e-01, 3.3333e-01, 3.3333e-01]]],
+            ).npu()
+output = three_interpolate(features, idx, weight)
+print(output)
+```
+```text
+torch.tensor(
+        [[[3.8953e+00, 4.4995e+00, 4.4995e+00, 3.8953e+00, 3.8953e+00, 3.2072e+00], 
+        [2.9320e+00, 3.0447e+00, 3.0447e+00, 2.9320e+00, 2.9320e+00, 2.9583e+00], 
+        [2.7281e+00, 2.6436e+00, 2.6436e+00, 2.7281e+00, 2.7281e+00, 2.7380e+00], 
+        [4.6824e+00, 7.0199e+00, 7.0199e+00, 4.6824e+00, 4.6824e+00, 2.3466e+00], 
+        [2.2060e-01, 3.4110e-01, 3.4110e-01, 2.2060e-01, 2.2060e-01, 2.1380e-01]],
+        [[8.1773e-01, 9.5440e-01, 2.4532e+00,8.1773e-01, 8.1773e-01, 1.1359e+00],
+        [8.4689e-01, 1.9176e+00, 1.4715e+00, 8.4689e-01, 8.4689e-01, 1.3079e+00],
+        [6.9473e-01, 2.7440e-01, 2.0842e+00, 6.9473e-01, 6.9473e-01, 7.8619e-01],
+        [7.6789e-01, 1.5063e+00, 1.6209e+00, 7.6789e-01, 7.6789e-01, 1.1562e+00],
+        [3.8760e-01, 1.0300e-02, 8.3569e-09, 3.8760e-01, 3.8760e-01, 1.9723e-01]]],
+        device='npu:0'
+        )
+```
+
+## three_nn
 ### 接口原型
 ```python
-mx_driving.common.npu_dynamic_scatter(Tensor feats, Tensor coors, str reduce_type = 'max') -> Tuple[torch.Tensor, torch.Tensor]
+mx_driving.common.three_nn(Tensor target, Tensor source) -> (Tensor dist, Tensor idx)
 ```
 ### 功能描述
-将点云特征点在对应体素中进行特征压缩。
+对target中的每个点找到source中对应batch中的距离最近的3个点，并且返回此3个点的距离和索引值。
 ### 参数说明
-- `feats(Tensor)`：点云特征张量[N, C]，仅支持两维，数据类型为`float32`，特征向量`C`长度上限为2048。
-- `coors(Tensor)`：体素坐标映射张量[N, 3]，仅支持两维，数据类型为`int32`，此处以x, y, z指代体素三维坐标，其取值范围为`0 <= x, y <= 2048`,  `0 <= z <= 256`。
-- `reduce_type(str)`：压缩类型。可选值为`'max'`, `'mean'`, `'sum'`。默认值为`'max'`
+- `target(Tensor)`：点数据，表示(x, y, z)三维坐标，数据类型为`float32/float16`。shape为`[B, npoint, 3]`。其中`B`为batch size，`npoint`为点的数量。
+- `source(Tensor)`：点数据，表示(x, y, z)三维坐标，数据类型为`float32/float16`。shape为`[B, N, 3]`。其中`B`为batch size，`N`为点的数量。
 ### 返回值
-- `voxel_feats(Tensor)`：压缩后的体素特征张量，仅支持两维，数据类型为`float32`。
-- `voxel_coors(Tensor)`：去重后的体素坐标，仅支持两维，数据类型为`int32`。
+- `dist(Tensor)`：采样后的距离数据，数据类型为`float32/float16`。shape为`[B, npoint, 3]`。
+- `idx(Tensor)`：采样后的索引数据，数据类型为`int32`。shape为`[B, npoint, 3]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_dynamic_scatter
+from mx_driving.common import three_nn
+source = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu()
+target = torch.tensor([[[1, 2, 3]], [[1, 2, 3]]], dtype=torch.float32).npu()
+dist, idx = three_nn(target, source)
+print(dist)
+print(idx)
+```
+```text
+tensor([[2.236, 2.236, 2.236], [2.236, 2.236, 2.236]], dtype=torch.float32)
+tensor([[0, 1, 2], [0, 1, 2]], dtype=torch.int32)
+```
+### 算子约束
+1. source和target的shape必须是3维，且source和target的shape的dim的第2位必须是3
+2. 距离相同时索引存在不稳定排序问题，遇到距离精度通过但索引精度错误时，复用不稳定排序的CCB结论
 
-feats = torch.tensor([[1, 2, 3], [3, 2, 1], [7, 8, 9], [9, 8, 7]], dtype=torch.float32).npu()
-coors = torch.tensor([[1, 1, 1], [1, 1, 1], [2, 2, 2], [2, 2, 2]], dtype=torch.int32).npu()
-voxel_feats, voxel_coors = npu_dynamic_scatter(feats, coors, 'max')
 
-print(voxel_feats)
-print(voxel_coors)
-```
+# 数据预处理算子
 ## npu_points_in_box
 ### 接口原型
 ```python
-mx_driving.common.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor
+mx_driving.data.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor
 ```
 ### 功能描述
 判断点是否在框内。
@@ -162,7 +242,7 @@ mx_driving.common.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_points_in_box
+from mx_driving.data import npu_points_in_box
 boxes = torch.tensor([[[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]]], dtype=torch.float32).npu()
 points = torch.tensor([[[1, 2, 3], [3, 4, 5]]], dtype=torch.float32).npu()
 out = npu_points_in_box(boxes, points)
@@ -174,7 +254,7 @@ tensor([[0, 1]], dtype=torch.int32)
 ## npu_points_in_box_all
 ### 接口原型
 ```python
-mx_driving.common.npu_points_in_box_all(Tensor boxes, Tensor points) -> Tensor
+mx_driving.data.npu_points_in_box_all(Tensor boxes, Tensor points) -> Tensor
 ```
 ### 功能描述
 判断点是否在框内。
@@ -190,7 +270,7 @@ mx_driving.common.npu_points_in_box_all(Tensor boxes, Tensor points) -> Tensor
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_points_in_box
+from mx_driving.data import npu_points_in_box
 boxes = torch.tensor([[[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]]], dtype=torch.float32).npu()
 points = torch.tensor([[[1, 2, 5], [3, 4, 8]]], dtype=torch.float32).npu()
 out = npu_points_in_box(boxes, points)
@@ -200,82 +280,80 @@ print(out)
 tensor([[[1, 0],
          [0, 1]]], dtype=torch.int32)
 ```
-## npu_multi_scale_deformable_attn_function
+## RoIPointPool3d
 ### 接口原型
 ```python
-mx_driving.common.npu_multi_scale_deformable_attn_function(Tensor value, Tensor shape, Tensor offset, Tensor locations, Tensor weight) -> Tensor
+mx_driving.data.RoIPointPool3d(int num_sampled_points, Tensor points, Tensor point_features, Tensor boxes3d) -> (Tensor pooled_features, Tensor pooled_empty_flag)
 ```
 ### 功能描述
-多尺度可变形注意力机制, 将多个视角的特征图进行融合。
+对每个3D方案的几何特定特征进行编码。
 ### 参数说明
-- `value(Tensor)`：特征张量，数据类型为`float32, float16`。shape为`[bs, num_keys, num_heads, embed_dims]`。其中`bs`为batch size，`num_keys`为特征图的大小，`num_heads`为头的数量，`embed_dims`为特征图的维度，需要为8的倍数。
-- `shape(Tensor)`：特征图的形状，数据类型为`int32, int64`。shape为`[num_levels, 2]`。其中`num_levels`为特征图的数量，`2`分别代表`H, W`。
-- `offset(Tensor)`：偏移量张量，数据类型为`int32, int64`。shape为`[num_levels]`。
-- `locations(Tensor)`：位置张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads, num_levels, num_points, 2]`。其中`bs`为batch size，`num_queries`为查询的数量，`num_heads`为头的数量，`num_levels`为特征图的数量，`num_points`为采样点的数量，`2`分别代表`y, x`。
-- `weight(Tensor)`：权重张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads, num_levels, num_points]`。其中`bs`为batch size，`num_queries`为查询的数量，`num_heads`为头的数量，`num_levels`为特征图的数量，`num_points`为采样点的数量。
+- `num_sampled_points(int)`：特征点的数量，正整数。
+- `points(Tensor)`：点张量，数据类型为`float32, float16`。shape 为`[B, N, 3]`。`3`分别代表`x, y, z`。
+- `point_features(Tensor)`：点特征张量，数据类型为`float32, float16`。shape 为`[B, N, C]`。`C`为每个点的特征维度。
+- `boxes3d(Tensor)`：框张量，数据类型为`float32, float16`。shape 为`[B, M, 7]`。`7`分别代表`x, y, z, x_size, y_size, z_size, rz`。
 ### 返回值
-- `output(Tensor)`：融合后的特征张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads*embed_dims]`。
+- `pooled_features(Tensor)`：点在框内的特征张量，数据类型为`float32, float16`。shape 为`[B, M, num, 3+C]`。
+- `pooled_empty_flag(Tensor)`：所有点不在框内的空标记张量，数据类型为`int32`。shape 为`[B, M]`。
+### 约束说明
+- `points`、`point_features`和`boxes3d`的数据类型必须相同，以及`B`也必须相同。
+- `num_sampled_points`必须小于等于`N`。
+- 数据类型为`float32`时，建议`B`小于100、`N`小于等于2640、`M`小于等于48、`num_sampled_points`小于等于48，个别shape值略微超过建议值无影响，但所有shape值均大于建议值时，算子执行会发生错误。
+- 数据类型为`float16`时，建议`B`小于100、`N`小于等于3360、`M`小于等于60、`num_sampled_points`小于等于60，个别shape值略微超过建议值无影响，但所有shape值均大于建议值时，算子执行会发生错误。
+- `N`/`M`的值越大，性能劣化越严重，建议`N`小于`M`的六百倍，否则性能可能会低于0.1x A100。
 ### 支持的型号
 - Atlas A2 训练系列产品
-### 约束说明
-- `locations`的值在`[0, 1]`之间。
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_multi_scale_deformable_attn_function
-bs, num_levels, num_heads, num_points, num_queries, embed_dims = 1, 1, 4, 8, 16, 32
-
-shapes = torch.as_tensor([(100, 100)], dtype=torch.long)
-num_keys = sum((H * W).item() for H, W in shapes)
-
-value = torch.rand(bs, num_keys, num_heads, embed_dims) * 0.01
-sampling_locations = torch.ones(bs, num_queries, num_heads, num_levels, num_points, 2) * 0.005
-attention_weights = torch.rand(bs, num_queries, num_heads, num_levels, num_points) + 1e-5
-level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1]))
-
-out = npu_multi_scale_deformable_attn_function(value.npu(), shapes.npu(), level_start_index.npu(), sampling_locations.npu(), attention_weights.npu())
-print(out)
+from mx_driving.data import RoIPointPool3d
+num_sampled_points = 1
+points = torch.tensor([[[1, 2, 3]]], dtype=torch.float).npu()
+point_features = points.clone()
+boxes3d = torch.tensor([[[1, 2, 3, 4, 5, 6, 1]]], dtype=torch.float).npu()
+roipoint_pool3d = RoIPointPool3d(num_sampled_points)
+pooled_features, pooled_empty_flag = roipoint_pool3d(points, point_features, boxes3d)
+print(pooled_features)
+print(pooled_empty_flag)
 ```
 ```text
-tensor([[[9.3002, 11.1603, 0.0000, 0.0000]]], dtype=torch.float32)
+tensor([[[[1., 2., 3., 1., 2., 3.]]]])
+tensor([[0]], dtype=torch.int32)
 ```
-## voxelization
+
+# 目标检测算子
+## boxes_overlap_bev
 ### 接口原型
 ```python
-mx_driving.common.voxelization(Tensor points, List[float] voxel_size, List[float] coors_range, int max_points=-1, int max_voxels=-1, bool deterministic=True) -> Tensor
+mx_driving.detection.boxes_overlap_bev(Tensor boxes_a, Tensor boxes_b) -> Tensor
 ```
 ### 功能描述
-将点云数据进行体素化。
+计算bev视角下两组bounding boxes的交叠面积。
 ### 参数说明
-- `points(Tensor)`：点云数据，数据类型为`float32`。shape为`[N, F]`。其中`N`为点的数量，`F`分别代表每个点的特征维度，其中`N > 0, F >= 3`。
-- `voxel_size(List[float])`：体素大小，数据类型为`float32`。shape为`[3]`。其中`3`分别代表`x, y, z`。
-- `coors_range(List[float])`：体素范围，数据类型为`float32`。shape为`[6]`。其中`6`分别代表`x_min, y_min, z_min, x_max, y_max, z_max`。
-- `max_points(int)`：每个体素的最大点数。默认值为`-1`。
-- `max_voxels(int)`：最大体素数。默认值为`-1`。
-- `deterministic(bool)`：是否确定性。默认值为`True`。
+- `boxes_a (Tensor)`：第一组bounding boxes，数据类型为`float32`。shape为`[M, 5]`。其中`5`分别代表`x1, y1, x2, y2, angle`, `x1, y1, x2, y2`代表box四个顶点的横纵坐标，`angle`代表box的弧度制旋转角。
+- `boxes_b (Tensor)`：第二组bounding boxes，数据类型为`float32`。shape为`[N, 5]`。其中`5`分别代表`x1, y1, x2, y2, angle`, `x1, y1, x2, y2`代表box四个顶点的横纵坐标，`angle`代表box的弧度制旋转角。
 ### 返回值
-- `coors(Tensor)`：每个点所属的体素坐标，数据类型为`int32`。shape为`[N, 3]`。
+- `area_overlap(Tensor)`：包含两组bounding boxes交叠面积的张量，数据类型为`float32`。shape为`[M, N]`。
+### 约束说明
+
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import Voxelization
-points = torch.randint(-20, 100, [16, 3], dtype=torch.float32).npu()
-coors_range = [0, -40, -3, 70.4, 40, 1]
-max_points = -1
-voxel_size = [0.5, 0.5, 0.5]
-dynamic_voxelization = Voxelization(voxel_size, coors_range, max_points)
-out = dynamic_voxelization.forward(points)
-print(out)
+from mx_driving.detection import boxes_overlap_bev
+boxes_a = torch.tensor([[0, 0, 2, 2, 0]], dtype=torch.float32).npu()
+boxes_b = torch.tensor([[1, 1, 3, 3, 0]], dtype=torch.float32).npu()
+area_overlap = boxes_overlap_bev(boxes_a, boxes_b)
+print(area_overlap)
 ```
-## npu_nms3d_normal
+## npu_nms3d
 ### 接口原型
 ```python
-mx_driving.common.npu_nms3d_normal(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor
+mx_driving.detection.npu_nms3d(Tensor boxes, Tensor scores, float iou_threshold) -> Tensor
 ```
 ### 功能描述
-3D非极大值抑制。
+3D非极大值抑制，在bev视角下剔除多个3d box交并比大于阈值的box。
 ### 参数说明
 - `boxes(Tensor)`：框张量，数据类型为`float32, float16`。shape 为`[N, 7]`。`7`分别代表`x, y, z, x_size, y_size, z_size, rz`。
 - `scores(Tensor)`：评分张量，数据类型为`float32, float16`。shape 为`[N]`。
@@ -287,22 +365,23 @@ mx_driving.common.npu_nms3d_normal(Tensor boxes, Tensor scores, float: iou_thres
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_nms3d_normal
+from mx_driving.detection import npu_nms3d
 boxes = torch.tensor([[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]], dtype=torch.float32).npu()
 scores = torch.tensor([1, 2], dtype=torch.float32).npu()
-out = npu_nms3d_normal(boxes, scores, 0.5)
+out = npu_nms3d(boxes, scores, 0.5)
 print(out)
 ```
 ```text
-tensor([[1, 0]], dtype=torch.int32)
+tensor([[1]], dtype=torch.int32)
 ```
-## npu_nms3d
+
+## npu_nms3d_normal
 ### 接口原型
 ```python
-mx_driving.common.npu_nms3d(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor
+mx_driving.detection.npu_nms3d_normal(Tensor boxes, Tensor scores, float iou_threshold) -> Tensor
 ```
 ### 功能描述
-3D非极大值抑制，在bev视角下剔除多个3d box交并比大于阈值的box。
+3D非极大值抑制。
 ### 参数说明
 - `boxes(Tensor)`：框张量，数据类型为`float32, float16`。shape 为`[N, 7]`。`7`分别代表`x, y, z, x_size, y_size, z_size, rz`。
 - `scores(Tensor)`：评分张量，数据类型为`float32, float16`。shape 为`[N]`。
@@ -314,241 +393,157 @@ mx_driving.common.npu_nms3d(Tensor boxes, Tensor scores, float: iou_threshold) -
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import npu_nms3d
+from mx_driving.detection import npu_nms3d_normal
 boxes = torch.tensor([[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]], dtype=torch.float32).npu()
 scores = torch.tensor([1, 2], dtype=torch.float32).npu()
-out = npu_nms3d(boxes, scores, 0.5)
-print(out)
-```
-```text
-tensor([[1]], dtype=torch.int32)
-```
-## npu_furthest_point_sampling
-### 接口原型
-```python
-mx_driving.common.npu_furthest_point_sampling(Tensor points, int num_points) -> Tensor
-```
-### 功能描述
-点云数据的最远点采样。
-### 参数说明
-- `points(Tensor)`：点云数据，数据类型为`float32`。shape为`[B, N, 3]`。其中`B`为batch size，`N`为点的数量，`3`分别代表`x, y, z`。
-- `num_points(int)`：采样点的数量。
-### 返回值
-- `Tensor`：采样后的点云数据，数据类型为`float32`。shape为`[B, num_points]`。
-### 支持的型号
-- Atlas A2 训练系列产品
-### 调用示例
-```python
-import torch, torch_npu
-from mx_driving.common import npu_furthest_point_sampling
-points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu()
-out = npu_furthest_point_sampling(points, 2)
+out = npu_nms3d_normal(boxes, scores, 0.5)
 print(out)
 ```
 ```text
-tensor([[0, 2]], dtype=torch.int32)
+tensor([[1, 0]], dtype=torch.int32)
 ```
-### 算子約束
-1. points输入shape[B, N, 3]的总大小(B x N x 3)不应该超过383166
-## furthest_point_sample_with_dist
+## npu_rotated_iou
 ### 接口原型
 ```python
-mx_driving.common.furthest_point_sample_with_dist(Tensor points, int num_points) -> (Tensor, Tensor)
+mx_driving.detection.npu_rotated_iou(Tensor self, Tensor query_boxes, bool trans=False, int mode=0, bool is_cross=True, float v_threshold=0.0, float e_threshold=0.0) -> Tensor
 ```
 ### 功能描述
-与`npu_furthest_point_sampling`功能相同，但输入略有不同。
-### 参数说明
-- `points(Tensor)`：点云数据，表示各点间的距离，数据类型为`float32`。shape为`[B, N, N]`。其中`B`为batch size，`N`为点的数量。
-- `num_points(int)`：采样点的数量。
+计算旋转框的IoU。
+### 参数说明
+- `self(Tensor)`：边界框张量，数据类型为`float32, float16`，形状为`[B, N, 5]`。
+- `query_boxes(Tensor)`：查询框张量，数据类型为`float32, float16`，形状为`[B, M, 5]`。
+- `trans(bool)`：是否进行坐标变换。默认值为`False`。值为`True`时，表示`xyxyt`, 值为`False`时，表示`xywht`，其中`t`为角度制。
+- `is_cross(bool)`：值为`True`时，则对两组边界框中每个边界框之间进行计算。值为`False`时，只对对齐的边界框之间进行计算。
+- `mode(int)`：计算IoU的模式。默认值为`0`。值为`0`时，表示计算`IoU`，值为`1`时，表示计算`IoF`。
+- `v_threshold(float)`：顶点判断的容忍阈值。
+- `e_threshold(float)`：边相交判断的容忍阈值。
 ### 返回值
-- `Tensor`：采样后的点云数据，数据类型为`float32`。shape为`[B, num_points]`。
+- `Tensor`：IoU张量，数据类型为`float32, float16`，`is_cross`为`True`时形状为`[B, N, M]`，反之则为`[B, N]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import furthest_point_sample_with_dist
-points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu()
-out = furthest_point_sample_with_dist(points, 2)
-print(out)
+import numpy as np
+from mx_driving.detection import npu_rotated_iou
+a = np.random.uniform(0, 1, (2, 2, 5)).astype(np.float16)
+b = np.random.uniform(0, 1, (2, 3, 5)).astype(np.float16)
+box1 = torch.from_numpy(a).npu()
+box2 = torch.from_numpy(b).npu()
+iou = npu_rotated_iou(box1, box2, False, 0, True, 1e-5, 1e-5)
+print(iou)
 ```
 ```text
-tensor([[0, 2]], dtype=torch.int32)
-```
+tensor([[[3.3325e-01, 1.0162e-01],
+         [1.0162e-01, 1.0000e+00]],
 
-## three_interpolate
+        [[0.0000e+00, 0.0000e+00],
+         [0.0000e+00, 5.9605e-08]]], dtype=torch.float16)
+```
+## npu_rotated_overlaps
 ### 接口原型
 ```python
-mx_driving.common.three_interpolate(features: torch.Tensor, indices: torch.Tensor,
-                weight: torch.Tensor) -> torch.Tensor:
+mx_driving.detection.npu_rotated_overlaps(Tensor self, Tensor query_boxes, bool trans=False) -> Tensor
 ```
 ### 功能描述
-对三维数据进行加权最近邻线性插值处理
+计算旋转框的重叠面积。
 ### 参数说明
-- `features`：需要被插值的特征，数据类型为`float32|float16`，维度为（B, C, M）。
-- `indices`：获取目标特征计算的索引，数据类型为`int32`，维度为（B, N, 3），
-  - `indices`的元素值需小于`features`的第三维度，即值在[0, M)。
-- `weight`：获取目标特征计算的权重，数据类型为`float32|float16`，维度为（B, N, 3）。
-  - `weight`数据类型与`features`须一致。
-- `features`，`indices`，`weights`三个参数的每个维度须小于10000。
-- `features`，`indices`，`weights`三个参数的大小请勿超过2^24。
+- `self(Tensor)`：边界框张量，数据类型为`float32, float16`，形状为`[B, N, 5]`。
+- `query_boxes(Tensor)`：查询框张量，数据类型为`float32, float16`，形状为`[B, M, 5]`。
+- `trans(bool)`：是否进行坐标变换。默认值为`False`。值为`True`时，表示`xyxyt`, 值为`False`时，表示`xywht`。
 ### 返回值
-- `output`：目标特征张量，数据类型为`float32|float16`，维度为（B, C, N）。
+- `Tensor`：重叠面积张量，数据类型为`float32, float16`，形状为`[B, N, M]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
-import torch
-from mx_driving.common import three_interpolate
-features = torch.tensor(
-            [[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
-            [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
-            [2.6732, 2.8677, 2.6436, 2.6732, 2.6732, 2.6732],
-            [0.0124, 7.0150, 7.0199, 0.0124, 0.0124, 0.0124],
-            [0.3207, 0.0000, 0.3411, 0.3207, 0.3207, 0.3207]],
-            [[0.0000, 0.9544, 2.4532, 0.0000, 0.0000, 0.0000],
-            [0.5346, 1.9176, 1.4715, 0.5346, 0.5346, 0.5346],
-            [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
-            [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
-            [0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]]],
-            ).npu()
-idx = torch.tensor(
-            [[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
-            [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]],
-            ).int().npu()
-weight = torch.tensor(
-            [[[3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [1.0000e+00, 5.8155e-08, 2.2373e-08],
-              [1.0000e+00, 1.7737e-08, 1.7356e-08],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01]],
-             [[3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [1.0000e+00, 1.3651e-08, 7.7312e-09],
-              [1.0000e+00, 1.7148e-08, 1.4070e-08],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01],
-              [3.3333e-01, 3.3333e-01, 3.3333e-01]]],
-            ).npu()
-output = three_interpolate(features, idx, weight)
+import torch, torch_npu
+import numpy as np
+from mx_driving.detection import npu_rotated_overlaps
+a = np.random.uniform(0, 1, (1, 3, 5)).astype(np.float16)
+b = np.random.uniform(0, 1, (1, 2, 5)).astype(np.float16)
+box1 = torch.from_numpy(a).npu()
+box2 = torch.from_numpy(b).npu()
+output = npu_rotated_overlaps(box1, box2, True)
 print(output)
 ```
 ```text
-torch.tensor(
-        [[[3.8953e+00, 4.4995e+00, 4.4995e+00, 3.8953e+00, 3.8953e+00, 3.2072e+00], 
-        [2.9320e+00, 3.0447e+00, 3.0447e+00, 2.9320e+00, 2.9320e+00, 2.9583e+00], 
-        [2.7281e+00, 2.6436e+00, 2.6436e+00, 2.7281e+00, 2.7281e+00, 2.7380e+00], 
-        [4.6824e+00, 7.0199e+00, 7.0199e+00, 4.6824e+00, 4.6824e+00, 2.3466e+00], 
-        [2.2060e-01, 3.4110e-01, 3.4110e-01, 2.2060e-01, 2.2060e-01, 2.1380e-01]],
-        [[8.1773e-01, 9.5440e-01, 2.4532e+00,8.1773e-01, 8.1773e-01, 1.1359e+00],
-        [8.4689e-01, 1.9176e+00, 1.4715e+00, 8.4689e-01, 8.4689e-01, 1.3079e+00],
-        [6.9473e-01, 2.7440e-01, 2.0842e+00, 6.9473e-01, 6.9473e-01, 7.8619e-01],
-        [7.6789e-01, 1.5063e+00, 1.6209e+00, 7.6789e-01, 7.6789e-01, 1.1562e+00],
-        [3.8760e-01, 1.0300e-02, 8.3569e-09, 3.8760e-01, 3.8760e-01, 1.9723e-01]]],
-        device='npu:0'
-        )
+tensor([[[0.0000, 0.1562, 0.0000],
+         [0.1562, 0.3713, 0.0611],
+         [0.0000, 0.0611, 0.0000]]], dtype=torch.float16)
 ```
-
-## voxel_pooling_train
+# 融合算子
+## npu_multi_scale_deformable_attn_function
 ### 接口原型
 ```python
-mx_driving.common.npu_voxel_pooling_train(Tensor geom_xyz, Tensor input_features, List[int] voxel_num) -> Tensor
+mx_driving.fused.npu_multi_scale_deformable_attn_function(Tensor value, Tensor shape, Tensor offset, Tensor locations, Tensor weight) -> Tensor
 ```
 ### 功能描述
-点云数据体素化。
+多尺度可变形注意力机制, 将多个视角的特征图进行融合。
 ### 参数说明
-- `geom_xyz`：体素坐标，数据类型为`int32`，维度为（B, N, 3）, 3表示x, y, z。
-- `input_features`：点云数据，数据类型为`float32|float16`，维度为（B, N, C）。
-- `voxel_num`：体素格子长宽高，数据类型为`int32`，维度为（3），3表示体素格子的长宽高。
+- `value(Tensor)`：特征张量，数据类型为`float32, float16`。shape为`[bs, num_keys, num_heads, embed_dims]`。其中`bs`为batch size，`num_keys`为特征图的大小，`num_heads`为头的数量，`embed_dims`为特征图的维度，需要为8的倍数。
+- `shape(Tensor)`：特征图的形状，数据类型为`int32, int64`。shape为`[num_levels, 2]`。其中`num_levels`为特征图的数量，`2`分别代表`H, W`。
+- `offset(Tensor)`：偏移量张量，数据类型为`int32, int64`。shape为`[num_levels]`。
+- `locations(Tensor)`：位置张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads, num_levels, num_points, 2]`。其中`bs`为batch size，`num_queries`为查询的数量，`num_heads`为头的数量，`num_levels`为特征图的数量，`num_points`为采样点的数量，`2`分别代表`y, x`。
+- `weight(Tensor)`：权重张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads, num_levels, num_points]`。其中`bs`为batch size，`num_queries`为查询的数量，`num_heads`为头的数量，`num_levels`为特征图的数量，`num_points`为采样点的数量。
 ### 返回值
-- `output(Tensor)`：输出结果，数据类型为`float32|float16`。shape为`[B, num_voxel_y, num_voxel_x, C]`。
-### 约束说明
-- B <= 128
-- N <= 100000
-- C <= 256
-- num_voxel_x <= 1000
-- num_voxel_y <= 1000
-- num_voxel_z <= 10
-- B * num_voxel_y * num_voxel_x * C <= 100000000
-- B * N * C <= 100000000
-- 反向具有相同约束。
+- `output(Tensor)`：融合后的特征张量，数据类型为`float32, float16`。shape为`[bs, num_queries, num_heads*embed_dims]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
+### 约束说明
+- `locations`的值在`[0, 1]`之间。
 ### 调用示例
 ```python
-import torch
-import torch_npu
-import mx_driving.common
-
-def gen_data(geom_shape, feature_shape, coeff, batch_size, num_channels, dtype):
-       geom_xyz = torch.rand(geom_shape) * coeff
-       geom_xyz = geom_xyz.reshape(batch_size, -1, 3)
-       geom_xyz[:, :, 2] /= 100
-       geom_xyz_cpu = geom_xyz.int()
-       features = torch.rand(feature_shape, dtype=dtype) - 0.5
-       features_cpu = features.reshape(batch_size, -1, num_channels)
-
-       return geom_xyz_cpu, features_cpu
-
-dtype = torch.float32
-coeff = 90
-voxel_num = [128, 128, 1]
-batch_size = 2
-num_points = 40
-num_channel = 80
-xyz = 3
-
-geom_shape = [batch_size, num_points, xyz]
-feature_shape = [batch_size, num_points, num_channel]
+import torch, torch_npu
+from mx_driving.fused import npu_multi_scale_deformable_attn_function
+bs, num_levels, num_heads, num_points, num_queries, embed_dims = 1, 1, 4, 8, 16, 32
 
-geom_cpu, feature_cpu = gen_data(geom_shape, feature_shape, coeff, batch_size, num_channel, dtype)
+shapes = torch.as_tensor([(100, 100)], dtype=torch.long)
+num_keys = sum((H * W).item() for H, W in shapes)
 
-geom_npu = geom_cpu.npu()
-feature_npu = feature_cpu.npu()
+value = torch.rand(bs, num_keys, num_heads, embed_dims) * 0.01
+sampling_locations = torch.ones(bs, num_queries, num_heads, num_levels, num_points, 2) * 0.005
+attention_weights = torch.rand(bs, num_queries, num_heads, num_levels, num_points) + 1e-5
+level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1]))
 
-result_npu = mx_driving.common.npu_voxel_pooling_train(geom_npu, feature_npu, voxel_num)
+out = npu_multi_scale_deformable_attn_function(value.npu(), shapes.npu(), level_start_index.npu(), sampling_locations.npu(), attention_weights.npu())
+print(out)
+```
+```text
+tensor([[[9.3002, 11.1603, 0.0000, 0.0000]]], dtype=torch.float32)
 ```
 
-## scatter_mean
+## npu_max_pool2d
 ### 接口原型
 ```python
-mx_driving.common.scatter_mean(Tensor src, Tensor indices, int dim=0， Tensor out=None, int dim_size=None) -> Tensor
+mx_driving.fused.npu_max_pool2d(Tensor x, int kernel_size, int stride, int padding) -> Tensor
 ```
 ### 功能描述
-将输入张量`src`中的元素按照`indices`中的索引在指定的`dim`维进行分组，并计算每组的平均值，返回平均值。
+对输入进行最大池化，并输出最大值。
 ### 参数说明
-- `src`：源张量，数据类型为`float32`。
-- `indices`：索引张量，数据类型为`int32`，且
-  - `indices`的维度必须小于等于`src`的维度，
-  - `indices`每一维的长度均必须与`src`长度相同。
-  - `indices`的取值必须为非负的有效索引值，参数`out`或`data_size`不为`None`时，`indices`的取值应该为输出张量在`dim`维的有效索引值。
-- `out`：被更新张量，数据类型为`float32`，可选入参，默认为`None`，输入`out`不为`None`时，`out`中的元素参与平均值的计算，且
-  - `out`的维度必须与`src`的维度相同。
-  - `out`除第`dim`维外其余维的长度必须与`src`相同。
-- `dim`：指定的维度，表示按照哪个维度进行分组平均计算，数据类型为`int32`，可选入参，默认取值为`0`，`dim`取值不超过`indices`的维度。
-- `dim_size`：输出张量在`dim`维的长度，数据类型为`int32`，可选入参，默认为`None`，`dim_size`的取值必须为非负的有效长度值，该参数仅在输入`out`为`None`时生效。
+- `x (Tensor)`：一组待池化对象，数据类型为`float32`，format为NCHW。
 ### 返回值
-- `out`：求平均后的张量，数据类型为`float32`。
+- `y (Tensor)`：池化后的最大值，数据类型为`float32`，format为NCHW。
+### 约束说明
+kernel_size仅支持3，stride仅支持2，padding仅支持1，且输入C轴数据量要求为8的倍数，H和W需要大于100。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
-
 ```python
 import torch, torch_npu
-from mx_driving.common import scatter_mean
-src = torch.randn(4, 5, 6).to(torch.float)
-indices = torch.randint(5, (4, 5)).to(torch.int32)
-dim = 0
-out = scatter_mean(src.npu(), indices.npu(), None, dim)
-print(out)
+from mx_driving.fused import npu_max_pool2d
+kernel_size = 3
+stride = 2
+padding = 1
+x = torch.randn(18, 64, 464, 800).npu()
+res = npu_max_pool2d(x, kernel_size, stride, padding)
+print(res)
 ```
-### 其他说明
-- 该算子对尾块较大的场景较为亲和，对尾块很小的场景不亲和，其中，尾块表示`src`后`N`维的大小，`N = src.dim() - indices.dim()`。
 
 ## npu_deformable_aggregation
 ### 接口原型
 ```python
-mx_driving.common.npu_deformable_aggregation(Tensor feature_maps, Tensor spatial_shape, Tensor scale_start_index, Tensor sample_locations, Tensor weight) -> Tensor
+mx_driving.fused.npu_deformable_aggregation(Tensor feature_maps, Tensor spatial_shape, Tensor scale_start_index, Tensor sample_locations, Tensor weight) -> Tensor
 ```
 ### 功能描述
 可变形聚合，对于每个锚点实例，对多个关键点的多时间戳、视图、缩放特征进行稀疏采样后分层融合为实例特征，实现精确的锚点细化。
@@ -577,7 +572,7 @@ mx_driving.common.npu_deformable_aggregation(Tensor feature_maps, Tensor spatial
 ### 调用示例
 ```python
 import torch, torch_npu
-import mx_driving.common
+import mx_driving.fused
 
 bs, num_feat, c, cam, anchor, pts, scale, group = 1, 2816, 256, 1, 10, 2000, 1, 8
 
@@ -587,15 +582,15 @@ scale_start_index = torch.tensor([[[0]]])
 sampling_location = torch.rand(bs, anchor, pts, cam, 2)
 weights = torch.randn(bs, anchor, pts, cam, scale, group)
 
-out = mx_driving.common.npu_deformable_aggregation(feature_maps.npu(), spatial_shape.npu(), scale_start_index.npu(), sampling_location.npu(), weights.npu())
+out = mx_driving.fused.npu_deformable_aggregation(feature_maps.npu(), spatial_shape.npu(), scale_start_index.npu(), sampling_location.npu(), weights.npu())
 print(out)
 ```
 
-# Perception 算子
+# 点云算子
 ## bev_pool
 ### 接口原型
 ```python
-mx_driving.perception.fused.bev_pool(Tensor feat, Tensor geom_feat, int B, int D, int H, int W) -> Tensor
+mx_driving.point.bev_pool(Tensor feat, Tensor geom_feat, int B, int D, int H, int W) -> Tensor
 ```
 ### 功能描述
 BEV池化。可参考论文`BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird's-Eye View Representation`
@@ -619,7 +614,7 @@ BEV池化。可参考论文`BEVFusion: Multi-Task Multi-Sensor Fusion with Unifi
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.perception.fused import bev_pool
+from mx_driving.point import bev_pool
 feat = torch.rand(4, 256).npu()
 feat.requires_grad_()
 geom_feat = torch.tensor([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 2], [0, 0, 0, 3]], dtype=torch.int32).npu()
@@ -628,11 +623,10 @@ loss = bev_pooled_feat.sum()
 loss.backward()
 print(feat.grad)
 ```
-
 ## bev_pool_v2
 ### 接口原型
 ```python
-mx_driving.perception.fused.bev_pool_v2(Tensor depth, feat, Tensor ranks_depth, Tensor ranks_feat, Tensor ranks_bev,
+mx_driving.point.bev_pool_v2(Tensor depth, feat, Tensor ranks_depth, Tensor ranks_feat, Tensor ranks_bev,
                                  List[int] bev_feat_shape, Tensor interval_starts, Tensor interval_lengths) -> Tensor
 ```
 ### 功能描述
@@ -647,41 +641,92 @@ BEV池化优化版。可参考论文`BEVDet: High-performance Multi-camera 3D Ob
 - `interval_starts(Tensor)`：间隔开始张量，数据类型为`int32`。shape为`[N_INTERVALS]`。
 - `interval_lengths(Tensor)`：间隔长度张量，数据类型为`int32`。shape为`[N_INTERVALS]`。
 ### 返回值
-- `bev_pooled_feat(Tensor)`：BEV池化后的特征张量，数据类型为`float32`。shape为`[B, D, H, W, C]`。
-### 约束说明
-- `ranks_depth`的值必须在`[0, B*B*D*H*W]`之间。
-- `ranks_feat`的值必须在`[0, B*N*H*W]`之间。
-- `ranks_bev`的值必须在`[0, B*D*H*W]`之间。
-- C <= 1024
-- B * D * H * W * C <= 2^31, B, D <= 8, H, W <= 256
-- N_RANKS <= 2^21
-- 对于反向也是同样的约束。
+- `bev_pooled_feat(Tensor)`：BEV池化后的特征张量，数据类型为`float32`。shape为`[B, D, H, W, C]`。
+### 约束说明
+- `ranks_depth`的值必须在`[0, B*N*D*H*W]`之间。
+- `ranks_feat`的值必须在`[0, B*N*H*W]`之间。
+- `ranks_bev`的值必须在`[0, B*D*H*W]`之间。
+- C <= 1024
+- B * D * H * W * C <= 2^31, B, D <= 8, H, W <= 256
+- N_RANKS <= 2^21
+- 对于反向也是同样的约束。
+### 支持的型号
+- Atlas A2 训练系列产品
+### 调用示例
+```python
+import torch, torch_npu
+from mx_driving.point import bev_pool_v2
+depth = torch.rand(2, 1, 8, 256, 256).npu()
+feat = torch.rand(2, 1, 256, 256, 64).npu()
+feat.requires_grad_()
+ranks_depth = torch.tensor([0, 1], dtype=torch.int32).npu()
+ranks_feat = torch.tensor([0, 1], dtype=torch.int32).npu()
+ranks_bev = torch.tensor([0, 1], dtype=torch.int32).npu()
+bev_feat_shape = [2, 8, 256, 256, 64]
+interval_starts = torch.tensor([0], dtype=torch.int32).npu()
+interval_lengths = torch.tensor([2], dtype=torch.int32).npu()
+bev_pooled_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths)
+loss = bev_pooled_feat.sum()
+loss.backward()
+print(loss)
+print(feat.grad)
+```
+## furthest_point_sample_with_dist
+### 接口原型
+```python
+mx_driving.point.furthest_point_sample_with_dist(Tensor points, int num_points) -> Tensor
+```
+### 功能描述
+与`npu_furthest_point_sampling`功能相同，但输入略有不同。
+### 参数说明
+- `points(Tensor)`：点云数据，表示各点间的距离，数据类型为`float32`。shape为`[B, N, N]`。其中`B`为batch size，`N`为点的数量。
+- `num_points(int)`：采样点的数量。
+### 返回值
+- `Tensor`：采样点的索引，数据类型为`int32`。shape为`[B, num_points]`。
+### 支持的型号
+- Atlas A2 训练系列产品
+### 调用示例
+```python
+import torch, torch_npu
+from mx_driving.point import furthest_point_sample_with_dist
+points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu()
+out = furthest_point_sample_with_dist(points, 2)
+print(out)
+```
+```text
+tensor([[0, 2]], dtype=torch.int32)
+```
+## npu_furthest_point_sampling
+### 接口原型
+```python
+mx_driving.point.npu_furthest_point_sampling(Tensor points, int num_points) -> Tensor
+```
+### 功能描述
+点云数据的最远点采样。
+### 参数说明
+- `points(Tensor)`：点云数据，数据类型为`float32`。shape为`[B, N, 3]`。其中`B`为batch size，`N`为点的数量，`3`分别代表`x, y, z`。
+- `num_points(int)`：采样点的数量。
+### 返回值
+- `Tensor`：采样点的索引，数据类型为`int32`。shape为`[B, num_points]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.perception.fused import bev_pool_v2
-depth = torch.rand(2, 1, 8, 256, 256).npu()
-feat = torch.rand(2, 1, 256, 256, 64).npu()
-feat.requires_grad_()
-ranks_depth = torch.tensor([0, 1], dtype=torch.int32).npu()
-ranks_feat = torch.tensor([0, 1], dtype=torch.int32).npu()
-ranks_bev = torch.tensor([0, 1], dtype=torch.int32).npu()
-bev_feat_shape = [2, 8, 256, 256, 64]
-interval_starts = torch.tensor([0], dtype=torch.int32).npu()
-interval_lengths = torch.tensor([2], dtype=torch.int32).npu()
-bev_pooled_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths)
-loss = bev_pooled_feat.sum()
-loss.backward()
-print(loss)
-print(feat.grad)
+from mx_driving.point import npu_furthest_point_sampling
+points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu()
+out = npu_furthest_point_sampling(points, 2)
+print(out)
 ```
-
+```text
+tensor([[0, 2]], dtype=torch.int32)
+```
+### 约束说明
+- 输入`points`的shape为`[B, N, 3]`，其总大小（B x N x 3）不应该超过383166。
 ## group_points
 ### 接口原型
 ```python
-mx_driving.perception.point.npu_group_points(Tensor features, Tensor indices) -> Tensor
+mx_driving.point.npu_group_points(Tensor features, Tensor indices) -> Tensor
 ```
 ### 功能描述
 点云数据按照索引重新分组。
@@ -700,7 +745,7 @@ mx_driving.perception.point.npu_group_points(Tensor features, Tensor indices) ->
 ```python
 import torch
 import torch_npu
-import mx_driving.perception.point
+import mx_driving.point
 indices = torch.tensor([[[0, 2, 5, 5], [1, 0, 5, 0], [2, 1, 4, 4]]]).int().npu()
 features = torch.tensor([[[0.9178, -0.7250, -1.6587, 0.0715, -0.2252, 0.4994],
                           [0.6190, 0.1755, -1.7902, -0.5852, -0.3311, 1.9764],
@@ -709,7 +754,7 @@ features = torch.tensor([[[0.9178, -0.7250, -1.6587, 0.0715, -0.2252, 0.4994],
                           [0.7239, 0.2321, -0.6578, -1.1395, -2.3874, 1.1281]]],
                           dtype=torch.float32).npu()
 features.requires_grad = True
-output = mx_driving.perception.point.npu_group_points(features, indices)
+output = mx_driving.point.npu_group_points(features, indices)
 output.backward(output)
 grad_features = features.grad
 ```
@@ -741,166 +786,64 @@ expected_grad_features = tensor(
          [-0.7029,  0.2388,  0.8612,  0.0000, -2.8564, -1.9131],
          [ 2.1717,  0.4642, -1.3156,  0.0000, -4.7748,  3.3843]]], dtype=torch.float32)
 ```
-
-## knn
-### 接口原型
-```python
-mx_driving.common.knn(int k, Tensor xyz, Tensor center_xyz, bool Transposed) -> Tensor
-```
-### 功能描述
-對center_xyz中的每個點找到xyz中對應batch中的距離最近的k個點，并且返回此k個點的索引值。
-### 参数说明
-- `xyz(Tensor)`：点数据，表示(x, y, z)三維坐標，数据类型为`float32`。shape为`[B, N, 3]`(当Transposed=False)或`[B, 3, N]`(当Transposed=True)。其中`B`为batch size，`N`为点的数量。
-- `center_xyz(Tensor)`：点数据，表示(x, y, z)三維坐標，数据类型为`float32`。shape为`[B, npoint, 3]`(当Transposed=False)或`[B, 3, npoint]`(当Transposed=True)。其中`B`为batch size，`npoint`为点的数量。
-- `k(int)`：采样点的数量。
-- `Transposed(bool)`: 輸入是否需要進行轉置
-### 返回值
-- `idx(Tensor)`：采样后的索引数据，数据类型为`int32`。shape为`[B, k, npoint]`。
-### 支持的型号
-- Atlas A2 训练系列产品
-### 调用示例
-```python
-import torch, torch_npu
-from mx_driving.common import knn
-xyz = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu()
-center_xyz = torch.tensor([[1, 2, 3]], [[1, 2, 3]], dtype=torch.float32).npu()
-idx = knn(2, xyz, center_xyz, False)
-print(idx)
-```
-```text
-tensor([[0, 0], [1, 1]], dtype=torch.int32)
-```
-### 算子約束
-1. k必須>0且<100
-2. xyz中的每個batch中的任意一個點到center_xyz對應batch中的任意一個點的距離必須在1e10f以内
-3. xyz和center_xyz的shape必须是3维，当Transposed=True时，xyz和center_xyz的shape的dim的第1位必须是3；当Transposed=False时，xyz和center_xyz的shape的dim的第2位必须是3
-4. 距离相同时索引存在不稳定排序问题,遇到距离精度通过但索引精度错误时，复用不稳定排序的CCB结论
-
-## three_nn
+## voxelization
 ### 接口原型
 ```python
-mx_driving.common.three_nn(Tensor target, Tensor source) -> (Tensor dist, Tensor idx)
+mx_driving.point.voxelization(Tensor points, List[float] voxel_size, List[float] coors_range, int max_points=-1, int max_voxels=-1, bool deterministic=True) -> Tensor
 ```
 ### 功能描述
-對target中的每個點找到source中對應batch中的距離最近的3個點，并且返回此3個點的距離和索引值。
+将点云数据进行体素化。
 ### 参数说明
-- `target(Tensor)`：点数据，表示(x, y, z)三維坐標，数据类型为`float32/float16`。shape为`[B, npoint, 3]`。其中`B`为batch size，`npoint`为点的数量。
-- `source(Tensor)`：点数据，表示(x, y, z)三維坐標，数据类型为`float32/float16`。shape为`[B, N, 3]`。其中`B`为batch size，`N`为点的数量。
+- `points(Tensor)`：点云数据，数据类型为`float32`。shape为`[N, F]`。其中`N`为点的数量，`F`代表每个点的特征维度，且`N > 0, F >= 3`。
+- `voxel_size(List[float])`：体素大小，数据类型为`float32`。shape为`[3]`。其中`3`分别代表`x, y, z`。
+- `coors_range(List[float])`：体素范围，数据类型为`float32`。shape为`[6]`。其中`6`分别代表`x_min, y_min, z_min, x_max, y_max, z_max`。
+- `max_points(int)`：每个体素的最大点数。默认值为`-1`。
+- `max_voxels(int)`：最大体素数。默认值为`-1`。
+- `deterministic(bool)`：是否确定性。默认值为`True`。
 ### 返回值
-- `dist(Tensor)`：采样后的索引数据，数据类型为`float32/float16`。shape为`[B, npoint, 3]`。
-- `idx(Tensor)`：采样后的索引数据，数据类型为`int32/int32`。shape为`[B, npoint, 3]`。
+- `coors(Tensor)`：每个点所属的体素坐标，数据类型为`int32`。shape为`[N, 3]`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import three_nn
-source = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu()
-target = torch.tensor([[1, 2, 3]], [[1, 2, 3]], dtype=torch.float32).npu()
-dist, idx = three_nn(target, source)
-print(dist)
-print(idx)
-```
-```text
-tensor([[2.236, 2.236, 2.236], [2.236, 2.236, 2.236]], dtype=torch.float32)
-tensor([[0, 1, 2], [0, 1, 2]], dtype=torch.int32)
+from mx_driving.point import Voxelization
+points = torch.randint(-20, 100, [16, 3], dtype=torch.float32).npu()
+coors_range = [0, -40, -3, 70.4, 40, 1]
+max_points = -1
+voxel_size = [0.5, 0.5, 0.5]
+dynamic_voxelization = Voxelization(voxel_size, coors_range, max_points)
+out = dynamic_voxelization.forward(points)
+print(out)
 ```
-### 算子約束
-1. source和target的shape必须是3维，且source和target的shape的dim的第2位必须是3
-2. 距离相同时索引存在不稳定排序问题，遇到距离精度通过但索引精度错误时，复用不稳定排序的CCB结论
-## RoipointPool3d
+## npu_dynamic_scatter
 ### 接口原型
 ```python
-mx_driving.common.RoipointPool3d(int num_sampled_points, Tensor points, Tensor point_features, Tensor boxes3d) -> (Tensor pooled_features, Tensor pooled_empty_flag)
+mx_driving.point.npu_dynamic_scatter(Tensor feats, Tensor coors, str reduce_type = 'max') -> Tuple[torch.Tensor, torch.Tensor]
 ```
 ### 功能描述
-对每个3D方案的几何特定特征进行编码。
+将点云特征点在对应体素中进行特征压缩。
 ### 参数说明
-- `num_sampled_points(int)`：特征点的数量，正整数。
-- `points(Tensor)`：点张量，数据类型为`float32, float16`。shape 为`[B, N, 3]`。`3`分别代表`x, y, z`。
-- `point_features(Tensor)`：点特征张量，数据类型为`float32, float16`。shape 为`[B, N, C]`。`C`分别代表`x, y, z`。
-- `boxes3d(Tensor)`：框张量，数据类型为`float32, float16`。shape 为`[B, M, 7]`。`7`分别代表`x, y, z, x_size, y_size, z_size, rz`。
+- `feats(Tensor)`：点云特征张量[N, C]，仅支持两维，数据类型为`float32`，特征向量`C`长度上限为2048。
+- `coors(Tensor)`：体素坐标映射张量[N, 3]，仅支持两维，数据类型为`int32`，此处以x, y, z指代体素三维坐标，其取值范围为`0 <= x, y <= 2048`，`0 <= z <= 256`。
+- `reduce_type(str)`：压缩类型。可选值为`'max'`, `'mean'`, `'sum'`。默认值为`'max'`。
 ### 返回值
-- `pooled_features(Tensor)`：点在框内的特征张量，数据类型为`float32, float16`。shape 为`[B, M, num, 3+C]`。
-- `pooled_empty_flag(Tensor)`：所有点不在框内的空标记张量，数据类型为`int32`。shape 为`[B, M]`。
-### 约束说明
-- `points`、`point_features`和`boxes3d`的数据类型必须相同，以及`B`也必须相同。
-- `num_sampled_points`必须小于等于`N`。
-- 数据类型为`float32`时，建议`B`小于100、`N`小于等于2640、`M`小于等于48、`num_sampled_points`小于等于48，个别shape值略微超过建议值无影响，但所有shape值均大于建议值时，算子执行会发生错误。
-- 数据类型为`float16`时，建议`B`小于100、`N`小于等于3360、`M`小于等于60、`num_sampled_points`小于等于60，个别shape值略微超过建议值无影响，但所有shape值均大于建议值时，算子执行会发生错误。
-- `N`/`M`的值越大，性能劣化越严重，建议`N`小于`M`的六百倍，否则性能可能会低于0.1x A100。
+- `voxel_feats(Tensor)`：压缩后的体素特征张量，仅支持两维，数据类型为`float32`。
+- `voxel_coors(Tensor)`：去重后的体素坐标，仅支持两维，数据类型为`int32`。
 ### 支持的型号
 - Atlas A2 训练系列产品
 ### 调用示例
 ```python
 import torch, torch_npu
-from mx_driving.common import RoIPointPool3d
-num_sampled_points = 1
-points = torch.tensor([[[1, 2, 3]]], dtype=torch.float).npu()
-point_features = points.clone()
-boxes3d = torch.tensor([[[1, 2, 3, 4, 5, 6, 1]]], dtype=torch.float).npu()
-roipoint_pool3d = RoIPointPool3d(num_sampled_points)
-pooled_features, pooled_empty_flag = roipoint_pool3d(points, point_features, boxes3d)
-print(pooled_features)
-print(pooled_empty_flag)
-```
-```text
-tensor([[[[1., 2., 3., 1., 2., 3.]]]])
-tensor([[0]], dtype=torch.int32)
-```
-## boxes_overlap_bev
-### 接口原型
-```python
-mx_driving.perception.vision.boxes_overlap_bev(Tensor boxes_a, Tensor boxes_b) -> Tensor
-```
-### 功能描述
-Calculates the intersection of bounding boxes in Bird's Eye View.
-### 参数说明
-- `boxes_a (Tensor)`：第一组bounding boxes，数据类型为`float32`。shape为`[M, 5]`。其中`5`分别代表`x1, y1, x2, y2, angle`, `x1, y1, x2, y2`代表box四个顶点的横纵坐标，`angle`代表box的弧度制旋转角。
-- `boxes_b (Tensor)`：第二组bounding boxes，数据类型为`float32`。shape为`[N, 5]`。其中`5`分别代表`x1, y1, x2, y2, angle`, `x1, y1, x2, y2`代表box四个顶点的横纵坐标，`angle`代表box的弧度制旋转角。
-### 返回值
-- `area_overlap(Tensor)`：包含两组bounding boxes交叠面积的张量，数据类型为`float32`。shape为`[M, N]`。
-### 约束说明
+from mx_driving.point import npu_dynamic_scatter
 
-### 支持的型号
-- Atlas A2 训练系列产品
-### 调用示例
-```python
-import torch, torch_npu
-from mx_driving.perception.vision import boxes_overlap_bev
-boxes_a = torch.tensor([[0, 0, 2, 2, 0]], dtype=torch.float32).npu()
-boxes_b = torch.tensor([[1, 1, 3, 3, 0]], dtype=torch.float32).npu()
-area_overlap = boxes_overlap_bev(boxes_a, boxes_b)
-print(area_overlap)
-```
+feats = torch.tensor([[1, 2, 3], [3, 2, 1], [7, 8, 9], [9, 8, 7]], dtype=torch.float32).npu()
+coors = torch.tensor([[1, 1, 1], [1, 1, 1], [2, 2, 2], [2, 2, 2]], dtype=torch.int32).npu()
+voxel_feats, voxel_coors = npu_dynamic_scatter(feats, coors, 'max')
 
-## npu_max_pool2d
-### 接口原型
-```python
-mx_driving.common.npu_max_pool2d(Tensor x, int kernel_size, int stride, int padding) -> Tensor
-```
-### 功能描述
-Performs max pooling on the input and outputs max values.
-### 参数说明
-- `x (Tensor)`：一组待池化对象，数据类型为`float32`，format为NCHW。
-### 返回值
-- `y (Tensor)`：池化后的最大值，数据类型为`float32`，format为NCHW。
-### 约束说明
-kernel_size仅支持3，stride仅支持2，padding仅支持1，且输入C轴数据量要求为8的倍数，H和W需要大于100。
-### 支持的型号
-- Atlas A2 训练系列产品
-### 调用示例
-```python
-import torch, torch_npu
-from mx_driving.common import npu_max_pool2d
-kernel_size = 3
-stride = 2
-padding = 1
-x = torch.randn(18, 64, 464, 800).npu()
-res = npu_max_pool2d(x, kernel_size, stride, padding)
-print(res)
+print(voxel_feats)
+print(voxel_coors)
 ```
-
 ## unique_voxel
 ### 接口原型
 ```python
@@ -936,3 +879,66 @@ print(argsort_indices)
 print(uin_argsort_indices)
 
 ```
+
+
+## voxel_pooling_train
+### 接口原型
+```python
+mx_driving.point.npu_voxel_pooling_train(Tensor geom_xyz, Tensor input_features, List[int] voxel_num) -> Tensor
+```
+### 功能描述
+点云数据体素化。
+### 参数说明
+- `geom_xyz`：体素坐标，数据类型为`int32`，shape为`[B, N, 3]`，其中`3`表示`x, y, z`。
+- `input_features`：点云数据，数据类型为`float32|float16`，shape为`[B, N, C]`。
+- `voxel_num`：体素格子长宽高，数据类型为`int32`，shape为`[3]`，其中`3`表示体素格子的长宽高。
+### 返回值
+- `output(Tensor)`：输出结果，数据类型为`float32|float16`。shape为`[B, num_voxel_y, num_voxel_x, C]`。
+### 约束说明
+- B <= 128
+- N <= 100000
+- C <= 256
+- num_voxel_x <= 1000
+- num_voxel_y <= 1000
+- num_voxel_z <= 10
+- B * num_voxel_y * num_voxel_x * C <= 100000000
+- B * N * C <= 100000000
+- 反向具有相同约束。
+### 支持的型号
+- Atlas A2 训练系列产品
+### 调用示例
+```python
+import torch
+import torch_npu
+import mx_driving.point
+
+def gen_data(geom_shape, feature_shape, coeff, batch_size, num_channels, dtype):
+       geom_xyz = torch.rand(geom_shape) * coeff
+       geom_xyz = geom_xyz.reshape(batch_size, -1, 3)
+       geom_xyz[:, :, 2] /= 100
+       geom_xyz_cpu = geom_xyz.int()
+       features = torch.rand(feature_shape, dtype=dtype) - 0.5
+       features_cpu = features.reshape(batch_size, -1, num_channels)
+
+       return geom_xyz_cpu, features_cpu
+
+dtype = torch.float32
+coeff = 90
+voxel_num = [128, 128, 1]
+batch_size = 2
+num_points = 40
+num_channel = 80
+xyz = 3
+
+geom_shape = [batch_size, num_points, xyz]
+feature_shape = [batch_size, num_points, num_channel]
+
+geom_cpu, feature_cpu = gen_data(geom_shape, feature_shape, coeff, batch_size, num_channel, dtype)
+
+geom_npu = geom_cpu.npu()
+feature_npu = feature_cpu.npu()
+
+result_npu = mx_driving.point.npu_voxel_pooling_train(geom_npu, feature_npu, voxel_num)
+```
+# 稀疏卷积算子
+
diff --git a/include/csrc/pybind.h b/include/csrc/pybind.h
index 27ccf462..c2851f26 100644
--- a/include/csrc/pybind.h
+++ b/include/csrc/pybind.h
@@ -18,9 +18,9 @@
 #include <pybind11/numpy.h>
 
 void init_common(pybind11::module& m);
-void init_motion(pybind11::module& m);
-void init_perception_fused(pybind11::module& m);
-void init_perception_point(pybind11::module& m);
-void init_perception_vision(pybind11::module& m);
+void init_fused(pybind11::module& m);
+void init_point(pybind11::module& m);
+void init_data(pybind11::module& m);
+void init_detection(pybind11::module& m);
 void init_spconv(pybind11::module& m);
 #endif // CSRC_PYBIND_H_
diff --git a/examples/BEVFormer/.gitignore b/model_examples/BEVFormer/.gitignore
similarity index 99%
rename from examples/BEVFormer/.gitignore
rename to model_examples/BEVFormer/.gitignore
index 4b6213ed..95ec5be9 100644
--- a/examples/BEVFormer/.gitignore
+++ b/model_examples/BEVFormer/.gitignore
@@ -13,7 +13,6 @@ ckpts/
 data/
 ckpts
 data
-test/
 val/
 work_dirs/
 develop-eggs/
diff --git a/examples/BEVFormer/LICENSE b/model_examples/BEVFormer/LICENSE
similarity index 100%
rename from examples/BEVFormer/LICENSE
rename to model_examples/BEVFormer/LICENSE
diff --git a/examples/BEVFormer/README.md b/model_examples/BEVFormer/README.md
similarity index 100%
rename from examples/BEVFormer/README.md
rename to model_examples/BEVFormer/README.md
diff --git a/examples/BEVFormer/README_ORI.md b/model_examples/BEVFormer/README_ORI.md
similarity index 100%
rename from examples/BEVFormer/README_ORI.md
rename to model_examples/BEVFormer/README_ORI.md
diff --git a/examples/BEVFormer/docs/getting_started.md b/model_examples/BEVFormer/docs/getting_started.md
similarity index 100%
rename from examples/BEVFormer/docs/getting_started.md
rename to model_examples/BEVFormer/docs/getting_started.md
diff --git a/examples/BEVFormer/docs/install.md b/model_examples/BEVFormer/docs/install.md
similarity index 100%
rename from examples/BEVFormer/docs/install.md
rename to model_examples/BEVFormer/docs/install.md
diff --git a/examples/BEVFormer/docs/prepare_dataset.md b/model_examples/BEVFormer/docs/prepare_dataset.md
similarity index 100%
rename from examples/BEVFormer/docs/prepare_dataset.md
rename to model_examples/BEVFormer/docs/prepare_dataset.md
diff --git a/examples/BEVFormer/mmcv_need/base_runner.py b/model_examples/BEVFormer/mmcv_need/base_runner.py
similarity index 100%
rename from examples/BEVFormer/mmcv_need/base_runner.py
rename to model_examples/BEVFormer/mmcv_need/base_runner.py
diff --git a/examples/BEVFormer/mmcv_need/distributed.py b/model_examples/BEVFormer/mmcv_need/distributed.py
similarity index 100%
rename from examples/BEVFormer/mmcv_need/distributed.py
rename to model_examples/BEVFormer/mmcv_need/distributed.py
diff --git a/examples/BEVFormer/mmcv_need/epoch_based_runner.py b/model_examples/BEVFormer/mmcv_need/epoch_based_runner.py
similarity index 100%
rename from examples/BEVFormer/mmcv_need/epoch_based_runner.py
rename to model_examples/BEVFormer/mmcv_need/epoch_based_runner.py
diff --git a/examples/BEVFormer/mmcv_need/modulated_deform_conv.py b/model_examples/BEVFormer/mmcv_need/modulated_deform_conv.py
similarity index 100%
rename from examples/BEVFormer/mmcv_need/modulated_deform_conv.py
rename to model_examples/BEVFormer/mmcv_need/modulated_deform_conv.py
diff --git a/examples/BEVFormer/mmcv_need/optimizer.py b/model_examples/BEVFormer/mmcv_need/optimizer.py
similarity index 100%
rename from examples/BEVFormer/mmcv_need/optimizer.py
rename to model_examples/BEVFormer/mmcv_need/optimizer.py
diff --git a/examples/BEVFormer/mmcv_need/points_in_polygons_npu.cpp b/model_examples/BEVFormer/mmcv_need/points_in_polygons_npu.cpp
similarity index 100%
rename from examples/BEVFormer/mmcv_need/points_in_polygons_npu.cpp
rename to model_examples/BEVFormer/mmcv_need/points_in_polygons_npu.cpp
diff --git a/examples/BEVFormer/mmcv_need/runtime.txt b/model_examples/BEVFormer/mmcv_need/runtime.txt
similarity index 100%
rename from examples/BEVFormer/mmcv_need/runtime.txt
rename to model_examples/BEVFormer/mmcv_need/runtime.txt
diff --git a/examples/BEVFormer/mmdet3d_need/__init__.py b/model_examples/BEVFormer/mmdet3d_need/__init__.py
similarity index 100%
rename from examples/BEVFormer/mmdet3d_need/__init__.py
rename to model_examples/BEVFormer/mmdet3d_need/__init__.py
diff --git a/examples/BEVFormer/mmdet3d_need/nuscenes_dataset.py b/model_examples/BEVFormer/mmdet3d_need/nuscenes_dataset.py
similarity index 100%
rename from examples/BEVFormer/mmdet3d_need/nuscenes_dataset.py
rename to model_examples/BEVFormer/mmdet3d_need/nuscenes_dataset.py
diff --git a/examples/BEVFormer/mmdet3d_need/runtime.txt b/model_examples/BEVFormer/mmdet3d_need/runtime.txt
similarity index 100%
rename from examples/BEVFormer/mmdet3d_need/runtime.txt
rename to model_examples/BEVFormer/mmdet3d_need/runtime.txt
diff --git a/examples/BEVFormer/mmdet_need/__init__.py b/model_examples/BEVFormer/mmdet_need/__init__.py
similarity index 100%
rename from examples/BEVFormer/mmdet_need/__init__.py
rename to model_examples/BEVFormer/mmdet_need/__init__.py
diff --git a/examples/BEVFormer/mmdet_need/resnet.py b/model_examples/BEVFormer/mmdet_need/resnet.py
similarity index 100%
rename from examples/BEVFormer/mmdet_need/resnet.py
rename to model_examples/BEVFormer/mmdet_need/resnet.py
diff --git a/examples/BEVFormer/projects/__init__.py b/model_examples/BEVFormer/projects/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/__init__.py
rename to model_examples/BEVFormer/projects/__init__.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/coco_instance.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/coco_instance.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/coco_instance.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/coco_instance.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-3class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-3class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-3class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-3class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-car.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-car.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-car.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/kitti-3d-car.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/lyft-3d.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/lyft-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/lyft-3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/lyft-3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/nuim_instance.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/nuim_instance.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/nuim_instance.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/nuim_instance.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/nus-3d.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/nus-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/nus-3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/nus-3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/nus-mono3d.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/nus-mono3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/nus-mono3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/nus-mono3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/range100_lyft-3d.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/range100_lyft-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/range100_lyft-3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/range100_lyft-3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/s3dis-3d-5class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/s3dis-3d-5class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/s3dis-3d-5class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/s3dis-3d-5class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/scannet-3d-18class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/scannet-3d-18class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/scannet-3d-18class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/scannet-3d-18class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/scannet_seg-3d-20class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/scannet_seg-3d-20class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/scannet_seg-3d-20class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/scannet_seg-3d-20class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/sunrgbd-3d-10class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/sunrgbd-3d-10class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/sunrgbd-3d-10class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/sunrgbd-3d-10class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-3class.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-3class.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-3class.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-3class.py
diff --git a/examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-car.py b/model_examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-car.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-car.py
rename to model_examples/BEVFormer/projects/configs/_base_/datasets/waymoD5-3d-car.py
diff --git a/examples/BEVFormer/projects/configs/_base_/default_runtime.py b/model_examples/BEVFormer/projects/configs/_base_/default_runtime.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/default_runtime.py
rename to model_examples/BEVFormer/projects/configs/_base_/default_runtime.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/3dssd.py b/model_examples/BEVFormer/projects/configs/_base_/models/3dssd.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/3dssd.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/3dssd.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py b/model_examples/BEVFormer/projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py b/model_examples/BEVFormer/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py b/model_examples/BEVFormer/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/fcos3d.py b/model_examples/BEVFormer/projects/configs/_base_/models/fcos3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/fcos3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/fcos3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/groupfree3d.py b/model_examples/BEVFormer/projects/configs/_base_/models/groupfree3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/groupfree3d.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/groupfree3d.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/h3dnet.py b/model_examples/BEVFormer/projects/configs/_base_/models/h3dnet.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/h3dnet.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/h3dnet.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_kitti.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_kitti.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_kitti.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_kitti.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_waymo.py b/model_examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_waymo.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_waymo.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/hv_second_secfpn_waymo.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/imvotenet_image.py b/model_examples/BEVFormer/projects/configs/_base_/models/imvotenet_image.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/imvotenet_image.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/imvotenet_image.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/mask_rcnn_r50_fpn.py b/model_examples/BEVFormer/projects/configs/_base_/models/mask_rcnn_r50_fpn.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/mask_rcnn_r50_fpn.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/mask_rcnn_r50_fpn.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/paconv_cuda_ssg.py b/model_examples/BEVFormer/projects/configs/_base_/models/paconv_cuda_ssg.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/paconv_cuda_ssg.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/paconv_cuda_ssg.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/paconv_ssg.py b/model_examples/BEVFormer/projects/configs/_base_/models/paconv_ssg.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/paconv_ssg.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/paconv_ssg.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/parta2.py b/model_examples/BEVFormer/projects/configs/_base_/models/parta2.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/parta2.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/parta2.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/pointnet2_msg.py b/model_examples/BEVFormer/projects/configs/_base_/models/pointnet2_msg.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/pointnet2_msg.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/pointnet2_msg.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/pointnet2_ssg.py b/model_examples/BEVFormer/projects/configs/_base_/models/pointnet2_ssg.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/pointnet2_ssg.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/pointnet2_ssg.py
diff --git a/examples/BEVFormer/projects/configs/_base_/models/votenet.py b/model_examples/BEVFormer/projects/configs/_base_/models/votenet.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/models/votenet.py
rename to model_examples/BEVFormer/projects/configs/_base_/models/votenet.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/cosine.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/cosine.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/cosine.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/cosine.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/cyclic_20e.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/cyclic_20e.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/cyclic_20e.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/cyclic_20e.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/cyclic_40e.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/cyclic_40e.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/cyclic_40e.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/cyclic_40e.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/mmdet_schedule_1x.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/mmdet_schedule_1x.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/mmdet_schedule_1x.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/mmdet_schedule_1x.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/schedule_2x.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/schedule_2x.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/schedule_2x.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/schedule_2x.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/schedule_3x.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/schedule_3x.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/schedule_3x.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/schedule_3x.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_150e.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_150e.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_150e.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_150e.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_200e.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_200e.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_200e.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_200e.py
diff --git a/examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_50e.py b/model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_50e.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_50e.py
rename to model_examples/BEVFormer/projects/configs/_base_/schedules/seg_cosine_50e.py
diff --git a/examples/BEVFormer/projects/configs/bevformer/bevformer_base.py b/model_examples/BEVFormer/projects/configs/bevformer/bevformer_base.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformer/bevformer_base.py
rename to model_examples/BEVFormer/projects/configs/bevformer/bevformer_base.py
diff --git a/examples/BEVFormer/projects/configs/bevformer/bevformer_small.py b/model_examples/BEVFormer/projects/configs/bevformer/bevformer_small.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformer/bevformer_small.py
rename to model_examples/BEVFormer/projects/configs/bevformer/bevformer_small.py
diff --git a/examples/BEVFormer/projects/configs/bevformer/bevformer_tiny.py b/model_examples/BEVFormer/projects/configs/bevformer/bevformer_tiny.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformer/bevformer_tiny.py
rename to model_examples/BEVFormer/projects/configs/bevformer/bevformer_tiny.py
diff --git a/examples/BEVFormer/projects/configs/bevformer_fp16/bevformer_tiny_fp16.py b/model_examples/BEVFormer/projects/configs/bevformer_fp16/bevformer_tiny_fp16.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformer_fp16/bevformer_tiny_fp16.py
rename to model_examples/BEVFormer/projects/configs/bevformer_fp16/bevformer_tiny_fp16.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-24ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-24ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-24ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-24ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-48ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-48ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-48ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-48ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-24ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-24ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-24ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-24ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-48ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-48ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-48ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t1-base-48ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-24ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-24ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-24ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-24ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-48ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-48ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-48ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t2-48ep.py
diff --git a/examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t8-24ep.py b/model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t8-24ep.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t8-24ep.py
rename to model_examples/BEVFormer/projects/configs/bevformerv2/bevformerv2-r50-t8-24ep.py
diff --git a/examples/BEVFormer/projects/configs/datasets/custom_lyft-3d.py b/model_examples/BEVFormer/projects/configs/datasets/custom_lyft-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/datasets/custom_lyft-3d.py
rename to model_examples/BEVFormer/projects/configs/datasets/custom_lyft-3d.py
diff --git a/examples/BEVFormer/projects/configs/datasets/custom_nus-3d.py b/model_examples/BEVFormer/projects/configs/datasets/custom_nus-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/datasets/custom_nus-3d.py
rename to model_examples/BEVFormer/projects/configs/datasets/custom_nus-3d.py
diff --git a/examples/BEVFormer/projects/configs/datasets/custom_waymo-3d.py b/model_examples/BEVFormer/projects/configs/datasets/custom_waymo-3d.py
similarity index 100%
rename from examples/BEVFormer/projects/configs/datasets/custom_waymo-3d.py
rename to model_examples/BEVFormer/projects/configs/datasets/custom_waymo-3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/__init__.py
similarity index 83%
rename from examples/BEVFormer/projects/mmdet3d_plugin/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/__init__.py
index 1c81f6a2..267b2d4f 100644
--- a/examples/BEVFormer/projects/mmdet3d_plugin/__init__.py
+++ b/model_examples/BEVFormer/projects/mmdet3d_plugin/__init__.py
@@ -1,3 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright 2024 Huawei Technologies Co., Ltd
 import models.utils
 import bevformer
 import dd3d
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/mmdet_train.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/mmdet_train.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/mmdet_train.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/mmdet_train.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/test.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/test.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/test.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/test.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/train.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/train.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/train.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/apis/train.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bev_head.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bev_head.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bev_head.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bev_head.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bevformer_head.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bevformer_head.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bevformer_head.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/dense_heads/bevformer_head.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformerV2.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformerV2.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformerV2.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformerV2.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/custom_base_transformer_layer.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/custom_base_transformer_layer.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/custom_base_transformer_layer.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/custom_base_transformer_layer.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/decoder.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/decoder.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/decoder.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/decoder.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/encoder.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/encoder.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/encoder.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/encoder.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/group_attention.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/group_attention.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/group_attention.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/group_attention.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/multi_scale_deformable_attn_function.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/multi_scale_deformable_attn_function.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/multi_scale_deformable_attn_function.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/multi_scale_deformable_attn_function.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
similarity index 95%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
index d7dccdb6..c72099ed 100644
--- a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
+++ b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
@@ -3,20 +3,20 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # Copyright 2024 Huawei Technologies Co., Ltd
 # ---------------------------------------------
-#  Modified by ZheXu Liu
+#  Modified by Zhiqi Li
 # ---------------------------------------------
 
-from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
 import warnings
+import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
 from mmcv.cnn import xavier_init, constant_init
 from mmcv.cnn.bricks.registry import (ATTENTION,
                                       TRANSFORMER_LAYER,
                                       TRANSFORMER_LAYER_SEQUENCE)
 from mmcv.cnn.bricks.transformer import build_attention
-import math
 from mmcv.runner import force_fp32
 from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
 from projects.mmdet3d_plugin.models.utils.bricks import run_time
@@ -26,6 +26,8 @@ bev_mask_global = torch.tensor([]).npu()
 indexes_global = None
 max_len_global = None
 bev_mask_id_global = -1
+
+
 @ATTENTION.register_module()
 class SpatialCrossAttention(BaseModule):
     """An attention module used in BEVFormer.
@@ -133,23 +135,16 @@ class SpatialCrossAttention(BaseModule):
 
         D = reference_points_cam.size(3)
         indexes = []
-        global bev_mask_global, indexes_global, max_len_global, bev_mask_id_global, count_global
+        global bev_mask_global, indexes_global, max_len_global, bev_mask_id_global
         bev_mask_id = id(bev_mask)
         if bev_mask_id == bev_mask_id_global:
             indexes = indexes_global
             max_len = max_len_global
-            count = count_global
         else:
-            count = torch.any(bev_mask, 3)
-            bev_mask_ = count.squeeze()
-            for i, mask_per_img in enumerate(bev_mask_):
-                index_query_per_img = mask_per_img.nonzero().squeeze(-1)
+            for i, mask_per_img in enumerate(bev_mask):
+                index_query_per_img = mask_per_img[0].sum(-1).to(torch.float).nonzero().squeeze(-1)
                 indexes.append(index_query_per_img)
-
             max_len = max([len(each) for each in indexes])
-            count = count.permute(1, 2, 0).sum(-1)
-            count = torch.clamp(count, min=1.0)
-            count_global = count
             bev_mask_global = bev_mask.clone()
             indexes_global = indexes
             max_len_global = max_len
@@ -161,9 +156,9 @@ class SpatialCrossAttention(BaseModule):
         reference_points_rebatch = reference_points_cam.new_zeros(
             [bs, self.num_cams, max_len, D, 2])
         
-        for i, reference_points_per_img in enumerate(reference_points_cam):   
-            index_query_per_img = indexes[i]
-            for j in range(bs):
+        for j in range(bs):
+            for i, reference_points_per_img in enumerate(reference_points_cam):   
+                index_query_per_img = indexes[i]
                 queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]
                 reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img]
 
@@ -174,14 +169,16 @@ class SpatialCrossAttention(BaseModule):
         value = value.permute(2, 0, 1, 3).reshape(
             bs * self.num_cams, l, self.embed_dims)
 
-        queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), key=key, value=value,
-                                            reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), spatial_shapes=spatial_shapes,
+        queries = self.deformable_attention(query=queries_rebatch.view(bs * self.num_cams, max_len, self.embed_dims), key=key, value=value,
+                                            reference_points=reference_points_rebatch.view(bs * self.num_cams, max_len, D, 2), spatial_shapes=spatial_shapes,
                                             level_start_index=level_start_index).view(bs, self.num_cams, max_len, self.embed_dims)
         for j in range(bs):
             for i, index_query_per_img in enumerate(indexes):
                 slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)]
 
-
+        count = bev_mask.sum(-1) > 0
+        count = count.permute(1, 2, 0).sum(-1)
+        count = torch.clamp(count, min=1.0)
         slots = slots / count[..., None]
         slots = self.output_proj(slots)
 
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformer.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformer.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformer.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformer.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformerV2.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformerV2.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformerV2.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/modules/transformerV2.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/util.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/util.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/util.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/bbox/util.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/builder.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/builder.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/builder.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/builder.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscnes_eval.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscnes_eval.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscnes_eval.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/nuscnes_eval.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/augmentation.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/augmentation.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/augmentation.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/augmentation.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/formating.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/formating.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/formating.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/formating.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/loading.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/loading.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/loading.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/loading.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/transform_3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/transform_3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/transform_3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/pipelines/transform_3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/image_list.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/image_list.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/image_list.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/image_list.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/pose.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/pose.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/pose.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/pose.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/transform3d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/transform3d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/transform3d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/transform3d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/comm.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/comm.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/comm.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/comm.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/geometry.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/geometry.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/geometry.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/geometry.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tasks.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tasks.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tasks.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tasks.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tensor2d.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tensor2d.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tensor2d.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/tensor2d.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/visualization.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/visualization.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/visualization.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/dd3d/utils/visualization.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/vovnet.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/vovnet.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/vovnet.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/backbones/vovnet.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py
similarity index 81%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py
index f41f0a7a..b6274b94 100644
--- a/examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py
+++ b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/hooks/hooks.py
@@ -1,3 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright 2024 Huawei Technologies Co., Ltd
 from mmcv.runner.hooks.hook import HOOKS, Hook
 from projects.mmdet3d_plugin.models.utils import run_time
 
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/opt/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/opt/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/opt/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/opt/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/opt/adamw.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/opt/adamw.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/opt/adamw.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/opt/adamw.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/__init__.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/__init__.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/utils/__init__.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/__init__.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/bricks.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/bricks.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/utils/bricks.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/bricks.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py
similarity index 97%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py
index e97cd766..f5be94e2 100644
--- a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py
+++ b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/grid_mask.py
@@ -1,3 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright 2024 Huawei Technologies Co., Ltd
 import torch
 import torch.nn as nn
 import numpy as np
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/position_embedding.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/position_embedding.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/utils/position_embedding.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/position_embedding.py
diff --git a/examples/BEVFormer/projects/mmdet3d_plugin/models/utils/visual.py b/model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/visual.py
similarity index 100%
rename from examples/BEVFormer/projects/mmdet3d_plugin/models/utils/visual.py
rename to model_examples/BEVFormer/projects/mmdet3d_plugin/models/utils/visual.py
diff --git a/examples/BEVFormer/public_address_statement.md b/model_examples/BEVFormer/public_address_statement.md
similarity index 100%
rename from examples/BEVFormer/public_address_statement.md
rename to model_examples/BEVFormer/public_address_statement.md
diff --git a/examples/BEVFormer/requirements.txt b/model_examples/BEVFormer/requirements.txt
similarity index 100%
rename from examples/BEVFormer/requirements.txt
rename to model_examples/BEVFormer/requirements.txt
diff --git a/model_examples/BEVFormer/test/env_npu.sh b/model_examples/BEVFormer/test/env_npu.sh
new file mode 100644
index 00000000..aa4642ab
--- /dev/null
+++ b/model_examples/BEVFormer/test/env_npu.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+CANN_INSTALL_PATH_CONF='/etc/Ascend/ascend_cann_install.info'  # info file from which Install_Path is read when it exists
+
+if [ -f $CANN_INSTALL_PATH_CONF ]; then
+    CANN_INSTALL_PATH=$(cat $CANN_INSTALL_PATH_CONF | grep Install_Path | cut -d "=" -f 2)
+else
+    CANN_INSTALL_PATH="/usr/local/Ascend"  # fallback used when the info file is absent
+fi
+
+if [ -d ${CANN_INSTALL_PATH}/ascend-toolkit/latest ]; then
+    source ${CANN_INSTALL_PATH}/ascend-toolkit/set_env.sh
+else
+    source ${CANN_INSTALL_PATH}/nnae/set_env.sh
+fi
+msnpureport -g error -d 0  # NOTE(review): presumably sets the log level to error on devices 0-7 - confirm against the msnpureport docs
+msnpureport -g error -d 1
+msnpureport -g error -d 2
+msnpureport -g error -d 3
+msnpureport -g error -d 4
+msnpureport -g error -d 5
+msnpureport -g error -d 6
+msnpureport -g error -d 7
+
+# Set the shape data cache
+export HOST_CACHE_CAPACITY=20
+# Print host logs to the serial port, 0-off/1-on
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+# Set the default log level, 0-debug/1-info/2-warning/3-error
+export ASCEND_GLOBAL_LOG_LEVEL=3
+# Set the Event log enable flag, 0-off/1-on
+export ASCEND_GLOBAL_EVENT_ENABLE=0
+# Set whether taskque is enabled, 0-off/1-on
+export TASK_QUEUE_ENABLE=1
+# Set whether the combined flag is enabled, 0-off/1-on
+export COMBINED_ENABLE=1
+# HCCL whitelist switch, 1-disabled/0-enabled
+export HCCL_WHITELIST_DISABLE=1
+export HCCL_IF_IP=$(hostname -I |awk '{print $1}')
+export HCCL_CONNECT_TIMEOUT=1200
+
+
+path_lib=$(python3 -c """
+import sys
+import re
+result=''
+for index in range(len(sys.path)):
+    match_sit = re.search('-packages', sys.path[index])
+    if match_sit is not None:
+        match_lib = re.search('lib', sys.path[index])
+
+        if match_lib is not None:
+            end=match_lib.span()[1]
+            result += sys.path[index][0:end] + ':'
+
+        result+=sys.path[index] + '/torch/lib:'
+print(result)"""
+)
+
+echo ${path_lib}  # prints the colon-separated list built from the '-packages' entries of sys.path
diff --git a/model_examples/BEVFormer/test/train_full_8p_base_fp32.sh b/model_examples/BEVFormer/test/train_full_8p_base_fp32.sh
new file mode 100644
index 00000000..d88070b2
--- /dev/null
+++ b/model_examples/BEVFormer/test/train_full_8p_base_fp32.sh
@@ -0,0 +1,97 @@
+# Full-training launcher for BEVFormer_Base. Network name, same as the directory name; review and adjust per model
+Network="BEVFormer_Base"
+batch_size=1
+world_size=8
+epochs=24
+
+# Run from the directory that holds the test folder (cd up when started inside test); test_path_dir is the path of the test folder
+cur_path=$(pwd)
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ]; then
+  test_path_dir=${cur_path}
+  cd ..
+  cur_path=$(pwd)
+else
+  test_path_dir=${cur_path}/test
+fi
+
+source "${test_path_dir}/env_npu.sh"
+
+# Create the output directory (any previous one is deleted first, so the path is quoted); no need to modify
+output_path=${cur_path}/test/output/
+
+if [ -d "${output_path}" ]; then
+  rm -rf "${output_path}"
+fi
+
+mkdir -p "${output_path}"
+
+for para in "$@"
+do
+    if [[ $para == --epochs* ]];then
+        epochs=${para#*=}
+    fi
+done
+
+# Training start time; no need to modify
+start_time=$(date +%s)
+
+sed -i "s|log_config = dict(interval=1,|log_config = dict(interval=50,|g" projects/configs/bevformer/bevformer_base.py
+sed -i "s|total_epochs = .*|total_epochs = ${epochs}|g" projects/configs/bevformer/bevformer_base.py
+sed -i "s|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs, stop_iters=[0-9]*)|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)|g" projects/configs/bevformer/bevformer_base.py
+
+bash ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py ${world_size} > "${test_path_dir}/output/train_full_8p_base_fp32.log" 2>&1 &
+
+wait
+
+# Training end time; no need to modify
+end_time=$(date +%s)
+e2e_time=$(($end_time - $start_time))
+
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+
+# Collect performance data; no need to modify
+# Per-iteration training time: mean of the last 10 "time:" values in the log (left empty when the log has none); no need to modify
+TrainingTime=$(grep -o ", time: [0-9.]*" "${test_path_dir}/output/train_full_8p_base_fp32.log" | tail -n 10 | grep -o "[0-9.]*" | awk '{sum += $1} END {if (NR > 0) print sum/NR}')
+
+# Throughput (left empty when TrainingTime is missing or not positive)
+ActualFPS=$(awk -v bs="${batch_size}" -v ws="${world_size}" -v t="${TrainingTime}" 'BEGIN {if (t > 0) print (bs * ws) / t}')
+
+# Print; no need to modify
+echo "Final Performance images/sec : $ActualFPS"
+
+# Loss value (last one in the log); no need to modify
+ActualLoss=$(grep -o "loss: [0-9.]*" "${test_path_dir}/output/train_full_8p_base_fp32.log" | awk 'END {print $NF}')
+
+# NDS value (last one in the log)
+NDS=$(grep -o "pts_bbox_NuScenes/NDS: [0-9.]*" "${test_path_dir}/output/train_full_8p_base_fp32.log" | awk 'END {print $NF}')
+
+# mAP value (last one in the log)
+mAP=$(grep -o "pts_bbox_NuScenes/mAP: [0-9.]*" "${test_path_dir}/output/train_full_8p_base_fp32.log" | awk 'END {print $NF}')
+
+# Print; no need to modify
+echo "Final Train Loss : ${ActualLoss}"
+echo "NDS : ${NDS}"
+echo "mAP : ${mAP}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of results for performance monitoring
+# Training case information; no need to modify
+BatchSize=${batch_size}
+WORLD_SIZE=${world_size}
+DeviceType=$(uname -m)
+CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'acc'
+
+# Write the key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" >"${test_path_dir}/output/${CaseName}.log"
+echo "RankSize = ${WORLD_SIZE}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "BatchSize = ${BatchSize}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "DeviceType = ${DeviceType}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "CaseName = ${CaseName}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "ActualFPS = ${ActualFPS}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "TrainingTime = ${TrainingTime}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "ActualLoss = ${ActualLoss}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "NDS = ${NDS}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "mAP = ${mAP}" >>"${test_path_dir}/output/${CaseName}.log"
+echo "E2ETrainingTime = ${e2e_time}" >>"${test_path_dir}/output/${CaseName}.log"
\ No newline at end of file
diff --git a/model_examples/BEVFormer/test/train_performance_8p_base_fp32.sh b/model_examples/BEVFormer/test/train_performance_8p_base_fp32.sh
new file mode 100644
index 00000000..7088b69c
--- /dev/null
+++ b/model_examples/BEVFormer/test/train_performance_8p_base_fp32.sh
@@ -0,0 +1,92 @@
+# Network name (same as the model directory name); review and adjust for each model.
+Network="BEVFormer_Base"
+batch_size=1
+world_size=8
+
+# Run from the directory that contains test/ for compatibility; test_path_dir is the path of the test folder.
+cur_path=$(pwd)
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ]; then
+  test_path_dir=${cur_path}
+  cd ..
+  cur_path=$(pwd)
+else
+  test_path_dir=${cur_path}/test
+fi
+
+source ${test_path_dir}/env_npu.sh
+
+# Recreate the output directory from scratch; quoted so a path with spaces cannot make rm -rf hit other paths.
+output_path=${cur_path}/test/output/
+
+if [ -d "${output_path}" ]; then
+  rm -rf "${output_path}"
+fi
+
+mkdir -p "${output_path}"
+
+# Temporarily patch the config for this run: log every iteration and add stop_iters=1500 to the runner.
+sed -i "s|log_config = dict(interval=50,|log_config = dict(interval=1,|g" projects/configs/bevformer/bevformer_base.py
+sed -i "s|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs, stop_iters=1500)|g" projects/configs/bevformer/bevformer_base.py
+
+# Training start time (no need to modify).
+start_time=$(date +%s)
+
+bash ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py ${world_size} > ${test_path_dir}/output/train_performance_8p_base_fp32.log 2>&1 &
+wait
+
+# Restore the config values patched above.
+sed -i "s|log_config = dict(interval=1,|log_config = dict(interval=50,|g" projects/configs/bevformer/bevformer_base.py
+sed -i "s|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs, stop_iters=1500)|runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)|g" projects/configs/bevformer/bevformer_base.py
+
+# Training end time (no need to modify).
+end_time=$(date +%s)
+e2e_time=$(($end_time - $start_time))
+
+# Print the results (no need to modify).
+echo "------------------ Final result ------------------"
+
+# Collect performance data (no need to modify).
+# Per-iteration time: mean of the last 30 "time:" values in the log; left empty when the log has none.
+TrainingTime=$(grep -o ", time: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | tail -n 30 | grep -o "[0-9.]*" | awk '{sum += $1} END {if (NR > 0) print sum/NR}')
+
+# Throughput: (batch_size * world_size) / TrainingTime; left empty when TrainingTime is missing or zero.
+ActualFPS=$(awk -v bs="${batch_size}" -v ws="${world_size}" -v t="${TrainingTime}" 'BEGIN {if (t + 0 > 0) print (bs * ws) / t}')
+
+# Print the throughput (no need to modify).
+echo "Final Performance images/sec : $ActualFPS"
+
+# Last loss value found in the log (no need to modify).
+ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}')
+
+# Last NDS value found in the log.
+NDS=$(grep -o "pts_bbox_NuScenes/NDS: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}')
+
+# Last mAP value found in the log.
+mAP=$(grep -o "pts_bbox_NuScenes/mAP: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}')
+
+# Print the metrics (no need to modify).
+echo "Final Train Loss : ${ActualLoss}"
+echo "NDS : ${NDS}"
+echo "mAP : ${mAP}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of the results for performance monitoring.
+# Training case information (no need to modify).
+BatchSize=${batch_size}
+WORLD_SIZE=${world_size}
+DeviceType=$(uname -m)
+CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'performance'
+
+# Write the key information to ${CaseName}.log (no need to modify).
+echo "Network = ${Network}" >${test_path_dir}/output/${CaseName}.log
+echo "RankSize = ${WORLD_SIZE}" >>${test_path_dir}/output/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >>${test_path_dir}/output/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >>${test_path_dir}/output/${CaseName}.log
+echo "CaseName = ${CaseName}" >>${test_path_dir}/output/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >>${test_path_dir}/output/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >>${test_path_dir}/output/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >>${test_path_dir}/output/${CaseName}.log
+echo "NDS = ${NDS}" >>${test_path_dir}/output/${CaseName}.log
+echo "mAP = ${mAP}" >>${test_path_dir}/output/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >>${test_path_dir}/output/${CaseName}.log
\ No newline at end of file
diff --git a/examples/BEVFormer/tools/__init__.py b/model_examples/BEVFormer/tools/__init__.py
similarity index 100%
rename from examples/BEVFormer/tools/__init__.py
rename to model_examples/BEVFormer/tools/__init__.py
diff --git a/examples/BEVFormer/tools/analysis_tools/__init__.py b/model_examples/BEVFormer/tools/analysis_tools/__init__.py
similarity index 100%
rename from examples/BEVFormer/tools/analysis_tools/__init__.py
rename to model_examples/BEVFormer/tools/analysis_tools/__init__.py
diff --git a/examples/BEVFormer/tools/analysis_tools/analyze_logs.py b/model_examples/BEVFormer/tools/analysis_tools/analyze_logs.py
similarity index 100%
rename from examples/BEVFormer/tools/analysis_tools/analyze_logs.py
rename to model_examples/BEVFormer/tools/analysis_tools/analyze_logs.py
diff --git a/examples/BEVFormer/tools/analysis_tools/benchmark.py b/model_examples/BEVFormer/tools/analysis_tools/benchmark.py
similarity index 100%
rename from examples/BEVFormer/tools/analysis_tools/benchmark.py
rename to model_examples/BEVFormer/tools/analysis_tools/benchmark.py
diff --git a/examples/BEVFormer/tools/analysis_tools/get_params.py b/model_examples/BEVFormer/tools/analysis_tools/get_params.py
similarity index 100%
rename from examples/BEVFormer/tools/analysis_tools/get_params.py
rename to model_examples/BEVFormer/tools/analysis_tools/get_params.py
diff --git a/examples/BEVFormer/tools/analysis_tools/visual.py b/model_examples/BEVFormer/tools/analysis_tools/visual.py
similarity index 100%
rename from examples/BEVFormer/tools/analysis_tools/visual.py
rename to model_examples/BEVFormer/tools/analysis_tools/visual.py
diff --git a/examples/BEVFormer/tools/create_data.py b/model_examples/BEVFormer/tools/create_data.py
similarity index 100%
rename from examples/BEVFormer/tools/create_data.py
rename to model_examples/BEVFormer/tools/create_data.py
diff --git a/examples/BEVFormer/tools/data_converter/__init__.py b/model_examples/BEVFormer/tools/data_converter/__init__.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/__init__.py
rename to model_examples/BEVFormer/tools/data_converter/__init__.py
diff --git a/examples/BEVFormer/tools/data_converter/create_gt_database.py b/model_examples/BEVFormer/tools/data_converter/create_gt_database.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/create_gt_database.py
rename to model_examples/BEVFormer/tools/data_converter/create_gt_database.py
diff --git a/examples/BEVFormer/tools/data_converter/indoor_converter.py b/model_examples/BEVFormer/tools/data_converter/indoor_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/indoor_converter.py
rename to model_examples/BEVFormer/tools/data_converter/indoor_converter.py
diff --git a/examples/BEVFormer/tools/data_converter/kitti_converter.py b/model_examples/BEVFormer/tools/data_converter/kitti_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/kitti_converter.py
rename to model_examples/BEVFormer/tools/data_converter/kitti_converter.py
diff --git a/examples/BEVFormer/tools/data_converter/kitti_data_utils.py b/model_examples/BEVFormer/tools/data_converter/kitti_data_utils.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/kitti_data_utils.py
rename to model_examples/BEVFormer/tools/data_converter/kitti_data_utils.py
diff --git a/examples/BEVFormer/tools/data_converter/lyft_converter.py b/model_examples/BEVFormer/tools/data_converter/lyft_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/lyft_converter.py
rename to model_examples/BEVFormer/tools/data_converter/lyft_converter.py
diff --git a/examples/BEVFormer/tools/data_converter/lyft_data_fixer.py b/model_examples/BEVFormer/tools/data_converter/lyft_data_fixer.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/lyft_data_fixer.py
rename to model_examples/BEVFormer/tools/data_converter/lyft_data_fixer.py
diff --git a/examples/BEVFormer/tools/data_converter/nuimage_converter.py b/model_examples/BEVFormer/tools/data_converter/nuimage_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/nuimage_converter.py
rename to model_examples/BEVFormer/tools/data_converter/nuimage_converter.py
diff --git a/examples/BEVFormer/tools/data_converter/nuscenes_converter.py b/model_examples/BEVFormer/tools/data_converter/nuscenes_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/nuscenes_converter.py
rename to model_examples/BEVFormer/tools/data_converter/nuscenes_converter.py
diff --git a/examples/BEVFormer/tools/data_converter/s3dis_data_utils.py b/model_examples/BEVFormer/tools/data_converter/s3dis_data_utils.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/s3dis_data_utils.py
rename to model_examples/BEVFormer/tools/data_converter/s3dis_data_utils.py
diff --git a/examples/BEVFormer/tools/data_converter/scannet_data_utils.py b/model_examples/BEVFormer/tools/data_converter/scannet_data_utils.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/scannet_data_utils.py
rename to model_examples/BEVFormer/tools/data_converter/scannet_data_utils.py
diff --git a/examples/BEVFormer/tools/data_converter/sunrgbd_data_utils.py b/model_examples/BEVFormer/tools/data_converter/sunrgbd_data_utils.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/sunrgbd_data_utils.py
rename to model_examples/BEVFormer/tools/data_converter/sunrgbd_data_utils.py
diff --git a/examples/BEVFormer/tools/data_converter/waymo_converter.py b/model_examples/BEVFormer/tools/data_converter/waymo_converter.py
similarity index 100%
rename from examples/BEVFormer/tools/data_converter/waymo_converter.py
rename to model_examples/BEVFormer/tools/data_converter/waymo_converter.py
diff --git a/examples/BEVFormer/tools/dist_test.sh b/model_examples/BEVFormer/tools/dist_test.sh
similarity index 100%
rename from examples/BEVFormer/tools/dist_test.sh
rename to model_examples/BEVFormer/tools/dist_test.sh
diff --git a/examples/BEVFormer/tools/dist_train.sh b/model_examples/BEVFormer/tools/dist_train.sh
similarity index 100%
rename from examples/BEVFormer/tools/dist_train.sh
rename to model_examples/BEVFormer/tools/dist_train.sh
diff --git a/examples/BEVFormer/tools/fp16/dist_train.sh b/model_examples/BEVFormer/tools/fp16/dist_train.sh
similarity index 100%
rename from examples/BEVFormer/tools/fp16/dist_train.sh
rename to model_examples/BEVFormer/tools/fp16/dist_train.sh
diff --git a/examples/BEVFormer/tools/fp16/train.py b/model_examples/BEVFormer/tools/fp16/train.py
similarity index 100%
rename from examples/BEVFormer/tools/fp16/train.py
rename to model_examples/BEVFormer/tools/fp16/train.py
diff --git a/examples/BEVFormer/tools/misc/browse_dataset.py b/model_examples/BEVFormer/tools/misc/browse_dataset.py
similarity index 100%
rename from examples/BEVFormer/tools/misc/browse_dataset.py
rename to model_examples/BEVFormer/tools/misc/browse_dataset.py
diff --git a/examples/BEVFormer/tools/misc/fuse_conv_bn.py b/model_examples/BEVFormer/tools/misc/fuse_conv_bn.py
similarity index 100%
rename from examples/BEVFormer/tools/misc/fuse_conv_bn.py
rename to model_examples/BEVFormer/tools/misc/fuse_conv_bn.py
diff --git a/examples/BEVFormer/tools/misc/print_config.py b/model_examples/BEVFormer/tools/misc/print_config.py
similarity index 100%
rename from examples/BEVFormer/tools/misc/print_config.py
rename to model_examples/BEVFormer/tools/misc/print_config.py
diff --git a/examples/BEVFormer/tools/misc/visualize_results.py b/model_examples/BEVFormer/tools/misc/visualize_results.py
similarity index 100%
rename from examples/BEVFormer/tools/misc/visualize_results.py
rename to model_examples/BEVFormer/tools/misc/visualize_results.py
diff --git a/examples/BEVFormer/tools/model_converters/convert_votenet_checkpoints.py b/model_examples/BEVFormer/tools/model_converters/convert_votenet_checkpoints.py
similarity index 100%
rename from examples/BEVFormer/tools/model_converters/convert_votenet_checkpoints.py
rename to model_examples/BEVFormer/tools/model_converters/convert_votenet_checkpoints.py
diff --git a/examples/BEVFormer/tools/model_converters/publish_model.py b/model_examples/BEVFormer/tools/model_converters/publish_model.py
similarity index 100%
rename from examples/BEVFormer/tools/model_converters/publish_model.py
rename to model_examples/BEVFormer/tools/model_converters/publish_model.py
diff --git a/examples/BEVFormer/tools/model_converters/regnet2mmdet.py b/model_examples/BEVFormer/tools/model_converters/regnet2mmdet.py
similarity index 100%
rename from examples/BEVFormer/tools/model_converters/regnet2mmdet.py
rename to model_examples/BEVFormer/tools/model_converters/regnet2mmdet.py
diff --git a/examples/BEVFormer/tools/test.py b/model_examples/BEVFormer/tools/test.py
similarity index 100%
rename from examples/BEVFormer/tools/test.py
rename to model_examples/BEVFormer/tools/test.py
diff --git a/examples/BEVFormer/tools/train.py b/model_examples/BEVFormer/tools/train.py
similarity index 100%
rename from examples/BEVFormer/tools/train.py
rename to model_examples/BEVFormer/tools/train.py
diff --git a/mx_driving/common/__init__.py b/mx_driving/common/__init__.py
index 8f7b0296..747cb6b7 100644
--- a/mx_driving/common/__init__.py
+++ b/mx_driving/common/__init__.py
@@ -1,25 +1,8 @@
 from .ops.three_interpolate import three_interpolate
 from .ops.scatter_max import scatter_max
-from .ops.rotated_iou import npu_rotated_iou
-from .ops.furthest_point_sampling_with_dist import furthest_point_sample_with_dist
-from .ops.npu_dynamic_scatter import npu_dynamic_scatter
-from .ops.npu_points_in_box import npu_points_in_box
-from .ops.npu_points_in_box_all import npu_points_in_box_all
-from .ops.npu_multi_scale_deformable_attn_function import npu_multi_scale_deformable_attn_function
-from .ops.voxelization import voxelization, Voxelization
-from .ops.nms3d_normal import npu_nms3d_normal
-from .ops.furthest_point_sampling import npu_furthest_point_sampling
-from .ops.npu_nms3d import npu_nms3d
-from .ops.rotated_overlaps import npu_rotated_overlaps
 from .ops.npu_scatter_mean_grad import npu_scatter_mean_grad
-from .ops.voxel_pooling_train import npu_voxel_pooling_train
 from .ops.knn import knn
 from .ops.threeNN import three_nn
-from .ops.npu_roipoint_pool3d import RoipointPool3d as RoIPointPool3d
-from .ops.npu_max_pool2d import npu_max_pool2d
-from .ops.npu_add_relu import npu_add_relu
 from .ops.scatter_mean import scatter_mean
 from .ops.sort_pairs import sort_pairs
-from .ops.fused_bias_leaky_relu import npu_fused_bias_leaky_relu
-from .ops.npu_deformable_aggregation import npu_deformable_aggregation
 
diff --git a/mx_driving/common/ops/csrc/functions.h b/mx_driving/common/ops/csrc/functions.h
index be3ee366..e24f832a 100644
--- a/mx_driving/common/ops/csrc/functions.h
+++ b/mx_driving/common/ops/csrc/functions.h
@@ -16,6 +16,8 @@
 
 #include <ATen/ATen.h>
 
+at::Tensor knn(const at::Tensor& xyz, const at::Tensor& center_xyz, int32_t nsample, bool is_from_knn);
+
 at::Tensor npu_three_interpolate(
     int b, int c, int m, int n, const at::Tensor& points, const at::Tensor& idx, const at::Tensor& weight);
 
@@ -27,86 +29,13 @@ std::tuple<at::Tensor, at::Tensor> scatter_max_with_argmax_v2(
 
 at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments);
 
-at::Tensor npu_rotated_overlaps(const at::Tensor& self, const at::Tensor& query_boxes, bool trans);
-
-at::Tensor npu_rotated_iou(const at::Tensor& boxes, const at::Tensor& query_boxes, bool trans, int64_t mode,
-    bool is_cross, double v_threshold, double e_threshold);
-
 at::Tensor npu_scatter(const at::Tensor& self, const at::Tensor& indices, const at::Tensor& updates, int64_t dim);
 
-at::Tensor furthest_point_sampling_with_dist(
-    const at::Tensor& points_dist, const at::Tensor& nearest_temp, int32_t num_points);
-
-std::tuple<at::Tensor, at::Tensor> npu_dynamic_scatter(const at::Tensor& feats, const at::Tensor& coors,
-    const at::Tensor& prefix_sum_point_per_voxel, const at::Tensor& argsort_coor, int32_t num_voxels,
-    const char* reduce_type);
-
-void npu_dynamic_scatter_grad(at::Tensor& grad_point_feats, const at::Tensor& grad_voxel_feats,
-    const at::Tensor& prefix_sum_point_per_voxel, const at::Tensor& argsort_coor, const at::Tensor& compare_mask,
-    const char* reduce_type);
-
-at::Tensor npu_max_pool2d(const at::Tensor& x, int kernel_size, int stride, int padding);
-
-at::Tensor npu_points_in_box(const at::Tensor& boxes, const at::Tensor& pts);
-
-at::Tensor npu_points_in_box_all(const at::Tensor& boxes, const at::Tensor& pts);
-
-at::Tensor npu_multi_scale_deformable_attn_function(const at::Tensor& value, const at::Tensor& value_spatial_shapes,
-    const at::Tensor& value_level_start_index, const at::Tensor& sampling_locations,
-    const at::Tensor& attention_weights);
-
-std::tuple<at::Tensor, at::Tensor, at::Tensor> multi_scale_deformable_attn_grad(const at::Tensor& value,
-    const at::Tensor& shape, const at::Tensor& level_start_index, const at::Tensor& location_trans,
-    const at::Tensor& attn_weight_trans, const at::Tensor& grad_output);
-
-std::tuple<at::Tensor, at::Tensor, at::Tensor> multi_scale_deformable_attn_grad_v2(const at::Tensor& value,
-    const at::Tensor& shape, const at::Tensor& level_start_index, const at::Tensor& location_trans,
-    const at::Tensor& attn_weight_trans, const at::Tensor& grad_output);
-
-at::Tensor npu_furthest_point_sampling(const at::Tensor& point_xyz, const at::Tensor& nearset_temp, int32_t num_points);
-
-at::Tensor dynamic_voxelization(const at::Tensor& points, at::Tensor& coors, int grid_x, int grid_y, int grid_z,
-    double voxel_x, double voxel_y, double voxel_z, double coors_min_x, double coors_min_y, double coorsMinZ);
-
-std::tuple<at::Tensor, at::Tensor> nms3d_normal(const at::Tensor& boxes, double nms_overlap_thresh);
-
-std::tuple<at::Tensor, at::Tensor> nms3d(const at::Tensor& boxes, double threshold);
-
 at::Tensor npu_scatter_mean_grad(const at::Tensor& grad_out, const at::Tensor& index, int32_t dim);
 
-std::tuple<at::Tensor&, at::Tensor&> voxel_pooling_train(const at::Tensor& inputFeatures, const at::Tensor& geom,
-    at::Tensor& outputFeatures, at::Tensor& posMemo, int batchSize, int numPoints, int numChannels, int numVoxelX,
-    int numVoxelY, int numVoxelZ);
-
-at::Tensor voxel_pool_train_backward(const at::Tensor& grad_out, const at::Tensor& posMemo, const int64_t batchSize,
-    const int64_t numPoints, const int64_t numChannels, const int64_t h, const int64_t w);
-
-at::Tensor knn(const at::Tensor& xyz, const at::Tensor& center_xyz, int32_t nsample, bool is_from_knn);
-
-std::tuple<at::Tensor, at::Tensor> npu_roipoint_pool3d_forward(const int32_t num_sampled_points,
-    const at::Tensor& points, const at::Tensor& point_features, const at::Tensor& boxes3d);
-
-at::Tensor npu_add_relu(at::Tensor& x, const at::Tensor& y);
-
-at::Tensor npu_add_relu_grad(at::Tensor& self, at::Tensor& grad_output);
 std::tuple<at::Tensor, at::Tensor> npu_scatter_mean(at::Tensor& src, at::Tensor& index,
                                                     c10::optional<at::Tensor> out, c10::optional<int> dim,
                                                     c10::optional<int> dim_size);
 std::tuple<at::Tensor, at::Tensor> npu_sort_pairs(const at::Tensor &keys_in, const at::Tensor &values_in, int64_t dim, bool descending);
 
-at::Tensor fused_bias_leaky_relu(const at::Tensor& x, const at::Tensor& bias, const double negative_slop, const double scale);
-
-at::Tensor deformable_aggregation(const at::Tensor& mc_ms_feat, const at::Tensor& spatial_shape,
-    const at::Tensor& scale_start_index, const at::Tensor& sampling_location, const at::Tensor& weights);
-std::tuple<at::Tensor, at::Tensor, at::Tensor> deformable_aggregation_grad(
-    const at::Tensor& mc_ms_feat,
-    const at::Tensor& spatial_shape,
-    const at::Tensor& scale_start_index,
-    const at::Tensor& sampling_location,
-    const at::Tensor& weights,
-    const at::Tensor& grad_output,
-    const at::Tensor& grad_mc_ms_feat,
-    const at::Tensor& grad_sampling_location,
-    const at::Tensor& grad_weights
-);
 #endif // COMMON_OPS_CSRC_FUNCTIONS_H_
diff --git a/mx_driving/common/ops/csrc/pybind.cpp b/mx_driving/common/ops/csrc/pybind.cpp
index a246f9cd..e09c4920 100644
--- a/mx_driving/common/ops/csrc/pybind.cpp
+++ b/mx_driving/common/ops/csrc/pybind.cpp
@@ -22,75 +22,23 @@
 
 void init_common(pybind11::module& m)
 {
-    // three_interpolate
-    m.def("npu_three_interpolate", &npu_three_interpolate);
-    m.def("npu_three_interpolate_backward", &npu_three_interpolate_backward);
-    
-    // scatter_max
-    m.def("scatter_max_with_argmax_v2", &scatter_max_with_argmax_v2);
-    m.def("npu_scatter_max_backward", &npu_scatter_max_backward);
-
-    // roated overlap
-    m.def("npu_rotated_overlaps", &npu_rotated_overlaps, "npu_rotated_overlap NPU version");
-
-    // rotated iou
-    m.def("npu_rotated_iou", &npu_rotated_iou);
-
-    // furthest_points_sampling_with_dist
-    m.def("furthest_point_sampling_with_dist", &furthest_point_sampling_with_dist);
-
-    // npu_points_in_box
-    m.def("npu_points_in_box", &npu_points_in_box);
-    // npu_points_in_box_all
-    m.def("npu_points_in_box_all", &npu_points_in_box_all);
-    // npu_points_in_box
-    m.def("npu_max_pool2d", &npu_max_pool2d);
-    // npu_multi_scale_deformable_attn_function
-    m.def("npu_multi_scale_deformable_attn_function", &npu_multi_scale_deformable_attn_function);
-    m.def("multi_scale_deformable_attn_grad", &multi_scale_deformable_attn_grad);
-    m.def("multi_scale_deformable_attn_grad_v2", &multi_scale_deformable_attn_grad_v2);
-
-    // npu_dynamic_scatter
-    m.def("npu_dynamic_scatter", &npu_dynamic_scatter);
-    m.def("npu_dynamic_scatter_grad", &npu_dynamic_scatter_grad);
-
-    // dyn_voxelization
-    m.def("dynamic_voxelization", &dynamic_voxelization);
-
-    // nms3d_normal
-    m.def("nms3d_normal", &nms3d_normal);
-
-    // nms3d
-    m.def("nms3d", &nms3d);
-
-    // npu_furthest_point_sampling
-    m.def("npu_furthest_point_sampling", &npu_furthest_point_sampling);
-
-    // npu_scatter_mean_grad
-    m.def("npu_scatter_mean_grad", &npu_scatter_mean_grad);
-
-    // voxel_pooling
-    m.def("voxel_pooling_train", &voxel_pooling_train);
-    m.def("voxel_pool_train_backward", &voxel_pool_train_backward);
-
     // knn
     m.def("knn", &knn);
 
-    // npu_roipoint_pool3d_forward
-    m.def("npu_roipoint_pool3d_forward", &npu_roipoint_pool3d_forward);
+    // npu_scatter_mean_grad
+    m.def("npu_scatter_mean_grad", &npu_scatter_mean_grad);
 
-    // npu_add_relu
-    m.def("npu_add_relu", &npu_add_relu);
-    m.def("npu_add_relu_grad", &npu_add_relu_grad);
+    // three_interpolate
+    m.def("npu_three_interpolate", &npu_three_interpolate);
+    m.def("npu_three_interpolate_backward", &npu_three_interpolate_backward);
 
     // scatter_mean
     m.def("npu_scatter_mean", &npu_scatter_mean, "npu_scatter_mean NPU version");
+    
+    // scatter_max
+    m.def("scatter_max_with_argmax_v2", &scatter_max_with_argmax_v2);
+    m.def("npu_scatter_max_backward", &npu_scatter_max_backward);
 
     // npu_sort_pairs
     m.def("npu_sort_pairs", &npu_sort_pairs, "sort_pairs NPU version");
-    // fused_bias_leaky_relu
-    m.def("fused_bias_leaky_relu", &fused_bias_leaky_relu);
-    // npu_deformable_aggregation
-    m.def("npu_deformable_aggregation", &deformable_aggregation);
-    m.def("npu_deformable_aggregation_grad", &deformable_aggregation_grad);
 }
diff --git a/mx_driving/data/CMakeLists.txt b/mx_driving/data/CMakeLists.txt
new file mode 100644
index 00000000..3f1ac043
--- /dev/null
+++ b/mx_driving/data/CMakeLists.txt
@@ -0,0 +1,7 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels)
+  add_subdirectory(ops/kernels)
+endif()
+# Name the variable without ${} so an unset ENABLE_ONNX is false, not a syntax error; test the directory actually added.
+if(ENABLE_ONNX AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/onnx/plugin)
+  add_subdirectory(ops/onnx/plugin)
+endif()
diff --git a/mx_driving/data/__init__.py b/mx_driving/data/__init__.py
new file mode 100644
index 00000000..e8973220
--- /dev/null
+++ b/mx_driving/data/__init__.py
@@ -0,0 +1,3 @@
+from .ops.npu_points_in_box import npu_points_in_box
+from .ops.npu_points_in_box_all import npu_points_in_box_all
+from .ops.npu_roipoint_pool3d import RoipointPool3d as RoIPointPool3d
\ No newline at end of file
diff --git a/mx_driving/motion/components/README.md b/mx_driving/data/components/README.md
similarity index 100%
rename from mx_driving/motion/components/README.md
rename to mx_driving/data/components/README.md
diff --git a/mx_driving/motion/__init__.py b/mx_driving/data/ops/__init__.py
similarity index 100%
rename from mx_driving/motion/__init__.py
rename to mx_driving/data/ops/__init__.py
diff --git a/mx_driving/common/ops/csrc/PointsInBox.cpp b/mx_driving/data/ops/csrc/PointsInBox.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/PointsInBox.cpp
rename to mx_driving/data/ops/csrc/PointsInBox.cpp
diff --git a/mx_driving/common/ops/csrc/PointsInBoxAll.cpp b/mx_driving/data/ops/csrc/PointsInBoxAll.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/PointsInBoxAll.cpp
rename to mx_driving/data/ops/csrc/PointsInBoxAll.cpp
diff --git a/mx_driving/data/ops/csrc/README.md b/mx_driving/data/ops/csrc/README.md
new file mode 100644
index 00000000..8073915f
--- /dev/null
+++ b/mx_driving/data/ops/csrc/README.md
@@ -0,0 +1,6 @@
+## Description
+The `csrc` library implements the Python interface, using `pybind11` to wrap the C++ code.
+There are three kinds of files to focus on:
+1. `pybind.cpp`: defines the Python interface.
+2. `functions.h`: declares the C++ interface.
+3. Files named in `Pascal` case (e.g. `PointsInBox.cpp`): implement the C++ interface.
\ No newline at end of file
diff --git a/mx_driving/common/ops/csrc/RoipointPool3dForward.cpp b/mx_driving/data/ops/csrc/RoipointPool3dForward.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/RoipointPool3dForward.cpp
rename to mx_driving/data/ops/csrc/RoipointPool3dForward.cpp
diff --git a/mx_driving/perception/vision/ops/csrc/functions.h b/mx_driving/data/ops/csrc/functions.h
similarity index 69%
rename from mx_driving/perception/vision/ops/csrc/functions.h
rename to mx_driving/data/ops/csrc/functions.h
index b268f4a7..e509d755 100644
--- a/mx_driving/perception/vision/ops/csrc/functions.h
+++ b/mx_driving/data/ops/csrc/functions.h
@@ -17,5 +17,10 @@
 #include <ATen/ATen.h>
 #include <torch/library.h>
 
-at::Tensor npu_boxes_overlap_bev(const at::Tensor &boxes_a, const at::Tensor &boxes_b);
+at::Tensor npu_points_in_box(const at::Tensor& boxes, const at::Tensor& pts);
+
+at::Tensor npu_points_in_box_all(const at::Tensor& boxes, const at::Tensor& pts);
+
+std::tuple<at::Tensor, at::Tensor> npu_roipoint_pool3d_forward(const int32_t num_sampled_points,
+    const at::Tensor& points, const at::Tensor& point_features, const at::Tensor& boxes3d);
 #endif // PERCEPTION_VISION_OPS_CSRC_FUNCTIONS_H_
diff --git a/mx_driving/data/ops/csrc/pybind.cpp b/mx_driving/data/ops/csrc/pybind.cpp
new file mode 100644
index 00000000..21a84a7d
--- /dev/null
+++ b/mx_driving/data/ops/csrc/pybind.cpp
@@ -0,0 +1,15 @@
+#include <torch/extension.h>
+#include "csrc/pybind.h"
+#include "functions.h"
+
+void init_data(pybind11::module& m)
+{
+    // Register the data ops declared in functions.h on module `m`. npu_points_in_box: (boxes, pts) -> Tensor.
+    m.def("npu_points_in_box", &npu_points_in_box);
+    
+    // npu_points_in_box_all: same (boxes, pts) -> Tensor signature as npu_points_in_box.
+    m.def("npu_points_in_box_all", &npu_points_in_box_all);
+
+    // npu_roipoint_pool3d_forward: (num_sampled_points, points, point_features, boxes3d) -> tuple of two Tensors.
+    m.def("npu_roipoint_pool3d_forward", &npu_roipoint_pool3d_forward);
+}
diff --git a/mx_driving/motion/ops/kernels/CMakeLists.txt b/mx_driving/data/ops/kernels/CMakeLists.txt
similarity index 100%
rename from mx_driving/motion/ops/kernels/CMakeLists.txt
rename to mx_driving/data/ops/kernels/CMakeLists.txt
diff --git a/mx_driving/data/ops/kernels/README.md b/mx_driving/data/ops/kernels/README.md
new file mode 100644
index 00000000..1e664555
--- /dev/null
+++ b/mx_driving/data/ops/kernels/README.md
@@ -0,0 +1,13 @@
+## 算子原型
+<table>
+<tr><td rowspan="1" align="center">算子类型(OpType)</td><td colspan="4" align="center">Add</td></tr>
+</tr>
+<tr><td rowspan="3" align="center">算子输入</td><td align="center">name</td><td align="center">shape</td><td align="center">data type</td><td align="center">format</td></tr>
+<tr><td align="center">x</td><td align="center">-</td><td align="center">float</td><td align="center">ND</td></tr>
+<tr><td align="center">y</td><td align="center">-</td><td align="center">float</td><td align="center">ND</td></tr>
+</tr>
+</tr>
+<tr><td rowspan="1" align="center">算子输出</td><td align="center">z</td><td align="center">-</td><td align="center">float</td><td align="center">ND</td></tr>
+</tr>
+<tr><td rowspan="1" align="center">核函数名</td><td colspan="4" align="center">add_custom</td></tr>
+</table>
\ No newline at end of file
diff --git a/mx_driving/motion/ops/kernels/framework/CMakeLists.txt b/mx_driving/data/ops/kernels/framework/CMakeLists.txt
similarity index 100%
rename from mx_driving/motion/ops/kernels/framework/CMakeLists.txt
rename to mx_driving/data/ops/kernels/framework/CMakeLists.txt
diff --git a/mx_driving/motion/ops/kernels/op_host/CMakeLists.txt b/mx_driving/data/ops/kernels/op_host/CMakeLists.txt
similarity index 100%
rename from mx_driving/motion/ops/kernels/op_host/CMakeLists.txt
rename to mx_driving/data/ops/kernels/op_host/CMakeLists.txt
diff --git a/mx_driving/data/ops/kernels/op_host/common.h b/mx_driving/data/ops/kernels/op_host/common.h
new file mode 100644
index 00000000..4580dff5
--- /dev/null
+++ b/mx_driving/data/ops/kernels/op_host/common.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+ */
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "register/op_def_registry.h"
+#include "tiling/platform/platform_ascendc.h"
+#include "tiling/tiling_api.h"
+#include "register/tilingdata_base.h"
+
+// Returns ceil(num / block): the number of `block`-sized chunks needed to cover `num`.
+// Returns 0 when `block` is 0 instead of dividing by zero.
+inline uint32_t ceil_multiple(uint32_t num, uint32_t block)
+{
+    // Quotient plus a remainder flag; the `num + block - 1` form wraps around for `num` near UINT32_MAX.
+    return block == 0 ? 0 : num / block + (num % block != 0 ? 1 : 0);
+}
+
+// Rounds `num` up to the nearest multiple of `block`; returns 0 when `block` is 0.
+// The result still wraps if that multiple does not fit in uint32_t.
+inline uint32_t ceil_value(uint32_t num, uint32_t block)
+{
+    // Count the chunks without forming `num + block - 1`, which wraps around for `num` near UINT32_MAX.
+    return block == 0 ? 0 : (num / block + (num % block != 0 ? 1 : 0)) * block;
+}
+
+#endif // COMMON_H
diff --git a/mx_driving/common/ops/kernels/op_host/points_in_box.cpp b/mx_driving/data/ops/kernels/op_host/points_in_box.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/points_in_box.cpp
rename to mx_driving/data/ops/kernels/op_host/points_in_box.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/points_in_box_all.cpp b/mx_driving/data/ops/kernels/op_host/points_in_box_all.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/points_in_box_all.cpp
rename to mx_driving/data/ops/kernels/op_host/points_in_box_all.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/points_in_box_all_tiling.h b/mx_driving/data/ops/kernels/op_host/points_in_box_all_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/points_in_box_all_tiling.h
rename to mx_driving/data/ops/kernels/op_host/points_in_box_all_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/points_in_box_tiling.h b/mx_driving/data/ops/kernels/op_host/points_in_box_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/points_in_box_tiling.h
rename to mx_driving/data/ops/kernels/op_host/points_in_box_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp b/mx_driving/data/ops/kernels/op_host/roipoint_pool3d_forward.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp
rename to mx_driving/data/ops/kernels/op_host/roipoint_pool3d_forward.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h b/mx_driving/data/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h
rename to mx_driving/data/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/data/ops/kernels/op_kernel/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/CMakeLists.txt
rename to mx_driving/data/ops/kernels/op_kernel/CMakeLists.txt
diff --git a/mx_driving/common/ops/kernels/op_kernel/points_in_box.cpp b/mx_driving/data/ops/kernels/op_kernel/points_in_box.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/points_in_box.cpp
rename to mx_driving/data/ops/kernels/op_kernel/points_in_box.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/points_in_box_all.cpp b/mx_driving/data/ops/kernels/op_kernel/points_in_box_all.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/points_in_box_all.cpp
rename to mx_driving/data/ops/kernels/op_kernel/points_in_box_all.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp b/mx_driving/data/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp
rename to mx_driving/data/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp
diff --git a/mx_driving/common/ops/npu_points_in_box.py b/mx_driving/data/ops/npu_points_in_box.py
similarity index 100%
rename from mx_driving/common/ops/npu_points_in_box.py
rename to mx_driving/data/ops/npu_points_in_box.py
diff --git a/mx_driving/common/ops/npu_points_in_box_all.py b/mx_driving/data/ops/npu_points_in_box_all.py
similarity index 100%
rename from mx_driving/common/ops/npu_points_in_box_all.py
rename to mx_driving/data/ops/npu_points_in_box_all.py
diff --git a/mx_driving/common/ops/npu_roipoint_pool3d.py b/mx_driving/data/ops/npu_roipoint_pool3d.py
similarity index 100%
rename from mx_driving/common/ops/npu_roipoint_pool3d.py
rename to mx_driving/data/ops/npu_roipoint_pool3d.py
diff --git a/mx_driving/motion/CMakeLists.txt b/mx_driving/detection/CMakeLists.txt
similarity index 100%
rename from mx_driving/motion/CMakeLists.txt
rename to mx_driving/detection/CMakeLists.txt
diff --git a/mx_driving/detection/__init__.py b/mx_driving/detection/__init__.py
new file mode 100644
index 00000000..118eebed
--- /dev/null
+++ b/mx_driving/detection/__init__.py
@@ -0,0 +1,5 @@
+from .ops.boxes_overlap_bev import boxes_overlap_bev
+from .ops.nms3d_normal import npu_nms3d_normal
+from .ops.npu_nms3d import npu_nms3d
+from .ops.rotated_iou import npu_rotated_iou
+from .ops.rotated_overlaps import npu_rotated_overlaps
\ No newline at end of file
diff --git a/mx_driving/perception/fused/components/README.md b/mx_driving/detection/components/README.md
similarity index 100%
rename from mx_driving/perception/fused/components/README.md
rename to mx_driving/detection/components/README.md
diff --git a/mx_driving/perception/__init__.py b/mx_driving/detection/ops/__init__.py
similarity index 100%
rename from mx_driving/perception/__init__.py
rename to mx_driving/detection/ops/__init__.py
diff --git a/mx_driving/perception/vision/ops/boxes_overlap_bev.py b/mx_driving/detection/ops/boxes_overlap_bev.py
similarity index 100%
rename from mx_driving/perception/vision/ops/boxes_overlap_bev.py
rename to mx_driving/detection/ops/boxes_overlap_bev.py
diff --git a/mx_driving/perception/vision/ops/csrc/BoxesOverlapBev.cpp b/mx_driving/detection/ops/csrc/BoxesOverlapBev.cpp
similarity index 100%
rename from mx_driving/perception/vision/ops/csrc/BoxesOverlapBev.cpp
rename to mx_driving/detection/ops/csrc/BoxesOverlapBev.cpp
diff --git a/mx_driving/common/ops/csrc/Nms3d.cpp b/mx_driving/detection/ops/csrc/Nms3d.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/Nms3d.cpp
rename to mx_driving/detection/ops/csrc/Nms3d.cpp
diff --git a/mx_driving/common/ops/csrc/Nms3dNormal.cpp b/mx_driving/detection/ops/csrc/Nms3dNormal.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/Nms3dNormal.cpp
rename to mx_driving/detection/ops/csrc/Nms3dNormal.cpp
diff --git a/mx_driving/motion/ops/csrc/README.md b/mx_driving/detection/ops/csrc/README.md
similarity index 100%
rename from mx_driving/motion/ops/csrc/README.md
rename to mx_driving/detection/ops/csrc/README.md
diff --git a/mx_driving/common/ops/csrc/RotatedIou.cpp b/mx_driving/detection/ops/csrc/RotatedIou.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/RotatedIou.cpp
rename to mx_driving/detection/ops/csrc/RotatedIou.cpp
diff --git a/mx_driving/common/ops/csrc/RotatedOverlaps.cpp b/mx_driving/detection/ops/csrc/RotatedOverlaps.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/RotatedOverlaps.cpp
rename to mx_driving/detection/ops/csrc/RotatedOverlaps.cpp
diff --git a/mx_driving/perception/fused/ops/csrc/functions.h b/mx_driving/detection/ops/csrc/functions.h
similarity index 34%
rename from mx_driving/perception/fused/ops/csrc/functions.h
rename to mx_driving/detection/ops/csrc/functions.h
index e8335acf..56ca5a6c 100644
--- a/mx_driving/perception/fused/ops/csrc/functions.h
+++ b/mx_driving/detection/ops/csrc/functions.h
@@ -11,22 +11,20 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#ifndef PERCEPTION_VISION_OPS_CSRC_FUNCTIONS_H_
+#define PERCEPTION_VISION_OPS_CSRC_FUNCTIONS_H_
 
-#ifndef PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
-#define PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
-#include <ATen/Tensor.h>
+#include <ATen/ATen.h>
 #include <torch/library.h>
 
+std::tuple<at::Tensor, at::Tensor> nms3d_normal(const at::Tensor& boxes, double nms_overlap_thresh);
 
-at::Tensor npu_bev_pool(const at::Tensor& feat, const at::Tensor& geom_feat, const at::Tensor& interval_lengths,
-    const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
-at::Tensor npu_bev_pool_backward(const at::Tensor& grad_out, const at::Tensor& geom_feat,
-    const at::Tensor& interval_lengths, const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+std::tuple<at::Tensor, at::Tensor> nms3d(const at::Tensor& boxes, double threshold);
 
-at::Tensor npu_bev_pool_v2(const at::Tensor& depth, const at::Tensor& feat, const at::Tensor& ranks_depth,
-    const at::Tensor& ranks_feat, const at::Tensor& ranks_bev, const at::Tensor& interval_lengths,
-    const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
-std::tuple<at::Tensor, at::Tensor> npu_bev_pool_v2_backward(const at::Tensor& grad_out, const at::Tensor& depth,
-    const at::Tensor& feat, const at::Tensor& ranks_depth, const at::Tensor& ranks_feat, const at::Tensor& ranks_bev,
-    const at::Tensor& interval_lengths, const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
-#endif // PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
+at::Tensor npu_rotated_overlaps(const at::Tensor& self, const at::Tensor& query_boxes, bool trans);
+
+at::Tensor npu_rotated_iou(const at::Tensor& boxes, const at::Tensor& query_boxes, bool trans, int64_t mode,
+    bool is_cross, double v_threshold, double e_threshold);
+
+at::Tensor npu_boxes_overlap_bev(const at::Tensor &boxes_a, const at::Tensor &boxes_b);
+#endif // PERCEPTION_VISION_OPS_CSRC_FUNCTIONS_H_
diff --git a/mx_driving/perception/vision/ops/csrc/pybind.cpp b/mx_driving/detection/ops/csrc/pybind.cpp
similarity index 37%
rename from mx_driving/perception/vision/ops/csrc/pybind.cpp
rename to mx_driving/detection/ops/csrc/pybind.cpp
index ff49f49e..c2941001 100644
--- a/mx_driving/perception/vision/ops/csrc/pybind.cpp
+++ b/mx_driving/detection/ops/csrc/pybind.cpp
@@ -2,8 +2,20 @@
 #include "csrc/pybind.h"
 #include "functions.h"
 
-void init_perception_vision(pybind11::module &m)
+void init_detection(pybind11::module& m)
 {
+    // nms3d_normal
+    m.def("nms3d_normal", &nms3d_normal);
+
+    // nms3d
+    m.def("nms3d", &nms3d);
+
+    // rotated overlaps
+    m.def("npu_rotated_overlaps", &npu_rotated_overlaps, "npu_rotated_overlap NPU version");
+
+    // rotated iou
+    m.def("npu_rotated_iou", &npu_rotated_iou);
+    
     // npu_boxes_overlap_bev
     m.def("npu_boxes_overlap_bev", &npu_boxes_overlap_bev, "boxes_overlap_bev NPU version");
 }
diff --git a/mx_driving/perception/point/ops/kernels/CMakeLists.txt b/mx_driving/detection/ops/kernels/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/CMakeLists.txt
rename to mx_driving/detection/ops/kernels/CMakeLists.txt
diff --git a/mx_driving/motion/ops/kernels/README.md b/mx_driving/detection/ops/kernels/README.md
similarity index 100%
rename from mx_driving/motion/ops/kernels/README.md
rename to mx_driving/detection/ops/kernels/README.md
diff --git a/mx_driving/perception/fused/ops/kernels/op_host/CMakeLists.txt b/mx_driving/detection/ops/kernels/op_host/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_host/CMakeLists.txt
rename to mx_driving/detection/ops/kernels/op_host/CMakeLists.txt
diff --git a/mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp b/mx_driving/detection/ops/kernels/op_host/boxes_overlap_bev.cpp
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp
rename to mx_driving/detection/ops/kernels/op_host/boxes_overlap_bev.cpp
diff --git a/mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h b/mx_driving/detection/ops/kernels/op_host/boxes_overlap_bev_tiling.h
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h
rename to mx_driving/detection/ops/kernels/op_host/boxes_overlap_bev_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp b/mx_driving/detection/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp
rename to mx_driving/detection/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h b/mx_driving/detection/ops/kernels/op_host/gather_nms3d_mask_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h
rename to mx_driving/detection/ops/kernels/op_host/gather_nms3d_mask_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/nms3d.cpp b/mx_driving/detection/ops/kernels/op_host/nms3d.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/nms3d.cpp
rename to mx_driving/detection/ops/kernels/op_host/nms3d.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.cpp b/mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.cpp
rename to mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.cpp
diff --git a/mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.h b/mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.h
new file mode 100644
index 00000000..9976c248
--- /dev/null
+++ b/mx_driving/detection/ops/kernels/op_host/nms3d_normal_tiling.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+ */
+#ifndef NMS3D_NORMAL_TILING_H
+#define NMS3D_NORMAL_TILING_H
+
+#include "register/tilingdata_base.h"
+
+namespace optiling {
+BEGIN_TILING_DATA_DEF(Nms3dNormalTilingData)
+    TILING_DATA_FIELD_DEF(uint32_t, usedCoreNum)  // used cores
+    TILING_DATA_FIELD_DEF(uint32_t, boxNum)  // count of boxes
+    TILING_DATA_FIELD_DEF(uint32_t, loopTime)  // loop times
+    TILING_DATA_FIELD_DEF(uint32_t, eachSum) // count of each core, = loop_time * 8
+    TILING_DATA_FIELD_DEF(uint32_t, tailSum) // count of tail core
+    TILING_DATA_FIELD_DEF(uint32_t, tailNum) // last time count of tail core
+    TILING_DATA_FIELD_DEF(uint32_t, maskNum) // mask align 32bit
+    TILING_DATA_FIELD_DEF(float, overlapThresh)
+END_TILING_DATA_DEF;
+
+REGISTER_TILING_DATA_CLASS(Nms3dNormal, Nms3dNormalTilingData)
+} // namespace optiling
+
+#endif // NMS3D_NORMAL_TILING_H
diff --git a/mx_driving/common/ops/kernels/op_host/nms3d_tiling.h b/mx_driving/detection/ops/kernels/op_host/nms3d_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/nms3d_tiling.h
rename to mx_driving/detection/ops/kernels/op_host/nms3d_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/detection/ops/kernels/op_kernel/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/CMakeLists.txt
rename to mx_driving/detection/ops/kernels/op_kernel/CMakeLists.txt
diff --git a/mx_driving/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp b/mx_driving/detection/ops/kernels/op_kernel/boxes_overlap_bev.cpp
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp
rename to mx_driving/detection/ops/kernels/op_kernel/boxes_overlap_bev.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp b/mx_driving/detection/ops/kernels/op_kernel/gather_nms3d_mask.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp
rename to mx_driving/detection/ops/kernels/op_kernel/gather_nms3d_mask.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/nms3d.cpp b/mx_driving/detection/ops/kernels/op_kernel/nms3d.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/nms3d.cpp
rename to mx_driving/detection/ops/kernels/op_kernel/nms3d.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/nms3d_normal.cpp b/mx_driving/detection/ops/kernels/op_kernel/nms3d_normal.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/nms3d_normal.cpp
rename to mx_driving/detection/ops/kernels/op_kernel/nms3d_normal.cpp
diff --git a/mx_driving/common/ops/nms3d_normal.py b/mx_driving/detection/ops/nms3d_normal.py
similarity index 100%
rename from mx_driving/common/ops/nms3d_normal.py
rename to mx_driving/detection/ops/nms3d_normal.py
diff --git a/mx_driving/common/ops/npu_nms3d.py b/mx_driving/detection/ops/npu_nms3d.py
similarity index 100%
rename from mx_driving/common/ops/npu_nms3d.py
rename to mx_driving/detection/ops/npu_nms3d.py
diff --git a/mx_driving/common/ops/rotated_iou.py b/mx_driving/detection/ops/rotated_iou.py
similarity index 100%
rename from mx_driving/common/ops/rotated_iou.py
rename to mx_driving/detection/ops/rotated_iou.py
diff --git a/mx_driving/common/ops/rotated_overlaps.py b/mx_driving/detection/ops/rotated_overlaps.py
similarity index 100%
rename from mx_driving/common/ops/rotated_overlaps.py
rename to mx_driving/detection/ops/rotated_overlaps.py
diff --git a/mx_driving/fused/CMakeLists.txt b/mx_driving/fused/CMakeLists.txt
new file mode 100644
index 00000000..621d1fa9
--- /dev/null
+++ b/mx_driving/fused/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels)
+  add_subdirectory(ops/kernels)
+endif()
diff --git a/mx_driving/fused/__init__.py b/mx_driving/fused/__init__.py
new file mode 100644
index 00000000..3c5a5f99
--- /dev/null
+++ b/mx_driving/fused/__init__.py
@@ -0,0 +1,5 @@
+from .ops.npu_max_pool2d import npu_max_pool2d
+from .ops.npu_add_relu import npu_add_relu
+from .ops.npu_multi_scale_deformable_attn_function import npu_multi_scale_deformable_attn_function
+from .ops.fused_bias_leaky_relu import npu_fused_bias_leaky_relu
+from .ops.npu_deformable_aggregation import npu_deformable_aggregation
\ No newline at end of file
diff --git a/mx_driving/perception/point/components/README.md b/mx_driving/fused/components/README.md
similarity index 100%
rename from mx_driving/perception/point/components/README.md
rename to mx_driving/fused/components/README.md
diff --git a/mx_driving/perception/fused/ops/__init__.py b/mx_driving/fused/ops/__init__.py
similarity index 100%
rename from mx_driving/perception/fused/ops/__init__.py
rename to mx_driving/fused/ops/__init__.py
diff --git a/mx_driving/common/ops/csrc/AddRelu.cpp b/mx_driving/fused/ops/csrc/AddRelu.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/AddRelu.cpp
rename to mx_driving/fused/ops/csrc/AddRelu.cpp
diff --git a/mx_driving/common/ops/csrc/DeformableAggregation.cpp b/mx_driving/fused/ops/csrc/DeformableAggregation.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/DeformableAggregation.cpp
rename to mx_driving/fused/ops/csrc/DeformableAggregation.cpp
diff --git a/mx_driving/common/ops/csrc/FusedBiasLeakyRelu.cpp b/mx_driving/fused/ops/csrc/FusedBiasLeakyRelu.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/FusedBiasLeakyRelu.cpp
rename to mx_driving/fused/ops/csrc/FusedBiasLeakyRelu.cpp
diff --git a/mx_driving/common/ops/csrc/MaxPool2d.cpp b/mx_driving/fused/ops/csrc/MaxPool2d.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/MaxPool2d.cpp
rename to mx_driving/fused/ops/csrc/MaxPool2d.cpp
diff --git a/mx_driving/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp b/mx_driving/fused/ops/csrc/MultiScaleDeformableAttnFunction.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp
rename to mx_driving/fused/ops/csrc/MultiScaleDeformableAttnFunction.cpp
diff --git a/mx_driving/perception/fused/ops/csrc/README.md b/mx_driving/fused/ops/csrc/README.md
similarity index 100%
rename from mx_driving/perception/fused/ops/csrc/README.md
rename to mx_driving/fused/ops/csrc/README.md
diff --git a/mx_driving/fused/ops/csrc/functions.h b/mx_driving/fused/ops/csrc/functions.h
new file mode 100644
index 00000000..97ae736d
--- /dev/null
+++ b/mx_driving/fused/ops/csrc/functions.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2024, Huawei Technologies.All rights reserved.
+//
+// Licensed under the BSD 3-Clause License  (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
+#define PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
+#include <ATen/Tensor.h>
+#include <torch/library.h>
+
+at::Tensor npu_max_pool2d(const at::Tensor& x, int kernel_size, int stride, int padding);
+
+at::Tensor npu_multi_scale_deformable_attn_function(const at::Tensor& value, const at::Tensor& value_spatial_shapes,
+    const at::Tensor& value_level_start_index, const at::Tensor& sampling_locations,
+    const at::Tensor& attention_weights);
+
+std::tuple<at::Tensor, at::Tensor, at::Tensor> multi_scale_deformable_attn_grad(const at::Tensor& value,
+    const at::Tensor& shape, const at::Tensor& level_start_index, const at::Tensor& location_trans,
+    const at::Tensor& attn_weight_trans, const at::Tensor& grad_output);
+
+std::tuple<at::Tensor, at::Tensor, at::Tensor> multi_scale_deformable_attn_grad_v2(const at::Tensor& value,
+    const at::Tensor& shape, const at::Tensor& level_start_index, const at::Tensor& location_trans,
+    const at::Tensor& attn_weight_trans, const at::Tensor& grad_output);
+
+at::Tensor npu_add_relu(at::Tensor& x, const at::Tensor& y);
+
+at::Tensor npu_add_relu_grad(at::Tensor& self, at::Tensor& grad_output);
+std::tuple<at::Tensor, at::Tensor> npu_scatter_mean(at::Tensor& src, at::Tensor& index,
+                                                    c10::optional<at::Tensor> out, c10::optional<int> dim,
+                                                    c10::optional<int> dim_size);
+
+at::Tensor fused_bias_leaky_relu(const at::Tensor& x, const at::Tensor& bias, const double negative_slop, const double scale);
+
+at::Tensor deformable_aggregation(const at::Tensor& mc_ms_feat, const at::Tensor& spatial_shape,
+    const at::Tensor& scale_start_index, const at::Tensor& sampling_location, const at::Tensor& weights);
+std::tuple<at::Tensor, at::Tensor, at::Tensor> deformable_aggregation_grad(
+    const at::Tensor& mc_ms_feat,
+    const at::Tensor& spatial_shape,
+    const at::Tensor& scale_start_index,
+    const at::Tensor& sampling_location,
+    const at::Tensor& weights,
+    const at::Tensor& grad_output,
+    const at::Tensor& grad_mc_ms_feat,
+    const at::Tensor& grad_sampling_location,
+    const at::Tensor& grad_weights
+);
+
+#endif // PERCEPTION_FUSED_OPS_CSRC_FUNCTIONS_H_
diff --git a/mx_driving/fused/ops/csrc/pybind.cpp b/mx_driving/fused/ops/csrc/pybind.cpp
new file mode 100644
index 00000000..80819b12
--- /dev/null
+++ b/mx_driving/fused/ops/csrc/pybind.cpp
@@ -0,0 +1,25 @@
+#include "csrc/pybind.h"
+
+#include <torch/extension.h>
+
+#include "functions.h"
+void init_fused(pybind11::module& m)
+{
+    // npu_max_pool2d
+    m.def("npu_max_pool2d", &npu_max_pool2d);
+    // npu_multi_scale_deformable_attn_function
+    m.def("npu_multi_scale_deformable_attn_function", &npu_multi_scale_deformable_attn_function);
+    m.def("multi_scale_deformable_attn_grad", &multi_scale_deformable_attn_grad);
+    m.def("multi_scale_deformable_attn_grad_v2", &multi_scale_deformable_attn_grad_v2);
+
+    // npu_add_relu
+    m.def("npu_add_relu", &npu_add_relu);
+    m.def("npu_add_relu_grad", &npu_add_relu_grad);
+
+    // fused_bias_leaky_relu
+    m.def("fused_bias_leaky_relu", &fused_bias_leaky_relu);
+    
+    // npu_deformable_aggregation
+    m.def("npu_deformable_aggregation", &deformable_aggregation);
+    m.def("npu_deformable_aggregation_grad", &deformable_aggregation_grad);
+}
diff --git a/mx_driving/common/ops/fused_bias_leaky_relu.py b/mx_driving/fused/ops/fused_bias_leaky_relu.py
similarity index 100%
rename from mx_driving/common/ops/fused_bias_leaky_relu.py
rename to mx_driving/fused/ops/fused_bias_leaky_relu.py
diff --git a/mx_driving/perception/fused/ops/kernels/CMakeLists.txt b/mx_driving/fused/ops/kernels/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/CMakeLists.txt
rename to mx_driving/fused/ops/kernels/CMakeLists.txt
diff --git a/mx_driving/perception/fused/ops/kernels/README.md b/mx_driving/fused/ops/kernels/README.md
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/README.md
rename to mx_driving/fused/ops/kernels/README.md
diff --git a/mx_driving/perception/point/ops/kernels/op_host/CMakeLists.txt b/mx_driving/fused/ops/kernels/op_host/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/CMakeLists.txt
rename to mx_driving/fused/ops/kernels/op_host/CMakeLists.txt
diff --git a/mx_driving/common/ops/kernels/op_host/add_relu.cpp b/mx_driving/fused/ops/kernels/op_host/add_relu.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/add_relu.cpp
rename to mx_driving/fused/ops/kernels/op_host/add_relu.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/add_relu_tiling.h b/mx_driving/fused/ops/kernels/op_host/add_relu_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/add_relu_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/add_relu_tiling.h
diff --git a/mx_driving/fused/ops/kernels/op_host/common.h b/mx_driving/fused/ops/kernels/op_host/common.h
new file mode 100644
index 00000000..4580dff5
--- /dev/null
+++ b/mx_driving/fused/ops/kernels/op_host/common.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+ */
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "register/op_def_registry.h"
+#include "tiling/platform/platform_ascendc.h"
+#include "tiling/tiling_api.h"
+#include "register/tilingdata_base.h"
+
+inline uint32_t ceil_multiple(uint32_t num, uint32_t block)
+{
+    if (block == 0) {
+        return 0;
+    }
+    return (num + block - 1) / block;
+}
+
+inline uint32_t ceil_value(uint32_t num, uint32_t block)
+{
+    if (block == 0) {
+        return 0;
+    }
+    return ((num + block - 1) / block) * block;
+}
+
+#endif // COMMON_H
diff --git a/mx_driving/common/ops/kernels/op_host/deformable_aggregation.cpp b/mx_driving/fused/ops/kernels/op_host/deformable_aggregation.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/deformable_aggregation.cpp
rename to mx_driving/fused/ops/kernels/op_host/deformable_aggregation.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/deformable_aggregation_grad.cpp b/mx_driving/fused/ops/kernels/op_host/deformable_aggregation_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/deformable_aggregation_grad.cpp
rename to mx_driving/fused/ops/kernels/op_host/deformable_aggregation_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/deformable_aggregation_grad_tiling.h b/mx_driving/fused/ops/kernels/op_host/deformable_aggregation_grad_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/deformable_aggregation_grad_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/deformable_aggregation_grad_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/deformable_aggregation_tiling.h b/mx_driving/fused/ops/kernels/op_host/deformable_aggregation_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/deformable_aggregation_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/deformable_aggregation_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/fused_bias_leaky_relu.cpp b/mx_driving/fused/ops/kernels/op_host/fused_bias_leaky_relu.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/fused_bias_leaky_relu.cpp
rename to mx_driving/fused/ops/kernels/op_host/fused_bias_leaky_relu.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/fused_bias_leaky_relu_tiling.h b/mx_driving/fused/ops/kernels/op_host/fused_bias_leaky_relu_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/fused_bias_leaky_relu_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/fused_bias_leaky_relu_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/max_pool2d.cpp b/mx_driving/fused/ops/kernels/op_host/max_pool2d.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/max_pool2d.cpp
rename to mx_driving/fused/ops/kernels/op_host/max_pool2d.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/max_pool2d.h b/mx_driving/fused/ops/kernels/op_host/max_pool2d.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/max_pool2d.h
rename to mx_driving/fused/ops/kernels/op_host/max_pool2d.h
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h b/mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h
rename to mx_driving/fused/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h
diff --git a/mx_driving/perception/vision/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/fused/ops/kernels/op_kernel/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/op_kernel/CMakeLists.txt
rename to mx_driving/fused/ops/kernels/op_kernel/CMakeLists.txt
diff --git a/mx_driving/common/ops/kernels/op_kernel/add_relu.cpp b/mx_driving/fused/ops/kernels/op_kernel/add_relu.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/add_relu.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/add_relu.cpp
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/common.h b/mx_driving/fused/ops/kernels/op_kernel/common.h
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/common.h
rename to mx_driving/fused/ops/kernels/op_kernel/common.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/deformable_aggregation.cpp b/mx_driving/fused/ops/kernels/op_kernel/deformable_aggregation.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/deformable_aggregation.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/deformable_aggregation.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/deformable_aggregation_grad.cpp b/mx_driving/fused/ops/kernels/op_kernel/deformable_aggregation_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/deformable_aggregation_grad.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/deformable_aggregation_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/fused_bias_leaky_relu.cpp b/mx_driving/fused/ops/kernels/op_kernel/fused_bias_leaky_relu.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/fused_bias_leaky_relu.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/fused_bias_leaky_relu.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/fused_bias_leaky_relu.h b/mx_driving/fused/ops/kernels/op_kernel/fused_bias_leaky_relu.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/fused_bias_leaky_relu.h
rename to mx_driving/fused/ops/kernels/op_kernel/fused_bias_leaky_relu.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/max_pool2d.cpp b/mx_driving/fused/ops/kernels/op_kernel/max_pool2d.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/max_pool2d.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/max_pool2d.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_generic.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_generic.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_generic.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_generic.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_high_perf.h b/mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_high_perf.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_high_perf.h
rename to mx_driving/fused/ops/kernels/op_kernel/ms_deform_attn_high_perf.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp b/mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp b/mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp b/mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp
rename to mx_driving/fused/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp
diff --git a/mx_driving/common/ops/npu_add_relu.py b/mx_driving/fused/ops/npu_add_relu.py
similarity index 100%
rename from mx_driving/common/ops/npu_add_relu.py
rename to mx_driving/fused/ops/npu_add_relu.py
diff --git a/mx_driving/common/ops/npu_deformable_aggregation.py b/mx_driving/fused/ops/npu_deformable_aggregation.py
similarity index 100%
rename from mx_driving/common/ops/npu_deformable_aggregation.py
rename to mx_driving/fused/ops/npu_deformable_aggregation.py
diff --git a/mx_driving/common/ops/npu_max_pool2d.py b/mx_driving/fused/ops/npu_max_pool2d.py
similarity index 99%
rename from mx_driving/common/ops/npu_max_pool2d.py
rename to mx_driving/fused/ops/npu_max_pool2d.py
index 981b2888..38b68afb 100644
--- a/mx_driving/common/ops/npu_max_pool2d.py
+++ b/mx_driving/fused/ops/npu_max_pool2d.py
@@ -9,6 +9,7 @@ Modification 1. Add support for Ascend NPU
 from torch.autograd import Function
 import ads_c
 
+
 class MaxPool2d(Function):
     @staticmethod
     # 'pylint: disable=too-many-arguments,huawei-too-many-arguments
diff --git a/mx_driving/common/ops/npu_multi_scale_deformable_attn_function.py b/mx_driving/fused/ops/npu_multi_scale_deformable_attn_function.py
similarity index 100%
rename from mx_driving/common/ops/npu_multi_scale_deformable_attn_function.py
rename to mx_driving/fused/ops/npu_multi_scale_deformable_attn_function.py
diff --git a/mx_driving/motion/ops/csrc/pybind.cpp b/mx_driving/motion/ops/csrc/pybind.cpp
deleted file mode 100644
index 7b362419..00000000
--- a/mx_driving/motion/ops/csrc/pybind.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-#include <torch/extension.h>
-#include "csrc/pybind.h"
-
-void init_motion(pybind11::module& m) {
-}
diff --git a/mx_driving/motion/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/motion/ops/kernels/op_kernel/CMakeLists.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/mx_driving/perception/CMakeLists.txt b/mx_driving/perception/CMakeLists.txt
deleted file mode 100644
index c8777acb..00000000
--- a/mx_driving/perception/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/fused/ops/kernels)
-  add_subdirectory(fused/ops/kernels)
-endif()
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/point/ops/kernels)
-  add_subdirectory(point/ops/kernels)
-endif()
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vision/ops/kernels)
-  add_subdirectory(vision/ops/kernels)
-endif()
diff --git a/mx_driving/perception/fused/__init__.py b/mx_driving/perception/fused/__init__.py
deleted file mode 100644
index 30f8aefb..00000000
--- a/mx_driving/perception/fused/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .ops.bev_pool import bev_pool
-from .ops.bev_pool_v2 import bev_pool_v2
\ No newline at end of file
diff --git a/mx_driving/perception/fused/ops/csrc/pybind.cpp b/mx_driving/perception/fused/ops/csrc/pybind.cpp
deleted file mode 100644
index d4001c57..00000000
--- a/mx_driving/perception/fused/ops/csrc/pybind.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "csrc/pybind.h"
-
-#include <torch/extension.h>
-
-#include "functions.h"
-void init_perception_fused(pybind11::module& m)
-{
-    // bev_pool
-    m.def("npu_bev_pool", &npu_bev_pool, "npu_bev_pool NPU version");
-    m.def("npu_bev_pool_backward", &npu_bev_pool_backward, "npu_bev_pool_backward NPU version");
-    m.def("npu_bev_pool_v2", &npu_bev_pool_v2, "npu_bev_pool_v2 NPU version");
-    m.def("npu_bev_pool_v2_backward", &npu_bev_pool_v2_backward, "npu_bev_pool_v2_backward NPU version");
-}
diff --git a/mx_driving/perception/point/__init__.py b/mx_driving/perception/point/__init__.py
deleted file mode 100644
index 218b2f73..00000000
--- a/mx_driving/perception/point/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .ops.group_points import npu_group_points
-
diff --git a/mx_driving/perception/vision/__init__.py b/mx_driving/perception/vision/__init__.py
deleted file mode 100644
index 586c264f..00000000
--- a/mx_driving/perception/vision/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .ops.boxes_overlap_bev import boxes_overlap_bev
diff --git a/mx_driving/perception/vision/ops/__init__.py b/mx_driving/perception/vision/ops/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/mx_driving/perception/vision/ops/csrc/README.md b/mx_driving/perception/vision/ops/csrc/README.md
deleted file mode 100644
index 0bbe4f39..00000000
--- a/mx_driving/perception/vision/ops/csrc/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-## Description
-+ The folder contains some cpp source files, which provide code for adaptation of ascend kernels. It provide links for kernels and cpp interfaces.
\ No newline at end of file
diff --git a/mx_driving/perception/vision/ops/kernels/README.md b/mx_driving/perception/vision/ops/kernels/README.md
deleted file mode 100644
index 214fb0a6..00000000
--- a/mx_driving/perception/vision/ops/kernels/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-## Description
-+ The folder contains some ascend-kernel source files, which are like cuda-kernels and supply some ops that can be run on ascend device.
\ No newline at end of file
diff --git a/mx_driving/point/CMakeLists.txt b/mx_driving/point/CMakeLists.txt
new file mode 100644
index 00000000..621d1fa9
--- /dev/null
+++ b/mx_driving/point/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels) # descend only when the kernels directory is present
+  add_subdirectory(ops/kernels)
+endif()
diff --git a/mx_driving/point/__init__.py b/mx_driving/point/__init__.py
new file mode 100644
index 00000000..16f62bef
--- /dev/null
+++ b/mx_driving/point/__init__.py
@@ -0,0 +1,8 @@
+from .ops.group_points import npu_group_points
+from .ops.bev_pool import bev_pool
+from .ops.bev_pool_v2 import bev_pool_v2
+from .ops.furthest_point_sampling_with_dist import furthest_point_sample_with_dist
+from .ops.furthest_point_sampling import npu_furthest_point_sampling
+from .ops.npu_dynamic_scatter import npu_dynamic_scatter
+from .ops.voxelization import voxelization, Voxelization
+from .ops.voxel_pooling_train import npu_voxel_pooling_train
\ No newline at end of file
diff --git a/mx_driving/perception/vision/components/README.md b/mx_driving/point/components/README.md
similarity index 100%
rename from mx_driving/perception/vision/components/README.md
rename to mx_driving/point/components/README.md
diff --git a/mx_driving/perception/point/ops/__init__.py b/mx_driving/point/ops/__init__.py
similarity index 100%
rename from mx_driving/perception/point/ops/__init__.py
rename to mx_driving/point/ops/__init__.py
diff --git a/mx_driving/perception/fused/ops/bev_pool.py b/mx_driving/point/ops/bev_pool.py
similarity index 100%
rename from mx_driving/perception/fused/ops/bev_pool.py
rename to mx_driving/point/ops/bev_pool.py
diff --git a/mx_driving/perception/fused/ops/bev_pool_v2.py b/mx_driving/point/ops/bev_pool_v2.py
similarity index 100%
rename from mx_driving/perception/fused/ops/bev_pool_v2.py
rename to mx_driving/point/ops/bev_pool_v2.py
diff --git a/mx_driving/perception/fused/ops/csrc/BEVPool.cpp b/mx_driving/point/ops/csrc/BEVPool.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/csrc/BEVPool.cpp
rename to mx_driving/point/ops/csrc/BEVPool.cpp
diff --git a/mx_driving/perception/fused/ops/csrc/BEVPoolBackward.cpp b/mx_driving/point/ops/csrc/BEVPoolBackward.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/csrc/BEVPoolBackward.cpp
rename to mx_driving/point/ops/csrc/BEVPoolBackward.cpp
diff --git a/mx_driving/perception/fused/ops/csrc/BEVPoolV2.cpp b/mx_driving/point/ops/csrc/BEVPoolV2.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/csrc/BEVPoolV2.cpp
rename to mx_driving/point/ops/csrc/BEVPoolV2.cpp
diff --git a/mx_driving/perception/fused/ops/csrc/BEVPoolV2Backward.cpp b/mx_driving/point/ops/csrc/BEVPoolV2Backward.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/csrc/BEVPoolV2Backward.cpp
rename to mx_driving/point/ops/csrc/BEVPoolV2Backward.cpp
diff --git a/mx_driving/common/ops/csrc/DynamicScatter.cpp b/mx_driving/point/ops/csrc/DynamicScatter.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/DynamicScatter.cpp
rename to mx_driving/point/ops/csrc/DynamicScatter.cpp
diff --git a/mx_driving/common/ops/csrc/DynamicVoxelization.cpp b/mx_driving/point/ops/csrc/DynamicVoxelization.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/DynamicVoxelization.cpp
rename to mx_driving/point/ops/csrc/DynamicVoxelization.cpp
diff --git a/mx_driving/common/ops/csrc/FurthestPointSampling.cpp b/mx_driving/point/ops/csrc/FurthestPointSampling.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/FurthestPointSampling.cpp
rename to mx_driving/point/ops/csrc/FurthestPointSampling.cpp
diff --git a/mx_driving/common/ops/csrc/FurthestPointSamplingWithDist.cpp b/mx_driving/point/ops/csrc/FurthestPointSamplingWithDist.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/FurthestPointSamplingWithDist.cpp
rename to mx_driving/point/ops/csrc/FurthestPointSamplingWithDist.cpp
diff --git a/mx_driving/perception/point/ops/csrc/GroupPoints.cpp b/mx_driving/point/ops/csrc/GroupPoints.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/GroupPoints.cpp
rename to mx_driving/point/ops/csrc/GroupPoints.cpp
diff --git a/mx_driving/perception/point/ops/csrc/HardVoxelize.cpp b/mx_driving/point/ops/csrc/HardVoxelize.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/HardVoxelize.cpp
rename to mx_driving/point/ops/csrc/HardVoxelize.cpp
diff --git a/mx_driving/perception/point/ops/csrc/PointToVoxel.cpp b/mx_driving/point/ops/csrc/PointToVoxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/PointToVoxel.cpp
rename to mx_driving/point/ops/csrc/PointToVoxel.cpp
diff --git a/mx_driving/perception/point/ops/csrc/README.md b/mx_driving/point/ops/csrc/README.md
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/README.md
rename to mx_driving/point/ops/csrc/README.md
diff --git a/mx_driving/perception/point/ops/csrc/UniqueVoxel.cpp b/mx_driving/point/ops/csrc/UniqueVoxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/UniqueVoxel.cpp
rename to mx_driving/point/ops/csrc/UniqueVoxel.cpp
diff --git a/mx_driving/perception/point/ops/csrc/VecPoolBackward.cpp b/mx_driving/point/ops/csrc/VecPoolBackward.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/VecPoolBackward.cpp
rename to mx_driving/point/ops/csrc/VecPoolBackward.cpp
diff --git a/mx_driving/common/ops/csrc/VoxelPoolingTrain.cpp b/mx_driving/point/ops/csrc/VoxelPoolingTrain.cpp
similarity index 100%
rename from mx_driving/common/ops/csrc/VoxelPoolingTrain.cpp
rename to mx_driving/point/ops/csrc/VoxelPoolingTrain.cpp
diff --git a/mx_driving/perception/point/ops/csrc/VoxelToPoint.cpp b/mx_driving/point/ops/csrc/VoxelToPoint.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/csrc/VoxelToPoint.cpp
rename to mx_driving/point/ops/csrc/VoxelToPoint.cpp
diff --git a/mx_driving/perception/point/ops/csrc/functions.h b/mx_driving/point/ops/csrc/functions.h
similarity index 43%
rename from mx_driving/perception/point/ops/csrc/functions.h
rename to mx_driving/point/ops/csrc/functions.h
index 00127f9c..8c8a43bf 100644
--- a/mx_driving/perception/point/ops/csrc/functions.h
+++ b/mx_driving/point/ops/csrc/functions.h
@@ -39,4 +39,39 @@ std::tuple<int32_t, at::Tensor, at::Tensor, at::Tensor, at::Tensor> unique_voxel
 std::tuple<int32_t, at::Tensor, at::Tensor, at::Tensor> hard_voxelize(const at::Tensor& points,
     const std::vector<float> voxel_sizes, const std::vector<float> coor_ranges, int64_t max_points, int64_t max_voxels);
 
+at::Tensor npu_bev_pool(const at::Tensor& feat, const at::Tensor& geom_feat, const at::Tensor& interval_lengths,
+    const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+at::Tensor npu_bev_pool_backward(const at::Tensor& grad_out, const at::Tensor& geom_feat,
+    const at::Tensor& interval_lengths, const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+
+at::Tensor npu_bev_pool_v2(const at::Tensor& depth, const at::Tensor& feat, const at::Tensor& ranks_depth,
+    const at::Tensor& ranks_feat, const at::Tensor& ranks_bev, const at::Tensor& interval_lengths,
+    const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+std::tuple<at::Tensor, at::Tensor> npu_bev_pool_v2_backward(const at::Tensor& grad_out, const at::Tensor& depth,
+    const at::Tensor& feat, const at::Tensor& ranks_depth, const at::Tensor& ranks_feat, const at::Tensor& ranks_bev,
+    const at::Tensor& interval_lengths, const at::Tensor& interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+
+at::Tensor furthest_point_sampling_with_dist(
+    const at::Tensor& points_dist, const at::Tensor& nearest_temp, int32_t num_points);
+
+std::tuple<at::Tensor, at::Tensor> npu_dynamic_scatter(const at::Tensor& feats, const at::Tensor& coors,
+    const at::Tensor& prefix_sum_point_per_voxel, const at::Tensor& argsort_coor, int32_t num_voxels,
+    const char* reduce_type);
+
+void npu_dynamic_scatter_grad(at::Tensor& grad_point_feats, const at::Tensor& grad_voxel_feats,
+    const at::Tensor& prefix_sum_point_per_voxel, const at::Tensor& argsort_coor, const at::Tensor& compare_mask,
+    const char* reduce_type);
+
+at::Tensor npu_furthest_point_sampling(const at::Tensor& point_xyz, const at::Tensor& nearset_temp, int32_t num_points);
+
+std::tuple<at::Tensor&, at::Tensor&> voxel_pooling_train(const at::Tensor& inputFeatures, const at::Tensor& geom,
+    at::Tensor& outputFeatures, at::Tensor& posMemo, int batchSize, int numPoints, int numChannels, int numVoxelX,
+    int numVoxelY, int numVoxelZ);
+
+at::Tensor voxel_pool_train_backward(const at::Tensor& grad_out, const at::Tensor& posMemo, const int64_t batchSize,
+    const int64_t numPoints, const int64_t numChannels, const int64_t h, const int64_t w);
+
+at::Tensor dynamic_voxelization(const at::Tensor& points, at::Tensor& coors, int grid_x, int grid_y, int grid_z,
+    double voxel_x, double voxel_y, double voxel_z, double coors_min_x, double coors_min_y, double coorsMinZ);
+
 #endif // PERCEPTION_POINT_OPS_CSRC_FUNCTIONS_H_
diff --git a/mx_driving/perception/point/ops/csrc/pybind.cpp b/mx_driving/point/ops/csrc/pybind.cpp
similarity index 31%
rename from mx_driving/perception/point/ops/csrc/pybind.cpp
rename to mx_driving/point/ops/csrc/pybind.cpp
index e4a5fe44..98fe18c7 100644
--- a/mx_driving/perception/point/ops/csrc/pybind.cpp
+++ b/mx_driving/point/ops/csrc/pybind.cpp
@@ -4,7 +4,7 @@
 
 #include "functions.h"
 
-void init_perception_point(pybind11::module& m)
+void init_point(pybind11::module& m)
 {
     // group_points
     m.def("group_points", &group_points);
@@ -20,4 +20,27 @@ void init_perception_point(pybind11::module& m)
     m.def("unique_voxel", &unique_voxel);
     
     m.def("hard_voxelize", &hard_voxelize);
+
+    // bev_pool
+    m.def("npu_bev_pool", &npu_bev_pool, "npu_bev_pool NPU version");
+    m.def("npu_bev_pool_backward", &npu_bev_pool_backward, "npu_bev_pool_backward NPU version");
+    m.def("npu_bev_pool_v2", &npu_bev_pool_v2, "npu_bev_pool_v2 NPU version");
+    m.def("npu_bev_pool_v2_backward", &npu_bev_pool_v2_backward, "npu_bev_pool_v2_backward NPU version");
+
+    // furthest_point_sampling_with_dist
+    m.def("furthest_point_sampling_with_dist", &furthest_point_sampling_with_dist);
+
+    // npu_dynamic_scatter
+    m.def("npu_dynamic_scatter", &npu_dynamic_scatter);
+    m.def("npu_dynamic_scatter_grad", &npu_dynamic_scatter_grad);
+
+    // dynamic_voxelization
+    m.def("dynamic_voxelization", &dynamic_voxelization);
+
+    // npu_furthest_point_sampling
+    m.def("npu_furthest_point_sampling", &npu_furthest_point_sampling);
+
+    // voxel_pooling
+    m.def("voxel_pooling_train", &voxel_pooling_train);
+    m.def("voxel_pool_train_backward", &voxel_pool_train_backward);
 }
diff --git a/mx_driving/common/ops/furthest_point_sampling.py b/mx_driving/point/ops/furthest_point_sampling.py
similarity index 100%
rename from mx_driving/common/ops/furthest_point_sampling.py
rename to mx_driving/point/ops/furthest_point_sampling.py
diff --git a/mx_driving/common/ops/furthest_point_sampling_with_dist.py b/mx_driving/point/ops/furthest_point_sampling_with_dist.py
similarity index 100%
rename from mx_driving/common/ops/furthest_point_sampling_with_dist.py
rename to mx_driving/point/ops/furthest_point_sampling_with_dist.py
diff --git a/mx_driving/perception/point/ops/group_points.py b/mx_driving/point/ops/group_points.py
similarity index 100%
rename from mx_driving/perception/point/ops/group_points.py
rename to mx_driving/point/ops/group_points.py
diff --git a/mx_driving/perception/vision/ops/kernels/CMakeLists.txt b/mx_driving/point/ops/kernels/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/CMakeLists.txt
rename to mx_driving/point/ops/kernels/CMakeLists.txt
diff --git a/mx_driving/perception/point/ops/kernels/README.md b/mx_driving/point/ops/kernels/README.md
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/README.md
rename to mx_driving/point/ops/kernels/README.md
diff --git a/mx_driving/perception/vision/ops/kernels/op_host/CMakeLists.txt b/mx_driving/point/ops/kernels/op_host/CMakeLists.txt
similarity index 100%
rename from mx_driving/perception/vision/ops/kernels/op_host/CMakeLists.txt
rename to mx_driving/point/ops/kernels/op_host/CMakeLists.txt
diff --git a/mx_driving/perception/fused/ops/kernels/op_host/bev_pool.cpp b/mx_driving/point/ops/kernels/op_host/bev_pool.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_host/bev_pool.cpp
rename to mx_driving/point/ops/kernels/op_host/bev_pool.cpp
diff --git a/mx_driving/perception/fused/ops/kernels/op_host/bev_pool_tiling.h b/mx_driving/point/ops/kernels/op_host/bev_pool_tiling.h
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_host/bev_pool_tiling.h
rename to mx_driving/point/ops/kernels/op_host/bev_pool_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_scatter.cpp b/mx_driving/point/ops/kernels/op_host/dynamic_scatter.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_scatter.cpp
rename to mx_driving/point/ops/kernels/op_host/dynamic_scatter.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad.cpp b/mx_driving/point/ops/kernels/op_host/dynamic_scatter_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad.cpp
rename to mx_driving/point/ops/kernels/op_host/dynamic_scatter_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h b/mx_driving/point/ops/kernels/op_host/dynamic_scatter_grad_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h
rename to mx_driving/point/ops/kernels/op_host/dynamic_scatter_grad_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_scatter_tiling.h b/mx_driving/point/ops/kernels/op_host/dynamic_scatter_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_scatter_tiling.h
rename to mx_driving/point/ops/kernels/op_host/dynamic_scatter_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_voxelization.cpp b/mx_driving/point/ops/kernels/op_host/dynamic_voxelization.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_voxelization.cpp
rename to mx_driving/point/ops/kernels/op_host/dynamic_voxelization.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/dynamic_voxelization_tiling.h b/mx_driving/point/ops/kernels/op_host/dynamic_voxelization_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/dynamic_voxelization_tiling.h
rename to mx_driving/point/ops/kernels/op_host/dynamic_voxelization_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/furthest_point_sampling.cpp b/mx_driving/point/ops/kernels/op_host/furthest_point_sampling.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/furthest_point_sampling.cpp
rename to mx_driving/point/ops/kernels/op_host/furthest_point_sampling.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_tiling.h b/mx_driving/point/ops/kernels/op_host/furthest_point_sampling_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/furthest_point_sampling_tiling.h
rename to mx_driving/point/ops/kernels/op_host/furthest_point_sampling_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp b/mx_driving/point/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp
rename to mx_driving/point/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h b/mx_driving/point/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h
rename to mx_driving/point/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/group_points.cpp b/mx_driving/point/ops/kernels/op_host/group_points.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/group_points.cpp
rename to mx_driving/point/ops/kernels/op_host/group_points.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/group_points_grad.cpp b/mx_driving/point/ops/kernels/op_host/group_points_grad.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/group_points_grad.cpp
rename to mx_driving/point/ops/kernels/op_host/group_points_grad.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/group_points_grad_tiling.h b/mx_driving/point/ops/kernels/op_host/group_points_grad_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/group_points_grad_tiling.h
rename to mx_driving/point/ops/kernels/op_host/group_points_grad_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/group_points_tiling.h b/mx_driving/point/ops/kernels/op_host/group_points_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/group_points_tiling.h
rename to mx_driving/point/ops/kernels/op_host/group_points_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/hard_voxelize.cpp b/mx_driving/point/ops/kernels/op_host/hard_voxelize.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/hard_voxelize.cpp
rename to mx_driving/point/ops/kernels/op_host/hard_voxelize.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/hard_voxelize_tiling.h b/mx_driving/point/ops/kernels/op_host/hard_voxelize_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/hard_voxelize_tiling.h
rename to mx_driving/point/ops/kernels/op_host/hard_voxelize_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/point_to_voxel.cpp b/mx_driving/point/ops/kernels/op_host/point_to_voxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/point_to_voxel.cpp
rename to mx_driving/point/ops/kernels/op_host/point_to_voxel.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h b/mx_driving/point/ops/kernels/op_host/point_to_voxel_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h
rename to mx_driving/point/ops/kernels/op_host/point_to_voxel_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/unique_voxel.cpp b/mx_driving/point/ops/kernels/op_host/unique_voxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/unique_voxel.cpp
rename to mx_driving/point/ops/kernels/op_host/unique_voxel.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/unique_voxel_tiling.h b/mx_driving/point/ops/kernels/op_host/unique_voxel_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/unique_voxel_tiling.h
rename to mx_driving/point/ops/kernels/op_host/unique_voxel_tiling.h
diff --git a/mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad.cpp b/mx_driving/point/ops/kernels/op_host/vec_pool_grad.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad.cpp
rename to mx_driving/point/ops/kernels/op_host/vec_pool_grad.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h b/mx_driving/point/ops/kernels/op_host/vec_pool_grad_tiling.h
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h
rename to mx_driving/point/ops/kernels/op_host/vec_pool_grad_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/voxel_pooling_train.cpp b/mx_driving/point/ops/kernels/op_host/voxel_pooling_train.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/voxel_pooling_train.cpp
rename to mx_driving/point/ops/kernels/op_host/voxel_pooling_train.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp b/mx_driving/point/ops/kernels/op_host/voxel_pooling_train_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp
rename to mx_driving/point/ops/kernels/op_host/voxel_pooling_train_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h b/mx_driving/point/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h
rename to mx_driving/point/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h
diff --git a/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_tiling.h b/mx_driving/point/ops/kernels/op_host/voxel_pooling_train_tiling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_host/voxel_pooling_train_tiling.h
rename to mx_driving/point/ops/kernels/op_host/voxel_pooling_train_tiling.h
diff --git a/mx_driving/point/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/point/ops/kernels/op_kernel/CMakeLists.txt
new file mode 100644
index 00000000..c51870f1
--- /dev/null
+++ b/mx_driving/point/ops/kernels/op_kernel/CMakeLists.txt
@@ -0,0 +1,4 @@
+file(GLOB KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) # every .cpp/.h directly in this directory
+set(ASCEND_KERNEL_SRC
+        ${ASCEND_KERNEL_SRC} ${KERNEL_SRC}
+        CACHE INTERNAL "") # append to the cached list rather than overwrite it
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.cpp b/mx_driving/point/ops/kernels/op_kernel/bev_pool.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.cpp
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool.cpp
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.h b/mx_driving/point/ops/kernels/op_kernel/bev_pool.h
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.h
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool.h
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/bev_pool_grad.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool_grad.cpp
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp b/mx_driving/point/ops/kernels/op_kernel/bev_pool_v2.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool_v2.cpp
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h b/mx_driving/point/ops/kernels/op_kernel/bev_pool_v2.h
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool_v2.h
diff --git a/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/bev_pool_v2_grad.cpp
similarity index 100%
rename from mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/bev_pool_v2_grad.cpp
diff --git a/mx_driving/point/ops/kernels/op_kernel/common.h b/mx_driving/point/ops/kernels/op_kernel/common.h
new file mode 100644
index 00000000..2041af49
--- /dev/null
+++ b/mx_driving/point/ops/kernels/op_kernel/common.h
@@ -0,0 +1,46 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#include "kernel_operator.h"
+
+constexpr int32_t TILING_ALIGN32B_FLAG = 1; // NOTE(review): presumably marks 32-byte-aligned tiling — confirm at use sites
+constexpr int32_t TILING_FP32_BIT = 1; // NOTE(review): same value as TILING_ALIGN32B_FLAG — confirm they are used in different fields
+constexpr int32_t TILING_FP16_BIT = 2;
+constexpr int32_t TILING_BF16_BIT = 3;
+
+class TaskIterator { // Walks the contiguous run of task indices assigned to one block.
+public:
+    __aicore__ inline TaskIterator(
+        int32_t blkIdx, int32_t blkDim, int32_t avgTaskNum, int32_t tailTaskNum, int32_t totalTaskNum)
+        : blkIdx_(blkIdx), blkDim_(blkDim), totalTaskNum_(totalTaskNum)
+    {
+        nextIdx_ = blkIdx * avgTaskNum + (blkIdx < tailTaskNum ? blkIdx : tailTaskNum); // offset by one extra task per lower-indexed block below tailTaskNum
+        endIdx_ = nextIdx_ + avgTaskNum + (blkIdx < tailTaskNum ? 1 : 0); // blocks below tailTaskNum own one extra task
+    }
+
+    __aicore__ inline bool HasNext() const
+    {
+        return nextIdx_ < endIdx_; // endIdx_ is exclusive
+    }
+
+    __aicore__ inline int32_t Next()
+    {
+        return nextIdx_++; // hand out the current index, then advance
+    }
+
+    __aicore__ inline int32_t GetNext() const
+    {
+        return nextIdx_; // peek: current index without advancing
+    }
+
+    __aicore__ inline int32_t GetTaskNum() const
+    {
+        return totalTaskNum_; // the total passed to the constructor, not this block's share
+    }
+
+private:
+    int32_t blkIdx_, blkDim_; // stored by the constructor; not read by any method of this class
+    int32_t nextIdx_, endIdx_; // half-open range [nextIdx_, endIdx_) still to be handed out
+    int32_t totalTaskNum_;
+};
+#endif // COMMON_H_
\ No newline at end of file
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter.cpp b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter.cpp
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_base.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_base.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_base.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_base.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_base.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_base.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_max.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_max.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_max.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_max.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_max.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_max.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_mean.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_mean.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_mean.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_mean.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_sum.h b/mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_sum.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_sum.h
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_scatter_sum.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/dynamic_voxelization.cpp b/mx_driving/point/ops/kernels/op_kernel/dynamic_voxelization.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/dynamic_voxelization.cpp
rename to mx_driving/point/ops/kernels/op_kernel/dynamic_voxelization.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.cpp b/mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.cpp
rename to mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.h b/mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling.h
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.h
rename to mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling.h
diff --git a/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp b/mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp
rename to mx_driving/point/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/group_points.cpp b/mx_driving/point/ops/kernels/op_kernel/group_points.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/group_points.cpp
rename to mx_driving/point/ops/kernels/op_kernel/group_points.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/group_points_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/group_points_grad.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/group_points_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/group_points_grad.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/hard_voxelize.cpp b/mx_driving/point/ops/kernels/op_kernel/hard_voxelize.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/hard_voxelize.cpp
rename to mx_driving/point/ops/kernels/op_kernel/hard_voxelize.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp b/mx_driving/point/ops/kernels/op_kernel/point_to_voxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp
rename to mx_driving/point/ops/kernels/op_kernel/point_to_voxel.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/unique_voxel.cpp b/mx_driving/point/ops/kernels/op_kernel/unique_voxel.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/unique_voxel.cpp
rename to mx_driving/point/ops/kernels/op_kernel/unique_voxel.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/vec_pool_grad.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/vec_pool_grad.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.cpp b/mx_driving/point/ops/kernels/op_kernel/voxel_pooling_train.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.cpp
rename to mx_driving/point/ops/kernels/op_kernel/voxel_pooling_train.cpp
diff --git a/mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp b/mx_driving/point/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp
similarity index 100%
rename from mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp
rename to mx_driving/point/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp
diff --git a/mx_driving/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp b/mx_driving/point/ops/kernels/op_kernel/voxel_to_point.cpp
similarity index 100%
rename from mx_driving/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp
rename to mx_driving/point/ops/kernels/op_kernel/voxel_to_point.cpp
diff --git a/mx_driving/common/ops/npu_dynamic_scatter.py b/mx_driving/point/ops/npu_dynamic_scatter.py
similarity index 100%
rename from mx_driving/common/ops/npu_dynamic_scatter.py
rename to mx_driving/point/ops/npu_dynamic_scatter.py
diff --git a/mx_driving/common/ops/voxel_pooling_train.py b/mx_driving/point/ops/voxel_pooling_train.py
similarity index 100%
rename from mx_driving/common/ops/voxel_pooling_train.py
rename to mx_driving/point/ops/voxel_pooling_train.py
diff --git a/mx_driving/common/ops/voxelization.py b/mx_driving/point/ops/voxelization.py
similarity index 100%
rename from mx_driving/common/ops/voxelization.py
rename to mx_driving/point/ops/voxelization.py
diff --git a/setup.py b/setup.py
index a317752d..1effcb4a 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ from utils import extension
 
 BASE_DIR = os.path.dirname(os.path.realpath(__file__))
 VERSION = torch.__version__
-full_components = ["common", "motion", "perception/fused", "perception/point", "perception/vision", "spconv"]
+full_components = ["common", "data", "fused", "point", "detection", "spconv"]
 source_file = glob.glob(os.path.join("./bind/", "*.cpp"))
 include_dirs = [os.path.join(BASE_DIR, "include")]
 for part in full_components:
diff --git a/tests/torch/test_add_relu.py b/tests/torch/test_add_relu.py
index 210d72da..2a9901bb 100644
--- a/tests/torch/test_add_relu.py
+++ b/tests/torch/test_add_relu.py
@@ -5,7 +5,7 @@ import torch_npu
 import torch.nn.functional as F
 
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.fused
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -18,7 +18,7 @@ class TestPointsInBox(TestCase):
         y = np.random.uniform(2.0, 2.0, [1, 100, 3]).astype(np.float32)
         y = torch.from_numpy(y)
         cpu_result = F.relu(x + y)
-        x = mx_driving.common.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
+        x = mx_driving.fused.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
         self.assertRtolEqual(x, cpu_result.numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `AddRelu` is only supported on 910B, skip this ut!")
@@ -28,7 +28,7 @@ class TestPointsInBox(TestCase):
         y = np.random.uniform(2.0, 2.0, [18, 256, 232, 400]).astype(np.float32)
         y = torch.from_numpy(y)
         cpu_result = F.relu(x + y)
-        x = mx_driving.common.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
+        x = mx_driving.fused.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
         self.assertRtolEqual(x, cpu_result.numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `AddRelu` is only supported on 910B, skip this ut!")
@@ -38,7 +38,7 @@ class TestPointsInBox(TestCase):
         y = np.random.uniform(2.0, 2.0, [18, 256, 232, 400]).astype(np.float16)
         y = torch.from_numpy(y)
         cpu_result = F.relu(x.float() + y.float())
-        x = mx_driving.common.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
+        x = mx_driving.fused.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
         self.assertRtolEqual(x, cpu_result.half().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `AddRelu` is only supported on 910B, skip this ut!")
@@ -48,7 +48,7 @@ class TestPointsInBox(TestCase):
         y = np.random.uniform(2.0, 2.0, [18]).astype(np.float16)
         y = torch.from_numpy(y)
         cpu_result = F.relu(x.float() + y.float())
-        x = mx_driving.common.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
+        x = mx_driving.fused.npu_add_relu(x.npu(), y.npu()).cpu().numpy()
         self.assertRtolEqual(x, cpu_result.half().numpy())
 
 
diff --git a/tests/torch/test_bev_pool.py b/tests/torch/test_bev_pool.py
index f7cbf4f3..f08e92ad 100644
--- a/tests/torch/test_bev_pool.py
+++ b/tests/torch/test_bev_pool.py
@@ -3,7 +3,7 @@ import torch
 import numpy as np
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-from mx_driving.perception.fused import bev_pool
+from mx_driving.point import bev_pool
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
diff --git a/tests/torch/test_bev_pool_v2.py b/tests/torch/test_bev_pool_v2.py
index ac8c4939..9870e4ff 100644
--- a/tests/torch/test_bev_pool_v2.py
+++ b/tests/torch/test_bev_pool_v2.py
@@ -6,7 +6,7 @@ import torch_npu
 from ads_c import npu_bev_pool_v2_backward
 from torch_npu.testing.testcase import TestCase, run_tests
 
-from mx_driving.perception.fused import bev_pool_v2
+from mx_driving.point import bev_pool_v2
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
diff --git a/tests/torch/test_boxes_overlap_bev.py b/tests/torch/test_boxes_overlap_bev.py
index 41ce509d..209cbde5 100644
--- a/tests/torch/test_boxes_overlap_bev.py
+++ b/tests/torch/test_boxes_overlap_bev.py
@@ -6,7 +6,7 @@ import numpy as np
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.perception.vision
+import mx_driving.detection
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -284,7 +284,7 @@ class TestBoxesOverlapBev(TestCase):
     def npu_to_exec(self, npu_inputs):
         npu_boxes_a = npu_inputs.boxes_a
         npu_boxes_b = npu_inputs.boxes_b
-        npu_ans_overlap = mx_driving.perception.vision.boxes_overlap_bev(npu_boxes_a, npu_boxes_b)
+        npu_ans_overlap = mx_driving.detection.boxes_overlap_bev(npu_boxes_a, npu_boxes_b)
         return npu_ans_overlap.cpu().float().numpy()
 
     def check_precision(self, actual, expected, rtol=1e-4, atol=1e-4, msg=None):
diff --git a/tests/torch/test_deformable_aggregation.py b/tests/torch/test_deformable_aggregation.py
index 2c22b0a1..a274b789 100644
--- a/tests/torch/test_deformable_aggregation.py
+++ b/tests/torch/test_deformable_aggregation.py
@@ -4,7 +4,7 @@ import numpy as np
 
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.fused
 
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
@@ -151,7 +151,7 @@ class TestDeformableAggregation(TestCase):
                                                                num_scale, num_embeds, num_groups, num_feat,
                                                                feature_maps, spatial_shape, scale_start_index,
                                                                sample_location, weights)
-                            out_npu = mx_driving.common.npu_deformable_aggregation(torch_feature_maps,
+                            out_npu = mx_driving.fused.npu_deformable_aggregation(torch_feature_maps,
                                                                                    torch_spatial_shape,
                                                                                    torch_scale_start_index,
                                                                                    torch_sample_location,
diff --git a/tests/torch/test_deformable_aggregation_grad.py b/tests/torch/test_deformable_aggregation_grad.py
index aec4c8be..8c2bbae4 100644
--- a/tests/torch/test_deformable_aggregation_grad.py
+++ b/tests/torch/test_deformable_aggregation_grad.py
@@ -5,7 +5,7 @@ import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
 
-import mx_driving.common
+import mx_driving.fused
 
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
@@ -227,7 +227,7 @@ class TestDeformableAggregation(TestCase):
                                 grad_weights,
                             )
 
-                            out_npu = mx_driving.common.npu_deformable_aggregation(
+                            out_npu = mx_driving.fused.npu_deformable_aggregation(
                                 torch_feature_maps,
                                 torch_spatial_shape,
                                 torch_scale_start_index,
diff --git a/tests/torch/test_furthest_point_sample_with_dist.py b/tests/torch/test_furthest_point_sample_with_dist.py
index e6302881..9f13d8c3 100644
--- a/tests/torch/test_furthest_point_sample_with_dist.py
+++ b/tests/torch/test_furthest_point_sample_with_dist.py
@@ -18,7 +18,7 @@ import numpy as np
 
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.point
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -65,7 +65,7 @@ class TestFurthestPointSampleWithDist(TestCase):
 
     def custom_op_exec(self, point_dist, point_num, input_dtype):
         point_dist_npu = torch.tensor(point_dist, dtype=input_dtype).npu()
-        output = mx_driving.common.furthest_point_sample_with_dist(point_dist_npu, point_num)
+        output = mx_driving.point.furthest_point_sample_with_dist(point_dist_npu, point_num)
         return output.cpu().numpy()
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `FurthestPointSampleWithDist` is only supported on 910B, skip this ut!")
diff --git a/tests/torch/test_furthest_point_sampling.py b/tests/torch/test_furthest_point_sampling.py
index a822d2fe..a637d8eb 100644
--- a/tests/torch/test_furthest_point_sampling.py
+++ b/tests/torch/test_furthest_point_sampling.py
@@ -20,7 +20,7 @@ import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
 from torch_npu.testing.common_utils import create_common_tensor
-import mx_driving.common
+import mx_driving.point
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -142,7 +142,7 @@ class TestFurthestPointSample(TestCase):
         return myTest.getCpuRes()
 
     def npu_op_exec(self, myTest):
-        return mx_driving.common.npu_furthest_point_sampling(myTest.point.clone().permute(0, 2, 1).npu(), myTest.numPoints)
+        return mx_driving.point.npu_furthest_point_sampling(myTest.point.clone().permute(0, 2, 1).npu(), myTest.numPoints)
 
     def compare_res(self, myTest):
         myTest.createData()
diff --git a/tests/torch/test_fused_bias_leaky_relu.py b/tests/torch/test_fused_bias_leaky_relu.py
index 731a6635..cfd7f15e 100644
--- a/tests/torch/test_fused_bias_leaky_relu.py
+++ b/tests/torch/test_fused_bias_leaky_relu.py
@@ -5,7 +5,7 @@ import torch_npu
 import torch.nn.functional as F
 
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.fused
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -27,7 +27,7 @@ class TestFusedBiasLeakyRelu(TestCase):
         cpu_result = F.leaky_relu(x + bias, negative_slop)
         cpu_result = cpu_result * scale
 
-        npu_result = mx_driving.common.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
+        npu_result = mx_driving.fused.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
         self.assertRtolEqual(npu_result, cpu_result.numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `FusedBiasLeakyRelu` is only supported on 910B, skip this ut!")
@@ -40,7 +40,7 @@ class TestFusedBiasLeakyRelu(TestCase):
         cpu_result = F.leaky_relu(x + bias, negative_slop)
         cpu_result = cpu_result * scale
 
-        npu_result = mx_driving.common.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
+        npu_result = mx_driving.fused.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
         self.assertRtolEqual(npu_result, cpu_result.numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `FusedBiasLeakyRelu` is only supported on 910B, skip this ut!")
@@ -53,7 +53,7 @@ class TestFusedBiasLeakyRelu(TestCase):
         cpu_result = F.leaky_relu(x.float() + bias.float(), negative_slop)
         cpu_result = cpu_result * scale
 
-        npu_result = mx_driving.common.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
+        npu_result = mx_driving.fused.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
         self.assertRtolEqual(npu_result, cpu_result.half().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `FusedBiasLeakyRelu` is only supported on 910B, skip this ut!")
@@ -66,7 +66,7 @@ class TestFusedBiasLeakyRelu(TestCase):
         cpu_result = F.leaky_relu(x.float() + bias.float(), negative_slop)
         cpu_result = cpu_result * scale
 
-        npu_result = mx_driving.common.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
+        npu_result = mx_driving.fused.npu_fused_bias_leaky_relu(x.npu(), bias.npu(), negative_slop, scale).cpu().numpy()
         self.assertRtolEqual(npu_result, cpu_result.half().numpy())
 
 
diff --git a/tests/torch/test_group_points.py b/tests/torch/test_group_points.py
index 013062e3..a9bf4c3e 100644
--- a/tests/torch/test_group_points.py
+++ b/tests/torch/test_group_points.py
@@ -4,7 +4,7 @@ import numpy as np
 
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-from mx_driving.perception.point import npu_group_points
+from mx_driving.point import npu_group_points
 
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
diff --git a/tests/torch/test_multi_scale_deformable_attn_function.py b/tests/torch/test_multi_scale_deformable_attn_function.py
index 776b084c..96aee415 100644
--- a/tests/torch/test_multi_scale_deformable_attn_function.py
+++ b/tests/torch/test_multi_scale_deformable_attn_function.py
@@ -3,7 +3,7 @@ from collections import namedtuple
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.fused
 
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
@@ -130,7 +130,7 @@ class TestMultiScaleDeformableAttnFunction(TestCase):
         npu_sampling_locations = npu_inputs.sampling_locations
         npu_attention_weights = npu_inputs.attention_weights
         npu_grad_output = npu_inputs.grad_output
-        npu_output = mx_driving.common.npu_multi_scale_deformable_attn_function(npu_value, npu_shapes, npu_offset, npu_sampling_locations, npu_attention_weights)
+        npu_output = mx_driving.fused.npu_multi_scale_deformable_attn_function(npu_value, npu_shapes, npu_offset, npu_sampling_locations, npu_attention_weights)
         npu_output.backward(npu_grad_output)
         return ExecResults(
             output=npu_output.detach().cpu().numpy(),
diff --git a/tests/torch/test_npu_dyn_voxelization.py b/tests/torch/test_npu_dyn_voxelization.py
index 0f8c4e19..09ac9f12 100644
--- a/tests/torch/test_npu_dyn_voxelization.py
+++ b/tests/torch/test_npu_dyn_voxelization.py
@@ -5,7 +5,7 @@ import torch
 import torch_npu
 
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.point
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -54,7 +54,7 @@ class TestDynVoxelization(TestCase):
     
     def npu_to_exec(self, points, coors_range, voxel_size):
         max_num_points = -1
-        dynamic_voxelization_npu = mx_driving.common.Voxelization(voxel_size, coors_range, max_num_points)
+        dynamic_voxelization_npu = mx_driving.point.Voxelization(voxel_size, coors_range, max_num_points)
         coors = dynamic_voxelization_npu.forward(points)
         return coors
 
diff --git a/tests/torch/test_npu_dynamic_scatter.py b/tests/torch/test_npu_dynamic_scatter.py
index 0d071665..bca00da3 100644
--- a/tests/torch/test_npu_dynamic_scatter.py
+++ b/tests/torch/test_npu_dynamic_scatter.py
@@ -7,7 +7,7 @@ from torch_npu.testing.testcase import TestCase, run_tests
 from torch_npu.testing.common_utils import create_common_tensor
 
 import ads_c
-import mx_driving.common
+import mx_driving.point
 
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
@@ -70,11 +70,11 @@ class TestDynamicScatter(TestCase):
                     grad_point_feats[point_idx, :] = torch.where(mask_bit, grad_voxel_feats[voxel_idx, :], zero_tensor)
 
     def npu_op_exec(self, feats, coors, reduce_type):
-        output_feats, output_coors = mx_driving.common.npu_dynamic_scatter(feats, coors, reduce_type)
+        output_feats, output_coors = mx_driving.point.npu_dynamic_scatter(feats, coors, reduce_type)
         return output_feats.cpu().numpy(), output_coors.cpu().numpy()
 
     def grad_npu_op_exec(self, feats, coors, reduce_type):
-        output_feats, output_coors = mx_driving.common.npu_dynamic_scatter(feats, coors, reduce_type)
+        output_feats, output_coors = mx_driving.point.npu_dynamic_scatter(feats, coors, reduce_type)
         return output_feats.cpu().numpy(), output_coors.cpu().numpy()
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `DynamicScatter` is only supported on 910B, skip this ut!")
diff --git a/tests/torch/test_npu_max_pool2d.py b/tests/torch/test_npu_max_pool2d.py
index d2e2adec..e9294f13 100644
--- a/tests/torch/test_npu_max_pool2d.py
+++ b/tests/torch/test_npu_max_pool2d.py
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.fused
 
 
 def gen_inputs(shape, dtype):
@@ -17,7 +17,7 @@ def cpu_to_exec(x_data_cpu):
 
 
 def npu_to_exec(x_data_cpu):
-    npu_output = mx_driving.common.npu_max_pool2d(x_data_cpu.npu(), 3, 2, 1)
+    npu_output = mx_driving.fused.npu_max_pool2d(x_data_cpu.npu(), 3, 2, 1)
     return npu_output
 
 
diff --git a/tests/torch/test_npu_nms3d.py b/tests/torch/test_npu_nms3d.py
index 60c039c0..bcc9fc0a 100644
--- a/tests/torch/test_npu_nms3d.py
+++ b/tests/torch/test_npu_nms3d.py
@@ -8,7 +8,7 @@ import torch_npu
 from torch_npu.testing.common_utils import create_common_tensor
 from torch_npu.testing.testcase import TestCase, run_tests
 
-import mx_driving.common
+import mx_driving.detection
 
 torch.npu.config.allow_internal_format = False
 torch_npu.npu.set_compile_mode(jit_compile=False)
@@ -246,7 +246,7 @@ class TestNms3d(TestCase):
         return keep, num_out
 
     def npu_to_exec(self, boxes, scores, threshold=0.0):
-        keep = mx_driving.common.npu_nms3d(boxes, scores, threshold)
+        keep = mx_driving.detection.npu_nms3d(boxes, scores, threshold)
         return keep.cpu()
 
     @unittest.skipIf(DEVICE_NAME != True, "OP `Nms3d` is only supported on 910B, skip this ut!")
diff --git a/tests/torch/test_npu_nms3d_normal.py b/tests/torch/test_npu_nms3d_normal.py
index 88036555..450b1e0c 100644
--- a/tests/torch/test_npu_nms3d_normal.py
+++ b/tests/torch/test_npu_nms3d_normal.py
@@ -5,7 +5,7 @@ import numpy as np
 
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.detection
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -24,7 +24,7 @@ class TestNms3dNormal(TestCase):
         np_inds = np.array([1, 0, 3])
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(inds.cpu().numpy(), np_inds)
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -36,7 +36,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(10).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 9)
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -47,7 +47,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(200).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 79)
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -58,7 +58,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(369).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 109)
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -69,7 +69,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(555).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 148)
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -80,7 +80,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(300).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 102)
 
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3d_Normal` is only supported on 910B, skip this ut!")
@@ -91,7 +91,7 @@ class TestNms3dNormal(TestCase):
         np_scores = np.random.rand(600).astype(np.float32)
         boxes = torch.from_numpy(np_boxes)
         scores = torch.from_numpy(np_scores)
-        inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
+        inds = mx_driving.detection.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3)
         self.assertRtolEqual(len(inds.cpu().numpy()), 161)
 
 if __name__ == "__main__":
diff --git a/tests/torch/test_points_in_box.py b/tests/torch/test_points_in_box.py
index 195116fe..15dbc3bd 100644
--- a/tests/torch/test_points_in_box.py
+++ b/tests/torch/test_points_in_box.py
@@ -17,7 +17,7 @@ import numpy as np
 import torch_npu
 
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.data
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -75,7 +75,7 @@ class TestPointsInBox(TestCase):
             [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0],
             [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]],
             dtype=torch.float32).npu()
-        point_indices = mx_driving.common.npu_points_in_box(boxes, pts).cpu().numpy()
+        point_indices = mx_driving.data.npu_points_in_box(boxes, pts).cpu().numpy()
         expected_point_indices = torch.tensor([[-1, -1, 0, -1, 0, -1, -1, -1]],
                                             dtype=torch.int32).cpu().numpy()
         self.assertRtolEqual(point_indices, expected_point_indices)
@@ -96,7 +96,7 @@ class TestPointsInBox(TestCase):
                                         points[b].float(),
                                         point_indices[b])
 
-        point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!")
@@ -116,7 +116,7 @@ class TestPointsInBox(TestCase):
                                         point_indices[b])
 
         with self.assertRaisesRegex(RuntimeError, "boxes is larger than 200"):
-            point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu())
+            point_indices_npu = mx_driving.data.npu_points_in_box(boxes.npu(), points.npu())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!")
     def test_points_in_box_shape_large_points(self, device="npu"):
@@ -134,7 +134,7 @@ class TestPointsInBox(TestCase):
                                         points[b].float(),
                                         point_indices[b])
 
-        point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!")
@@ -154,7 +154,7 @@ class TestPointsInBox(TestCase):
                                         point_indices[b])
 
         with self.assertRaisesRegex(RuntimeError, "points_in_box npu only support batch size = 1"):
-            point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu())
+            point_indices_npu = mx_driving.data.npu_points_in_box(boxes.npu(), points.npu())
 
 
 if __name__ == "__main__":
diff --git a/tests/torch/test_points_in_box_all.py b/tests/torch/test_points_in_box_all.py
index 8952f9c8..23ca047f 100644
--- a/tests/torch/test_points_in_box_all.py
+++ b/tests/torch/test_points_in_box_all.py
@@ -16,7 +16,7 @@ import torch
 import numpy as np
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.data
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -72,7 +72,7 @@ class TestPointsInBoxAll(TestCase):
         for b in range(batch_size):
             point_indices[b] = points_in_boxes_all_cpu_forward(boxes[b].float(),
                                         points[b].float())
-        point_indices_npu = mx_driving.common.npu_points_in_box_all(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box_all(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBoxAll` is only supported on 910B, skip this ut!")
@@ -89,7 +89,7 @@ class TestPointsInBoxAll(TestCase):
         for b in range(batch_size):
             point_indices[b] = points_in_boxes_all_cpu_forward(boxes[b].float(),
                                         points[b].float())
-        point_indices_npu = mx_driving.common.npu_points_in_box_all(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box_all(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBoxAll` is only supported on 910B, skip this ut!")
@@ -106,7 +106,7 @@ class TestPointsInBoxAll(TestCase):
         for b in range(batch_size):
             point_indices[b] = points_in_boxes_all_cpu_forward(boxes[b].float(),
                                         points[b].float())
-        point_indices_npu = mx_driving.common.npu_points_in_box_all(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box_all(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBoxAll` is only supported on 910B, skip this ut!")
@@ -123,7 +123,7 @@ class TestPointsInBoxAll(TestCase):
         for b in range(batch_size):
             point_indices[b] = points_in_boxes_all_cpu_forward(boxes[b].float(),
                                         points[b].float())
-        point_indices_npu = mx_driving.common.npu_points_in_box_all(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box_all(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
     
     @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBoxAll` is only supported on 910B, skip this ut!")
@@ -140,7 +140,7 @@ class TestPointsInBoxAll(TestCase):
         for b in range(batch_size):
             point_indices[b] = points_in_boxes_all_cpu_forward(boxes[b].float(),
                                         points[b].float())
-        point_indices_npu = mx_driving.common.npu_points_in_box_all(boxes.npu(), points.npu())
+        point_indices_npu = mx_driving.data.npu_points_in_box_all(boxes.npu(), points.npu())
         self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy())
 
 if __name__ == "__main__":
diff --git a/tests/torch/test_roipoint_pool3d.py b/tests/torch/test_roipoint_pool3d.py
index 53eef4de..f902ea28 100644
--- a/tests/torch/test_roipoint_pool3d.py
+++ b/tests/torch/test_roipoint_pool3d.py
@@ -18,7 +18,7 @@ import torch
 import torch_npu
 import numpy as np
 from torch_npu.testing.testcase import TestCase, run_tests
-from mx_driving.common import RoIPointPool3d
+from mx_driving.data import RoIPointPool3d
 sys.path.append("../utils")
 from random_matrix import random_value
 
diff --git a/tests/torch/test_rotated_iou.py b/tests/torch/test_rotated_iou.py
index f66f8f08..2692d579 100644
--- a/tests/torch/test_rotated_iou.py
+++ b/tests/torch/test_rotated_iou.py
@@ -6,7 +6,7 @@ import numpy as np
 import torch
 import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
-import mx_driving.common
+import mx_driving.detection
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -288,7 +288,7 @@ class TestNpuRotatedIou(TestCase):
     def npu_to_exec(self, npu_inputs):
         npu_boxes_a = npu_inputs.boxes_a
         npu_boxes_b = npu_inputs.boxes_b
-        npu_ans_overlap = mx_driving.common.npu_rotated_iou(npu_boxes_a, npu_boxes_b, False, 0, True, 1e-5, 1e-5)
+        npu_ans_overlap = mx_driving.detection.npu_rotated_iou(npu_boxes_a, npu_boxes_b, False, 0, True, 1e-5, 1e-5)
         return npu_ans_overlap.cpu().float().numpy()
 
     def check_precision(self, actual, expected, rtol=1e-4, atol=1e-4, msg=None):
diff --git a/tests/torch/test_voxel_pooling_train.py b/tests/torch/test_voxel_pooling_train.py
index 42cd23d2..ac372802 100644
--- a/tests/torch/test_voxel_pooling_train.py
+++ b/tests/torch/test_voxel_pooling_train.py
@@ -6,7 +6,7 @@ import torch_npu
 from torch_npu.testing.testcase import TestCase, run_tests
 import ads_c
 import numpy as np
-import mx_driving.common
+import mx_driving.point
 
 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
 
@@ -71,7 +71,7 @@ class TestVoxelPoolingTrain(TestCase):
         return pos, result, grad_features_cpu
 
     def npu_to_exec(self, geom_xyz, input_features, voxel_num):
-        result = mx_driving.common.npu_voxel_pooling_train(
+        result = mx_driving.point.npu_voxel_pooling_train(
             geom_xyz, input_features, voxel_num)
 
         result.backward(result)
-- 
Gitee