From 53a8f436248c0c159e3dc2351ae636b8a3a35cb5 Mon Sep 17 00:00:00 2001 From: chenyang Date: Tue, 18 Mar 2025 21:02:06 +0800 Subject: [PATCH] add new models reformate reformat reformat add new models add new models add new models add new models temp temp2 ADD new models ADD new models --- .../assets/mindspore/2.5/bit_resnet50.md | 30 +- .../2.5/bit_resnet50_imagenet2012.md | 131 ++++++ .../assets/mindspore/2.5/coat_lite_tiny.md | 28 +- .../mindspore/2.5/coat_tiny_imagenet2012.md | 121 +++++ mshub_res/assets/mindspore/2.5/convit.md | 30 +- .../mindspore/2.5/convit_tiny_imagenet2012.md | 139 ++++++ mshub_res/assets/mindspore/2.5/convnext.md | 28 +- .../2.5/convnext_tiny_imagenet2012.md | 135 ++++++ mshub_res/assets/mindspore/2.5/convnextv2.md | 28 +- .../2.5/convnextv2_tiny_imagenet2012.md | 134 ++++++ .../assets/mindspore/2.5/crnn_vgg7_lmdbd.md | 413 ++++++++++++++++++ mshub_res/assets/mindspore/2.5/crossvit.md | 30 +- .../mindspore/2.5/crossvit_9_imagenet2012.md | 134 ++++++ .../2.5/dbnet_mobilenetv3_icdar2015.md | 360 +++++++++++++++ .../mindspore/2.5/dbnet_resnet50_icdar2015.md | 360 +++++++++++++++ .../mindspore/2.5/densenet121_imagenet2012.md | 146 +++++++ mshub_res/assets/mindspore/2.5/edgenext.md | 30 +- .../2.5/edgenext_xx_small_imagenet2012.md | 136 ++++++ .../assets/mindspore/2.5/efficientnet.md | 30 +- .../2.5/efficientnet_b0_imagenet2012.md | 142 ++++++ mshub_res/assets/mindspore/2.5/ghostnet.md | 28 +- .../2.5/ghostnet_050_imagenet2012.md | 137 ++++++ mshub_res/assets/mindspore/2.5/googlenet.md | 28 +- .../mindspore/2.5/googlenet_imagenet2012.md | 135 ++++++ .../mindspore/2.5/hrnet_w32_imagenet2012.md | 140 ++++++ .../2.5/inception_v3_imagenet2012.md | 136 ++++++ .../2.5/inception_v4_imagenet2012.md | 133 ++++++ .../mindspore/2.5/mixnet_s_imagenet2012.md | 135 ++++++ .../mindspore/2.5/mnasnet_075_imagenet2012.md | 130 ++++++ .../2.5/mobilenet_v1_025_imagenet2012.md | 130 ++++++ .../2.5/mobilenet_v2_075_imagenet2012.md | 132 ++++++ .../mobilenet_v3_large_100_imagenet2012.md | 134 ++++++ .../mobilenet_v3_small_100_imagenet2012.md | 134 ++++++ .../2.5/mobilevit_xx_small_imagenet2012.md | 126 ++++++ .../2.5/nasnet_a_4x1056_imagenet2012.md | 142 ++++++ .../mindspore/2.5/pit_ti_imagenet2012.md | 131 ++++++ .../2.5/poolformer_s12_imagenet2012.md | 127 ++++++ .../mindspore/2.5/pvt_tiny_imagenet2012.md | 139 ++++++ .../mindspore/2.5/pvt_v2_b0_imagenet2012.md | 140 ++++++ .../2.5/regnet_x_800mf_imagenet2012.md | 144 ++++++ .../mindspore/2.5/repvgg_a0_imagenet2012.md | 152 +++++++ .../mindspore/2.5/repvgg_a1_imagenet2012.md | 152 +++++++ .../mindspore/2.5/res2net50_imagenet2012.md | 141 ++++++ .../mindspore/2.5/resnet50_imagenet2012.md | 139 ++++++ .../mindspore/2.5/resnetv2_50_imagenet2012.md | 139 ++++++ .../2.5/resnext50_32x4d_imagenet2012.md | 142 ++++++ .../mindspore/2.5/rexnet_09_imagenet2012.md | 129 ++++++ .../mindspore/2.5/seresnet18_imagenet2012.md | 141 ++++++ .../2.5/shufflenet_v1_g3_05_imagenet2012.md | 140 ++++++ .../2.5/shufflenet_v2_x0_5_imagenet2012.md | 152 +++++++ .../mindspore/2.5/skresnet18_imagenet2012.md | 168 +++++++ .../2.5/squeezenet1_0_imagenet2012.md | 146 +++++++ .../mindspore/2.5/swin_tiny_imagenet2012.md | 153 +++++++ .../2.5/swinv2_tiny_window8_imagenet2012.md | 143 ++++++ .../mindspore/2.5/vgg13_imagenet2012.md | 151 +++++++ .../mindspore/2.5/vgg19_imagenet2012.md | 151 +++++++ .../2.5/visformer_tiny_imagenet2012.md | 141 ++++++ .../mindspore/2.5/vit_b32_224_imagenet2012.md | 159 +++++++ .../mindspore/2.5/vit_l32_224_imagenet2012.md | 159 +++++++ 
.../2.5/xcit_tiny_12_p16_224_imagenet2012.md | 138 ++++++ .../2.5/yolov3_darknet53_coco2017.md | 140 ++++++ .../2.5/yolov4_cspdarknet53_coco2017.md | 161 +++++++ .../assets/mindspore/2.5/yolov5n_coco2017.md | 144 ++++++ .../assets/mindspore/2.5/yolov5s_coco2017.md | 144 ++++++ .../mindspore/2.5/yolov7_tiny_coco2017.md | 132 ++++++ .../assets/mindspore/2.5/yolov8n_coco2017.md | 144 ++++++ .../assets/mindspore/2.5/yolov8s_coco2017.md | 144 ++++++ .../assets/mindspore/2.5/yolox_s_coco2017.md | 133 ++++++ 68 files changed, 8999 insertions(+), 145 deletions(-) create mode 100644 mshub_res/assets/mindspore/2.5/bit_resnet50_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/coat_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/convit_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/convnext_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/convnextv2_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/crnn_vgg7_lmdbd.md create mode 100644 mshub_res/assets/mindspore/2.5/crossvit_9_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/dbnet_mobilenetv3_icdar2015.md create mode 100644 mshub_res/assets/mindspore/2.5/dbnet_resnet50_icdar2015.md create mode 100644 mshub_res/assets/mindspore/2.5/densenet121_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/edgenext_xx_small_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/efficientnet_b0_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/ghostnet_050_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/googlenet_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/hrnet_w32_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/inception_v3_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/inception_v4_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mixnet_s_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mnasnet_075_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mobilenet_v1_025_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mobilenet_v2_075_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mobilenet_v3_large_100_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mobilenet_v3_small_100_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/mobilevit_xx_small_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/nasnet_a_4x1056_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/pit_ti_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/poolformer_s12_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/pvt_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/pvt_v2_b0_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/regnet_x_800mf_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/repvgg_a0_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/repvgg_a1_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/res2net50_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/resnet50_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/resnetv2_50_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/resnext50_32x4d_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/rexnet_09_imagenet2012.md create mode 100644 
mshub_res/assets/mindspore/2.5/seresnet18_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/shufflenet_v1_g3_05_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/shufflenet_v2_x0_5_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/skresnet18_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/squeezenet1_0_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/swin_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/swinv2_tiny_window8_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/vgg13_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/vgg19_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/visformer_tiny_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/vit_b32_224_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/vit_l32_224_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/xcit_tiny_12_p16_224_imagenet2012.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov3_darknet53_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov4_cspdarknet53_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov5n_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov5s_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov7_tiny_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov8n_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolov8s_coco2017.md create mode 100644 mshub_res/assets/mindspore/2.5/yolox_s_coco2017.md diff --git a/mshub_res/assets/mindspore/2.5/bit_resnet50.md b/mshub_res/assets/mindspore/2.5/bit_resnet50.md index 0ed3d90..75b2c61 100644 --- a/mshub_res/assets/mindspore/2.5/bit_resnet50.md +++ b/mshub_res/assets/mindspore/2.5/bit_resnet50.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -89,33 +89,33 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. -```python +```shell python validate.py -c configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt ``` diff --git a/mshub_res/assets/mindspore/2.5/bit_resnet50_imagenet2012.md b/mshub_res/assets/mindspore/2.5/bit_resnet50_imagenet2012.md new file mode 100644 index 0000000..d542ba7 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/bit_resnet50_imagenet2012.md @@ -0,0 +1,131 @@ +# bit_resnet50 + +--- + +model-name: bit_resnet50 + +backbone-name: resnet50 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc76.85 | top5acc93.24 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 77dcaf0f + +license: Apache2.0 + +summary: bit_resnet50 is used for cv + +--- + +# BigTransfer + +> [Big Transfer (BiT): General Visual Representation Learning](https://arxiv.org/abs/1912.11370) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Transfer of pre-trained representations improves sample efficiency and simplifies hyperparameter tuning when training deep neural networks for vision. +Big Transfer (BiT) can achieve strong performance on more than 20 data sets by combining some carefully selected components and using simple heuristic +methods for transmission. The components distilled by BiT for training models that transfer well are: 1) Big datasets: as the size of the dataset increases, +the optimal performance of the BIT model will also increase. 2) Big architectures: In order to make full use of large datasets, a large enough architecture +is required. 3) Long pre-training time: Pretraining on a larger dataset requires more training epoch and training time. 4) GroupNorm and Weight Standardisation: +BiT use GroupNorm combined with Weight Standardisation instead of BatchNorm. Since BatchNorm performs worse when the number of images on each accelerator is +too low. 5) With BiT fine-tuning, good performance can be achieved even if there are only a few examples of each type on natural images.[[1, 2](#references)] + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. 
+ +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| bit_resnet50 | 25.55 | 8 | 32 | 224x224 | O2 | 146s | 74.52 | 3413.33 | 76.81 | 93.17 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet50-1e4795a4.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/bit/bit_resnet50_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Kolesnikov A, Beyer L, Zhai X, et al. Big transfer (bit): General visual representation learning[C]//European conference on computer vision. Springer, Cham, 2020: 491-507. + +[2] BigTransfer (BiT): State-of-the-art transfer learning for computer vision, diff --git a/mshub_res/assets/mindspore/2.5/coat_lite_tiny.md b/mshub_res/assets/mindspore/2.5/coat_lite_tiny.md index 11664fa..9c83390 100644 --- a/mshub_res/assets/mindspore/2.5/coat_lite_tiny.md +++ b/mshub_res/assets/mindspore/2.5/coat_lite_tiny.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -84,27 +84,27 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. 
For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun` + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun` -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation diff --git a/mshub_res/assets/mindspore/2.5/coat_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/coat_tiny_imagenet2012.md new file mode 100644 index 0000000..fb5e799 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/coat_tiny_imagenet2012.md @@ -0,0 +1,121 @@ +# coat_tiny + +--- + +model-name: coat_tiny + +backbone-name: coat + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc79.28 | top5acc94.45 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: dcca16b1 + +license: Apache2.0 + +summary: coat is used for cv + +--- + +# CoaT + +> [Co-Scale Conv-Attentional Image Transformers](https://arxiv.org/abs/2104.06399v2) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Co-Scale Conv-Attentional Image Transformer (CoaT) is a Transformer-based image classifier equipped with co-scale and conv-attentional mechanisms. 
First, the co-scale mechanism maintains the integrity of Transformers' encoder branches at individual scales, while allowing representations learned at different scales to effectively communicate with each other. Second, the conv-attentional mechanism is designed by realizing a relative position embedding formulation in the factorized attention module with an efficient convolution-like implementation. CoaT empowers image Transformers with enriched multi-scale and contextual modeling capabilities. + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | +| coat_tiny | 5.50 | 8 | 32 | 224x224 | O2 | 543s | 254.95 | 1003.92 | 79.67 | 94.88 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/coat/coat_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/coat/coat_tiny-071cb792.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/coat/coat_lite_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Han D, Yun S, Heo B, et al. Rethinking channel dimensions for efficient model design[C]//Proceedings of the IEEE/CVF conference on Computer Vision and Pattern Recognition. 2021: 732-741. 
diff --git a/mshub_res/assets/mindspore/2.5/convit.md b/mshub_res/assets/mindspore/2.5/convit.md index 427cfa9..5d2bf6c 100644 --- a/mshub_res/assets/mindspore/2.5/convit.md +++ b/mshub_res/assets/mindspore/2.5/convit.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -100,33 +100,33 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
-```python +```shell python validate.py -c configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt ``` diff --git a/mshub_res/assets/mindspore/2.5/convit_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/convit_tiny_imagenet2012.md new file mode 100644 index 0000000..a814af4 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/convit_tiny_imagenet2012.md @@ -0,0 +1,139 @@ +# convit_tiny + +--- + +model-name: convit_tiny + +backbone-name: convit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc73.79 | top5acc91.70 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 1961717e + +license: Apache2.0 + +summary: convit is used for cv + +--- + +# ConViT + +> [ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases](https://arxiv.org/abs/2103.10697) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +ConViT combines the strengths of convolutional architectures and Vision Transformers (ViTs). +ConViT introduces gated positional self-attention (GPSA), a form of positional self-attention +that can be equipped with a “soft” convolutional inductive bias. +ConViT initializes the GPSA layers to mimic the locality of convolutional layers, +then gives each attention head the freedom to escape locality by adjusting a gating parameter +regulating the attention paid to position versus content information. +ConViT, outperforms the DeiT (Touvron et al., 2020) on ImageNet, +while offering a much improved sample efficiency.[[1](#references)] + +

Figure 1. Architecture of ConViT [1]

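As a rough illustration of the gating mechanism described above, the sketch below blends content-based and position-based attention with a per-head gate, following the gating form described in the ConViT paper. It is a plain NumPy sketch for intuition only; the array shapes and the name `gating_param` are illustrative assumptions, not the MindCV implementation.

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def gpsa_attention(content_scores, position_scores, gating_param):
    """Blend content-based and position-based attention with a learned gate.

    content_scores:  (tokens, tokens) raw Q.K^T / sqrt(d) scores for one head
    position_scores: (tokens, tokens) raw relative-position scores for one head
    gating_param:    scalar lambda; sigmoid(lambda) close to 1 keeps the head
                     local (convolution-like), close to 0 lets it attend by content
    """
    gate = 1.0 / (1.0 + np.exp(-gating_param))  # sigmoid
    return (1.0 - gate) * softmax(content_scores) + gate * softmax(position_scores)

# At initialization a positive lambda lets the positional term dominate,
# mimicking a convolution; training can lower lambda to escape locality.
attn = gpsa_attention(np.random.randn(4, 4), np.random.randn(4, 4), gating_param=1.0)
print(attn.sum(axis=-1))  # each row sums to 1
```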
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | +| convit_tiny | 5.71 | 8 | 256 | 224x224 | O2 | 153s | 226.51 | 9022.03 | 73.79 | 91.70 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convit/convit_tiny_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/convit/convit_tiny-1961717e-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | +| convit_tiny | 5.71 | 8 | 256 | 224x224 | O2 | 133s | 231.62 | 8827.59 | 73.66 | 91.72 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convit/convit_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/convit/convit_tiny-e31023f2.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/convit/convit_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] d’Ascoli S, Touvron H, Leavitt M L, et al. Convit: Improving vision transformers with soft convolutional inductive biases[C]//International Conference on Machine Learning. PMLR, 2021: 2286-2296. diff --git a/mshub_res/assets/mindspore/2.5/convnext.md b/mshub_res/assets/mindspore/2.5/convnext.md index 935a9d7..6bd1efa 100644 --- a/mshub_res/assets/mindspore/2.5/convnext.md +++ b/mshub_res/assets/mindspore/2.5/convnext.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -95,27 +95,27 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
- Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation diff --git a/mshub_res/assets/mindspore/2.5/convnext_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/convnext_tiny_imagenet2012.md new file mode 100644 index 0000000..dd69d4a --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/convnext_tiny_imagenet2012.md @@ -0,0 +1,135 @@ +# convnext_tiny + +--- + +model-name: convnext_tiny + +backbone-name: convnext + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc81.28 | top5acc95.61 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: db11dc82 + +license: Apache2.0 + +summary: convnext is used for cv + +--- + +# ConvNeXt + +> [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +In this work, the authors reexamine the design spaces and test the limits of what a pure ConvNet can achieve. +The authors gradually "modernize" a standard ResNet toward the design of a vision Transformer, and discover several key +components that contribute to the performance difference along the way. The outcome of this exploration is a family of +pure ConvNet models dubbed ConvNeXt. Constructed entirely from standard ConvNet modules, ConvNeXts compete favorably +with Transformers in terms of accuracy and scalability, achieving 87.8% ImageNet top-1 accuracy, while maintaining the +simplicity and efficiency of standard ConvNets.[[1](#references)] + +

Figure 1. Architecture of ConvNeXt [1]

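To make the "modernized ResNet" design concrete, below is a minimal sketch of a single ConvNeXt block (7x7 depthwise convolution, LayerNorm, an inverted-bottleneck MLP with GELU, and a residual connection). It is an illustrative toy written against the public MindSpore `nn`/`ops` APIs, not the MindCV source; layer scale and stochastic depth are omitted.

```python
import mindspore as ms
from mindspore import nn, ops

class ConvNeXtBlockSketch(nn.Cell):
    """Toy ConvNeXt block: dwconv 7x7 -> LayerNorm -> 1x1 expand -> GELU -> 1x1 project + residual."""

    def __init__(self, dim: int):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, group=dim, pad_mode="same", has_bias=True)
        self.norm = nn.LayerNorm((dim,))       # applied on the channel axis in channels-last layout
        self.pwconv1 = nn.Dense(dim, 4 * dim)  # pointwise convs implemented as dense layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Dense(4 * dim, dim)

    def construct(self, x):                     # x: (N, C, H, W)
        shortcut = x
        x = self.dwconv(x)
        x = ops.transpose(x, (0, 2, 3, 1))      # to channels-last for LayerNorm/Dense
        x = self.pwconv2(self.act(self.pwconv1(self.norm(x))))
        x = ops.transpose(x, (0, 3, 1, 2))      # back to channels-first
        return shortcut + x

block = ConvNeXtBlockSketch(dim=96)
print(block(ops.ones((1, 96, 56, 56), ms.float32)).shape)  # (1, 96, 56, 56)
```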
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | +| convnext_tiny | 28.59 | 8 | 16 | 224x224 | O2 | 137s | 48.7 | 2612.24 | 81.28 | 95.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convnext/convnext_tiny_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/convnext/convnext_tiny-db11dc82-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | +| convnext_tiny | 28.59 | 8 | 16 | 224x224 | O2 | 127s | 66.79 | 1910.45 | 81.91 | 95.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convnext/convnext_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/convnext/convnext_tiny-ae5ff8d7.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/convnext/convnext_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Liu Z, Mao H, Wu C Y, et al. A convnet for the 2020s[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2022: 11976-11986. diff --git a/mshub_res/assets/mindspore/2.5/convnextv2.md b/mshub_res/assets/mindspore/2.5/convnextv2.md index 6efdc24..382fc53 100644 --- a/mshub_res/assets/mindspore/2.5/convnextv2.md +++ b/mshub_res/assets/mindspore/2.5/convnextv2.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -92,27 +92,27 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
- Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation diff --git a/mshub_res/assets/mindspore/2.5/convnextv2_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/convnextv2_tiny_imagenet2012.md new file mode 100644 index 0000000..e7df055 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/convnextv2_tiny_imagenet2012.md @@ -0,0 +1,134 @@ +# convnextv2_tiny + +--- + +model-name: convnextv2_tiny + +backbone-name: convnextv2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc82.39 | top5acc95.95 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: a35b79ce + +license: Apache2.0 + +summary: convnextv2 is used for cv + +--- + +# ConvNeXt V2 + +> [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](https://arxiv.org/abs/2301.00808) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +In this paper, the authors propose a fully convolutional masked autoencoder framework and a new Global Response +Normalization (GRN) layer that can be added to the ConvNeXt architecture to enhance inter-channel feature competition. +This co-design of self-supervised learning techniques (such as MAE) and architectural improvement results in a new model +family called ConvNeXt V2, which significantly improves the performance of pure ConvNets on various recognition +benchmarks, including ImageNet classification, COCO detection, and ADE20K segmentation.[[1](#references)] + +

Figure 1. Architecture of ConvNeXt V2 [1]

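The GRN layer mentioned above is small enough to spell out. The NumPy sketch below follows the three steps from the paper (global feature aggregation, divisive normalization across channels, calibration with a residual); the variable names are illustrative and this is not the MindCV implementation.

```python
import numpy as np

def grn(x, gamma, beta, eps=1e-6):
    """Global Response Normalization, channels-last sketch.

    x:     (N, H, W, C) features
    gamma: (C,) learnable scale, initialized to zero
    beta:  (C,) learnable bias, initialized to zero
    """
    # 1) global feature aggregation: L2 norm of each channel over the spatial dims
    gx = np.sqrt((x ** 2).sum(axis=(1, 2), keepdims=True))     # (N, 1, 1, C)
    # 2) divisive normalization: relative importance of each channel
    nx = gx / (gx.mean(axis=-1, keepdims=True) + eps)          # (N, 1, 1, C)
    # 3) calibrate the input with the normalized response, keep a residual path
    return gamma * (x * nx) + beta + x

x = np.random.randn(2, 7, 7, 96).astype(np.float32)
y = grn(x, gamma=np.zeros(96, np.float32), beta=np.zeros(96, np.float32))
print(y.shape)  # (2, 7, 7, 96); with zero-initialized gamma/beta the layer starts as identity
```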
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| convnextv2_tiny | 28.64 | 8 | 128 | 224x224 | O2 | 268s | 257.2 | 3984.44 | 82.39 | 95.95 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convnextv2/convnextv2_tiny_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/convnextv2/convnextv2_tiny-a35b79ce-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| convnextv2_tiny | 28.64 | 8 | 128 | 224x224 | O2 | 237s | 400.20 | 2560.00 | 82.43 | 95.98 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/convnextv2/convnextv2_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/convnextv2/convnextv2_tiny-d441ba2c.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/convnextv2/convnextv2_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Woo S, Debnath S, Hu R, et al. ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders[J]. arXiv preprint arXiv:2301.00808, 2023. diff --git a/mshub_res/assets/mindspore/2.5/crnn_vgg7_lmdbd.md b/mshub_res/assets/mindspore/2.5/crnn_vgg7_lmdbd.md new file mode 100644 index 0000000..b726c7a --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/crnn_vgg7_lmdbd.md @@ -0,0 +1,413 @@ +# crnn_vgg7 + +--- + +model-name: crnn_vgg7 + +backbone-name: vgg7 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: LMDBDataset + +evaluation: acc81.3 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 6faf1b2d + +license: Apache2.0 + +summary: crnn_vgg7 is used for cv + +--- + +# CRNN + + + +> [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/abs/1507.05717) + +## Introduction + + + +Convolutional Recurrent Neural Network (CRNN) integrates CNN feature extraction and RNN sequence modeling as well as transcription into a unified framework. + +As shown in the architecture graph (Figure 1), CRNN firstly extracts a feature sequence from the input image via Convolutional Layers. After that, the image is represented by a sequence extracted features, where each vector is associated with a receptive field on the input image. For further process the feature, CRNN adopts Recurrent Layers to predict a label distribution for each frame. To map the distribution to text field, CRNN adds a Transcription Layer to translate the per-frame predictions into the final label sequence. [1] + + + +

Figure 1. Architecture of CRNN [1]

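The three stages described above (convolutional feature extraction, recurrent sequence modeling, transcription) can be sketched end to end. The toy model below is written against public MindSpore APIs for illustration only; it is not the VGG7-backbone configuration used in MindOCR, and CTC decoding of the frame-wise logits is left out.

```python
import mindspore as ms
from mindspore import nn, ops

class CRNNSketch(nn.Cell):
    """Toy CRNN: conv feature extractor -> per-column sequence -> BiLSTM -> frame-wise class logits.
    Transcription (CTC decoding) then collapses repeated/blank frames into the final text."""

    def __init__(self, num_classes: int, hidden: int = 128):
        super().__init__()
        self.backbone = nn.SequentialCell(
            nn.Conv2d(3, 64, 3, pad_mode="same"), nn.ReLU(), nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, pad_mode="same"), nn.ReLU(), nn.MaxPool2d(2, 2),
        )
        self.rnn = nn.LSTM(input_size=128 * 8, hidden_size=hidden, bidirectional=True, batch_first=True)
        self.head = nn.Dense(2 * hidden, num_classes)  # num_classes already includes the blank symbol

    def construct(self, x):                 # x: (N, 3, 32, W) text-line image
        f = self.backbone(x)                # (N, 128, 8, W/4)
        n, c, h, w = f.shape
        seq = ops.transpose(f, (0, 3, 1, 2)).reshape(n, w, c * h)  # one feature vector per image column
        out, _ = self.rnn(seq)              # per-frame context from both directions
        return self.head(out)               # (N, W/4, num_classes) frame-wise logits for CTC

logits = CRNNSketch(num_classes=37)(ops.ones((1, 3, 32, 100), ms.float32))
print(logits.shape)  # (1, 25, 37)
```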
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://github.com/mindspore-lab/mindocr#installation) in MindOCR. + +#### Dataset Download + +Please download lmdb dataset for traininig and evaluation from [here](https://www.dropbox.com/sh/i39abvnefllx2si/AAAbAYRvxzRp3cIE5HzqUw3ra?dl=0) (ref: [deep-text-recognition-benchmark](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here)). There're several zip files: + +- `data_lmdb_release.zip` contains the **entire** datasets including training data, validation data and evaluation data. +- `training/` contains two datasets: [MJSynth (MJ)](http://www.robots.ox.ac.uk/~vgg/data/text/) and [SynthText (ST)](https://academictorrents.com/details/2dba9518166cbd141534cbf381aa3e99a087e83c) +- `validation/` is the union of the training sets of [IC13](http://rrc.cvc.uab.es/?ch=2), [IC15](http://rrc.cvc.uab.es/?ch=4), [IIIT](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html), and [SVT](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset). +- `evaluation/` contains several benchmarking datasets, which are [IIIT](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html), [SVT](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset), [IC03](http://www.iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions), [IC13](http://rrc.cvc.uab.es/?ch=2), [IC15](http://rrc.cvc.uab.es/?ch=4), [SVTP](http://openaccess.thecvf.com/content_iccv_2013/papers/Phan_Recognizing_Text_with_2013_ICCV_paper.pdf), and [CUTE](http://cs-chan.com/downloads_CUTE80_dataset.html). +- `validation.zip`: same as the validation/ within data_lmdb_release.zip +- `evaluation.zip`: same as the evaluation/ within data_lmdb_release.zip + +Unzip the `data_lmdb_release.zip`, the data structure should be like + +```text +data_lmdb_release/ +├── evaluation +│ ├── CUTE80 +│ │ ├── data.mdb +│ │ └── lock.mdb +│ ├── IC03_860 +│ │ ├── data.mdb +│ │ └── lock.mdb +│ ├── IC03_867 +│ │ ├── data.mdb +│ │ └── lock.mdb +│ ├── IC13_1015 +│ │ ├── data.mdb +│ │ └── lock.mdb +│ ├── ... +├── training +│ ├── MJ +│ │ ├── MJ_test +│ │ │ ├── data.mdb +│ │ │ └── lock.mdb +│ │ ├── MJ_train +│ │ │ ├── data.mdb +│ │ │ └── lock.mdb +│ │ └── MJ_valid +│ │ ├── data.mdb +│ │ └── lock.mdb +│ └── ST +│ ├── data.mdb +│ └── lock.mdb +└── validation + ├── data.mdb + └── lock.mdb +``` + +#### Dataset Usage + +Here we used the datasets under `training/` folders for **training**, and the union dataset `validation/` for validation. After training, we used the datasets under `evaluation/` to evluation model accuracy. 
+ +**Training:** (total 14,442,049 samples) + +- [MJSynth (MJ)](http://www.robots.ox.ac.uk/~vgg/data/text/) +- Train: 21.2 GB, 7224586 samples +- Valid: 2.36 GB, 802731 samples +- Test: 2.61 GB, 891924 samples +- [SynthText (ST)](https://academictorrents.com/details/2dba9518166cbd141534cbf381aa3e99a087e83c) +- Train: 16.0 GB, 5522808 samples + +**Validation:** + +- Valid: 138 MB, 6992 samples + +**Evaluation:** (total 12,067 samples) + +- [CUTE80](http://cs-chan.com/downloads_CUTE80_dataset.html): 8.8 MB, 288 samples +- [IC03_860](http://www.iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions): 36 MB, 860 samples +- [IC03_867](http://www.iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions): 4.9 MB, 867 samples +- [IC13_857](http://rrc.cvc.uab.es/?ch=2): 72 MB, 857 samples +- [IC13_1015](http://rrc.cvc.uab.es/?ch=2): 77 MB, 1015 samples +- [IC15_1811](http://rrc.cvc.uab.es/?ch=4): 21 MB, 1811 samples +- [IC15_2077](http://rrc.cvc.uab.es/?ch=4): 25 MB, 2077 samples +- [IIIT5k_3000](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html): 50 MB, 3000 samples +- [SVT](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset): 2.4 MB, 647 samples +- [SVTP](http://openaccess.thecvf.com/content_iccv_2013/papers/Phan_Recognizing_Text_with_2013_ICCV_paper.pdf): 1.8 MB, 645 samples + +**Data configuration for model training** + +To reproduce the training of model, it is recommended that you modify the configuration yaml as follows: + +```yaml +... +train: + ... + dataset: + type: LMDBDataset + dataset_root: dir/to/data_lmdb_release/ # Root dir of training dataset + data_dir: training/ # Dir of training dataset, concatenated with `dataset_root` to be the complete dir of training dataset + # label_file: # Path of training label file, concatenated with `dataset_root` to be the complete path of training label file, not required when using LMDBDataset +... +eval: + dataset: + type: LMDBDataset + dataset_root: dir/to/data_lmdb_release/ # Root dir of validation dataset + data_dir: validation/ # Dir of validation dataset, concatenated with `dataset_root` to be the complete dir of validation dataset + # label_file: # Path of validation label file, concatenated with `dataset_root` to be the complete path of validation label file, not required when using LMDBDataset + ... +``` + +**Data configuration for model evaluation** + +We use the dataset under `evaluation/` as the benchmark dataset. On **each individual dataset** (e.g. CUTE80, IC03_860, etc.), we perform a full evaluation by setting the dataset's directory to the evaluation dataset. This way, we get a list of the corresponding accuracies for each dataset, and then the reported accuracies are the average of these values. + +To reproduce the reported evaluation results, you can: + +- Option 1: Repeat the evaluation step for all individual datasets: CUTE80, IC03_860, IC03_867, IC13_857, IC131015, IC15_1811, IC15_2077, IIIT5k_3000, SVT, SVTP. Then take the average score. + +- Option 2: Put all the benchmark datasets folder under the same directory, e.g. `evaluation/`. Modify the `eval.dataset.data_dir` in the config yaml accordingly. Then execute the script `tools/benchmarking/multi_dataset_eval.py`. + +1. Evaluate on one specific dataset + +For example, you can evaluate the model on dataset `CUTE80` by modifying the config yaml as follows: + +```yaml +... +train: + # NO NEED TO CHANGE ANYTHING IN TRAIN SINCE IT IS NOT USED +... 
+eval:
+  dataset:
+    type: LMDBDataset
+    dataset_root: dir/to/data_lmdb_release/ # Root dir of evaluation dataset
+    data_dir: evaluation/CUTE80/ # Dir of evaluation dataset, concatenated with `dataset_root` to be the complete dir of evaluation dataset
+    # label_file: # Path of evaluation label file, concatenated with `dataset_root` to be the complete path of evaluation label file, not required when using LMDBDataset
+  ...
+```
+
+By running `tools/eval.py` as described in the [Model Evaluation](#model-evaluation) section with the above config yaml, you can get the accuracy of the model on the CUTE80 dataset.
+
+2. Evaluate on multiple datasets under the same folder
+
+Assume you have put all benchmark datasets under `evaluation/` as shown below:
+
+```text
+data_lmdb_release/
+├── evaluation
+│   ├── CUTE80
+│   │   ├── data.mdb
+│   │   └── lock.mdb
+│   ├── IC03_860
+│   │   ├── data.mdb
+│   │   └── lock.mdb
+│   ├── IC03_867
+│   │   ├── data.mdb
+│   │   └── lock.mdb
+│   ├── IC13_1015
+│   │   ├── data.mdb
+│   │   └── lock.mdb
+│   ├── ...
+```
+
+Then you can evaluate on each dataset by modifying the config yaml as follows, and execute the script `tools/benchmarking/multi_dataset_eval.py`:
+
+```yaml
+...
+train:
+  # NO NEED TO CHANGE ANYTHING IN TRAIN SINCE IT IS NOT USED
+...
+eval:
+  dataset:
+    type: LMDBDataset
+    dataset_root: dir/to/data_lmdb_release/ # Root dir of evaluation dataset
+    data_dir: evaluation/ # Dir of evaluation dataset, concatenated with `dataset_root` to be the complete dir of evaluation dataset
+    # label_file: # Path of evaluation label file, concatenated with `dataset_root` to be the complete path of evaluation label file, not required when using LMDBDataset
+  ...
+```
+
+#### Check YAML Config Files
+
+Apart from the dataset setting, please also check the following important args: `system.distribute`, `system.val_while_train`, `common.batch_size`, `train.ckpt_save_dir`, `train.dataset.dataset_root`, `train.dataset.data_dir`, `train.dataset.label_file`,
+`eval.ckpt_load_path`, `eval.dataset.dataset_root`, `eval.dataset.data_dir`, `eval.dataset.label_file`, `eval.loader.batch_size`. Explanations of these important args:
+
+```yaml
+system:
+  distribute: True # `True` for distributed training, `False` for standalone training
+  amp_level: 'O3'
+  seed: 42
+  val_while_train: True # Validate while training
+  drop_overflow_update: False
+common:
+  ...
+  batch_size: &batch_size 64 # Batch size for training
+...
+train:
+  ckpt_save_dir: './tmp_rec' # The training result (including checkpoints, per-epoch performance and curves) saving directory
+  dataset_sink_mode: False
+  dataset:
+    type: LMDBDataset
+    dataset_root: dir/to/data_lmdb_release/ # Root dir of training dataset
+    data_dir: training/ # Dir of training dataset, concatenated with `dataset_root` to be the complete dir of training dataset
+    # label_file: # Path of training label file, concatenated with `dataset_root` to be the complete path of training label file, not required when using LMDBDataset
+...
+eval:
+  ckpt_load_path: './tmp_rec/best.ckpt' # checkpoint file path
+  dataset_sink_mode: False
+  dataset:
+    type: LMDBDataset
+    dataset_root: dir/to/data_lmdb_release/ # Root dir of validation/evaluation dataset
+    data_dir: validation/ # Dir of validation/evaluation dataset, concatenated with `dataset_root` to be the complete dir of validation/evaluation dataset
+    # label_file: # Path of validation/evaluation label file, concatenated with `dataset_root` to be the complete path of validation/evaluation label file, not required when using LMDBDataset
+  ...
+  loader:
+    shuffle: False
+    batch_size: 64 # Batch size for validation/evaluation
+...
+```
+
+**Notes:**
+
+- As the global batch size (batch_size x num_devices) is important for reproducing the result, please adjust `batch_size` accordingly to keep the global batch size unchanged for a different number of NPUs, or adjust the learning rate linearly to a new global batch size.
+
+### Model Training
+
+- Distributed Training
+
+It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please modify the configuration parameter `system.distribute` to `True` and run
+
+```shell
+# distributed training on multiple Ascend devices
+mpirun --allow-run-as-root -n 8 python tools/train.py --config configs/rec/crnn/crnn_resnet34.yaml
+```
+
+- Standalone Training
+
+If you want to train or finetune the model on a smaller dataset without distributed training, please modify the configuration parameter `system.distribute` to `False` and run:
+
+```shell
+# standalone training on a CPU/Ascend device
+python tools/train.py --config configs/rec/crnn/crnn_resnet34.yaml
+```
+
+The training result (including checkpoints, per-epoch performance and curves) will be saved in the directory parsed by the arg `train.ckpt_save_dir`. The default directory is `./tmp_rec`.
+
+### Model Evaluation
+
+To evaluate the accuracy of the trained model, you can use `tools/eval.py`. Please set the checkpoint path to the arg `eval.ckpt_load_path` in the yaml config file, set the evaluation dataset path to the arg `eval.dataset.data_dir`, set `system.distribute` to `False`, and then run:
+
+```shell
+python tools/eval.py --config configs/rec/crnn/crnn_resnet34.yaml
+```
+
+Similarly, the accuracy of the trained model can be evaluated on multiple evaluation datasets by properly setting the args `eval.ckpt_load_path`, `eval.dataset.data_dir`, and `system.distribute` in the yaml config file, and then running:
+
+```shell
+python tools/benchmarking/multi_dataset_eval.py --config configs/rec/crnn/crnn_resnet34.yaml
+```
+
+## Character Dictionary
+
+### Default Setting
+
+To transform the ground-truth text into label IDs, we have to provide the character dictionary where keys are characters and values are IDs. By default, the dictionary is **"0123456789abcdefghijklmnopqrstuvwxyz"**, which means id=0 corresponds to the character "0". In this case, the dictionary only considers numbers and lowercase English characters, excluding spaces.
+
+### Built-in Dictionaries
+
+There are some built-in dictionaries, which are placed in `mindocr/utils/dict/`, and you can choose the appropriate one to use.
+
+- `en_dict.txt` is an English dictionary containing 94 characters, including numbers, common symbols, and uppercase and lowercase English letters.
+- `ch_dict.txt` is a Chinese dictionary containing 6623 characters, including commonly used simplified and traditional Chinese characters, numbers, common symbols, and uppercase and lowercase English letters.
+
+### Customized Dictionary
+
+You can also customize a dictionary file (\*\*\*.txt) and place it under `mindocr/utils/dict/`; the dictionary file should be a .txt file with one character per line.
+
+To use a specific dictionary, set the parameter `common.character_dict_path` to the path of the dictionary, and change the parameter `common.num_classes` to the corresponding number, which is the number of characters in the dictionary + 1.
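+
+As an illustration of how these two parameters relate, the sketch below (illustrative only, not the MindOCR implementation; the file path is just an example) builds the character-to-id mapping from a dictionary file and derives `common.num_classes` from it. The extra class accounts for the blank token used by the CTC loss.
+
+```python
+def load_char_dict(dict_path):
+    """Read a MindOCR-style dictionary file: one character per line."""
+    with open(dict_path, "r", encoding="utf-8") as f:
+        chars = [line.rstrip("\n") for line in f if line.rstrip("\n")]
+    return {c: i for i, c in enumerate(chars)}
+
+char_to_id = load_char_dict("mindocr/utils/dict/en_dict.txt")
+# common.num_classes = number of characters in the dictionary + 1 (CTC blank)
+print("common.num_classes should be", len(char_to_id) + 1)
+```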
+
+**Notes:**
+
+- You can include the space character by setting the parameter `common.use_space_char` in the configuration yaml to True.
+- Remember to check the value of `dataset->transform_pipeline->RecCTCLabelEncode->lower` in the configuration yaml. Set it to False if you prefer case-sensitive encoding.
+
+## Chinese Text Recognition Model Training
+
+Currently, this model supports multilingual recognition and provides pre-trained models for different languages. Details are as follows:
+
+### Chinese Dataset Preparation and Configuration
+
+We use a public Chinese text benchmark dataset [Benchmarking-Chinese-Text-Recognition](https://github.com/FudanVI/benchmarking-chinese-text-recognition) for CRNN training and evaluation.
+
+For detailed instructions on data preparation and yaml configuration, please refer to [ch_dataset](../../../docs/en/datasets/chinese_text_recognition.md).
+
+## Performance
+
+### General Purpose Chinese Models
+
+Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode.
+
+| **model name** | **backbone** | **cards** | **batch size** | **language** | **jit level** | **graph compile** | **ms/step** | **img/s** | **scene** | **web** | **document** | **recipe** | **weight** |
+| :------------: | :----------: | :-------: | :------------: | :----------: | :-----------: | :---------------: | :---------: | :-------: | :-------: | :-----: | :----------: | :--------: | :--------: |
+| CRNN | ResNet34_vd | 4 | 256 | Chinese | O2 | 203.48 s | 38.01 | 1180 | 60.71% | 65.94% | 97.67% | [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/crnn/crnn_resnet34_ch.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/crnn/crnn_resnet34_ch-7a342e3c.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/crnn/crnn_resnet34_ch-7a342e3c-105bccb2.mindir) |
+
+> The input shape for the exported MindIR file in the download link is (1, 3, 32, 320).
+
+### Specific Purpose Models
+
+#### Training Performance
+
+Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode.
+
+| **model name** | **backbone** | **train dataset** | **params(M)** | **cards** | **batch size** | **jit level** | **graph compile** | **ms/step** | **img/s** | **accuracy** | **recipe** | **weight** |
+| :------------: | :----------: | :---------------: | :-----------: | :-------: | :------------: | :-----------: | :---------------: | :---------: | :-------: | :----------: | :--------: | :--------: |
+| CRNN | VGG7 | MJ+ST | 8.72 | 8 | 16 | O2 | 59 s | 15.47 | 8274.08 | 81.31% | [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/crnn/crnn_vgg7.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/crnn/crnn_vgg7-6faf1b2d-910v2.ckpt) |
+| CRNN | ResNet34_vd | MJ+ST | 24.48 | 8 | 64 | O2 | 120.41 s | 60.86 | 8412.75 | 84.73% | [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/crnn/crnn_resnet34.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/crnn/crnn_resnet34-83f37f07.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/crnn/crnn_resnet34-83f37f07-eb10a0c9.mindir) |
+
+Detailed accuracy results for each benchmark dataset (IC03, IC13, IC15, IIIT, SVT, SVTP, CUTE):
+
+| **model name** | **backbone** | **cards** | **IC03_860** | **IC03_867** | **IC13_857** | **IC13_1015** | **IC15_1811** | **IC15_2077** | **IIIT5k_3000** | **SVT** | **SVTP** | **CUTE80** | **average** |
+| :------------: | :----------: | :-------: | :----------: | :----------: | :----------: | :-----------: | :-----------: | :-----------: | :-------------: | :-----: | :------: | :--------: | :---------: |
+| CRNN | VGG7 | 1 | 93.72% | 93.43% | 91.83% | 90.84% | 70.84% | 64.95% | 84.40% | 82.84% | 72.87% | 67.36% | 81.31% |
+| CRNN | ResNet34_vd | 1 | 95.35% | 95.27% | 93.70% | 92.71% | 75.65% | 69.72% | 87.30% | 86.09% | 78.60% | 72.92% | 84.73% |
+
+#### Inference Performance
+
+| model name | backbone | test dataset | params(M) | cards | batch size | **jit level** | **graph compile** | img/s |
+| :--------: | :---------: | :----------: | :-------: | :---: | :--------: | :-----------: | :---------------: | :----: |
+| CRNN | ResNet34_vd | IC15 | 24.48 | 1 | 1 | O2 | 10.46 s | 361.09 |
+| CRNN | ResNet34_vd | SVT | 24.48 | 1 | 1 | O2 | 10.31 s | 274.67 |
+
+### Notes
+
+- To reproduce the result in other contexts, please ensure the global batch size is the same.
+- The characters supported by the model are lowercase English characters from a to z and numbers from 0 to 9. For more explanation on the character dictionary, please refer to [Character Dictionary](#character-dictionary).
+- The models are trained from scratch without any pre-training. For more details on the training and evaluation datasets, please refer to the [Dataset Download](#dataset-download) and [Dataset Usage](#dataset-usage) sections.
+- The input shapes of the exported MindIR files of CRNN_VGG7 and CRNN_ResNet34_vd are both (1, 3, 32, 100).
+
+## References
+
+[1] Baoguang Shi, Xiang Bai, Cong Yao. An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition. arXiv preprint arXiv:1507.05717, 2015.
diff --git a/mshub_res/assets/mindspore/2.5/crossvit.md b/mshub_res/assets/mindspore/2.5/crossvit.md index b8a8e05..dff93c1 100644 --- a/mshub_res/assets/mindspore/2.5/crossvit.md +++ b/mshub_res/assets/mindspore/2.5/crossvit.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -92,33 +92,33 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
-```python +```shell python validate.py -c configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt ``` diff --git a/mshub_res/assets/mindspore/2.5/crossvit_9_imagenet2012.md b/mshub_res/assets/mindspore/2.5/crossvit_9_imagenet2012.md new file mode 100644 index 0000000..9ab454c --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/crossvit_9_imagenet2012.md @@ -0,0 +1,134 @@ +# crossvit_9 + +--- + +model-name: crossvit_9 + +backbone-name: crossvit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc73.38 | top5acc91.51 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 32c69c96 + +license: Apache2.0 + +summary: crossvit is used for cv + +--- + +# CrossViT + +> [CrossViT: Cross-Attention Multi-Scale Vision Transformer for Image Classification](https://arxiv.org/abs/2103.14899) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +CrossViT is a type of vision transformer that uses a dual-branch architecture to extract multi-scale feature representations for image classification. The architecture combines image patches (i.e. tokens in a transformer) of different sizes to produce stronger visual features for image classification. It processes small and large patch tokens with two separate branches of different computational complexities and these tokens are fused together multiple times to complement each other. + +Fusion is achieved by an efficient cross-attention module, in which each transformer branch creates a non-patch token as an agent to exchange information with the other branch by attention. This allows for linear-time generation of the attention map in fusion instead of quadratic time otherwise.[[1](#references)] + +

+ Figure 1. Architecture of CrossViT [1]
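+
+To make the linear-time claim in the introduction concrete, here is a toy NumPy sketch (not the MindCV implementation, and with the learned query/key/value projections omitted): only the CLS token of one branch queries the N patch tokens of the other branch, so the attention map has shape (N,) rather than (N, N).
+
+```python
+import numpy as np
+
+def cross_attention_cls(cls_token, other_tokens):
+    """cls_token: (d,) agent token of branch A; other_tokens: (N, d) patch tokens of branch B."""
+    d = cls_token.shape[-1]
+    scores = other_tokens @ cls_token / np.sqrt(d)  # (N,): linear in N, not quadratic
+    weights = np.exp(scores - scores.max())
+    weights /= weights.sum()                        # softmax over the N patch tokens
+    return weights @ other_tokens                   # (d,) updated CLS token for branch A
+
+np.random.seed(0)
+updated_cls = cross_attention_cls(np.random.randn(64), np.random.randn(196, 64))
+print(updated_cls.shape)  # (64,)
+```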

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| crossvit_9 | 8.55 | 8 | 256 | 240x240 | O2 | 221s | 514.36 | 3984.44 | 73.38 | 91.51 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_9_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/crossvit/crossvit_9-32c69c96-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| crossvit_9 | 8.55 | 8 | 256 | 240x240 | O2 | 206s | 550.79 | 3719.30 | 73.56 | 91.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_9_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_9-e74c8e18.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/crossvit/crossvit_15_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Chun-Fu Chen, Quanfu Fan, Rameswar Panda. CrossViT: Cross-Attention Multi-Scale Vision Transformer for Image Classification diff --git a/mshub_res/assets/mindspore/2.5/dbnet_mobilenetv3_icdar2015.md b/mshub_res/assets/mindspore/2.5/dbnet_mobilenetv3_icdar2015.md new file mode 100644 index 0000000..474d116 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/dbnet_mobilenetv3_icdar2015.md @@ -0,0 +1,360 @@ +# dbnet_mobilenetv3 + +--- + +model-name: dbnet_mobilenetv3 + +backbone-name: mobilenetv3 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ICDAR2015 + +evaluation: Recall76.27 | Precision76.06 | F-score76.17 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 7e89e1df + +license: Apache2.0 + +summary: dbnet_mobilenetv3 is used for cv + +--- + +# DBNet and DBNet++ + + + +> DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947) +> DBNet++: [Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion](https://arxiv.org/abs/2202.10304) + +## Introduction + +### DBNet + +DBNet is a segmentation-based scene text detection method. Segmentation-based methods are gaining popularity for scene +text detection purposes as they can more accurately describe scene text of various shapes, such as curved text. +The drawback of current segmentation-based SOTA methods is the post-processing of binarization (conversion of +probability maps into text bounding boxes) which often requires a manually set threshold (reduces prediction accuracy) +and complex algorithms for grouping pixels (resulting in a considerable time cost during inference). +To eliminate the problem described above, DBNet integrates an adaptive threshold called Differentiable Binarization(DB) +into the architecture. DB simplifies post-processing and enhances the performance of text detection.Moreover, it can be +removed in the inference stage without sacrificing performance.[[1](#references)] + +
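+
+As a rough illustration of the DB module described above (a sketch only, not the MindOCR code), the approximate binary map is computed element-wise from the probability map P and the learned threshold map T as B = 1 / (1 + exp(-k (P - T))), where k is the amplifying factor (set to 50 in the head configuration shown later on this page).
+
+```python
+import numpy as np
+
+def approximate_binary_map(prob_map, thresh_map, k=50.0):
+    """Differentiable binarization: a steep sigmoid around the learned threshold."""
+    return 1.0 / (1.0 + np.exp(-k * (prob_map - thresh_map)))
+
+# Pixels well above their threshold saturate towards 1 and pixels below towards 0,
+# yet the mapping stays differentiable so it can be trained end to end.
+prob = np.array([[0.30, 0.55, 0.90]])
+thresh = np.full_like(prob, 0.5)
+print(approximate_binary_map(prob, thresh).round(3))  # approx. [[0. 0.924 1.]]
+```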

+Figure 1. Overall DBNet architecture

+ +The overall architecture of DBNet is presented in _Figure 1._ It consists of multiple stages: + +1. Feature extraction from a backbone at different scales. ResNet-50 is used as a backbone, and features are extracted + from stages 2, 3, 4, and 5. +2. The extracted features are upscaled and summed up with the previous stage features in a cascade fashion. +3. The resulting features are upscaled once again to match the size of the largest feature map (from the stage 2) and + concatenated along the channel axis. +4. Then, the final feature map (shown in dark blue) is used to predict both the probability and threshold maps by + applying 3×3 convolutional operator and two de-convolutional operators with stride 2. +5. The probability and threshold maps are merged into one approximate binary map by the Differentiable binarization + module. The approximate binary map is used to generate text bounding boxes. + +### DBNet++ + +DBNet++ is an extension of DBNet and thus replicates its architecture. The only difference is that instead of +concatenating extracted and scaled features from the backbone as DBNet did, DBNet++ uses an adaptive way to fuse those +features called Adaptive Scale Fusion (ASF) module (Figure 2). It improves the scale robustness of the network by +fusing features of different scales adaptively. By using ASF, DBNet++’s ability to detect text instances of diverse +scales is distinctly strengthened.[[2](#references)] + +

+Figure 2. Overall DBNet++ architecture

+Figure 3. Detailed architecture of the Adaptive Scale Fusion module

+ +ASF consists of two attention modules – stage-wise attention and spatial attention, where the latter is integrated in +the former as described in the Figure 3. The stage-wise attention module learns the weights of the feature maps of +different scales. While the spatial attention module learns the attention across the spatial dimensions. The +combination of these two modules leads to scale-robust feature fusion. +DBNet++ performs better in detecting text instances of diverse scales, especially for large-scale text instances where +DBNet may generate inaccurate or discrete bounding boxes. + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +### Installation + +Please refer to the [installation instruction](https://github.com/mindspore-lab/mindocr#installation) in MindOCR. + +### Dataset preparation + +#### ICDAR2015 dataset + +Please download [ICDAR2015](https://rrc.cvc.uab.es/?ch=4&com=downloads) dataset, and convert the labels to the desired format referring to [dataset_converters](../../../tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```text +. +├── test +│   ├── images +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   └── test_det_gt.txt +└── train +    ├── images +    │   ├── img_1.jpg +    │   ├── img_2.jpg +    │   └── ....jpg +    └── train_det_gt.txt +``` + +#### MSRA-TD500 dataset + +Please download MSRA-TD500 dataset,and convert the labels to the desired format referring to [dataset_converters](../../../tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +MSRA-TD500 + ├── test + │ ├── IMG_0059.gt + │ ├── IMG_0059.JPG + │ ├── IMG_0080.gt + │ ├── IMG_0080.JPG + │ ├── ... + │ ├── train_det_gt.txt + ├── train + │ ├── IMG_0030.gt + │ ├── IMG_0030.JPG + │ ├── IMG_0063.gt + │ ├── IMG_0063.JPG + │ ├── ... + │ ├── test_det_gt.txt +``` + +#### SCUT-CTW1500 dataset + +Please download [SCUT-CTW1500](https://github.com/Yuliang-Liu/Curve-Text-Detector) dataset,and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +ctw1500 + ├── test_images + │ ├── 1001.jpg + │ ├── 1002.jpg + │ ├── ... + ├── train_images + │ ├── 0001.jpg + │ ├── 0002.jpg + │ ├── ... + ├── test_det_gt.txt + ├── train_det_gt.txt +``` + +#### Total-Text dataset + +Please download [Total-Text](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) dataset,and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +totaltext + ├── Images + │ ├── Train + │ │ ├── img1001.jpg + │ │ ├── img1002.jpg + │ │ ├── ... + │ ├── Test + │ │ ├── img1.jpg + │ │ ├── img2.jpg + │ │ ├── ... + ├── test_det_gt.txt + ├── train_det_gt.txt +``` + +#### MLT2017 dataset + +The MLT2017 dataset is a multilingual text detection and recognition dataset that includes nine languages: Chinese, Japanese, Korean, English, French, Arabic, Italian, German, and Hindi. Please download [MLT2017](https://rrc.cvc.uab.es/?ch=8&com=downloads) and extract the dataset. 
Then convert the .gif format images in the data to .jpg or .png format, and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +MLT_2017 + ├── train + │ ├── img_1.png + │ ├── img_2.png + │ ├── img_3.jpg + │ ├── img_4.jpg + │ ├── ... + ├── validation + │ ├── img_1.jpg + │ ├── img_2.jpg + │ ├── ... + ├── train_det_gt.txt + ├── validation_det_gt.txt +``` + +> If users want to use their own dataset for training, please convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). Then configure the yaml file, and use a single or multiple devices to run train.py for training. For detailed information, please refer to the following tutorials. + +#### SynthText dataset + +Please download [SynthText](https://academictorrents.com/details/2dba9518166cbd141534cbf381aa3e99a087e83c) dataset and process it as described in [dataset_converters](../../../tools/dataset_converters/README.md) + +```text +. +├── SynthText +│   ├── 1 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   ├── 2 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   ├── ... +│   ├── 200 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   └── gt.mat + +``` + +> :warning: Additionally, It is strongly recommended to pre-process the `SynthText` dataset before using it as it contains some faulty data: +> +> ```shell +> python tools/dataset_converters/convert.py --dataset_name=synthtext --task=det --label_dir=/path-to-data-dir/SynthText/gt.mat --output_path=/path-to-data-dir/SynthText/gt_processed.mat +> ``` +> +> This operation will generate a filtered output in the same format as the original `SynthText`. + +### Update yaml config file + +Update `configs/det/dbnet/db_r50_icdar15.yaml` configuration file with data paths, +specifically the following parts. The `dataset_root` will be concatenated with `data_dir` and `label_file` respectively to be the complete dataset directory and label file path. + +```yaml +--- +train: + ckpt_save_dir: "./tmp_det" + dataset_sink_mode: False + dataset: + type: DetDataset + dataset_root: dir/to/dataset <--- Update + data_dir: train/images <--- Update + label_file: train/train_det_gt.txt <--- Update +--- +eval: + dataset_sink_mode: False + dataset: + type: DetDataset + dataset_root: dir/to/dataset <--- Update + data_dir: test/images <--- Update + label_file: test/test_det_gt.txt <--- Update +``` + +> Optionally, change `num_workers` according to the cores of CPU. + +DBNet consists of 3 parts: `backbone`, `neck`, and `head`. Specifically: + +```yaml +model: + type: det + transform: null + backbone: + name: det_resnet50 # Only ResNet50 is supported at the moment + pretrained: True # Whether to use weights pretrained on ImageNet + neck: + name: DBFPN # FPN part of the DBNet + out_channels: 256 + bias: False + use_asf: False # Adaptive Scale Fusion module from DBNet++ (use it for DBNet++ only) + head: + name: DBHead + k: 50 # amplifying factor for Differentiable Binarization + bias: False + adaptive: True # True for training, False for inference +``` + +### Training + +- Standalone training + + Please set `distribute` in yaml config file to be False. 
+ + ```shell + python tools/train.py -c=configs/det/dbnet/db_r50_icdar15.yaml + ``` + +- Distributed training + + Please set `distribute` in yaml config file to be True. + + ```shell + # n is the number of NPUs + mpirun --allow-run-as-root -n 2 python tools/train.py --config configs/det/dbnet/db_r50_icdar15.yaml + ``` + +The training result (including checkpoints, per-epoch performance and curves) will be saved in the directory parsed by the arg `ckpt_save_dir` in yaml config file. The default directory is `./tmp_det`. + +### Evaluation + +To evaluate the accuracy of the trained model, you can use `eval.py`. Please set the checkpoint path to the arg `ckpt_load_path` in the `eval` section of yaml config file, set `distribute` to be False, and then run: + +```shell +python tools/eval.py -c=configs/det/dbnet/db_r50_icdar15.yaml +``` + +### Performance + +DBNet and DBNet++ were trained on the ICDAR2015, MSRA-TD500, SCUT-CTW1500, Total-Text, and MLT2017 datasets. In addition, we conducted pre-training on the ImageNet or SynthText dataset and provided a URL to download pretrained weights. All training results are as follows: + +#### ICDAR2015 + +| **model name** | **backbone** | **pretrained** | **cards** | **batch size** | **jit level** | **graph compile** | **ms/step** | **img/s** | **recall** | **precision** | **f-score** | **recipe** | **weight** | +| :------------: | :----------: | :------------: | :-------: | :------------: | :-----------: | :---------------: | :---------: | :-------: | :--------: | :-----------: | :---------: | :------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DBNet | MobileNetV3 | ImageNet | 1 | 10 | O2 | 403.87 s | 65.69 | 152.23 | 74.68% | 79.38% | 76.95% | [yaml](db_mobilenetv3_icdar15.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-e72f9b8b-910v2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539-f14c6a13.mindir) | +| DBNet | MobileNetV3 | ImageNet | 8 | 8 | O2 | 405.35 s | 54.46 | 1175.12 | 76.27% | 76.06% | 76.17% | [yaml](db_mobilenetv3_icdar15_8p.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-7e89e1df-910v2.ckpt) | +| DBNet | ResNet-50 | ImageNet | 1 | 10 | O2 | 147.81 s | 155.62 | 64.25 | 84.50% | 85.36% | 84.93% | [yaml](db_r50_icdar15.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_resnet50-48153c3b-910v2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50-c3a4aa24-fbf95c82.mindir) | +| DBNet | ResNet-50 | ImageNet | 8 | 10 | O2 | 151.23 s | 159.22 | 502.4 | 81.15% | 87.63% | 84.26% | [yaml](db_r50_icdar15_8p.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_resnet50-e10bad35-910v2.ckpt) | +| DBNet++ | ResNet-50 | SynthText | 1 | 32 | O2 | 191.93 s | 549.24 | 58.26 | 86.81% | 86.85% | 86.86% | [yaml](dbpp_r50_icdar15.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnetpp_resnet50_910-35dc71f2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnetpp_resnet50_910-35dc71f2-e61a9c37.mindir) | + +### Notes + +- Note that the training time of DBNet is highly affected by data processing and varies on different machines. 
+- The input_shape for exported DBNet MindIR and DBNet++ MindIR in the links are `(1,3,736,1280)` and `(1,3,1152,2048)`, respectively. + +## References + + + +[1] Minghui Liao, Zhaoyi Wan, Cong Yao, Kai Chen, Xiang Bai. Real-time Scene Text Detection with Differentiable +Binarization. arXiv:1911.08947, 2019 + +[2] Minghui Liao, Zhisheng Zou, Zhaoyi Wan, Cong Yao, Xiang Bai. Real-Time Scene Text Detection with Differentiable +Binarization and Adaptive Scale Fusion. arXiv:2202.10304, 2022 diff --git a/mshub_res/assets/mindspore/2.5/dbnet_resnet50_icdar2015.md b/mshub_res/assets/mindspore/2.5/dbnet_resnet50_icdar2015.md new file mode 100644 index 0000000..7923ca4 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/dbnet_resnet50_icdar2015.md @@ -0,0 +1,360 @@ +# dbnet_resnet50 + +--- + +model-name: dbnet_resnet50 + +backbone-name: resnet50 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ICDAR2015 + +evaluation: Recall81.15 | Precision87.63 | F-score84.26 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: e10bad35 + +license: Apache2.0 + +summary: dbnet_resnet50 is used for cv + +--- + +# DBNet and DBNet++ + + + +> DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947) +> DBNet++: [Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion](https://arxiv.org/abs/2202.10304) + +## Introduction + +### DBNet + +DBNet is a segmentation-based scene text detection method. Segmentation-based methods are gaining popularity for scene +text detection purposes as they can more accurately describe scene text of various shapes, such as curved text. +The drawback of current segmentation-based SOTA methods is the post-processing of binarization (conversion of +probability maps into text bounding boxes) which often requires a manually set threshold (reduces prediction accuracy) +and complex algorithms for grouping pixels (resulting in a considerable time cost during inference). +To eliminate the problem described above, DBNet integrates an adaptive threshold called Differentiable Binarization(DB) +into the architecture. DB simplifies post-processing and enhances the performance of text detection.Moreover, it can be +removed in the inference stage without sacrificing performance.[[1](#references)] + +

+Figure 1. Overall DBNet architecture

+ +The overall architecture of DBNet is presented in _Figure 1._ It consists of multiple stages: + +1. Feature extraction from a backbone at different scales. ResNet-50 is used as a backbone, and features are extracted + from stages 2, 3, 4, and 5. +2. The extracted features are upscaled and summed up with the previous stage features in a cascade fashion. +3. The resulting features are upscaled once again to match the size of the largest feature map (from the stage 2) and + concatenated along the channel axis. +4. Then, the final feature map (shown in dark blue) is used to predict both the probability and threshold maps by + applying 3×3 convolutional operator and two de-convolutional operators with stride 2. +5. The probability and threshold maps are merged into one approximate binary map by the Differentiable binarization + module. The approximate binary map is used to generate text bounding boxes. + +### DBNet++ + +DBNet++ is an extension of DBNet and thus replicates its architecture. The only difference is that instead of +concatenating extracted and scaled features from the backbone as DBNet did, DBNet++ uses an adaptive way to fuse those +features called Adaptive Scale Fusion (ASF) module (Figure 2). It improves the scale robustness of the network by +fusing features of different scales adaptively. By using ASF, DBNet++’s ability to detect text instances of diverse +scales is distinctly strengthened.[[2](#references)] + +

+Figure 2. Overall DBNet++ architecture

+Figure 3. Detailed architecture of the Adaptive Scale Fusion module

+ +ASF consists of two attention modules – stage-wise attention and spatial attention, where the latter is integrated in +the former as described in the Figure 3. The stage-wise attention module learns the weights of the feature maps of +different scales. While the spatial attention module learns the attention across the spatial dimensions. The +combination of these two modules leads to scale-robust feature fusion. +DBNet++ performs better in detecting text instances of diverse scales, especially for large-scale text instances where +DBNet may generate inaccurate or discrete bounding boxes. + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +### Installation + +Please refer to the [installation instruction](https://github.com/mindspore-lab/mindocr#installation) in MindOCR. + +### Dataset preparation + +#### ICDAR2015 dataset + +Please download [ICDAR2015](https://rrc.cvc.uab.es/?ch=4&com=downloads) dataset, and convert the labels to the desired format referring to [dataset_converters](../../../tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```text +. +├── test +│   ├── images +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   └── test_det_gt.txt +└── train +    ├── images +    │   ├── img_1.jpg +    │   ├── img_2.jpg +    │   └── ....jpg +    └── train_det_gt.txt +``` + +#### MSRA-TD500 dataset + +Please download MSRA-TD500 dataset,and convert the labels to the desired format referring to [dataset_converters](../../../tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +MSRA-TD500 + ├── test + │ ├── IMG_0059.gt + │ ├── IMG_0059.JPG + │ ├── IMG_0080.gt + │ ├── IMG_0080.JPG + │ ├── ... + │ ├── train_det_gt.txt + ├── train + │ ├── IMG_0030.gt + │ ├── IMG_0030.JPG + │ ├── IMG_0063.gt + │ ├── IMG_0063.JPG + │ ├── ... + │ ├── test_det_gt.txt +``` + +#### SCUT-CTW1500 dataset + +Please download SCUT-CTW1500 dataset,and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +ctw1500 + ├── test_images + │ ├── 1001.jpg + │ ├── 1002.jpg + │ ├── ... + ├── train_images + │ ├── 0001.jpg + │ ├── 0002.jpg + │ ├── ... + ├── test_det_gt.txt + ├── train_det_gt.txt +``` + +#### Total-Text dataset + +Please download [Total-Text](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) dataset,and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +totaltext + ├── Images + │ ├── Train + │ │ ├── img1001.jpg + │ │ ├── img1002.jpg + │ │ ├── ... + │ ├── Test + │ │ ├── img1.jpg + │ │ ├── img2.jpg + │ │ ├── ... + ├── test_det_gt.txt + ├── train_det_gt.txt +``` + +#### MLT2017 dataset + +The MLT2017 dataset is a multilingual text detection and recognition dataset that includes nine languages: Chinese, Japanese, Korean, English, French, Arabic, Italian, German, and Hindi. Please download [MLT2017](https://rrc.cvc.uab.es/?ch=8&com=downloads) and extract the dataset. 
Then convert the .gif format images in the data to .jpg or .png format, and convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). + +The prepared dataset file structure should be: + +```txt +MLT_2017 + ├── train + │ ├── img_1.png + │ ├── img_2.png + │ ├── img_3.jpg + │ ├── img_4.jpg + │ ├── ... + ├── validation + │ ├── img_1.jpg + │ ├── img_2.jpg + │ ├── ... + ├── train_det_gt.txt + ├── validation_det_gt.txt +``` + +> If users want to use their own dataset for training, please convert the labels to the desired format referring to [dataset_converters](https://github.com/mindspore-lab/mindocr/blob/main/tools/dataset_converters/README.md). Then configure the yaml file, and use a single or multiple devices to run train.py for training. For detailed information, please refer to the following tutorials. + +#### SynthText dataset + +Please download [SynthText](https://academictorrents.com/details/2dba9518166cbd141534cbf381aa3e99a087e83c) dataset and process it as described in [dataset_converters](../../../tools/dataset_converters/README.md) + +```text +. +├── SynthText +│   ├── 1 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   ├── 2 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   ├── ... +│   ├── 200 +│   │   ├── img_1.jpg +│   │   ├── img_2.jpg +│   │   └── ... +│   └── gt.mat + +``` + +> :warning: Additionally, It is strongly recommended to pre-process the `SynthText` dataset before using it as it contains some faulty data: +> +> ```shell +> python tools/dataset_converters/convert.py --dataset_name=synthtext --task=det --label_dir=/path-to-data-dir/SynthText/gt.mat --output_path=/path-to-data-dir/SynthText/gt_processed.mat +> ``` +> +> This operation will generate a filtered output in the same format as the original `SynthText`. + +### Update yaml config file + +Update `configs/det/dbnet/db_r50_icdar15.yaml` configuration file with data paths, +specifically the following parts. The `dataset_root` will be concatenated with `data_dir` and `label_file` respectively to be the complete dataset directory and label file path. + +```yaml +--- +train: + ckpt_save_dir: "./tmp_det" + dataset_sink_mode: False + dataset: + type: DetDataset + dataset_root: dir/to/dataset <--- Update + data_dir: train/images <--- Update + label_file: train/train_det_gt.txt <--- Update +--- +eval: + dataset_sink_mode: False + dataset: + type: DetDataset + dataset_root: dir/to/dataset <--- Update + data_dir: test/images <--- Update + label_file: test/test_det_gt.txt <--- Update +``` + +> Optionally, change `num_workers` according to the cores of CPU. + +DBNet consists of 3 parts: `backbone`, `neck`, and `head`. Specifically: + +```yaml +model: + type: det + transform: null + backbone: + name: det_resnet50 # Only ResNet50 is supported at the moment + pretrained: True # Whether to use weights pretrained on ImageNet + neck: + name: DBFPN # FPN part of the DBNet + out_channels: 256 + bias: False + use_asf: False # Adaptive Scale Fusion module from DBNet++ (use it for DBNet++ only) + head: + name: DBHead + k: 50 # amplifying factor for Differentiable Binarization + bias: False + adaptive: True # True for training, False for inference +``` + +### Training + +- Standalone training + + Please set `distribute` in yaml config file to be False. 
+ + ```shell + python tools/train.py -c=configs/det/dbnet/db_r50_icdar15.yaml + ``` + +- Distributed training + + Please set `distribute` in yaml config file to be True. + + ```shell + # n is the number of NPUs + mpirun --allow-run-as-root -n 2 python tools/train.py --config configs/det/dbnet/db_r50_icdar15.yaml + ``` + +The training result (including checkpoints, per-epoch performance and curves) will be saved in the directory parsed by the arg `ckpt_save_dir` in yaml config file. The default directory is `./tmp_det`. + +### Evaluation + +To evaluate the accuracy of the trained model, you can use `eval.py`. Please set the checkpoint path to the arg `ckpt_load_path` in the `eval` section of yaml config file, set `distribute` to be False, and then run: + +```shell +python tools/eval.py -c=configs/det/dbnet/db_r50_icdar15.yaml +``` + +### Performance + +DBNet and DBNet++ were trained on the ICDAR2015, MSRA-TD500, SCUT-CTW1500, Total-Text, and MLT2017 datasets. In addition, we conducted pre-training on the ImageNet or SynthText dataset and provided a URL to download pretrained weights. All training results are as follows: + +#### ICDAR2015 + +| **model name** | **backbone** | **pretrained** | **cards** | **batch size** | **jit level** | **graph compile** | **ms/step** | **img/s** | **recall** | **precision** | **f-score** | **recipe** | **weight** | +| :------------: | :----------: | :------------: | :-------: | :------------: | :-----------: | :---------------: | :---------: | :-------: | :--------: | :-----------: | :---------: | :------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DBNet | MobileNetV3 | ImageNet | 1 | 10 | O2 | 403.87 s | 65.69 | 152.23 | 74.68% | 79.38% | 76.95% | [yaml](db_mobilenetv3_icdar15.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-e72f9b8b-910v2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539-f14c6a13.mindir) | +| DBNet | MobileNetV3 | ImageNet | 8 | 8 | O2 | 405.35 s | 54.46 | 1175.12 | 76.27% | 76.06% | 76.17% | [yaml](db_mobilenetv3_icdar15_8p.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-7e89e1df-910v2.ckpt) | +| DBNet | ResNet-50 | ImageNet | 1 | 10 | O2 | 147.81 s | 155.62 | 64.25 | 84.50% | 85.36% | 84.93% | [yaml](db_r50_icdar15.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_resnet50-48153c3b-910v2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50-c3a4aa24-fbf95c82.mindir) | +| DBNet | ResNet-50 | ImageNet | 8 | 10 | O2 | 151.23 s | 159.22 | 502.4 | 81.15% | 87.63% | 84.26% | [yaml](db_r50_icdar15_8p.yaml) | [ckpt](https://download-mindspore.osinfra.cn/toolkits/mindocr/dbnet/dbnet_resnet50-e10bad35-910v2.ckpt) | +| DBNet++ | ResNet-50 | SynthText | 1 | 32 | O2 | 191.93 s | 549.24 | 58.26 | 86.81% | 86.85% | 86.86% | [yaml](dbpp_r50_icdar15.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnetpp_resnet50_910-35dc71f2.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnetpp_resnet50_910-35dc71f2-e61a9c37.mindir) | + +### Notes + +- Note that the training time of DBNet is highly affected by data processing and varies on different machines. 
+- The input_shape for exported DBNet MindIR and DBNet++ MindIR in the links are `(1,3,736,1280)` and `(1,3,1152,2048)`, respectively. + +## References + + + +[1] Minghui Liao, Zhaoyi Wan, Cong Yao, Kai Chen, Xiang Bai. Real-time Scene Text Detection with Differentiable +Binarization. arXiv:1911.08947, 2019 + +[2] Minghui Liao, Zhisheng Zou, Zhaoyi Wan, Cong Yao, Xiang Bai. Real-Time Scene Text Detection with Differentiable +Binarization and Adaptive Scale Fusion. arXiv:2202.10304, 2022 diff --git a/mshub_res/assets/mindspore/2.5/densenet121_imagenet2012.md b/mshub_res/assets/mindspore/2.5/densenet121_imagenet2012.md new file mode 100644 index 0000000..8746f08 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/densenet121_imagenet2012.md @@ -0,0 +1,146 @@ +# densenet121 + +--- + +model-name: densenet121 + +backbone-name: densenet + +module-type: cv-classification + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc75.67 | top5acc92.77 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: bf4ab27f + +license: Apache2.0 + +summary: densenet is used for cv + +--- + +# DenseNet + + + +> [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +Recent work has shown that convolutional networks can be substantially deeper, more accurate, and more efficient to train if +they contain shorter connections between layers close to the input and those close to the output. Dense Convolutional +Network (DenseNet) is introduced based on this observation, which connects each layer to every other layer in a +feed-forward fashion. Whereas traditional convolutional networks with $L$ layers have $L$ connections-one between each +layer and its subsequent layer, DenseNet has $\frac{L(L+1)}{2}$ direct connections. For each layer, the feature maps +of all preceding layers are used as inputs, and their feature maps are used as inputs into all subsequent layers. +DenseNets have several compelling advantages: they alleviate the vanishing-gradient problem, strengthen feature +propagation, encourage feature reuse, and substantially reduce the number of parameters.[[1](#references)] + +

+ Figure 1. Architecture of DenseNet [1]
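+
+To make the dense-connectivity idea from the introduction concrete, here is a toy MindSpore sketch (illustrative only, not the MindCV implementation; channel counts and depth are arbitrary): every layer receives the concatenation of all preceding feature maps, so the channel count grows by the growth rate at each layer.
+
+```python
+import numpy as np
+import mindspore as ms
+from mindspore import nn, ops
+
+class ToyDenseBlock(nn.Cell):
+    """Each 3x3 conv sees the concatenation of all previous feature maps."""
+    def __init__(self, in_channels, growth_rate, num_layers):
+        super().__init__()
+        self.layers = nn.CellList([
+            nn.Conv2d(in_channels + i * growth_rate, growth_rate, 3)
+            for i in range(num_layers)
+        ])
+
+    def construct(self, x):
+        features = x
+        for layer in self.layers:
+            new = layer(features)
+            features = ops.cat((features, new), axis=1)  # reuse every earlier feature map
+        return features
+
+block = ToyDenseBlock(in_channels=16, growth_rate=8, num_layers=4)
+out = block(ms.Tensor(np.random.randn(1, 16, 32, 32), ms.float32))
+print(out.shape)  # (1, 48, 32, 32): 16 + 4 * 8 channels
+```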

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| densenet121 | 8.06 | 8 | 32 | 224x224 | O2 | 300s | 47,34 | 5446.81 | 75.67 | 92.77 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_121_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/densenet/densenet121-bf4ab27f-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | +| densenet121 | 8.06 | 8 | 32 | 224x224 | O2 | 191s | 43.28 | 5914.97 | 75.64 | 92.84 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_121_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet121-120_5004_Ascend.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/densenet/densenet_121_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/densenet/densenet_121_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/densenet/densenet_121_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Huang G, Liu Z, Van Der Maaten L, et al. Densely connected convolutional networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 4700-4708. diff --git a/mshub_res/assets/mindspore/2.5/edgenext.md b/mshub_res/assets/mindspore/2.5/edgenext.md index 66c6dc7..52cfeaa 100644 --- a/mshub_res/assets/mindspore/2.5/edgenext.md +++ b/mshub_res/assets/mindspore/2.5/edgenext.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -95,33 +95,33 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
-```python +```shell python validate.py -c configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt ``` diff --git a/mshub_res/assets/mindspore/2.5/edgenext_xx_small_imagenet2012.md b/mshub_res/assets/mindspore/2.5/edgenext_xx_small_imagenet2012.md new file mode 100644 index 0000000..88b88bb --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/edgenext_xx_small_imagenet2012.md @@ -0,0 +1,136 @@ +# edgenext_xx_small + +--- + +model-name: edgenext_xx_small + +backbone-name: edgenext + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc70.64 | top5acc89.75 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: cad13d2c + +license: Apache2.0 + +summary: edgenext is used for cv + +--- + +# EdgeNeXt + +> [EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications](https://arxiv.org/abs/2206.10589) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +EdgeNeXt effectively combines the strengths of both CNN and Transformer models and is a +new efficient hybrid architecture. EdgeNeXt introduces a split depth-wise transpose +attention (SDTA) encoder that splits input tensors into multiple channel groups and +utilizes depth-wise convolution along with self-attention across channel dimensions +to implicitly increase the receptive field and encode multi-scale features.[[1](#references)] + +

+_Figure 1. Architecture of EdgeNeXt [1]_
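+
+As a concrete illustration of the channel-wise ("transposed") attention that the SDTA encoder relies on, the snippet below sketches a single-head version in MindSpore. The class name, the `(B, N, C)` token layout, and the omission of the L2 normalization and learnable temperature used in the paper are simplifying assumptions for illustration; this is not the MindCV implementation.
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class TransposedSelfAttention(nn.Cell):
+    """Attention across the channel dimension: the attention map is C x C,
+    so its cost grows linearly (not quadratically) with the number of pixels."""
+
+    def __init__(self, channels: int):
+        super().__init__()
+        self.q = nn.Dense(channels, channels)
+        self.k = nn.Dense(channels, channels)
+        self.v = nn.Dense(channels, channels)
+        self.proj = nn.Dense(channels, channels)
+        self.softmax = nn.Softmax(axis=-1)
+
+    def construct(self, x):
+        # x: (B, N, C) with N = H * W flattened spatial positions
+        q = self.q(x).transpose(0, 2, 1)  # (B, C, N)
+        k = self.k(x).transpose(0, 2, 1)  # (B, C, N)
+        v = self.v(x).transpose(0, 2, 1)  # (B, C, N)
+        attn = self.softmax(ops.matmul(q, k.transpose(0, 2, 1)))  # (B, C, C)
+        out = ops.matmul(attn, v)  # (B, C, N)
+        return self.proj(out.transpose(0, 2, 1))  # back to (B, N, C)
+```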

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| edgenext_xx_small | 1.33 | 8 | 256 | 256x256 | O2 | 389s | 239.38 | 8555.43 | 70.64 | 89.75 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/edgenext/edgenext_xx_small_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/edgenext/edgenext_xx_small-cad13d2c-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | -------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| edgenext_xx_small | 1.33 | 8 | 256 | 256x256 | O2 | 311s | 191.24 | 10709.06 | 71.02 | 89.99 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/edgenext/edgenext_xx_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/edgenext/edgenext_xx_small-afc971fb.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/edgenext/edgenext_small_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Maaz M, Shaker A, Cholakkal H, et al. EdgeNeXt: efficiently amalgamated CNN-transformer architecture for Mobile vision applications[J]. arXiv preprint arXiv:2206.10589, 2022. diff --git a/mshub_res/assets/mindspore/2.5/efficientnet.md b/mshub_res/assets/mindspore/2.5/efficientnet.md index 6f45acc..ca7b971 100644 --- a/mshub_res/assets/mindspore/2.5/efficientnet.md +++ b/mshub_res/assets/mindspore/2.5/efficientnet.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -111,33 +111,33 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 64 python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 64 python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. - Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
-```python +```shell python validate.py -c configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt ``` diff --git a/mshub_res/assets/mindspore/2.5/efficientnet_b0_imagenet2012.md b/mshub_res/assets/mindspore/2.5/efficientnet_b0_imagenet2012.md new file mode 100644 index 0000000..4b464f7 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/efficientnet_b0_imagenet2012.md @@ -0,0 +1,142 @@ +# efficientnet_b0 + +--- + +model-name: efficientnet_b0 + +backbone-name: efficientnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc76.88 | top5acc93.28 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: f8d7aa2a + +license: Apache2.0 + +summary: efficientnet_b0 is used for cv + +--- + +# EfficientNet + + + +> [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +Figure 1 shows the methods from three dimensions -- width, depth, resolution and compound to expand the model. Increasing the model +size solely would cause the model performance to sub-optimal solution. However, if three methods could be applied together into the model +, it is more likely to achieve optimal solution. By using neural architecture search, the best configurations for width scaling, depth scaling +and resolution scaling could be found. EfficientNet could achieve better model performance on ImageNet-1K dataset compared with previous methods.[[1](#references)] + +

+_Figure 1. Architecture of EfficientNet [1]_
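+
+The compound scaling rule described above can be made concrete with a few lines of arithmetic. The coefficients alpha, beta and gamma below are the ones reported in the paper for growing the B0 baseline; the script is only an explanatory sketch, not part of the training recipe.
+
+```python
+# Compound scaling: depth d = alpha**phi, width w = beta**phi, resolution r = gamma**phi,
+# chosen so that alpha * beta**2 * gamma**2 is roughly 2 (FLOPs roughly double per unit of phi).
+ALPHA, BETA, GAMMA = 1.2, 1.1, 1.15
+
+
+def compound_scale(phi: int):
+    depth = ALPHA**phi
+    width = BETA**phi
+    resolution = GAMMA**phi
+    flops_factor = depth * width**2 * resolution**2  # approx. 2**phi
+    return depth, width, resolution, flops_factor
+
+
+for phi in range(4):
+    d, w, r, f = compound_scale(phi)
+    print(f"phi={phi}: depth x{d:.2f}, width x{w:.2f}, resolution x{r:.2f}, FLOPs x{f:.2f}")
+```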

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| efficientnet_b0 | 5.33 | 8 | 128 | 224x224 | O2 | 353s | 172.64 | 5931.42 | 76.88 | 93.28 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/efficientnet/efficientnet_b0_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/efficientnet/efficientnet_b0-f8d7aa2a-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| efficientnet_b0 | 5.33 | 8 | 128 | 224x224 | O2 | 203s | 172.78 | 5926.61 | 76.89 | 93.16 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/efficientnet/efficientnet_b0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/efficientnet/efficientnet_b0-103ec70c.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/efficientnet/efficientnet_b0_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Tan M, Le Q. Efficientnet: Rethinking model scaling for convolutional neural networks[C]//International conference on machine learning. PMLR, 2019: 6105-6114. diff --git a/mshub_res/assets/mindspore/2.5/ghostnet.md b/mshub_res/assets/mindspore/2.5/ghostnet.md index ec31b5d..c10026f 100644 --- a/mshub_res/assets/mindspore/2.5/ghostnet.md +++ b/mshub_res/assets/mindspore/2.5/ghostnet.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -99,27 +99,27 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
- Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation diff --git a/mshub_res/assets/mindspore/2.5/ghostnet_050_imagenet2012.md b/mshub_res/assets/mindspore/2.5/ghostnet_050_imagenet2012.md new file mode 100644 index 0000000..32443f7 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/ghostnet_050_imagenet2012.md @@ -0,0 +1,137 @@ +# ghostnet_050 + +--- + +model-name: ghostnet_050 + +backbone-name: ghostnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc65.84 | top5acc86.60 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: ae7771cb + +license: Apache2.0 + +summary: ghostnet is used for cv + +--- + +# GhostNet + +> [GhostNet: More Features from Cheap Operations](https://arxiv.org/abs/1911.11907) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +The redundancy in feature maps is an important characteristic of those successful CNNs, but has rarely been +investigated in neural architecture design. This paper proposes a novel Ghost module to generate more feature maps from +cheap operations. Based on a set of intrinsic feature maps, the authors apply a series of linear transformations with +cheap cost to generate many ghost feature maps that could fully reveal information underlying intrinsic features. The +proposed Ghost module can be taken as a plug-and-play component to upgrade existing convolutional neural networks. +Ghost bottlenecks are designed to stack Ghost modules, and then the lightweight GhostNet can be easily +established. Experiments conducted on benchmarks demonstrate that the Ghost module is an impressive alternative of +convolution layers in baseline models, and GhostNet can achieve higher recognition performance (e.g. 75.7% top-1 +accuracy) than MobileNetV3 with similar computational cost on the ImageNet ILSVRC-2012 classification +dataset.[[1](#references)] + +

+_Figure 1. Architecture of GhostNet [1]_
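+
+The Ghost module described above can be sketched in a few lines of MindSpore. The layer choices below (a 1x1 primary convolution, a 3x3 depth-wise "cheap" convolution, ratio 2) follow the spirit of the paper but are an illustrative assumption rather than the exact MindCV implementation.
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class GhostModule(nn.Cell):
+    """A primary convolution produces a few intrinsic feature maps; cheap
+    depth-wise convolutions then generate the remaining 'ghost' maps."""
+
+    def __init__(self, in_channels: int, out_channels: int, ratio: int = 2):
+        super().__init__()
+        init_channels = out_channels // ratio
+        ghost_channels = out_channels - init_channels
+        self.primary = nn.SequentialCell([
+            nn.Conv2d(in_channels, init_channels, kernel_size=1),
+            nn.BatchNorm2d(init_channels),
+            nn.ReLU(),
+        ])
+        self.cheap = nn.SequentialCell([
+            nn.Conv2d(init_channels, ghost_channels, kernel_size=3,
+                      pad_mode="same", group=init_channels),  # depth-wise, hence cheap
+            nn.BatchNorm2d(ghost_channels),
+            nn.ReLU(),
+        ])
+
+    def construct(self, x):
+        intrinsic = self.primary(x)
+        ghosts = self.cheap(intrinsic)
+        return ops.cat((intrinsic, ghosts), axis=1)
+```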

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | +| ghostnet_050 | 2.60 | 8 | 128 | 224x224 | O2 | 383s | 211.13 | 4850.09 | 66.03 | 86.64 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/ghostnet/ghostnet_050_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/ghostnet/ghostnet_050-85b91860.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/ghostnet/ghostnet_100_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Han K, Wang Y, Tian Q, et al. Ghostnet: More features from cheap operations[C]//Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2020: 1580-1589. 
diff --git a/mshub_res/assets/mindspore/2.5/googlenet.md b/mshub_res/assets/mindspore/2.5/googlenet.md index 6ead3fb..8826aa7 100644 --- a/mshub_res/assets/mindspore/2.5/googlenet.md +++ b/mshub_res/assets/mindspore/2.5/googlenet.md @@ -20,7 +20,7 @@ author: MindSpore team update-time: 2025-03-10 -repo-link: +repo-link: user-id: MindSpore @@ -93,27 +93,27 @@ Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/201 - Distributed Training -It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run -```shell -# distributed training on multiple Ascend devices -mpirun -n 8 python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/imagenet -``` + ```shell + # distributed training on multiple Ascend devices + mpirun -n 8 python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/imagenet + ``` -> If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. + > If the script is executed by the root user, the `--allow-run-as-root` parameter must be added to `mpirun`. -For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). -**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
- Standalone Training -If you want to train or finetune the model on a smaller dataset without distributed training, please run: + If you want to train or finetune the model on a smaller dataset without distributed training, please run: -```shell -# standalone training on a CPU/Ascend device -python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/dataset --distribute False -``` + ```shell + # standalone training on a CPU/Ascend device + python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` ### Validation diff --git a/mshub_res/assets/mindspore/2.5/googlenet_imagenet2012.md b/mshub_res/assets/mindspore/2.5/googlenet_imagenet2012.md new file mode 100644 index 0000000..736797b --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/googlenet_imagenet2012.md @@ -0,0 +1,135 @@ +# googlenet + +--- + +model-name: googlenet + +backbone-name: googlenet + +module-type: cv-classification + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.89 | top5acc90.89 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: de74c31d + +license: Apache2.0 + +summary: googlenet is used for cv + +--- + +# GoogLeNet + +> [GoogLeNet: Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +GoogLeNet is a new deep learning structure proposed by Christian Szegedy in 2014. Prior to this, AlexNet, VGG and other +structures achieved better training effects by increasing the depth (number of layers) of the network, but the increase +in the number of layers It will bring many negative effects, such as overfit, gradient disappearance, gradient +explosion, etc. The proposal of inception improves the training results from another perspective: it can use computing +resources more efficiently, and can extract more features under the same amount of computing, thereby improving the +training results.[[1](#references)] + +

+_Figure 1. Architecture of GoogLeNet [1]_
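+
+The inception idea described above boils down to running several convolution branches with different receptive fields in parallel and concatenating their outputs. Below is a minimal MindSpore sketch of one such block; the branch widths are constructor arguments and the whole class is illustrative, not the MindCV implementation.
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class InceptionBlock(nn.Cell):
+    """Parallel 1x1, 3x3, 5x5 and pooled branches concatenated along channels."""
+
+    def __init__(self, in_channels, c1, c3_reduce, c3, c5_reduce, c5, pool_proj):
+        super().__init__()
+        self.branch1 = nn.Conv2d(in_channels, c1, kernel_size=1)
+        self.branch3 = nn.SequentialCell([
+            nn.Conv2d(in_channels, c3_reduce, kernel_size=1),  # 1x1 reduction keeps compute low
+            nn.Conv2d(c3_reduce, c3, kernel_size=3, pad_mode="same"),
+        ])
+        self.branch5 = nn.SequentialCell([
+            nn.Conv2d(in_channels, c5_reduce, kernel_size=1),
+            nn.Conv2d(c5_reduce, c5, kernel_size=5, pad_mode="same"),
+        ])
+        self.branch_pool = nn.SequentialCell([
+            nn.MaxPool2d(kernel_size=3, stride=1, pad_mode="same"),
+            nn.Conv2d(in_channels, pool_proj, kernel_size=1),
+        ])
+
+    def construct(self, x):
+        return ops.cat((self.branch1(x), self.branch3(x),
+                        self.branch5(x), self.branch_pool(x)), axis=1)
+```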

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| googlenet | 6.99 | 8 | 32 | 224x224 | O2 | 113s | 23.5 | 10893.62 | 72.89 | 90.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/googlenet/googlenet_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/googlenet/googlenet-de74c31d-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| googlenet | 6.99 | 8 | 32 | 224x224 | O2 | 72s | 21.40 | 11962.62 | 72.68 | 90.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/googlenet/googlenet_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/googlenet/googlenet-5552fcd3.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/googlenet/googlenet_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Szegedy C, Liu W, Jia Y, et al. Going deeper with convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 1-9. diff --git a/mshub_res/assets/mindspore/2.5/hrnet_w32_imagenet2012.md b/mshub_res/assets/mindspore/2.5/hrnet_w32_imagenet2012.md new file mode 100644 index 0000000..44852c6 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/hrnet_w32_imagenet2012.md @@ -0,0 +1,140 @@ +# hrnet_w32 + +--- + +model-name: hrnet_w32 + +backbone-name: HRNet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc80.66 | top5acc95.30 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: e616cdcb + +license: Apache2.0 + +summary: HRNet is used for cv + +--- + +# HRNet + + + +> [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + +High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, the proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams in parallel; (ii) Repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. It shows the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems. + + + +

+_Figure 1. Architecture of HRNet [1]_
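+
+The repeated cross-resolution exchange described above can be sketched with two parallel streams: the high-resolution stream is downsampled with a strided convolution before being added to the low-resolution one, and the low-resolution stream is projected and upsampled before being added back. This toy two-stream fusion (class name, nearest-neighbour upsampling) is an illustrative assumption, not the MindCV implementation.
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class TwoStreamFusion(nn.Cell):
+    """Exchange information between a high-resolution and a low-resolution stream."""
+
+    def __init__(self, high_channels: int, low_channels: int):
+        super().__init__()
+        # high -> low: strided 3x3 convolution halves the spatial size
+        self.down = nn.Conv2d(high_channels, low_channels, kernel_size=3,
+                              stride=2, pad_mode="same")
+        # low -> high: 1x1 convolution to match channels, then upsampling
+        self.up = nn.Conv2d(low_channels, high_channels, kernel_size=1)
+
+    def construct(self, high, low):
+        low_to_high = ops.interpolate(self.up(low), size=high.shape[2:], mode="nearest")
+        high_to_low = self.down(high)
+        return high + low_to_high, low + high_to_low
+```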

+
+## Performance
+
+Our reproduced model performance on ImageNet-1K is reported as follows.
+
+Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode.
+
+| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s   | acc@top1 | acc@top5 | recipe                                                                                        | weight                                                                                               |
+| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ----------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| hrnet_w32  | 41.30     | 8     | 128        | 224x224    | O2        | 1069s         | 238.03  | 4301.98 | 80.66    | 95.30    | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/hrnet/hrnet_w32_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/hrnet/hrnet_w32-e616cdcb-910v2.ckpt) |
+
+Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode.
+
+| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s   | acc@top1 | acc@top5 | recipe                                                                                        | weight                                                                                 |
+| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ----------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
+| hrnet_w32  | 41.30     | 8     | 128        | 224x224    | O2        | 1312s         | 279.10  | 3668.94 | 80.64    | 95.44    | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/hrnet/hrnet_w32_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/hrnet/hrnet_w32-cc4fbd91.ckpt) |
+
+### Notes
+
+- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K.
+
+## Quick Start
+
+### Preparation
+
+#### Installation
+
+Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV.
+
+#### Dataset Preparation
+
+Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation.
+
+### Training
+
+- Distributed Training
+
+  It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run
+
+  ```shell
+  # distributed training on multiple NPU devices
+  msrun --bind_core=True --worker_num 8 python train.py --config configs/hrnet/hrnet_w32_ascend.yaml --data_dir /path/to/imagenet
+  ```
+
+  For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py).
+
+  **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.
+
+- Standalone Training
+
+  If you want to train or finetune the model on a smaller dataset without distributed training, please run:
+
+  ```shell
+  # standalone training on single NPU device
+  python train.py --config configs/hrnet/hrnet_w32_ascend.yaml --data_dir /path/to/dataset --distribute False
+  ```
+
+### Validation
+
+To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`.
+ +```shell +python validate.py -c configs/hrnet/hrnet_w32_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Jingdong Wang, Ke Sun, Tianheng Cheng, et al. Deep High-Resolution Representation Learning for Visual Recognition[J]. arXiv preprint arXiv:1908.07919, 2019. diff --git a/mshub_res/assets/mindspore/2.5/inception_v3_imagenet2012.md b/mshub_res/assets/mindspore/2.5/inception_v3_imagenet2012.md new file mode 100644 index 0000000..2ed4f95 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/inception_v3_imagenet2012.md @@ -0,0 +1,136 @@ +# inception_v3 + +--- + +model-name: inception_v3 + +backbone-name: inception_v3 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc79.25 | top5acc94.47 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 61a8e9ed + +license: Apache2.0 + +summary: inception_v3 is used for cv + +--- + +# InceptionV3 + +> [InceptionV3: Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/pdf/1512.00567.pdf) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +InceptionV3 is an upgraded version of GoogLeNet. One of the most important improvements of V3 is Factorization, which +decomposes 7x7 into two one-dimensional convolutions (1x7, 7x1), and 3x3 is the same (1x3, 3x1), such benefits, both It +can accelerate the calculation (excess computing power can be used to deepen the network), and can split 1 conv into 2 +convs, which further increases the network depth and increases the nonlinearity of the network. It is also worth noting +that the network input from 224x224 has become 299x299, and 35x35/17x17/8x8 modules are designed more precisely. In +addition, V3 also adds batch normalization, which makes the model converge more quickly, which plays a role in partial +regularization and effectively reduces overfitting.[[1](#references)] + +

+_Figure 1. Architecture of InceptionV3 [1]_
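+
+The factorization trick mentioned above (splitting an n x n convolution into a 1 x n followed by an n x 1 convolution) is easy to express directly. The helper below is a hedged MindSpore sketch, with batch normalization after each convolution as the paper suggests; it is not the exact MindCV building block.
+
+```python
+import mindspore.nn as nn
+
+
+def factorized_conv(in_channels: int, out_channels: int, k: int = 7) -> nn.SequentialCell:
+    """Replace a k x k convolution with a 1 x k then a k x 1 convolution,
+    cutting the per-pixel cost from k*k to 2*k multiplications."""
+    return nn.SequentialCell([
+        nn.Conv2d(in_channels, out_channels, kernel_size=(1, k), pad_mode="same"),
+        nn.BatchNorm2d(out_channels),
+        nn.ReLU(),
+        nn.Conv2d(out_channels, out_channels, kernel_size=(k, 1), pad_mode="same"),
+        nn.BatchNorm2d(out_channels),
+        nn.ReLU(),
+    ])
+```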

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------- | +| inception_v3 | 27.20 | 8 | 32 | 299x299 | O2 | 172s | 70.83 | 3614.29 | 79.25 | 94.47 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/inceptionv3/inception_v3_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/inception_v3/inception_v3-61a8e9ed-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | +| inception_v3 | 27.20 | 8 | 32 | 299x299 | O2 | 120s | 76.42 | 3349.91 | 79.11 | 94.40 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/inceptionv3/inception_v3_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/inception_v3/inception_v3-38f67890.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/inceptionv3/inception_v3_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/inceptionv3/inception_v3_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/inceptionv3/inception_v3_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Szegedy C, Vanhoucke V, Ioffe S, et al. Rethinking the inception architecture for computer vision[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 2818-2826. diff --git a/mshub_res/assets/mindspore/2.5/inception_v4_imagenet2012.md b/mshub_res/assets/mindspore/2.5/inception_v4_imagenet2012.md new file mode 100644 index 0000000..2efb625 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/inception_v4_imagenet2012.md @@ -0,0 +1,133 @@ +# inception_v4 + +--- + +model-name: inception_v4 + +backbone-name: inception_v4 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc80.98 | top5acc95.25 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 56e798fc + +license: Apache2.0 + +summary: inception_v4 is used for cv + +--- + +# InceptionV4 + +> [InceptionV4: Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/pdf/1602.07261.pdf) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +InceptionV4 studies whether the Inception module combined with Residual Connection can be improved. It is found that the +structure of ResNet can greatly accelerate the training, and the performance is also improved. An Inception-ResNet v2 +network is obtained, and a deeper and more optimized Inception v4 model is also designed, which can achieve comparable +performance with Inception-ResNet v2.[[1](#references)] + +

+_Figure 1. Architecture of InceptionV4 [1]_
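+
+The combination of inception branches with residual connections discussed above reduces to a small pattern: compute an inception-style branch, scale it down, and add it back to the input. The block below is a toy sketch of that pattern; the branch layout and the 0.2 scaling factor are illustrative assumptions.
+
+```python
+import mindspore.nn as nn
+
+
+class InceptionResidual(nn.Cell):
+    """Residual wrapper around an inception-style branch, as in Inception-ResNet."""
+
+    def __init__(self, channels: int, scale: float = 0.2):
+        super().__init__()
+        self.scale = scale  # small residual scaling stabilizes training of wide branches
+        self.branch = nn.SequentialCell([
+            nn.Conv2d(channels, channels, kernel_size=1),
+            nn.ReLU(),
+            nn.Conv2d(channels, channels, kernel_size=3, pad_mode="same"),
+        ])
+        self.relu = nn.ReLU()
+
+    def construct(self, x):
+        return self.relu(x + self.scale * self.branch(x))
+```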

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------- | +| inception_v4 | 42.74 | 8 | 32 | 299x299 | O2 | 263s | 80.97 | 3161.66 | 80.98 | 95.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/inceptionv4/inception_v4_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/inception_v4/inception_v4-56e798fc-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | +| inception_v4 | 42.74 | 8 | 32 | 299x299 | O2 | 177s | 76.19 | 3360.02 | 80.88 | 95.34 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/inceptionv4/inception_v4_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/inception_v4/inception_v4-db9c45b3.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/inceptionv4/inception_v4_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/inceptionv4/inception_v4_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/inceptionv4/inception_v4_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Szegedy C, Ioffe S, Vanhoucke V, et al. Inception-v4, inception-resnet and the impact of residual connections on learning[C]//Thirty-first AAAI conference on artificial intelligence. 2017. diff --git a/mshub_res/assets/mindspore/2.5/mixnet_s_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mixnet_s_imagenet2012.md new file mode 100644 index 0000000..73e9021 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mixnet_s_imagenet2012.md @@ -0,0 +1,135 @@ +# mixnet_s + +--- + +model-name: mixnet_s + +backbone-name: mixnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc75.58 | top5acc95.54 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: fe4fcc63 + +license: Apache2.0 + +summary: mixnet is used for cv + +--- + +# MixNet + +> [MixConv: Mixed Depthwise Convolutional Kernels](https://arxiv.org/abs/1907.09595) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Depthwise convolution is becoming increasingly popular in modern efficient ConvNets, but its kernel size is often +overlooked. In this paper, the authors systematically study the impact of different kernel sizes, and observe that +combining the benefits of multiple kernel sizes can lead to better accuracy and efficiency. Based on this observation, +the authors propose a new mixed depthwise convolution (MixConv), which naturally mixes up multiple kernel sizes in a +single convolution. As a simple drop-in replacement of vanilla depthwise convolution, our MixConv improves the accuracy +and efficiency for existing MobileNets on both ImageNet classification and COCO object detection.[[1](#references)] + +

+_Figure 1. Architecture of MixNet [1]_
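+
+MixConv itself is simple to sketch: split the channels into groups and give each group a depth-wise convolution with a different kernel size. The version below assumes equally sized groups for brevity (the paper also allows unequal splits) and is illustrative rather than the MindCV implementation.
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class MixConv(nn.Cell):
+    """Depth-wise convolutions with mixed kernel sizes over channel groups."""
+
+    def __init__(self, channels: int, kernel_sizes=(3, 5, 7)):
+        super().__init__()
+        assert channels % len(kernel_sizes) == 0
+        self.group_size = channels // len(kernel_sizes)
+        self.convs = nn.CellList([
+            nn.Conv2d(self.group_size, self.group_size, kernel_size=k,
+                      pad_mode="same", group=self.group_size)  # depth-wise per group
+            for k in kernel_sizes
+        ])
+
+    def construct(self, x):
+        outs = []
+        for i, conv in enumerate(self.convs):
+            outs.append(conv(x[:, i * self.group_size:(i + 1) * self.group_size]))
+        return ops.cat(outs, axis=1)
+```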

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| mixnet_s | 4.17 | 8 | 128 | 224x224 | O2 | 706s | 228.03 | 4490.64 | 75.58 | 95.54 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mixnet/mixnet_s_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mixnet/mixnet_s-fe4fcc63-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| mixnet_s | 4.17 | 8 | 128 | 224x224 | O2 | 556s | 252.49 | 4055.61 | 75.52 | 92.52 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mixnet/mixnet_s_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mixnet/mixnet_s-2a5ef3a3.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple GPU/Ascend devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mixnet/mixnet_s_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mixnet/mixnet_s_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/mixnet/mixnet_s_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Tan M, Le Q V. Mixconv: Mixed depthwise convolutional kernels[J]. arXiv preprint arXiv:1907.09595, 2019. diff --git a/mshub_res/assets/mindspore/2.5/mnasnet_075_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mnasnet_075_imagenet2012.md new file mode 100644 index 0000000..9f26945 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mnasnet_075_imagenet2012.md @@ -0,0 +1,130 @@ +# mnasnet_075 + +--- + +model-name: mnasnet_075 + +backbone-name: mnasnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc71.77 | top5acc90.52 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 083b2bc4 + +license: Apache2.0 + +summary: mnasnet is used for cv + +--- + +# MnasNet + +> [MnasNet: Platform-Aware Neural Architecture Search for Mobile](https://arxiv.org/abs/1807.11626) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Designing convolutional neural networks (CNN) for mobile devices is challenging because mobile models need to be small and fast, yet still accurate. Although significant efforts have been dedicated to design and improve mobile CNNs on all dimensions, it is very difficult to manually balance these trade-offs when there are so many architectural possibilities to consider. In this paper, the authors propose an automated mobile neural architecture search (MNAS) approach, which explicitly incorporate model latency into the main objective so that the search can identify a model that achieves a good trade-off between accuracy and latency. Unlike previous work, where latency is considered via another, often inaccurate proxy (e.g., FLOPS), our approach directly measures real-world inference latency by executing the model on mobile phones. To further strike the right balance between flexibility and search space size, the authors propose a novel factorized hierarchical search space that encourages layer diversity throughout the network.[[1](#references)] + +

+_Figure 1. Architecture of MnasNet [1]_
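+
+The latency-aware search objective described above can be written down in a couple of lines. The function below uses the soft-constraint form from the paper, reward = accuracy x (latency / target)^w with w = -0.07; the target latency used here is an arbitrary example value, not a MindCV setting.
+
+```python
+def mnas_reward(accuracy: float, latency_ms: float,
+                target_ms: float = 75.0, w: float = -0.07) -> float:
+    """Soft latency-aware reward: slower-than-target models are penalized smoothly."""
+    return accuracy * (latency_ms / target_ms) ** w
+
+
+# Same accuracy, but twice the target latency -> lower reward for the slower model.
+print(mnas_reward(0.75, 75.0))   # on-target latency
+print(mnas_reward(0.75, 150.0))  # 2x slower
+```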

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | -------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| mnasnet_075 | 3.20 | 8 | 256 | 224x224 | O2 | 144s | 175.85 | 11646.29 | 71.77 | 90.52 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_0.75_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mnasnet/mnasnet_075-083b2bc4-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| mnasnet_075 | 3.20 | 8 | 256 | 224x224 | O2 | 140s | 165.43 | 12379.86 | 71.81 | 90.53 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_0.75_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_075-465d366d.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mnasnet/mnasnet_0.75_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mnasnet/mnasnet_0.75_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/mnasnet/mnasnet_0.75_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Tan M, Chen B, Pang R, et al. Mnasnet: Platform-aware neural architecture search for mobile[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019: 2820-2828. diff --git a/mshub_res/assets/mindspore/2.5/mobilenet_v1_025_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mobilenet_v1_025_imagenet2012.md new file mode 100644 index 0000000..3444118 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mobilenet_v1_025_imagenet2012.md @@ -0,0 +1,130 @@ +# mobilenet_v1_025 + +--- + +model-name: mobilenet_v1_025 + +backbone-name: mobilenet_v1 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc54.05 | top5acc77.74 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: cbe3d3b3 + +license: Apache2.0 + +summary: mobilenetv1 is used for cv + +--- + +# MobileNetV1 + +> [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Compared with the traditional convolutional neural network, MobileNetV1's parameters and the amount of computation are greatly reduced on the premise that the accuracy rate is slightly reduced. (Compared to VGG16, the accuracy rate is reduced by 0.9%, but the model parameters are only 1/32 of VGG). The model is based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks. At the same time, two simple global hyperparameters are introduced, which can effectively trade off latency and accuracy.[[1](#references)] + +

+*Figure 1. Architecture of MobileNetV1 [1]*
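+
+As a rough illustration of the depthwise separable convolution mentioned in the introduction, the MindSpore sketch below splits a standard convolution into a per-channel 3x3 depthwise convolution followed by a 1x1 pointwise convolution. It is a self-contained example for explanation only, not the code behind the released checkpoint.
+
+```python
+import mindspore.nn as nn
+
+
+class DepthwiseSeparableConv(nn.Cell):
+    """3x3 depthwise conv (group == channels) followed by a 1x1 pointwise conv."""
+
+    def __init__(self, cin: int, cout: int, stride: int = 1):
+        super().__init__()
+        self.block = nn.SequentialCell([
+            nn.Conv2d(cin, cin, kernel_size=3, stride=stride, pad_mode="same", group=cin),
+            nn.BatchNorm2d(cin),
+            nn.ReLU(),
+            nn.Conv2d(cin, cout, kernel_size=1),
+            nn.BatchNorm2d(cout),
+            nn.ReLU(),
+        ])
+
+    def construct(self, x):
+        return self.block(x)
+```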

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v1_025 | 0.47 | 8 | 64 | 224x224 | O2 | 195s | 47.47 | 10785.76 | 54.05 | 77.74 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_025-cbe3d3b3-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- | +| mobilenet_v1_025 | 0.47 | 8 | 64 | 224x224 | O2 | 89s | 42.43 | 12066.93 | 53.87 | 77.66 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_025-d3377fba.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Howard A G, Zhu M, Chen B, et al. Mobilenets: Efficient convolutional neural networks for mobile vision applications[J]. arXiv preprint arXiv:1704.04861, 2017. diff --git a/mshub_res/assets/mindspore/2.5/mobilenet_v2_075_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mobilenet_v2_075_imagenet2012.md new file mode 100644 index 0000000..fa9a35b --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mobilenet_v2_075_imagenet2012.md @@ -0,0 +1,132 @@ +# mobilenet_v2_075 + +--- + +model-name: mobilenet_v2_075 + +backbone-name: mobilenetv2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc69.73 | top1acc89.35 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 755932c4 + +license: Apache2.0 + +summary: mobilenetv2 is used for cv + +--- + +# MobileNetV2 + +> [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +The model is a new neural network architecture that is specifically tailored for mobile and resource-constrained environments. This network pushes the state of the art for mobile custom computer vision models, significantly reducing the amount of operations and memory required while maintaining the same accuracy. + +The main innovation of the model is the proposal of a new layer module: The Inverted Residual with Linear Bottleneck. The module takes as input a low-dimensional compressed representation that is first extended to high-dimensionality and then filtered with lightweight depth convolution. Linear convolution is then used to project the features back to the low-dimensional representation.[[1](#references)] + +

+*Figure 1. Architecture of MobileNetV2 [1]*
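+
+The inverted residual with linear bottleneck described above can be sketched as: expand the channels with a 1x1 convolution, filter with a lightweight 3x3 depthwise convolution, then project back with a 1x1 convolution that has no activation. This is an illustrative MindSpore sketch with assumed layer choices, not the exact block of the released model.
+
+```python
+import mindspore.nn as nn
+
+
+class InvertedResidual(nn.Cell):
+    """Expansion -> depthwise filtering -> linear projection, with a skip when shapes allow it."""
+
+    def __init__(self, cin: int, cout: int, stride: int = 1, expand: int = 6):
+        super().__init__()
+        hidden = cin * expand
+        self.use_skip = stride == 1 and cin == cout
+        self.block = nn.SequentialCell([
+            nn.Conv2d(cin, hidden, kernel_size=1),             # expand to high dimension
+            nn.BatchNorm2d(hidden),
+            nn.ReLU6(),
+            nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride,
+                      pad_mode="same", group=hidden),          # depthwise filtering
+            nn.BatchNorm2d(hidden),
+            nn.ReLU6(),
+            nn.Conv2d(hidden, cout, kernel_size=1),            # linear projection, no activation
+            nn.BatchNorm2d(cout),
+        ])
+
+    def construct(self, x):
+        out = self.block(x)
+        return x + out if self.use_skip else out
+```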

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v2_075 | 2.66 | 8 | 256 | 224x224 | O2 | 233s | 174.65 | 11726.31 | 69.73 | 89.35 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_075-755932c4-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- | +| mobilenet_v2_075 | 2.66 | 8 | 256 | 224x224 | O2 | 164s | 155.94 | 13133.26 | 69.98 | 89.32 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_075-bd7bd4c4.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Sandler M, Howard A, Zhu M, et al. Mobilenetv2: Inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 4510-4520. diff --git a/mshub_res/assets/mindspore/2.5/mobilenet_v3_large_100_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mobilenet_v3_large_100_imagenet2012.md new file mode 100644 index 0000000..f99a445 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mobilenet_v3_large_100_imagenet2012.md @@ -0,0 +1,134 @@ +# mobilenet_v3_large_100 + +--- + +model-name: mobilenet_v3_large_100 + +backbone-name: mobilenetv3 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc75.59 | top1acc92.57 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: bd4e7bdc + +license: Apache2.0 + +summary: mobilenetv3 is used for cv + +--- + +# MobileNetV3 + +> [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +MobileNet v3 was published in 2019, and this v3 version combines the deep separable convolution of v1, the Inverted Residuals and Linear Bottleneck of v2, and the SE module to search the configuration and parameters of the network using NAS (Neural Architecture Search).MobileNetV3 first uses MnasNet to perform a coarse structure search, and then uses reinforcement learning to select the optimal configuration from a set of discrete choices. Afterwards, MobileNetV3 then fine-tunes the architecture using NetAdapt, which exemplifies NetAdapt's complementary capability to tune underutilized activation channels with a small drop. + +mobilenet-v3 offers two versions, mobilenet-v3 large and mobilenet-v3 small, for situations with different resource requirements. The paper mentions that mobilenet-v3 small, for the imagenet classification task, has an accuracy The paper mentions that mobilenet-v3 small achieves about 3.2% better accuracy and 15% less time than mobilenet-v2 for the imagenet classification task, mobilenet-v3 large achieves about 4.6% better accuracy and 5% less time than mobilenet-v2 for the imagenet classification task, mobilenet-v3 large achieves the same accuracy and 25% faster speedup in COCO compared to v2 The improvement in the segmentation algorithm is also observed.[[1](#references)] + +

+*Figure 1. Architecture of MobileNetV3 [1]*
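+
+Since the introduction lists the SE module as one of the ingredients combined in MobileNetV3, the sketch below shows a squeeze-and-excitation block in MindSpore: global average pooling, a small bottleneck, and a hard-sigmoid gate that rescales each channel. The reduction ratio and layer layout are illustrative assumptions, not the exact configuration of the released network.
+
+```python
+import mindspore.nn as nn
+from mindspore import ops
+
+
+class SqueezeExcite(nn.Cell):
+    """Channel attention: squeeze spatially, learn per-channel gates, rescale the input."""
+
+    def __init__(self, channels: int, reduction: int = 4):
+        super().__init__()
+        self.pool = ops.ReduceMean(keep_dims=True)
+        self.gate = nn.SequentialCell([
+            nn.Conv2d(channels, channels // reduction, kernel_size=1, has_bias=True),
+            nn.ReLU(),
+            nn.Conv2d(channels // reduction, channels, kernel_size=1, has_bias=True),
+            nn.HSigmoid(),  # hard sigmoid keeps the gate cheap on mobile hardware
+        ])
+
+    def construct(self, x):
+        scale = self.gate(self.pool(x, (2, 3)))  # (N, C, 1, 1) gating weights
+        return x * scale
+```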

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v3_small_100 | 2.55 | 8 | 75 | 224x224 | O2 | 184s | 52.38 | 11454.75 | 68.07 | 87.77 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-6fa3c17d-910v2.ckpt) | +| mobilenet_v3_large_100 | 5.51 | 8 | 75 | 224x224 | O2 | 354s | 55.89 | 10735.37 | 75.59 | 92.57 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-bd4e7bdc-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v3_small_100 | 2.55 | 8 | 75 | 224x224 | O2 | 145s | 48.14 | 12463.65 | 68.10 | 87.86 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-509c6047.ckpt) | +| mobilenet_v3_large_100 | 5.51 | 8 | 75 | 224x224 | O2 | 271s | 47.49 | 12634.24 | 75.23 | 92.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-1279ad5f.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. 
For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE/CVF international conference on computer vision. 2019: 1314-1324. diff --git a/mshub_res/assets/mindspore/2.5/mobilenet_v3_small_100_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mobilenet_v3_small_100_imagenet2012.md new file mode 100644 index 0000000..6008252 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mobilenet_v3_small_100_imagenet2012.md @@ -0,0 +1,134 @@ +# mobilenet_v3_small_100 + +--- + +model-name: mobilenet_v3_small_100 + +backbone-name: mobilenetv3 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc68.07 | top1acc87.77 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 6fa3c17d + +license: Apache2.0 + +summary: mobilenetv3 is used for cv + +--- + +# MobileNetV3 + +> [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +MobileNet v3 was published in 2019, and this v3 version combines the deep separable convolution of v1, the Inverted Residuals and Linear Bottleneck of v2, and the SE module to search the configuration and parameters of the network using NAS (Neural Architecture Search).MobileNetV3 first uses MnasNet to perform a coarse structure search, and then uses reinforcement learning to select the optimal configuration from a set of discrete choices. Afterwards, MobileNetV3 then fine-tunes the architecture using NetAdapt, which exemplifies NetAdapt's complementary capability to tune underutilized activation channels with a small drop. + +mobilenet-v3 offers two versions, mobilenet-v3 large and mobilenet-v3 small, for situations with different resource requirements. 
The paper reports that, for the ImageNet classification task, mobilenet-v3 small achieves about 3.2% higher accuracy with 15% less latency than mobilenet-v2, while mobilenet-v3 large achieves about 4.6% higher accuracy with 5% less latency than mobilenet-v2. On COCO, mobilenet-v3 large matches the accuracy of mobilenet-v2 with a roughly 25% speedup, and a similar improvement is observed for segmentation.[[1](#references)]
+

+*Figure 1. Architecture of MobileNetV3 [1]*
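+
+MobileNetV3 also uses the hard-swish activation in its deeper layers; the sketch below only spells out the formula, x * ReLU6(x + 3) / 6, as a cheap, quantization-friendly stand-in for swish. MindSpore ships an equivalent built-in (`nn.HSwish`), so this block is purely explanatory.
+
+```python
+import mindspore.nn as nn
+
+
+class HSwish(nn.Cell):
+    """h-swish(x) = x * ReLU6(x + 3) / 6."""
+
+    def __init__(self):
+        super().__init__()
+        self.relu6 = nn.ReLU6()
+
+    def construct(self, x):
+        return x * self.relu6(x + 3.0) / 6.0
+```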

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v3_small_100 | 2.55 | 8 | 75 | 224x224 | O2 | 184s | 52.38 | 11454.75 | 68.07 | 87.77 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-6fa3c17d-910v2.ckpt) | +| mobilenet_v3_large_100 | 5.51 | 8 | 75 | 224x224 | O2 | 354s | 55.89 | 10735.37 | 75.59 | 92.57 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-bd4e7bdc-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- | +| mobilenet_v3_small_100 | 2.55 | 8 | 75 | 224x224 | O2 | 145s | 48.14 | 12463.65 | 68.10 | 87.86 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-509c6047.ckpt) | +| mobilenet_v3_large_100 | 5.51 | 8 | 75 | 224x224 | O2 | 271s | 47.49 | 12634.24 | 75.23 | 92.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-1279ad5f.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. 
For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE/CVF international conference on computer vision. 2019: 1314-1324. diff --git a/mshub_res/assets/mindspore/2.5/mobilevit_xx_small_imagenet2012.md b/mshub_res/assets/mindspore/2.5/mobilevit_xx_small_imagenet2012.md new file mode 100644 index 0000000..7de0153 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/mobilevit_xx_small_imagenet2012.md @@ -0,0 +1,126 @@ +# mobilevit_xx_small + +--- + +model-name: mobilevit_xx_small + +backbone-name: mobilevit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc67.11 | top5acc87.85 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 6f2745c3 + +license: Apache2.0 + +summary: mobilevit is used for cv + +--- + +# MobileViT + +> [MobileViT:Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/pdf/2110.02178.pdf) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +MobileViT, a light-weight and general-purpose vision transformer for mobile devices. MobileViT presents a different perspective for the global processing of information with transformers, i.e., transformers as convolutions. MobileViT significantly outperforms CNN- and ViT-based networks across different tasks and datasets. On the ImageNet-1k dataset, MobileViT achieves top-1 accuracy of 78.4% with about 6 million parameters, which is 3.2% and 6.2% more accurate than MobileNetv3 (CNN-based) and DeIT (ViT-based) for a similar number of parameters. On the MS-COCO object detection task, MobileViT is 5.7% more accurate than MobileNetv3 for a similar number of parameters. + +

+*Figure 1. Architecture of MobileViT [1]*
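+
+The "transformers as convolutions" idea above boils down to unfolding the feature map so that self-attention runs across patches for each pixel position, then folding the result back into a feature map. The NumPy sketch below demonstrates only that unfold/fold reshaping; the patch size and tensor layout are assumptions chosen for clarity, not MobileViT's actual implementation.
+
+```python
+import numpy as np
+
+
+def unfold(x: np.ndarray, ph: int = 2, pw: int = 2) -> np.ndarray:
+    """(B, C, H, W) -> (B, ph*pw, num_patches, C): one sequence per position inside a patch."""
+    b, c, h, w = x.shape
+    x = x.reshape(b, c, h // ph, ph, w // pw, pw)
+    x = x.transpose(0, 3, 5, 2, 4, 1)               # B, ph, pw, H/ph, W/pw, C
+    return x.reshape(b, ph * pw, (h // ph) * (w // pw), c)
+
+
+def fold(x: np.ndarray, h: int, w: int, ph: int = 2, pw: int = 2) -> np.ndarray:
+    """Inverse of unfold: (B, ph*pw, num_patches, C) -> (B, C, H, W)."""
+    b, _, _, c = x.shape
+    x = x.reshape(b, ph, pw, h // ph, w // pw, c)
+    x = x.transpose(0, 5, 3, 1, 4, 2)               # B, C, H/ph, ph, W/pw, pw
+    return x.reshape(b, c, h, w)
+
+
+x = np.arange(2 * 3 * 4 * 4, dtype=np.float32).reshape(2, 3, 4, 4)
+assert np.array_equal(fold(unfold(x), 4, 4), x)     # the round trip recovers the input
+```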

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| mobilevit_xx_small | 1.27 | 8 | 64 | 256x256 | O2 | 437s | 67.24 | 7614.52 | 67.11 | 87.85 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilevit/mobilevit_xx_small_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/mobilevit/mobilevit_xx_small-6f2745c3-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| mobilevit_xx_small | 1.27 | 64 | 8 | 256x256 | O2 | 301s | 53.52 | 9566.52 | 68.91 | 88.91 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilevit/mobilevit_xx_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilevit/mobilevit_xx_small-af9da8a0.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/mobilevit/mobilevit_xx_small_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/mobilevit/mobilevit_xx_small_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/mobilevit/mobilevit_xx_small_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` diff --git a/mshub_res/assets/mindspore/2.5/nasnet_a_4x1056_imagenet2012.md b/mshub_res/assets/mindspore/2.5/nasnet_a_4x1056_imagenet2012.md new file mode 100644 index 0000000..e9d32f1 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/nasnet_a_4x1056_imagenet2012.md @@ -0,0 +1,142 @@ +# nasnet_a_4x1056 + +--- + +model-name: nasnet_a_4x1056 + +backbone-name: nasnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc74.12 | top5acc91.36 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 015ba575c + +license: Apache2.0 + +summary: nasnet is used for cv + +--- + +# NasNet + + + +> [Learning Transferable Architectures for Scalable Image Recognition](https://arxiv.org/abs/1707.07012) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +Neural architecture search (NAS) shows the flexibility on model configuration. By doing neural architecture search in a pooling with convolution layer, max pooling and average pooling layer, +the normal cell and the reduction cell are selected to be part of NasNet. Figure 1 shows NasNet architecture for ImageNet, which are stacked with reduction cell and normal cell. +In conclusion, NasNet could achieve better model performance with fewer model parameters and fewer computation cost on image classification +compared with previous state-of-the-art methods on ImageNet-1K dataset.[[1](#references)] + +

+*Figure 1. Architecture of NasNet [1]*

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | +| nasnet_a_4x1056 | 5.33 | 8 | 256 | 224x224 | O2 | 800s | 364.35 | 5620.97 | 74.12 | 91.36 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/nasnet/nasnet_a_4x1056_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/nasnet/nasnet_a_4x1056-015ba575c-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | +| nasnet_a_4x1056 | 5.33 | 8 | 256 | 224x224 | O2 | 656s | 330.89 | 6189.37 | 73.65 | 91.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/nasnet/nasnet_a_4x1056_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/nasnet/nasnet_a_4x1056-0fbb5cdd.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/nasnet/nasnet_a_4x1056_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/nasnet/nasnet_a_4x1056_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/nasnet/nasnet_a_4x1056_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Zoph B, Vasudevan V, Shlens J, et al. Learning transferable architectures for scalable image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 8697-8710. diff --git a/mshub_res/assets/mindspore/2.5/pit_ti_imagenet2012.md b/mshub_res/assets/mindspore/2.5/pit_ti_imagenet2012.md new file mode 100644 index 0000000..7fd461f --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/pit_ti_imagenet2012.md @@ -0,0 +1,131 @@ +# pit_ti + +--- + +model-name: pit_ti + +backbone-name: pit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc73.26 | top5acc91.57 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 33466a0d + +license: Apache2.0 + +summary: pit is used for cv + +--- + +# PiT + +> [PiT: Rethinking Spatial Dimensions of Vision Transformers](https://arxiv.org/abs/2103.16302v2) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +PiT (Pooling-based Vision Transformer) is an improvement of Vision Transformer (ViT) model proposed by Byeongho Heo in 2021. PiT adds pooling layer on the basis of ViT model, so that the spatial dimension of each layer is reduced like CNN, instead of ViT using the same spatial dimension for all layers. PiT achieves the improved model capability and generalization performance against ViT. [[1](#references)] + +

+*Figure 1. Architecture of PiT [1]*
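+
+To make the pooling idea concrete, the MindSpore sketch below halves the spatial resolution of the patch tokens with a stride-2 depthwise convolution while doubling the channel width, mirroring the CNN-like dimension schedule described above. It is an illustrative sketch (the class token handling is omitted and the layer choice is an assumption), not the exact PiT implementation.
+
+```python
+import mindspore.nn as nn
+
+
+class TokenPooling(nn.Cell):
+    """Reshape tokens to a feature map, downsample 2x, double the channels, flatten back to tokens."""
+
+    def __init__(self, channels: int):
+        super().__init__()
+        self.conv = nn.Conv2d(channels, channels * 2, kernel_size=3, stride=2,
+                              pad_mode="same", group=channels)
+
+    def construct(self, tokens, height, width):
+        b, _, c = tokens.shape                                   # (B, H*W, C) patch tokens
+        fmap = tokens.transpose(0, 2, 1).reshape(b, c, height, width)
+        fmap = self.conv(fmap)                                   # (B, 2C, H/2, W/2)
+        b, c2, h2, w2 = fmap.shape
+        return fmap.reshape(b, c2, h2 * w2).transpose(0, 2, 1)   # (B, N/4, 2C)
+```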

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | +| pit_ti | 4.85 | 8 | 128 | 224x224 | O2 | 212s | 266.47 | 3842.83 | 73.26 | 91.57 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pit/pit_ti_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/pit/pit_ti-33466a0d-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | +| pit_ti | 4.85 | 8 | 128 | 224x224 | O2 | 192s | 271.50 | 3771.64 | 72.96 | 91.33 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pit/pit_ti_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/pit/pit_ti-e647a593.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/pit/pit_xs_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/pit/pit_xs_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path with `--ckpt_path`. + +```shell +python validate.py -c configs/pit/pit_xs_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Heo B, Yun S, Han D, et al. 
Rethinking spatial dimensions of vision transformers[C]//Proceedings of the IEEE/CVF International Conference on Computer Vision. 2021: 11936-11945. diff --git a/mshub_res/assets/mindspore/2.5/poolformer_s12_imagenet2012.md b/mshub_res/assets/mindspore/2.5/poolformer_s12_imagenet2012.md new file mode 100644 index 0000000..7b7271a --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/poolformer_s12_imagenet2012.md @@ -0,0 +1,127 @@ +# poolformer_s12 + +--- + +model-name: poolformer_s12 + +backbone-name: poolformer + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc77.49 | top5acc93.55 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: c7e14eea + +license: Apache2.0 + +summary: poolformer is used for cv + +--- + +# PoolFormer + +> [MetaFormer Is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Instead of designing complicated token mixer to achieve SOTA performance, the target of this work is to demonstrate the competence of Transformer models largely stem from the general architecture MetaFormer. Pooling/PoolFormer are just the tools to support the authors' claim. + +![MetaFormer](https://user-images.githubusercontent.com/74176172/210046827-c218f5d3-1ee8-47bf-a78a-482d821ece89.png) +Figure 1. MetaFormer and performance of MetaFormer-based models on ImageNet-1K validation set. The authors argue that the competence of Transformer/MLP-like models primarily stem from the general architecture MetaFormer instead of the equipped specific token mixers. To demonstrate this, they exploit an embarrassingly simple non-parametric operator, pooling, to conduct extremely basic token mixing. Surprisingly, the resulted model PoolFormer consistently outperforms the DeiT and ResMLP as shown in (b), which well supports that MetaFormer is actually what we need to achieve competitive performance. RSB-ResNet in (b) means the results are from “ResNet Strikes Back” where ResNet is trained with improved training procedure for 300 epochs. + +![PoolFormer](https://user-images.githubusercontent.com/74176172/210046845-6caa1574-b6a4-47f3-8298-c8ca3b4f8fa4.png) +Figure 2. (a) The overall framework of PoolFormer. (b) The architecture of PoolFormer block. Compared with Transformer block, it replaces attention with an extremely simple non-parametric operator, pooling, to conduct only basic token mixing.[[1](#references)] + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. 
+ +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| -------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| poolformer_s12 | 11.92 | 8 | 128 | 224x224 | O2 | 177s | 211.81 | 4834.52 | 77.49 | 93.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/poolformer/poolformer_s12_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/poolformer/poolformer_s12-c7e14eea-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| -------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| poolformer_s12 | 11.92 | 8 | 128 | 224x224 | O2 | 118s | 220.13 | 4651.80 | 77.33 | 93.34 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/poolformer/poolformer_s12_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/poolformer/poolformer_s12-5be5c4e4.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/poolformer/poolformer_s12_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/poolformer/poolformer_s12_ascend.yaml --data_dir /path/to/imagenet --distribute False + ``` + +### Validation + +```text +validation of poolformer has to be done in amp O3 mode which is not supported, coming soon... +``` + +## References + +[1]. Yu W, Luo M, Zhou P, et al. Metaformer is actually what you need for vision[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 
2022: 10819-10829. diff --git a/mshub_res/assets/mindspore/2.5/pvt_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/pvt_tiny_imagenet2012.md new file mode 100644 index 0000000..4aaf579 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/pvt_tiny_imagenet2012.md @@ -0,0 +1,139 @@ +# pvt_tiny + +--- + +model-name: pvt_tiny + +backbone-name: pvt + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc74.88 | top5acc92.12 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 6676051f + +license: Apache2.0 + +summary: pvt is used for cv + +--- + +# Pyramid Vision Transformer + +> [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/abs/2102.12122) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +PVT is a general backbone network for dense prediction without convolution operation. PVT introduces a pyramid structure +in Transformer to generate multi-scale feature maps for dense prediction tasks. PVT uses a gradual reduction strategy to +control the size of the feature maps through the patch embedding layer, and proposes a spatial reduction attention (SRA) +layer to replace the traditional multi head attention layer in the encoder, which greatly reduces the computing/memory +overhead.[[1](#references)] + +![PVT](https://user-images.githubusercontent.com/74176172/210046926-2322161b-a963-4603-b3cb-86ecdca41262.png) + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------- | +| pvt_tiny | 13.23 | 8 | 128 | 224x224 | O2 | 212s | 237.5 | 4311.58 | 74.88 | 92.12 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pvt/pvt_tiny_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/pvt/pvt_tiny-6676051f-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. 
+ +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------- | +| pvt_tiny | 13.23 | 8 | 128 | 224x224 | O2 | 192s | 229.63 | 4459.35 | 74.81 | 92.18 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pvt/pvt_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/pvt/pvt_tiny-6abb953d.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/pvt/pvt_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + > If use Ascend 910 devices, need to open SATURATION_MODE via `export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE"` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep + the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/pvt/pvt_tiny_ascend.yaml --data_dir /path/to/imagenet --distribute False + ``` + + > If use Ascend 910 devices, need to open SATURATION_MODE via `export MS_ASCEND_CHECK_OVERFLOW_MODE="SATURATION_MODE"` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py --model=pvt_tiny --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1]. Wang W, Xie E, Li X, et al. Pyramid vision transformer: A versatile backbone for dense prediction without +convolutions[C]//Proceedings of the IEEE/CVF International Conference on Computer Vision. 2021: 568-578. 
diff --git a/mshub_res/assets/mindspore/2.5/pvt_v2_b0_imagenet2012.md b/mshub_res/assets/mindspore/2.5/pvt_v2_b0_imagenet2012.md new file mode 100644 index 0000000..c57fc1e --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/pvt_v2_b0_imagenet2012.md @@ -0,0 +1,140 @@ +# pvt_v2_b0 + +--- + +model-name: pvt_v2_b0 + +backbone-name: pvt_v2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc71.25 | top5acc90.50 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: d9cd9d6a + +license: Apache2.0 + +summary: pvt_v2 is used for cv + +--- + +# PVTV2 + +> [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +In this work, the authors present new baselines by improving the original Pyramid Vision Transformer (PVT v1) by adding +three designs, including (1) linear complexity attention layer, (2) overlapping patch embedding, and (3) convolutional +feed-forward network. With these modifications, PVT v2 reduces the computational complexity of PVT v1 to linear and +achieves significant improvements on fundamental vision tasks such as classification, detection, and +segmentation.[[1](#references)] + +

+*Figure 1. Architecture of PVTV2 [1]*
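+
+Of the three designs listed above, the overlapping patch embedding is the simplest to illustrate: a strided convolution whose kernel is larger than its stride, so neighbouring patches share pixels. The MindSpore sketch below is for illustration only; the 7/4 kernel/stride setting and the layer names are assumptions, not values read from this repository.
+
+```python
+import mindspore.nn as nn
+
+
+class OverlapPatchEmbed(nn.Cell):
+    """Overlapping patch embedding: kernel_size > stride, so adjacent patches overlap."""
+
+    def __init__(self, in_channels: int = 3, embed_dim: int = 32,
+                 patch_size: int = 7, stride: int = 4):
+        super().__init__()
+        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=stride,
+                              pad_mode="pad", padding=patch_size // 2)
+        self.norm = nn.LayerNorm((embed_dim,))
+
+    def construct(self, x):
+        x = self.proj(x)                                    # (B, D, H/stride, W/stride)
+        b, d, h, w = x.shape
+        tokens = x.reshape(b, d, h * w).transpose(0, 2, 1)  # (B, N, D) token sequence
+        return self.norm(tokens), h, w
+```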

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| pvt_v2_b0 | 3.67 | 8 | 128 | 224x224 | O2 | 323s | 255.76 | 4003.75 | 71.25 | 90.50 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pvtv2/pvt_v2_b0_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/pvt_v2/pvt_v2_b0-d9cd9d6a-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| pvt_v2_b0 | 3.67 | 8 | 128 | 224x224 | O2 | 269s | 269.38 | 3801.32 | 71.50 | 90.60 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/pvtv2/pvt_v2_b0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/pvt_v2/pvt_v2_b0-1c4f6683.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple GPU/Ascend devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/pvtv2/pvt_v2_b0_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/pvtv2/pvt_v2_b0_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/pvtv2/pvt_v2_b0_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Wang W, Xie E, Li X, et al. Pvt v2: Improved baselines with pyramid vision transformer[J]. Computational Visual +Media, 2022, 8(3): 415-424. diff --git a/mshub_res/assets/mindspore/2.5/regnet_x_800mf_imagenet2012.md b/mshub_res/assets/mindspore/2.5/regnet_x_800mf_imagenet2012.md new file mode 100644 index 0000000..abdceb0 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/regnet_x_800mf_imagenet2012.md @@ -0,0 +1,144 @@ +# regnet_x_800mf + +--- + +model-name: regnet_x_800mf + +backbone-name: regnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc76.11 | top5acc93.00 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 68fe1cca + +license: Apache2.0 + +summary: regnet is used for cv + +--- + +# RegNet + +> [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +In this work, the authors present a new network design paradigm that combines the advantages of manual design and NAS. +Instead of focusing on designing individual network instances, they design design spaces that parametrize populations of +networks. Like in manual design, the authors aim for interpretability and to discover general design principles that +describe networks that are simple, work well, and generalize across settings. Like in NAS, the authors aim to take +advantage of semi-automated procedures to help achieve these goals The general strategy they adopt is to progressively +design simplified versions of an initial, relatively unconstrained, design space while maintaining or improving its +quality. The overall process is analogous to manual design, elevated to the population level and guided via distribution +estimates of network design spaces. As a testbed for this paradigm, their focus is on exploring network structure (e.g., +width, depth, groups, etc.) assuming standard model families including VGG, ResNet, and ResNeXt. The authors start with +a relatively unconstrained design space they call AnyNet (e.g., widths and depths vary freely across stages) and apply +their humanin-the-loop methodology to arrive at a low-dimensional design space consisting of simple “regular” networks, +that they call RegNet. The core of the RegNet design space is simple: stage widths and depths are determined by a +quantized linear function. Compared to AnyNet, the RegNet design space has simpler models, is easier to interpret, and +has a higher concentration of good models.[[1](#references)] + +![RegNet](https://user-images.githubusercontent.com/74176172/210046899-4e83bb56-f7f6-49b2-9dde-dce200428e92.png) + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. 
+ +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| -------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| regnet_x_800mf | 7.26 | 8 | 64 | 224x224 | O2 | 228s | 50.74 | 10090.66 | 76.11 | 93.00 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/regnet/regnet_x_800mf_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/regnet/regnet_x_800mf-68fe1cca-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| -------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | --------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | +| regnet_x_800mf | 7.26 | 8 | 64 | 224x224 | O2 | 99s | 42.49 | 12049.89 | 76.04 | 92.97 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/regnet/regnet_x_800mf_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/regnet/regnet_x_800mf-617227f4.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/regnet/regnet_x_800mf_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep + the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/regnet/regnet_x_800mf_ascend.yaml --data_dir /path/to/imagenet --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py --model=regnet_x_800mf --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1]. Radosavovic I, Kosaraju R P, Girshick R, et al. 
Designing network design spaces[C]//Proceedings of the IEEE/CVF +conference on computer vision and pattern recognition. 2020: 10428-10436. diff --git a/mshub_res/assets/mindspore/2.5/repvgg_a0_imagenet2012.md b/mshub_res/assets/mindspore/2.5/repvgg_a0_imagenet2012.md new file mode 100644 index 0000000..0595305 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/repvgg_a0_imagenet2012.md @@ -0,0 +1,152 @@ +# repvgg_a0 + +--- + +model-name: repvgg_a0 + +backbone-name: repvgg + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.29 | top5acc90.78 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: b67a9f15 + +license: Apache2.0 + +summary: repvgg is used for cv + +--- + +# RepVGG + + + +> [RepVGG: Making VGG-style ConvNets Great Again](https://arxiv.org/abs/2101.03697) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +The key idea of Repvgg is that by using re-parameterization, the model architecture could be trained in multi-branch but +validated in single branch. +Figure 1 shows the basic model architecture of Repvgg. By utilizing different values for a and b, we could get various +repvgg models. +Repvgg could achieve better model performance with smaller model parameters on ImageNet-1K dataset compared with +previous methods.[[1](#references)] + +

+Figure 1. Architecture of Repvgg [1]
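
A minimal NumPy sketch of the re-parameterization step described above (illustrative only; the real conversion lives in the MindCV RepVGG code): each branch's conv+BN pair folds into a single kernel and bias, the 1x1 kernel is zero-padded to 3x3, and the per-branch kernels and biases are summed into the single 3x3 convolution used at inference time.

```python
import numpy as np

def fuse_conv_bn(kernel, gamma, beta, mean, var, eps=1e-5):
    """Fold a BatchNorm (gamma, beta, running mean/var) into the preceding conv kernel."""
    std = np.sqrt(var + eps)
    fused_kernel = kernel * (gamma / std)[:, None, None, None]
    fused_bias = beta - gamma * mean / std
    return fused_kernel, fused_bias

def pad_1x1_to_3x3(kernel_1x1):
    """Place a 1x1 kernel at the centre of a zero 3x3 kernel so branches can be summed."""
    return np.pad(kernel_1x1, ((0, 0), (0, 0), (1, 1), (1, 1)))

out_c, in_c = 8, 8
rng = np.random.default_rng(0)
bn = lambda: (rng.random(out_c), rng.random(out_c), rng.random(out_c), rng.random(out_c))

k3, b3 = fuse_conv_bn(rng.standard_normal((out_c, in_c, 3, 3)), *bn())
k1, b1 = fuse_conv_bn(rng.standard_normal((out_c, in_c, 1, 1)), *bn())

# Deploy-time block: one 3x3 conv (the identity+BN branch folds in the same way).
deploy_kernel = k3 + pad_1x1_to_3x3(k1)
deploy_bias = b3 + b1
print(deploy_kernel.shape, deploy_bias.shape)  # (8, 8, 3, 3) (8,)
```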
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| repvgg_a0 | 9.13 | 8 | 32 | 224x224 | O2 | 76s | 24.12 | 10613.60 | 72.29 | 90.78 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a0_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/repvgg/repvgg_a0-b67a9f15-910v2.ckpt) | +| repvgg_a1 | 14.12 | 8 | 32 | 224x224 | O2 | 81s | 28.29 | 9096.13 | 73.68 | 91.51 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a1_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/repvgg/repvgg_a1-a40aa623-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| repvgg_a0 | 9.13 | 8 | 32 | 224x224 | O2 | 50s
| 20.58 | 12439.26 | 72.19 | 90.75 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_a0-6e71139d.ckpt) | +| repvgg_a1 | 14.12 | 8 | 32 | 224x224 | O2 | 29s | 20.70 | 12367.15 | 74.19 | 91.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a1_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_a1-539513ac.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Ding X, Zhang X, Ma N, et al. Repvgg: Making vgg-style convnets great again[C]//Proceedings of the IEEE/CVF +conference on computer vision and pattern recognition. 2021: 13733-13742. 
diff --git a/mshub_res/assets/mindspore/2.5/repvgg_a1_imagenet2012.md b/mshub_res/assets/mindspore/2.5/repvgg_a1_imagenet2012.md new file mode 100644 index 0000000..93b9731 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/repvgg_a1_imagenet2012.md @@ -0,0 +1,152 @@ +# repvgg_a1 + +--- + +model-name: repvgg_a1 + +backbone-name: repvgg + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc73.68 | top5acc91.51 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: a40aa623 + +license: Apache2.0 + +summary: repvgg is used for cv + +--- + +# RepVGG + + + +> [RepVGG: Making VGG-style ConvNets Great Again](https://arxiv.org/abs/2101.03697) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +The key idea of Repvgg is that by using re-parameterization, the model architecture could be trained in multi-branch but +validated in single branch. +Figure 1 shows the basic model architecture of Repvgg. By utilizing different values for a and b, we could get various +repvgg models. +Repvgg could achieve better model performance with smaller model parameters on ImageNet-1K dataset compared with +previous methods.[[1](#references)] + +

+Figure 1. Architecture of Repvgg [1]
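
The only difference between the A-series variants is the pair of width multipliers (a, b) mentioned above: a scales the widths of the first four stages and b scales the last one. A small illustrative calculation follows; the multiplier values are those reported in the RepVGG paper, not values read from the MindCV config.

```python
# Base widths of the five RepVGG stages before scaling.
base_widths = [64, 64, 128, 256, 512]

def repvgg_widths(a, b):
    # The first stage is additionally capped at 64 channels; the last stage uses b.
    return [min(64, int(64 * a))] + [int(w * a) for w in base_widths[1:-1]] + [int(512 * b)]

print("RepVGG-A0:", repvgg_widths(a=0.75, b=2.5))  # [48, 48, 96, 192, 1280]
print("RepVGG-A1:", repvgg_widths(a=1.0, b=2.5))   # [64, 64, 128, 256, 1280]
```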
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| repvgg_a0 | 9.13 | 8 | 32 | 224x224 | O2 | 76s | 24.12 | 10613.60 | 72.29 | 90.78 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a0_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/repvgg/repvgg_a0-b67a9f15-910v2.ckpt) | +| repvgg_a1 | 14.12 | 8 | 32 | 224x224 | O2 | 81s | 28.29 | 9096.13 | 73.68 | 91.51 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a1_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/repvgg/repvgg_a1-a40aa623-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| repvgg_a0 | 9.13 | 8 | 32 | 224x224 | O2 | 50s
| 20.58 | 12439.26 | 72.19 | 90.75 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_a0-6e71139d.ckpt) | +| repvgg_a1 | 14.12 | 8 | 32 | 224x224 | O2 | 29s | 20.70 | 12367.15 | 74.19 | 91.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_a1_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_a1-539513ac.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/repvgg/repvgg_a1_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Ding X, Zhang X, Ma N, et al. Repvgg: Making vgg-style convnets great again[C]//Proceedings of the IEEE/CVF +conference on computer vision and pattern recognition. 2021: 13733-13742. 
diff --git a/mshub_res/assets/mindspore/2.5/res2net50_imagenet2012.md b/mshub_res/assets/mindspore/2.5/res2net50_imagenet2012.md new file mode 100644 index 0000000..8f2aed8 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/res2net50_imagenet2012.md @@ -0,0 +1,141 @@ +# res2net50 + +--- + +model-name: res2net50 + +backbone-name: res2net + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc79.33 | top5acc94.64 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: aa758355 + +license: Apache2.0 + +summary: res2net is used for cv + +--- + +# Res2Net + +> [Res2Net: A New Multi-scale Backbone Architecture](https://arxiv.org/abs/1904.01169) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Res2Net is a novel building block for CNNs proposed by constructing hierarchical residual-like connections within one +single residual block. The Res2Net represents multi-scale features at a granular level and increases the range of +receptive fields for each network layer. Res2Net block can be plugged into the state-of-the-art backbone CNN models, +e.g., ResNet, ResNeXt, and DLA. Ablation studies and experimental results on representative computer vision tasks, i.e., +object detection, class activation mapping, and salient object detection, verify the superiority of the Res2Net over the +state-of-the-art baseline methods such as ResNet-50, DLA-60 and etc. + +

+Figure 1. Architecture of Res2Net [1]
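
A minimal MindSpore sketch of the hierarchical connection inside one block (illustrative only, not the MindCV source): the channels of the 3x3 stage are split into `scale` groups, and each group's 3x3 conv also receives the previous group's output, so the effective receptive field grows within a single block. BatchNorm and ReLU are omitted for brevity.

```python
from mindspore import nn, ops

class Res2Conv(nn.Cell):
    def __init__(self, channels=64, scale=4):
        super().__init__()
        self.scale = scale
        self.width = channels // scale
        self.convs = nn.CellList([
            nn.Conv2d(self.width, self.width, 3, pad_mode="same") for _ in range(scale - 1)
        ])

    def construct(self, x):
        splits = ops.split(x, self.width, axis=1)
        outs, y = [], None
        for i in range(self.scale - 1):
            sp = splits[i] if y is None else splits[i] + y   # feed previous output forward
            y = self.convs[i](sp)
            outs.append(y)
        outs.append(splits[-1])          # last group passes through untouched
        return ops.cat(outs, axis=1)

print(Res2Conv()(ops.ones((1, 64, 56, 56))).shape)  # (1, 64, 56, 56)
```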
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------ | +| res2net50 | 25.76 | 8 | 32 | 224x224 | O2 | 174s | 39.6 | 6464.65 | 79.33 | 94.64 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_50_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/res2net/res2net50-aa758355-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------- | +| res2net50 | 25.76 | 8 | 32 | 224x224 | O2 | 119s | 39.68 | 6451.61 | 79.35 | 94.64 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net50-f42cf71b.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/res2net/res2net_50_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/res2net/res2net_50_ascend.yaml --data_dir /path/to/imagenet --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/res2net/res2net_50_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Gao S H, Cheng M M, Zhao K, et al. Res2net: A new multi-scale backbone architecture[J]. IEEE transactions on pattern +analysis and machine intelligence, 2019, 43(2): 652-662. diff --git a/mshub_res/assets/mindspore/2.5/resnet50_imagenet2012.md b/mshub_res/assets/mindspore/2.5/resnet50_imagenet2012.md new file mode 100644 index 0000000..dbb256a --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/resnet50_imagenet2012.md @@ -0,0 +1,139 @@ +# resnet50 + +--- + +model-name: resnet50 + +backbone-name: resnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc76.76 | top5acc93.31 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: f369a08d + +license: Apache2.0 + +summary: resnet is used for cv + +--- + +# ResNet + +> [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Resnet is a residual learning framework to ease the training of networks that are substantially deeper than those used +previously, which is explicitly reformulated that the layers are learning residual functions with reference to the layer +inputs, instead of learning unreferenced functions. Lots of comprehensive empirical evidence showing that these residual +networks are easier to optimize, and can gain accuracy from considerably increased depth. + +

+Figure 1. Architecture of ResNet [1]
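
Since this asset is published for inference, a typical use is to instantiate the network through MindCV's model registry and load the pretrained weights. A minimal sketch, assuming the checkpoint above is registered under the model name `resnet50`; pass a local `.ckpt` through `checkpoint_path` instead of `pretrained=True` if you downloaded it manually.

```python
import numpy as np
import mindspore as ms
import mindcv

# Build the network and load the released ImageNet-1K weights.
model = mindcv.create_model("resnet50", pretrained=True, num_classes=1000)
model.set_train(False)

# A dummy 224x224 batch; real inputs should use the ImageNet preprocessing
# (resize/crop and mean-std normalisation) from the training recipe.
x = ms.Tensor(np.random.rand(1, 3, 224, 224).astype(np.float32))
logits = model(x)
print(logits.shape)            # (1, 1000)
print(logits.argmax(axis=-1))  # predicted class index
```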
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| resnet50 | 25.61 | 8 | 32 | 224x224 | O2 | 77s | 31.9 | 8025.08 | 76.76 | 93.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_50_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/resnet/resnet50-f369a08d-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| resnet50 | 25.61 | 8 | 32 | 224x224 | O2 | 43s | 31.41 | 8150.27 | 76.69 | 93.50 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet50-e0733ab8.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/resnet/resnet_18_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/resnet/resnet_18_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/resnet/resnet_18_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on +computer vision and pattern recognition. 2016: 770-778. diff --git a/mshub_res/assets/mindspore/2.5/resnetv2_50_imagenet2012.md b/mshub_res/assets/mindspore/2.5/resnetv2_50_imagenet2012.md new file mode 100644 index 0000000..a1b2fc7 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/resnetv2_50_imagenet2012.md @@ -0,0 +1,139 @@ +# resnetv2_50 + +--- + +model-name: resnetv2_50 + +backbone-name: resnetv2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc77.03 | top5acc93.29 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: a0b9f7f8 + +license: Apache2.0 + +summary: resnetv2 is used for cv + +--- + +# ResNetV2 + +> [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Author analyzes the propagation formulations behind the residual building blocks, which suggest that the forward and +backward signals can be directly propagated from one block +to any other block, when using identity mappings as the skip connections and after-addition activation. + +

+Figure 1. Architecture of ResNetV2 [1]
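
A minimal sketch of the pre-activation residual unit that this analysis leads to (illustrative, not the MindCV source): BN and ReLU are moved before each convolution and the skip path stays a pure identity, so there is no activation after the addition.

```python
from mindspore import nn, ops

class PreActBlock(nn.Cell):
    def __init__(self, channels=64):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv1 = nn.Conv2d(channels, channels, 3, pad_mode="same")
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, pad_mode="same")
        self.relu = nn.ReLU()

    def construct(self, x):
        out = self.conv1(self.relu(self.bn1(x)))   # BN -> ReLU -> conv ("pre-activation")
        out = self.conv2(self.relu(self.bn2(out)))
        return x + out                             # identity skip, no post-addition ReLU

print(PreActBlock()(ops.ones((1, 64, 56, 56))).shape)  # (1, 64, 56, 56)
```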
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| resnetv2_50 | 25.60 | 8 | 32 | 224x224 | O2 | 120s | 32.19 | 7781.16 | 77.03 | 93.29 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnetv2/resnetv2_50_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/resnetv2/resnetv2_50-a0b9f7f8-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ----------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | +| resnetv2_50 | 25.60 | 8 | 32 | 224x224 | O2 | 52s | 32.66 | 7838.33 | 76.90 | 93.37 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnetv2/resnetv2_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnetv2/resnetv2_50-3c2f143b.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/resnetv2/resnetv2_50_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/resnetv2/resnetv2_50_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/resnetv2/resnetv2_50_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] He K, Zhang X, Ren S, et al. Identity mappings in deep residual networks[C]//Computer Vision–ECCV 2016: 14th +European Conference, Amsterdam, The Netherlands, October 11–14, 2016, Proceedings, Part IV 14. Springer International +Publishing, 2016: 630-645. diff --git a/mshub_res/assets/mindspore/2.5/resnext50_32x4d_imagenet2012.md b/mshub_res/assets/mindspore/2.5/resnext50_32x4d_imagenet2012.md new file mode 100644 index 0000000..dfcca9e --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/resnext50_32x4d_imagenet2012.md @@ -0,0 +1,142 @@ +# resnext50_32x4d + +--- + +model-name: resnext50_32x4d + +backbone-name: resnext + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc78.64 | top5acc94.18 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 988f75bc + +license: Apache2.0 + +summary: resnext is used for cv + +--- + +# ResNeXt + +> [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +The authors present a simple, highly modularized network architecture for image classification. The network is +constructed by repeating a building block that aggregates a set of transformations with the same topology. The simple +design results in a homogeneous, multi-branch architecture that has only a few hyper-parameters to set. This strategy +exposes a new dimension, which the authors call "cardinality" (the size of the set of transformations), as an essential +factor in addition to the dimensions of depth and width. On the ImageNet-1K dataset, the authors empirically show that +even under the restricted condition of maintaining complexity, increasing cardinality is able to improve classification +accuracy.[[1](#references)] + +

+Figure 1. Architecture of ResNeXt [1]
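
In practice the parallel paths of identical topology collapse into a single grouped convolution, which is how the "cardinality" dimension is implemented. A minimal MindSpore sketch of a resnext50_32x4d-style bottleneck (BatchNorm omitted for brevity; illustrative, not the MindCV source):

```python
from mindspore import nn, ops

class ResNeXtBottleneck(nn.Cell):
    def __init__(self, channels=256, width=128, cardinality=32):
        super().__init__()
        self.reduce = nn.Conv2d(channels, width, 1)
        # 32 groups x 4 channels each: the aggregated transformations.
        self.grouped = nn.Conv2d(width, width, 3, pad_mode="same", group=cardinality)
        self.expand = nn.Conv2d(width, channels, 1)
        self.relu = nn.ReLU()

    def construct(self, x):
        y = self.relu(self.reduce(x))
        y = self.relu(self.grouped(y))
        return self.relu(x + self.expand(y))

print(ResNeXtBottleneck()(ops.ones((1, 256, 56, 56))).shape)  # (1, 256, 56, 56)
```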
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ----------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | +| resnext50_32x4d | 25.10 | 8 | 32 | 224x224 | O2 | 156s | 44.61 | 5738.62 | 78.64 | 94.18 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext50_32x4d_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/resnext/resnext50_32x4d-988f75bc-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| --------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ----------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | +| resnext50_32x4d | 25.10 | 8 | 32 | 224x224 | O2 | 49s | 37.22 | 6878.02 | 78.53 | 94.10 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext50_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnext/resnext50_32x4d-af8aba16.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/resnext/resnext50_32x4d_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/resnext/resnext50_32x4d_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/resnext/resnext50_32x4d_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Xie S, Girshick R, Dollár P, et al. Aggregated residual transformations for deep neural networks[C]//Proceedings of +the IEEE conference on computer vision and pattern recognition. 2017: 1492-1500. diff --git a/mshub_res/assets/mindspore/2.5/rexnet_09_imagenet2012.md b/mshub_res/assets/mindspore/2.5/rexnet_09_imagenet2012.md new file mode 100644 index 0000000..6a7736e --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/rexnet_09_imagenet2012.md @@ -0,0 +1,129 @@ +# rexnet_09 + +--- + +model-name: rexnet_09 + +backbone-name: rexnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc76.14 | top5acc92.96 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 00223eb4 + +license: Apache2.0 + +summary: rexnet is used for cv + +--- + +# ReXNet + +> [ReXNet: Rethinking Channel Dimensions for Efficient Model Design](https://arxiv.org/abs/2007.00992) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +ReXNets is a new model achieved based on parameterization. It utilizes a new search method for a channel configuration +via piece-wise linear functions of block index. The search space contains the conventions, and an effective channel +configuration that can be parameterized by a linear function of the block index is used. ReXNets outperforms the recent +lightweight models including NAS-based models and further showed remarkable fine-tuning performances on COCO object +detection, instance segmentation, and fine-grained classifications. + +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/rexnet/rexnet_x09_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/rexnet/rexnet_x09_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/rexnet/rexnet_x09_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Han D, Yun S, Heo B, et al. Rethinking channel dimensions for efficient model design[C]//Proceedings of the IEEE/CVF +conference on Computer Vision and Pattern Recognition. 2021: 732-741. diff --git a/mshub_res/assets/mindspore/2.5/seresnet18_imagenet2012.md b/mshub_res/assets/mindspore/2.5/seresnet18_imagenet2012.md new file mode 100644 index 0000000..53da02d --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/seresnet18_imagenet2012.md @@ -0,0 +1,141 @@ +# seresnet18 + +--- + +model-name: seresnet18 + +backbone-name: resnet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.05 | top5acc90.59 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 7b971c78 + +license: Apache2.0 + +summary: senet is used for cv + +--- + +# SENet + +> [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +In this work, the authors focus instead on the channel relationship and propose a novel architectural unit, which the +authors term the "Squeeze-and-Excitation" (SE) block, that adaptively recalibrates channel-wise feature responses by +explicitly modelling interdependencies between channels. The results show that these blocks can be stacked together to +form SENet architectures that generalise extremely effectively across different datasets. The authors further +demonstrate that SE blocks bring significant improvements in performance for existing state-of-the-art CNNs at slight +additional computational cost.[[1](#references)] + +

+Figure 1. Architecture of SENet [1]
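
A minimal MindSpore sketch of one SE block (illustrative; `reduction=16` is the paper's default rather than a value read from the MindCV config): global-average-pool the feature map (squeeze), run the result through a small bottleneck MLP ending in a sigmoid (excitation), and rescale the channels.

```python
from mindspore import nn, ops

class SEBlock(nn.Cell):
    def __init__(self, channels=64, reduction=16):
        super().__init__()
        self.fc = nn.SequentialCell([
            nn.Dense(channels, channels // reduction),
            nn.ReLU(),
            nn.Dense(channels // reduction, channels),
            nn.Sigmoid(),
        ])

    def construct(self, x):
        n, c, _, _ = x.shape
        s = x.mean(axis=(2, 3))              # squeeze: one descriptor per channel, (N, C)
        w = self.fc(s).reshape(n, c, 1, 1)   # excitation: per-channel weights in (0, 1)
        return x * w                         # recalibrate the original feature map

print(SEBlock()(ops.ones((1, 64, 32, 32))).shape)  # (1, 64, 32, 32)
```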
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| seresnet18 | 11.80 | 8 | 64 | 224x224 | O2 | 90s | 51.09 | 10021.53 | 72.05 | 90.59 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnet18_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/senet/seresnet18-7b971c78-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| seresnet18 | 11.80 | 8 | 64 | 224x224 | O2 | 43s | 44.40 | 11531.53 | 71.81 | 90.49 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnet18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnet18-7880643b.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/senet/seresnet50_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/senet/seresnet50_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/senet/seresnet50_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Hu J, Shen L, Sun G. Squeeze-and-excitation networks[C]//Proceedings of the IEEE conference on computer vision and +pattern recognition. 2018: 7132-7141. diff --git a/mshub_res/assets/mindspore/2.5/shufflenet_v1_g3_05_imagenet2012.md b/mshub_res/assets/mindspore/2.5/shufflenet_v1_g3_05_imagenet2012.md new file mode 100644 index 0000000..56b3ec6 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/shufflenet_v1_g3_05_imagenet2012.md @@ -0,0 +1,140 @@ +# shufflenet_v1_g3_05 + +--- + +model-name: shufflenet_v1_g3_05 + +backbone-name: shufflenetv1 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc57.08 | top5acc79.89 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 56209ef3 + +license: Apache2.0 + +summary: shufflenetv1 is used for cv + +--- + +# ShuffleNetV1 + +> [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +ShuffleNet is a computationally efficient CNN model proposed by KuangShi Technology in 2017, which, like MobileNet and +SqueezeNet, etc., is mainly intended to be applied to mobile. ShuffleNet uses two operations at its core: pointwise +group convolution and channel shuffle, which greatly reduces the model computation while maintaining accuracy. +ShuffleNet designs more efficient network structures to achieve smaller and faster models, instead of compressing or +migrating a large trained model. + +

+Figure 1. Architecture of ShuffleNetV1 [1]
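
The channel shuffle operation itself is just a reshape-transpose-reshape, which is what lets information cross the groups of the pointwise group convolutions. A minimal sketch of the operation (illustrative, not the MindCV source):

```python
from mindspore import ops

def channel_shuffle(x, groups):
    """Interleave channels across groups: (N, C, H, W) -> reshape -> swap axes -> flatten."""
    n, c, h, w = x.shape
    x = x.reshape(n, groups, c // groups, h, w)
    x = x.transpose(0, 2, 1, 3, 4)
    return x.reshape(n, c, h, w)

x = ops.arange(12).reshape(1, 12, 1, 1)  # channels labelled 0..11
print(channel_shuffle(x, groups=3).reshape(-1))
# [ 0  4  8  1  5  9  2  6 10  3  7 11]: each output trio mixes all three groups
```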
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------- | +| shufflenet_v1_g3_05 | 0.73 | 8 | 64 | 224x224 | O2 | 191s | 47.77 | 10718.02 | 57.08 | 79.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_05-56209ef3-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------ | +| shufflenet_v1_g3_05 | 0.73 | 8 | 64 | 224x224 | O2 | 169s | 40.62 | 12604.63 | 57.05 | 79.73 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_05-42cfe109.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Zhang X, Zhou X, Lin M, et al. Shufflenet: An extremely efficient convolutional neural network for mobile devices[C] +//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 6848-6856. diff --git a/mshub_res/assets/mindspore/2.5/shufflenet_v2_x0_5_imagenet2012.md b/mshub_res/assets/mindspore/2.5/shufflenet_v2_x0_5_imagenet2012.md new file mode 100644 index 0000000..6fa6c9f --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/shufflenet_v2_x0_5_imagenet2012.md @@ -0,0 +1,152 @@ +# shufflenet_v2_x0_5 + +--- + +model-name: shufflenet_v2_x0_5 + +backbone-name: shufflenetv2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc60.65 | top5acc82.26 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 39d05bb6 + +license: Apache2.0 + +summary: shufflenetv2 is used for cv + +--- + +# ShuffleNetV2 + +> [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +A key point was raised in ShuffleNetV2, where previous lightweight networks were guided by computing an indirect measure +of network complexity, namely FLOPs. The speed of lightweight networks is described by calculating the amount of +floating point operations. But the speed of operation was never considered directly. The running speed in mobile devices +needs to consider not only FLOPs, but also other factors such as memory accesscost and platform characterics. + +Therefore, based on these two principles, ShuffleNetV2 proposes four effective network design principles. + +- MAC is minimized when the input feature matrix of the convolutional layer is equal to the output feature + matrixchannel (when FLOPs are kept constant). +- MAC increases when the groups of GConv increase (while keeping FLOPs constant). +- the higher the fragmentation of the network design, the slower the speed. +- The impact of Element-wise operation is not negligible. + +

+Figure 1. Architecture Design in ShuffleNetV2 [1]

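+To make guideline G1 concrete, the short Python check below compares the memory access cost (MAC) of a 1x1 convolution for several input/output channel splits that all share the same FLOPs. The feature-map size and channel numbers are illustrative only and are not taken from the training recipe.
+
+```python
+# A small numeric check of guideline G1 from the ShuffleNetV2 paper: for a 1x1
+# convolution with fixed FLOPs, memory access cost (MAC) is smallest when the input
+# and output channel counts are equal. All values below are illustrative.
+
+def flops_1x1(h, w, c1, c2):
+    """Multiply-accumulate count of a 1x1 convolution."""
+    return h * w * c1 * c2
+
+def mac_1x1(h, w, c1, c2):
+    """Memory access cost: read input (h*w*c1), write output (h*w*c2), read weights (c1*c2)."""
+    return h * w * (c1 + c2) + c1 * c2
+
+h, w = 56, 56
+for c1, c2 in [(64, 1024), (128, 512), (256, 256), (512, 128)]:   # all have c1 * c2 = 65536
+    print(f"c1={c1:4d} c2={c2:4d}  FLOPs={flops_1x1(h, w, c1, c2):,}  MAC={mac_1x1(h, w, c1, c2):,}")
+# The balanced split c1 == c2 == 256 yields the smallest MAC for the same FLOPs.
+```
+
+This is why ShuffleNetV2 blocks keep equal input and output channel widths wherever possible.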
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------- | +| shufflenet_v2_x0_5 | 1.37 | 8 | 64 | 224x224 | O2 | 100s | 47.32 | 10819.95 | 60.65 | 82.26 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x0_5-39d05bb6-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------ | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------- | +| shufflenet_v2_x0_5 | 1.37 | 8 | 64 | 224x224 | O2 | 62s | 41.87 | 12228.33 | 60.53 | 82.11 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x0_5-8c841061.ckpt) | + +### Notes + +- All models are trained on ImageNet-1K training set and the top-1 accuracy is reported on the validatoin set. +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Ma N, Zhang X, Zheng H T, et al. Shufflenet v2: Practical guidelines for efficient cnn architecture design[C] +//Proceedings of the European conference on computer vision (ECCV). 2018: 116-131. diff --git a/mshub_res/assets/mindspore/2.5/skresnet18_imagenet2012.md b/mshub_res/assets/mindspore/2.5/skresnet18_imagenet2012.md new file mode 100644 index 0000000..69610e5 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/skresnet18_imagenet2012.md @@ -0,0 +1,168 @@ +# skresnet18 + +--- + +model-name: skresnet18 + +backbone-name: sknet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.85 | top5acc90.83 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 9d8b1afc + +license: Apache2.0 + +summary: sknet is used for cv + +--- + +# SKNet + +> [Selective Kernel Networks](https://arxiv.org/pdf/1903.06586) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +The local receptive fields (RFs) of neurons in the primary visual cortex (V1) of cats [[1](#references)] have inspired +the +construction of Convolutional Neural Networks (CNNs) [[2](#references)] in the last century, and it continues to inspire +modern CNN +structure construction. For instance, it is well-known that in the visual cortex, the RF sizes of neurons in the +same area (e.g.,V1 region) are different, which enables the neurons to collect multi-scale spatial information in the +same processing stage. This mechanism has been widely adopted in recent Convolutional Neural Networks (CNNs). +A typical example is InceptionNets [[3](#references), [4](#references), [5](#references), [6](#references)], in which a +simple concatenation is designed to aggregate +multi-scale information from, e.g., 3×3, 5×5, 7×7 convolutional kernels inside the “inception” building block. + +

+Figure 1. Selective Kernel Convolution.

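+As a rough illustration of how a Selective Kernel unit aggregates multi-scale information (rather than simply concatenating branch outputs as Inception does), the NumPy sketch below mimics the split, fuse, and select steps with random placeholder weights. The helper names, shapes, and reduction ratio are ad hoc choices for the sketch, not the exact SKNet settings.
+
+```python
+# A rough NumPy sketch of the split-fuse-select steps in a Selective Kernel (SK) unit.
+# Branch outputs and all weight matrices are random placeholders.
+import numpy as np
+
+rng = np.random.default_rng(0)
+C, H, W, r = 64, 32, 32, 16               # channels, spatial size, reduction ratio
+
+u3 = rng.standard_normal((C, H, W))       # stands in for the 3x3-kernel branch output
+u5 = rng.standard_normal((C, H, W))       # stands in for the 5x5 (dilated 3x3) branch output
+
+# Fuse: sum the branches and squeeze spatial information with global average pooling.
+s = (u3 + u5).mean(axis=(1, 2))                               # (C,)
+z = np.maximum(rng.standard_normal((C // r, C)) @ s, 0.0)     # compact feature after ReLU
+
+# Select: per-branch channel scores, softmax across branches, weighted sum of branches.
+wa = rng.standard_normal((C, C // r))
+wb = rng.standard_normal((C, C // r))
+scores = np.stack([wa @ z, wb @ z])                           # (2, C)
+scores -= scores.max(axis=0, keepdims=True)
+attn = np.exp(scores) / np.exp(scores).sum(axis=0, keepdims=True)
+
+v = attn[0][:, None, None] * u3 + attn[1][:, None, None] * u5
+print(v.shape)   # (64, 32, 32): each channel mixes the two kernel sizes with soft attention weights
+```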
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| skresnet18 | 11.97 | 8 | 64 | 224x224 | O2 | 134s | 49.83 | 10274.93 | 72.85 | 90.83 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/sknet/skresnet18_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/sknet/skresnet18-9d8b1afc-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| skresnet18 | 11.97 | 8 | 64 | 224x224 | O2 | 60s | 45.84 | 11169.28 | 73.09 | 91.20 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/sknet/skresnet18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/sknet/skresnet18-868228e5.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/sknet/skresnext50_32x4d_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/sknet/skresnext50_32x4d_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/sknet/skresnext50_32x4d_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] D. H. Hubel and T. N. Wiesel. Receptive fields, binocular interaction and functional architecture in the cat’s +visual +cortex. The Journal of Physiology, 1962. + +[2] Y . LeCun, B. Boser, J. S. Denker, D. Henderson, R. E. Howard, W. Hubbard, and L. D. Jackel. Backpropagation +applied to handwritten zip code recognition. Neural Computation, 1989. + +[3] C. Szegedy, V . V anhoucke, S. Ioffe, J. Shlens, and Z. Wojna. Rethinking the inception architecture for computer +vision. In +CVPR, 2016. + +[4] S. Ioffe and C. Szegedy. Batch normalization: Accelerating deep network training by reducing internal covariate +shift. +arXiv preprint arXiv:1502.03167, 2015. + +[5] C. Szegedy, V . V anhoucke, S. Ioffe, J. Shlens, and Z. Wojna. Rethinking the inception architecture for computer +vision. In +CVPR, 2016. + +[6] C. Szegedy, S. Ioffe, V . V anhoucke, and A. A. Alemi. Inception-v4, inception-resnet and the impact of residual +connections on learning. In AAAI, 2017. diff --git a/mshub_res/assets/mindspore/2.5/squeezenet1_0_imagenet2012.md b/mshub_res/assets/mindspore/2.5/squeezenet1_0_imagenet2012.md new file mode 100644 index 0000000..c5f4dd2 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/squeezenet1_0_imagenet2012.md @@ -0,0 +1,146 @@ +# squeezenet1_0 + +--- + +model-name: squeezenet1_0 + +backbone-name: squeezenet + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc58.75 | top5acc80.76 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 24010b28 + +license: Apache2.0 + +summary: squeezenet is used for cv + +--- + +# SqueezeNet + +> [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +SqueezeNet is a smaller CNN architectures which is comprised mainly of Fire modules and it achieves AlexNet-level +accuracy on ImageNet with 50x fewer parameters. SqueezeNet can offer at least three advantages: (1) Smaller CNNs require +less communication across servers during distributed training. (2) Smaller CNNs require less bandwidth to export a new +model from the cloud to an autonomous car. (3) Smaller CNNs are more feasible to deploy on FPGAs and other hardware with +limited memory. Additionally, with model compression techniques, SqueezeNet is able to be compressed to less than +0.5MB (510× smaller than AlexNet). Blow is macroarchitectural view of SqueezeNet architecture. Left: SqueezeNet ; +Middle: SqueezeNet with simple bypass; Right: SqueezeNet with complex bypass. + +

+Figure 1. Architecture of SqueezeNet [1]

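+A quick way to see where the parameter savings come from is to count the weights of a single Fire module against a plain 3x3 convolution of the same input/output width. The channel sizes below follow the early fire2 stage of SqueezeNet v1.0 (96 input channels, squeeze 16, expand 64 + 64); biases are ignored and the helper functions are ad hoc.
+
+```python
+# Parameter counting for a SqueezeNet Fire module versus a plain 3x3 convolution
+# with the same input and output width (biases ignored for brevity).
+
+def fire_params(c_in, squeeze, expand1x1, expand3x3):
+    """Fire module: 1x1 squeeze, then parallel 1x1 and 3x3 expand convs, concatenated."""
+    return (c_in * squeeze                 # 1x1 squeeze
+            + squeeze * expand1x1          # 1x1 expand
+            + 9 * squeeze * expand3x3)     # 3x3 expand
+
+def conv3x3_params(c_in, c_out):
+    return 9 * c_in * c_out
+
+c_in, squeeze, e1, e3 = 96, 16, 64, 64
+fire = fire_params(c_in, squeeze, e1, e3)
+plain = conv3x3_params(c_in, e1 + e3)
+print(f"Fire module: {fire:,} params")        # 11,776
+print(f"Plain 3x3 conv: {plain:,} params")    # 110,592
+print(f"Reduction: {plain / fire:.1f}x")      # roughly 9x fewer parameters at equal width
+```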
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- | +| squeezenet1_0 | 1.25 | 8 | 32 | 224x224 | O2 | 64s | 23.48 | 10902.90 | 58.75 | 80.76 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.0_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/squeezenet/squeezenet1_0-24010b28-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | +| squeezenet1_0 | 1.25 | 8 | 32 | 224x224 | O2 | 45s | 22.36 | 11449.02 | 58.67 | 80.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_0-eb911778.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/squeezenet/squeezenet_1.0_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/squeezenet/squeezenet_1.0_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/squeezenet/squeezenet_1.0_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Iandola F N, Han S, Moskewicz M W, et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB +model size[J]. arXiv preprint arXiv:1602.07360, 2016. diff --git a/mshub_res/assets/mindspore/2.5/swin_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/swin_tiny_imagenet2012.md new file mode 100644 index 0000000..8ea9a51 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/swin_tiny_imagenet2012.md @@ -0,0 +1,153 @@ +# swin_tiny + +--- + +model-name: swin_tiny + +backbone-name: swin_transformer + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc80.90 | top5acc94.90 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 72b3c5e6 + +license: Apache2.0 + +summary: swin_transformer is used for cv + +--- + +# Swin Transformer + + + +> [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +The key idea of Swin transformer is that the features in shifted window go through transformer module rather than the +whole feature map. +Besides that, Swin transformer extracts features of different levels. Additionally, compared with Vision Transformer ( +ViT), the resolution +of Swin Transformer in different stages varies so that features with different sizes could be learned. Figure 1 shows +the model architecture +of Swin transformer. Swin transformer could achieve better model performance with smaller model parameters and less +computation cost +on ImageNet-1K dataset compared with ViT and ResNet.[[1](#references)] + +

+Figure 1. Architecture of Swin Transformer [1]

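+The window-based attention described above can be illustrated with a small NumPy sketch of the window partition and the cyclic shift used in alternating blocks. The feature-map size, channel count, and window size are illustrative defaults, not values read from the training recipe, and the helper function is ad hoc.
+
+```python
+# A NumPy sketch of Swin's window partition: self-attention runs inside non-overlapping
+# local windows (cyclically shifted in alternating blocks) instead of over the whole
+# feature map.
+import numpy as np
+
+def window_partition(x, ws):
+    """Split an (H, W, C) feature map into (num_windows, ws*ws, C) groups of tokens."""
+    H, W, C = x.shape
+    x = x.reshape(H // ws, ws, W // ws, ws, C)
+    return x.transpose(0, 2, 1, 3, 4).reshape(-1, ws * ws, C)
+
+H, W, C, ws = 56, 56, 96, 7
+feat = np.random.randn(H, W, C)
+
+windows = window_partition(feat, ws)                  # (64, 49, 96): attention is computed per window
+shift = ws // 2
+shifted = np.roll(feat, shift=(-shift, -shift), axis=(0, 1))
+shifted_windows = window_partition(shifted, ws)       # shifted windows bridge neighboring windows
+print(windows.shape, shifted_windows.shape)
+```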
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| swin_tiny | 33.38 | 8 | 256 | 224x224 | O2 | 266s | 466.6 | 4389.20 | 80.90 | 94.90 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformer/swin_tiny_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/swin/swin_tiny-72b3c5e6-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | +| swin_tiny | 33.38 | 8 | 256 | 224x224 | O2 | 226s | 454.49 | 4506.15 | 80.82 | 94.80 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformer/swin_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/swin/swin_tiny-0ff2f96d.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/swintransformer/swin_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/swintransformer/swin_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. 
+ +```shell +python validate.py -c configs/swintransformer/swin_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Liu Z, Lin Y, Cao Y, et al. Swin transformer: Hierarchical vision transformer using shifted windows[C]//Proceedings +of the IEEE/CVF international conference on computer vision. 2021: 10012-10022. diff --git a/mshub_res/assets/mindspore/2.5/swinv2_tiny_window8_imagenet2012.md b/mshub_res/assets/mindspore/2.5/swinv2_tiny_window8_imagenet2012.md new file mode 100644 index 0000000..df39e6b --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/swinv2_tiny_window8_imagenet2012.md @@ -0,0 +1,143 @@ +# swinv2_tiny_window8 + +--- + +model-name: swinv2_tiny_window8 + +backbone-name: swin_transformer_v2 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc81.38 | top5acc95.46 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 70c5e903 + +license: Apache2.0 + +summary: swin_transformer_v2 is used for cv + +--- + +# Swin Transformer V2 + +> [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +This paper aims to explore large-scale models in computer vision. The authors tackle three major issues in training and +application of large vision models, including training instability, resolution gaps between pre-training and +fine-tuning, and hunger on labelled data. Three main techniques are proposed: 1) a residual-post-norm method combined +with cosine attention to improve training stability; 2) A log-spaced continuous position bias method to effectively +transfer models pre-trained using low-resolution images to downstream tasks with high-resolution inputs; 3) A +self-supervised pre-training method, SimMIM, to reduce the needs of vast labeled images. This model set new performance +records on 4 representative vision tasks, including ImageNet-V2 image classification, COCO object detection, ADE20K +semantic segmentation, and Kinetics-400 video action classification.[[1](#references)] + +

+Figure 1. Architecture of Swin Transformer V2 [1]

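+The first technique, cosine attention, can be sketched in a few lines of NumPy: attention logits are cosine similarities between queries and keys divided by a temperature, which keeps their magnitude bounded. The temperature is learnable in the paper; a fixed illustrative value is used here, and the log-spaced relative position bias is omitted.
+
+```python
+# A NumPy sketch of scaled cosine attention as described for Swin Transformer V2.
+# The temperature is a fixed placeholder and the relative position bias is left out.
+import numpy as np
+
+def scaled_cosine_attention(q, k, v, tau=0.07):
+    qn = q / np.linalg.norm(q, axis=-1, keepdims=True)   # unit-normalize each query
+    kn = k / np.linalg.norm(k, axis=-1, keepdims=True)   # unit-normalize each key
+    logits = (qn @ kn.T) / tau                           # cosine similarity over temperature
+    logits -= logits.max(axis=-1, keepdims=True)         # numerical stability
+    attn = np.exp(logits)
+    attn /= attn.sum(axis=-1, keepdims=True)             # softmax over keys
+    return attn @ v
+
+tokens, dim = 64, 32                                     # e.g. one 8x8 window
+q, k, v = (np.random.randn(tokens, dim) for _ in range(3))
+print(scaled_cosine_attention(q, k, v).shape)            # (64, 32)
+```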
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| swinv2_tiny_window8 | 28.78 | 8 | 128 | 256x256 | O2 | 385s | 335.18 | 3055.07 | 81.38 | 95.46 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/swinv2/swinv2_tiny_window8-70c5e903-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.3.1 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ------------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| swinv2_tiny_window8 | 28.78 | 8 | 128 | 256x256 | O2 | 273s | 317.19 | 3228.35 | 81.42 | 95.43 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/swinv2/swinv2_tiny_window8-3ef8b787.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Liu Z, Hu H, Lin Y, et al. Swin transformer v2: Scaling up capacity and resolution[C]//Proceedings of the IEEE/CVF +conference on computer vision and pattern recognition. 2022: 12009-12019. diff --git a/mshub_res/assets/mindspore/2.5/vgg13_imagenet2012.md b/mshub_res/assets/mindspore/2.5/vgg13_imagenet2012.md new file mode 100644 index 0000000..941351a --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/vgg13_imagenet2012.md @@ -0,0 +1,151 @@ +# vgg13 + +--- + +model-name: vgg13 + +backbone-name: vgg13 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.81 | top5acc91.02 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 7756f33c + +license: Apache2.0 + +summary: vgg13 is used for cv + +--- + +# VGGNet + + + +> [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +Figure 1 shows the model architecture of VGGNet. VGGNet is a key milestone on image classification task. It expands the +model to 16-19 layers for the first time. The key motivation of this model is +that it shows usage of 3x3 kernels is efficient and by adding 3x3 kernels, it could have the same effect as 5x5 or 7x7 +kernels. VGGNet could achieve better model performance compared with previous +methods such as GoogleLeNet and AlexNet on ImageNet-1K dataset.[[1](#references)] + +

+Figure 1. Architecture of VGG [1]

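+The receptive-field claim is easy to verify numerically: two stacked 3x3 convolutions cover a 5x5 region and three cover a 7x7 region, while using fewer weights than a single large kernel. The channel width below is an illustrative value and biases are ignored.
+
+```python
+# Receptive field and weight count of stacked 3x3 convolutions versus one larger kernel.
+
+def stacked_receptive_field(n):
+    rf = 1
+    for _ in range(n):        # each stride-1 3x3 layer grows the receptive field by 2
+        rf += 2
+    return rf
+
+C = 256
+for n, k in [(2, 5), (3, 7)]:
+    assert stacked_receptive_field(n) == k
+    stacked = n * 9 * C * C                    # n stacked 3x3 layers
+    single = k * k * C * C                     # one k x k layer
+    print(f"{n} stacked 3x3 convs: receptive field {k}x{k}, {stacked:,} vs {single:,} weights")
+```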
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | +| vgg13 | 133.04 | 8 | 32 | 224x224 | O2 | 41s | 30.52 | 8387.94 | 72.81 | 91.02 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg13_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/vgg/vgg13-7756f33c-910v2.ckpt) | +| vgg19 | 143.66 | 8 | 32 | 224x224 | O2 | 53s | 39.17 | 6535.61 | 75.24 | 92.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg19_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/vgg/vgg19-5104d1ea-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| vgg13 | 133.04 | 8 | 32 | 224x224 | O2 | 23s | 55.20 | 4637.68 | 72.87 | 91.02 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg13_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vgg/vgg13-da805e6e.ckpt) | +| vgg19 | 143.66 | 8 | 32 | 224x224 | O2 | 22s | 67.42 | 3797.09 | 75.21 | 92.56 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg19_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vgg/vgg19-bedee7b6.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple GPU/Ascend devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/vgg/vgg16_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/vgg/vgg16_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/vgg/vgg16_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint +arXiv:1409.1556, 2014. diff --git a/mshub_res/assets/mindspore/2.5/vgg19_imagenet2012.md b/mshub_res/assets/mindspore/2.5/vgg19_imagenet2012.md new file mode 100644 index 0000000..e775326 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/vgg19_imagenet2012.md @@ -0,0 +1,151 @@ +# vgg19 + +--- + +model-name: vgg19 + +backbone-name: vgg19 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc75.24 | top5acc92.55 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 5104d1ea + +license: Apache2.0 + +summary: vgg19 is used for cv + +--- + +# VGGNet + + + +> [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + + +Figure 1 shows the model architecture of VGGNet. VGGNet is a key milestone on image classification task. It expands the +model to 16-19 layers for the first time. The key motivation of this model is +that it shows usage of 3x3 kernels is efficient and by adding 3x3 kernels, it could have the same effect as 5x5 or 7x7 +kernels. VGGNet could achieve better model performance compared with previous +methods such as GoogleLeNet and AlexNet on ImageNet-1K dataset.[[1](#references)] + +

+Figure 1. Architecture of VGG [1]

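+As a sanity check on the parameter counts reported in the tables below, the sketch underneath tallies the weights of the standard VGG19 layer configuration, assuming the classic 4096-4096-1000 classifier head from the original paper; it lands at roughly 143.7M parameters, dominated by the fully connected layers.
+
+```python
+# Rough parameter tally for the standard VGG19 configuration (config E in the paper),
+# assuming the classic 4096-4096-1000 classifier head; biases are included.
+cfg_e = [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M",
+         512, 512, 512, 512, "M", 512, 512, 512, 512, "M"]
+
+params, c_in = 0, 3
+for v in cfg_e:
+    if v == "M":
+        continue                      # max pooling has no parameters
+    params += 9 * c_in * v + v        # 3x3 conv weights plus biases
+    c_in = v
+
+for fan_in, fan_out in [(512 * 7 * 7, 4096), (4096, 4096), (4096, 1000)]:
+    params += fan_in * fan_out + fan_out
+
+print(f"{params / 1e6:.2f}M parameters")   # about 143.67M, consistent with the table below
+```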
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | +| vgg13 | 133.04 | 8 | 32 | 224x224 | O2 | 41s | 30.52 | 8387.94 | 72.81 | 91.02 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg13_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/vgg/vgg13-7756f33c-910v2.ckpt) | +| vgg19 | 143.66 | 8 | 32 | 224x224 | O2 | 53s | 39.17 | 6535.61 | 75.24 | 92.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg19_ascend.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindcv/vgg/vgg19-5104d1ea-910v2.ckpt) | + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| ---------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| vgg13 | 133.04 | 8 | 32 | 224x224 | O2 | 23s | 55.20 | 4637.68 | 72.87 | 91.02 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg13_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vgg/vgg13-da805e6e.ckpt) | +| vgg19 | 143.66 | 8 | 32 | 224x224 | O2 | 22s | 67.42 | 3797.09 | 75.21 | 92.56 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg19_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vgg/vgg19-bedee7b6.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple GPU/Ascend devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/vgg/vgg16_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. 
+ +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/vgg/vgg16_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/vgg/vgg16_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint +arXiv:1409.1556, 2014. diff --git a/mshub_res/assets/mindspore/2.5/visformer_tiny_imagenet2012.md b/mshub_res/assets/mindspore/2.5/visformer_tiny_imagenet2012.md new file mode 100644 index 0000000..af13fe2 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/visformer_tiny_imagenet2012.md @@ -0,0 +1,141 @@ +# visformer_tiny + +--- + +model-name: visformer_tiny + +backbone-name: visformer + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc78.40 | top5acc94.30 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: df995ba4 + +license: Apache2.0 + +summary: visformer is used for cv + +--- + +# Visformer + +> [Visformer: The Vision-friendly Transformer](https://arxiv.org/abs/2104.12533) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +Visformer, or Vision-friendly Transformer, is an architecture that combines Transformer-based architectural features +with those from convolutional neural network architectures. Visformer adopts the stage-wise design for higher base +performance. But self-attentions are only utilized in the last two stages, considering that self-attention in the +high-resolution stage is relatively inefficient even when the FLOPs are balanced. Visformer employs bottleneck blocks in +the first stage and utilizes group 3 × 3 convolutions in bottleneck blocks inspired by ResNeXt. It also introduces +BatchNorm to patch embedding modules as in CNNs. [[2](#references)] + +

+Figure 1. Network Configuration of Visformer [1]

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +| model name | params(M) | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | acc@top1 | acc@top5 | recipe | weight | +| -------------- | --------- | ----- | ---------- | ---------- | --------- | ------------- | ------- | ------- | -------- | -------- | ------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------- | +| visformer_tiny | 10.33 | 8 | 128 | 224x224 | O2 | 137s | 217.92 | 4698.97 | 78.28 | 94.15 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/visformer/visformer_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/visformer/visformer_tiny-daee0322.ckpt) | + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/visformer/visformer_tiny_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/visformer/visformer_tiny_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/visformer/visformer_tiny_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + +[1] Chen Z, Xie L, Niu J, et al. Visformer: The vision-friendly transformer. Proceedings of the IEEE/CVF International +Conference on Computer Vision. 2021: 589-598. 
+ +[2] Visformer, diff --git a/mshub_res/assets/mindspore/2.5/vit_b32_224_imagenet2012.md b/mshub_res/assets/mindspore/2.5/vit_b32_224_imagenet2012.md new file mode 100644 index 0000000..9a22814 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/vit_b32_224_imagenet2012.md @@ -0,0 +1,159 @@ +# vit_b32_224 + +--- + +model-name: vit_b32_224 + +backbone-name: vit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc77.53 | top5acc93.20 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: a2165e72 + +license: Apache2.0 + +summary: vit is used for cv + +--- + +# ViT + + + +> [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + +Vision Transformer (ViT) achieves remarkable results compared to convolutional neural networks (CNN) while obtaining +fewer computational resources for pre-training. In comparison to convolutional neural networks (CNN), Vision +Transformer (ViT) shows a generally weaker inductive bias resulting in increased reliance on model regularization or +data augmentation (AugReg) when training on smaller datasets. + +The ViT is a visual model based on the architecture of a transformer originally designed for text-based tasks, as shown +in the below figure. The ViT model represents an input image as a series of image patches, like the series of word +embeddings used when using transformers to text, and directly predicts class labels for the image. ViT exhibits an +extraordinary performance when trained on enough data, breaking the performance of a similar state-of-art CNN with 4x +fewer computational resources. [[2](#references)] + + + +

+Figure 1. Architecture of ViT [1]

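+The patch-to-token step described above can be sketched with NumPy. The sizes follow the vit_b32_224 variant (224x224 input, 32x32 patches, 768-dim embeddings), while the projection matrix, class token, and position embeddings are random placeholders rather than trained weights.
+
+```python
+# A NumPy sketch of how ViT turns an image into a token sequence: cut the image into
+# fixed-size patches, flatten and linearly project each patch, then prepend a class
+# token and add position embeddings. All weights here are random placeholders.
+import numpy as np
+
+img_size, patch, embed_dim = 224, 32, 768
+image = np.random.randn(img_size, img_size, 3)
+
+n = img_size // patch                                     # 7 patches per side -> 49 tokens
+patches = (image.reshape(n, patch, n, patch, 3)
+                .transpose(0, 2, 1, 3, 4)
+                .reshape(n * n, patch * patch * 3))       # (49, 3072)
+
+proj = np.random.randn(patch * patch * 3, embed_dim) * 0.02
+tokens = patches @ proj                                   # (49, 768) patch embeddings
+
+cls_token = np.zeros((1, embed_dim))
+pos_embed = np.random.randn(n * n + 1, embed_dim) * 0.02
+sequence = np.concatenate([cls_token, tokens], axis=0) + pos_embed
+print(sequence.shape)   # (50, 768): the sequence fed to the transformer encoder
+```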
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** + + 1. As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the + global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + 2. The current configuration with a batch_size of 512, was initially set for a machine with 64GB of VRAM. To avoid + running out of memory (OOM) on machines with smaller VRAM, consider reducing the batch_size to 256 or lower. + Simultaneously, to maintain the consistency of training results, please scale the learning rate down proportionally + with decreasing batch_size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Dosovitskiy A, Beyer L, Kolesnikov A, et al. An image is worth 16x16 words: Transformers for image recognition at +scale[J]. arXiv preprint arXiv:2010.11929, 2020. 
+ +[2] "Vision Transformers (ViT) in Image Recognition – 2022 Guide", diff --git a/mshub_res/assets/mindspore/2.5/vit_l32_224_imagenet2012.md b/mshub_res/assets/mindspore/2.5/vit_l32_224_imagenet2012.md new file mode 100644 index 0000000..7285219 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/vit_l32_224_imagenet2012.md @@ -0,0 +1,159 @@ +# vit_l32_224 + +--- + +model-name: vit_l32_224 + +backbone-name: vit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc72.13 | top5acc90.21 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: e0039f16 + +license: Apache2.0 + +summary: vit is used for cv + +--- + +# ViT + + + +> [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + + + +Vision Transformer (ViT) achieves remarkable results compared to convolutional neural networks (CNN) while obtaining +fewer computational resources for pre-training. In comparison to convolutional neural networks (CNN), Vision +Transformer (ViT) shows a generally weaker inductive bias resulting in increased reliance on model regularization or +data augmentation (AugReg) when training on smaller datasets. + +The ViT is a visual model based on the architecture of a transformer originally designed for text-based tasks, as shown +in the below figure. The ViT model represents an input image as a series of image patches, like the series of word +embeddings used when using transformers to text, and directly predicts class labels for the image. ViT exhibits an +extraordinary performance when trained on enough data, breaking the performance of a similar state-of-art CNN with 4x +fewer computational resources. [[2](#references)] + + + +

+Figure 1. Architecture of ViT [1]

+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + + + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** + + 1. As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the + global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + 2. The current configuration with a batch_size of 512, was initially set for a machine with 64GB of VRAM. To avoid + running out of memory (OOM) on machines with smaller VRAM, consider reducing the batch_size to 256 or lower. + Simultaneously, to maintain the consistency of training results, please scale the learning rate down proportionally + with decreasing batch_size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/vit/vit_b32_224_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Dosovitskiy A, Beyer L, Kolesnikov A, et al. An image is worth 16x16 words: Transformers for image recognition at +scale[J]. arXiv preprint arXiv:2010.11929, 2020. 
+ +[2] "Vision Transformers (ViT) in Image Recognition – 2022 Guide", diff --git a/mshub_res/assets/mindspore/2.5/xcit_tiny_12_p16_224_imagenet2012.md b/mshub_res/assets/mindspore/2.5/xcit_tiny_12_p16_224_imagenet2012.md new file mode 100644 index 0000000..2c587af --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/xcit_tiny_12_p16_224_imagenet2012.md @@ -0,0 +1,138 @@ +# xcit_tiny_12_p16_224 + +--- + +model-name: xcit_tiny_12_p16_224 + +backbone-name: xcit + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: ImageNet2012 + +evaluation: top1acc77.27 | top5acc93.56 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: bd90776e + +license: Apache2.0 + +summary: xcit is used for cv + +--- + +# XCiT: Cross-Covariance Image Transformers + +> [XCiT: Cross-Covariance Image Transformers](https://arxiv.org/abs/2106.09681) + +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Introduction + +XCiT models propose a “transposed” version of self-attention that operates across feature channels rather than tokens, +where the interactions are based on the cross-covariance matrix between keys and queries. The resulting cross-covariance +attention (XCA) has linear complexity in the number of tokens, and allows efficient processing of high-resolution +images. Our cross-covariance image transformer (XCiT) – built upon XCA – combines the accuracy of conventional +transformers with the scalability of convolutional architectures. + +

+Figure 1. Architecture of XCiT [1]

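+A minimal NumPy sketch of the cross-covariance attention (XCA) operation described above is given below for a single head. Multi-head splitting and the learnable temperature are omitted, the helper name is ad hoc, and all sizes are illustrative.
+
+```python
+# A minimal NumPy sketch of cross-covariance attention (XCA): queries and keys are
+# L2-normalized along the token axis, the attention map is the d x d channel
+# cross-covariance (scaled by a temperature), and the cost grows linearly with the
+# number of tokens N.
+import numpy as np
+
+def xca(q, k, v, tau=1.0):
+    # q, k, v: (N, d) token features for one head
+    qn = q / np.linalg.norm(q, axis=0, keepdims=True)    # normalize each channel over tokens
+    kn = k / np.linalg.norm(k, axis=0, keepdims=True)
+    logits = (kn.T @ qn) / tau                           # (d, d) channel-to-channel attention
+    logits -= logits.max(axis=0, keepdims=True)
+    attn = np.exp(logits) / np.exp(logits).sum(axis=0, keepdims=True)   # softmax over channels
+    return v @ attn                                      # (N, d): tokens re-mixed across channels
+
+N, d = 196, 64                                           # e.g. 14x14 tokens, one 64-dim head
+q, k, v = (np.random.randn(N, d) for _ in range(3))
+print(xca(q, k, v).shape)                                # (196, 64); the attention map is only 64x64
+```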
+ +## Performance + +Our reproduced model performance on ImageNet-1K is reported as follows. + +Experiments are tested on ascend 910\* with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +### Notes + +- top-1 and top-5: Accuracy reported on the validation set of ImageNet-1K. + +## Quick Start + +### Preparation + +#### Installation + +Please refer to the [installation instruction](https://mindspore-lab.github.io/mindcv/installation/) in MindCV. + +#### Dataset Preparation + +Please download the [ImageNet-1K](https://www.image-net.org/challenges/LSVRC/2012/index.php) dataset for model training +and validation. + +### Training + +- Distributed Training + + It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple + Ascend 910 devices, please run + + ```shell + # distributed training on multiple NPU devices + msrun --bind_core=True --worker_num 8 python train.py --config configs/xcit/xcit_tiny_12_p16_ascend.yaml --data_dir /path/to/imagenet + ``` + + For detailed illustration of all hyper-parameters, please refer + to [config.py](https://github.com/mindspore-lab/mindcv/blob/main/config.py). + + **Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to + keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +- Standalone Training + + If you want to train or finetune the model on a smaller dataset without distributed training, please run: + + ```shell + # standalone training on single NPU device + python train.py --config configs/xcit/xcit_tiny_12_p16_ascend.yaml --data_dir /path/to/dataset --distribute False + ``` + +### Validation + +To validate the accuracy of the trained model, you can use `validate.py` and parse the checkpoint path +with `--ckpt_path`. + +```shell +python validate.py -c configs/xcit/xcit_tiny_12_p16_224_ascend.yaml --data_dir /path/to/imagenet --ckpt_path /path/to/ckpt +``` + +## References + + + +[1] Ali A, Touvron H, Caron M, et al. Xcit: Cross-covariance image transformers[J]. Advances in neural information +processing systems, 2021, 34: 20014-20027. diff --git a/mshub_res/assets/mindspore/2.5/yolov3_darknet53_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov3_darknet53_coco2017.md new file mode 100644 index 0000000..c44d8ae --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov3_darknet53_coco2017.md @@ -0,0 +1,140 @@ +# yolov3_darknet53 + +--- + +model-name: yolov3_darknet53 + +backbone-name: yolov3 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP46.6 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 81895f09 + +license: Apache2.0 + +summary: yolov3 is used for cv + +--- + +# YOLOv3 + +> [YOLOv3: An Incremental Improvement](https://arxiv.org/pdf/1804.02767.pdf) + +## Abstract + +We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good. 
It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster. + +
+ +
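The abstract above contrasts mAP at a single 0.5 IoU threshold with the stricter averaged COCO metric reported later in this card. For readers unfamiliar with that threshold, here is a small generic IoU helper (illustrative only, not taken from MindYOLO's code base) showing what the 0.5 cut-off is applied to:

```python
def iou_xyxy(box_a, box_b):
    """Intersection-over-Union of two boxes given as (x1, y1, x2, y2)."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)

# A prediction counts as a true positive at mAP@50 when IoU >= 0.5,
# while COCO-style mAP averages over thresholds 0.50, 0.55, ..., 0.95.
print(iou_xyxy((0, 0, 100, 100), (50, 0, 150, 100)))  # ~0.333 -> rejected at the 0.5 threshold
```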
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Pretraining Model + +You can get the pre-training model from [here](https://pjreddie.com/media/files/darknet53.conv.74). + +To convert it to a loadable ckpt file for mindyolo, please put it in the root directory then run it + +```shell +python mindyolo/utils/convert_weight_darknet53.py +``` + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov3_log python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend +``` + +
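The note above recommends scaling the learning rate linearly when the global batch size changes. A minimal sketch of that rule follows; the baseline learning rate here is an assumption for illustration (read the real value from configs/yolov3/yolov3.yaml), only the scaling arithmetic is the point.

```python
def scale_lr(base_lr, base_global_batch, new_global_batch):
    """Linear scaling rule: keep lr / global_batch_size constant."""
    return base_lr * new_global_batch / base_global_batch

# Recipe baseline in this card: 8 devices x per-device batch 16 = global batch 128.
base_lr = 0.01   # assumed value for illustration; check the yaml for the real base lr
print(scale_lr(base_lr, base_global_batch=8 * 16, new_global_batch=4 * 16))  # 0.005 on 4 devices
```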
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :-------------------: | :------------------------------------------------------------------------------------------------------------------------: | +| YOLOv3 | 8 | 16 | 640x640 | O2 | 274.32s | 383.68 | 333.61 | 46.6% | [yaml](./yolov3.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov3/yolov3-darknet53_300e_mAP455-81895f09-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :-------------------: | :----------------------------------------------------------------------------------------------------------: | +| YOLOv3 | 8 | 16 | 640x640 | O2 | 160.80s | 409.66 | 312.45 | 45.5% | [yaml](./yolov3.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov3/yolov3-darknet53_300e_mAP455-adfb27af.ckpt) | + +
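In the tables above, the img/s column follows directly from the per-step time and the global batch size (cards × per-device batch size). A quick check in Python, using the Ascend 910* row as an example:

```python
def throughput(cards, batch_size_per_card, ms_per_step):
    """Images per second for synchronous data-parallel training."""
    return cards * batch_size_per_card * 1000.0 / ms_per_step

# YOLOv3 on Ascend 910*: 8 cards x batch 16 at 383.68 ms/step
print(round(throughput(8, 16, 383.68), 2))  # ~333.61, matching the img/s column
```

The same relation holds for the other rows, so ms/step and img/s are two views of the same measurement.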
+ +### Notes + +- map: Accuracy reported on the validation set. +- We referred to a commonly used third-party [YOLOv3](https://github.com/ultralytics/yolov3) implementation. + +## References + + + +[1] Jocher Glenn. YOLOv3 release v9.1. , 2021. +[2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018. diff --git a/mshub_res/assets/mindspore/2.5/yolov4_cspdarknet53_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov4_cspdarknet53_coco2017.md new file mode 100644 index 0000000..8018f49 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov4_cspdarknet53_coco2017.md @@ -0,0 +1,161 @@ +# yolov4_cspdarknet53 + +--- + +model-name: yolov4_cspdarknet53 + +backbone-name: yolov4 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP46.1 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 64b8506f + +license: Apache2.0 + +summary: yolov4 is used for cv + +--- + +# YOLOv4 + +> [YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/pdf/2004.10934.pdf) + +## Abstract + +There are a huge number of features which are said to +improve Convolutional Neural Network (CNN) accuracy. +Practical testing of combinations of such features on large +datasets, and theoretical justification of the result, is required. Some features operate on certain models exclusively +and for certain problems exclusively, or only for small-scale +datasets; while some features, such as batch-normalization +and residual-connections, are applicable to the majority of +models, tasks, and datasets. We assume that such universal +features include Weighted-Residual-Connections (WRC), +Cross-Stage-Partial-connections (CSP), Cross mini-Batch +Normalization (CmBN), Self-adversarial-training (SAT) +and Mish-activation. We use new features: WRC, CSP, +CmBN, SAT, Mish activation, Mosaic data augmentation, +CmBN, DropBlock regularization, and CIoU loss, and combine some of them to achieve state-of-the-art results: 43.5% +AP (65.7% AP50) for the MS COCO dataset at a realtime speed of 65 FPS on Tesla V100. + +
+ +
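Among the features listed in the abstract, Mish activation has a particularly compact definition, mish(x) = x · tanh(softplus(x)). A tiny numpy sketch for reference (illustrative only, not the MindYOLO operator):

```python
import numpy as np

def mish(x):
    """Mish activation: x * tanh(softplus(x)), with softplus(x) = ln(1 + e^x)."""
    return x * np.tanh(np.log1p(np.exp(x)))

x = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
print(np.round(mish(x), 4))  # smooth, slightly negative below zero, ~identity for large positive x
```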
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Pretraining Model + +You can get the pre-training model trained on ImageNet2012 from [here](https://download.mindspore.cn/model_zoo/r1.2/cspdarknet53_ascend_v120_imagenet2012_official_cv_bs64_top1acc7854_top5acc9428/cspdarknet53_ascend_v120_imagenet2012_official_cv_bs64_top1acc7854_top5acc9428.ckpt). + +To convert it to a loadable ckpt file for mindyolo, please put it in the root directory then run it + +```shell +python mindyolo/utils/convert_weight_cspdarknet53.py +``` + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov4_log python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320 +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +### Notes + +- As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + + - If the following warning occurs, setting the environment variable PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' will fix it. + + ```shell + multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache)) + ``` + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320 +``` + +
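After running the conversion script above, it can be worth sanity-checking that the resulting checkpoint loads before starting a multi-device job. A short sketch using MindSpore's checkpoint loader; the file name is an assumption, adjust it to whatever the conversion script actually wrote.

```python
import mindspore as ms

# Path is an assumption for illustration; point it at the converted backbone checkpoint.
params = ms.load_checkpoint("cspdarknet53.ckpt")

print(f"{len(params)} parameters loaded")
for name, tensor in list(params.items())[:5]:
    # Each entry maps a parameter name to a MindSpore Parameter, so names and shapes
    # can be eyeballed against the backbone definition before training starts.
    print(name, tuple(tensor.shape))
```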
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | backbone | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :----------: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :-------------------: | :---------------------------------------------------------------------------------------------------------------------------: | +| YOLOv4 | CSPDarknet53 | 8 | 16 | 608x608 | O2 | 467.47s | 308.43 | 415.01 | 46.1% | [yaml](./yolov4.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov4/yolov4-cspdarknet53_320e_map454-64b8506f-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | backbone | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :----------------: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :------------------------: | :------------------------------------------------------------------------------------------------------------------: | +| YOLOv4 | CSPDarknet53 | 8 | 16 | 608x608 | O2 | 188.52s | 505.98 | 252.97 | 45.4% | [yaml](./yolov4.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov4/yolov4-cspdarknet53_320e_map454-50172f93.ckpt) | +| YOLOv4 | CSPDarknet53(silu) | 8 | 16 | 608x608 | O2 | 274.18s | 443.21 | 288.80 | 45.8% | [yaml](./yolov4-silu.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov4/yolov4-cspdarknet53_silu_320e_map458-bdfc3205.ckpt) | + +
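The asset-sha256 field in the front matter (64b8506f) is the same 8-character id embedded in the 910* checkpoint filename above. Assuming, as that naming suggests, that it is the leading 8 hex digits of the file's SHA-256, a downloaded weight can be checked as below; the local path is a placeholder.

```python
import hashlib

def sha256_prefix(path, n=8, chunk=1 << 20):
    """Leading n hex digits of a file's SHA-256, read in 1 MiB chunks."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()[:n]

# Placeholder path; compare the result against the asset-sha256 field (64b8506f for this card).
print(sha256_prefix("yolov4-cspdarknet53_320e_map454-64b8506f-910v2.ckpt"))
```

A mismatch usually means a truncated or corrupted download rather than a wrong model version.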
+ +### Notes + +- map: Accuracy reported on the validation set. + +## References + + + +[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020. diff --git a/mshub_res/assets/mindspore/2.5/yolov5n_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov5n_coco2017.md new file mode 100644 index 0000000..da2c0aa --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov5n_coco2017.md @@ -0,0 +1,144 @@ +# yolov5n + +--- + +model-name: yolov5n + +backbone-name: yolov5 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP27.4 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: bedf9a93 + +license: Apache2.0 + +summary: yolov5 is used for cv + +--- + +# YOLOv5 + +## Abstract + +YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, representing Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. + +
+ +
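The performance tables further down list fixed input resolutions (640×640 for the P5 models, 1280×1280 for the P6 models). YOLO-family pipelines typically reach those sizes with a letterbox resize that preserves aspect ratio and pads the remainder; the sketch below shows the general idea and is not claimed to match MindYOLO's exact preprocessing (padding value and rounding differ between implementations).

```python
import cv2
import numpy as np

def letterbox(img, new_size=640, pad_value=114):
    """Resize keeping aspect ratio, then pad onto a new_size x new_size canvas."""
    h, w = img.shape[:2]
    scale = min(new_size / h, new_size / w)
    nh, nw = int(round(h * scale)), int(round(w * scale))
    resized = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
    top = (new_size - nh) // 2
    left = (new_size - nw) // 2
    canvas = np.full((new_size, new_size, 3), pad_value, dtype=resized.dtype)
    canvas[top:top + nh, left:left + nw] = resized
    return canvas, scale, (left, top)   # scale/offset are needed to map boxes back

img = np.zeros((480, 640, 3), dtype=np.uint8)   # dummy 640x480 frame
out, scale, offset = letterbox(img, 640)
print(out.shape, scale, offset)                 # (640, 640, 3) 1.0 (0, 80)
```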
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov5_log python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend +``` + +
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +To validate the accuracy of the trained model for resolution of 1280, you can use `test.py` and parse the checkpoint path with `--weight` and parse the image sizes with `--img_size`. + +```shell +python test.py --config ./configs/yolov5/yolov5n6.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt --img_size 1280 +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :---------------------: | :---------------------------------------------------------------------------------------------------------------: | +| YOLOv5 | N | 8 | 32 | 640x640 | O2 | 377.81s | 520.79 | 491.56 | 27.4% | [yaml](./yolov5n.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5n_300e_mAP273-bedf9a93-910v2.ckpt) | +| YOLOv5 | S | 8 | 32 | 640x640 | O2 | 378.18s | 526.49 | 486.30 | 37.6% | [yaml](./yolov5s.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5s_300e_mAP376-df4a45b6-910v2.ckpt) | +| YOLOv5 | N6 | 8 | 32 | 1280x1280 | O2 | 494.36s | 1543.35 | 165.87 | 35.7% | [yaml](./yolov5n6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5n6_300e_mAP357-49d91077.ckpt) | +| YOLOv5 | S6 | 8 | 32 | 1280x1280 | O2 | 524.91s | 1514.98 | 168.98 | 44.4% | [yaml](./yolov5s6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5s6_300e_mAP444-aeaffe77.ckpt) | +| YOLOv5 | M6 | 8 | 32 | 1280x1280 | O2 | 572.32s | 1769.17 | 144.70 | 51.1% | [yaml](./yolov5m6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5m6_300e_mAP511-025d9536.ckpt) | +| YOLOv5 | L6 | 8 | 16 | 1280x1280 | O2 | 800.34s | 894.65 | 143.07 | 53.6% | [yaml](./yolov5l6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5l6_300e_mAP536-617a1cc1.ckpt) | +| YOLOv5 | X6 | 8 | 8 | 1280x1280 | O2 | 995.73s | 864.43 | 74.04 | 54.5% | [yaml](./yolov5x6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5x6_300e_mAP545-81ebdca9.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. 
+ +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :-------------------------------------------------------------------------------------------------: | +| YOLOv5 | N | 8 | 32 | 640x640 | O2 | 233.25s | 650.57 | 393.50 | 27.3% | [yaml](./yolov5n.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5n_300e_mAP273-9b16bd7b.ckpt) | +| YOLOv5 | S | 8 | 32 | 640x640 | O2 | 166.00s | 650.14 | 393.76 | 37.6% | [yaml](./yolov5s.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5s_300e_mAP376-860bcf3b.ckpt) | +| YOLOv5 | M | 8 | 32 | 640x640 | O2 | 256.51s | 712.31 | 359.39 | 44.9% | [yaml](./yolov5m.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5m_300e_mAP449-e7bbf695.ckpt) | +| YOLOv5 | L | 8 | 32 | 640x640 | O2 | 274.15s | 723.35 | 353.91 | 48.5% | [yaml](./yolov5l.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5l_300e_mAP485-a28bce73.ckpt) | +| YOLOv5 | X | 8 | 16 | 640x640 | O2 | 436.18s | 569.96 | 224.58 | 50.5% | [yaml](./yolov5x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5x_300e_mAP505-97d36ddc.ckpt) | + +
+ +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOV5](https://github.com/ultralytics/yolov5) to reproduce the P5 series model, and the differences are as follows: + The single-device batch size is 32. This is different from the official codes. + +## References + + + +[1] Jocher Glenn. YOLOv5 release v6.1. , 2022. diff --git a/mshub_res/assets/mindspore/2.5/yolov5s_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov5s_coco2017.md new file mode 100644 index 0000000..1ae8dfe --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov5s_coco2017.md @@ -0,0 +1,144 @@ +# yolov5s + +--- + +model-name: yolov5s + +backbone-name: yolov5 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP37.6 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: df4a45b6 + +license: Apache2.0 + +summary: yolov5 is used for cv + +--- + +# YOLOv5 + +## Abstract + +YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, representing Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. + +
+ +
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov5_log python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend +``` + +
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +To validate the accuracy of the trained model for resolution of 1280, you can use `test.py` and parse the checkpoint path with `--weight` and parse the image sizes with `--img_size`. + +```shell +python test.py --config ./configs/yolov5/yolov5n6.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt --img_size 1280 +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :---------------------: | :---------------------------------------------------------------------------------------------------------------: | +| YOLOv5 | N | 8 | 32 | 640x640 | O2 | 377.81s | 520.79 | 491.56 | 27.4% | [yaml](./yolov5n.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5n_300e_mAP273-bedf9a93-910v2.ckpt) | +| YOLOv5 | S | 8 | 32 | 640x640 | O2 | 378.18s | 526.49 | 486.30 | 37.6% | [yaml](./yolov5s.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5s_300e_mAP376-df4a45b6-910v2.ckpt) | +| YOLOv5 | N6 | 8 | 32 | 1280x1280 | O2 | 494.36s | 1543.35 | 165.87 | 35.7% | [yaml](./yolov5n6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5n6_300e_mAP357-49d91077.ckpt) | +| YOLOv5 | S6 | 8 | 32 | 1280x1280 | O2 | 524.91s | 1514.98 | 168.98 | 44.4% | [yaml](./yolov5s6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5s6_300e_mAP444-aeaffe77.ckpt) | +| YOLOv5 | M6 | 8 | 32 | 1280x1280 | O2 | 572.32s | 1769.17 | 144.70 | 51.1% | [yaml](./yolov5m6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5m6_300e_mAP511-025d9536.ckpt) | +| YOLOv5 | L6 | 8 | 16 | 1280x1280 | O2 | 800.34s | 894.65 | 143.07 | 53.6% | [yaml](./yolov5l6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5l6_300e_mAP536-617a1cc1.ckpt) | +| YOLOv5 | X6 | 8 | 8 | 1280x1280 | O2 | 995.73s | 864.43 | 74.04 | 54.5% | [yaml](./yolov5x6.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov5/yolov5x6_300e_mAP545-81ebdca9.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. 
+ +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :-------------------------------------------------------------------------------------------------: | +| YOLOv5 | N | 8 | 32 | 640x640 | O2 | 233.25s | 650.57 | 393.50 | 27.3% | [yaml](./yolov5n.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5n_300e_mAP273-9b16bd7b.ckpt) | +| YOLOv5 | S | 8 | 32 | 640x640 | O2 | 166.00s | 650.14 | 393.76 | 37.6% | [yaml](./yolov5s.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5s_300e_mAP376-860bcf3b.ckpt) | +| YOLOv5 | M | 8 | 32 | 640x640 | O2 | 256.51s | 712.31 | 359.39 | 44.9% | [yaml](./yolov5m.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5m_300e_mAP449-e7bbf695.ckpt) | +| YOLOv5 | L | 8 | 32 | 640x640 | O2 | 274.15s | 723.35 | 353.91 | 48.5% | [yaml](./yolov5l.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5l_300e_mAP485-a28bce73.ckpt) | +| YOLOv5 | X | 8 | 16 | 640x640 | O2 | 436.18s | 569.96 | 224.58 | 50.5% | [yaml](./yolov5x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov5/yolov5x_300e_mAP505-97d36ddc.ckpt) | + +
+ +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOV5](https://github.com/ultralytics/yolov5) to reproduce the P5 series model, and the differences are as follows: + The single-device batch size is 32. This is different from the official codes. + +## References + + + +[1] Jocher Glenn. YOLOv5 release v6.1. , 2022. diff --git a/mshub_res/assets/mindspore/2.5/yolov7_tiny_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov7_tiny_coco2017.md new file mode 100644 index 0000000..9ebb0cf --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov7_tiny_coco2017.md @@ -0,0 +1,132 @@ +# yolov7_tiny + +--- + +model-name: yolov7_tiny + +backbone-name: yolov7 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP37.5 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 1d2ddf4b + +license: Apache2.0 + +summary: yolov7 is used for cv + +--- + +# YOLOv7 + +> [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/pdf/2207.02696.pdf) + +## Abstract + +YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9% AP) by 509% in speed and 2% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2% AP) by 551% in speed and 0.7% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights. + +
+ +
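The speed-up figures quoted in the abstract (509% and 551%) are relative FPS gains; the quick check below reproduces them from the FPS numbers given there:

```python
def percent_faster(fps_a, fps_b):
    """Relative speed-up of detector A over detector B, in percent."""
    return round((fps_a - fps_b) / fps_b * 100)

print(percent_faster(56, 9.2))   # 509: YOLOv7-E6 (56 FPS) vs SWIN-L Cascade-Mask R-CNN (9.2 FPS)
print(percent_faster(56, 8.6))   # 551: YOLOv7-E6 vs ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS)
```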
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov7_log python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend +``` + +
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :------------------------: | :-------------------------------------------------------------------------------------------------------------------: | +| YOLOv7 | Tiny | 8 | 16 | 640x640 | O2 | 363.74s | 352.92 | 362.69 | 37.5% | [yaml](./yolov7-tiny.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov7/yolov7-tiny_300e_mAP375-1d2ddf4b-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :------------------------: | :-----------------------------------------------------------------------------------------------------: | +| YOLOv7 | Tiny | 8 | 16 | 640x640 | O2 | 232.63s | 472.37 | 270.97 | 37.5% | [yaml](./yolov7-tiny.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov7/yolov7-tiny_300e_mAP375-d8972c94.ckpt) | +| YOLOv7 | L | 8 | 16 | 640x640 | O2 | 290.93s | 678.07 | 188.77 | 50.8% | [yaml](./yolov7.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov7/yolov7_300e_mAP508-734ac919.ckpt) | +| YOLOv7 | X | 8 | 12 | 640x640 | O2 | 404.77s | 636.36 | 150.86 | 52.4% | [yaml](./yolov7-x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov7/yolov7-x_300e_mAP524-e2f58741.ckpt) | + +
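The map column above is the standard COCO box AP averaged over IoU thresholds 0.50:0.95. If predictions are exported in COCO JSON format, the same number can be recomputed independently of MindYOLO's `test.py` with pycocotools; the file paths below are placeholders.

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Placeholder paths: COCO2017 val annotations and a detection dump in COCO JSON format.
coco_gt = COCO("annotations/instances_val2017.json")
coco_dt = coco_gt.loadRes("predictions.json")

evaluator = COCOeval(coco_gt, coco_dt, iouType="bbox")
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()        # prints AP@[0.50:0.95], AP@0.50, AP@0.75, ...
print(evaluator.stats[0])    # AP@[0.50:0.95], the "map" reported in the table
```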
+ +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOV7](https://github.com/WongKinYiu/yolov7) to reproduce the P5 series model, and the differences are as follows: + The single-device batch size for tiny/l/x is 16/16/12. This is different from the official codes. + +## References + + + +[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022. diff --git a/mshub_res/assets/mindspore/2.5/yolov8n_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov8n_coco2017.md new file mode 100644 index 0000000..3a03e54 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov8n_coco2017.md @@ -0,0 +1,144 @@ +# yolov8n + +--- + +model-name: yolov8n + +backbone-name: yolov8 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP37.3 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: 0e737186 + +license: Apache2.0 + +summary: yolov8 is used for cv + +--- + +# YOLOv8 + +## Abstract + +Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation and image classification tasks. + +
+ +
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov8_log python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend +``` + +
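Before launching a full run it can help to estimate wall-clock time from the throughput reported in the Performance section below. The sketch assumes the ~118k-image COCO2017 train split and the 500-epoch schedule implied by the released checkpoint name; both are assumptions for the estimate only, and validation/checkpointing overhead is ignored.

```python
import math

def estimated_hours(images, cards, batch_per_card, ms_per_step, epochs, compile_s=0.0):
    """Rough wall-clock estimate: steps/epoch * step time * epochs + graph compile."""
    steps = math.ceil(images / (cards * batch_per_card))
    return (compile_s + steps * ms_per_step / 1000.0 * epochs) / 3600.0

# YOLOv8-N row below: 8 cards x batch 16, 252.79 ms/step, 145.89 s graph compile.
print(round(estimated_hours(118_287, 8, 16, 252.79, epochs=500, compile_s=145.89), 1))
# ~32.5 hours under these assumptions
```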
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +### Detection + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :----------------------------------------------------------------------------------------------------------------: | +| YOLOv8 | N | 8 | 16 | 640x640 | O2 | 145.89s | 252.79 | 506.35 | 37.3% | [yaml](./yolov8n.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov8/yolov8-n_500e_mAP372-0e737186-910v2.ckpt) | +| YOLOv8 | S | 8 | 16 | 640x640 | O2 | 172.22s | 251.30 | 509.35 | 44.7% | [yaml](./yolov8s.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov8/yolov8-s_500e_mAP446-fae4983f-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :--------------------------------------------------------------------------------------------------: | +| YOLOv8 | N | 8 | 16 | 640x640 | O2 | 195.63s | 265.13 | 482.78 | 37.2% | [yaml](./yolov8n.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-n_500e_mAP372-cc07f5bd.ckpt) | +| YOLOv8 | S | 8 | 16 | 640x640 | O2 | 115.60s | 292.68 | 437.34 | 44.6% | [yaml](./yolov8s.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-s_500e_mAP446-3086f0c9.ckpt) | +| YOLOv8 | M | 8 | 16 | 640x640 | O2 | 185.25s | 383.72 | 333.58 | 50.5% | [yaml](./yolov8m.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-m_500e_mAP505-8ff7a728.ckpt) | +| YOLOv8 | L | 8 | 16 | 640x640 | O2 | 175.08s | 429.02 | 298.35 | 52.8% | [yaml](./yolov8l.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-l_500e_mAP528-6e96d6bb.ckpt) | +| YOLOv8 | X | 8 | 16 | 640x640 | O2 | 183.68s | 521.97 | 245.22 | 53.7% | [yaml](./yolov8x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-x_500e_mAP537-b958e1c7.ckpt) | + +### Segmentation + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. 
+ +| model Name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | mask map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :------: | :----------------------------: | :------------------------------------------------------------------------------------------------------------: | +| YOLOv8-seg | X | 8 | 16 | 640x640 | O2 | 183.68s | 641.25 | 199.61 | 52.5% | 42.9% | [yaml](./seg/yolov8x-seg.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-x-seg_300e_mAP_mask_429-b4920557.ckpt) | + +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOV8](https://github.com/ultralytics/ultralytics) to reproduce the P5 series model. + +## References + + + +[1] Jocher Glenn. Ultralytics YOLOv8. , 2023. diff --git a/mshub_res/assets/mindspore/2.5/yolov8s_coco2017.md b/mshub_res/assets/mindspore/2.5/yolov8s_coco2017.md new file mode 100644 index 0000000..ae8c905 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolov8s_coco2017.md @@ -0,0 +1,144 @@ +# yolov8s + +--- + +model-name: yolov8s + +backbone-name: yolov8 + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP44.7 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: fae4983f + +license: Apache2.0 + +summary: yolov8 is used for cv + +--- + +# YOLOv8 + +## Abstract + +Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation and image classification tasks. + +
+ +
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolov8_log python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please run: + +```shell +# standalone training on a CPU/Ascend device +python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend +``` + +
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +### Detection + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :----------------------------------------------------------------------------------------------------------------: | +| YOLOv8 | N | 8 | 16 | 640x640 | O2 | 145.89s | 252.79 | 506.35 | 37.3% | [yaml](./yolov8n.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov8/yolov8-n_500e_mAP372-0e737186-910v2.ckpt) | +| YOLOv8 | S | 8 | 16 | 640x640 | O2 | 172.22s | 251.30 | 509.35 | 44.7% | [yaml](./yolov8s.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolov8/yolov8-s_500e_mAP446-fae4983f-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :--------------------------------------------------------------------------------------------------: | +| YOLOv8 | N | 8 | 16 | 640x640 | O2 | 195.63s | 265.13 | 482.78 | 37.2% | [yaml](./yolov8n.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-n_500e_mAP372-cc07f5bd.ckpt) | +| YOLOv8 | S | 8 | 16 | 640x640 | O2 | 115.60s | 292.68 | 437.34 | 44.6% | [yaml](./yolov8s.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-s_500e_mAP446-3086f0c9.ckpt) | +| YOLOv8 | M | 8 | 16 | 640x640 | O2 | 185.25s | 383.72 | 333.58 | 50.5% | [yaml](./yolov8m.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-m_500e_mAP505-8ff7a728.ckpt) | +| YOLOv8 | L | 8 | 16 | 640x640 | O2 | 175.08s | 429.02 | 298.35 | 52.8% | [yaml](./yolov8l.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-l_500e_mAP528-6e96d6bb.ckpt) | +| YOLOv8 | X | 8 | 16 | 640x640 | O2 | 183.68s | 521.97 | 245.22 | 53.7% | [yaml](./yolov8x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-x_500e_mAP537-b958e1c7.ckpt) | + +### Segmentation + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +_coming soon_ + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. 
+ +| model Name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | mask map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :------: | :----------------------------: | :------------------------------------------------------------------------------------------------------------: | +| YOLOv8-seg | X | 8 | 16 | 640x640 | O2 | 183.68s | 641.25 | 199.61 | 52.5% | 42.9% | [yaml](./seg/yolov8x-seg.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolov8/yolov8-x-seg_300e_mAP_mask_429-b4920557.ckpt) | + +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOV8](https://github.com/ultralytics/ultralytics) to reproduce the P5 series model. + +## References + + + +[1] Jocher Glenn. Ultralytics YOLOv8. , 2023. diff --git a/mshub_res/assets/mindspore/2.5/yolox_s_coco2017.md b/mshub_res/assets/mindspore/2.5/yolox_s_coco2017.md new file mode 100644 index 0000000..ba41224 --- /dev/null +++ b/mshub_res/assets/mindspore/2.5/yolox_s_coco2017.md @@ -0,0 +1,133 @@ +# yolox_s + +--- + +model-name: yolox_s + +backbone-name: yolox + +module-type: cv + +fine-tunable: True + +model-version: 2.5 + +train-dataset: COCO2017 + +evaluation: mAP41.0 + +author: MindSpore team + +update-time: 2025-03-10 + +repo-link: + +user-id: MindSpore + +used-for: inference + +mindspore-version: 2.5 + +asset: + +- file-format: ckpt + asset-link: + asset-sha256: cebd0183 + +license: Apache2.0 + +summary: yolox is used for cv + +--- + +# YOLOX + +## Abstract + +YOLOX is a new high-performance detector with some experienced improvements to YOLO series. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model. + +
+ +
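The "anchor-free manner" mentioned above means each output location predicts a box directly from its grid cell rather than from preset anchor shapes. A schematic numpy decoding step in the spirit of YOLOX follows (center offsets plus an exponential width/height parameterization); it is an illustration of the idea, not MindYOLO's decode code.

```python
import numpy as np

def decode_anchor_free(reg, stride):
    """Decode raw (dx, dy, dw, dh) predictions for one feature map.

    reg: (H, W, 4) raw outputs, one box per grid cell.
    Returns (H, W, 4) boxes as (cx, cy, w, h) in input-image pixels.
    """
    h, w = reg.shape[:2]
    gy, gx = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    cx = (reg[..., 0] + gx) * stride      # center offset is relative to the grid cell
    cy = (reg[..., 1] + gy) * stride
    bw = np.exp(reg[..., 2]) * stride     # width/height via exp, no anchor box involved
    bh = np.exp(reg[..., 3]) * stride
    return np.stack([cx, cy, bw, bh], axis=-1)

dummy = np.zeros((80, 80, 4))             # stride-8 feature map for a 640x640 input
print(decode_anchor_free(dummy, stride=8).shape)   # (80, 80, 4)
```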
+ +## Requirements + +| mindspore | ascend driver | firmware | cann toolkit/kernel | +| :-------: | :-----------: | :---------: | :-----------------: | +| 2.5.0 | 24.1.0 | 7.5.0.3.220 | 8.0.0.beta1 | + +## Quick Start + +Please refer to the [GETTING_STARTED](https://github.com/mindspore-lab/mindyolo/blob/master/GETTING_STARTED.md) in MindYOLO for details. + +### Training + +
+View More + +#### - Distributed Training + +It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run + +```shell +# distributed training on multiple Ascend devices +msrun --worker_num=8 --local_worker_num=8 --bind_core=True --log_dir=./yolox_log python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True +``` + +**Note:** For more information about msrun configuration, please refer to [here](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/parallel/msrun_launcher.html) + +For detailed illustration of all hyper-parameters, please refer to [config.py](https://github.com/mindspore-lab/mindyolo/blob/master/mindyolo/utils/config.py). + +**Note:** As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction. + +#### - Standalone Training + +If you want to train or finetune the model on a smaller dataset without distributed training, please firstly run: + +```shell +# standalone 1st stage training on a CPU/Ascend device +python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend +``` + +
+ +### Validation and Test + +To validate the accuracy of the trained model, you can use `test.py` and parse the checkpoint path with `--weight`. + +```shell +python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt +``` + +## Performance + +Experiments are tested on Ascend 910\* with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :---: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :--------------------: | :--------------------------------------------------------------------------------------------------------------: | +| YOLOX | S | 8 | 8 | 640x640 | O2 | 299.01s | 177.65 | 360.26 | 41.0% | [yaml](./yolox-s.yaml) | [weights](https://download-mindspore.osinfra.cn/toolkits/mindyolo/yolox/yolox-s_300e_map407-cebd0183-910v2.ckpt) | + +Experiments are tested on Ascend 910 with mindspore 2.5.0 graph mode. + +| model name | scale | cards | batch size | resolution | jit level | graph compile | ms/step | img/s | map | recipe | weight | +| :--------: | :-------: | :---: | :--------: | :--------: | :-------: | :-----------: | :-----: | :----: | :---: | :----------------------------: | :--------------------------------------------------------------------------------------------------------: | +| YOLOX | N | 8 | 8 | 416x416 | O2 | 202.49s | 138.84 | 460.96 | 24.1% | [yaml](./yolox-nano.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-n_300e_map241-ec9815e3.ckpt) | +| YOLOX | Tiny | 8 | 8 | 416x416 | O2 | 169.71s | 126.85 | 504.53 | 33.3% | [yaml](./yolox-tiny.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-tiny_300e_map333-e5ae3a2e.ckpt) | +| YOLOX | S | 8 | 8 | 640x640 | O2 | 202.46s | 243.99 | 262.31 | 40.7% | [yaml](./yolox-s.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-s_300e_map407-0983e07f.ckpt) | +| YOLOX | M | 8 | 8 | 640x640 | O2 | 212.78s | 267.68 | 239.09 | 46.7% | [yaml](./yolox-m.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-m_300e_map467-1db321ee.ckpt) | +| YOLOX | L | 8 | 8 | 640x640 | O2 | 262.52s | 316.78 | 202.03 | 49.2% | [yaml](./yolox-l.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-l_300e_map492-52a4ab80.ckpt) | +| YOLOX | X | 8 | 8 | 640x640 | O2 | 341.33s | 415.67 | 153.97 | 51.6% | [yaml](./yolox-x.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-x_300e_map516-52216d90.ckpt) | +| YOLOX | Darknet53 | 8 | 8 | 640x640 | O2 | 198.15s | 407.53 | 157.04 | 47.7% | [yaml](./yolox-darknet53.yaml) | [weights](https://download.mindspore.cn/toolkits/mindyolo/yolox/yolox-darknet53_300e_map477-b5fcaba9.ckpt) | + +
+ +### Notes + +- map: Accuracy reported on the validation set. +- We refer to the official [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) to reproduce the results. + +## References + + + +[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. , 2021. -- Gitee