commit fd82b5c7ebe5b1cacfb8d79144d1ad93589ca3bf
Author: leaf <48828021+leafliber@users.noreply.github.com>
Date: Wed Jun 23 08:58:10 2021 +0800
initialize
diff --git a/.travis/precommit.sh b/.travis/precommit.sh
new file mode 100644
index 0000000..bcbfb2b
--- /dev/null
+++ b/.travis/precommit.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# CI style gate: runs all pre-commit hooks against the whole repository and
+# fails the build when the hook run reports a failure.
+
+# EXIT-trap handler: tells the contributor how to fix a style failure.
+function abort(){
+    echo "Your commit not fit PaddlePaddle code style" 1>&2
+    echo "Please use pre-commit scripts to auto-format your code" 1>&2
+    exit 1
+}
+
+# Armed for every exit path; disarmed on the last line once all checks pass.
+trap 'abort' 0
+set -e
+# Quoted so that a checkout path containing spaces still resolves.
+cd "$(dirname "$0")"
+cd ..
+export PATH=/usr/bin:$PATH
+pre-commit install
+
+if ! pre-commit run -a ; then
+    # Show the working tree and the uncommitted diff before failing.
+    ls -lh
+    git diff --exit-code
+    exit 1
+fi
+
+trap : 0
diff --git a/.travis/requirements.txt b/.travis/requirements.txt
new file mode 100644
index 0000000..27a340d
--- /dev/null
+++ b/.travis/requirements.txt
@@ -0,0 +1,8 @@
+# Add Python requirements for unit tests here. Note: installing pycocotools
+# directly is not supported in Travis CI, so it is built from source
+# in unittest.sh.
+tqdm
+cython
+shapely
+llvmlite==0.33
+numba==0.50
diff --git a/.travis/unittest.sh b/.travis/unittest.sh
new file mode 100644
index 0000000..e718331
--- /dev/null
+++ b/.travis/unittest.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# CI unit-test driver: installs the Python requirements listed in
+# .travis/requirements.txt, builds pycocotools from source when pip does not
+# list it, then runs every `tests` directory found under ./ppdet.
+
+# EXIT-trap handler: prints troubleshooting hints and fails the run.
+abort(){
+    echo "Run unittest failed" 1>&2
+    echo "Please check your code" 1>&2
+    echo " 1. you can run unit tests by 'bash .travis/unittest.sh' locally" 1>&2
+    echo " 2. you can add python requirements in .travis/requirements.txt if you use new requirements in unit tests" 1>&2
+    exit 1
+}
+
+# Runs `python -m unittest discover` once per `tests` directory under ./ppdet.
+# Arguments are accepted but unused.
+unittest(){
+    # Bail out if the command that ran just before this call failed.
+    if [ $? != 0 ]; then
+        exit 1
+    fi
+    # Each directory is handed to the inner shell as "$1" rather than spliced
+    # into the command string, so unusual path characters are never
+    # re-parsed as shell syntax. -I already runs one command per item.
+    find "./ppdet" -name 'tests' -type d -print0 | \
+        xargs -0 -I{} bash -c \
+        'python -m unittest discover -v -s "$1"' _ {}
+}
+
+# Armed for every exit path; disarmed on the last line after a clean run.
+trap 'abort' 0
+set -e
+
+# install travis python dependencies exclude pycocotools
+if [ -f ".travis/requirements.txt" ]; then
+    pip install -r .travis/requirements.txt
+fi
+
+# install pycocotools
+if [ "$(pip list | grep -c pycocotools)" -eq 0 ]; then
+    # install git only when it is not already on PATH
+    if ! command -v git > /dev/null 2>&1; then
+        apt-get update
+        apt-get install -y git
+    fi;
+    git clone https://github.com/cocodataset/cocoapi.git
+    cd cocoapi/PythonAPI
+    make install
+    python setup.py install --user
+    cd ../..
+    rm -rf cocoapi
+fi
+
+export PYTHONPATH="$(pwd):$PYTHONPATH"
+
+unittest .
+
+trap : 0
diff --git a/configs/cascade_rcnn/README.md b/configs/cascade_rcnn/README.md
new file mode 100644
index 0000000..d93ec4f
--- /dev/null
+++ b/configs/cascade_rcnn/README.md
@@ -0,0 +1,28 @@
+# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
+
+## Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50-FPN | Cascade Faster | 1 | 1x | ---- | 41.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | Cascade Mask | 1 | 1x | ---- | 41.8 | 36.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 1x | ---- | 44.4 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 2x | ---- | 45.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 1x | ---- | 44.9 | 39.1 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 2x | ---- | 45.7 | 39.7 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+
+
+## Citations
+```
+@article{Cai_2019,
+ title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
+ ISSN={1939-3539},
+ url={http://dx.doi.org/10.1109/tpami.2019.2956516},
+ DOI={10.1109/tpami.2019.2956516},
+ journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+ author={Cai, Zhaowei and Vasconcelos, Nuno},
+ year={2019},
+ pages={1–1}
+}
+```
diff --git a/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml b/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
new file mode 100644
index 0000000..cf54ecc
--- /dev/null
+++ b/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml b/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
new file mode 100644
index 0000000..cf54ecc
--- /dev/null
+++ b/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml b/configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml
new file mode 100644
index 0000000..ea2937b
--- /dev/null
+++ b/configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml
@@ -0,0 +1,97 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: CascadeHead
+ mask_head: MaskHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+ mask_post_process: MaskPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+
+CascadeHead:
+ head: CascadeTwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ cascade_iou: [0.5, 0.6, 0.7]
+ use_random: True
+
+CascadeTwoFCHead:
+ out_channel: 1024
+
+BBoxPostProcess:
+ decode:
+ name: RCNNBox
+ prior_box_var: [30.0, 30.0, 15.0, 15.0]
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
+
+
+MaskHead:
+ head: MaskFeat
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ mask_assigner: MaskAssigner
+ share_bbox_feat: False
+
+MaskFeat:
+ num_convs: 4
+ out_channel: 256
+
+MaskAssigner:
+ mask_resolution: 28
+
+MaskPostProcess:
+ binary_thresh: 0.5
diff --git a/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml b/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml
new file mode 100644
index 0000000..c5afe77
--- /dev/null
+++ b/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml
@@ -0,0 +1,75 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: CascadeHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+
+CascadeHead:
+ head: CascadeTwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ cascade_iou: [0.5, 0.6, 0.7]
+ use_random: True
+
+CascadeTwoFCHead:
+ out_channel: 1024
+
+BBoxPostProcess:
+ decode:
+ name: RCNNBox
+ prior_box_var: [30.0, 30.0, 15.0, 15.0]
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
diff --git a/configs/cascade_rcnn/_base_/optimizer_1x.yml b/configs/cascade_rcnn/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..63f898e
--- /dev/null
+++ b/configs/cascade_rcnn/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..b2c7e53
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_mask_rcnn_r50_fpn.yml',
+ '_base_/cascade_mask_fpn_reader.yml',
+]
+weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml b/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
new file mode 100644
index 0000000..0ab507c
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,18 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_mask_rcnn_r50_fpn.yml',
+ '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml b/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
new file mode 100644
index 0000000..736ba2e
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_mask_rcnn_r50_fpn.yml',
+ '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [12, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..b2cc799
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_rcnn_r50_fpn.yml',
+ '_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml b/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml
new file mode 100644
index 0000000..905adbd
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,18 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_rcnn_r50_fpn.yml',
+ '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml b/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml
new file mode 100644
index 0000000..a627214
--- /dev/null
+++ b/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/cascade_rcnn_r50_fpn.yml',
+ '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [12, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/datasets/coco_detection.yml b/configs/datasets/coco_detection.yml
new file mode 100644
index 0000000..7a62c3b
--- /dev/null
+++ b/configs/datasets/coco_detection.yml
@@ -0,0 +1,19 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+ !COCODataSet
+ image_dir: train2017
+ anno_path: annotations/instances_train2017.json
+ dataset_dir: dataset/coco
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+ !COCODataSet
+ image_dir: val2017
+ anno_path: annotations/instances_val2017.json
+ dataset_dir: dataset/coco
+
+TestDataset:
+ !ImageFolder
+ anno_path: annotations/instances_val2017.json
diff --git a/configs/datasets/coco_instance.yml b/configs/datasets/coco_instance.yml
new file mode 100644
index 0000000..5eaf767
--- /dev/null
+++ b/configs/datasets/coco_instance.yml
@@ -0,0 +1,19 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+ !COCODataSet
+ image_dir: train2017
+ anno_path: annotations/instances_train2017.json
+ dataset_dir: dataset/coco
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+ !COCODataSet
+ image_dir: val2017
+ anno_path: annotations/instances_val2017.json
+ dataset_dir: dataset/coco
+
+TestDataset:
+ !ImageFolder
+ anno_path: annotations/instances_val2017.json
diff --git a/configs/datasets/dota.yml b/configs/datasets/dota.yml
new file mode 100644
index 0000000..2953a79
--- /dev/null
+++ b/configs/datasets/dota.yml
@@ -0,0 +1,20 @@
+metric: COCO
+num_classes: 15
+
+TrainDataset:
+ !COCODataSet
+ image_dir: trainval_split/images
+ anno_path: trainval_split/s2anet_trainval_paddle_coco.json
+ dataset_dir: dataset/DOTA_1024_s2anet
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
+
+EvalDataset:
+ !COCODataSet
+ image_dir: trainval_split/images
+ anno_path: trainval_split/s2anet_trainval_paddle_coco.json
+ dataset_dir: dataset/DOTA_1024_s2anet/
+
+TestDataset:
+ !ImageFolder
+ anno_path: trainval_split/s2anet_trainval_paddle_coco.json
+ dataset_dir: dataset/DOTA_1024_s2anet/
diff --git a/configs/datasets/roadsign_voc.yml b/configs/datasets/roadsign_voc.yml
new file mode 100644
index 0000000..ddbfc78
--- /dev/null
+++ b/configs/datasets/roadsign_voc.yml
@@ -0,0 +1,21 @@
+metric: VOC
+map_type: integral
+num_classes: 4
+
+TrainDataset:
+ !VOCDataSet
+ dataset_dir: dataset/roadsign_voc
+ anno_path: train.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+ !VOCDataSet
+ dataset_dir: dataset/roadsign_voc
+ anno_path: valid.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/roadsign_voc/label_list.txt
diff --git a/configs/datasets/voc.yml b/configs/datasets/voc.yml
new file mode 100644
index 0000000..9fb492f
--- /dev/null
+++ b/configs/datasets/voc.yml
@@ -0,0 +1,21 @@
+metric: VOC
+map_type: 11point
+num_classes: 20
+
+TrainDataset:
+ !VOCDataSet
+ dataset_dir: dataset/voc
+ anno_path: trainval.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+ !VOCDataSet
+ dataset_dir: dataset/voc
+ anno_path: test.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/voc/label_list.txt
diff --git a/configs/datasets/wider_face.yml b/configs/datasets/wider_face.yml
new file mode 100644
index 0000000..cc01378
--- /dev/null
+++ b/configs/datasets/wider_face.yml
@@ -0,0 +1,20 @@
+metric: WiderFace
+num_classes: 1
+
+TrainDataset:
+ !WIDERFaceDataSet
+ dataset_dir: dataset/wider_face
+ anno_path: wider_face_split/wider_face_train_bbx_gt.txt
+ image_dir: WIDER_train/images
+ data_fields: ['image', 'gt_bbox', 'gt_class']
+
+EvalDataset:
+ !WIDERFaceDataSet
+ dataset_dir: dataset/wider_face
+ anno_path: wider_face_split/wider_face_val_bbx_gt.txt
+ image_dir: WIDER_val/images
+ data_fields: ['image']
+
+TestDataset:
+ !ImageFolder
+ use_default_label: true
diff --git a/configs/dcn/README.md b/configs/dcn/README.md
new file mode 100644
index 0000000..9c8613f
--- /dev/null
+++ b/configs/dcn/README.md
@@ -0,0 +1,37 @@
+### Deformable ConvNets v2
+
+| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
+| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
+| ResNet50-FPN | Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 42.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 2x | - | 43.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 45.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 46.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | - | 42.7 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | - | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 45.6 | 40.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 47.3 | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 48.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+
+
+**注意事项:**
+
+- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
+- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
+
+## Citations
+```
+@inproceedings{dai2017deformable,
+ title={Deformable Convolutional Networks},
+ author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
+ booktitle={Proceedings of the IEEE international conference on computer vision},
+ year={2017}
+}
+@article{zhu2018deformable,
+ title={Deformable ConvNets v2: More Deformable, Better Results},
+ author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
+ journal={arXiv preprint arXiv:1811.11168},
+ year={2018}
+}
+```
diff --git a/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..9f2738f
--- /dev/null
+++ b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../cascade_rcnn/_base_/optimizer_1x.yml',
+ '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
+ '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
new file mode 100644
index 0000000..4180919
--- /dev/null
+++ b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+ 'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+ depth: 101
+ groups: 64
+ base_width: 4
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml b/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..274c171
--- /dev/null
+++ b/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml b/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..1cd02ac
--- /dev/null
+++ b/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../faster_rcnn/_base_/optimizer_1x.yml',
+ '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+ '../faster_rcnn/_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..735edbb
--- /dev/null
+++ b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_1x_coco/model_final # directory is named after this config file (1x)
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml
new file mode 100644
index 0000000..685d967
--- /dev/null
+++ b/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml b/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
new file mode 100644
index 0000000..68fef48
--- /dev/null
+++ b/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,17 @@
+_BASE_: [
+ 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ groups: 64
+ base_width: 4
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml b/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..930bd89
--- /dev/null
+++ b/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml b/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..b14a1ed
--- /dev/null
+++ b/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '../mask_rcnn/_base_/optimizer_1x.yml',
+ '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
+ '../mask_rcnn/_base_/mask_fpn_reader.yml',
+]
+weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml b/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml
new file mode 100644
index 0000000..d36b5f5
--- /dev/null
+++ b/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml b/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
new file mode 100644
index 0000000..8e7857c
--- /dev/null
+++ b/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,17 @@
+_BASE_: [
+ 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ variant: d
+ groups: 64
+ base_width: 4
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
diff --git a/configs/dota/README.md b/configs/dota/README.md
new file mode 100644
index 0000000..3fe6bd8
--- /dev/null
+++ b/configs/dota/README.md
@@ -0,0 +1,125 @@
+# S2ANet模型
+
+## 内容
+- [简介](#简介)
+- [DOTA数据集](#DOTA数据集)
+- [模型库](#模型库)
+- [训练说明](#训练说明)
+
+## 简介
+
+[S2ANet](https://arxiv.org/pdf/2008.09397.pdf)是用于检测旋转框的模型,要求使用PaddlePaddle 2.0.1(可使用pip安装) 或适当的[develop版本](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#whl-release)。
+
+
+## DOTA数据集
+[DOTA Dataset]是航空影像中物体检测的数据集,包含2806张图像,每张图像4000*4000分辨率。
+
+| 数据版本 | 类别数 | 图像数 | 图像尺寸 | 实例数 | 标注方式 |
+|:--------:|:-------:|:---------:|:---------:| :---------:| :------------: |
+| v1.0 | 15 | 2806 | 800~4000 | 118282 | OBB + HBB |
+| v1.5 | 16 | 2806 | 800~4000 | 400000 | OBB + HBB |
+
+注:OBB标注方式是指标注任意四边形;顶点按顺时针顺序排列。HBB标注方式是指标注实例的外接矩形。
+
+DOTA数据集中总共有2806张图像,其中1411张图像作为训练集,458张图像作为评估集,剩余937张图像作为测试集。
+
+如果需要切割图像数据,请参考[DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) 。
+
+设置`crop_size=1024, stride=824, gap=200`参数切割数据后,训练集15749张图像,评估集5297张图像,测试集10833张图像。
+
+## 模型库
+
+### S2ANet模型
+
+| 模型 | GPU个数 | Conv类型 | mAP | 模型下载 | 配置文件 |
+|:-----------:|:-------:|:----------:|:--------:| :----------:| :---------: |
+| S2ANet | 8 | Conv | 71.42 | [model](https://paddledet.bj.bcebos.com/models/s2anet_conv_1x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dota/s2anet_conv_1x_dota.yml) |
+
+**注意:**这里使用`multiclass_nms`,与原作者使用nms略有不同,精度相比原始论文中高0.15 (71.27-->71.42)。
+
+## 训练说明
+
+### 1. 旋转框IOU计算OP
+
+旋转框IOU计算OP[ext_op](../../ppdet/ext_op)是参考Paddle[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/07_new_op/new_custom_op.html) 的方式开发。
+
+若使用旋转框IOU计算OP,需要环境满足:
+- PaddlePaddle >= 2.0.1
+- GCC == 8.2
+
+推荐使用docker镜像`registry.baidubce.com/paddlepaddle/paddle:2.0.1-gpu-cuda10.1-cudnn7`。
+
+执行如下命令下载镜像并启动容器:
+```
+sudo nvidia-docker run -it --name paddle_s2anet -v $PWD:/paddle --network=host registry.baidubce.com/paddlepaddle/paddle:2.0.1-gpu-cuda10.1-cudnn7 /bin/bash
+```
+
+镜像中paddle2.0.1已安装好,进入python3.7,执行如下代码检查paddle安装是否正常:
+```
+import paddle
+print(paddle.__version__)
+paddle.utils.run_check()
+```
+
+进入到`ppdet/ext_op`文件夹,安装:
+```
+python3.7 setup.py install
+```
+
+Windows环境请按照如下步骤安装:
+
+(1)准备Visual Studio (版本需要>=Visual Studio 2015 update3),这里以VS2017为例;
+
+(2)点击开始-->Visual Studio 2017-->适用于 VS 2017 的x64本机工具命令提示;
+
+(3)设置环境变量:`set DISTUTILS_USE_SDK=1`
+
+(4)进入`PaddleDetection/ppdet/ext_op`目录,通过`python3.7 setup.py install`命令进行安装。
+
+安装完成后,测试自定义op是否可以正常编译以及计算结果:
+```
+cd PaddleDetection/ppdet/ext_op
+python3.7 test.py
+```
+
+### 2. 数据格式
+DOTA 数据集中实例是按照任意四边形标注,在进行训练模型前,需要参考[DOTA2COCO](https://github.com/CAPTAIN-WHU/DOTA_devkit/blob/master/DOTA2COCO.py) 转换成`[xc, yc, box_w, box_h, angle]`格式,并以coco数据格式存储。
+
+## 评估
+
+执行如下命令,会在`output_dir`文件夹下将每个图像预测结果保存到同文件夹名的txt文本中。
+```
+python3.7 tools/infer.py -c configs/dota/s2anet_1x_dota.yml -o weights=./weights/s2anet_1x_dota.pdparams --infer_dir=dota_test_images --draw_threshold=0.05 --save_txt=True --output_dir=output
+```
+
+
+请参考[DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) 生成评估文件,评估文件格式请参考[DOTA Test](http://captain.whu.edu.cn/DOTAweb/tasks.html) ,生成zip文件,每个类一个txt文件,txt文件中每行格式为:`image_id score x1 y1 x2 y2 x3 y3 x4 y4`,提交服务器进行评估。
+
+## 预测部署
+
+Paddle中`multiclass_nms`算子支持四边形输入,因此部署时可以不依赖旋转框IOU计算算子。
+
+```bash
+# 预测
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/dota/s2anet_1x_dota.yml -o weights=model.pdparams --infer_img=demo/P0072__1.0__0___0.png
+```
+
+
+## Citations
+```
+@article{han2021align,
+ author={J. {Han} and J. {Ding} and J. {Li} and G. -S. {Xia}},
+ journal={IEEE Transactions on Geoscience and Remote Sensing},
+ title={Align Deep Features for Oriented Object Detection},
+ year={2021},
+ pages={1-11},
+ doi={10.1109/TGRS.2021.3062048}}
+
+@inproceedings{xia2018dota,
+ title={DOTA: A large-scale dataset for object detection in aerial images},
+ author={Xia, Gui-Song and Bai, Xiang and Ding, Jian and Zhu, Zhen and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={3974--3983},
+ year={2018}
+}
+```
diff --git a/configs/dota/_base_/s2anet.yml b/configs/dota/_base_/s2anet.yml
new file mode 100644
index 0000000..f4e4974
--- /dev/null
+++ b/configs/dota/_base_/s2anet.yml
@@ -0,0 +1,55 @@
+architecture: S2ANet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams
+
+
+# Model Architecture
+S2ANet:
+ backbone: ResNet
+ neck: FPN
+ s2anet_head: S2ANetHead
+ s2anet_bbox_post_process: S2ANetBBoxPostProcess
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ return_idx: [1,2,3]
+ num_stages: 4
+
+FPN:
+ in_channels: [256, 512, 1024]
+ out_channel: 256
+ spatial_scales: [0.25, 0.125, 0.0625]
+ has_extra_convs: True
+ extra_stage: 2
+ relu_before_extra_convs: False
+
+S2ANetHead:
+ anchor_strides: [8, 16, 32, 64, 128]
+ anchor_scales: [4]
+ anchor_ratios: [1.0]
+ anchor_assign: RBoxAssigner
+ stacked_convs: 2
+ feat_in: 256
+ feat_out: 256
+ num_classes: 15
+ align_conv_type: 'Conv' # AlignConv Conv
+ align_conv_size: 3
+ use_sigmoid_cls: True
+
+RBoxAssigner:
+ pos_iou_thr: 0.5
+ neg_iou_thr: 0.4
+ min_iou_thr: 0.0
+ ignore_iof_thr: -2
+
+S2ANetBBoxPostProcess:
+ nms_pre: 2000
+ min_bbox_size: 0.0
+ nms:
+ name: MultiClassNMS
+ keep_top_k: -1
+ score_threshold: 0.05
+ nms_threshold: 0.1
+ normalized: False
+ #background_label: -1
diff --git a/configs/dota/_base_/s2anet_optimizer_1x.yml b/configs/dota/_base_/s2anet_optimizer_1x.yml
new file mode 100644
index 0000000..65f794d
--- /dev/null
+++ b/configs/dota/_base_/s2anet_optimizer_1x.yml
@@ -0,0 +1,20 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [7, 10]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+ clip_grad_by_norm: 35
diff --git a/configs/dota/_base_/s2anet_reader.yml b/configs/dota/_base_/s2anet_reader.yml
new file mode 100644
index 0000000..c3df7a0
--- /dev/null
+++ b/configs/dota/_base_/s2anet_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 0
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Rbox2Poly: {}
+ # Resize can process rbox
+ - Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/dota/s2anet_1x_dota.yml b/configs/dota/s2anet_1x_dota.yml
new file mode 100644
index 0000000..d480c1c
--- /dev/null
+++ b/configs/dota/s2anet_1x_dota.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/dota.yml',
+ '../runtime.yml',
+ '_base_/s2anet_optimizer_1x.yml',
+ '_base_/s2anet.yml',
+ '_base_/s2anet_reader.yml',
+]
+weights: output/s2anet_1x_dota/model_final
diff --git a/configs/dota/s2anet_conv_1x_dota.yml b/configs/dota/s2anet_conv_1x_dota.yml
new file mode 100644
index 0000000..60931b1
--- /dev/null
+++ b/configs/dota/s2anet_conv_1x_dota.yml
@@ -0,0 +1,21 @@
+_BASE_: [
+ '../datasets/dota.yml',
+ '../runtime.yml',
+ '_base_/s2anet_optimizer_1x.yml',
+ '_base_/s2anet.yml',
+ '_base_/s2anet_reader.yml',
+]
+weights: output/s2anet_1x_dota/model_final
+
+S2ANetHead:
+ anchor_strides: [8, 16, 32, 64, 128]
+ anchor_scales: [4]
+ anchor_ratios: [1.0]
+ anchor_assign: RBoxAssigner
+ stacked_convs: 2
+ feat_in: 256
+ feat_out: 256
+ num_classes: 15
+ align_conv_type: 'Conv' # AlignConv Conv
+ align_conv_size: 3
+ use_sigmoid_cls: True
diff --git a/configs/face_detection/README.md b/configs/face_detection/README.md
new file mode 100644
index 0000000..3f0fe24
--- /dev/null
+++ b/configs/face_detection/README.md
@@ -0,0 +1,106 @@
+# 人脸检测模型
+
+## 简介
+`face_detection`中提供高效、高速的人脸检测解决方案,包括最先进的模型和经典模型。
+
+
+
+## 模型库
+
+#### WIDER-FACE数据集上的mAP
+
+| 网络结构 | 输入尺寸 | 图片个数/GPU | 学习率策略 | Easy/Medium/Hard Set | 预测时延(SD855)| 模型大小(MB) | 下载 | 配置文件 |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/face_detection/blazeface_1000e.yml) |
+
+**注意:**
+- 我们使用多尺度评估策略得到`Easy/Medium/Hard Set`里的mAP。具体细节请参考[在WIDER-FACE数据集上评估](#在WIDER-FACE数据集上评估)。
+
+## 快速开始
+
+### 数据准备
+我们使用[WIDER-FACE数据集](http://shuoyang1213.me/WIDERFACE/)进行训练和模型测试,官方网站提供了详细的数据介绍。
+- WIDER-Face数据源:
+使用如下目录结构加载`wider_face`类型的数据集:
+
+ ```
+ dataset/wider_face/
+ ├── wider_face_split
+ │ ├── wider_face_train_bbx_gt.txt
+ │ ├── wider_face_val_bbx_gt.txt
+ ├── WIDER_train
+ │ ├── images
+ │ │ ├── 0--Parade
+ │ │ │ ├── 0_Parade_marchingband_1_100.jpg
+ │ │ │ ├── 0_Parade_marchingband_1_381.jpg
+ │ │ │ │ ...
+ │ │ ├── 10--People_Marching
+ │ │ │ ...
+ ├── WIDER_val
+ │ ├── images
+ │ │ ├── 0--Parade
+ │ │ │ ├── 0_Parade_marchingband_1_1004.jpg
+ │ │ │ ├── 0_Parade_marchingband_1_1045.jpg
+ │ │ │ │ ...
+ │ │ ├── 10--People_Marching
+ │ │ │ ...
+ ```
+
+- 手动下载数据集:
+要下载WIDER-FACE数据集,请运行以下命令:
+```
+cd dataset/wider_face && ./download_wider_face.sh
+```
+
+### 训练与评估
+训练流程与评估流程方法与其他算法一致,请参考[GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。
+**注意:** 人脸检测模型目前不支持边训练边评估。
+
+#### 在WIDER-FACE数据集上评估
+- 步骤一:评估并生成结果文件:
+```shell
+python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
+ -o weights=output/blazeface_1000e/model_final \
+ multi_scale_eval=True
+```
+设置`multi_scale_eval=True`进行多尺度评估,评估完成后,将在`output/pred`中生成txt格式的测试结果。
+
+- 步骤二:下载官方评估脚本和Ground Truth文件:
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+
+- 步骤三:开始评估
+
+方法一:python评估:
+```
+git clone https://github.com/wondervictor/WiderFace-Evaluation.git
+cd WiderFace-Evaluation
+# 编译
+python3 setup.py build_ext --inplace
+# 开始评估
+python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
+```
+
+方法二:MatLab评估:
+```
+# 在`eval_tools/wider_eval.m`中修改保存结果路径和绘制曲线的名称:
+pred_dir = './pred';
+legend_name = 'Paddle-BlazeFace';
+
+`wider_eval.m` 是评估模块的主要执行程序。运行命令如下:
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+
+## Citations
+
+```
+@article{bazarevsky2019blazeface,
+ title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
+ author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
+ year={2019},
+ eprint={1907.05047},
+ archivePrefix={arXiv}}
+```
diff --git a/configs/face_detection/_base_/blazeface.yml b/configs/face_detection/_base_/blazeface.yml
new file mode 100644
index 0000000..469aa9c
--- /dev/null
+++ b/configs/face_detection/_base_/blazeface.yml
@@ -0,0 +1,39 @@
+architecture: SSD
+
+SSD:
+ backbone: BlazeNet
+ ssd_head: FaceHead
+ post_process: BBoxPostProcess
+
+BlazeNet:
+ blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+ double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+ [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+
+FaceHead:
+ in_channels: [96, 96]
+ anchor_generator: AnchorGeneratorSSD
+ loss: SSDLoss
+
+SSDLoss:
+ overlap_threshold: 0.35
+
+AnchorGeneratorSSD:
+ steps: [8., 16.]
+ aspect_ratios: [[1.], [1.]]
+ min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
+ max_sizes: [[], []]
+ offset: 0.5
+ flip: False
+ min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 750
+ score_threshold: 0.01
+ nms_threshold: 0.3
+ nms_top_k: 5000
+ nms_eta: 1.0
diff --git a/configs/face_detection/_base_/face_reader.yml b/configs/face_detection/_base_/face_reader.yml
new file mode 100644
index 0000000..7b31b49
--- /dev/null
+++ b/configs/face_detection/_base_/face_reader.yml
@@ -0,0 +1,45 @@
+worker_num: 2
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomFlip: {}
+ - CropWithDataAchorSampling: {
+ anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
+ batch_sampler: [
+ [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+ [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+ [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+ [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+ [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+ ],
+ target_size: 640}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ batch_transforms:
+ - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 8
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/face_detection/_base_/optimizer_1000e.yml b/configs/face_detection/_base_/optimizer_1000e.yml
new file mode 100644
index 0000000..d67da4c
--- /dev/null
+++ b/configs/face_detection/_base_/optimizer_1000e.yml
@@ -0,0 +1,21 @@
+epoch: 1000
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 333
+ - 800
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.0
+ type: RMSProp
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/face_detection/blazeface_1000e.yml b/configs/face_detection/blazeface_1000e.yml
new file mode 100644
index 0000000..58fc908
--- /dev/null
+++ b/configs/face_detection/blazeface_1000e.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+ '../datasets/wider_face.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1000e.yml',
+ '_base_/blazeface.yml',
+ '_base_/face_reader.yml',
+]
+weights: output/blazeface_1000e/model_final
+multi_scale_eval: True
diff --git a/configs/faster_rcnn/README.md b/configs/faster_rcnn/README.md
new file mode 100644
index 0000000..a7e08ab
--- /dev/null
+++ b/configs/faster_rcnn/README.md
@@ -0,0 +1,35 @@
+# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
+
+## Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50 | Faster | 1 | 1x | ---- | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml) |
+| ResNet50-vd | Faster | 1 | 1x | ---- | 37.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml) |
+| ResNet101 | Faster | 1 | 1x | ---- | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml) |
+| ResNet34-FPN | Faster | 1 | 1x | ---- | 37.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml) |
+| ResNet34-vd-FPN | Faster | 1 | 1x | ---- | 38.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml) |
+| ResNet50-FPN | Faster | 1 | 1x | ---- | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | Faster | 1 | 2x | ---- | 40.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml) |
+| ResNet50-vd-FPN | Faster | 1 | 1x | ---- | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN | Faster | 1 | 2x | ---- | 40.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-FPN | Faster | 1 | 2x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN | Faster | 1 | 1x | ---- | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml) |
+| ResNet101-vd-FPN | Faster | 1 | 2x | ---- | 43.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml) |
+| ResNeXt101-vd-FPN | Faster | 1 | 1x | ---- | 43.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Faster | 1 | 2x | ---- | 44.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 1x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_ssld_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_ssld_fpn_2x_coco.yml) |
+
+
+## Citations
+```
+@article{Ren_2017,
+ title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
+ journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+ publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+ author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
+ year={2017},
+ month={Jun},
+}
+```
diff --git a/configs/faster_rcnn/_base_/faster_fpn_reader.yml b/configs/faster_rcnn/_base_/faster_fpn_reader.yml
new file mode 100644
index 0000000..cf54ecc
--- /dev/null
+++ b/configs/faster_rcnn/_base_/faster_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/faster_rcnn/_base_/faster_rcnn_r50.yml b/configs/faster_rcnn/_base_/faster_rcnn_r50.yml
new file mode 100644
index 0000000..fd29f5e
--- /dev/null
+++ b/configs/faster_rcnn/_base_/faster_rcnn_r50.yml
@@ -0,0 +1,66 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+ backbone: ResNet
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [2]
+ num_stages: 3
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [32, 64, 128, 256, 512]
+ strides: [16]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 12000
+ post_nms_top_n: 2000
+ topk_after_collect: False
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 6000
+ post_nms_top_n: 1000
+
+
+BBoxHead:
+ head: Res5Head
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+ with_pool: true
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
diff --git a/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml b/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml
new file mode 100644
index 0000000..38ee81d
--- /dev/null
+++ b/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml
@@ -0,0 +1,73 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 1000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+
+BBoxHead:
+ head: TwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+TwoFCHead:
+ out_channel: 1024
+
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
diff --git a/configs/faster_rcnn/_base_/faster_reader.yml b/configs/faster_rcnn/_base_/faster_reader.yml
new file mode 100644
index 0000000..ebb4e79
--- /dev/null
+++ b/configs/faster_rcnn/_base_/faster_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/faster_rcnn/_base_/optimizer_1x.yml b/configs/faster_rcnn/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..4caaa63
--- /dev/null
+++ b/configs/faster_rcnn/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml
new file mode 100644
index 0000000..8876426
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [2]
+ num_stages: 3
diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml
new file mode 100644
index 0000000..a2e5ee5
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml
new file mode 100644
index 0000000..0a07dec
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml
@@ -0,0 +1,25 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..32e308b
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/faster_rcnn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml
new file mode 100644
index 0000000..65b8226
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml
@@ -0,0 +1,25 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/faster_rcnn_r101_vd_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml
new file mode 100644
index 0000000..f108352
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
+weights: output/faster_rcnn_r34_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 34
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..5cf576b
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_vd_pretrained.pdparams
+weights: output/faster_rcnn_r34_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 34
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml
new file mode 100644
index 0000000..a49bde8
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/faster_rcnn_r50.yml',
+ '_base_/faster_reader.yml',
+]
+weights: output/faster_rcnn_r50_1x_coco/model_final
diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..e7b4518
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/faster_rcnn_r50_fpn.yml',
+ '_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_r50_fpn_1x_coco/model_final
diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml
new file mode 100644
index 0000000..7edaadc
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+weights: output/faster_rcnn_r50_fpn_2x_coco/model_final
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml
new file mode 100644
index 0000000..ac0e720
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_r50_vd_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [2]
+ num_stages: 3
diff --git a/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..6bf9d71
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_r50_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml
new file mode 100644
index 0000000..7fc3a88
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,25 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml
new file mode 100644
index 0000000..d71b82d
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/faster_rcnn_r50_fpn.yml',
+ '_base_/faster_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/faster_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 12
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml
new file mode 100644
index 0000000..0562354
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/faster_rcnn_r50_fpn.yml',
+ '_base_/faster_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/faster_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [12, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml b/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
new file mode 100644
index 0000000..317d374
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,17 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ groups: 64
+ base_width: 4
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
new file mode 100644
index 0000000..939878f
--- /dev/null
+++ b/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
@@ -0,0 +1,28 @@
+_BASE_: [
+ 'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/faster_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ groups: 64
+ base_width: 4
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/fcos/README.md b/configs/fcos/README.md
new file mode 100644
index 0000000..27362f2
--- /dev/null
+++ b/configs/fcos/README.md
@@ -0,0 +1,31 @@
+# FCOS for Object Detection
+
+## Introduction
+
+FCOS (Fully Convolutional One-Stage Object Detection) is a fast anchor-free object detection framework with strong performance. We reproduced the model from the paper and further optimized it to improve the accuracy of FCOS.
+
+**Highlights:**
+
+- Training Time: Training the `fcos_r50_fpn_1x` model on 8 Tesla V100 GPUs takes only 8.5 hours.
+
+## Model Zoo
+
+| Backbone | Model | images/GPU | lr schedule |FPS | Box AP | download | config |
+| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50-FPN | FCOS | 2 | 1x | ---- | 39.6 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | FCOS+DCN | 2 | 1x | ---- | 44.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | FCOS+multiscale_train | 2 | 2x | ---- | 41.8 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_multiscale_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml) |
+
+**Notes:**
+
+- FCOS is trained on the COCO train2017 dataset and evaluated on val2017; the reported results are `mAP(IoU=0.5:0.95)`.
+
+## Citations
+```
+@inproceedings{tian2019fcos,
+ title = {{FCOS}: Fully Convolutional One-Stage Object Detection},
+ author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
+ booktitle = {Proc. Int. Conf. Computer Vision (ICCV)},
+ year = {2019}
+}
+```
diff --git a/configs/fcos/_base_/fcos_r50_fpn.yml b/configs/fcos/_base_/fcos_r50_fpn.yml
new file mode 100644
index 0000000..64a275d
--- /dev/null
+++ b/configs/fcos/_base_/fcos_r50_fpn.yml
@@ -0,0 +1,55 @@
+architecture: FCOS
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FCOS:
+ backbone: ResNet
+ neck: FPN
+ fcos_head: FCOSHead
+ fcos_post_process: FCOSPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+ spatial_scales: [0.125, 0.0625, 0.03125]
+ extra_stage: 2
+ has_extra_convs: true
+ use_c5: false
+
+FCOSHead:
+ fcos_feat:
+ name: FCOSFeat
+ feat_in: 256
+ feat_out: 256
+ num_convs: 4
+ norm_type: "gn"
+ use_dcn: false
+ num_classes: 80
+ fpn_stride: [8, 16, 32, 64, 128]
+ prior_prob: 0.01
+ fcos_loss: FCOSLoss
+ norm_reg_targets: true
+ centerness_on_reg: true
+
+FCOSLoss:
+ loss_alpha: 0.25
+ loss_gamma: 2.0
+ iou_loss_type: "giou"
+ reg_weights: 1.0
+
+FCOSPostProcess:
+ decode:
+ name: FCOSBox
+ num_classes: 80
+ nms:
+ name: MultiClassNMS
+ nms_top_k: 1000
+ keep_top_k: 100
+ score_threshold: 0.025
+ nms_threshold: 0.6
diff --git a/configs/fcos/_base_/fcos_reader.yml b/configs/fcos/_base_/fcos_reader.yml
new file mode 100644
index 0000000..4aa343a
--- /dev/null
+++ b/configs/fcos/_base_/fcos_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 128}
+ - Gt2FCOSTarget:
+ object_sizes_boundary: [64, 128, 256, 512]
+ center_sampling_radius: 1.5
+ downsample_ratios: [8, 16, 32, 64, 128]
+ norm_reg_targets: True
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 128}
+ batch_size: 1
+ shuffle: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 128}
+ batch_size: 1
+ shuffle: false
diff --git a/configs/fcos/_base_/optimizer_1x.yml b/configs/fcos/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..d28b094
--- /dev/null
+++ b/configs/fcos/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml b/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..fe45428
--- /dev/null
+++ b/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,32 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/fcos_r50_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/fcos_reader.yml',
+]
+
+weights: output/fcos_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
+
+FCOSHead:
+ fcos_feat:
+ name: FCOSFeat
+ feat_in: 256
+ feat_out: 256
+ num_convs: 4
+ norm_type: "gn"
+ use_dcn: true
+ num_classes: 80
+ fpn_stride: [8, 16, 32, 64, 128]
+ prior_prob: 0.01
+ fcos_loss: FCOSLoss
+ norm_reg_targets: true
+ centerness_on_reg: true
diff --git a/configs/fcos/fcos_r50_fpn_1x_coco.yml b/configs/fcos/fcos_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..0b47d45
--- /dev/null
+++ b/configs/fcos/fcos_r50_fpn_1x_coco.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/fcos_r50_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/fcos_reader.yml',
+]
+
+weights: output/fcos_r50_fpn_1x_coco/model_final
diff --git a/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml b/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml
new file mode 100644
index 0000000..291f8d8
--- /dev/null
+++ b/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml
@@ -0,0 +1,39 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/fcos_r50_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/fcos_reader.yml',
+]
+
+weights: output/fcos_r50_fpn_multiscale_2x_coco/model_final
+
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 128}
+ - Gt2FCOSTarget:
+ object_sizes_boundary: [64, 128, 256, 512]
+ center_sampling_radius: 1.5
+ downsample_ratios: [8, 16, 32, 64, 128]
+ norm_reg_targets: True
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+
+epoch: 24
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
diff --git a/configs/gn/README.md b/configs/gn/README.md
new file mode 100644
index 0000000..cc398af
--- /dev/null
+++ b/configs/gn/README.md
@@ -0,0 +1,23 @@
+# Group Normalization
+
+## Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
+| :------------- | :------------- | :-----------: | :------: | :--------: |:-----: | :-----: | :----: | :----: |
+| ResNet50-FPN | Faster | 1 | 2x | - | 41.9 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml) |
+| ResNet50-FPN | Mask | 1 | 2x | - | 42.3 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml) |
+| ResNet50-FPN | Cascade Faster | 1 | 2x | - | 44.6 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml) |
+| ResNet50-FPN | Cascade Mask | 1 | 2x | - | 45.0 | 39.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml) |
+
+
+**注意:** Faster R-CNN baseline仅使用 `2fc` head,而此处使用[`4conv1fc` head](https://arxiv.org/abs/1803.08494)(4层conv之间使用GN),并且FPN也使用GN,而对于Mask R-CNN是在mask head的4层conv之间也使用GN。
+
+## Citations
+```
+@inproceedings{wu2018group,
+ title={Group Normalization},
+ author={Wu, Yuxin and He, Kaiming},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2018}
+}
+```
diff --git a/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml b/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml
new file mode 100644
index 0000000..e2c750d
--- /dev/null
+++ b/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml
@@ -0,0 +1,61 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '../cascade_rcnn/_base_/optimizer_1x.yml',
+ '../cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml',
+ '../cascade_rcnn/_base_/cascade_mask_fpn_reader.yml',
+]
+weights: output/cascade_mask_rcnn_r50_fpn_gn_2x_coco/model_final
+
+CascadeRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: CascadeHead
+ mask_head: MaskHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+ mask_post_process: MaskPostProcess
+
+FPN:
+ out_channel: 256
+ norm_type: gn
+
+CascadeHead:
+ head: CascadeXConvNormHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+CascadeXConvNormHead:
+ num_convs: 4
+ out_channel: 1024
+ norm_type: gn
+
+MaskHead:
+ head: MaskFeat
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ mask_assigner: MaskAssigner
+ share_bbox_feat: False
+
+MaskFeat:
+ num_convs: 4
+ out_channel: 256
+ norm_type: gn
+
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml b/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml
new file mode 100644
index 0000000..2706790
--- /dev/null
+++ b/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml
@@ -0,0 +1,37 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../cascade_rcnn/_base_/optimizer_1x.yml',
+ '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
+ '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_r50_fpn_gn_2x_coco/model_final
+
+FPN:
+ out_channel: 256
+ norm_type: gn
+
+CascadeHead:
+ head: CascadeXConvNormHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+CascadeXConvNormHead:
+ num_convs: 4
+ out_channel: 1024
+ norm_type: gn
+
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml b/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml
new file mode 100644
index 0000000..200a98b
--- /dev/null
+++ b/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml
@@ -0,0 +1,45 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../faster_rcnn/_base_/optimizer_1x.yml',
+ '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+ '../faster_rcnn/_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_r50_fpn_gn_2x_coco/model_final
+
+FasterRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+FPN:
+ out_channel: 256
+ norm_type: gn
+
+BBoxHead:
+ head: XConvNormHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+XConvNormHead:
+ num_convs: 4
+ out_channel: 1024
+ norm_type: gn
+
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml b/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml
new file mode 100644
index 0000000..70beaf5
--- /dev/null
+++ b/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml
@@ -0,0 +1,61 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '../mask_rcnn/_base_/optimizer_1x.yml',
+ '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
+ '../mask_rcnn/_base_/mask_fpn_reader.yml',
+]
+weights: output/mask_rcnn_r50_fpn_gn_2x_coco/model_final
+
+MaskRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ mask_head: MaskHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+ mask_post_process: MaskPostProcess
+
+FPN:
+ out_channel: 256
+ norm_type: gn
+
+BBoxHead:
+ head: XConvNormHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+XConvNormHead:
+ num_convs: 4
+ out_channel: 1024
+ norm_type: gn
+
+MaskHead:
+ head: MaskFeat
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ mask_assigner: MaskAssigner
+ share_bbox_feat: False
+
+MaskFeat:
+ num_convs: 4
+ out_channel: 256
+ norm_type: gn
+
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md
new file mode 100644
index 0000000..9f581ab
--- /dev/null
+++ b/configs/hrnet/README.md
@@ -0,0 +1,34 @@
+# High-resolution networks (HRNets) for object detection
+
+## Introduction
+
+- Deep High-Resolution Representation Learning for Human Pose Estimation: [https://arxiv.org/abs/1902.09212](https://arxiv.org/abs/1902.09212)
+
+```
+@inproceedings{SunXLW19,
+ title={Deep High-Resolution Representation Learning for Human Pose Estimation},
+ author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
+ booktitle={CVPR},
+ year={2019}
+}
+```
+
+- High-Resolution Representations for Labeling Pixels and Regions: [https://arxiv.org/abs/1904.04514](https://arxiv.org/abs/1904.04514)
+
+```
+@article{SunZJCXLMWLW19,
+ title={High-Resolution Representations for Labeling Pixels and Regions},
+ author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao
+ and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang},
+ journal = {CoRR},
+ volume = {abs/1904.04514},
+ year={2019}
+}
+```
+
+## Model Zoo
+
+| Backbone | Type | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP | Download | Configs |
+| :---------------------- | :------------- | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: | :-----: |
+| HRNetV2p_W18 | Faster | 1 | 1x | - | 36.8 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_hrnetv2p_w18_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml) |
+| HRNetV2p_W18 | Faster | 1 | 2x | - | 39.0 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_hrnetv2p_w18_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml) |
diff --git a/configs/hrnet/_base_/faster_rcnn_hrnetv2p_w18.yml b/configs/hrnet/_base_/faster_rcnn_hrnetv2p_w18.yml
new file mode 100644
index 0000000..6c556f3
--- /dev/null
+++ b/configs/hrnet/_base_/faster_rcnn_hrnetv2p_w18.yml
@@ -0,0 +1,68 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams
+
+FasterRCNN:
+ backbone: HRNet
+ neck: HRFPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+HRNet:
+ width: 18
+ freeze_at: 0
+ return_idx: [0, 1, 2, 3]
+
+HRFPN:
+ out_channel: 256
+ share_conv: false
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+BBoxHead:
+ head: TwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+TwoFCHead:
+ out_channel: 1024
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml
new file mode 100644
index 0000000..6ff0596
--- /dev/null
+++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml
@@ -0,0 +1,23 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ './_base_/faster_rcnn_hrnetv2p_w18.yml',
+ '../faster_rcnn/_base_/optimizer_1x.yml',
+ '../faster_rcnn/_base_/faster_fpn_reader.yml',
+ '../runtime.yml',
+]
+
+weights: output/faster_rcnn_hrnetv2p_w18_1x_coco/model_final
+epoch: 12
+
+LearningRate:
+ base_lr: 0.02
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+TrainReader:
+ batch_size: 2
diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml
new file mode 100644
index 0000000..73d9dc8
--- /dev/null
+++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml
@@ -0,0 +1,23 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ './_base_/faster_rcnn_hrnetv2p_w18.yml',
+ '../faster_rcnn/_base_/optimizer_1x.yml',
+ '../faster_rcnn/_base_/faster_fpn_reader.yml',
+ '../runtime.yml',
+]
+
+weights: output/faster_rcnn_hrnetv2p_w18_2x_coco/model_final
+epoch: 24
+
+LearningRate:
+ base_lr: 0.02
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+TrainReader:
+ batch_size: 2
diff --git a/configs/mask_rcnn/README.md b/configs/mask_rcnn/README.md
new file mode 100644
index 0000000..89f7f8e
--- /dev/null
+++ b/configs/mask_rcnn/README.md
@@ -0,0 +1,31 @@
+# Mask R-CNN
+
+## Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
+| :------------------- | :------------| :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50 | Mask | 1 | 1x | ---- | 37.4 | 32.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml) |
+| ResNet50 | Mask | 1 | 2x | ---- | 39.7 | 34.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml) |
+| ResNet50-FPN | Mask | 1 | 1x | ---- | 39.2 | 35.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | Mask | 1 | 2x | ---- | 40.5 | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml) |
+| ResNet50-vd-FPN | Mask | 1 | 1x | ---- | 40.3 | 36.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN | Mask | 1 | 2x | ---- | 41.4 | 37.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-FPN | Mask | 1 | 1x | ---- | 40.6 | 36.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml) |
+| ResNet101-vd-FPN | Mask | 1 | 1x | ---- | 42.4 | 38.1 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Mask | 1 | 1x | ---- | 44.0 | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN | Mask | 1 | 2x | ---- | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Mask | 1 | 1x | ---- | 42.0 | 38.2 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Mask | 1 | 2x | ---- | 42.7 | 38.9 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+
+
+## Citations
+```
+@article{He_2017,
+ title={Mask R-CNN},
+ journal={2017 IEEE International Conference on Computer Vision (ICCV)},
+ publisher={IEEE},
+ author={He, Kaiming and Gkioxari, Georgia and Dollar, Piotr and Girshick, Ross},
+ year={2017},
+ month={Oct}
+}
+```
diff --git a/configs/mask_rcnn/_base_/mask_fpn_reader.yml b/configs/mask_rcnn/_base_/mask_fpn_reader.yml
new file mode 100644
index 0000000..d2cb8ec
--- /dev/null
+++ b/configs/mask_rcnn/_base_/mask_fpn_reader.yml
@@ -0,0 +1,39 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/mask_rcnn/_base_/mask_rcnn_r50.yml b/configs/mask_rcnn/_base_/mask_rcnn_r50.yml
new file mode 100644
index 0000000..04dab63
--- /dev/null
+++ b/configs/mask_rcnn/_base_/mask_rcnn_r50.yml
@@ -0,0 +1,87 @@
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+MaskRCNN:
+ backbone: ResNet
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ mask_head: MaskHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+ mask_post_process: MaskPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [2]
+ num_stages: 3
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [32, 64, 128, 256, 512]
+ strides: [16]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 12000
+ post_nms_top_n: 2000
+ topk_after_collect: False
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 6000
+ post_nms_top_n: 1000
+
+
+BBoxHead:
+ head: Res5Head
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+ with_pool: true
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
+
+MaskHead:
+ head: MaskFeat
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ mask_assigner: MaskAssigner
+ share_bbox_feat: true
+
+MaskFeat:
+ num_convs: 0
+ out_channel: 256
+
+MaskAssigner:
+ mask_resolution: 14
+
+MaskPostProcess:
+ binary_thresh: 0.5
diff --git a/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml b/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml
new file mode 100644
index 0000000..dd75876
--- /dev/null
+++ b/configs/mask_rcnn/_base_/mask_rcnn_r50_fpn.yml
@@ -0,0 +1,91 @@
+architecture: MaskRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+MaskRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ mask_head: MaskHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+ mask_post_process: MaskPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 1000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+BBoxHead:
+ head: TwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+TwoFCHead:
+ out_channel: 1024
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
+
+MaskHead:
+ head: MaskFeat
+ roi_extractor:
+ resolution: 14
+ sampling_ratio: 0
+ aligned: True
+ mask_assigner: MaskAssigner
+ share_bbox_feat: False
+
+MaskFeat:
+ num_convs: 4
+ out_channel: 256
+
+MaskAssigner:
+ mask_resolution: 28
+
+MaskPostProcess:
+ binary_thresh: 0.5
diff --git a/configs/mask_rcnn/_base_/mask_reader.yml b/configs/mask_rcnn/_base_/mask_reader.yml
new file mode 100644
index 0000000..b43d312
--- /dev/null
+++ b/configs/mask_rcnn/_base_/mask_reader.yml
@@ -0,0 +1,41 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1, pad_gt: true}
+ batch_size: 1
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: -1}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
diff --git a/configs/mask_rcnn/_base_/optimizer_1x.yml b/configs/mask_rcnn/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..63f898e
--- /dev/null
+++ b/configs/mask_rcnn/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml
new file mode 100644
index 0000000..aae703c
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml
@@ -0,0 +1,13 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/mask_rcnn_r101_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..58d7a78
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/mask_rcnn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml
new file mode 100644
index 0000000..01f4721
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/mask_rcnn_r50.yml',
+ '_base_/mask_reader.yml',
+]
+weights: output/mask_rcnn_r50_1x_coco/model_final
diff --git a/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml
new file mode 100644
index 0000000..f1e6b66
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'mask_rcnn_r50_1x_coco.yml',
+]
+weights: output/mask_rcnn_r50_2x_coco/model_final
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..95e48c2
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/mask_rcnn_r50_fpn.yml',
+ '_base_/mask_fpn_reader.yml',
+]
+weights: output/mask_rcnn_r50_fpn_1x_coco/model_final
diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml
new file mode 100644
index 0000000..f687fd6
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+weights: output/mask_rcnn_r50_fpn_2x_coco/model_final
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
diff --git a/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml
new file mode 100644
index 0000000..d538741
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/mask_rcnn_r50_vd_fpn_1x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
diff --git a/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml
new file mode 100644
index 0000000..f85f029
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/mask_rcnn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
diff --git a/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
new file mode 100644
index 0000000..c5718a8
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/mask_rcnn_r50_fpn.yml',
+ '_base_/mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 12
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
new file mode 100644
index 0000000..65b31e6
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/mask_rcnn_r50_fpn.yml',
+ '_base_/mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [12, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml b/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
new file mode 100644
index 0000000..2387502
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,28 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ variant: d
+ groups: 64
+ base_width: 4
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 12
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml b/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
new file mode 100644
index 0000000..6a0d0f7
--- /dev/null
+++ b/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
@@ -0,0 +1,28 @@
+_BASE_: [
+ 'mask_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final
+
+ResNet:
+ # for ResNeXt: groups, base_width, base_channels
+ depth: 101
+ variant: d
+ groups: 64
+ base_width: 4
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+epoch: 24
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [16, 22]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
diff --git a/configs/pedestrian/README.md b/configs/pedestrian/README.md
new file mode 100644
index 0000000..b5b9124
--- /dev/null
+++ b/configs/pedestrian/README.md
@@ -0,0 +1,50 @@
+English | [简体中文](README_cn.md)
+# PaddleDetection applied for specific scenarios
+
+We provide some models implemented by PaddlePaddle to detect objects in specific scenarios, users can download the models and use them in these scenarios.
+
+| Task | Algorithm | Box AP | Download | Configs |
+|:---------------------|:---------:|:------:| :-------------------------------------------------------------------------------------: |:------:|
+| Pedestrian Detection | YOLOv3 | 51.8 | [model](https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/pedestrian/pedestrian_yolov3_darknet.yml) |
+
+## Pedestrian Detection
+
+The main applications of pedestrian detection include intelligent monitoring. In this scenario, photos of pedestrians are taken by surveillance cameras in public areas, and pedestrian detection is then conducted on these photos.
+
+### 1. Network
+
+The network for detecting pedestrians is YOLOv3, the backbone of which is Darknet53.
+
+### 2. Configuration for training
+
+PaddleDetection provides users with a configuration file [yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) to train YOLOv3 on the COCO dataset. Compared with this file, we modify the following parameters to train the model for pedestrian detection:
+
+* num_classes: 1
+* dataset_dir: dataset/pedestrian
+
+### 3. Accuracy
+
+The accuracy of the model trained and evaluated on our private data is shown as follows:
+
+AP at IoU=.50:.05:.95 is 0.518.
+
+AP at IoU=.50 is 0.792.
+
+### 4. Inference
+
+Users can employ the model to conduct the inference:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/pedestrian/pedestrian_yolov3_darknet.yml \
+ -o weights=https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams \
+ --infer_dir configs/pedestrian/demo \
+ --draw_threshold 0.3 \
+ --output_dir configs/pedestrian/demo/output
+```
+
+Some inference results are visualized below:
+
+
+
+
diff --git a/configs/pedestrian/README_cn.md b/configs/pedestrian/README_cn.md
new file mode 100644
index 0000000..3456670
--- /dev/null
+++ b/configs/pedestrian/README_cn.md
@@ -0,0 +1,51 @@
+[English](README.md) | 简体中文
+# 特色垂类检测模型
+
+我们提供了针对不同场景的基于PaddlePaddle的检测模型,用户可以下载模型进行使用。
+
+| 任务 | 算法 | 精度(Box AP) | 下载 | 配置文件 |
+|:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: | :------:|
+| 行人检测 | YOLOv3 | 51.8 | [下载链接](https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/pedestrian/pedestrian_yolov3_darknet.yml) |
+
+## 行人检测(Pedestrian Detection)
+
+行人检测的主要应用有智能监控。在监控场景中,大多是从公共区域的监控摄像头视角拍摄行人,获取图像后再进行行人检测。
+
+### 1. 模型结构
+
+Backbone为Darknet53的YOLOv3。
+
+
+### 2. 训练参数配置
+
+PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml),与之相比,在进行行人检测的模型训练时,我们对以下参数进行了修改:
+
+* num_classes: 1
+* dataset_dir: dataset/pedestrian
+
+### 3. 精度指标
+
+模型在我们针对监控场景的内部数据上精度指标为:
+
+IOU=.5时的AP为 0.792。
+
+IOU=.5-.95时的AP为 0.518。
+
+### 4. 预测
+
+用户可以使用我们训练好的模型进行行人检测:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/pedestrian/pedestrian_yolov3_darknet.yml \
+ -o weights=https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams \
+ --infer_dir configs/pedestrian/demo \
+ --draw_threshold 0.3 \
+ --output_dir configs/pedestrian/demo/output
+```
+
+预测结果示例:
+
+
+
+
diff --git a/configs/pedestrian/demo/001.png b/configs/pedestrian/demo/001.png
new file mode 100644
index 0000000..63ae916
Binary files /dev/null and b/configs/pedestrian/demo/001.png differ
diff --git a/configs/pedestrian/demo/002.png b/configs/pedestrian/demo/002.png
new file mode 100644
index 0000000..0de905c
Binary files /dev/null and b/configs/pedestrian/demo/002.png differ
diff --git a/configs/pedestrian/demo/003.png b/configs/pedestrian/demo/003.png
new file mode 100644
index 0000000..e9026e0
Binary files /dev/null and b/configs/pedestrian/demo/003.png differ
diff --git a/configs/pedestrian/demo/004.png b/configs/pedestrian/demo/004.png
new file mode 100644
index 0000000..d8118ec
Binary files /dev/null and b/configs/pedestrian/demo/004.png differ
diff --git a/configs/pedestrian/pedestrian.json b/configs/pedestrian/pedestrian.json
new file mode 100644
index 0000000..f72fe6d
--- /dev/null
+++ b/configs/pedestrian/pedestrian.json
@@ -0,0 +1,11 @@
+{
+ "images": [],
+ "annotations": [],
+ "categories": [
+ {
+ "supercategory": "component",
+ "id": 1,
+ "name": "pedestrian"
+ }
+ ]
+}
diff --git a/configs/pedestrian/pedestrian_yolov3_darknet.yml b/configs/pedestrian/pedestrian_yolov3_darknet.yml
new file mode 100644
index 0000000..eb860dc
--- /dev/null
+++ b/configs/pedestrian/pedestrian_yolov3_darknet.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../yolov3/_base_/optimizer_270e.yml',
+ '../yolov3/_base_/yolov3_darknet53.yml',
+ '../yolov3/_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams
+
+num_classes: 1
+
+TrainDataset:
+ !COCODataSet
+ dataset_dir: dataset/pedestrian
+ anno_path: annotations/instances_train2017.json
+ image_dir: train2017
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+ !COCODataSet
+ dataset_dir: dataset/pedestrian
+ anno_path: annotations/instances_val2017.json
+ image_dir: val2017
+
+TestDataset:
+ !ImageFolder
+ anno_path: configs/pedestrian/pedestrian.json
diff --git a/configs/ppyolo/README.md b/configs/ppyolo/README.md
new file mode 100644
index 0000000..3143229
--- /dev/null
+++ b/configs/ppyolo/README.md
@@ -0,0 +1,236 @@
+English | [简体中文](README_cn.md)
+
+# PP-YOLO
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#model-zoo)
+- [Getting Start](#getting-start)
+- [Future Work](#Future_Work)
+- [Appendix](#Appendix)
+
+## Introduction
+
+[PP-YOLO](https://arxiv.org/abs/2007.12099) is an optimized model based on YOLOv3 in PaddleDetection, whose performance (mAP on COCO) and inference speed are better than [YOLOv4](https://arxiv.org/abs/2004.10934). PaddlePaddle 2.0.0rc1 (available on pip now) or the [Daily Version](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#whl-release) is required to run PP-YOLO.
+
+PP-YOLO reached mAP(IoU=0.5:0.95) as 45.9% on COCO test-dev2017 dataset, and inference speed of FP32 on single V100 is 72.9 FPS, inference speed of FP16 with TensorRT on single V100 is 155.6 FPS.
+
+
+

+
+
+PP-YOLO improved performance and speed of YOLOv3 with following methods:
+
+- Better backbone: ResNet50vd-DCN
+- Larger training batch size: 8 GPUs and mini-batch size as 24 on each GPU
+- [Drop Block](https://arxiv.org/abs/1810.12890)
+- [Exponential Moving Average](https://www.investopedia.com/terms/e/ema.asp)
+- [IoU Loss](https://arxiv.org/pdf/1902.09630.pdf)
+- [Grid Sensitive](https://arxiv.org/abs/2004.10934)
+- [Matrix NMS](https://arxiv.org/pdf/2003.10152.pdf)
+- [CoordConv](https://arxiv.org/abs/1807.03247)
+- [Spatial Pyramid Pooling](https://arxiv.org/abs/1406.4729)
+- Better ImageNet pretrain weights
+
+## Model Zoo
+
+### PP-YOLO
+
+| Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config |
+|:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: |
+| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | 68.9 | 106.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) |
+| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.3 | 49.5 | 87.0 | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) |
+
+
+**Notes:**
+
+- PP-YOLO is trained on the COCO train2017 dataset and evaluated on the val2017 & test-dev2017 datasets; Box APtest is the evaluation result of `mAP(IoU=0.5:0.95)`.
+- PP-YOLO used 8 GPUs for training with a mini-batch size of 24 on each GPU. If the GPU number or mini-batch size is changed, the learning rate and iteration times should be adjusted according to the [FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md).
+- PP-YOLO inference speed is tested on a single Tesla V100 with batch size as 1, CUDA 10.2, CUDNN 7.5.1, and TensorRT 5.1.2.2 in TensorRT mode.
+- PP-YOLO FP32 inference speed testing uses the inference model exported by `tools/export_model.py` and is benchmarked by running `deploy/python/infer.py` with `--run_benchmark`. All testing results do not include the time cost of data reading and post-processing (NMS), which is the same testing method as [YOLOv4(AlexeyAB)](https://github.com/AlexeyAB/darknet).
+- TensorRT FP16 inference speed testing additionally excludes the time cost of the bounding-box decoding (`yolo_box`) part compared with the FP32 testing above, which means that data reading, bounding-box decoding and post-processing (NMS) are all excluded (the test method is also the same as [YOLOv4(AlexeyAB)](https://github.com/AlexeyAB/darknet)).
+
+### PP-YOLO for mobile
+
+| Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | config |
+|:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: |
+| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_mbv3_large_coco.yml) |
+| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_mbv3_small_coco.yml) |
+
+**Notes:**
+
+- PP-YOLO_MobileNetV3 is trained on the COCO train2017 dataset and evaluated on the val2017 dataset. Box APval is the evaluation result of `mAP(IoU=0.5:0.95)`, and Box AP50val is the evaluation result of `mAP(IoU=0.5)`.
+- PP-YOLO_MobileNetV3 used 4 GPUs for training with a mini-batch size of 32 on each GPU. If the GPU number or mini-batch size is changed, the learning rate and iteration times should be adjusted according to the [FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md).
+- PP-YOLO_MobileNetV3 inference speed is tested on Kirin 990 with 1 thread.
+
+### PP-YOLO tiny
+
+| Model | GPU number | images/GPU | Model Size | Post Quant Model Size | input shape | Box APval | Kirin 990 4xCore(FPS) | download | config | post quant model |
+|:----------------------------:|:-------:|:-------------:|:----------:| :-------------------: | :---------: | :------------------: | :-------------------: | :------: | :----: | :--------------: |
+| PP-YOLO tiny | 8 | 32 | 4.2MB | **1.3M** | 320 | 20.6 | 92.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_650e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_tiny_650e_coco.yml) | [inference model](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_quant.tar) |
+| PP-YOLO tiny | 8 | 32 | 4.2MB | **1.3M** | 416 | 22.7 | 65.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_650e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_tiny_650e_coco.yml) | [inference model](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_quant.tar) |
+
+**Notes:**
+
+- PP-YOLO-tiny is trained on the COCO train2017 dataset and evaluated on the val2017 dataset. Box APval is the evaluation result of `mAP(IoU=0.5:0.95)`.
+- PP-YOLO-tiny used 8 GPUs for training with a mini-batch size of 32 on each GPU. If the GPU number or mini-batch size is changed, the learning rate and iteration times should be adjusted according to the [FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md).
+- PP-YOLO-tiny inference speed is tested on Kirin 990 with 4 threads by arm8.
+- We also provide a PP-YOLO-tiny post-quantization inference model, which compresses the model to **1.3MB** with nearly no impact on inference speed and performance.
+
+### PP-YOLO on Pascal VOC
+
+PP-YOLO trained on Pascal VOC dataset as follows:
+
+| Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config |
+|:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: |
+| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+
+## Getting Start
+
+### 1. Training
+
+Training PP-YOLO on 8 GPUs with following command(all commands should be run under PaddleDetection dygraph directory as default)
+
+```bash
+python -m paddle.distributed.launch --log_dir=./ppyolo_dygraph/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml &>ppyolo_dygraph.log 2>&1 &
+```
+
+optional: Run `tools/anchor_cluster.py` to get anchors suitable for your dataset, and modify the anchor setting in model configuration file and reader configuration file, such as `configs/ppyolo/_base_/ppyolo_tiny.yml` and `configs/ppyolo/_base_/ppyolo_tiny_reader.yml`.
+
+``` bash
+python tools/anchor_cluster.py -c configs/ppyolo/ppyolo_tiny_650e_coco.yml -n 9 -s 320 -m v2 -i 1000
+```
+
+### 2. Evaluation
+
+Evaluating PP-YOLO on COCO val2017 dataset in single GPU with following commands:
+
+```bash
+# use weights released in PaddleDetection model zoo
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# use saved checkpoint in training
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=output/ppyolo_r50vd_dcn_1x_coco/model_final
+```
+
+For evaluation on the COCO test-dev2017 dataset, `configs/ppyolo/ppyolo_test.yml` should be used. Please download the COCO test-dev2017 dataset from [COCO dataset download](https://cocodataset.org/#download), decompress it to the paths configured by `EvalReader.dataset` in `configs/ppyolo/ppyolo_test.yml`, and run evaluation with the following commands:
+
+```bash
+# use weights released in PaddleDetection model zoo
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_test.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# use saved checkpoint in training
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_test.yml -o weights=output/ppyolo_r50vd_dcn_1x_coco/model_final
+```
+
+Evaluation results will be saved in `bbox.json`. Compress it into a `zip` package and upload it to [COCO dataset evaluation](https://competitions.codalab.org/competitions/20794#participate) for evaluation.
+
+**NOTE 1:** `configs/ppyolo/ppyolo_test.yml` is only used for evaluation on the COCO test-dev2017 dataset; it cannot be used for training or for evaluation on the COCO val2017 dataset.
+
+**NOTE 2:** Due to the overall upgrade of the dynamic graph framework, the following weights published by PaddleDetection need to be evaluated with the `--bias` option added, for example:
+
+```bash
+# use weights released in PaddleDetection model zoo
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --bias
+```
+These models are:
+
+1.ppyolo_r50vd_dcn_1x_coco
+
+2.ppyolo_r50vd_dcn_voc
+
+3.ppyolo_r18vd_coco
+
+4.ppyolo_mbv3_large_coco
+
+5.ppyolo_mbv3_small_coco
+
+6.ppyolo_tiny_650e_coco
+
+### 3. Inference
+
+Run inference on images on a single GPU with the following commands. Use `--infer_img` to run inference on a single image and `--infer_dir` to run inference on all images in a directory.
+
+```bash
+# inference single image
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
+
+# inference all images in the directory
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --infer_dir=demo
+```
+
+### 4. Inference deployment
+
+For inference deployment or benchmarking, the model should be exported with `tools/export_model.py` and then run with the Paddle inference library using the following commands:
+
+```bash
+# export model, model will be saved in output/ppyolo by default
+python tools/export_model.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# inference with Paddle Inference library
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyolo_r50vd_dcn_1x_coco --image_file=demo/000000014439_640x640.jpg --use_gpu=True
+```
+
+
+## Future work
+
+1. More PP-YOLO tiny models
+2. PP-YOLO models with more backbones
+
+## Appendix
+
+Optimization methods and ablation experiments of PP-YOLO compared with YOLOv3.
+
+| NO. | Model | Box APval | Box APtest | Params(M) | FLOPs(G) | V100 FP32 FPS |
+| :--: | :--------------------------- | :------------------: |:--------------------: | :-------: | :------: | :-----------: |
+| A | YOLOv3-DarkNet53 | 38.9 | - | 59.13 | 65.52 | 58.2 |
+| B | YOLOv3-ResNet50vd-DCN | 39.1 | - | 43.89 | 44.71 | 79.2 |
+| C | B + LB + EMA + DropBlock | 41.4 | - | 43.89 | 44.71 | 79.2 |
+| D | C + IoU Loss | 41.9 | - | 43.89 | 44.71 | 79.2 |
+| E | D + IoU Aware | 42.5 | - | 43.90 | 44.71 | 74.9 |
+| F | E + Grid Sensitive | 42.8 | - | 43.90 | 44.71 | 74.8 |
+| G | F + Matrix NMS | 43.5 | - | 43.90 | 44.71 | 74.8 |
+| H | G + CoordConv | 44.0 | - | 43.93 | 44.76 | 74.1 |
+| I | H + SPP | 44.3 | 45.2 | 44.93 | 45.12 | 72.9 |
+| J | I + Better ImageNet Pretrain | 44.8 | 45.2 | 44.93 | 45.12 | 72.9 |
+| K | J + 2x Scheduler | 45.3 | 45.9 | 44.93 | 45.12 | 72.9 |
+
+**Notes:**
+
+- Performance and inference speed are measured with an input shape of 608.
+- All models are trained on the COCO train2017 dataset and evaluated on the val2017 & test-dev2017 datasets; `Box AP` is the evaluation result of `mAP(IoU=0.5:0.95)`.
+- Inference speed is tested on single Tesla V100 with batch size as 1 following test method and environment configuration in benchmark above.
+- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) with mAP of 38.9 is the optimized YOLOv3 model in PaddleDetection, see [Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/docs/MODEL_ZOO_cn.md) for details.
+
+
+## Citation
+
+```
+@misc{long2020ppyolo,
+title={PP-YOLO: An Effective and Efficient Implementation of Object Detector},
+author={Xiang Long and Kaipeng Deng and Guanzhong Wang and Yang Zhang and Qingqing Dang and Yuan Gao and Hui Shen and Jianguo Ren and Shumin Han and Errui Ding and Shilei Wen},
+year={2020},
+eprint={2007.12099},
+archivePrefix={arXiv},
+primaryClass={cs.CV}
+}
+@misc{ppdet2019,
+title={PaddleDetection, Object detection and instance segmentation toolkit based on PaddlePaddle.},
+author={PaddlePaddle Authors},
+howpublished = {\url{https://github.com/PaddlePaddle/PaddleDetection}},
+year={2019}
+}
+```
diff --git a/configs/ppyolo/README_cn.md b/configs/ppyolo/README_cn.md
new file mode 100644
index 0000000..4e7c7bc
--- /dev/null
+++ b/configs/ppyolo/README_cn.md
@@ -0,0 +1,231 @@
+简体中文 | [English](README.md)
+
+# PP-YOLO 模型
+
+## 内容
+- [简介](#简介)
+- [模型库与基线](#模型库与基线)
+- [使用说明](#使用说明)
+- [未来工作](#未来工作)
+- [附录](#附录)
+
+## 简介
+
+[PP-YOLO](https://arxiv.org/abs/2007.12099)是PaddleDetection优化和改进的YOLOv3的模型,其精度(COCO数据集mAP)和推理速度均优于[YOLOv4](https://arxiv.org/abs/2004.10934)模型,要求使用PaddlePaddle 2.0.0rc1(可使用pip安装) 或适当的[develop版本](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#whl-release)。
+
+PP-YOLO在[COCO](http://cocodataset.org) test-dev2017数据集上精度达到45.9%,在单卡V100上FP32推理速度为72.9 FPS, V100上开启TensorRT下FP16推理速度为155.6 FPS。
+
+
+

+
+
+PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度:
+
+- 更优的骨干网络: ResNet50vd-DCN
+- 更大的训练batch size: 8 GPUs,每GPU batch_size=24,对应调整学习率和迭代轮数
+- [Drop Block](https://arxiv.org/abs/1810.12890)
+- [Exponential Moving Average](https://www.investopedia.com/terms/e/ema.asp)
+- [IoU Loss](https://arxiv.org/pdf/1902.09630.pdf)
+- [Grid Sensitive](https://arxiv.org/abs/2004.10934)
+- [Matrix NMS](https://arxiv.org/pdf/2003.10152.pdf)
+- [CoordConv](https://arxiv.org/abs/1807.03247)
+- [Spatial Pyramid Pooling](https://arxiv.org/abs/1406.4729)
+- 更优的预训练模型
+
+## 模型库
+
+### PP-YOLO模型
+
+| 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
+|:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: |
+| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | 68.9 | 106.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) |
+| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.3 | 49.5 | 87.0 | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) |
+
+**注意:**
+
+- PP-YOLO模型使用COCO数据集中train2017作为训练集,使用val2017和test-dev2017作为测试集,Box APtest为`mAP(IoU=0.5:0.95)`评估结果。
+- PP-YOLO模型训练过程中使用8 GPUs,每GPU batch size为24进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md)调整学习率和迭代次数。
+- PP-YOLO模型推理速度测试采用单卡V100,batch size=1进行测试,使用CUDA 10.2, CUDNN 7.5.1,TensorRT推理速度测试使用TensorRT 5.1.2.2。
+- PP-YOLO模型FP32的推理速度测试数据为使用`tools/export_model.py`脚本导出模型后,使用`deploy/python/infer.py`脚本中的`--run_benchmark`参数使用Paddle预测库进行推理速度benchmark测试结果, 且测试的均为不包含数据预处理和模型输出后处理(NMS)的数据(与[YOLOv4(AlexeyAB)](https://github.com/AlexeyAB/darknet)测试方法一致)。
+- TensorRT FP16的速度测试相比于FP32去除了`yolo_box`(bbox解码)部分耗时,即不包含数据预处理,bbox解码和NMS(与[YOLOv4(AlexeyAB)](https://github.com/AlexeyAB/darknet)测试方法一致)。
+
+### PP-YOLO 轻量级模型
+
+| 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 配置文件 |
+|:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: |
+| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_mbv3_large_coco.yml) |
+| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_mbv3_small_coco.yml) |
+
+- PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。
+- PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md)调整学习率和迭代次数。
+- PP-YOLO_MobileNetV3 模型推理速度测试环境配置为麒麟990芯片单线程。
+
+### PP-YOLO tiny
+
+| 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 量化后模型体积 | 输入尺寸 | Box APval | Kirin 990 4xCore(FPS) | 模型下载 | 配置文件 | 量化后模型下载 |
+|:---------:|:-------:|:---------:|:---------:| :-------------------: | :---------: | :------------------: | :-------------------: | :------: | :----: | :--------------: |
+| PP-YOLO tiny | 8 | 32 | 4.2MB | **1.3M** | 320 | 20.6 | 92.3 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_650e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_tiny_650e_coco.yml) | [推理模型](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_quant.tar) |
+| PP-YOLO tiny | 8 | 32 | 4.2MB | **1.3M** | 416 | 22.7 | 65.4 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_650e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_tiny_650e_coco.yml) | [推理模型](https://paddledet.bj.bcebos.com/models/ppyolo_tiny_quant.tar) |
+
+**注意:**
+
+- PP-YOLO-tiny 在COCO train2017数据集上进行训练,在val2017数据集上进行评估,Box APval 是`mAP(IoU=0.5:0.95)`的评估结果。
+- PP-YOLO-tiny 使用8个GPU进行训练,每个GPU上的batch size为32,如果GPU数量和最小批量大小发生变化,则应根据[FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/static/docs/FAQ.md)调整学习速率和迭代次数。
+- PP-YOLO-tiny 是利用arm8在Kirin 990上4个线程来测试推理速度的。
+- 我们还提供了PP-YOLO-tiny 量化后的推理模型, 它可以将模型压缩到**1.3MB**,并且推理速度和精度几乎没有损失。
+
+### Pascal VOC数据集上的PP-YOLO
+
+PP-YOLO在Pascal VOC数据集上训练模型如下:
+
+| 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 |
+|:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: |
+| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+
+## 使用说明
+
+### 1. 训练
+
+使用8GPU通过如下命令一键式启动训练(以下命令均默认在PaddleDetection根目录运行), 通过`--eval`参数开启训练中交替评估。
+
+```bash
+python -m paddle.distributed.launch --log_dir=./ppyolo_dygraph/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml &>ppyolo_dygraph.log 2>&1 &
+```
+
+可选:在训练之前使用`tools/anchor_cluster.py`得到适用于你的数据集的anchor,并注意修改模型配置文件和Reader配置文件中的anchor设置,如`configs/ppyolo/_base_/ppyolo_tiny.yml`和`configs/ppyolo/_base_/ppyolo_tiny_reader.yml`中anchor设置
+```bash
+python tools/anchor_cluster.py -c configs/ppyolo/ppyolo_tiny_650e_coco.yml -n 9 -s 320 -m v2 -i 1000
+```
+
+### 2. 评估
+
+使用单GPU通过如下命令一键式评估模型在COCO val2017数据集效果
+
+```bash
+# 使用PaddleDetection发布的权重
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# 使用训练保存的checkpoint
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=output/ppyolo_r50vd_dcn_1x_coco/model_final
+```
+
+我们提供了`configs/ppyolo/ppyolo_test.yml`用于评估COCO test-dev2017数据集的效果,评估COCO test-dev2017数据集的效果须先从[COCO数据集下载页](https://cocodataset.org/#download)下载test-dev2017数据集,解压到`configs/ppyolo/ppyolo_test.yml`中`EvalReader.dataset`中配置的路径,并使用如下命令进行评估
+
+```bash
+# 使用PaddleDetection发布的权重
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_test.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# 使用训练保存的checkpoint
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_test.yml -o weights=output/ppyolo_r50vd_dcn_1x_coco/model_final
+```
+
+评估结果保存于`bbox.json`中,将其压缩为zip包后通过[COCO数据集评估页](https://competitions.codalab.org/competitions/20794#participate)提交评估。
+
+**注意1:** `configs/ppyolo/ppyolo_test.yml`仅用于评估COCO test-dev数据集,不用于训练和评估COCO val2017数据集。
+
+**注意2:** 由于动态图框架整体升级,以下几个PaddleDetection发布的权重模型评估时需要添加--bias字段, 例如
+
+```bash
+# 使用PaddleDetection发布的权重
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --bias
+```
+主要有:
+
+1.ppyolo_r50vd_dcn_1x_coco
+
+2.ppyolo_r50vd_dcn_voc
+
+3.ppyolo_r18vd_coco
+
+4.ppyolo_mbv3_large_coco
+
+5.ppyolo_mbv3_small_coco
+
+6.ppyolo_tiny_650e_coco
+
+### 3. 推理
+
+使用单GPU通过如下命令一键式推理图像,通过`--infer_img`指定图像路径,或通过`--infer_dir`指定目录并推理目录下所有图像
+
+```bash
+# 推理单张图像
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
+
+# 推理目录下所有图像
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams --infer_dir=demo
+```
+
+### 4. 推理部署
+
+PP-YOLO模型部署及推理benchmark需要通过`tools/export_model.py`导出模型后使用Paddle预测库进行部署和推理,可通过如下命令一键式启动。
+
+```bash
+# 导出模型,默认存储于output/ppyolo目录
+python tools/export_model.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams
+
+# 预测库推理
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyolo_r50vd_dcn_1x_coco --image_file=demo/000000014439_640x640.jpg --use_gpu=True
+```
+
+
+## 未来工作
+
+1. 发布PP-YOLO-tiny模型
+2. 发布更多骨干网络的PP-YOLO模型
+
+## 附录
+
+PP-YOLO模型相对于YOLOv3模型优化项消融实验数据如下表所示。
+
+| 序号 | 模型 | Box APval | Box APtest | 参数量(M) | FLOPs(G) | V100 FP32 FPS |
+| :--: | :--------------------------- | :------------------: | :-------------------: | :-------: | :------: | :-----------: |
+| A | YOLOv3-DarkNet53 | 38.9 | - | 59.13 | 65.52 | 58.2 |
+| B | YOLOv3-ResNet50vd-DCN | 39.1 | - | 43.89 | 44.71 | 79.2 |
+| C | B + LB + EMA + DropBlock | 41.4 | - | 43.89 | 44.71 | 79.2 |
+| D | C + IoU Loss | 41.9 | - | 43.89 | 44.71 | 79.2 |
+| E | D + IoU Aware | 42.5 | - | 43.90 | 44.71 | 74.9 |
+| F | E + Grid Sensitive | 42.8 | - | 43.90 | 44.71 | 74.8 |
+| G | F + Matrix NMS | 43.5 | - | 43.90 | 44.71 | 74.8 |
+| H | G + CoordConv | 44.0 | - | 43.93 | 44.76 | 74.1 |
+| I | H + SPP | 44.3 | 45.2 | 44.93 | 45.12 | 72.9 |
+| J | I + Better ImageNet Pretrain | 44.8 | 45.2 | 44.93 | 45.12 | 72.9 |
+| K | J + 2x Scheduler | 45.3 | 45.9 | 44.93 | 45.12 | 72.9 |
+
+**注意:**
+
+- 精度与推理速度数据均为使用输入图像尺寸为608的测试结果
+- Box AP为在COCO train2017数据集训练,val2017和test-dev2017数据集上评估`mAP(IoU=0.5:0.95)`数据
+- 推理速度为单卡V100上,batch size=1, 使用上述benchmark测试方法的测试结果,测试环境配置为CUDA 10.2,CUDNN 7.5.1
+- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml)精度38.9为PaddleDetection优化后的YOLOv3模型,可参见[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/docs/MODEL_ZOO_cn.md)
+
+## 引用
+
+```
+@misc{long2020ppyolo,
+title={PP-YOLO: An Effective and Efficient Implementation of Object Detector},
+author={Xiang Long and Kaipeng Deng and Guanzhong Wang and Yang Zhang and Qingqing Dang and Yuan Gao and Hui Shen and Jianguo Ren and Shumin Han and Errui Ding and Shilei Wen},
+year={2020},
+eprint={2007.12099},
+archivePrefix={arXiv},
+primaryClass={cs.CV}
+}
+@misc{ppdet2019,
+title={PaddleDetection, Object detection and instance segmentation toolkit based on PaddlePaddle.},
+author={PaddlePaddle Authors},
+howpublished = {\url{https://github.com/PaddlePaddle/PaddleDetection}},
+year={2019}
+}
+```
diff --git a/configs/ppyolo/_base_/optimizer_1x.yml b/configs/ppyolo/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..8e6301e
--- /dev/null
+++ b/configs/ppyolo/_base_/optimizer_1x.yml
@@ -0,0 +1,21 @@
+epoch: 405
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 243
+ - 324
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/_base_/optimizer_2x.yml b/configs/ppyolo/_base_/optimizer_2x.yml
new file mode 100644
index 0000000..92ddbf2
--- /dev/null
+++ b/configs/ppyolo/_base_/optimizer_2x.yml
@@ -0,0 +1,21 @@
+epoch: 811
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 649
+ - 730
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/_base_/optimizer_365e.yml b/configs/ppyolo/_base_/optimizer_365e.yml
new file mode 100644
index 0000000..d834a4c
--- /dev/null
+++ b/configs/ppyolo/_base_/optimizer_365e.yml
@@ -0,0 +1,21 @@
+epoch: 365
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 35.
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/_base_/optimizer_650e.yml b/configs/ppyolo/_base_/optimizer_650e.yml
new file mode 100644
index 0000000..79a1f98
--- /dev/null
+++ b/configs/ppyolo/_base_/optimizer_650e.yml
@@ -0,0 +1,22 @@
+epoch: 650
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 430
+ - 540
+ - 610
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/_base_/ppyolo_mbv3_large.yml b/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
new file mode 100644
index 0000000..0faaa9a
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
@@ -0,0 +1,56 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: large
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [13, 16]
+
+PPYOLOFPN:
+ in_channels: [160, 368]
+ coord_conv: true
+ conv_block_num: 0
+ spp: true
+ drop_block: true
+
+YOLOv3Head:
+ anchors: [[11, 18], [34, 47], [51, 126],
+ [115, 71], [120, 195], [254, 235]]
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.5
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ score_threshold: 0.005
diff --git a/configs/ppyolo/_base_/ppyolo_mbv3_small.yml b/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
new file mode 100644
index 0000000..dda9382
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
@@ -0,0 +1,56 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: small
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [9, 12]
+
+PPYOLOFPN:
+ in_channels: [96, 304]
+ coord_conv: true
+ conv_block_num: 0
+ spp: true
+ drop_block: true
+
+YOLOv3Head:
+ anchors: [[11, 18], [34, 47], [51, 126],
+ [115, 71], [120, 195], [254, 235]]
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.5
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ score_threshold: 0.005
diff --git a/configs/ppyolo/_base_/ppyolo_r18vd.yml b/configs/ppyolo/_base_/ppyolo_r18vd.yml
new file mode 100644
index 0000000..56a3483
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_r18vd.yml
@@ -0,0 +1,57 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: ResNet
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 18
+ variant: d
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+PPYOLOFPN:
+ drop_block: true
+ block_size: 3
+ keep_prob: 0.9
+ conv_block_num: 0
+
+YOLOv3Head:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 14], [23, 27], [37, 58],
+ [81, 82], [135, 169], [344, 319]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.01
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MatrixNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ post_threshold: 0.01
+ nms_top_k: -1
+ background_label: -1
diff --git a/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml b/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
new file mode 100644
index 0000000..22cad95
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
@@ -0,0 +1,66 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: ResNet
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 50
+ variant: d
+ return_idx: [1, 2, 3]
+ dcn_v2_stages: [3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+PPYOLOFPN:
+ coord_conv: true
+ drop_block: true
+ block_size: 3
+ keep_prob: 0.9
+ spp: true
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+ iou_aware: true
+ iou_aware_factor: 0.4
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+ iou_aware_loss: IouAwareLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+IouAwareLoss:
+ loss_weight: 1.0
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.01
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MatrixNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ post_threshold: 0.01
+ nms_top_k: -1
+ background_label: -1
diff --git a/configs/ppyolo/_base_/ppyolo_reader.yml b/configs/ppyolo/_base_/ppyolo_reader.yml
new file mode 100644
index 0000000..0e9e0cc
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 2
+TrainReader:
+ inputs_def:
+ num_max_boxes: 50
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 50}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
+ batch_size: 24
+ shuffle: true
+ drop_last: true
+ mixup_epoch: 25000
+ use_shared_memory: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 608, 608]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ppyolo/_base_/ppyolo_tiny.yml b/configs/ppyolo/_base_/ppyolo_tiny.yml
new file mode 100644
index 0000000..d03e2bb
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_tiny.yml
@@ -0,0 +1,55 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: PPYOLOTinyFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: large
+ scale: .5
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [7, 13, 16]
+
+PPYOLOTinyFPN:
+ detection_block_channels: [160, 128, 96]
+ spp: true
+ drop_block: true
+
+YOLOv3Head:
+ anchors: [[10, 15], [24, 36], [72, 42],
+ [35, 87], [102, 96], [60, 170],
+ [220, 125], [128, 222], [264, 266]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.5
+ downsample: [32, 16, 8]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ score_threshold: 0.005
diff --git a/configs/ppyolo/_base_/ppyolo_tiny_reader.yml b/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
new file mode 100644
index 0000000..4cbc090
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 4
+TrainReader:
+ inputs_def:
+ num_max_boxes: 100
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 100}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 15], [24, 36], [72, 42], [35, 87], [102, 96], [60, 170], [220, 125], [128, 222], [264, 266]], downsample_ratios: [32, 16, 8]}
+ batch_size: 32
+ shuffle: true
+ drop_last: true
+ mixup_epoch: 500
+ use_shared_memory: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml
new file mode 100644
index 0000000..6288ade
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml
@@ -0,0 +1,65 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: ResNet
+ neck: PPYOLOPAN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 50
+ variant: d
+ return_idx: [1, 2, 3]
+ dcn_v2_stages: [3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+PPYOLOPAN:
+ drop_block: true
+ block_size: 3
+ keep_prob: 0.9
+ spp: true
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+ iou_aware: true
+ iou_aware_factor: 0.5
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+ iou_aware_loss: IouAwareLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+IouAwareLoss:
+ loss_weight: 1.0
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.01
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MatrixNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ post_threshold: 0.01
+ nms_top_k: -1
+ background_label: -1
diff --git a/configs/ppyolo/_base_/ppyolov2_reader.yml b/configs/ppyolo/_base_/ppyolov2_reader.yml
new file mode 100644
index 0000000..7472531
--- /dev/null
+++ b/configs/ppyolo/_base_/ppyolov2_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 8
+TrainReader:
+ inputs_def:
+ num_max_boxes: 100
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 100}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
+ batch_size: 12
+ shuffle: true
+ drop_last: true
+ mixup_epoch: 25000
+ use_shared_memory: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 640, 640]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ppyolo/ppyolo_mbv3_large_coco.yml b/configs/ppyolo/ppyolo_mbv3_large_coco.yml
new file mode 100644
index 0000000..d51696d
--- /dev/null
+++ b/configs/ppyolo/ppyolo_mbv3_large_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_mbv3_large.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_mbv3_large_coco/model_final
+
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize:
+ target_size: [224, 256, 288, 320, 352, 384, 416, 448, 480, 512]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[11, 18], [34, 47], [51, 126], [115, 71], [120, 195], [254, 235]]
+ downsample_ratios: [32, 16]
+ iou_thresh: 0.25
+ num_classes: 80
+ batch_size: 32
+ mixup_epoch: 200
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/ppyolo_mbv3_small_coco.yml b/configs/ppyolo/ppyolo_mbv3_small_coco.yml
new file mode 100644
index 0000000..6dba297
--- /dev/null
+++ b/configs/ppyolo/ppyolo_mbv3_small_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_mbv3_small.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_mbv3_small_coco/model_final
+
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize:
+ target_size: [224, 256, 288, 320, 352, 384, 416, 448, 480, 512]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[11, 18], [34, 47], [51, 126], [115, 71], [120, 195], [254, 235]]
+ downsample_ratios: [32, 16]
+ iou_thresh: 0.25
+ num_classes: 80
+ batch_size: 32
+ mixup_epoch: 200
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/ppyolo_r18vd_coco.yml b/configs/ppyolo/ppyolo_r18vd_coco.yml
new file mode 100644
index 0000000..c15800e
--- /dev/null
+++ b/configs/ppyolo/ppyolo_r18vd_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r18vd.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_r18vd_coco/model_final
+
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize:
+ target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 50}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ is_scale: True
+ - Permute: {}
+ - Gt2YoloTarget:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]
+ downsample_ratios: [32, 16]
+
+ batch_size: 32
+ mixup_epoch: 500
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [512, 512], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 512, 512]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [512, 512], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.004
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml b/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml
new file mode 100644
index 0000000..918f340
--- /dev/null
+++ b/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 16
+weights: output/ppyolo_r50vd_dcn_1x_coco/model_final
diff --git a/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml b/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml
new file mode 100644
index 0000000..87b976b
--- /dev/null
+++ b/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml
@@ -0,0 +1,44 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 8
+use_ema: true
+weights: output/ppyolo_r50vd_dcn_1x_minicoco/model_final
+
+TrainReader:
+ batch_size: 12
+
+TrainDataset:
+ !COCODataSet
+ image_dir: train2017
+ # refer to https://github.com/giddyyupp/coco-minitrain
+ anno_path: annotations/instances_minitrain2017.json
+ dataset_dir: dataset/coco
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+epoch: 192
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 153
+ - 173
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml b/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml
new file mode 100644
index 0000000..ac6531f
--- /dev/null
+++ b/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/optimizer_2x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 16
+weights: output/ppyolo_r50vd_dcn_2x_coco/model_final
diff --git a/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml b/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml
new file mode 100644
index 0000000..eac22ce
--- /dev/null
+++ b/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml
@@ -0,0 +1,40 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 83
+weights: output/ppyolo_r50vd_dcn_voc/model_final
+
+TrainReader:
+ mixup_epoch: 350
+ batch_size: 12
+
+EvalReader:
+ batch_transforms:
+ - PadBatch: {pad_gt: True}
+
+epoch: 583
+
+LearningRate:
+ base_lr: 0.00333
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 466
+ - 516
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ppyolo/ppyolo_test.yml b/configs/ppyolo/ppyolo_test.yml
new file mode 100644
index 0000000..928f1c9
--- /dev/null
+++ b/configs/ppyolo/ppyolo_test.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/ppyolo_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 16
+
+EvalDataset:
+ !COCODataSet
+ image_dir: test2017
+ anno_path: annotations/image_info_test-dev2017.json
+ dataset_dir: dataset/coco
diff --git a/configs/ppyolo/ppyolo_tiny_650e_coco.yml b/configs/ppyolo/ppyolo_tiny_650e_coco.yml
new file mode 100644
index 0000000..288a0eb
--- /dev/null
+++ b/configs/ppyolo/ppyolo_tiny_650e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_tiny.yml',
+ './_base_/optimizer_650e.yml',
+ './_base_/ppyolo_tiny_reader.yml',
+]
+
+snapshot_epoch: 1
+weights: output/ppyolo_tiny_650e_coco/model_final
diff --git a/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml
new file mode 100644
index 0000000..0f1aee7
--- /dev/null
+++ b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml
@@ -0,0 +1,20 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolov2_r50vd_dcn.yml',
+ './_base_/optimizer_365e.yml',
+ './_base_/ppyolov2_reader.yml',
+]
+
+snapshot_epoch: 8
+weights: output/ppyolov2_r101vd_dcn_365e_coco/model_final
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
+
+ResNet:
+ depth: 101
+ variant: d
+ return_idx: [1, 2, 3]
+ dcn_v2_stages: [3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
diff --git a/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
new file mode 100644
index 0000000..a5e1bc3
--- /dev/null
+++ b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolov2_r50vd_dcn.yml',
+ './_base_/optimizer_365e.yml',
+ './_base_/ppyolov2_reader.yml',
+]
+
+snapshot_epoch: 8
+weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
diff --git a/configs/rcnn_enhance/README.md b/configs/rcnn_enhance/README.md
new file mode 100644
index 0000000..4a53da5
--- /dev/null
+++ b/configs/rcnn_enhance/README.md
@@ -0,0 +1,12 @@
+## 服务器端实用目标检测方案
+
+### 简介
+
+* 近年来,学术界和工业界广泛关注图像中目标检测任务。基于[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)中SSLD蒸馏方案训练得到的ResNet50_vd预训练模型(ImageNet1k验证集上Top1 Acc为82.39%),结合PaddleDetection中的丰富算子,飞桨提供了一种面向服务器端实用的目标检测方案PSS-DET(Practical Server Side Detection)。基于COCO2017目标检测数据集,V100单卡预测速度为61FPS时,COCO mAP可达41.2%。
+
+
+### 模型库
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
+| :---------------------- | :-------------: | :-------: | :-----: | :------------: | :----: | :-----: | :-------------: | :-----: |
+| ResNet50-vd-FPN-Dcnv2 | Faster | 2 | 3x | 61.425 | 41.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_enhance_3x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml) |
diff --git a/configs/rcnn_enhance/_base_/faster_rcnn_enhance.yml b/configs/rcnn_enhance/_base_/faster_rcnn_enhance.yml
new file mode 100644
index 0000000..d47fd2c
--- /dev/null
+++ b/configs/rcnn_enhance/_base_/faster_rcnn_enhance.yml
@@ -0,0 +1,81 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+
+FasterRCNN:
+ backbone: ResNet
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ # post process
+ bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ variant: d
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+ dcn_v2_stages: [1,2,3]
+ lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+FPN:
+ in_channels: [256, 512, 1024, 2048]
+ out_channel: 64
+
+RPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ anchor_sizes: [[32], [64], [128], [256], [512]]
+ strides: [4, 8, 16, 32, 64]
+ rpn_target_assign:
+ batch_size_per_im: 256
+ fg_fraction: 0.5
+ negative_overlap: 0.3
+ positive_overlap: 0.7
+ use_random: True
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ topk_after_collect: True
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 500
+ post_nms_top_n: 300
+
+
+BBoxHead:
+ head: TwoFCHead
+ roi_extractor:
+ resolution: 7
+ sampling_ratio: 0
+ aligned: True
+ bbox_assigner: BBoxLibraAssigner
+ bbox_loss: DIouLoss
+
+TwoFCHead:
+ out_channel: 1024
+
+BBoxLibraAssigner:
+ batch_size_per_im: 512
+ bg_thresh: 0.5
+ fg_thresh: 0.5
+ fg_fraction: 0.25
+ use_random: True
+
+DIouLoss:
+ loss_weight: 10.0
+ use_complete_iou_loss: true
+
+BBoxPostProcess:
+ decode: RCNNBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.05
+ nms_threshold: 0.5
diff --git a/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml b/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml
new file mode 100644
index 0000000..da6ce65
--- /dev/null
+++ b/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml
@@ -0,0 +1,41 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[384,1000], [416,1000], [448,1000], [480,1000], [512,1000], [544,1000], [576,1000], [608,1000], [640,1000], [672,1000]], interp: 2, keep_ratio: True}
+ - RandomFlip: {prob: 0.5}
+ - AutoAugment: {autoaug_type: v1}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: true}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32, pad_gt: false}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/rcnn_enhance/_base_/optimizer_3x.yml b/configs/rcnn_enhance/_base_/optimizer_3x.yml
new file mode 100644
index 0000000..8bd85fa
--- /dev/null
+++ b/configs/rcnn_enhance/_base_/optimizer_3x.yml
@@ -0,0 +1,19 @@
+epoch: 36
+
+LearningRate:
+ base_lr: 0.02
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [24, 33]
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml b/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml
new file mode 100644
index 0000000..a49f245
--- /dev/null
+++ b/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_3x.yml',
+ '_base_/faster_rcnn_enhance.yml',
+ '_base_/faster_rcnn_enhance_reader.yml',
+]
+weights: output/faster_rcnn_enhance_r50_3x_coco/model_final
diff --git a/configs/runtime.yml b/configs/runtime.yml
new file mode 100644
index 0000000..4c8d0b4
--- /dev/null
+++ b/configs/runtime.yml
@@ -0,0 +1,4 @@
+use_gpu: false
+log_iter: 20
+save_dir: output
+snapshot_epoch: 1
diff --git a/configs/slim/README.md b/configs/slim/README.md
new file mode 100644
index 0000000..8a07b08
--- /dev/null
+++ b/configs/slim/README.md
@@ -0,0 +1,131 @@
+# 模型压缩
+
+在PaddleDetection中, 提供了基于[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)进行模型压缩的完整教程和benchmark。目前支持的方法:
+
+- [剪裁](prune)
+- [量化](quant)
+- [蒸馏](distill)
+- [联合策略](extensions)
+
+推荐您使用剪裁和蒸馏联合训练,或者使用剪裁和量化,进行检测模型压缩。 下面以YOLOv3为例,进行剪裁、蒸馏和量化实验。
+
+## 实验环境
+
+- Python 3.7+
+- PaddlePaddle >= 2.0.1
+- PaddleSlim >= 2.0.0
+- CUDA 9.0+
+- cuDNN >=7.5
+
+**注意:** 量化训练需要依赖Paddle develop分支,可在[PaddlePaddle每日版本](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-dev)中下载安装合适的PaddlePaddle版本。
+
+#### 安装PaddleSlim
+- 方法一:直接安装:
+```
+pip install paddleslim -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+- 方法二:编译安装:
+```
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+cd PaddleSlim
+python setup.py install
+```
+
+## 快速开始
+
+### 训练
+
+```shell
+python tools/train.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CONFIG.yml}
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件。
+
+
+### 评估
+
+```shell
+python tools/eval.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CONFIG.yml} -o weights=output/{SLIM_CONFIG}/model_final
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件。
+- `-o weights`: 指定压缩算法训好的模型路径。
+
+### 测试
+
+```shell
+python tools/infer.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CONFIG.yml} \
+ -o weights=output/{SLIM_CONFIG}/model_final \
+ --infer_img={IMAGE_PATH}
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件。
+- `-o weights`: 指定压缩算法训好的模型路径。
+- `--infer_img`: 指定测试图像路径。
+
+
+### 动转静导出模型
+
+```shell
+python tools/export_model.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CONFIG.yml} -o weights=output/{SLIM_CONFIG}/model_final
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件。
+- `-o weights`: 指定压缩算法训好的模型路径。
+
+
+## Benchmark
+
+### 剪裁
+
+#### Pascal VOC上benchmark
+
+| 模型 | 压缩策略 | GFLOPs | 模型体积(MB) | 输入尺寸 | 预测时延(SD855)| Box AP | 下载 | 模型配置文件 | 压缩算法配置文件 |
+| :----------------| :-------: | :------------: | :-------------: | :------: | :--------: | :------: | :-----------------------------------------------------: |:-------------: | :------: |
+| YOLOv3-MobileNetV1 | baseline | 24.13 | 93 | 608 | 289.9ms | 75.1 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) | - |
+| YOLOv3-MobileNetV1 | 剪裁-l1_norm(sensity) | 15.78(-34.49%) | 66(-29%) | 608 | - | 78.4(+3.3) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_voc_prune_l1_norm.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/prune/yolov3_prune_l1_norm.yml) |
+
+- 目前剪裁支持YOLO系列、SSD、TTFNet、BlazeFace,其余模型正在开发支持中。
+- SD855预测时延为使用PaddleLite部署,使用ARMv8架构并使用4线程(4 Threads)的推理时延。
+
+### 量化
+
+#### COCO上benchmark
+
+| 模型 | 压缩策略 | 输入尺寸 | Box AP | 下载 | 模型配置文件 | 压缩算法配置文件 |
+| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline | 608 | 28.8 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
+| YOLOv3-MobileNetV1 | 普通在线量化 | 608 | 30.5 (+1.7) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/quant/yolov3_mobilenet_v1_qat.yml) |
+| YOLOv3-MobileNetV3 | baseline | 608 | 31.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) | - |
+| YOLOv3-MobileNetV3 | PACT在线量化 | 608 | 29.1 (-2.3) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v3_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/quant/yolov3_mobilenet_v3_qat.yml) |
+| YOLOv3-DarkNet53 | baseline | 608 | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) | - |
+| YOLOv3-DarkNet53 | 普通在线量化 | 608 | 38.8 (-0.2) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_darknet_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/quant/yolov3_darknet_qat.yml) |
+| SSD-MobileNet_v1 | baseline | 300 | 73.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) | - |
+| SSD-MobileNet_v1 | 普通在线量化 | 300 | 72.9(-0.9) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/ssd_mobilenet_v1_300_voc_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/quant/ssd_mobilenet_v1_qat.yml) |
+| Mask-ResNet50-FPN | baseline | (800, 1333) | 39.2/35.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) | - |
+| Mask-ResNet50-FPN | 普通在线量化 | (800, 1333) | 39.7(+0.5)/35.9(+0.3) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/mask_rcnn_r50_fpn_1x_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml) |
+
+
+### 蒸馏
+
+#### COCO上benchmark
+
+| 模型 | 压缩策略 | 输入尺寸 | Box AP | 下载 | 模型配置文件 | 压缩算法配置文件 |
+| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline | 608 | 29.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
+| YOLOv3-MobileNetV1 | 蒸馏 | 608 | 31.0(+1.6) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml) |
+
+- 具体蒸馏方法请参考[蒸馏策略文档](distill/README.md)
+
+### 蒸馏剪裁联合策略
+
+#### COCO上benchmark
+
+| 模型 | 压缩策略 | 输入尺寸 | GFLOPs | 模型体积(MB) | Box AP | 下载 | 模型配置文件 | 压缩算法配置文件 |
+| ------------------ | ------------ | -------- | :---------: |:---------: | :---------: |:----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline | 608 | 24.65 | 94.6 | 29.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
+| YOLOv3-MobileNetV1 | 蒸馏+剪裁 | 608 | 7.54(-69.4%) | 32.0(-66.0%) | 28.4(-1.0) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill_prune.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml) |
diff --git a/configs/slim/distill/README.md b/configs/slim/distill/README.md
new file mode 100644
index 0000000..da57957
--- /dev/null
+++ b/configs/slim/distill/README.md
@@ -0,0 +1,18 @@
+# Distillation(蒸馏)
+
+## YOLOv3模型蒸馏
+以YOLOv3-MobileNetV1为例,使用YOLOv3-ResNet34作为蒸馏训练的teacher网络, 对YOLOv3-MobileNetV1结构的student网络进行蒸馏。
+COCO数据集作为目标检测任务的训练目标难度更大,意味着teacher网络会预测出更多的背景bbox,如果直接用teacher的预测输出作为student学习的`soft label`会有严重的类别不均衡问题。解决这个问题需要引入新的方法,详细背景请参考论文:[Object detection at 200 Frames Per Second](https://arxiv.org/abs/1805.06361)。
+为了确定蒸馏的对象,我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objness`等Tensor,用teacher得到的结果指导student训练。具体实现可参考[代码](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/ppdet/slim/distill.py)
+
+## Citations
+```
+@article{mehta2018object,
+ title={Object detection at 200 Frames Per Second},
+ author={Rakesh Mehta and Cemalettin Ozturk},
+ year={2018},
+ eprint={1805.06361},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml b/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
new file mode 100644
index 0000000..9998dec
--- /dev/null
+++ b/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../../yolov3/yolov3_r34_270e_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
+
+
+slim: Distill
+distill_loss: DistillYOLOv3Loss
+
+DistillYOLOv3Loss:
+ weight: 1000
diff --git a/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml b/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml
new file mode 100644
index 0000000..f86fac5
--- /dev/null
+++ b/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml
@@ -0,0 +1,24 @@
+_BASE_: [
+ '../../yolov3/yolov3_r34_270e_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
+
+slim: DistillPrune
+
+distill_loss: DistillYOLOv3Loss
+
+DistillYOLOv3Loss:
+ weight: 1000
+
+pruner: Pruner
+
+Pruner:
+ criterion: l1_norm
+ pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+ 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+ 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+ 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+ 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+ 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
+ pruned_ratios: [0.5,0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.8]
diff --git a/configs/slim/prune/yolov3_prune_fpgm.yml b/configs/slim/prune/yolov3_prune_fpgm.yml
new file mode 100644
index 0000000..f374538
--- /dev/null
+++ b/configs/slim/prune/yolov3_prune_fpgm.yml
@@ -0,0 +1,14 @@
+# Weights of yolov3_mobilenet_v1_voc
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
+slim: Pruner
+
+Pruner:
+ criterion: fpgm
+ pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+ 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+ 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+ 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+ 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+ 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
+ pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
+ print_params: False
diff --git a/configs/slim/prune/yolov3_prune_l1_norm.yml b/configs/slim/prune/yolov3_prune_l1_norm.yml
new file mode 100644
index 0000000..5b4f466
--- /dev/null
+++ b/configs/slim/prune/yolov3_prune_l1_norm.yml
@@ -0,0 +1,14 @@
+# Weights of yolov3_mobilenet_v1_voc
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
+slim: Pruner
+
+Pruner:
+ criterion: l1_norm
+ pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+ 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+ 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+ 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+ 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+ 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
+ pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
+ print_params: False
diff --git a/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml b/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml
new file mode 100644
index 0000000..7363b4e
--- /dev/null
+++ b/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml
@@ -0,0 +1,22 @@
+pretrain_weights: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams
+slim: QAT
+
+QAT:
+ quant_config: {
+ 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+ 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+ 'quantizable_layer_type': ['Conv2D', 'Linear']}
+ print_model: True
+
+
+epoch: 5
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [3, 4]
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 100
diff --git a/configs/slim/quant/ssd_mobilenet_v1_qat.yml b/configs/slim/quant/ssd_mobilenet_v1_qat.yml
new file mode 100644
index 0000000..05e0683
--- /dev/null
+++ b/configs/slim/quant/ssd_mobilenet_v1_qat.yml
@@ -0,0 +1,9 @@
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ssd_mobilenet_v1_300_120e_voc.pdparams
+slim: QAT
+
+QAT:
+ quant_config: {
+ 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+ 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+ 'quantizable_layer_type': ['Conv2D', 'Linear']}
+ print_model: True
diff --git a/configs/slim/quant/yolov3_darknet_qat.yml b/configs/slim/quant/yolov3_darknet_qat.yml
new file mode 100644
index 0000000..281b534
--- /dev/null
+++ b/configs/slim/quant/yolov3_darknet_qat.yml
@@ -0,0 +1,31 @@
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
+slim: QAT
+
+QAT:
+ quant_config: {
+ 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+ 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+ 'quantizable_layer_type': ['Conv2D', 'Linear']}
+ print_model: True
+
+epoch: 50
+
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 30
+ - 45
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/slim/quant/yolov3_mobilenet_v1_qat.yml b/configs/slim/quant/yolov3_mobilenet_v1_qat.yml
new file mode 100644
index 0000000..d145208
--- /dev/null
+++ b/configs/slim/quant/yolov3_mobilenet_v1_qat.yml
@@ -0,0 +1,10 @@
+# Weights of yolov3_mobilenet_v1_coco
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams
+slim: QAT
+
+QAT:
+ quant_config: {
+ 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+ 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+ 'quantizable_layer_type': ['Conv2D', 'Linear']}
+ print_model: True
diff --git a/configs/slim/quant/yolov3_mobilenet_v3_qat.yml b/configs/slim/quant/yolov3_mobilenet_v3_qat.yml
new file mode 100644
index 0000000..8126909
--- /dev/null
+++ b/configs/slim/quant/yolov3_mobilenet_v3_qat.yml
@@ -0,0 +1,24 @@
+# Weights of yolov3_mobilenet_v3_coco
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams
+slim: QAT
+
+QAT:
+ quant_config: {
+ 'weight_preprocess_type': 'PACT',
+ 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+ 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+ 'quantizable_layer_type': ['Conv2D', 'Linear']}
+ print_model: True
+
+epoch: 30
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 25
+ - 28
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 2000
diff --git a/configs/solov2/README.md b/configs/solov2/README.md
new file mode 100644
index 0000000..b3268df
--- /dev/null
+++ b/configs/solov2/README.md
@@ -0,0 +1,38 @@
+# SOLOv2 for instance segmentation
+
+## Introduction
+
+SOLOv2 (Segmenting Objects by Locations) is a fast instance segmentation framework with strong performance. We reproduced the model of the paper, and improved and optimized the accuracy and speed of the SOLOv2.
+
+**Highlights:**
+
+- Training Time: Training `solov2_r50_fpn_1x` on 8 Tesla V100 GPUs takes only 10 hours.
+
+## Model Zoo
+
+| Detector | Backbone | Multi-scale training | Lr schd | Mask APval | V100 FP32(FPS) | GPU | Download | Configs |
+| :-------: | :---------------------: | :-------------------: | :-----: | :--------------------: | :-------------: | :-----: | :---------: | :------------------------: |
+| YOLACT++ | R50-FPN | False | 800k iter | 34.1 (test-dev) | 33.5 | Xp | - | - |
+| CenterMask | R50-FPN | True | 2x | 36.4 | 13.9 | Xp | - | - |
+| CenterMask | V2-99-FPN | True | 3x | 40.2 | 8.9 | Xp | - | - |
+| PolarMask | R50-FPN | True | 2x | 30.5 | 9.4 | V100 | - | - |
+| BlendMask | R50-FPN | True | 3x | 37.8 | 13.5 | V100 | - | - |
+| SOLOv2 (Paper) | R50-FPN | False | 1x | 34.8 | 18.5 | V100 | - | - |
+| SOLOv2 (Paper) | X101-DCN-FPN | True | 3x | 42.4 | 5.9 | V100 | - | - |
+| SOLOv2 | R50-FPN | False | 1x | 35.5 | 21.9 | V100 | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/solov2/solov2_r50_fpn_1x_coco.yml) |
+| SOLOv2 | R50-FPN | True | 3x | 38.0 | 21.9 | V100 | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_3x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/solov2/solov2_r50_fpn_3x_coco.yml) |
+
+**Notes:**
+
+- SOLOv2 is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
+- SOLOv2 training performance depends on the Paddle develop branch. To reproduce the reported performance, use the [Paddle daily version](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-dev) or Paddle 2.0.1 (to be published in 2021.03); performance degrades slightly when training is based on Paddle 2.0.0.
+
+## Citations
+```
+@article{wang2020solov2,
+ title={SOLOv2: Dynamic, Faster and Stronger},
+ author={Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua},
+ journal={arXiv preprint arXiv:2003.10152},
+ year={2020}
+}
+```
diff --git a/configs/solov2/_base_/optimizer_1x.yml b/configs/solov2/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..d034482
--- /dev/null
+++ b/configs/solov2/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/solov2/_base_/solov2_r50_fpn.yml b/configs/solov2/_base_/solov2_r50_fpn.yml
new file mode 100644
index 0000000..53ec3b2
--- /dev/null
+++ b/configs/solov2/_base_/solov2_r50_fpn.yml
@@ -0,0 +1,41 @@
+architecture: SOLOv2
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+SOLOv2:
+ backbone: ResNet
+ neck: FPN
+ solov2_head: SOLOv2Head
+ mask_head: SOLOv2MaskHead
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [0,1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+
+SOLOv2Head:
+ seg_feat_channels: 512
+ stacked_convs: 4
+ num_grids: [40, 36, 24, 16, 12]
+ kernel_out_channels: 256
+ solov2_loss: SOLOv2Loss
+ mask_nms: MaskMatrixNMS
+
+SOLOv2MaskHead:
+ mid_channels: 128
+ out_channels: 256
+ start_level: 0
+ end_level: 3
+
+SOLOv2Loss:
+ ins_loss_weight: 3.0
+ focal_loss_gamma: 2.0
+ focal_loss_alpha: 0.25
+
+MaskMatrixNMS:
+ pre_nms_top_n: 500
+ post_nms_top_n: 100
diff --git a/configs/solov2/_base_/solov2_reader.yml b/configs/solov2/_base_/solov2_reader.yml
new file mode 100644
index 0000000..cd980d7
--- /dev/null
+++ b/configs/solov2/_base_/solov2_reader.yml
@@ -0,0 +1,44 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Poly2Mask: {}
+ - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+ - RandomFlip: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+ scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+ coord_sigma: 0.2}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+ drop_empty: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/solov2/solov2_r50_fpn_1x_coco.yml b/configs/solov2/solov2_r50_fpn_1x_coco.yml
new file mode 100644
index 0000000..e5f548d
--- /dev/null
+++ b/configs/solov2/solov2_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/solov2_r50_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/solov2_reader.yml',
+]
+weights: output/solov2_r50_fpn_1x_coco/model_final
diff --git a/configs/solov2/solov2_r50_fpn_3x_coco.yml b/configs/solov2/solov2_r50_fpn_3x_coco.yml
new file mode 100644
index 0000000..6ffff46
--- /dev/null
+++ b/configs/solov2/solov2_r50_fpn_3x_coco.yml
@@ -0,0 +1,38 @@
+_BASE_: [
+ '../datasets/coco_instance.yml',
+ '../runtime.yml',
+ '_base_/solov2_r50_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/solov2_reader.yml',
+]
+weights: output/solov2_r50_fpn_3x_coco/model_final
+epoch: 36
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [24, 33]
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
+
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Poly2Mask: {}
+ - RandomResize: {interp: 1,
+ target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]],
+ keep_ratio: True}
+ - RandomFlip: {}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+ scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+ coord_sigma: 0.2}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
diff --git a/configs/ssd/README.md b/configs/ssd/README.md
new file mode 100644
index 0000000..b2bcd67
--- /dev/null
+++ b/configs/ssd/README.md
@@ -0,0 +1,22 @@
+# SSD: Single Shot MultiBox Detector
+
+## Model Zoo
+
+### SSD on Pascal VOC
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| VGG | SSD | 8 | 240e | ---- | 77.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_vgg16_300_240e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_vgg16_300_240e_voc.yml) |
+| MobileNet v1 | SSD | 32 | 120e | ---- | 73.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) |
+
+**注意:** SSD-VGG使用4GPU在总batch size为32下训练240个epoch。SSD-MobileNetv1使用2GPU在总batch size为64下训练120周期。
+
+## Citations
+```
+@article{Liu_2016,
+ title={SSD: Single Shot MultiBox Detector},
+ journal={ECCV},
+ author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.},
+ year={2016},
+}
+```
diff --git a/configs/ssd/_base_/optimizer_120e.yml b/configs/ssd/_base_/optimizer_120e.yml
new file mode 100644
index 0000000..0625b66
--- /dev/null
+++ b/configs/ssd/_base_/optimizer_120e.yml
@@ -0,0 +1,17 @@
+epoch: 120
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ milestones: [40, 60, 80, 100]
+ values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]
+ use_warmup: false
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.0
+ type: RMSProp
+ regularizer:
+ factor: 0.00005
+ type: L2
diff --git a/configs/ssd/_base_/optimizer_1700e.yml b/configs/ssd/_base_/optimizer_1700e.yml
new file mode 100644
index 0000000..fe5fedc
--- /dev/null
+++ b/configs/ssd/_base_/optimizer_1700e.yml
@@ -0,0 +1,18 @@
+epoch: 1700
+
+LearningRate:
+ base_lr: 0.4
+ schedulers:
+ - !CosineDecay
+ max_epochs: 1700
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ssd/_base_/optimizer_240e.yml b/configs/ssd/_base_/optimizer_240e.yml
new file mode 100644
index 0000000..de31eac
--- /dev/null
+++ b/configs/ssd/_base_/optimizer_240e.yml
@@ -0,0 +1,21 @@
+epoch: 240
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 160
+ - 200
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ssd/_base_/ssd_mobilenet_reader.yml b/configs/ssd/_base_/ssd_mobilenet_reader.yml
new file mode 100644
index 0000000..2af8da2
--- /dev/null
+++ b/configs/ssd/_base_/ssd_mobilenet_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 8
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+ - RandomExpand: {fill_value: [127.5, 127.5, 127.5]}
+ - RandomCrop: {allow_no_crop: False}
+ - RandomFlip: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ batch_transforms:
+ - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 32
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 300, 300]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ssd/_base_/ssd_mobilenet_v1_300.yml b/configs/ssd/_base_/ssd_mobilenet_v1_300.yml
new file mode 100644
index 0000000..b8fe694
--- /dev/null
+++ b/configs/ssd/_base_/ssd_mobilenet_v1_300.yml
@@ -0,0 +1,41 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ssd_mobilenet_v1_coco_pretrained.pdparams
+
+SSD:
+ backbone: MobileNet
+ ssd_head: SSDHead
+ post_process: BBoxPostProcess
+
+MobileNet:
+ norm_decay: 0.
+ scale: 1
+ conv_learning_rate: 0.1
+ extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+ with_extra_blocks: true
+ feature_maps: [11, 13, 14, 15, 16, 17]
+
+SSDHead:
+ kernel_size: 1
+ padding: 0
+ anchor_generator:
+ steps: [0, 0, 0, 0, 0, 0]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+ min_ratio: 20
+ max_ratio: 90
+ base_size: 300
+ min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
+ max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
+ offset: 0.5
+ flip: true
+ min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/_base_/ssd_reader.yml b/configs/ssd/_base_/ssd_reader.yml
new file mode 100644
index 0000000..e25bed6
--- /dev/null
+++ b/configs/ssd/_base_/ssd_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 2
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+ - RandomExpand: {fill_value: [104., 117., 123.]}
+ - RandomCrop: {allow_no_crop: true}
+ - RandomFlip: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+
+ batch_transforms:
+ - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
+ - Permute: {}
+
+ batch_size: 8
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 300, 300]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ssd/_base_/ssd_vgg16_300.yml b/configs/ssd/_base_/ssd_vgg16_300.yml
new file mode 100644
index 0000000..5982105
--- /dev/null
+++ b/configs/ssd/_base_/ssd_vgg16_300.yml
@@ -0,0 +1,37 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/VGG16_caffe_pretrained.pdparams
+
+# Model Architecture
+SSD:
+ # model feat info flow
+ backbone: VGG
+ ssd_head: SSDHead
+ # post process
+ post_process: BBoxPostProcess
+
+VGG:
+ depth: 16
+ normalizations: [20., -1, -1, -1, -1, -1]
+
+SSDHead:
+ anchor_generator:
+ steps: [8, 16, 32, 64, 100, 300]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]]
+ min_ratio: 20
+ max_ratio: 90
+ min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
+ max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
+ offset: 0.5
+ flip: true
+ min_max_aspect_ratios_order: true
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/_base_/ssdlite300_reader.yml b/configs/ssd/_base_/ssdlite300_reader.yml
new file mode 100644
index 0000000..cd13112
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite300_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 8
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {allow_no_crop: False}
+ - RandomFlip: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ batch_transforms:
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 64
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 300, 300]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ssd/_base_/ssdlite320_reader.yml b/configs/ssd/_base_/ssdlite320_reader.yml
new file mode 100644
index 0000000..51db614
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite320_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 8
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {allow_no_crop: False}
+ - RandomFlip: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 90}
+ batch_transforms:
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 64
+ shuffle: true
+ drop_last: true
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/ssd/_base_/ssdlite_ghostnet_320.yml b/configs/ssd/_base_/ssdlite_ghostnet_320.yml
new file mode 100644
index 0000000..6a9e13b
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite_ghostnet_320.yml
@@ -0,0 +1,42 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/GhostNet_x1_3_ssld_pretrained.pdparams
+
+SSD:
+ backbone: GhostNet
+ ssd_head: SSDHead
+ post_process: BBoxPostProcess
+
+GhostNet:
+ scale: 1.3
+ conv_decay: 0.00004
+ with_extra_blocks: true
+ extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+ feature_maps: [13, 18, 19, 20, 21, 22]
+ lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
+
+SSDHead:
+ use_sepconv: True
+ conv_decay: 0.00004
+ anchor_generator:
+ steps: [16, 32, 64, 107, 160, 320]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+ min_ratio: 20
+ max_ratio: 95
+ base_size: 320
+ min_sizes: []
+ max_sizes: []
+ offset: 0.5
+ flip: true
+ clip: true
+ min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/_base_/ssdlite_mobilenet_v1_300.yml b/configs/ssd/_base_/ssdlite_mobilenet_v1_300.yml
new file mode 100644
index 0000000..db811ad
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite_mobilenet_v1_300.yml
@@ -0,0 +1,41 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
+
+SSD:
+ backbone: MobileNet
+ ssd_head: SSDHead
+ post_process: BBoxPostProcess
+
+MobileNet:
+ conv_decay: 0.00004
+ scale: 1
+ extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+ with_extra_blocks: true
+ feature_maps: [11, 13, 14, 15, 16, 17]
+
+SSDHead:
+ use_sepconv: True
+ conv_decay: 0.00004
+ anchor_generator:
+ steps: [16, 32, 64, 100, 150, 300]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+ min_ratio: 20
+ max_ratio: 95
+ base_size: 300
+ min_sizes: []
+ max_sizes: []
+ offset: 0.5
+ flip: true
+ clip: true
+ min_max_aspect_ratios_order: False
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/_base_/ssdlite_mobilenet_v3_large_320.yml b/configs/ssd/_base_/ssdlite_mobilenet_v3_large_320.yml
new file mode 100644
index 0000000..cc6e328
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite_mobilenet_v3_large_320.yml
@@ -0,0 +1,44 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+
+SSD:
+ backbone: MobileNetV3
+ ssd_head: SSDHead
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ scale: 1.0
+ model_name: large
+ conv_decay: 0.00004
+ with_extra_blocks: true
+ extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+ feature_maps: [14, 17, 18, 19, 20, 21]
+ lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
+ multiplier: 0.5
+
+SSDHead:
+ use_sepconv: True
+ conv_decay: 0.00004
+ anchor_generator:
+ steps: [16, 32, 64, 107, 160, 320]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+ min_ratio: 20
+ max_ratio: 95
+ base_size: 320
+ min_sizes: []
+ max_sizes: []
+ offset: 0.5
+ flip: true
+ clip: true
+ min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/_base_/ssdlite_mobilenet_v3_small_320.yml b/configs/ssd/_base_/ssdlite_mobilenet_v3_small_320.yml
new file mode 100644
index 0000000..887f95f
--- /dev/null
+++ b/configs/ssd/_base_/ssdlite_mobilenet_v3_small_320.yml
@@ -0,0 +1,44 @@
+architecture: SSD
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+
+SSD:
+ backbone: MobileNetV3
+ ssd_head: SSDHead
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ scale: 1.0
+ model_name: small
+ conv_decay: 0.00004
+ with_extra_blocks: true
+ extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+ feature_maps: [10, 13, 14, 15, 16, 17]
+ lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
+ multiplier: 0.5
+
+SSDHead:
+ use_sepconv: True
+ conv_decay: 0.00004
+ anchor_generator:
+ steps: [16, 32, 64, 107, 160, 320]
+ aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+ min_ratio: 20
+ max_ratio: 95
+ base_size: 320
+ min_sizes: []
+ max_sizes: []
+ offset: 0.5
+ flip: true
+ clip: true
+ min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+ decode:
+ name: SSDBox
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 200
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 400
+ nms_eta: 1.0
diff --git a/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml b/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml
new file mode 100644
index 0000000..3453f02
--- /dev/null
+++ b/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_120e.yml',
+ '_base_/ssd_mobilenet_v1_300.yml',
+ '_base_/ssd_mobilenet_reader.yml',
+]
+weights: output/ssd_mobilenet_v1_300_120e_voc/model_final
+
+EvalReader:
+ batch_transforms:
+ - PadBatch: {pad_gt: True}
diff --git a/configs/ssd/ssd_vgg16_300_240e_voc.yml b/configs/ssd/ssd_vgg16_300_240e_voc.yml
new file mode 100644
index 0000000..e2e2d30
--- /dev/null
+++ b/configs/ssd/ssd_vgg16_300_240e_voc.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_240e.yml',
+ '_base_/ssd_vgg16_300.yml',
+ '_base_/ssd_reader.yml',
+]
+weights: output/ssd_vgg16_300_240e_voc/model_final
+
+EvalReader:
+ batch_transforms:
+ - PadBatch: {pad_gt: True}
diff --git a/configs/ssd/ssdlite_ghostnet_320_coco.yml b/configs/ssd/ssdlite_ghostnet_320_coco.yml
new file mode 100644
index 0000000..c6eb6c1
--- /dev/null
+++ b/configs/ssd/ssdlite_ghostnet_320_coco.yml
@@ -0,0 +1,27 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1700e.yml',
+ '_base_/ssdlite_ghostnet_320.yml',
+ '_base_/ssdlite320_reader.yml',
+]
+weights: output/ssdlite_ghostnet_320_coco/model_final
+
+epoch: 1700
+
+LearningRate:
+ base_lr: 0.2
+ schedulers:
+ - !CosineDecay
+ max_epochs: 1700
+ - !LinearWarmup
+ start_factor: 0.33333
+ steps: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/ssd/ssdlite_mobilenet_v1_300_coco.yml b/configs/ssd/ssdlite_mobilenet_v1_300_coco.yml
new file mode 100644
index 0000000..75cb8a8
--- /dev/null
+++ b/configs/ssd/ssdlite_mobilenet_v1_300_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1700e.yml',
+ '_base_/ssdlite_mobilenet_v1_300.yml',
+ '_base_/ssdlite300_reader.yml',
+]
+weights: output/ssdlite_mobilenet_v1_300_coco/model_final
diff --git a/configs/ssd/ssdlite_mobilenet_v3_large_320_coco.yml b/configs/ssd/ssdlite_mobilenet_v3_large_320_coco.yml
new file mode 100644
index 0000000..78d561a
--- /dev/null
+++ b/configs/ssd/ssdlite_mobilenet_v3_large_320_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1700e.yml',
+ '_base_/ssdlite_mobilenet_v3_large_320.yml',
+ '_base_/ssdlite320_reader.yml',
+]
+weights: output/ssdlite_mobilenet_v3_large_320_coco/model_final
diff --git a/configs/ssd/ssdlite_mobilenet_v3_small_320_coco.yml b/configs/ssd/ssdlite_mobilenet_v3_small_320_coco.yml
new file mode 100644
index 0000000..fa0ce53
--- /dev/null
+++ b/configs/ssd/ssdlite_mobilenet_v3_small_320_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1700e.yml',
+ '_base_/ssdlite_mobilenet_v3_small_320.yml',
+ '_base_/ssdlite320_reader.yml',
+]
+weights: output/ssdlite_mobilenet_v3_small_320_coco/model_final
diff --git a/configs/ttfnet/README.md b/configs/ttfnet/README.md
new file mode 100644
index 0000000..a20660e
--- /dev/null
+++ b/configs/ttfnet/README.md
@@ -0,0 +1,68 @@
+# 1. TTFNet
+
+## 简介
+
+TTFNet是一种用于实时目标检测且对训练时间友好的网络,对CenterNet收敛速度慢的问题进行改进,提出了利用高斯核生成训练样本的新方法,有效的消除了anchor-free head中存在的模糊性。同时简单轻量化的网络结构也易于进行任务扩展。
+
+**特点:**
+
+- 结构简单,仅需要两个head检测目标位置和大小,并且去除了耗时的后处理操作
+- 训练时间短,基于DarkNet53的骨干网络,V100 8卡仅需要训练2个小时即可达到较好的模型效果
+
+## Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| DarkNet53 | TTFNet | 12 | 1x | ---- | 33.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ttfnet_darknet53_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ttfnet/ttfnet_darknet53_1x_coco.yml) |
+
+
+
+
+
+# 2. PAFNet
+
+## 简介
+
+PAFNet(Paddle Anchor Free)是PaddleDetection基于TTFNet的优化模型,精度达到anchor free领域SOTA水平,同时产出移动端轻量级模型PAFNet-Lite
+
+PAFNet系列模型从如下方面优化TTFNet模型:
+
+- [CutMix](https://arxiv.org/abs/1905.04899)
+- 更优的骨干网络: ResNet50vd-DCN
+- 更大的训练batch size: 8 GPUs,每GPU batch_size=18
+- Synchronized Batch Normalization
+- [Deformable Convolution](https://arxiv.org/abs/1703.06211)
+- [Exponential Moving Average](https://www.investopedia.com/terms/e/ema.asp)
+- 更优的预训练模型
+
+
+## 模型库
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50vd | PAFNet | 18 | 10x | ---- | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/pafnet_10x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ttfnet/pafnet_10x_coco.yml) |
+
+
+
+### PAFNet-Lite
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 | Box AP | 麒麟990延时(ms) | 体积(M) | 下载 | 配置文件 |
+| :-------------- | :------------- | :-----: | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| MobileNetv3 | PAFNet-Lite | 12 | 20x | 23.9 | 26.00 | 14 | [下载链接](https://paddledet.bj.bcebos.com/models/pafnet_lite_mobilenet_v3_20x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml) |
+
+**注意:** 由于动态图框架整体升级,PAFNet的PaddleDetection发布的权重模型评估时需要添加--bias字段, 例如
+
+```bash
+# 使用PaddleDetection发布的权重
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/ttfnet/pafnet_10x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/pafnet_10x_coco.pdparams --bias
+```
+
+## Citations
+```
+@article{liu2019training,
+ title = {Training-Time-Friendly Network for Real-Time Object Detection},
+ author = {Zili Liu and Tu Zheng and Guodong Xu and Zheng Yang and Haifeng Liu and Deng Cai},
+ journal = {arXiv preprint arXiv:1909.00700},
+ year = {2019}
+}
+```
diff --git a/configs/ttfnet/_base_/optimizer_10x.yml b/configs/ttfnet/_base_/optimizer_10x.yml
new file mode 100644
index 0000000..dd2c29d
--- /dev/null
+++ b/configs/ttfnet/_base_/optimizer_10x.yml
@@ -0,0 +1,19 @@
+epoch: 120
+
+LearningRate:
+ base_lr: 0.015
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [80, 110]
+ - !LinearWarmup
+ start_factor: 0.2
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0004
+ type: L2
diff --git a/configs/ttfnet/_base_/optimizer_1x.yml b/configs/ttfnet/_base_/optimizer_1x.yml
new file mode 100644
index 0000000..8457ead
--- /dev/null
+++ b/configs/ttfnet/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.015
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [8, 11]
+ - !LinearWarmup
+ start_factor: 0.2
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0004
+ type: L2
diff --git a/configs/ttfnet/_base_/optimizer_20x.yml b/configs/ttfnet/_base_/optimizer_20x.yml
new file mode 100644
index 0000000..4dd3492
--- /dev/null
+++ b/configs/ttfnet/_base_/optimizer_20x.yml
@@ -0,0 +1,20 @@
+epoch: 240
+
+LearningRate:
+ base_lr: 0.015
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [160, 220]
+ - !LinearWarmup
+ start_factor: 0.2
+ steps: 1000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 35
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0004
+ type: L2
diff --git a/configs/ttfnet/_base_/pafnet.yml b/configs/ttfnet/_base_/pafnet.yml
new file mode 100644
index 0000000..5319fe6
--- /dev/null
+++ b/configs/ttfnet/_base_/pafnet.yml
@@ -0,0 +1,41 @@
+architecture: TTFNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+TTFNet:
+ backbone: ResNet
+ neck: TTFFPN
+ ttf_head: TTFHead
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 50
+ variant: d
+ return_idx: [0, 1, 2, 3]
+ freeze_at: -1
+ norm_decay: 0.
+ variant: d
+ dcn_v2_stages: [1, 2, 3]
+
+TTFFPN:
+ planes: [256, 128, 64]
+ shortcut_num: [3, 2, 1]
+
+TTFHead:
+ dcn_head: true
+ hm_loss:
+ name: CTFocalLoss
+ loss_weight: 1.
+ wh_loss:
+ name: GIoULoss
+ loss_weight: 5.
+ reduction: sum
+
+BBoxPostProcess:
+ decode:
+ name: TTFBox
+ max_per_img: 100
+ score_thresh: 0.01
+ down_ratio: 4
diff --git a/configs/ttfnet/_base_/pafnet_lite.yml b/configs/ttfnet/_base_/pafnet_lite.yml
new file mode 100644
index 0000000..5ed2fa2
--- /dev/null
+++ b/configs/ttfnet/_base_/pafnet_lite.yml
@@ -0,0 +1,44 @@
+architecture: TTFNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+
+TTFNet:
+ backbone: MobileNetV3
+ neck: TTFFPN
+ ttf_head: TTFHead
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ scale: 1.0
+ model_name: large
+ feature_maps: [5, 8, 14, 17]
+ with_extra_blocks: true
+ lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
+ conv_decay: 0.00001
+ norm_decay: 0.0
+ extra_block_filters: []
+
+TTFFPN:
+ planes: [96, 48, 24]
+ shortcut_num: [2, 2, 1]
+ lite_neck: true
+ fusion_method: concat
+
+TTFHead:
+ hm_head_planes: 48
+ wh_head_planes: 24
+ lite_head: true
+ hm_loss:
+ name: CTFocalLoss
+ loss_weight: 1.
+ wh_loss:
+ name: GIoULoss
+ loss_weight: 5.
+ reduction: sum
+
+BBoxPostProcess:
+ decode:
+ name: TTFBox
+ max_per_img: 100
+ score_thresh: 0.01
+ down_ratio: 4
diff --git a/configs/ttfnet/_base_/pafnet_lite_reader.yml b/configs/ttfnet/_base_/pafnet_lite_reader.yml
new file mode 100644
index 0000000..446a13a
--- /dev/null
+++ b/configs/ttfnet/_base_/pafnet_lite_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Cutmix: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {aspect_ratio: NULL, cover_all_box: True}
+ - RandomFlip: {}
+ - GridMask: {upper_iter: 300000}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512], random_interp: True, keep_ratio: False}
+ - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
+ - Permute: {}
+ - Gt2TTFTarget: {down_ratio: 4}
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 12
+ shuffle: true
+ drop_last: true
+ use_shared_memory: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
diff --git a/configs/ttfnet/_base_/pafnet_reader.yml b/configs/ttfnet/_base_/pafnet_reader.yml
new file mode 100644
index 0000000..ea90a13
--- /dev/null
+++ b/configs/ttfnet/_base_/pafnet_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - Cutmix: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {random_apply: false, random_channel: true}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {aspect_ratio: NULL, cover_all_box: True}
+ - RandomFlip: {prob: 0.5}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672], keep_ratio: false}
+ - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
+ - Permute: {}
+ - Gt2TTFTarget: {down_ratio: 4}
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 18
+ shuffle: true
+ drop_last: true
+ use_shared_memory: true
+ mixup_epoch: 100
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
diff --git a/configs/ttfnet/_base_/ttfnet_darknet53.yml b/configs/ttfnet/_base_/ttfnet_darknet53.yml
new file mode 100644
index 0000000..05c7dce
--- /dev/null
+++ b/configs/ttfnet/_base_/ttfnet_darknet53.yml
@@ -0,0 +1,35 @@
+architecture: TTFNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams
+
+TTFNet:
+ backbone: DarkNet
+ neck: TTFFPN
+ ttf_head: TTFHead
+ post_process: BBoxPostProcess
+
+DarkNet:
+ depth: 53
+ freeze_at: 0
+ return_idx: [1, 2, 3, 4]
+ norm_type: bn
+ norm_decay: 0.0004
+
+TTFFPN:
+ planes: [256, 128, 64]
+ shortcut_num: [3, 2, 1]
+
+TTFHead:
+ hm_loss:
+ name: CTFocalLoss
+ loss_weight: 1.
+ wh_loss:
+ name: GIoULoss
+ loss_weight: 5.
+ reduction: sum
+
+BBoxPostProcess:
+ decode:
+ name: TTFBox
+ max_per_img: 100
+ score_thresh: 0.01
+ down_ratio: 4
diff --git a/configs/ttfnet/_base_/ttfnet_reader.yml b/configs/ttfnet/_base_/ttfnet_reader.yml
new file mode 100644
index 0000000..f9ed6cc
--- /dev/null
+++ b/configs/ttfnet/_base_/ttfnet_reader.yml
@@ -0,0 +1,35 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
+ - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
+ - Permute: {}
+ batch_transforms:
+ - Gt2TTFTarget: {down_ratio: 4}
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 12
+ shuffle: true
+ drop_last: true
+ use_shared_memory: true
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
+ - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
+ - Permute: {}
+ batch_size: 1
+ drop_last: false
+ drop_empty: false
diff --git a/configs/ttfnet/pafnet_10x_coco.yml b/configs/ttfnet/pafnet_10x_coco.yml
new file mode 100644
index 0000000..b14a2bc
--- /dev/null
+++ b/configs/ttfnet/pafnet_10x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_10x.yml',
+ '_base_/pafnet.yml',
+ '_base_/pafnet_reader.yml',
+]
+weights: output/pafnet_10x_coco/model_final
diff --git a/configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml b/configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml
new file mode 100644
index 0000000..577af16
--- /dev/null
+++ b/configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_20x.yml',
+ '_base_/pafnet_lite.yml',
+ '_base_/pafnet_lite_reader.yml',
+]
+weights: output/pafnet_lite_mobilenet_v3_20x_coco/model_final
diff --git a/configs/ttfnet/ttfnet_darknet53_1x_coco.yml b/configs/ttfnet/ttfnet_darknet53_1x_coco.yml
new file mode 100644
index 0000000..5912392
--- /dev/null
+++ b/configs/ttfnet/ttfnet_darknet53_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/ttfnet_darknet53.yml',
+ '_base_/ttfnet_reader.yml',
+]
+weights: output/ttfnet_darknet53_1x_coco/model_final
diff --git a/configs/vehicle/README.md b/configs/vehicle/README.md
new file mode 100644
index 0000000..56e5e19
--- /dev/null
+++ b/configs/vehicle/README.md
@@ -0,0 +1,53 @@
+English | [简体中文](README_cn.md)
+# PaddleDetection applied for specific scenarios
+
+We provide some models implemented by PaddlePaddle to detect objects in specific scenarios, users can download the models and use them in these scenarios.
+
+| Task | Algorithm | Box AP | Download | Configs |
+|:---------------------|:---------:|:------:| :-------------------------------------------------------------------------------------: |:------:|
+| Vehicle Detection | YOLOv3 | 54.5 | [model](https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/vehicle/vehicle_yolov3_darknet.yml) |
+
+## Vehicle Detection
+
+One of the major applications of vehicle detection is traffic monitoring. In this scenario, the vehicles to be detected are mostly captured by cameras mounted on top of traffic light columns.
+
+### 1. Network
+
+The network for detecting vehicles is YOLOv3, the backbone of which is DarkNet53.
+
+### 2. Configuration for training
+
+PaddleDetection provides users with a configuration file [yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) to train YOLOv3 on the COCO dataset. Compared with this file, we modify some parameters as follows to conduct the training for vehicle detection:
+
+* num_classes: 6
+* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]]
+* nms/nms_top_k: 400
+* nms/score_threshold: 0.005
+* dataset_dir: dataset/vehicle
+
+### 3. Accuracy
+
+The accuracy of the model trained and evaluated on our private data is as follows:
+
+AP at IoU=.50:.05:.95 is 0.545.
+
+AP at IoU=.50 is 0.764.
+
+### 4. Inference
+
+Users can employ the model to conduct the inference:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/vehicle/vehicle_yolov3_darknet.yml \
+ -o weights=https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams \
+ --infer_dir configs/vehicle/demo \
+ --draw_threshold 0.2 \
+ --output_dir configs/vehicle/demo/output
+```
+
+Some inference results are visualized below:
+
+
+
+
diff --git a/configs/vehicle/README_cn.md b/configs/vehicle/README_cn.md
new file mode 100644
index 0000000..5fd7f66
--- /dev/null
+++ b/configs/vehicle/README_cn.md
@@ -0,0 +1,54 @@
+[English](README.md) | 简体中文
+# 特色垂类检测模型
+
+我们提供了针对不同场景的基于PaddlePaddle的检测模型,用户可以下载模型进行使用。
+
+| 任务 | 算法 | 精度(Box AP) | 下载 | 配置文件 |
+|:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: | :------:|
+| 车辆检测 | YOLOv3 | 54.5 | [下载链接](https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/vehicle/vehicle_yolov3_darknet.yml) |
+
+
+## 车辆检测(Vehicle Detection)
+
+车辆检测的主要应用之一是交通监控。在这样的监控场景中,待检测的车辆多为道路红绿灯柱上的摄像头拍摄所得。
+
+### 1. 模型结构
+
+Backbone为DarkNet53的YOLOv3。
+
+### 2. 训练参数配置
+
+PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml),与之相比,在进行车辆检测的模型训练时,我们对以下参数进行了修改:
+
+* num_classes: 6
+* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]]
+* nms/nms_top_k: 400
+* nms/score_threshold: 0.005
+* dataset_dir: dataset/vehicle
+
+### 3. 精度指标
+
+模型在我们内部数据上的精度指标为:
+
+IOU=.50:.05:.95时的AP为 0.545。
+
+IOU=.5时的AP为 0.764。
+
+### 4. 预测
+
+用户可以使用我们训练好的模型进行车辆检测:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+python -u tools/infer.py -c configs/vehicle/vehicle_yolov3_darknet.yml \
+ -o weights=https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams \
+ --infer_dir configs/vehicle/demo \
+ --draw_threshold 0.2 \
+ --output_dir configs/vehicle/demo/output
+```
+
+预测结果示例:
+
+
+
+
diff --git a/configs/vehicle/demo/001.jpeg b/configs/vehicle/demo/001.jpeg
new file mode 100644
index 0000000..8786db5
Binary files /dev/null and b/configs/vehicle/demo/001.jpeg differ
diff --git a/configs/vehicle/demo/003.png b/configs/vehicle/demo/003.png
new file mode 100644
index 0000000..c01ab4c
Binary files /dev/null and b/configs/vehicle/demo/003.png differ
diff --git a/configs/vehicle/demo/004.png b/configs/vehicle/demo/004.png
new file mode 100644
index 0000000..8907eb8
Binary files /dev/null and b/configs/vehicle/demo/004.png differ
diff --git a/configs/vehicle/demo/005.png b/configs/vehicle/demo/005.png
new file mode 100644
index 0000000..bf17712
Binary files /dev/null and b/configs/vehicle/demo/005.png differ
diff --git a/configs/vehicle/vehicle.json b/configs/vehicle/vehicle.json
new file mode 100644
index 0000000..5863a9a
--- /dev/null
+++ b/configs/vehicle/vehicle.json
@@ -0,0 +1,36 @@
+{
+ "images": [],
+ "annotations": [],
+ "categories": [
+ {
+ "supercategory": "component",
+ "id": 1,
+ "name": "car"
+ },
+ {
+ "supercategory": "component",
+ "id": 2,
+ "name": "truck"
+ },
+ {
+ "supercategory": "component",
+ "id": 3,
+ "name": "bus"
+ },
+ {
+ "supercategory": "component",
+ "id": 4,
+ "name": "motorbike"
+ },
+ {
+ "supercategory": "component",
+ "id": 5,
+ "name": "tricycle"
+ },
+ {
+ "supercategory": "component",
+ "id": 6,
+ "name": "carplate"
+ }
+ ]
+}
diff --git a/configs/vehicle/vehicle_yolov3_darknet.yml b/configs/vehicle/vehicle_yolov3_darknet.yml
new file mode 100644
index 0000000..17f401a
--- /dev/null
+++ b/configs/vehicle/vehicle_yolov3_darknet.yml
@@ -0,0 +1,42 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../yolov3/_base_/optimizer_270e.yml',
+ '../yolov3/_base_/yolov3_darknet53.yml',
+ '../yolov3/_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams
+
+YOLOv3Head:
+ anchors: [[8, 9], [10, 23], [19, 15],
+ [23, 33], [40, 25], [54, 50],
+ [101, 80], [139, 145], [253, 224]]
+
+BBoxPostProcess:
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.005
+ nms_threshold: 0.45
+ nms_top_k: 400
+
+num_classes: 6
+
+TrainDataset:
+ !COCODataSet
+ dataset_dir: dataset/vehicle
+ anno_path: annotations/instances_train2017.json
+ image_dir: train2017
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+ !COCODataSet
+ dataset_dir: dataset/vehicle
+ anno_path: annotations/instances_val2017.json
+ image_dir: val2017
+
+TestDataset:
+ !ImageFolder
+ anno_path: configs/vehicle/vehicle.json
diff --git a/configs/yolov3/README.md b/configs/yolov3/README.md
new file mode 100644
index 0000000..e4408c5
--- /dev/null
+++ b/configs/yolov3/README.md
@@ -0,0 +1,70 @@
+# YOLOv3
+
+## Model Zoo
+
+### YOLOv3 on COCO
+
+| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :------------------- | :------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| DarkNet53(paper) | 608 | 8 | 270e | ---- | 33.0 | - | - |
+| DarkNet53(paper) | 416 | 8 | 270e | ---- | 31.0 | - | - |
+| DarkNet53(paper) | 320 | 8 | 270e | ---- | 28.2 | - | - |
+| DarkNet53 | 608 | 8 | 270e | ---- | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| DarkNet53 | 416 | 8 | 270e | ---- | 37.5 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| DarkNet53 | 320 | 8 | 270e | ---- | 34.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| ResNet50_vd | 608 | 8 | 270e | ---- | 39.1 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
+| ResNet50_vd | 416 | 8 | 270e | ---- | 36.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
+| ResNet50_vd | 320 | 8 | 270e | ---- | 33.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
+| ResNet34 | 608 | 8 | 270e | ---- | 36.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| ResNet34 | 416 | 8 | 270e | ---- | 34.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| ResNet34 | 320 | 8 | 270e | ---- | 31.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| MobileNet-V1 | 608 | 8 | 270e | ---- | 29.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V1 | 416 | 8 | 270e | ---- | 29.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V1 | 320 | 8 | 270e | ---- | 27.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V3 | 608 | 8 | 270e | ---- | 31.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| MobileNet-V3 | 416 | 8 | 270e | ---- | 29.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| MobileNet-V3 | 320 | 8 | 270e | ---- | 27.1 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| MobileNet-V1-SSLD | 608 | 8 | 270e | ---- | 31.0 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml) |
+| MobileNet-V1-SSLD | 416 | 8 | 270e | ---- | 30.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml) |
+| MobileNet-V1-SSLD | 320 | 8 | 270e | ---- | 28.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml) |
+
+### YOLOv3 on Pascal VOC
+
+| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | 下载 | 配置文件 |
+| :----------- | :--: | :-----: | :-----: |:------------: |:----: | :-------: | :----: |
+| MobileNet-V1 | 608 | 8 | 270e | - | 75.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V1 | 416 | 8 | 270e | - | 76.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V1 | 320 | 8 | 270e | - | 74.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V3 | 608 | 8 | 270e | - | 79.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V3 | 416 | 8 | 270e | - | 78.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V3 | 320 | 8 | 270e | - | 76.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V1-SSLD | 608 | 8 | 270e | - | 78.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml) |
+| MobileNet-V1-SSLD | 416 | 8 | 270e | - | 79.6 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml) |
+| MobileNet-V1-SSLD | 320 | 8 | 270e | - | 77.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml) |
+| MobileNet-V3-SSLD | 608 | 8 | 270e | - | 80.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml) |
+| MobileNet-V3-SSLD | 416 | 8 | 270e | - | 79.2 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml) |
+| MobileNet-V3-SSLD | 320 | 8 | 270e | - | 77.3 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_ssld_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml) |
+
+**注意:** YOLOv3均使用8GPU训练,训练270个epoch。由于动态图框架整体升级,以下几个PaddleDetection发布的权重模型评估时需要添加--bias字段, 例如
+
+```bash
+# 使用PaddleDetection发布的权重
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams --bias
+```
+主要有:
+
+1.yolov3_darknet53_270e_coco
+
+2.yolov3_r50vd_dcn_270e_coco
+
+## Citations
+```
+@misc{redmon2018yolov3,
+ title={YOLOv3: An Incremental Improvement},
+ author={Joseph Redmon and Ali Farhadi},
+ year={2018},
+ eprint={1804.02767},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/configs/yolov3/_base_/optimizer_270e.yml b/configs/yolov3/_base_/optimizer_270e.yml
new file mode 100644
index 0000000..d92f3df
--- /dev/null
+++ b/configs/yolov3/_base_/optimizer_270e.yml
@@ -0,0 +1,21 @@
+epoch: 270
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 216
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/yolov3/_base_/yolov3_darknet53.yml b/configs/yolov3/_base_/yolov3_darknet53.yml
new file mode 100644
index 0000000..1187f6e
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_darknet53.yml
@@ -0,0 +1,41 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: DarkNet
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+DarkNet:
+ depth: 53
+ return_idx: [2, 3, 4]
+
+# use default config
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_mobilenet_v1.yml b/configs/yolov3/_base_/yolov3_mobilenet_v1.yml
new file mode 100644
index 0000000..6452b51
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_mobilenet_v1.yml
@@ -0,0 +1,43 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: MobileNet
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNet:
+ scale: 1
+ feature_maps: [4, 6, 13]
+ with_extra_blocks: false
+ extra_block_filters: []
+
+# use default config
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_mobilenet_v3_large.yml b/configs/yolov3/_base_/yolov3_mobilenet_v3_large.yml
new file mode 100644
index 0000000..94b5dea
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_mobilenet_v3_large.yml
@@ -0,0 +1,44 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: large
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [7, 13, 16]
+
+# use default config
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_mobilenet_v3_small.yml b/configs/yolov3/_base_/yolov3_mobilenet_v3_small.yml
new file mode 100644
index 0000000..f0f144b
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_mobilenet_v3_small.yml
@@ -0,0 +1,44 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: small
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [4, 9, 12]
+
+# use default config
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_r34.yml b/configs/yolov3/_base_/yolov3_r34.yml
new file mode 100644
index 0000000..c2d1489
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_r34.yml
@@ -0,0 +1,41 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: ResNet
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 34
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_r50vd_dcn.yml b/configs/yolov3/_base_/yolov3_r50vd_dcn.yml
new file mode 100644
index 0000000..0d01148
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_r50vd_dcn.yml
@@ -0,0 +1,45 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: ResNet
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 50
+ variant: d
+ return_idx: [1, 2, 3]
+ dcn_v2_stages: [3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
diff --git a/configs/yolov3/_base_/yolov3_reader.yml b/configs/yolov3/_base_/yolov3_reader.yml
new file mode 100644
index 0000000..f0130c1
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_reader.yml
@@ -0,0 +1,45 @@
+worker_num: 2
+TrainReader:
+ inputs_def:
+ num_max_boxes: 50
+ sample_transforms:
+ - Decode: {}
+ - Mixup: {alpha: 1.5, beta: 1.5}
+ - RandomDistort: {}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeBox: {}
+ - PadBox: {num_max_boxes: 50}
+ - BboxXYXY2XYWH: {}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
+ batch_size: 8
+ shuffle: true
+ drop_last: true
+ mixup_epoch: 250
+ use_shared_memory: true
+
+EvalReader:
+ inputs_def:
+ num_max_boxes: 50
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 608, 608]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - Permute: {}
+ batch_size: 1
diff --git a/configs/yolov3/yolov3_darknet53_270e_coco.yml b/configs/yolov3/yolov3_darknet53_270e_coco.yml
new file mode 100644
index 0000000..4fbd401
--- /dev/null
+++ b/configs/yolov3/yolov3_darknet53_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_darknet53.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_darknet53_270e_coco/model_final
diff --git a/configs/yolov3/yolov3_darknet53_270e_voc.yml b/configs/yolov3/yolov3_darknet53_270e_voc.yml
new file mode 100644
index 0000000..e24c01e
--- /dev/null
+++ b/configs/yolov3/yolov3_darknet53_270e_voc.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_darknet53.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_darknet53_270e_voc/model_final
+
+EvalReader:
+ batch_transforms:
+ - PadBatch: {pad_gt: True}
diff --git a/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml b/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml
new file mode 100644
index 0000000..b9dd33b
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v1.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v1_270e_coco/model_final
diff --git a/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml b/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml
new file mode 100644
index 0000000..7b25cd0
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml
@@ -0,0 +1,22 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v1.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v1_270e_voc/model_final
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 216
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
diff --git a/configs/yolov3/yolov3_mobilenet_v1_roadsign.yml b/configs/yolov3/yolov3_mobilenet_v1_roadsign.yml
new file mode 100644
index 0000000..d899375
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v1_roadsign.yml
@@ -0,0 +1,33 @@
+_BASE_: [
+ '../datasets/roadsign_voc.yml',
+ '../runtime.yml',
+ '_base_/yolov3_mobilenet_v1.yml',
+ '_base_/yolov3_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams
+weights: output/yolov3_mobilenet_v1_roadsign/model_final
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ label_smooth: true
+
+snapshot_epoch: 2
+epoch: 40
+
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [32, 36]
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 100
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml b/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml
new file mode 100644
index 0000000..10cf816
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v1.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
+weights: output/yolov3_mobilenet_v1_ssld_270e_coco/model_final
diff --git a/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml b/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml
new file mode 100644
index 0000000..7a3e62f
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml
@@ -0,0 +1,23 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v1.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
+weights: output/yolov3_mobilenet_v1_ssld_270e_voc/model_final
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 216
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
diff --git a/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml b/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml
new file mode 100644
index 0000000..d1b8af5
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v3_large.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v3_large_270e_coco/model_final
diff --git a/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml b/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml
new file mode 100644
index 0000000..abf492e
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml
@@ -0,0 +1,22 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v3_large.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v3_large_270e_voc/model_final
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 216
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
diff --git a/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml b/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml
new file mode 100644
index 0000000..6d183e3
--- /dev/null
+++ b/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml
@@ -0,0 +1,23 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_mobilenet_v3_large.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+weights: output/yolov3_mobilenet_v3_large_ssld_270e_voc/model_final
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 216
+ - 243
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 1000
diff --git a/configs/yolov3/yolov3_r34_270e_coco.yml b/configs/yolov3/yolov3_r34_270e_coco.yml
new file mode 100644
index 0000000..8653b06
--- /dev/null
+++ b/configs/yolov3/yolov3_r34_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_r34.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_r34_270e_coco/model_final
diff --git a/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml b/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml
new file mode 100644
index 0000000..a07cbdd
--- /dev/null
+++ b/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_r50vd_dcn.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_r50vd_dcn_270e_coco/model_final
diff --git a/contrib/VehicleDetection/vehicle_yolov3_darknet.yml b/contrib/VehicleDetection/vehicle_yolov3_darknet.yml
new file mode 100644
index 0000000..825f1c9
--- /dev/null
+++ b/contrib/VehicleDetection/vehicle_yolov3_darknet.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_darknet53.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_darknet53_270e_coco/model_final
\ No newline at end of file
diff --git a/dataset/coco/download_coco.py b/dataset/coco/download_coco.py
new file mode 100644
index 0000000..47659fa
--- /dev/null
+++ b/dataset/coco/download_coco.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os.path as osp
+import logging
+# add python path of PaddleDetection to sys.path
+parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+
+download_path = osp.split(osp.realpath(sys.argv[0]))[0]
+download_dataset(download_path, 'coco')
diff --git a/dataset/roadsign_voc/download_roadsign_voc.py b/dataset/roadsign_voc/download_roadsign_voc.py
new file mode 100644
index 0000000..3cb517d
--- /dev/null
+++ b/dataset/roadsign_voc/download_roadsign_voc.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os.path as osp
+import logging
+# add python path of PaddleDetection to sys.path
+parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+
+download_path = osp.split(osp.realpath(sys.argv[0]))[0]
+download_dataset(download_path, 'roadsign_voc')
diff --git a/dataset/roadsign_voc/label_list.txt b/dataset/roadsign_voc/label_list.txt
new file mode 100644
index 0000000..1be460f
--- /dev/null
+++ b/dataset/roadsign_voc/label_list.txt
@@ -0,0 +1,4 @@
+speedlimit
+crosswalk
+trafficlight
+stop
\ No newline at end of file
diff --git a/dataset/voc/create_list.py b/dataset/voc/create_list.py
new file mode 100644
index 0000000..5ab8042
--- /dev/null
+++ b/dataset/voc/create_list.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os.path as osp
+import logging
+# add python path of PaddleDetection to sys.path
+parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+from ppdet.utils.download import create_voc_list
+
+logging.basicConfig(level=logging.INFO)
+
+voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
+create_voc_list(voc_path)
diff --git a/dataset/voc/download_voc.py b/dataset/voc/download_voc.py
new file mode 100644
index 0000000..e4c449c
--- /dev/null
+++ b/dataset/voc/download_voc.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os.path as osp
+import logging
+# add python path of PaddleDetection to sys.path
+parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
+if parent_path not in sys.path:
+ sys.path.append(parent_path)
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+
+download_path = osp.split(osp.realpath(sys.argv[0]))[0]
+download_dataset(download_path, 'voc')
diff --git a/dataset/voc/label_list.txt b/dataset/voc/label_list.txt
new file mode 100644
index 0000000..8420ab3
--- /dev/null
+++ b/dataset/voc/label_list.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
diff --git a/dataset/wider_face/download_wider_face.sh b/dataset/wider_face/download_wider_face.sh
new file mode 100644
index 0000000..59a2054
--- /dev/null
+++ b/dataset/wider_face/download_wider_face.sh
@@ -0,0 +1,21 @@
+# All rights `PaddleDetection` reserved
+# References:
+# @inproceedings{yang2016wider,
+# Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
+# Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+# Title = {WIDER FACE: A Face Detection Benchmark},
+# Year = {2016}}
+
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+# Download the data.
+echo "Downloading..."
+wget https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip
+wget https://dataset.bj.bcebos.com/wider_face/WIDER_val.zip
+wget https://dataset.bj.bcebos.com/wider_face/wider_face_split.zip
+# Extract the data.
+echo "Extracting..."
+unzip -q WIDER_train.zip
+unzip -q WIDER_val.zip
+unzip -q wider_face_split.zip
diff --git a/demo/00000001.jpg b/demo/00000001.jpg
new file mode 100644
index 0000000..b8d24aa
Binary files /dev/null and b/demo/00000001.jpg differ
diff --git a/demo/00000002.jpg b/demo/00000002.jpg
new file mode 100644
index 0000000..3533a1d
Binary files /dev/null and b/demo/00000002.jpg differ
diff --git a/demo/00000003.jpg b/demo/00000003.jpg
new file mode 100644
index 0000000..07ccdb3
Binary files /dev/null and b/demo/00000003.jpg differ
diff --git a/demo/00000004.jpg b/demo/00000004.jpg
new file mode 100644
index 0000000..dc75ddf
Binary files /dev/null and b/demo/00000004.jpg differ
diff --git a/demo/00000005.jpg b/demo/00000005.jpg
new file mode 100644
index 0000000..0304899
Binary files /dev/null and b/demo/00000005.jpg differ
diff --git a/demo/00000006.jpg b/demo/00000006.jpg
new file mode 100644
index 0000000..3e9885c
Binary files /dev/null and b/demo/00000006.jpg differ
diff --git a/demo/00000007.jpg b/demo/00000007.jpg
new file mode 100644
index 0000000..7607eee
Binary files /dev/null and b/demo/00000007.jpg differ
diff --git a/demo/output/00000001.jpg b/demo/output/00000001.jpg
new file mode 100644
index 0000000..a769387
Binary files /dev/null and b/demo/output/00000001.jpg differ
diff --git a/demo/output/00000002.jpg b/demo/output/00000002.jpg
new file mode 100644
index 0000000..454f227
Binary files /dev/null and b/demo/output/00000002.jpg differ
diff --git a/demo/output/00000003.jpg b/demo/output/00000003.jpg
new file mode 100644
index 0000000..35e38c3
Binary files /dev/null and b/demo/output/00000003.jpg differ
diff --git a/demo/output/00000004.jpg b/demo/output/00000004.jpg
new file mode 100644
index 0000000..5cdd44d
Binary files /dev/null and b/demo/output/00000004.jpg differ
diff --git a/demo/output/00000005.jpg b/demo/output/00000005.jpg
new file mode 100644
index 0000000..75693b4
Binary files /dev/null and b/demo/output/00000005.jpg differ
diff --git a/demo/output/00000006.jpg b/demo/output/00000006.jpg
new file mode 100644
index 0000000..9dff9cd
Binary files /dev/null and b/demo/output/00000006.jpg differ
diff --git a/demo/output/00000007.jpg b/demo/output/00000007.jpg
new file mode 100644
index 0000000..57d249f
Binary files /dev/null and b/demo/output/00000007.jpg differ
diff --git a/deploy/BENCHMARK_INFER.md b/deploy/BENCHMARK_INFER.md
new file mode 100644
index 0000000..988cf30
--- /dev/null
+++ b/deploy/BENCHMARK_INFER.md
@@ -0,0 +1,60 @@
+# 推理Benchmark
+
+## 一、环境准备
+- 1、测试环境:
+ - CUDA 10.1
+ - CUDNN 7.6
+ - TensorRT-6.0.1
+ - PaddlePaddle v2.0.1
+ - GPU分别为: Tesla V100和GTX 1080Ti和Jetson AGX Xavier
+- 2、测试方式:
+ - 为了方便比较不同模型的推理速度,输入采用同样大小的图片,为 3x640x640,采用 `demo/000000014439_640x640.jpg` 图片。
+ - Batch Size=1
+ - 去掉前100轮warmup时间,测试100轮的平均时间,单位ms/image,包括网络计算时间、数据拷贝至CPU的时间。
+ - 采用Fluid C++预测引擎: 包含Fluid C++预测、Fluid-TensorRT预测,下面同时测试了Float32 (FP32) 和Float16 (FP16)的推理速度。
+
+**注意:** TensorRT中固定尺寸和动态尺寸区别请参考文档[TensorRT预测部署教程](TENSOR_RT.md)。由于固定尺寸下对两阶段模型支持不完善,所以Faster RCNN模型采用动态尺寸测试。固定尺寸和动态尺寸支持融合的OP不完全一样,因此同一个模型在固定尺寸和动态尺寸下测试的性能可能会有一点差异。
+
+## 二、推理速度
+
+### 1、Linux系统
+#### (1)Tesla V100
+
+| 模型 | backbone | 是否固定尺寸 | 入网尺寸 | paddle_inference | trt_fp32 | trt_fp16 |
+|-------------------------------|--------------|--------|----------|------------------|----------|----------|
+| Faster RCNN FPN | ResNet50 | 否 | 640x640 | 27.99 | 26.15 | 21.92 |
+| Faster RCNN FPN | ResNet50 | 否 | 800x1312 | 32.49 | 25.54 | 21.70 |
+| YOLOv3 | Mobilenet\_v1 | 是 | 608x608 | 9.74 | 8.61 | 6.28 |
+| YOLOv3 | Darknet53 | 是 | 608x608 | 17.84 | 15.43 | 9.86 |
+| PPYOLO | ResNet50 | 是 | 608x608 | 20.77 | 18.40 | 13.53 |
+| SSD | Mobilenet\_v1 | 是 | 300x300 | 5.17 | 4.43 | 4.29 |
+| TTFNet | Darknet53 | 是 | 512x512 | 10.14 | 8.71 | 5.55 |
+| FCOS | ResNet50 | 是 | 640x640 | 35.47 | 35.02 | 34.24 |
+
+
+#### (2)Jetson AGX Xavier
+
+| 模型 | backbone | 是否固定尺寸 | 入网尺寸 | paddle_inference | trt_fp32 | trt_fp16 |
+|-------------------------------|--------------|--------|----------|------------------|----------|----------|
+| Faster RCNN FPN | ResNet50 | 否 | 640x640 | 169.45 | 158.92 | 119.25 |
+| Faster RCNN FPN | ResNet50 | 否 | 800x1312 | 228.07 | 156.39 | 117.03 |
+| YOLOv3 | Mobilenet\_v1 | 是 | 608x608 | 48.76 | 43.83 | 18.41 |
+| YOLOv3 | Darknet53 | 是 | 608x608 | 121.61 | 110.30 | 42.38 |
+| PPYOLO | ResNet50 | 是 | 608x608 | 111.80 | 99.40 | 48.05 |
+| SSD | Mobilenet\_v1 | 是 | 300x300 | 10.52 | 8.84 | 8.77 |
+| TTFNet | Darknet53 | 是 | 512x512 | 73.77 | 64.03 | 31.46 |
+| FCOS | ResNet50 | 是 | 640x640 | 217.11 | 214.38 | 205.78 |
+
+### 2、Windows系统
+#### (1)GTX 1080Ti
+
+| 模型 | backbone | 是否固定尺寸 | 入网尺寸 | paddle_inference | trt_fp32 | trt_fp16 |
+|-------------------------------|--------------|--------|----------|------------------|----------|----------|
+| Faster RCNN FPN | ResNet50 | 否 | 640x640 | 50.74 | 57.17 | 62.08 |
+| Faster RCNN FPN | ResNet50 | 否 | 800x1312 | 50.31 | 57.61 | 62.05 |
+| YOLOv3 | Mobilenet\_v1 | 是 | 608x608 | 14.51 | 11.23 | 11.13 |
+| YOLOv3 | Darknet53 | 是 | 608x608 | 30.26 | 23.92 | 24.02 |
+| PPYOLO | ResNet50 | 是 | 608x608 | 38.06 | 31.40 | 31.94 |
+| SSD | Mobilenet\_v1 | 是 | 300x300 | 16.47 | 13.87 | 13.76 |
+| TTFNet | Darknet53 | 是 | 512x512 | 21.83 | 17.14 | 17.09 |
+| FCOS | ResNet50 | 是 | 640x640 | 71.88 | 69.93 | 69.52 |
diff --git a/deploy/EXPORT_MODEL.md b/deploy/EXPORT_MODEL.md
new file mode 100644
index 0000000..50f50cb
--- /dev/null
+++ b/deploy/EXPORT_MODEL.md
@@ -0,0 +1,55 @@
+# 模型导出教程
+
+## 一、模型导出
+本章节介绍如何使用`tools/export_model.py`脚本导出模型。
+
+### 1、导出模型输入输出说明
+- 输入变量以及输入形状如下:
+
+ | 输入名称 | 输入形状 | 表示含义 |
+ | :---------: | ----------- | ---------- |
+ | image | [None, 3, H, W] | 输入网络的图像,None表示batch维度,如果输入图像大小为变长,则H,W为None |
+ | im_shape | [None, 2] | 图像经过resize后的大小,表示为H,W, None表示batch维度 |
+ | scale_factor | [None, 2] | 输入图像大小比真实图像大小,表示为scale_y, scale_x |
+
+ **注意** : 具体预处理方式可参考配置文件中TestReader部分。
+
+
+- 动转静导出模型输出统一为:
+
+ - bbox, NMS的输出,形状为[N, 6], 其中N为预测框的个数,6为[class_id, score, x1, y1, x2, y2]。
+ - bbox\_num, 每张图片对应预测框的个数,例如batch_size为2,输出为[N1, N2], 表示第一张图包含N1个预测框,第二张图包含N2个预测框,并且预测框的总个数和NMS输出的第一维N相同
+ - mask,如果网络中包含mask,则会输出mask分支
+
+ **注意** : 模型动转静导出不支持模型结构中包含numpy相关操作的情况。
+
+
+### 2、启动参数说明
+
+| FLAG | 用途 | 默认值 | 备注 |
+|:--------------:|:--------------:|:------------:|:-----------------------------------------:|
+| -c | 指定配置文件 | None | |
+| --output_dir | 模型保存路径 | `./output_inference` | 模型默认保存在`output_inference/配置文件名/`路径下 |
+
+### 3、使用示例
+
+使用训练得到的模型进行试用,脚本如下
+
+```bash
+# 导出YOLOv3模型
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --output_dir=./inference_model \
+ -o weights=weights/yolov3_darknet53_270e_coco.pdparams
+```
+
+预测模型会导出到`inference_model/yolov3_darknet53_270e_coco`目录下,分别为`infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, `model.pdmodel`。
+
+
+### 4、设置导出模型的输入大小
+
+使用Fluid-TensorRT进行预测时,由于<=TensorRT 5.1的版本仅支持定长输入,保存模型的`data`层的图片大小需要和实际输入图片大小一致。而Fluid C++预测引擎没有此限制。设置TestReader中的`image_shape`可以修改保存模型中的输入图片大小。示例如下:
+
+```bash
+# 导出YOLOv3模型,输入是3x640x640
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --output_dir=./inference_model \
+ -o weights=weights/yolov3_darknet53_270e_coco.pdparams TestReader.inputs_def.image_shape=[3,640,640]
+```
diff --git a/deploy/README.md b/deploy/README.md
new file mode 100644
index 0000000..b026ded
--- /dev/null
+++ b/deploy/README.md
@@ -0,0 +1,80 @@
+# PaddleDetection 预测部署
+训练得到一个满足要求的模型后,如果想要将该模型部署到已选择的平台上,需要通过`tools/export_model.py`将模型导出预测部署的模型和配置文件。
+并在同一文件夹下导出预测时使用的配置文件,配置文件名为`infer_cfg.yml`。
+
+## 1、`PaddleDetection`目前支持的部署方式按照部署设备可以分为:
+- 在本机`python`语言部署,支持在有`python paddle`(支持`CPU`、`GPU`)环境下部署,有两种方式:
+ - 使用`tools/infer.py`,此种方式依赖`PaddleDetection`代码库。
+ - 将模型导出,使用`deploy/python/infer.py`,此种方式不依赖`PaddleDetection`代码库,可以单个`python`文件部署。
+- 在本机`C++`语言使用`paddle inference`预测库部署,支持在`Linux`和`Windows`系统下部署。请参考文档[C++部署](cpp/README.md)。
+- 在服务器端以服务形式部署,使用[PaddleServing](./serving/README.md)部署。
+- 在手机移动端部署,使用[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) 在手机移动端部署。
+ 常见模型部署Demo请参考[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo) 。
+- `NV Jetson`嵌入式设备上部署
+- `TensorRT`加速请参考文档[TensorRT预测部署教程](TENSOR_RT.md)
+
+## 2、模型导出
+使用`tools/export_model.py`脚本导出模型以及部署时使用的配置文件,配置文件名字为`infer_cfg.yml`。模型导出脚本如下:
+```bash
+# 导出YOLOv3模型
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml -o weights=weights/yolov3_darknet53_270e_coco.pdparams
+```
+预测模型会导出到`output_inference/yolov3_darknet53_270e_coco`目录下,分别为`infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, `model.pdmodel`。
+
+如果需要导出`PaddleServing`格式的模型,需要设置`export_serving_model=True`:
+```bash
+python tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml -o weights=weights/yolov3_darknet53_270e_coco.pdparams --export_serving_model=True
+```
+预测模型会导出到`output_inference/yolov3_darknet53_270e_coco`目录下,分别为`infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, `model.pdmodel`, `serving_client/`文件夹, `serving_server/`文件夹。
+
+模型导出具体请参考文档[PaddleDetection模型导出教程](EXPORT_MODEL.md)。
+
+## 3、如何选择部署时依赖库的版本
+
+### (1)CUDA、cuDNN、TensorRT版本选择
+由于CUDA、cuDNN、TensorRT不一定都是向前兼容的,需要使用与编译Paddle预测库使用的环境完全一致的环境进行部署。
+
+### (2)部署时预测库版本、预测引擎版本选择
+
+- Linux、Windows平台下C++部署,需要使用Paddle预测库进行部署。
+ (1)Paddle官网提供在不同平台、不同环境下编译好的预测库,您可以直接使用,请在这里[Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 选择。
+ (2)如果您将要部署的平台环境,Paddle官网上没有提供已编译好的预测库,您可以自行编译,编译过程请参考[Paddle源码编译](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/compile/linux-compile.html)。
+
+**注意:** Paddle预测库版本需要>=2.0
+
+- Python语言部署,需要在对应平台上安装Paddle Python包。如果Paddle官网上没有提供该平台下的Paddle Python包,您可以自行编译,编译过程请参考[Paddle源码编译](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/compile/linux-compile.html)。
+
+- PaddleServing部署
+ PaddleServing 0.4.0是基于Paddle 1.8.4开发,PaddleServing 0.5.0是基于Paddle2.0开发。
+
+- Paddle-Lite部署
+ Paddle-Lite支持OP列表请参考:[Paddle-Lite支持的OP列表](https://paddle-lite.readthedocs.io/zh/latest/source_compile/library.html) ,请根据所部署模型中使用到的op选择Paddle-Lite版本。
+
+- NV Jetson部署
+ Paddle官网提供在NV Jetson平台上已经编译好的预测库,[Paddle NV Jetson预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 。
+ 若列表中没有您需要的预测库,您可以在您的平台上自行编译,编译过程请参考[Paddle源码编译](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/compile/linux-compile.html)。
+
+
+## 4、部署
+- C++部署,先使用跨平台编译工具`CMake`根据`CMakeLists.txt`生成`Makefile`,支持`Windows、Linux、NV Jetson`平台,然后进行编译产出可执行文件。可以直接使用`cpp/scripts/build.sh`脚本编译:
+```bash
+cd cpp
+sh scripts/build.sh
+```
+
+- Python部署,可以使用`tools/infer.py`(依赖PaddleDetection源码)部署,或者使用`deploy/python/infer.py`单文件部署。
+
+- PaddleServing部署,请参考[PaddleServing部署](./serving/README.md)。
+
+- 手机移动端部署,请参考[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo)部署。
+
+
+## 5、常见问题QA
+- 1、`Paddle 1.8.4`训练的模型,可以用`Paddle2.0`部署吗?
+ Paddle 2.0是兼容Paddle 1.8.4的,因此是可以的。但是部分模型(如SOLOv2)使用到了Paddle 2.0中新增OP,这类模型不可以。
+
+- 2、Windows编译时,预测库是VS2015编译的,选择VS2017或VS2019会有问题吗?
+ 关于VS兼容性问题请参考:[C++Visual Studio 2015、2017和2019之间的二进制兼容性](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=msvc-160)
+
+- 3、cuDNN 8.0.4连续预测会发生内存泄漏吗?
+ 经QA测试,发现cuDNN 8系列连续预测时都有内存泄漏问题,且cuDNN 8性能差于cuDNN 7,推荐使用CUDA + cuDNN7.6.4的方式进行部署。
diff --git a/deploy/TENSOR_RT.md b/deploy/TENSOR_RT.md
new file mode 100644
index 0000000..9d97cf2
--- /dev/null
+++ b/deploy/TENSOR_RT.md
@@ -0,0 +1,93 @@
+# TensorRT预测部署教程
+TensorRT是NVIDIA提出的用于统一模型部署的加速库,可以应用于V100、JETSON Xavier等硬件,它可以极大提高预测速度。Paddle TensorRT教程请参考文档[使用Paddle-TensorRT库预测](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html#)
+
+## 1. 安装PaddleInference预测库
+- Python安装包,请从[这里](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-release) 下载带有tensorrt的安装包进行安装
+
+- CPP预测库,请从[这里](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 下载带有TensorRT编译的预测库
+
+- 如果Python和CPP官网没有提供已编译好的安装包或预测库,请参考[源码安装](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/compile/linux-compile.html) 自行编译
+
+注意,您的机器上TensorRT的版本需要跟您使用的预测库中TensorRT版本保持一致。
+
+## 2. 导出模型
+模型导出具体请参考文档[PaddleDetection模型导出教程](EXPORT_MODEL.md)。
+
+## 3. 开启TensorRT加速
+### 3.1 配置TensorRT
+在使用Paddle预测库构建预测器配置config时,打开TensorRT引擎就可以了:
+
+```
+config->EnableUseGpu(100, 0); // 初始化100M显存,使用GPU ID为0
+config->GpuDeviceId(); // 返回正在使用的GPU ID
+// 开启TensorRT预测,可提升GPU预测性能,需要使用带TensorRT的预测库
+config->EnableTensorRtEngine(1 << 20 /*workspace_size*/,
+ batch_size /*max_batch_size*/,
+ 3 /*min_subgraph_size*/,
+ AnalysisConfig::Precision::kFloat32 /*precision*/,
+ false /*use_static*/,
+ false /*use_calib_mode*/);
+
+```
+
+### 3.2 TensorRT固定尺寸预测
+TensorRT版本<=5时,使用TensorRT预测时,只支持固定尺寸输入。
+
+在导出模型时指定模型输入尺寸,设置`TestReader.inputs_def.image_shape=[3,640,640]`,具体请参考[PaddleDetection模型导出教程](EXPORT_MODEL.md) 。
+
+`TestReader.inputs_def.image_shape`设置的是输入TensorRT引擎的数据尺寸(在像FasterRCNN中,`TestReader.inputs_def.image_shape`指定的是在`Pad`操作之前的图像数据尺寸)。
+
+可以通过[visualdl](https://www.paddlepaddle.org.cn/paddle/visualdl/demo/graph) 打开`model.pdmodel`文件,查看输入的第一个Tensor尺寸是否是固定的,如果不指定,尺寸会用`?`表示,如下图所示:
+
+
+同时需要将图像预处理后的尺寸与设置的模型输入尺寸保持一致,需要设置`infer_cfg.yml`配置文件中`Resize OP`的`target_size`参数和`keep_ratio`参数。
+
+注意:由于TensorRT不支持在batch维度进行slice操作,Faster RCNN 和 Mask RCNN使用固定尺寸输入预测会报错,这两个模型请使用动态尺寸输入。
+
+以`YOLOv3`为例,使用固定尺寸输入预测:
+```
+python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439_640x640.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
+```
+
+### 3.3 TensorRT动态尺寸预测
+
+TensorRT版本>=6时,使用TensorRT预测时,可以支持动态尺寸输入。
+Paddle预测库关于动态尺寸输入请查看[Paddle CPP预测](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/native_infer.html) 的`SetTRTDynamicShapeInfo`函数说明。
+
+`python/infer.py`设置动态尺寸输入参数说明:
+
+- use_dynamic_shape 用于设定TensorRT的输入尺寸是否是动态尺寸,默认值:False
+
+- trt_min_shape 用于设定TensorRT的输入图像height、width中的最小尺寸,默认值:1
+
+- trt_max_shape 用于设定TensorRT的输入图像height、width中的最大尺寸,默认值:1280
+
+- trt_opt_shape 用于设定TensorRT的输入图像height、width中的最优尺寸,默认值:640
+
+**注意:`TensorRT`中动态尺寸设置是4维的,这里只设置输入图像的尺寸。**
+
+以`Faster RCNN`为例,使用动态尺寸输入预测:
+```
+python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --use_dynamic_shape=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
+```
+
+## 4、常见问题QA
+**Q:** 提示没有`tensorrt_op`
+**A:** 请检查是否使用带有TensorRT的Paddle Python包或预测库。
+
+**Q:** 提示`op out of memory`
+**A:** 检查GPU是否是别人也在使用,请尝试使用空闲GPU
+
+**Q:** 提示`some trt inputs dynamic shape info not set`
+**A:** 这是由于`TensorRT`会把网络结构划分成多个子图,我们只设置了输入数据的动态尺寸,划分的其他子图的输入并未设置动态尺寸。有两个解决方法:
+
+- 方法一:通过增大`min_subgraph_size`,跳过对这些子图的优化。根据提示,设置min_subgraph_size大于并未设置动态尺寸输入的子图中OP个数即可。
+`min_subgraph_size`的意思是,在加载TensorRT引擎的时候,大于`min_subgraph_size`的OP才会被优化,并且这些OP是连续的且是TensorRT可以优化的。
+
+- 方法二:找到子图的这些输入,按照上面方式也设置子图的输入动态尺寸。
+
+**Q:** 如何打开日志
+**A:** 预测库默认是打开日志的;如果代码中调用了`config.disable_glog_info()`关闭日志,只要注释掉该调用就可以重新打开日志
+
+**Q:** 开启TensorRT,预测时提示Slice on batch axis is not supported in TensorRT
+**A:** 请尝试使用动态尺寸输入
diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt
new file mode 100644
index 0000000..0bc0be9
--- /dev/null
+++ b/deploy/cpp/CMakeLists.txt
@@ -0,0 +1,241 @@
+cmake_minimum_required(VERSION 3.0)
+project(PaddleObjectDetector CXX C)
+# Build switches; override with -D<NAME>=ON|OFF on the cmake command line.
+option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
+option(WITH_GPU "Compile demo with GPU/CPU, default use GPU." ON)
+option(WITH_TENSORRT "Compile demo with TensorRT." OFF)
+
+# Dependency locations. All default to empty; the required ones are validated further down.
+SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
+SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference")
+SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
+SET(CUDA_LIB "" CACHE PATH "Location of libraries")
+SET(CUDNN_LIB "" CACHE PATH "Location of libraries")
+SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT")
+SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT")
+# Pulls in the ext-yaml-cpp external project that the main target depends on.
+include(cmake/yaml-cpp.cmake)
+# Search paths for the project sources and for the yaml-cpp headers/libs built under the binary dir.
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
+
+# Replace every "/MD" with "/MT" in the C++ flag variables (MSVC runtime-library switches).
+macro(safe_set_static_flag)
+  foreach(cxx_flags_name CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+          CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+    if(${cxx_flags_name} MATCHES "/MD")
+      string(REPLACE "/MD" "/MT" ${cxx_flags_name} "${${cxx_flags_name}}")
+    endif()
+  endforeach()
+endmacro()
+
+if (WITH_MKL)
+  ADD_DEFINITIONS(-DUSE_MKL)
+endif()
+# Required paths. Expansions are quoted so that an empty value still forms a valid if() expression.
+if (NOT DEFINED PADDLE_DIR OR "${PADDLE_DIR}" STREQUAL "")
+  message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_inference_dir")
+endif()
+message("PADDLE_DIR IS:" ${PADDLE_DIR})
+
+if (NOT DEFINED OPENCV_DIR OR "${OPENCV_DIR}" STREQUAL "")
+  message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
+endif()
+
+include_directories("${CMAKE_SOURCE_DIR}/") # header search paths: project root, then the Paddle package and its bundled third-party deps
+include_directories("${PADDLE_DIR}/")
+include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
+include_directories("${PADDLE_DIR}/third_party/install/glog/include")
+include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
+include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
+endif()
+include_directories("${PADDLE_DIR}/third_party/boost")
+include_directories("${PADDLE_DIR}/third_party/eigen3")
+# snappy / snappystream library paths are added only when the Paddle package contains them.
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
+endif()
+# Library search paths for the remaining bundled dependencies, Paddle itself, and the build directory.
+link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
+link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
+link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
+link_directories("${PADDLE_DIR}/paddle/lib/")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}")
+# Platform-specific Paddle paths and OpenCV lookup.
+# Windows looks for the OpenCV package config under ${OPENCV_DIR}/build/, other platforms under
+# ${OPENCV_DIR}/share/OpenCV; NO_DEFAULT_PATH keeps find_package from searching anywhere else.
+if (WIN32)
+ include_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ include_directories("${PADDLE_DIR}/paddle/include")
+ link_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
+
+else ()
+ find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
+ include_directories("${PADDLE_DIR}/paddle/include")
+ link_directories("${PADDLE_DIR}/paddle/lib")
+endif ()
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+if (WIN32)  # MSVC: /bigobj for large object files; /MT and /MTd select the static runtime
+  add_definitions("/DGOOGLE_GLOG_DLL_DECL=")  # define GOOGLE_GLOG_DLL_DECL as empty
+  set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+  set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+else()  # -O2 needs a capital O: lowercase -o2 is an output-file option, not an optimization level
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fopenmp -std=c++11")
+  set(CMAKE_STATIC_LIBRARY_PREFIX "")
+endif()
+
+# GPU builds need CUDA_LIB, plus CUDNN_LIB on non-Windows. TODO let users define cuda lib path
+if (WITH_GPU)
+  if (NOT DEFINED CUDA_LIB OR "${CUDA_LIB}" STREQUAL "")
+    message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64")
+  endif()
+  if (NOT WIN32)
+    if (NOT DEFINED CUDNN_LIB OR "${CUDNN_LIB}" STREQUAL "")  # the cache entry always defines it, so test for empty too
+      message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64")
+    endif()
+  endif(NOT WIN32)
+endif()
+
+
+if (NOT WIN32) # TensorRT search paths are only added for non-Windows builds with both WITH_TENSORRT and WITH_GPU
+ if (WITH_TENSORRT AND WITH_GPU)
+ include_directories("${TENSORRT_INC_DIR}/")
+ link_directories("${TENSORRT_LIB_DIR}/")
+ endif()
+endif(NOT WIN32)
+# Optional nGraph: paths are added only when the Paddle package contains third_party/install/ngraph.
+if (NOT WIN32)
+ set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
+ if(EXISTS ${NGRAPH_PATH})
+ include(GNUInstallDirs)
+ include_directories("${NGRAPH_PATH}/include")
+ link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
+ set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX}) # NOTE(review): NGRAPH_LIB is not referenced again in this file
+ endif()
+endif()
+
+if(WITH_MKL) # MKL build: MATH_LIB is mklml + iomp5; MKLDNN_LIB is set when the mkldnn directory exists
+ include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
+ if (WIN32)
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
+ else ()
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+ execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib) # NOTE(review): runs at configure time, writes to /usr/lib, and its result is not checked
+ endif ()
+ set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
+ if(EXISTS ${MKLDNN_PATH})
+ include_directories("${MKLDNN_PATH}/include")
+ if (WIN32)
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
+ else ()
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+ endif ()
+ endif()
+else() # non-MKL build: MATH_LIB is the static OpenBLAS library
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+
+# Pick the Paddle inference library to link.
+# Windows: default to the static library under paddle/lib; only when that file is
+# missing and the legacy paddle/fluid/inference copy exists, use the legacy one.
+# (Previously a second if(WIN32) block overwrote the probed result unconditionally.)
+if (WIN32)
+  set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(PADDLE_LEGACY_LIB
+    ${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  if(NOT EXISTS "${DEPS}" AND EXISTS "${PADDLE_LEGACY_LIB}")
+    set(DEPS ${PADDLE_LEGACY_LIB})
+  endif()
+else()
+  # Non-Windows: link the shared library under paddle/lib.
+  set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME})
+# ${DEPS} with braces is required to expand the variable; a bare $DEPS is printed literally.
+message("DEPS:" ${DEPS})
+
+if (NOT WIN32) # append math libs and bundled third-party libs to DEPS; library names differ per platform
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ glog gflags protobuf z xxhash yaml-cpp
+ )
+ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+else()
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ glog gflags_static libprotobuf xxhash libyaml-cppmt)
+ set(DEPS ${DEPS} libcmt shlwapi)
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+endif(NOT WIN32)
+# GPU builds: add CUDA/cuDNN libraries; TensorRT libraries are added only on non-Windows when WITH_TENSORRT is ON.
+if(WITH_GPU)
+ if(NOT WIN32)
+ if (WITH_TENSORRT)
+ set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+ set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+ else()
+ set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+# Non-Windows system libraries, passed to the linker as a single string of -l flags.
+if (NOT WIN32)
+ set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
+ set(DEPS ${DEPS} ${EXTERNAL_LIB})
+endif()
+
+set(DEPS ${DEPS} ${OpenCV_LIBS})
+add_executable(main src/main.cc src/preprocess_op.cc src/object_detector.cc)
+ADD_DEPENDENCIES(main ext-yaml-cpp)  # build the yaml-cpp external project before main
+message("DEPS:" ${DEPS})  # was "$DEPS", which CMake prints literally instead of expanding
+target_link_libraries(main ${DEPS})
+
+if (WIN32 AND WITH_MKL) # after building main, copy the MKL/MKL-DNN DLLs into ./ and ./release, and the Paddle DLL into ./release
+ add_custom_command(TARGET main POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
+ )
+endif()
+# NOTE(review): the block above copies mkldnn.dll without checking that the mkldnn directory exists, and with WITH_MKL the Paddle DLL copy below is issued twice.
+if (WIN32)
+ add_custom_command(TARGET main POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
+ )
+endif()
diff --git a/deploy/cpp/README.md b/deploy/cpp/README.md
new file mode 100644
index 0000000..c9962bd
--- /dev/null
+++ b/deploy/cpp/README.md
@@ -0,0 +1,72 @@
+# C++端预测部署
+
+## 本教程结构
+
+[1.说明](#1说明)
+
+[2.主要目录和文件](#2主要目录和文件)
+
+[3.编译部署](#3编译部署)
+
+
+
+## 1.说明
+
+本目录为用户提供一个跨平台的`C++`部署方案,让用户通过`PaddleDetection`训练的模型导出后,即可基于本项目快速运行,也可以快速集成代码结合到自己的项目实际应用中去。
+
+主要设计的目标包括以下四点:
+- 跨平台,支持在 `Windows` 和 `Linux` 完成编译、二次开发集成和部署运行
+- 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑
+- 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化
+- 支持各种不同检测模型结构,包括`Yolov3`/`Faster_RCNN`/`SSD`等
+
+## 2.主要目录和文件
+
+```bash
+deploy/cpp
+|
+├── src
+│ ├── main.cc # 集成代码示例, 程序入口
+│ ├── object_detector.cc # 模型加载和预测主要逻辑封装类实现
+│ └── preprocess_op.cc # 预处理相关主要逻辑封装实现
+|
+├── include
+│ ├── config_parser.h # 导出模型配置yaml文件解析
+│ ├── object_detector.h # 模型加载和预测主要逻辑封装类
+│ └── preprocess_op.h # 预处理相关主要逻辑类封装
+|
+├── docs
+│ ├── linux_build.md # Linux 编译指南
+│ └── windows_vs2019_build.md # Windows VS2019编译指南
+│
+├── build.sh # 编译命令脚本
+│
+├── CMakeLists.txt # cmake编译入口文件
+|
+├── CMakeSettings.json # Visual Studio 2019 CMake项目编译设置
+│
+└── cmake # 依赖的外部项目cmake(目前仅有yaml-cpp)
+
+```
+
+## 3.编译部署
+
+### 3.1 导出模型
+请确认您已经基于`PaddleDetection`的[export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/dygraph/tools/export_model.py)导出您的模型,并妥善保存到合适的位置。导出模型细节请参考 [导出模型教程](https://github.com/PaddlePaddle/PaddleDetection/tree/dygraph/deploy/EXPORT_MODEL.md)。
+
+模型导出后, 目录结构如下(以`yolov3_darknet`为例):
+```
+yolov3_darknet # 模型目录
+├── infer_cfg.yml # 模型配置信息
+├── model.pdmodel # 模型文件
+├── model.pdiparams.info #模型公用信息
+└── model.pdiparams # 参数文件
+```
+
+预测时,该目录所在的路径会作为程序的输入参数。
+
+### 3.2 编译
+
+仅支持在`Windows`和`Linux`平台编译和使用
+- [Linux 编译指南](docs/linux_build.md)
+- [Windows编译指南(使用Visual Studio 2019)](docs/windows_vs2019_build.md)
diff --git a/deploy/cpp/cmake/yaml-cpp.cmake b/deploy/cpp/cmake/yaml-cpp.cmake
new file mode 100644
index 0000000..7bc7f34
--- /dev/null
+++ b/deploy/cpp/cmake/yaml-cpp.cmake
@@ -0,0 +1,30 @@
+
+find_package(Git REQUIRED) # NOTE(review): the download below uses URL rather than a git repository — confirm Git is still needed
+# ExternalProject provides ExternalProject_Add, used below to fetch and build yaml-cpp.
+include(ExternalProject)
+# Print the build type that is forwarded to the yaml-cpp build.
+message("${CMAKE_BUILD_TYPE}")
+# ext-yaml-cpp: download the yaml-cpp archive (verified by MD5) and build it as a static library under <build>/ext/yaml-cpp.
+ExternalProject_Add(
+ ext-yaml-cpp
+ URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
+ URL_MD5 9542d6de397d1fbd649ed468cb5850e6
+ CMAKE_ARGS
+ -DYAML_CPP_BUILD_TESTS=OFF
+ -DYAML_CPP_BUILD_TOOLS=OFF
+ -DYAML_CPP_INSTALL=OFF
+ -DYAML_CPP_BUILD_CONTRIB=OFF
+ -DMSVC_SHARED_RT=OFF
+ -DBUILD_SHARED_LIBS=OFF
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+ -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+ -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+ -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+ -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+ PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
+ # Disable install step
+ INSTALL_COMMAND ""
+ LOG_DOWNLOAD ON
+ LOG_BUILD 1
+)
diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
new file mode 100644
index 0000000..d7ece30
--- /dev/null
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -0,0 +1,188 @@
+# Jetson平台编译指南
+
+## 说明
+`NVIDIA Jetson`设备是具有`NVIDIA GPU`的嵌入式设备,可以将目标检测算法部署到该设备上。本文档是在`Jetson`硬件上部署`PaddleDetection`模型的教程。
+
+本文档以`Jetson TX2`硬件、`JetPack 4.3`版本为例进行说明。
+
+`Jetson`平台的开发指南请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
+
+## Jetson环境搭建
+`Jetson`系统软件安装,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
+
+* (1) 查看硬件系统的l4t的版本号
+```
+cat /etc/nv_tegra_release
+```
+* (2) 根据硬件,选择硬件可安装的`JetPack`版本,硬件和`JetPack`版本对应关系请参考[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive).
+
+* (3) 下载`JetPack`,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html) 中的`Preparing a Jetson Developer Kit for Use`章节内容进行刷写系统镜像。
+
+**注意**: 请在[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive) 根据硬件选择适配的`JetPack`版本进行刷机。
+
+## 下载或编译`Paddle`预测库
+本文档使用`Paddle`在`JetPack4.3`上预先编译好的预测库,请根据硬件在[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 中选择对应版本的`Paddle`预测库。
+
+这里选择[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.0-nv-jetson-jetpack4.3-all/paddle_inference.tgz), `Paddle`版本`2.0.0-rc0`,`CUDA`版本`10.0`,`CUDNN`版本`7.6`,`TensorRT`版本`6`。
+
+若需要自己在`Jetson`平台上自定义编译`Paddle`库,请参考文档[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html) 的`NVIDIA Jetson嵌入式硬件预测库源码编译`部分内容。
+
+### Step1: 下载代码
+
+ `git clone https://github.com/PaddlePaddle/PaddleDetection.git`
+
+**说明**:其中`C++`预测代码在`/root/projects/PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
+
+
+### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
+
+解压下载的[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.1-nv-jetson-jetpack4.3-all/paddle_inference.tgz) 。
+
+下载并解压后`/root/projects/fluid_inference`目录包含内容为:
+```
+fluid_inference
+├── paddle # paddle核心库和头文件
+|
+├── third_party # 第三方依赖库和头文件
+|
+└── version.txt # 版本和编译信息
+```
+
+**注意:** 预编译库`nv-jetson-cuda10-cudnn7.6-trt6`使用的`GCC`版本是`7.5.0`,其他都是使用`GCC 4.8.5`编译的。使用高版本的GCC可能存在`ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。
+
+
+### Step4: 编译
+
+编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下:
+
+注意,`TX2`平台的`CUDA`、`CUDNN`需要通过`JetPack`安装。
+
+```
+# 是否使用GPU(即是否使用 CUDA)
+WITH_GPU=ON
+
+# 是否使用MKL or openblas,TX2需要设置为OFF
+WITH_MKL=OFF
+
+# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
+WITH_TENSORRT=ON
+
+# TensorRT 的include路径
+TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
+
+# TensorRT 的lib路径
+TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
+
+# Paddle 预测库路径
+PADDLE_DIR=/path/to/fluid_inference/
+
+# Paddle 预测库名称
+PADDLE_LIB_NAME=paddle_inference
+
+# Paddle 的预测库是否使用静态库来编译
+# 使用TensorRT时,Paddle的预测库通常为动态库
+WITH_STATIC_LIB=OFF
+
+# CUDA 的 lib 路径
+CUDA_LIB=/usr/local/cuda-10.0/lib64
+
+# CUDNN 的 lib 路径
+CUDNN_LIB=/usr/lib/aarch64-linux-gnu
+
+# OPENCV_DIR 的路径
+# linux平台请下载:https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2,并解压到deps文件夹下
+# TX2平台请下载:https://paddlemodels.bj.bcebos.com/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip,并解压到deps文件夹下
+OPENCV_DIR=/path/to/opencv
+
+# 请检查以上各个路径是否正确
+
+# 以下无需改动
+cmake .. \
+ -DWITH_GPU=${WITH_GPU} \
+ -DWITH_MKL=OFF \
+ -DWITH_TENSORRT=${WITH_TENSORRT} \
+ -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
+ -DPADDLE_DIR=${PADDLE_DIR} \
+ -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
+ -DCUDA_LIB=${CUDA_LIB} \
+ -DCUDNN_LIB=${CUDNN_LIB} \
+ -DOPENCV_DIR=${OPENCV_DIR} \
+ -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME}
+make
+```
+
+例如设置如下:
+```
+# 是否使用GPU(即是否使用 CUDA)
+WITH_GPU=ON
+
+# 是否使用MKL or openblas
+WITH_MKL=OFF
+
+# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
+WITH_TENSORRT=OFF
+
+# TensorRT 的include路径
+TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
+
+# TensorRT 的lib路径
+TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
+
+# Paddle 预测库路径
+PADDLE_DIR=/home/nvidia/PaddleDetection_infer/fluid_inference/
+
+# Paddle 预测库名称
+PADDLE_LIB_NAME=paddle_inference
+
+# Paddle 的预测库是否使用静态库来编译
+# 使用TensorRT时,Paddle的预测库通常为动态库
+WITH_STATIC_LIB=OFF
+
+# CUDA 的 lib 路径
+CUDA_LIB=/usr/local/cuda-10.0/lib64
+
+# CUDNN 的 lib 路径
+CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
+```
+
+修改脚本设置好主要参数后,执行`build`脚本:
+ ```shell
+ sh ./scripts/build.sh
+ ```
+
+### Step5: 预测及可视化
+编译成功后,预测入口程序为`build/main`其主要命令参数说明如下:
+| 参数 | 说明 |
+| ---- | ---- |
+| --model_dir | 导出的预测模型所在路径 |
+| --image_path | 要预测的图片文件路径 |
+| --video_path | 要预测的视频文件路径 |
+| --camera_id | (可选)用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
+| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --run_benchmark | 是否重复预测来进行benchmark测速 |
+| --output_dir | 输出图片所在的文件夹, 默认为output |
+
+**注意**: 如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。
+
+
+`样例一`:
+```shell
+#不使用`GPU`测试图片 `/root/projects/images/test.jpeg`
+./main --model_dir=/root/projects/models/yolov3_darknet --image_path=/root/projects/images/test.jpeg
+```
+
+图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
+
+
+`样例二`:
+```shell
+#使用 `GPU`预测视频`/root/projects/videos/test.mp4`
+./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/videos/test.mp4 --use_gpu=1
+```
+视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
+
+
+## 性能测试
+benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
new file mode 100644
index 0000000..76b9619
--- /dev/null
+++ b/deploy/cpp/docs/linux_build.md
@@ -0,0 +1,129 @@
+# Linux平台编译指南
+
+## 说明
+本文档在 `Linux`平台使用`GCC 8.2`测试过,如果需要使用其他G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。本文档使用的预置的opencv库是在ubuntu 16.04上用gcc4.8编译的,如果需要在ubuntu 16.04以外的系统环境编译,那么需自行编译opencv库。
+
+## 前置条件
+* G++ 8.2
+* CUDA 9.0 / CUDA 10.0, cudnn 7+ (仅在使用GPU版本的预测库时需要)
+* CMake 3.0+
+
+请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**。
+
+### Step1: 下载代码
+
+ `git clone https://github.com/PaddlePaddle/PaddleDetection.git`
+
+**说明**:其中`C++`预测代码在`/root/projects/PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
+
+
+### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
+
+PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)
+
+
+下载并解压后`/root/projects/fluid_inference`目录包含内容为:
+```
+fluid_inference
+├── paddle # paddle核心库和头文件
+|
+├── third_party # 第三方依赖库和头文件
+|
+└── version.txt # 版本和编译信息
+```
+
+**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。
+
+
+### Step4: 编译
+
+编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下:
+
+```
+# 是否使用GPU(即是否使用 CUDA)
+WITH_GPU=OFF
+
+# 使用MKL or openblas
+WITH_MKL=ON
+
+# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
+WITH_TENSORRT=OFF
+
+# TensorRT 的include路径
+TENSORRT_INC_DIR=/path/to/TensorRT/include
+
+# TensorRT 的lib路径
+TENSORRT_LIB_DIR=/path/to/TensorRT/lib
+
+# Paddle 预测库路径
+PADDLE_DIR=/path/to/fluid_inference
+
+# Paddle 预测库名称
+PADDLE_LIB_NAME=paddle_inference
+
+# CUDA 的 lib 路径
+CUDA_LIB=/path/to/cuda/lib
+
+# CUDNN 的 lib 路径
+CUDNN_LIB=/path/to/cudnn/lib
+
+# 请检查以上各个路径是否正确
+
+# 以下无需改动
+cmake .. \
+ -DWITH_GPU=${WITH_GPU} \
+ -DWITH_MKL=${WITH_MKL} \
+ -DWITH_TENSORRT=${WITH_TENSORRT} \
+ -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
+ -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
+ -DPADDLE_DIR=${PADDLE_DIR} \
+ -DCUDA_LIB=${CUDA_LIB} \
+ -DCUDNN_LIB=${CUDNN_LIB} \
+ -DOPENCV_DIR=${OPENCV_DIR} \
+ -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME}
+make
+
+```
+
+修改脚本设置好主要参数后,执行`build`脚本:
+ ```shell
+ sh ./scripts/build.sh
+ ```
+
+**注意**: OPENCV依赖OPENBLAS,Ubuntu用户需确认系统是否已存在`libopenblas.so`。如未安装,可执行apt-get install libopenblas-dev进行安装。
+
+### Step5: 预测及可视化
+编译成功后,预测入口程序为`build/main`其主要命令参数说明如下:
+| 参数 | 说明 |
+| ---- | ---- |
+| --model_dir | 导出的预测模型所在路径 |
+| --image_path | 要预测的图片文件路径 |
+| --video_path | 要预测的视频文件路径 |
+| --camera_id | (可选)用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
+| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --run_benchmark | 是否重复预测来进行benchmark测速 |
+| --output_dir | 输出图片所在的文件夹, 默认为output |
+
+**注意**: 如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。
+
+
+`样例一`:
+```shell
+#不使用`GPU`测试图片 `/root/projects/images/test.jpeg`
+./build/main --model_dir=/root/projects/models/yolov3_darknet --image_path=/root/projects/images/test.jpeg
+```
+
+图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
+
+
+`样例二`:
+```shell
+#使用 `GPU`预测视频`/root/projects/videos/test.mp4`
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/videos/test.mp4 --use_gpu=1
+```
+视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
+
+## 性能测试
+benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
new file mode 100644
index 0000000..34607b2
--- /dev/null
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -0,0 +1,128 @@
+# Visual Studio 2019 Community CMake 编译指南
+
+Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。
+
+
+## 前置条件
+* Visual Studio 2019 (根据Paddle预测库所使用的VS版本选择,请参考 [Visual Studio 不同版本二进制兼容性](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=vs-2019) )
+* CUDA 9.0 / CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要)
+* CMake 3.0+ [CMake下载](https://cmake.org/download/)
+
+请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。
+
+**下面所有示例以工作目录为 `D:\projects`演示**。
+
+### Step1: 下载代码
+
+下载源代码
+```shell
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+```
+
+**说明**:其中`C++`预测代码在`PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
+
+
+### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
+
+PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/windows_cpp_inference.html)
+
+解压后`D:\projects\fluid_inference`目录包含内容为:
+```
+fluid_inference
+├── paddle # paddle核心库和头文件
+|
+├── third_party # 第三方依赖库和头文件
+|
+└── version.txt # 版本和编译信息
+```
+
+### Step3: 安装配置OpenCV
+
+1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
+2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv`
+3. 配置环境变量,如下流程所示(如果使用全局绝对路径,可以不用设置环境变量)
+ - 我的电脑->属性->高级系统设置->环境变量
+ - 在系统变量中找到Path(如没有,自行创建),并双击编辑
+ - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin`
+
+### Step4: 编译
+
+1. 进入到`cpp`文件夹
+```
+cd D:\projects\PaddleDetection\deploy\cpp
+```
+
+2. 使用CMake生成项目文件
+
+编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
+
+| 参数名 | 含义 |
+| ---- | ---- |
+| *CUDA_LIB | CUDA的库路径 |
+| *CUDNN_LIB | CUDNN的库路径 |
+| OPENCV_DIR | OpenCV的安装路径 |
+| PADDLE_DIR | Paddle预测库的路径 |
+| PADDLE_LIB_NAME | Paddle 预测库名称 |
+
+**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的勾去掉 2. 如果使用的是`openblas`版本,请把`WITH_MKL`勾去掉
+
+执行如下命令生成项目文件:
+```
+cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=path_to_cuda_lib -DCUDNN_LIB=path_to_cudnn_lib -DPADDLE_DIR=path_to_paddle_lib -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=path_to_opencv
+```
+
+例如:
+```
+cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=D:\projects\packages\cuda10_0\lib\x64 -DCUDNN_LIB=D:\projects\packages\cuda10_0\lib\x64 -DPADDLE_DIR=D:\projects\packages\fluid_inference -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=D:\projects\packages\opencv3_4_6
+```
+
+3. 编译
+用`Visual Studio 16 2019`打开`cpp`文件夹下的`PaddleObjectDetector.sln`,将编译模式设置为`Release`,点击`生成`->`全部生成`
+
+
+### Step5: 预测及可视化
+
+上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
+
+```
+cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
+```
+可执行文件`main`即为样例的预测程序,其主要的命令行参数如下:
+
+| 参数 | 说明 |
+| ---- | ---- |
+| --model_dir | 导出的预测模型所在路径 |
+| --image_path | 要预测的图片文件路径 |
+| --video_path | 要预测的视频文件路径 |
+| --camera_id | (可选)用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
+| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
+| --run_benchmark | 是否重复预测来进行benchmark测速 |
+| --output_dir | 输出图片所在的文件夹, 默认为output |
+
+**注意**:
+(1)如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。
+(2)如果提示找不到`opencv_world346.dll`,把`D:\projects\packages\opencv3_4_6\build\x64\vc14\bin`文件夹下的`opencv_world346.dll`拷贝到`main.exe`文件夹下即可。
+
+
+`样例一`:
+```shell
+#不使用`GPU`测试图片 `D:\\images\\test.jpeg`
+.\main --model_dir=D:\\models\\yolov3_darknet --image_path=D:\\images\\test.jpeg
+```
+
+图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
+
+
+`样例二`:
+```shell
+#使用`GPU`测试视频 `D:\\videos\\test.mp4`
+.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1
+```
+
+视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
+
+
+## 性能测试
+benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h
new file mode 100644
index 0000000..c38049d
--- /dev/null
+++ b/deploy/cpp/include/config_parser.h
@@ -0,0 +1,113 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+#include