initialize

This commit is contained in:
leaf 2021-06-23 08:58:10 +08:00
commit fd82b5c7eb
603 changed files with 42187 additions and 0 deletions

21
.travis/precommit.sh Normal file
View File

@ -0,0 +1,21 @@
#!/bin/bash
function abort(){
echo "Your commit not fit PaddlePaddle code style" 1>&2
echo "Please use pre-commit scripts to auto-format your code" 1>&2
exit 1
}
trap 'abort' 0
set -e
cd `dirname $0`
cd ..
export PATH=/usr/bin:$PATH
pre-commit install
if ! pre-commit run -a ; then
ls -lh
git diff --exit-code
exit 1
fi
trap : 0

8
.travis/requirements.txt Normal file
View File

@ -0,0 +1,8 @@
# add python requirements for unittests here, note install pycocotools
# directly is not supported in travis ci, it is installed by compiling
# from source files in unittest.sh
tqdm
cython
shapely
llvmlite==0.33
numba==0.50

47
.travis/unittest.sh Normal file
View File

@ -0,0 +1,47 @@
#!/bin/bash
abort(){
echo "Run unittest failed" 1>&2
echo "Please check your code" 1>&2
echo " 1. you can run unit tests by 'bash .travis/unittest.sh' locally" 1>&2
echo " 2. you can add python requirements in .travis/requirements.txt if you use new requirements in unit tests" 1>&2
exit 1
}
unittest(){
if [ $? != 0 ]; then
exit 1
fi
find "./ppdet" -name 'tests' -type d -print0 | \
xargs -0 -I{} -n1 bash -c \
'python -m unittest discover -v -s {}'
}
trap 'abort' 0
set -e
# install travis python dependencies exclude pycocotools
if [ -f ".travis/requirements.txt" ]; then
pip install -r .travis/requirements.txt
fi
# install pycocotools
if [ `pip list | grep pycocotools | wc -l` -eq 0 ]; then
# install git if needed
if [ -n `which git` ]; then
apt-get update
apt-get install -y git
fi;
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
make install
python setup.py install --user
cd ../..
rm -rf cocoapi
fi
export PYTHONPATH=`pwd`:$PYTHONPATH
unittest .
trap : 0

View File

@ -0,0 +1,28 @@
# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
## Model Zoo
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-FPN | Cascade Faster | 1 | 1x | ---- | 41.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | Cascade Mask | 1 | 1x | ---- | 41.8 | 36.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 1x | ---- | 44.4 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 2x | ---- | 45.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 1x | ---- | 44.9 | 39.1 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 2x | ---- | 45.7 | 39.7 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
## Citations
```
@article{Cai_2019,
title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
ISSN={1939-3539},
url={http://dx.doi.org/10.1109/tpami.2019.2956516},
DOI={10.1109/tpami.2019.2956516},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Cai, Zhaowei and Vasconcelos, Nuno},
year={2019},
pages={11}
}
```

View File

@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,97 @@
architecture: CascadeRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
CascadeRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: CascadeHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
CascadeHead:
head: CascadeTwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
CascadeTwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode:
name: RCNNBox
prior_box_var: [30.0, 30.0, 15.0, 15.0]
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
MaskFeat:
num_convs: 4
out_channel: 256
MaskAssigner:
mask_resolution: 28
MaskPostProcess:
binary_thresh: 0.5

View File

@ -0,0 +1,75 @@
architecture: CascadeRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
CascadeRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: CascadeHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
CascadeHead:
head: CascadeTwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
CascadeTwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode:
name: RCNNBox
prior_box_var: [30.0, 30.0, 15.0, 15.0]
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.001
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final

View File

@ -0,0 +1,18 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final

View File

@ -0,0 +1,18 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,19 @@
metric: COCO
num_classes: 80
TrainDataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
anno_path: annotations/instances_val2017.json

View File

@ -0,0 +1,19 @@
metric: COCO
num_classes: 80
TrainDataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
EvalDataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
anno_path: annotations/instances_val2017.json

20
configs/datasets/dota.yml Normal file
View File

@ -0,0 +1,20 @@
metric: COCO
num_classes: 15
TrainDataset:
!COCODataSet
image_dir: trainval_split/images
anno_path: trainval_split/s2anet_trainval_paddle_coco.json
dataset_dir: dataset/DOTA_1024_s2anet
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
EvalDataset:
!COCODataSet
image_dir: trainval_split/images
anno_path: trainval_split/s2anet_trainval_paddle_coco.json
dataset_dir: dataset/DOTA_1024_s2anet/
TestDataset:
!ImageFolder
anno_path: trainval_split/s2anet_trainval_paddle_coco.json
dataset_dir: dataset/DOTA_1024_s2anet/

View File

@ -0,0 +1,21 @@
metric: VOC
map_type: integral
num_classes: 4
TrainDataset:
!VOCDataSet
dataset_dir: dataset/roadsign_voc
anno_path: train.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
!VOCDataSet
dataset_dir: dataset/roadsign_voc
anno_path: valid.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
!ImageFolder
anno_path: dataset/roadsign_voc/label_list.txt

21
configs/datasets/voc.yml Normal file
View File

@ -0,0 +1,21 @@
metric: VOC
map_type: 11point
num_classes: 20
TrainDataset:
!VOCDataSet
dataset_dir: dataset/voc
anno_path: trainval.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
!VOCDataSet
dataset_dir: dataset/voc
anno_path: test.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
!ImageFolder
anno_path: dataset/voc/label_list.txt

View File

@ -0,0 +1,20 @@
metric: WiderFace
num_classes: 1
TrainDataset:
!WIDERFaceDataSet
dataset_dir: dataset/wider_face
anno_path: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
data_fields: ['image', 'gt_bbox', 'gt_class']
EvalDataset:
!WIDERFaceDataSet
dataset_dir: dataset/wider_face
anno_path: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
data_fields: ['image']
TestDataset:
!ImageFolder
use_default_label: true

37
configs/dcn/README.md Normal file
View File

@ -0,0 +1,37 @@
### Deformable ConvNets v2
| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
| ResNet50-FPN | Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 42.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 2x | - | 43.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 45.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 46.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | - | 42.7 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | - | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 45.6 | 40.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 47.3 | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNet50-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 48.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
**注意事项:**
- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
## Citations
```
@inproceedings{dai2017deformable,
title={Deformable Convolutional Networks},
author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
booktitle={Proceedings of the IEEE international conference on computer vision},
year={2017}
}
@article{zhu2018deformable,
title={Deformable ConvNets v2: More Deformable, Better Results},
author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
journal={arXiv preprint arXiv:1811.11168},
year={2018}
}
```

View File

@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../cascade_rcnn/_base_/optimizer_1x.yml',
'../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
'../cascade_rcnn/_base_/cascade_fpn_reader.yml',
]
weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,16 @@
_BASE_: [
'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../faster_rcnn/_base_/optimizer_1x.yml',
'../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
'../faster_rcnn/_base_/faster_fpn_reader.yml',
]
weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,26 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,17 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,15 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'../mask_rcnn/_base_/optimizer_1x.yml',
'../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
'../mask_rcnn/_base_/mask_fpn_reader.yml',
]
weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

View File

@ -0,0 +1,26 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,17 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
variant: d
groups: 64
base_width: 4
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]

125
configs/dota/README.md Normal file
View File

@ -0,0 +1,125 @@
# S2ANet模型
## 内容
- [简介](#简介)
- [DOTA数据集](#DOTA数据集)
- [模型库](#模型库)
- [训练说明](#训练说明)
## 简介
[S2ANet](https://arxiv.org/pdf/2008.09397.pdf)是用于检测旋转框的模型要求使用PaddlePaddle 2.0.1(可使用pip安装) 或适当的[develop版本](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#whl-release)。
## DOTA数据集
[DOTA Dataset]是航空影像中物体检测的数据集包含2806张图像每张图像4000*4000分辨率。
| 数据版本 | 类别数 | 图像数 | 图像尺寸 | 实例数 | 标注方式 |
|:--------:|:-------:|:---------:|:---------:| :---------:| :------------: |
| v1.0 | 15 | 2806 | 800~4000 | 118282 | OBB + HBB |
| v1.5 | 16 | 2806 | 800~4000 | 400000 | OBB + HBB |
OBB标注方式是指标注任意四边形顶点按顺时针顺序排列。HBB标注方式是指标注示例的外接矩形。
DOTA数据集中总共有2806张图像其中1411张图像作为训练集458张图像作为评估集剩余937张图像作为测试集。
如果需要切割图像数据,请参考[DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) 。
设置`crop_size=1024, stride=824, gap=200`参数切割数据后训练集15749张图像评估集5297张图像测试集10833张图像。
## 模型库
### S2ANet模型
| 模型 | GPU个数 | Conv类型 | mAP | 模型下载 | 配置文件 |
|:-----------:|:-------:|:----------:|:--------:| :----------:| :---------: |
| S2ANet | 8 | Conv | 71.42 | [model](https://paddledet.bj.bcebos.com/models/s2anet_conv_1x_dota.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/dota/s2anet_conv_1x_dota.yml) |
**注意:**这里使用`multiclass_nms`与原作者使用nms略有不同精度相比原始论文中高0.15 (71.27-->71.42)。
## 训练说明
### 1. 旋转框IOU计算OP
旋转框IOU计算OP[ext_op](../../ppdet/ext_op)是参考Paddle[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/07_new_op/new_custom_op.html) 的方式开发。
若使用旋转框IOU计算OP需要环境满足
- PaddlePaddle >= 2.0.1
- GCC == 8.2
推荐使用docker镜像[paddle:2.0.1-gpu-cuda10.1-cudnn7](registry.baidubce.com/paddlepaddle/paddle:2.0.1-gpu-cuda10.1-cudnn7)。
执行如下命令下载镜像并启动容器:
```
sudo nvidia-docker run -it --name paddle_s2anet -v $PWD:/paddle --network=host registry.baidubce.com/paddlepaddle/paddle:2.0.1-gpu-cuda10.1-cudnn7 /bin/bash
```
镜像中paddle2.0.1已安装好进入python3.7执行如下代码检查paddle安装是否正常
```
import paddle
print(paddle.__version__)
paddle.utils.run_check()
```
进入到`ppdet/ext_op`文件夹,安装:
```
python3.7 setup.py install
```
Windows环境请按照如下步骤安装
1准备Visual Studio (版本需要>=Visual Studio 2015 update3)这里以VS2017为例
2点击开始-->Visual Studio 2017-->适用于 VS 2017 的x64本机工具命令提示
3设置环境变量`set DISTUTILS_USE_SDK=1`
4进入`PaddleDetection/ppdet/ext_op`目录,通过`python3.7 setup.py install`命令进行安装。
安装完成后测试自定义op是否可以正常编译以及计算结果
```
cd PaddleDetecetion/ppdet/ext_op
python3.7 test.py
```
### 2. 数据格式
DOTA 数据集中实例是按照任意四边形标注,在进行训练模型前,需要参考[DOTA2COCO](https://github.com/CAPTAIN-WHU/DOTA_devkit/blob/master/DOTA2COCO.py) 转换成`[xc, yc, bow_w, bow_h, angle]`格式并以coco数据格式存储。
## 评估
执行如下命令,会在`output_dir`文件夹下将每个图像预测结果保存到同文件夹名的txt文本中。
```
python3.7 tools/infer.py -c configs/dota/s2anet_1x_dota.yml -o weights=./weights/s2anet_1x_dota.pdparams --infer_dir=dota_test_images --draw_threshold=0.05 --save_txt=True --output_dir=output
```
请参考[DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) 生成评估文件,评估文件格式请参考[DOTA Test](http://captain.whu.edu.cn/DOTAweb/tasks.html) 生成zip文件每个类一个txt文件txt文件中每行格式为`image_id score x1 y1 x2 y2 x3 y3 x4 y4`,提交服务器进行评估。
## 预测部署
Paddle中`multiclass_nms`算子的输入支持四边形输入因此部署时可以不不需要依赖旋转框IOU计算算子。
```bash
# 预测
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/dota/s2anet_1x_dota.yml -o weights=model.pdparams --infer_img=demo/P0072__1.0__0___0.png
```
## Citations
```
@article{han2021align,
author={J. {Han} and J. {Ding} and J. {Li} and G. -S. {Xia}},
journal={IEEE Transactions on Geoscience and Remote Sensing},
title={Align Deep Features for Oriented Object Detection},
year={2021},
pages={1-11},
doi={10.1109/TGRS.2021.3062048}}
@inproceedings{xia2018dota,
title={DOTA: A large-scale dataset for object detection in aerial images},
author={Xia, Gui-Song and Bai, Xiang and Ding, Jian and Zhu, Zhen and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei},
booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages={3974--3983},
year={2018}
}
```

View File

@ -0,0 +1,55 @@
architecture: S2ANet
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams
# Model Achitecture
S2ANet:
backbone: ResNet
neck: FPN
s2anet_head: S2ANetHead
s2anet_bbox_post_process: S2ANetBBoxPostProcess
ResNet:
depth: 50
norm_type: bn
return_idx: [1,2,3]
num_stages: 4
FPN:
in_channels: [256, 512, 1024]
out_channel: 256
spatial_scales: [0.25, 0.125, 0.0625]
has_extra_convs: True
extra_stage: 2
relu_before_extra_convs: False
S2ANetHead:
anchor_strides: [8, 16, 32, 64, 128]
anchor_scales: [4]
anchor_ratios: [1.0]
anchor_assign: RBoxAssigner
stacked_convs: 2
feat_in: 256
feat_out: 256
num_classes: 15
align_conv_type: 'Conv' # AlignConv Conv
align_conv_size: 3
use_sigmoid_cls: True
RBoxAssigner:
pos_iou_thr: 0.5
neg_iou_thr: 0.4
min_iou_thr: 0.0
ignore_iof_thr: -2
S2ANetBBoxPostProcess:
nms_pre: 2000
min_bbox_size: 0.0
nms:
name: MultiClassNMS
keep_top_k: -1
score_threshold: 0.05
nms_threshold: 0.1
normalized: False
#background_label: -1

View File

@ -0,0 +1,20 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [7, 10]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
clip_grad_by_norm: 35

View File

@ -0,0 +1,42 @@
worker_num: 0
TrainReader:
sample_transforms:
- Decode: {}
- Rbox2Poly: {}
# Resize can process rbox
- Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/dota.yml',
'../runtime.yml',
'_base_/s2anet_optimizer_1x.yml',
'_base_/s2anet.yml',
'_base_/s2anet_reader.yml',
]
weights: output/s2anet_1x_dota/model_final

View File

@ -0,0 +1,21 @@
_BASE_: [
'../datasets/dota.yml',
'../runtime.yml',
'_base_/s2anet_optimizer_1x.yml',
'_base_/s2anet.yml',
'_base_/s2anet_reader.yml',
]
weights: output/s2anet_1x_dota/model_final
S2ANetHead:
anchor_strides: [8, 16, 32, 64, 128]
anchor_scales: [4]
anchor_ratios: [1.0]
anchor_assign: RBoxAssigner
stacked_convs: 2
feat_in: 256
feat_out: 256
num_classes: 15
align_conv_type: 'Conv' # AlignConv Conv
align_conv_size: 3
use_sigmoid_cls: True

View File

@ -0,0 +1,106 @@
# 人脸检测模型
## 简介
`face_detection`中提供高效、高速的人脸检测解决方案,包括最先进的模型和经典模型。
![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
## 模型库
#### WIDER-FACE数据集上的mAP
| 网络结构 | 输入尺寸 | 图片个数/GPU | 学习率策略 | Easy/Medium/Hard Set | 预测时延SD855| 模型大小(MB) | 下载 | 配置文件 |
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/face_detection/blazeface_1000e.yml) |
**注意:**
- 我们使用多尺度评估策略得到`Easy/Medium/Hard Set`里的mAP。具体细节请参考[在WIDER-FACE数据集上评估](#在WIDER-FACE数据集上评估)。
## 快速开始
### 数据准备
我们使用[WIDER-FACE数据集](http://shuoyang1213.me/WIDERFACE/)进行训练和模型测试,官方网站提供了详细的数据介绍。
- WIDER-Face数据源:
使用如下目录结构加载`wider_face`类型的数据集:
```
dataset/wider_face/
├── wider_face_split
│ ├── wider_face_train_bbx_gt.txt
│ ├── wider_face_val_bbx_gt.txt
├── WIDER_train
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
├── WIDER_val
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
```
- 手动下载数据集:
要下载WIDER-FACE数据集请运行以下命令
```
cd dataset/wider_face && ./download_wider_face.sh
```
### 训练与评估
训练流程与评估流程方法与其他算法一致,请参考[GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。
**注意:** 人脸检测模型目前不支持边训练边评估。
#### 在WIDER-FACE数据集上评估
- 步骤一:评估并生成结果文件:
```shell
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
-o weights=output/blazeface_1000e/model_final \
multi_scale=True
```
设置`multi_scale=True`进行多尺度评估,评估完成后,将在`output/pred`中生成txt格式的测试结果。
- 步骤二下载官方评估脚本和Ground Truth文件
```
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
unzip eval_tools.zip && rm -f eval_tools.zip
```
- 步骤三:开始评估
方法一python评估
```
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
cd WiderFace-Evaluation
# 编译
python3 setup.py build_ext --inplace
# 开始评估
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
```
方法二MatLab评估
```
# 在`eval_tools/wider_eval.m`中修改保存结果路径和绘制曲线的名称:
pred_dir = './pred';
legend_name = 'Paddle-BlazeFace';
`wider_eval.m` 是评估模块的主要执行程序。运行命令如下:
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
```
## Citations
```
@article{bazarevsky2019blazeface,
title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
year={2019},
eprint={1907.05047},
archivePrefix={arXiv},
```

View File

@ -0,0 +1,39 @@
architecture: SSD
SSD:
backbone: BlazeNet
ssd_head: FaceHead
post_process: BBoxPostProcess
BlazeNet:
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
FaceHead:
in_channels: [96, 96]
anchor_generator: AnchorGeneratorSSD
loss: SSDLoss
SSDLoss:
overlap_threshold: 0.35
AnchorGeneratorSSD:
steps: [8., 16.]
aspect_ratios: [[1.], [1.]]
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
max_sizes: [[], []]
offset: 0.5
flip: False
min_max_aspect_ratios_order: false
BBoxPostProcess:
decode:
name: SSDBox
nms:
name: MultiClassNMS
keep_top_k: 750
score_threshold: 0.01
nms_threshold: 0.3
nms_top_k: 5000
nms_eta: 1.0

View File

@ -0,0 +1,45 @@
worker_num: 2
TrainReader:
inputs_def:
num_max_boxes: 90
sample_transforms:
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomFlip: {}
- CropWithDataAchorSampling: {
anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
batch_sampler: [
[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
],
target_size: 640}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 8
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1

View File

@ -0,0 +1,21 @@
epoch: 1000
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 333
- 800
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSProp
regularizer:
factor: 0.0005
type: L2

View File

@ -0,0 +1,9 @@
_BASE_: [
'../datasets/wider_face.yml',
'../runtime.yml',
'_base_/optimizer_1000e.yml',
'_base_/blazeface.yml',
'_base_/face_reader.yml',
]
weights: output/blazeface_1000e/model_final
multi_scale_eval: True

View File

@ -0,0 +1,35 @@
# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
## Model Zoo
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50 | Faster | 1 | 1x | ---- | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml) |
| ResNet50-vd | Faster | 1 | 1x | ---- | 37.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml) |
| ResNet101 | Faster | 1 | 1x | ---- | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml) |
| ResNet34-FPN | Faster | 1 | 1x | ---- | 37.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml) |
| ResNet34-vd-FPN | Faster | 1 | 1x | ---- | 38.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml) |
| ResNet50-FPN | Faster | 1 | 1x | ---- | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | Faster | 1 | 2x | ---- | 40.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml) |
| ResNet50-vd-FPN | Faster | 1 | 1x | ---- | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | 1 | 2x | ---- | 40.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-FPN | Faster | 1 | 2x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Faster | 1 | 1x | ---- | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml) |
| ResNet101-vd-FPN | Faster | 1 | 2x | ---- | 43.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | 1 | 1x | ---- | 43.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | 1 | 2x | ---- | 44.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 1x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_ssld_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/faster_rcnn/faster_rcnn_r50_vd_ssld_fpn_2x_coco.yml) |
## Citations
```
@article{Ren_2017,
title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
year={2017},
month={Jun},
}
```

View File

@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,66 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [32, 64, 128, 256, 512]
strides: [16]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
topk_after_collect: False
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
BBoxHead:
head: Res5Head
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
with_pool: true
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@ -0,0 +1,73 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
FasterRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
head: TwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
TwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,25 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/faster_rcnn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,25 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/faster_rcnn_r101_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
weights: output/faster_rcnn_r34_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 34
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_vd_pretrained.pdparams
weights: output/faster_rcnn_r34_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 34
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/faster_rcnn_r50.yml',
'_base_/faster_reader.yml',
]
weights: output/faster_rcnn_r50_1x_coco/model_final

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/faster_rcnn_r50_fpn.yml',
'_base_/faster_fpn_reader.yml',
]
weights: output/faster_rcnn_r50_fpn_1x_coco/model_final

View File

@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
weights: output/faster_rcnn_r50_fpn_2x_coco/model_final
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_r50_vd_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3

View File

@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_r50_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,25 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/faster_rcnn_r50_fpn.yml',
'_base_/faster_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/faster_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/faster_rcnn_r50_fpn.yml',
'_base_/faster_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/faster_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,17 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,28 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/faster_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

31
configs/fcos/README.md Normal file
View File

@ -0,0 +1,31 @@
# FCOS for Object Detection
## Introduction
FCOS (Fully Convolutional One-Stage Object Detection) is a fast anchor-free object detection framework with strong performance. We reproduced the model of the paper, and improved and optimized the accuracy of the FCOS.
**Highlights:**
- Training Time: The training time of the model of `fcos_r50_fpn_1x` on Tesla v100 with 8 GPU is only 8.5 hours.
## Model Zoo
| Backbone | Model | images/GPU | lr schedule |FPS | Box AP | download | config |
| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-FPN | FCOS | 2 | 1x | ---- | 39.6 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+DCN | 2 | 1x | ---- | 44.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+multiscale_train | 2 | 2x | ---- | 41.8 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_multiscale_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml) |
**Notes:**
- FCOS is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
## Citations
```
@inproceedings{tian2019fcos,
title = {{FCOS}: Fully Convolutional One-Stage Object Detection},
author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
booktitle = {Proc. Int. Conf. Computer Vision (ICCV)},
year = {2019}
}
```

View File

@ -0,0 +1,55 @@
architecture: FCOS
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
FCOS:
backbone: ResNet
neck: FPN
fcos_head: FCOSHead
fcos_post_process: FCOSPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [1,2,3]
num_stages: 4
FPN:
out_channel: 256
spatial_scales: [0.125, 0.0625, 0.03125]
extra_stage: 2
has_extra_convs: true
use_c5: false
FCOSHead:
fcos_feat:
name: FCOSFeat
feat_in: 256
feat_out: 256
num_convs: 4
norm_type: "gn"
use_dcn: false
num_classes: 80
fpn_stride: [8, 16, 32, 64, 128]
prior_prob: 0.01
fcos_loss: FCOSLoss
norm_reg_targets: true
centerness_on_reg: true
FCOSLoss:
loss_alpha: 0.25
loss_gamma: 2.0
iou_loss_type: "giou"
reg_weights: 1.0
FCOSPostProcess:
decode:
name: FCOSBox
num_classes: 80
nms:
name: MultiClassNMS
nms_top_k: 1000
keep_top_k: 100
score_threshold: 0.025
nms_threshold: 0.6

View File

@ -0,0 +1,42 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 128}
- Gt2FCOSTarget:
object_sizes_boundary: [64, 128, 256, 512]
center_sampling_radius: 1.5
downsample_ratios: [8, 16, 32, 64, 128]
norm_reg_targets: True
batch_size: 2
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 128}
batch_size: 1
shuffle: false
TestReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 128}
batch_size: 1
shuffle: false

View File

@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@ -0,0 +1,32 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/fcos_r50_fpn.yml',
'_base_/optimizer_1x.yml',
'_base_/fcos_reader.yml',
]
weights: output/fcos_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]
FCOSHead:
fcos_feat:
name: FCOSFeat
feat_in: 256
feat_out: 256
num_convs: 4
norm_type: "gn"
use_dcn: true
num_classes: 80
fpn_stride: [8, 16, 32, 64, 128]
prior_prob: 0.01
fcos_loss: FCOSLoss
norm_reg_targets: true
centerness_on_reg: true

View File

@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/fcos_r50_fpn.yml',
'_base_/optimizer_1x.yml',
'_base_/fcos_reader.yml',
]
weights: output/fcos_r50_fpn_1x_coco/model_final

View File

@ -0,0 +1,39 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/fcos_r50_fpn.yml',
'_base_/optimizer_1x.yml',
'_base_/fcos_reader.yml',
]
weights: output/fcos_r50_fpn_multiscale_2x_coco/model_final
TrainReader:
sample_transforms:
- Decode: {}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 128}
- Gt2FCOSTarget:
object_sizes_boundary: [64, 128, 256, 512]
center_sampling_radius: 1.5
downsample_ratios: [8, 16, 32, 64, 128]
norm_reg_targets: True
batch_size: 2
shuffle: true
drop_last: true
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500

23
configs/gn/README.md Normal file
View File

@ -0,0 +1,23 @@
# Group Normalization
## Model Zoo
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
| :------------- | :------------- | :-----------: | :------: | :--------: |:-----: | :-----: | :----: | :----: |
| ResNet50-FPN | Faster | 1 | 2x | - | 41.9 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml) |
| ResNet50-FPN | Mask | 1 | 2x | - | 42.3 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml) |
| ResNet50-FPN | Cascade Faster | 1 | 2x | - | 44.6 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml) |
| ResNet50-FPN | Cacade Mask | 1 | 2x | - | 45.0 | 39.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml) |
**注意:** Faster R-CNN baseline仅使用 `2fc` head而此处使用[`4conv1fc` head](https://arxiv.org/abs/1803.08494)4层conv之间使用GN并且FPN也使用GN而对于Mask R-CNN是在mask head的4层conv之间也使用GN。
## Citations
```
@inproceedings{wu2018group,
title={Group Normalization},
author={Wu, Yuxin and He, Kaiming},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
year={2018}
}
```

View File

@ -0,0 +1,61 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'../cascade_rcnn/_base_/optimizer_1x.yml',
'../cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml',
'../cascade_rcnn/_base_/cascade_mask_fpn_reader.yml',
]
weights: output/cascade_mask_rcnn_r50_fpn_gn_2x_coco/model_final
CascadeRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: CascadeHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
FPN:
out_channel: 256
norm_type: gn
CascadeHead:
head: CascadeXConvNormHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
CascadeXConvNormHead:
num_convs: 4
out_channel: 1024
norm_type: gn
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
MaskFeat:
num_convs: 4
out_channel: 256
norm_type: gn
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,37 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../cascade_rcnn/_base_/optimizer_1x.yml',
'../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
'../cascade_rcnn/_base_/cascade_fpn_reader.yml',
]
weights: output/cascade_rcnn_r50_fpn_gn_2x_coco/model_final
FPN:
out_channel: 256
norm_type: gn
CascadeHead:
head: CascadeXConvNormHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
CascadeXConvNormHead:
num_convs: 4
out_channel: 1024
norm_type: gn
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,45 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../faster_rcnn/_base_/optimizer_1x.yml',
'../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
'../faster_rcnn/_base_/faster_fpn_reader.yml',
]
weights: output/faster_rcnn_r50_fpn_gn_2x_coco/model_final
FasterRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
FPN:
out_channel: 256
norm_type: gn
BBoxHead:
head: XConvNormHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
XConvNormHead:
num_convs: 4
out_channel: 1024
norm_type: gn
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,61 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'../mask_rcnn/_base_/optimizer_1x.yml',
'../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
'../mask_rcnn/_base_/mask_fpn_reader.yml',
]
weights: output/mask_rcnn_r50_fpn_gn_2x_coco/model_final
MaskRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
FPN:
out_channel: 256
norm_type: gn
BBoxHead:
head: XConvNormHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
XConvNormHead:
num_convs: 4
out_channel: 1024
norm_type: gn
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
MaskFeat:
num_convs: 4
out_channel: 256
norm_type: gn
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

34
configs/hrnet/README.md Normal file
View File

@ -0,0 +1,34 @@
# High-resolution networks (HRNets) for object detection
## Introduction
- Deep High-Resolution Representation Learning for Human Pose Estimation: [https://arxiv.org/abs/1902.09212](https://arxiv.org/abs/1902.09212)
```
@inproceedings{SunXLW19,
title={Deep High-Resolution Representation Learning for Human Pose Estimation},
author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
booktitle={CVPR},
year={2019}
}
```
- High-Resolution Representations for Labeling Pixels and Regions: [https://arxiv.org/abs/1904.04514](https://arxiv.org/abs/1904.04514)
```
@article{SunZJCXLMWLW19,
title={High-Resolution Representations for Labeling Pixels and Regions},
author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao
and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang},
journal = {CoRR},
volume = {abs/1904.04514},
year={2019}
}
```
## Model Zoo
| Backbone | Type | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP | Download | Configs |
| :---------------------- | :------------- | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: | :-----: |
| HRNetV2p_W18 | Faster | 1 | 1x | - | 36.8 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_hrnetv2p_w18_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml) |
| HRNetV2p_W18 | Faster | 1 | 2x | - | 39.0 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_hrnetv2p_w18_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml) |

View File

@ -0,0 +1,68 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams
FasterRCNN:
backbone: HRNet
neck: HRFPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
HRNet:
width: 18
freeze_at: 0
return_idx: [0, 1, 2, 3]
HRFPN:
out_channel: 256
share_conv: false
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
head: TwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
TwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@ -0,0 +1,23 @@
_BASE_: [
'../datasets/coco_detection.yml',
'./_base_/faster_rcnn_hrnetv2p_w18.yml',
'../faster_rcnn/_base_/optimizer_1x.yml',
'../faster_rcnn/_base_/faster_fpn_reader.yml',
'../runtime.yml',
]
weights: output/faster_rcnn_hrnetv2p_w18_1x_coco/model_final
epoch: 12
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
TrainReader:
batch_size: 2

View File

@ -0,0 +1,23 @@
_BASE_: [
'../datasets/coco_detection.yml',
'./_base_/faster_rcnn_hrnetv2p_w18.yml',
'../faster_rcnn/_base_/optimizer_1x.yml',
'../faster_rcnn/_base_/faster_fpn_reader.yml',
'../runtime.yml',
]
weights: output/faster_rcnn_hrnetv2p_w18_2x_coco/model_final
epoch: 24
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000
TrainReader:
batch_size: 2

View File

@ -0,0 +1,31 @@
# Mask R-CNN
## Model Zoo
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
| :------------------- | :------------| :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50 | Mask | 1 | 1x | ---- | 37.4 | 32.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml) |
| ResNet50 | Mask | 1 | 2x | ---- | 39.7 | 34.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml) |
| ResNet50-FPN | Mask | 1 | 1x | ---- | 39.2 | 35.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | Mask | 1 | 2x | ---- | 40.5 | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml) |
| ResNet50-vd-FPN | Mask | 1 | 1x | ---- | 40.3 | 36.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Mask | 1 | 2x | ---- | 41.4 | 37.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-FPN | Mask | 1 | 1x | ---- | 40.6 | 36.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml) |
| ResNet101-vd-FPN | Mask | 1 | 1x | ---- | 42.4 | 38.1 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Mask | 1 | 1x | ---- | 44.0 | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Mask | 1 | 2x | ---- | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Mask | 1 | 1x | ---- | 42.0 | 38.2 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Mask | 1 | 2x | ---- | 42.7 | 38.9 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
## Citations
```
@article{He_2017,
title={Mask R-CNN},
journal={2017 IEEE International Conference on Computer Vision (ICCV)},
publisher={IEEE},
author={He, Kaiming and Gkioxari, Georgia and Dollar, Piotr and Girshick, Ross},
year={2017},
month={Oct}
}
```

View File

@ -0,0 +1,39 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false

View File

@ -0,0 +1,87 @@
architecture: MaskRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
MaskRCNN:
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [32, 64, 128, 256, 512]
strides: [16]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
topk_after_collect: False
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
BBoxHead:
head: Res5Head
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
with_pool: true
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: true
MaskFeat:
num_convs: 0
out_channel: 256
MaskAssigner:
mask_resolution: 14
MaskPostProcess:
binary_thresh: 0.5

View File

@ -0,0 +1,91 @@
architecture: MaskRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
MaskRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
head: TwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
TwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
MaskFeat:
num_convs: 4
out_channel: 256
MaskAssigner:
mask_resolution: 28
MaskPostProcess:
binary_thresh: 0.5

View File

@ -0,0 +1,41 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false

View File

@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.001
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@ -0,0 +1,13 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/mask_rcnn_r101_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,14 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/mask_rcnn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/mask_rcnn_r50.yml',
'_base_/mask_reader.yml',
]
weights: output/mask_rcnn_r50_1x_coco/model_final

View File

@ -0,0 +1,15 @@
_BASE_: [
'mask_rcnn_r50_1x_coco.yml',
]
weights: output/mask_rcnn_r50_2x_coco/model_final
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500

View File

@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/mask_rcnn_r50_fpn.yml',
'_base_/mask_fpn_reader.yml',
]
weights: output/mask_rcnn_r50_fpn_1x_coco/model_final

View File

@ -0,0 +1,15 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
weights: output/mask_rcnn_r50_fpn_2x_coco/model_final
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500

View File

@ -0,0 +1,15 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/mask_rcnn_r50_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@ -0,0 +1,26 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/mask_rcnn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/mask_rcnn_r50_fpn.yml',
'_base_/mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/mask_rcnn_r50_fpn.yml',
'_base_/mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

View File

@ -0,0 +1,28 @@
_BASE_: [
'mask_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
variant: d
groups: 64
base_width: 4
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000

Some files were not shown because too many files have changed in this diff Show More