# Listing metadata (from the code viewer this file was copied from): 179 lines, 5.2 KiB, Python
# Base config files this config builds on (mmcv-style `_base_` inheritance).
# The paths point into an `mmdetection` checkout one directory above this file.
_base_ = [
    '../mmdetection/configs/_base_/schedules/schedule_1x.py',
    '../mmdetection/configs/_base_/default_runtime.py'
]

# Image scale shared by the model input size and the data pipelines below.
img_scale = (640, 640)

# model settings
# YOLOX detector assembled from a CSPDarknet backbone, a YOLOXPAFPN neck and
# a YOLOXHead; every component uses LeakyReLU(negative_slope=0.1).
model = dict(
    type='YOLOX',
    input_size=img_scale,
    # NOTE(review): presumably the multi-scale training range/interval
    # consumed by mmdet's YOLOX detector -- confirm against
    # 'mmdet/models/detectors/yolox.py'.
    random_size_range=(15, 25),
    random_size_interval=10,
    backbone=dict(
        type='CSPDarknet',
        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
        deepen_factor=0.33,
        widen_factor=0.5),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[128, 256, 512],
        out_channels=128,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
        num_csp_blocks=1),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=80,
        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
        # Matches the neck's out_channels above.
        in_channels=128,
        feat_channels=128),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # In order to align the source code, the threshold of the val phase is
    # 0.01, and the threshold of the test phase is 0.001.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))

# dataset settings
# Root directory that the annotation and image paths below are joined onto.
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
# Per-channel normalization statistics passed to the 'Normalize' transform.
img_norm_cfg = dict(
    mean=[128.0, 128.0, 128.0], std=[256.0, 256.0, 256.0], to_rgb=True)

# Training-time transforms, applied in order: mixing augmentations (Mosaic,
# RandomAffine, MixUp), colour/flip augmentation, resize + pad to a square,
# normalization, annotation filtering and final formatting/collection.
train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        # Negative half of the image scale on each axis.
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', flip_ratio=0.5),
    # According to the official implementation, multi-scale
    # training is not considered here but in the
    # 'mmdet/models/detectors/yolox.py'.
    dict(type='Resize', img_scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        # If the image is three-channel, the pad value needs
        # to be set separately for each channel.
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]

# Training dataset: a 'MultiImageMixDataset' wrapper around the base dataset.
# The inner dataset only loads images and bounding-box annotations; the
# remaining transforms come from `train_pipeline` on the wrapper.
train_dataset = dict(
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_empty_gt=False,
    ),
    pipeline=train_pipeline)

# Evaluation-time transforms: load the image, then run a single-scale,
# no-flip 'MultiScaleFlipAug' whose inner transforms resize, pad to a square,
# normalize and collect only the image.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Pad',
                pad_to_square=True,
                pad_val=dict(img=(114.0, 114.0, 114.0))),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]

# Data loading configuration. The val and test splits both read the val2017
# annotations/images and share `test_pipeline`.
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=3,
    persistent_workers=True,
    train=train_dataset,
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))

# optimizer
# default 8 gpu
# SGD with Nesterov momentum; weight decay is switched off (multiplier 0) for
# normalization and bias parameters via `paramwise_cfg`.
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
# Gradient clipping (L2 norm, max 35) is enabled here. The disabled
# alternative that was previously used is:
#     optimizer_config = dict(grad_clip=None)
# `_delete_=True` is the mmcv config marker for replacing the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
# Schedule constants shared by the runner, LR policy, hooks and evaluation
# settings further down in this file.
max_epochs = 2
num_last_epochs = 1
resume_from = None
interval = 1

# learning policy
# `_delete_=True` is the mmcv config marker for replacing the inherited
# `lr_config` instead of merging into it.
# NOTE(review): warmup is 5 epochs while this file sets max_epochs = 2, so
# the run ends before warmup completes -- confirm this is intended (e.g. a
# short smoke-test schedule).
lr_config = dict(
    _delete_=True,
    policy='YOLOX',
    warmup='exp',
    by_epoch=False,
    warmup_by_epoch=True,
    warmup_ratio=1,
    warmup_iters=5,  # 5 epoch
    num_last_epochs=num_last_epochs,
    min_lr_ratio=0.05)

# Epoch-based training loop that stops after `max_epochs` epochs.
runner = dict(type='EpochBasedRunner', max_epochs=max_epochs)

# Extra training hooks. Two of them are parameterized by `num_last_epochs`,
# i.e. they treat the final epochs of training specially.
# NOTE(review): the numeric priorities (48, 48, 49) presumably control hook
# ordering in the mmcv runner -- confirm the intended order.
custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48),
    dict(
        type='SyncNormHook',
        num_last_epochs=num_last_epochs,
        interval=interval,
        priority=48),
    dict(
        type='ExpMomentumEMAHook',
        resume_from=resume_from,
        momentum=0.0001,
        priority=49)
]
# Checkpoint saving uses the same `interval` as evaluation.
checkpoint_config = dict(interval=interval)
# Evaluation settings: bbox metric, with `save_best='auto'`.
evaluation = dict(
    save_best='auto',
    # The evaluation interval is 'interval' when running epoch is
    # less than 'max_epochs - num_last_epochs'.
    # The evaluation interval is 1 when running epoch is greater than
    # or equal to 'max_epochs - num_last_epochs'.
    interval=interval,
    dynamic_intervals=[(max_epochs - num_last_epochs, 1)],
    metric='bbox')
# Logging interval of 50 (unit defined by the logger hooks in the inherited
# runtime config).
log_config = dict(interval=50)