# Copyright (c) OpenMMLab. All rights reserved.
import logging
import shutil
import sys
import tempfile
from unittest.mock import MagicMock, call

import numpy as np
import pytest
import torch
import torch.nn as nn
from mmcv.runner import (CheckpointHook, IterTimerHook, PaviLoggerHook,
                         build_runner)
from torch.nn.init import constant_
from torch.utils.data import DataLoader, Dataset

from mmdet.core.hook import ExpMomentumEMAHook, YOLOXLrUpdaterHook
from mmdet.core.hook.sync_norm_hook import SyncNormHook
from mmdet.core.hook.sync_random_size_hook import SyncRandomSizeHook


def _build_demo_runner_without_hook(runner_type='EpochBasedRunner',
                                    max_epochs=1,
                                    max_iters=None,
                                    multi_optimizers=False):
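    """Build a minimal demo runner with no hooks registered.

    When ``multi_optimizers`` is True the runner gets a dict of two SGD
    optimizers (one for the linear layer, one for the conv layer) so that
    per-optimizer hook behaviour can be exercised.
    """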
    class Model(nn.Module):

        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(2, 1)
            self.conv = nn.Conv2d(3, 3, 3)

        def forward(self, x):
            return self.linear(x)

        def train_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

        def val_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

    model = Model()

    if multi_optimizers:
        optimizer = {
            'model1':
            torch.optim.SGD(model.linear.parameters(), lr=0.02, momentum=0.95),
            'model2':
            torch.optim.SGD(model.conv.parameters(), lr=0.01, momentum=0.9),
        }
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.95)

    tmp_dir = tempfile.mkdtemp()
    runner = build_runner(
        dict(type=runner_type),
        default_args=dict(
            model=model,
            work_dir=tmp_dir,
            optimizer=optimizer,
            logger=logging.getLogger(),
            max_epochs=max_epochs,
            max_iters=max_iters))
    return runner


def _build_demo_runner(runner_type='EpochBasedRunner',
                       max_epochs=1,
                       max_iters=None,
                       multi_optimizers=False):
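    """Build the demo runner and register checkpoint and text-logger hooks."""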
    log_config = dict(
        interval=1, hooks=[
            dict(type='TextLoggerHook'),
        ])

    runner = _build_demo_runner_without_hook(runner_type, max_epochs,
                                             max_iters, multi_optimizers)

    runner.register_checkpoint_hook(dict(interval=1))
    runner.register_logger_hooks(log_config)
    return runner


@pytest.mark.parametrize('multi_optimizers', (True, False))
def test_yolox_lrupdater_hook(multi_optimizers):
    """xdoctest -m tests/test_hooks.py test_yolox_lrupdater_hook."""
    # Construct once to make sure instantiation itself does not error.
    YOLOXLrUpdaterHook(0, min_lr_ratio=0.05)

    sys.modules['pavi'] = MagicMock()
    loader = DataLoader(torch.ones((10, 2)))
    runner = _build_demo_runner(multi_optimizers=multi_optimizers)

    hook_cfg = dict(
        type='YOLOXLrUpdaterHook',
        warmup='exp',
        by_epoch=False,
        warmup_by_epoch=True,
        warmup_ratio=1,
        warmup_iters=5,  # 5 epochs, since warmup_by_epoch=True
        num_last_epochs=15,
        min_lr_ratio=0.05)
    runner.register_hook_from_cfg(hook_cfg)
    runner.register_hook(IterTimerHook())

    # add pavi hook
    hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
    runner.register_hook(hook)
    runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)

    # TODO: use a more elegant way to check values
    assert hasattr(hook, 'writer')
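    # Expected values: the loader has 10 samples with batch size 1, so one
    # epoch is 10 iterations and warmup_iters=5 epochs spans 50 iterations.
    # The logged lrs below match quadratic 'exp' warmup,
    # lr = base_lr * warmup_ratio * (iter / 50)**2,
    # e.g. 0.02 * (1 / 50)**2 = 8e-06 at iteration 1.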
    if multi_optimizers:
        calls = [
            call(
                'train', {
                    'learning_rate/model1': 8.000000000000001e-06,
                    'learning_rate/model2': 4.000000000000001e-06,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9
                }, 1),
            call(
                'train', {
                    'learning_rate/model1': 0.00039200000000000004,
                    'learning_rate/model2': 0.00019600000000000002,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9
                }, 7),
            call(
                'train', {
                    'learning_rate/model1': 0.0008000000000000001,
                    'learning_rate/model2': 0.0004000000000000001,
                    'momentum/model1': 0.95,
                    'momentum/model2': 0.9
                }, 10)
        ]
    else:
        calls = [
            call('train', {
                'learning_rate': 8.000000000000001e-06,
                'momentum': 0.95
            }, 1),
            call('train', {
                'learning_rate': 0.00039200000000000004,
                'momentum': 0.95
            }, 7),
            call('train', {
                'learning_rate': 0.0008000000000000001,
                'momentum': 0.95
            }, 10)
        ]
    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)


def test_ema_hook():
    """xdoctest -m tests/test_hooks.py test_ema_hook."""

    class DemoModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.conv = nn.Conv2d(
                in_channels=1,
                out_channels=2,
                kernel_size=1,
                padding=1,
                bias=True)
            self.bn = nn.BatchNorm2d(2)

            self._init_weight()

        def _init_weight(self):
            constant_(self.conv.weight, 0)
            constant_(self.conv.bias, 0)
            constant_(self.bn.weight, 0)
            constant_(self.bn.bias, 0)

        def forward(self, x):
            return self.bn(self.conv(x)).sum()

        def train_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

        def val_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

    loader = DataLoader(torch.ones((1, 1, 1, 1)))
    runner = _build_demo_runner()
    demo_model = DemoModel()
    runner.model = demo_model
    ema_hook = ExpMomentumEMAHook(
        momentum=0.0002,
        total_iter=1,
        skip_buffers=True,
        interval=2,
        resume_from=None)
    checkpointhook = CheckpointHook(interval=1, by_epoch=True)
    runner.register_hook(ema_hook, priority='HIGHEST')
    runner.register_hook(checkpointhook)
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    checkpoint = torch.load(f'{runner.work_dir}/epoch_1.pth')
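    # With skip_buffers=True only the four parameters (conv.weight, conv.bias,
    # bn.weight, bn.bias) get EMA copies; BN running stats are excluded.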
    num_ema_params = 0
    for name, value in checkpoint['state_dict'].items():
        if 'ema' in name:
            num_ema_params += 1
            value.fill_(1)
    assert num_ema_params == 4
    torch.save(checkpoint, f'{runner.work_dir}/epoch_1.pth')

    work_dir = runner.work_dir
    resume_ema_hook = ExpMomentumEMAHook(
        momentum=0.5,
        total_iter=10,
        skip_buffers=True,
        interval=1,
        resume_from=f'{work_dir}/epoch_1.pth')
    runner = _build_demo_runner(max_epochs=2)
    runner.model = demo_model
    runner.register_hook(resume_ema_hook, priority='HIGHEST')
    checkpointhook = CheckpointHook(interval=1, by_epoch=True)
    runner.register_hook(checkpointhook)
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    checkpoint = torch.load(f'{runner.work_dir}/epoch_2.pth')
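    # Each ema_* tensor holds two elements that were filled with ones above.
    # 0.9094 matches (1 - 0.5) * exp(-(1 + 1) / 10) + 0.5, i.e. the hook's
    # exponential momentum at the resumed step, which is presumably how
    # desired_output was derived.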
    num_ema_params = 0
    desired_output = [0.9094, 0.9094]
    for name, value in checkpoint['state_dict'].items():
        if 'ema' in name:
            num_ema_params += 1
            assert value.sum() == 2
        else:
            if ('weight' in name) or ('bias' in name):
                assert np.allclose(value.data.cpu().numpy().reshape(-1),
                                   desired_output, 1e-4)
    assert num_ema_params == 4
    shutil.rmtree(runner.work_dir)
    shutil.rmtree(work_dir)


def test_sync_norm_hook():
    # Construct once to make sure instantiation itself does not error.
    SyncNormHook()

    loader = DataLoader(torch.ones((5, 2)))
    runner = _build_demo_runner()
    runner.register_hook_from_cfg(dict(type='SyncNormHook'))
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)


def test_sync_random_size_hook():
    # Construct once to make sure instantiation itself does not error.
    SyncRandomSizeHook()

    class DemoDataset(Dataset):

        def __getitem__(self, item):
            return torch.ones(2)

        def __len__(self):
            return 5
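
        # SyncRandomSizeHook is expected to call this on the dataset when it
        # broadcasts a new input scale; a no-op stub is enough for this test.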
        def update_dynamic_scale(self, dynamic_scale):
            pass

    loader = DataLoader(DemoDataset())
    runner = _build_demo_runner()
    runner.register_hook_from_cfg(
        dict(type='SyncRandomSizeHook', device='cpu'))
    runner.run([loader, loader], [('train', 1), ('val', 1)])
    shutil.rmtree(runner.work_dir)

    if torch.cuda.is_available():
        runner = _build_demo_runner()
        runner.register_hook_from_cfg(
            dict(type='SyncRandomSizeHook', device='cuda'))
        runner.run([loader, loader], [('train', 1), ('val', 1)])
        shutil.rmtree(runner.work_dir)


@pytest.mark.parametrize('set_loss', [
    dict(set_loss_nan=False, set_loss_inf=False),
    dict(set_loss_nan=True, set_loss_inf=False),
    dict(set_loss_nan=False, set_loss_inf=True)
])
def test_check_invalid_loss_hook(set_loss):
    # Check whether the loss is valid during training.

    class DemoModel(nn.Module):

        def __init__(self, set_loss_nan=False, set_loss_inf=False):
            super().__init__()
            self.set_loss_nan = set_loss_nan
            self.set_loss_inf = set_loss_inf
            self.linear = nn.Linear(2, 1)

        def forward(self, x):
            return self.linear(x)

        def train_step(self, x, optimizer, **kwargs):
            if self.set_loss_nan:
                return dict(loss=torch.tensor(float('nan')))
            elif self.set_loss_inf:
                return dict(loss=torch.tensor(float('inf')))
            else:
                return dict(loss=self(x))

    loader = DataLoader(torch.ones((5, 2)))
    runner = _build_demo_runner()

    demo_model = DemoModel(**set_loss)
    runner.model = demo_model
    runner.register_hook_from_cfg(
        dict(type='CheckInvalidLossHook', interval=1))
    if not set_loss['set_loss_nan'] \
            and not set_loss['set_loss_inf']:
        # a valid loss lets training finish normally
        runner.run([loader], [('train', 1)])
    else:
        # a nan/inf loss should make CheckInvalidLossHook raise
        with pytest.raises(AssertionError):
            runner.run([loader], [('train', 1)])
    shutil.rmtree(runner.work_dir)


def test_set_epoch_info_hook():
    """Test SetEpochInfoHook."""

    class DemoModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.epoch = 0
            self.linear = nn.Linear(2, 1)

        def forward(self, x):
            return self.linear(x)

        def train_step(self, x, optimizer, **kwargs):
            return dict(loss=self(x))

        def set_epoch(self, epoch):
            self.epoch = epoch

    loader = DataLoader(torch.ones((5, 2)))
    runner = _build_demo_runner(max_epochs=3)

    demo_model = DemoModel()
    runner.model = demo_model
    runner.register_hook_from_cfg(dict(type='SetEpochInfoHook'))
    runner.run([loader], [('train', 1)])
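    # SetEpochInfoHook passes the zero-indexed epoch to model.set_epoch(), so
    # after three epochs the last recorded value is 2.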
    assert demo_model.epoch == 2