Support FP16 (#21)

* Support FP16

* add miss folder

* add tests

* remove useless config

* update memory

* reduce config

* migrate fp16 to mmcv

* add model link
This commit is contained in:
Jerry Jiarui XU 2020-07-20 15:17:18 +08:00 committed by GitHub
parent 1af2ad6a9f
commit 1765c12985
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 99 additions and 3 deletions

View File

@ -69,6 +69,7 @@ Supported methods:
- [x] [GCNet](configs/gcnet) - [x] [GCNet](configs/gcnet)
- [x] [ANN](configs/ann) - [x] [ANN](configs/ann)
- [x] [OCRNet](configs/ocrnet) - [x] [OCRNet](configs/ocrnet)
- [x] [Mixed Precision (FP16) Training](configs/fp16/README.md)
## Installation ## Installation

21
configs/fp16/README.md Normal file
View File

@ -0,0 +1,21 @@
# Mixed Precision Training
## Introduction
```
@article{micikevicius2017mixed,
title={Mixed precision training},
author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
journal={arXiv preprint arXiv:1710.03740},
year={2017}
}
```
## Results and models
### Cityscapes
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| FCN | R-101-D8 | 512x1024 | 80000 | 5.50 | 2.66 | 76.80 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes-50245227.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230921.log.json) |
| PSPNet | R-101-D8 | 512x1024 | 80000 | 5.47 | 2.68 | 79.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes-ade37931.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230919.log.json) |
| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | 5.91 | 1.93 | 80.48 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes-bc86dc84.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) |
| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | 6.46 | 2.60 | 80.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) |

View File

@ -0,0 +1,3 @@
# Inherit everything from the base DeepLabV3 R-101-D8 Cityscapes config.
_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings: replace the default optimizer hook with mmcv's
# Fp16OptimizerHook, using a static loss scale of 512.
optimizer_config = {'type': 'Fp16OptimizerHook', 'loss_scale': 512.0}

View File

@ -0,0 +1,3 @@
# Inherit everything from the base DeepLabV3+ R-101-D8 Cityscapes config.
_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings: replace the default optimizer hook with mmcv's
# Fp16OptimizerHook, using a static loss scale of 512.
optimizer_config = {'type': 'Fp16OptimizerHook', 'loss_scale': 512.0}

View File

@ -0,0 +1,3 @@
# Inherit everything from the base FCN R-101-D8 Cityscapes config.
_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings: replace the default optimizer hook with mmcv's
# Fp16OptimizerHook, using a static loss scale of 512.
optimizer_config = {'type': 'Fp16OptimizerHook', 'loss_scale': 512.0}

View File

@ -0,0 +1,3 @@
# Inherit everything from the base PSPNet R-101-D8 Cityscapes config.
_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings: replace the default optimizer hook with mmcv's
# Fp16OptimizerHook, using a static loss scale of 512.
optimizer_config = {'type': 'Fp16OptimizerHook', 'loss_scale': 512.0}

View File

@ -81,6 +81,11 @@ Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/c
Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details. Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details.
### Mixed Precision (FP16) Training
Please refer to [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details.
## Speed benchmark ## Speed benchmark
### Hardware ### Hardware

View File

@ -1,3 +1,4 @@
from .dist_utils import allreduce_grads
from .misc import add_prefix from .misc import add_prefix
__all__ = ['add_prefix'] __all__ = ['add_prefix', 'allreduce_grads']

View File

@ -0,0 +1,49 @@
from collections import OrderedDict
import torch.distributed as dist
from torch._utils import (_flatten_dense_tensors, _take_tensors,
_unflatten_dense_tensors)
def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
    """All-reduce a list of tensors using coalesced flat buffers.

    Tensors are grouped into buckets (by byte size when ``bucket_size_mb``
    is positive, otherwise by tensor type), each bucket is flattened into a
    single contiguous buffer, all-reduced across processes and averaged by
    ``world_size``, then the reduced values are copied back in place.
    """
    if bucket_size_mb > 0:
        # Let torch pack tensors into buckets of at most this many bytes.
        bucket_bytes = bucket_size_mb * 1024 * 1024
        buckets = _take_tensors(tensors, bucket_bytes)
    else:
        # Group by tensor type so each flat buffer is homogeneous.
        by_type = OrderedDict()
        for tensor in tensors:
            by_type.setdefault(tensor.type(), []).append(tensor)
        buckets = by_type.values()
    for bucket in buckets:
        flat = _flatten_dense_tensors(bucket)
        dist.all_reduce(flat)
        flat.div_(world_size)
        # Scatter the averaged values back into the original tensors.
        for original, reduced in zip(bucket,
                                     _unflatten_dense_tensors(flat, bucket)):
            original.copy_(reduced)
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Allreduce gradients.

    Args:
        params (list[torch.nn.Parameter]): List of parameters of a model.
        coalesce (bool, optional): Whether allreduce parameters as a whole.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
    """
    # Collect the gradients that actually exist; frozen parameters and
    # parameters untouched in this iteration are skipped.
    grads = []
    for param in params:
        if param.requires_grad and param.grad is not None:
            grads.append(param.grad.data)
    world_size = dist.get_world_size()
    if coalesce:
        _allreduce_coalesced(grads, world_size, bucket_size_mb)
    else:
        # Reduce each gradient tensor individually, averaging in place.
        for grad in grads:
            dist.all_reduce(grad.div_(world_size))

View File

@ -3,6 +3,7 @@ from abc import ABCMeta, abstractmethod
import torch import torch
import torch.nn as nn import torch.nn as nn
from mmcv.cnn import normal_init from mmcv.cnn import normal_init
from mmcv.runner import auto_fp16, force_fp32
from mmseg.core import build_pixel_sampler from mmseg.core import build_pixel_sampler
from mmseg.ops import resize from mmseg.ops import resize
@ -81,6 +82,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
self.dropout = nn.Dropout2d(dropout_ratio) self.dropout = nn.Dropout2d(dropout_ratio)
else: else:
self.dropout = None self.dropout = None
self.fp16_enabled = False
def extra_repr(self): def extra_repr(self):
"""Extra repr.""" """Extra repr."""
@ -158,6 +160,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
return inputs return inputs
@auto_fp16()
@abstractmethod @abstractmethod
def forward(self, inputs): def forward(self, inputs):
"""Placeholder of forward function.""" """Placeholder of forward function."""
@ -207,6 +210,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
output = self.conv_seg(feat) output = self.conv_seg(feat)
return output return output
@force_fp32(apply_to=('seg_logit', ))
def losses(self, seg_logit, seg_label): def losses(self, seg_logit, seg_label):
"""Compute segmentation loss.""" """Compute segmentation loss."""
loss = dict() loss = dict()

View File

@ -8,6 +8,7 @@ import numpy as np
import torch import torch
import torch.distributed as dist import torch.distributed as dist
import torch.nn as nn import torch.nn as nn
from mmcv.runner import auto_fp16
class BaseSegmentor(nn.Module): class BaseSegmentor(nn.Module):
@ -17,6 +18,7 @@ class BaseSegmentor(nn.Module):
def __init__(self): def __init__(self):
super(BaseSegmentor, self).__init__() super(BaseSegmentor, self).__init__()
self.fp16_enabled = False
@property @property
def with_neck(self): def with_neck(self):
@ -105,6 +107,7 @@ class BaseSegmentor(nn.Module):
else: else:
return self.aug_test(imgs, img_metas, **kwargs) return self.aug_test(imgs, img_metas, **kwargs)
@auto_fp16(apply_to=('img', ))
def forward(self, img, img_metas, return_loss=True, **kwargs): def forward(self, img, img_metas, return_loss=True, **kwargs):
"""Calls either :func:`forward_train` or :func:`forward_test` depending """Calls either :func:`forward_train` or :func:`forward_test` depending
on whether ``return_loss`` is ``True``. on whether ``return_loss`` is ``True``.
@ -146,7 +149,7 @@ class BaseSegmentor(nn.Module):
DDP, it means the batch size on each GPU), which is used for DDP, it means the batch size on each GPU), which is used for
averaging the logs. averaging the logs.
""" """
losses = self.forward_train(**data_batch, **kwargs) losses = self(**data_batch)
loss, log_vars = self._parse_losses(losses) loss, log_vars = self._parse_losses(losses)
outputs = dict( outputs = dict(
@ -163,7 +166,7 @@ class BaseSegmentor(nn.Module):
during val epochs. Note that the evaluation after training epochs is during val epochs. Note that the evaluation after training epochs is
not implemented with this method, but an evaluation hook. not implemented with this method, but an evaluation hook.
""" """
output = self.forward_test(**data_batch, **kwargs) output = self(**data_batch, **kwargs)
return output return output
@staticmethod @staticmethod