Support FP16 (#21)
* Support FP16 * add miss folder * add tests * remove useless config * update memory * reduce config * migrate fp16 to mmcv * add model link
This commit is contained in:
parent
1af2ad6a9f
commit
1765c12985
@ -69,6 +69,7 @@ Supported methods:
|
|||||||
- [x] [GCNet](configs/gcnet)
|
- [x] [GCNet](configs/gcnet)
|
||||||
- [x] [ANN](configs/ann)
|
- [x] [ANN](configs/ann)
|
||||||
- [x] [OCRNet](configs/ocrnet)
|
- [x] [OCRNet](configs/ocrnet)
|
||||||
|
- [x] [Mixed Precision (FP16) Training](configs/fp16/README.md)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
21
configs/fp16/README.md
Normal file
21
configs/fp16/README.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Mixed Precision Training
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
```
|
||||||
|
@article{micikevicius2017mixed,
|
||||||
|
title={Mixed precision training},
|
||||||
|
author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
|
||||||
|
journal={arXiv preprint arXiv:1710.03740},
|
||||||
|
year={2017}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results and models
|
||||||
|
|
||||||
|
### Cityscapes
|
||||||
|
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||||
|
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| FCN | R-101-D8 | 512x1024 | 80000 | 5.50 | 2.66 | 76.80 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes-50245227.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230921.log.json) |
|
||||||
|
| PSPNet | R-101-D8 | 512x1024 | 80000 | 5.47 | 2.68 | 79.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes-ade37931.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230919.log.json) |
|
||||||
|
| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | 5.91 | 1.93 | 80.48 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes-bc86dc84.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) |
|
||||||
|
| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | 6.46 | 2.60 | 80.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) |
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# Mixed-precision (FP16) variant of DeepLabV3 R-101-D8, 512x1024, 80k iters
# on Cityscapes: inherit the full FP32 config and override only the
# optimizer hook.
_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings
# Fp16OptimizerHook comes from mmcv; loss_scale=512. is a static loss
# scaling factor used to keep small FP16 gradients from underflowing.
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# Mixed-precision (FP16) variant of DeepLabV3+ R-101-D8, 512x1024, 80k iters
# on Cityscapes: inherit the full FP32 config and override only the
# optimizer hook.
_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings
# Fp16OptimizerHook comes from mmcv; loss_scale=512. is a static loss
# scaling factor used to keep small FP16 gradients from underflowing.
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
|
||||||
3
configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py
Normal file
3
configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Mixed-precision (FP16) variant of FCN R-101-D8, 512x1024, 80k iters on
# Cityscapes: inherit the full FP32 config and override only the
# optimizer hook.
_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings
# Fp16OptimizerHook comes from mmcv; loss_scale=512. is a static loss
# scaling factor used to keep small FP16 gradients from underflowing.
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# Mixed-precision (FP16) variant of PSPNet R-101-D8, 512x1024, 80k iters
# on Cityscapes: inherit the full FP32 config and override only the
# optimizer hook.
_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py'
# fp16 settings
# Fp16OptimizerHook comes from mmcv; loss_scale=512. is a static loss
# scaling factor used to keep small FP16 gradients from underflowing.
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
|
||||||
@ -81,6 +81,11 @@ Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/c
|
|||||||
|
|
||||||
Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details.
|
Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details.
|
||||||
|
|
||||||
|
|
||||||
|
### Mixed Precision (FP16) Training
|
||||||
|
|
||||||
|
Please refer [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details.
|
||||||
|
|
||||||
## Speed benchmark
|
## Speed benchmark
|
||||||
|
|
||||||
### Hardware
|
### Hardware
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
from .dist_utils import allreduce_grads
|
||||||
from .misc import add_prefix
|
from .misc import add_prefix
|
||||||
|
|
||||||
__all__ = ['add_prefix']
|
__all__ = ['add_prefix', 'allreduce_grads']
|
||||||
|
|||||||
49
mmseg/core/utils/dist_utils.py
Normal file
49
mmseg/core/utils/dist_utils.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import torch.distributed as dist
|
||||||
|
from torch._utils import (_flatten_dense_tensors, _take_tensors,
|
||||||
|
_unflatten_dense_tensors)
|
||||||
|
|
||||||
|
|
||||||
|
def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
|
||||||
|
if bucket_size_mb > 0:
|
||||||
|
bucket_size_bytes = bucket_size_mb * 1024 * 1024
|
||||||
|
buckets = _take_tensors(tensors, bucket_size_bytes)
|
||||||
|
else:
|
||||||
|
buckets = OrderedDict()
|
||||||
|
for tensor in tensors:
|
||||||
|
tp = tensor.type()
|
||||||
|
if tp not in buckets:
|
||||||
|
buckets[tp] = []
|
||||||
|
buckets[tp].append(tensor)
|
||||||
|
buckets = buckets.values()
|
||||||
|
|
||||||
|
for bucket in buckets:
|
||||||
|
flat_tensors = _flatten_dense_tensors(bucket)
|
||||||
|
dist.all_reduce(flat_tensors)
|
||||||
|
flat_tensors.div_(world_size)
|
||||||
|
for tensor, synced in zip(
|
||||||
|
bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
|
||||||
|
tensor.copy_(synced)
|
||||||
|
|
||||||
|
|
||||||
|
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Allreduce gradients.

    Averages the gradients of *params* across all processes in the default
    process group, modifying ``param.grad`` in place. Parameters without a
    gradient (or with ``requires_grad=False``) are skipped.

    Args:
        params (list[torch.Parameters]): List of parameters of a model
        coalesce (bool, optional): Whether allreduce parameters as a whole.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
    """
    # Collect only gradients that actually exist; frozen or unused
    # parameters contribute nothing to the collective call.
    grads = []
    for param in params:
        if param.requires_grad and param.grad is not None:
            grads.append(param.grad.data)
    world_size = dist.get_world_size()
    if not coalesce:
        # One collective per gradient tensor; divide first so the reduced
        # sum is already the average.
        for grad in grads:
            dist.all_reduce(grad.div_(world_size))
    else:
        # Batch gradients into flattened buckets to cut launch overhead.
        _allreduce_coalesced(grads, world_size, bucket_size_mb)
|
||||||
@ -3,6 +3,7 @@ from abc import ABCMeta, abstractmethod
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from mmcv.cnn import normal_init
|
from mmcv.cnn import normal_init
|
||||||
|
from mmcv.runner import auto_fp16, force_fp32
|
||||||
|
|
||||||
from mmseg.core import build_pixel_sampler
|
from mmseg.core import build_pixel_sampler
|
||||||
from mmseg.ops import resize
|
from mmseg.ops import resize
|
||||||
@ -81,6 +82,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
|
|||||||
self.dropout = nn.Dropout2d(dropout_ratio)
|
self.dropout = nn.Dropout2d(dropout_ratio)
|
||||||
else:
|
else:
|
||||||
self.dropout = None
|
self.dropout = None
|
||||||
|
self.fp16_enabled = False
|
||||||
|
|
||||||
def extra_repr(self):
|
def extra_repr(self):
|
||||||
"""Extra repr."""
|
"""Extra repr."""
|
||||||
@ -158,6 +160,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
|
|||||||
|
|
||||||
return inputs
|
return inputs
|
||||||
|
|
||||||
|
@auto_fp16()
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def forward(self, inputs):
|
def forward(self, inputs):
|
||||||
"""Placeholder of forward function."""
|
"""Placeholder of forward function."""
|
||||||
@ -207,6 +210,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta):
|
|||||||
output = self.conv_seg(feat)
|
output = self.conv_seg(feat)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
@force_fp32(apply_to=('seg_logit', ))
|
||||||
def losses(self, seg_logit, seg_label):
|
def losses(self, seg_logit, seg_label):
|
||||||
"""Compute segmentation loss."""
|
"""Compute segmentation loss."""
|
||||||
loss = dict()
|
loss = dict()
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
from mmcv.runner import auto_fp16
|
||||||
|
|
||||||
|
|
||||||
class BaseSegmentor(nn.Module):
|
class BaseSegmentor(nn.Module):
|
||||||
@ -17,6 +18,7 @@ class BaseSegmentor(nn.Module):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(BaseSegmentor, self).__init__()
|
super(BaseSegmentor, self).__init__()
|
||||||
|
self.fp16_enabled = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def with_neck(self):
|
def with_neck(self):
|
||||||
@ -105,6 +107,7 @@ class BaseSegmentor(nn.Module):
|
|||||||
else:
|
else:
|
||||||
return self.aug_test(imgs, img_metas, **kwargs)
|
return self.aug_test(imgs, img_metas, **kwargs)
|
||||||
|
|
||||||
|
@auto_fp16(apply_to=('img', ))
|
||||||
def forward(self, img, img_metas, return_loss=True, **kwargs):
|
def forward(self, img, img_metas, return_loss=True, **kwargs):
|
||||||
"""Calls either :func:`forward_train` or :func:`forward_test` depending
|
"""Calls either :func:`forward_train` or :func:`forward_test` depending
|
||||||
on whether ``return_loss`` is ``True``.
|
on whether ``return_loss`` is ``True``.
|
||||||
@ -146,7 +149,7 @@ class BaseSegmentor(nn.Module):
|
|||||||
DDP, it means the batch size on each GPU), which is used for
|
DDP, it means the batch size on each GPU), which is used for
|
||||||
averaging the logs.
|
averaging the logs.
|
||||||
"""
|
"""
|
||||||
losses = self.forward_train(**data_batch, **kwargs)
|
losses = self(**data_batch)
|
||||||
loss, log_vars = self._parse_losses(losses)
|
loss, log_vars = self._parse_losses(losses)
|
||||||
|
|
||||||
outputs = dict(
|
outputs = dict(
|
||||||
@ -163,7 +166,7 @@ class BaseSegmentor(nn.Module):
|
|||||||
during val epochs. Note that the evaluation after training epochs is
|
during val epochs. Note that the evaluation after training epochs is
|
||||||
not implemented with this method, but an evaluation hook.
|
not implemented with this method, but an evaluation hook.
|
||||||
"""
|
"""
|
||||||
output = self.forward_test(**data_batch, **kwargs)
|
output = self(**data_batch, **kwargs)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user