From 1765c1298598035ba4d842aecd61ab85b4def34d Mon Sep 17 00:00:00 2001 From: Jerry Jiarui XU Date: Mon, 20 Jul 2020 15:17:18 +0800 Subject: [PATCH] Support FP16 (#21) * Support FP16 * add miss folder * add tests * remove useless config * update memory * reduce config * migrate fp16 to mmcv * add model link --- README.md | 1 + configs/fp16/README.md | 21 ++++++++ ...v3_r101-d8_512x1024_80k_fp16_cityscapes.py | 3 ++ ...us_r101-d8_512x1024_80k_fp16_cityscapes.py | 3 ++ ...cn_r101-d8_512x1024_80k_fp16_cityscapes.py | 3 ++ ...et_r101-d8_512x1024_80k_fp16_cityscapes.py | 3 ++ docs/model_zoo.md | 5 ++ mmseg/core/utils/__init__.py | 3 +- mmseg/core/utils/dist_utils.py | 49 +++++++++++++++++++ mmseg/models/decode_heads/decode_head.py | 4 ++ mmseg/models/segmentors/base.py | 7 ++- 11 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 configs/fp16/README.md create mode 100644 configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py create mode 100644 configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py create mode 100644 configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py create mode 100644 configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py create mode 100644 mmseg/core/utils/dist_utils.py diff --git a/README.md b/README.md index c7ea9e5..69eab45 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ Supported methods: - [x] [GCNet](configs/gcnet) - [x] [ANN](configs/ann) - [x] [OCRNet](configs/ocrnet) +- [x] [Mixed Precision (FP16) Training](configs/fp16/README.md) ## Installation diff --git a/configs/fp16/README.md b/configs/fp16/README.md new file mode 100644 index 0000000..757a83c --- /dev/null +++ b/configs/fp16/README.md @@ -0,0 +1,21 @@ +# Mixed Precision Training + +## Introduction +``` +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael 
and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +## Results and models + +### Cityscapes +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-101-D8 | 512x1024 | 80000 | 5.50 | 2.66 | 76.80 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes-50245227.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes/fcn_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230921.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | 5.47 | 2.68 | 79.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes-ade37931.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes/pspnet_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230919.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | 5.91 | 1.93 | 80.48 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes-bc86dc84.pth) | 
[log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | 6.46 | 2.60 | 80.46 | - | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes_20200717_230920.log.json) | diff --git a/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000..60d8350 --- /dev/null +++ b/configs/fp16/deeplabv3_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000..c263d69 --- /dev/null +++ b/configs/fp16/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000..8100a8e --- /dev/null +++ b/configs/fp16/fcn_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
diff --git a/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py b/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py new file mode 100644 index 0000000..aefac29 --- /dev/null +++ b/configs/fp16/pspnet_r101-d8_512x1024_80k_fp16_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) diff --git a/docs/model_zoo.md b/docs/model_zoo.md index 200ae2f..ddca1f5 100644 --- a/docs/model_zoo.md +++ b/docs/model_zoo.md @@ -81,6 +81,11 @@ Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/c Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details. + +### Mixed Precision (FP16) Training + +Please refer to [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details. + ## Speed benchmark ### Hardware diff --git a/mmseg/core/utils/__init__.py b/mmseg/core/utils/__init__.py index f2678b3..79d62f0 100644 --- a/mmseg/core/utils/__init__.py +++ b/mmseg/core/utils/__init__.py @@ -1,3 +1,4 @@ +from .dist_utils import allreduce_grads from .misc import add_prefix -__all__ = ['add_prefix'] +__all__ = ['add_prefix', 'allreduce_grads'] diff --git a/mmseg/core/utils/dist_utils.py b/mmseg/core/utils/dist_utils.py new file mode 100644 index 0000000..25219a7 --- /dev/null +++ b/mmseg/core/utils/dist_utils.py @@ -0,0 +1,49 @@ +from collections import OrderedDict + +import torch.distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + 
buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + world_size = dist.get_world_size() + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py index 1c2636f..9f55fee 100644 --- a/mmseg/models/decode_heads/decode_head.py +++ b/mmseg/models/decode_heads/decode_head.py @@ -3,6 +3,7 @@ from abc import ABCMeta, abstractmethod import torch import torch.nn as nn from mmcv.cnn import normal_init +from mmcv.runner import auto_fp16, force_fp32 from mmseg.core import build_pixel_sampler from mmseg.ops import resize @@ -81,6 +82,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta): self.dropout = nn.Dropout2d(dropout_ratio) else: self.dropout = None + self.fp16_enabled = False def extra_repr(self): """Extra repr.""" @@ -158,6 +160,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta): return inputs + @auto_fp16() @abstractmethod def forward(self, inputs): """Placeholder of forward function.""" @@ -207,6 +210,7 @@ class BaseDecodeHead(nn.Module, metaclass=ABCMeta): output = self.conv_seg(feat) return output + @force_fp32(apply_to=('seg_logit', )) def losses(self, seg_logit, seg_label): 
"""Compute segmentation loss.""" loss = dict() diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py index 4f31127..6b4084c 100644 --- a/mmseg/models/segmentors/base.py +++ b/mmseg/models/segmentors/base.py @@ -8,6 +8,7 @@ import numpy as np import torch import torch.distributed as dist import torch.nn as nn +from mmcv.runner import auto_fp16 class BaseSegmentor(nn.Module): @@ -17,6 +18,7 @@ class BaseSegmentor(nn.Module): def __init__(self): super(BaseSegmentor, self).__init__() + self.fp16_enabled = False @property def with_neck(self): @@ -105,6 +107,7 @@ class BaseSegmentor(nn.Module): else: return self.aug_test(imgs, img_metas, **kwargs) + @auto_fp16(apply_to=('img', )) def forward(self, img, img_metas, return_loss=True, **kwargs): """Calls either :func:`forward_train` or :func:`forward_test` depending on whether ``return_loss`` is ``True``. @@ -146,7 +149,7 @@ class BaseSegmentor(nn.Module): DDP, it means the batch size on each GPU), which is used for averaging the logs. """ - losses = self.forward_train(**data_batch, **kwargs) + losses = self(**data_batch) loss, log_vars = self._parse_losses(losses) outputs = dict( @@ -163,7 +166,7 @@ class BaseSegmentor(nn.Module): during val epochs. Note that the evaluation after training epochs is not implemented with this method, but an evaluation hook. """ - output = self.forward_test(**data_batch, **kwargs) + output = self(**data_batch, **kwargs) return output @staticmethod