# All modification made by Kneron Corp.: Copyright (c) 2022 Kneron Corp.
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.utils.checkpoint as cp
from mmcv.cnn import ConvModule, build_conv_layer, build_norm_layer
from mmcv.runner import BaseModule
from torch.nn.modules.batchnorm import _BatchNorm

from ..builder import BACKBONES


class Focus(BaseModule):
    """Focus width and height information into channel space.

    Rearranges every 2x2 spatial patch into the channel dimension
    (b, c, h, w) -> (b, 4c, h/2, w/2), then applies a ConvModule.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        kernel_size (int): The kernel size of the convolution. Default: 1
        stride (int): The stride of the convolution. Default: 1
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        # Forward init_cfg so BaseModule-driven weight init is not lost.
        super().__init__(init_cfg)
        self.conv = ConvModule(
            in_channels * 4,
            out_channels,
            kernel_size,
            stride,
            padding=(kernel_size - 1) // 2,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    def forward(self, x):
        # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)
        patch_top_left = x[..., ::2, ::2]
        patch_top_right = x[..., ::2, 1::2]
        patch_bot_left = x[..., 1::2, ::2]
        patch_bot_right = x[..., 1::2, 1::2]
        # Concatenation order (TL, BL, TR, BR) is part of the learned
        # layout; do not reorder.
        x = torch.cat(
            (
                patch_top_left,
                patch_bot_left,
                patch_top_right,
                patch_bot_right,
            ),
            dim=1,
        )
        return self.conv(x)


class VggLayer(BaseModule):
    """A single VGG-style conv-norm-activation layer.

    Applies one convolution (padding preserves spatial size for odd
    kernels at stride 1), a normalization layer built from ``norm_cfg``
    and a LeakyReLU activation.

    Args:
        inplanes (int): Number of input channels.
        outplanes (int): Number of output channels.
        kernel_size (int): Convolution kernel size. Default: 3.
        stride (int): Convolution stride. Default: 1.
        dilation (int): Stored dilation value. NOTE(review): it is not
            passed to the conv layer, so it currently has no effect —
            confirm whether this is intentional.
        with_cp (bool): Use torch.utils.checkpoint to trade compute for
            memory during training. Default: False.
        conv_cfg (dict | None): Config dict for convolution layer.
            Default: None (plain conv2d).
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        init_cfg (dict or list[dict], optional): Initialization config
            dict. Default: None.
    """

    def __init__(self,
                 inplanes,
                 outplanes,
                 kernel_size=3,
                 stride=1,
                 dilation=1,
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 init_cfg=None):
        super(VggLayer, self).__init__(init_cfg)
        self.inplanes = inplanes
        self.stride = stride
        self.dilation = dilation
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.conv1_stride = stride

        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, outplanes, postfix=1)
        self.conv1 = build_conv_layer(
            conv_cfg,
            inplanes,
            outplanes,
            kernel_size=kernel_size,
            stride=self.conv1_stride,
            padding=(kernel_size - 1) // 2)
        # Registered under a dynamic name (e.g. 'bn1') so checkpoints
        # keep mmcv's conventional parameter naming.
        self.add_module(self.norm1_name, norm1)
        self.LeakyRelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)

    @property
    def norm1(self):
        """nn.Module: normalization layer after the first convolution layer"""
        return getattr(self, self.norm1_name)

    def forward(self, x):
        """Forward function."""

        def _inner_forward(x):
            out = self.conv1(x)
            out = self.norm1(out)
            out = self.LeakyRelu(out)
            return out

        # Checkpointing only helps (and only works) when grads are needed.
        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out


@BACKBONES.register_module()
class FCOS_VGG(BaseModule):
    """VGG-style backbone with a Focus stem, for FCOS-like detectors.

    The stem downsamples by 2x (Focus); each subsequent ``VggLayer``
    downsamples according to ``strides``.

    Args:
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int | None): Number of stem channels. If not
            specified, it will be the same as `base_channels`.
            Default: None.
        base_channels (int): Number of output channels of the Focus stem.
            Default: 32.
        strides (Sequence[int]): Stride of each VGG layer.
        dilations (Sequence[int]): Dilation of each VGG layer.
        out_indices (Sequence[int]): Output from which layers (0-based
            index into the VGG layers).
        out_planes (Sequence[int]): Output channels of each VGG layer;
            its length determines the number of layers.
        avg_down (bool): Stored for interface compatibility; not used by
            this backbone.
        frozen_stages (int): Layers to be frozen (stop grad and set eval
            mode). -1 means not freezing any parameters.
        conv_cfg (dict | None): Config dict for convolution layers.
        norm_cfg (dict): Dictionary to construct and config norm layer.
        act_cfg (dict): Activation config for the stem ConvModule.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        plugins (list[dict] | None): Stored for interface compatibility;
            not used by this backbone.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
        zero_init_residual (bool): Stored for interface compatibility;
            this backbone has no residual blocks.
        init_cfg (dict or list[dict], optional): Initialization config
            dict. Default: None.
    """

    def __init__(self,
                 in_channels=3,
                 stem_channels=None,
                 base_channels=32,
                 strides=(2, 1, 2, 2, 2),
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 out_planes=(64, 64, 128, 196, 128),
                 avg_down=False,
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 norm_eval=True,
                 plugins=None,
                 with_cp=False,
                 zero_init_residual=True,
                 init_cfg=None):
        super(FCOS_VGG, self).__init__(init_cfg)
        self.zero_init_residual = zero_init_residual
        if stem_channels is None:
            stem_channels = base_channels
        self.stem_channels = stem_channels
        self.base_channels = base_channels
        self.strides = strides
        self.dilations = dilations
        self.out_indices = out_indices
        self.avg_down = avg_down
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp
        self.norm_eval = norm_eval
        self.act_cfg = act_cfg
        self.inplanes = stem_channels

        self.stem = Focus(
            in_channels,
            self.base_channels,
            kernel_size=3,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # Layer names (not modules) — modules are registered via
        # add_module so parameters are tracked under 'layer{i}'.
        self.vgg_layers = []
        for i in range(len(out_planes)):
            stride = strides[i]
            dilation = dilations[i]
            out_plane = out_planes[i]
            vgg_layer = VggLayer(
                inplanes=self.inplanes,
                outplanes=out_plane,
                stride=stride,
                dilation=dilation,
                with_cp=with_cp,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg)
            self.inplanes = out_plane
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, vgg_layer)
            self.vgg_layers.append(layer_name)

        self._freeze_stages()

    @property
    def norm1(self):
        """nn.Module: the normalization layer named "norm1" """
        # NOTE(review): self.norm1_name is never assigned in this class,
        # so accessing this property raises AttributeError. Kept only for
        # interface compatibility — confirm whether it can be removed.
        return getattr(self, self.norm1_name)

    def _freeze_stages(self):
        """Freeze layers 1..frozen_stages (eval mode, no grad).

        Note: the Focus stem is never frozen by this method.
        """
        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'layer{i}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def forward(self, x):
        """Forward function.

        Returns:
            tuple[Tensor]: Feature maps of the layers listed in
            ``out_indices``.
        """
        x = self.stem(x)
        outs = []
        for i, layer_name in enumerate(self.vgg_layers):
            vgg_layer = getattr(self, layer_name)
            x = vgg_layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)

    def train(self, mode=True):
        """Convert the model into training mode while keeping frozen
        layers and (optionally) normalization layers in eval mode."""
        super(FCOS_VGG, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()