[Fix] Fix docstring link problem in readthedocs (#845)
* fix docstring link * fix docstring link * fix docstring link * fix docstring link * fix docstring link * fix docstring link
This commit is contained in:
parent
a7461d96b0
commit
4981ff68c2
@ -249,9 +249,9 @@ class Collect(object):
|
|||||||
keys (Sequence[str]): Keys of results to be collected in ``data``.
|
keys (Sequence[str]): Keys of results to be collected in ``data``.
|
||||||
meta_keys (Sequence[str], optional): Meta keys to be converted to
|
meta_keys (Sequence[str], optional): Meta keys to be converted to
|
||||||
``mmcv.DataContainer`` and collected in ``data[img_metas]``.
|
``mmcv.DataContainer`` and collected in ``data[img_metas]``.
|
||||||
Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
|
Default: (``filename``, ``ori_filename``, ``ori_shape``,
|
||||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
``img_shape``, ``pad_shape``, ``scale_factor``, ``flip``,
|
||||||
'img_norm_cfg')``
|
``flip_direction``, ``img_norm_cfg``)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
|
|||||||
@ -187,8 +187,8 @@ class InputInjection(nn.Module):
|
|||||||
class CGNet(BaseModule):
|
class CGNet(BaseModule):
|
||||||
"""CGNet backbone.
|
"""CGNet backbone.
|
||||||
|
|
||||||
A Light-weight Context Guided Network for Semantic Segmentation
|
This backbone is the implementation of `A Light-weight Context Guided
|
||||||
arXiv: https://arxiv.org/abs/1811.08201
|
Network for Semantic Segmentation <https://arxiv.org/abs/1811.08201>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (int): Number of input image channels. Normally 3.
|
in_channels (int): Number of input image channels. Normally 3.
|
||||||
|
|||||||
@ -272,6 +272,9 @@ class FeatureFusionModule(nn.Module):
|
|||||||
class FastSCNN(BaseModule):
|
class FastSCNN(BaseModule):
|
||||||
"""Fast-SCNN Backbone.
|
"""Fast-SCNN Backbone.
|
||||||
|
|
||||||
|
This backbone is the implementation of `Fast-SCNN: Fast Semantic
|
||||||
|
Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (int): Number of input image channels. Default: 3.
|
in_channels (int): Number of input image channels. Default: 3.
|
||||||
downsample_dw_channels (tuple[int]): Number of output channels after
|
downsample_dw_channels (tuple[int]): Number of output channels after
|
||||||
|
|||||||
@ -218,8 +218,8 @@ class HRModule(BaseModule):
|
|||||||
class HRNet(BaseModule):
|
class HRNet(BaseModule):
|
||||||
"""HRNet backbone.
|
"""HRNet backbone.
|
||||||
|
|
||||||
`High-Resolution Representations for Labeling Pixels and Regions
|
This backbone is the implementation of `High-Resolution Representations
|
||||||
arXiv: <https://arxiv.org/abs/1904.04514>`_.
|
for Labeling Pixels and Regions <https://arxiv.org/abs/1904.04514>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extra (dict): Detailed configuration for each stage of HRNet.
|
extra (dict): Detailed configuration for each stage of HRNet.
|
||||||
|
|||||||
@ -246,9 +246,9 @@ class TransformerEncoderLayer(BaseModule):
|
|||||||
class MixVisionTransformer(BaseModule):
|
class MixVisionTransformer(BaseModule):
|
||||||
"""The backbone of Segformer.
|
"""The backbone of Segformer.
|
||||||
|
|
||||||
A PyTorch implement of : `SegFormer: Simple and Efficient Design for
|
This backbone is the implementation of `SegFormer: Simple and
|
||||||
Semantic Segmentation with Transformers` -
|
Efficient Design for Semantic Segmentation with
|
||||||
https://arxiv.org/pdf/2105.15203.pdf
|
Transformers <https://arxiv.org/abs/2105.15203>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (int): Number of input channels. Default: 3.
|
in_channels (int): Number of input channels. Default: 3.
|
||||||
|
|||||||
@ -14,6 +14,10 @@ from ..utils import InvertedResidual, make_divisible
|
|||||||
class MobileNetV2(BaseModule):
|
class MobileNetV2(BaseModule):
|
||||||
"""MobileNetV2 backbone.
|
"""MobileNetV2 backbone.
|
||||||
|
|
||||||
|
This backbone is the implementation of
|
||||||
|
`MobileNetV2: Inverted Residuals and Linear Bottlenecks
|
||||||
|
<https://arxiv.org/abs/1801.04381>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
widen_factor (float): Width multiplier, multiply number of
|
widen_factor (float): Width multiplier, multiply number of
|
||||||
channels in each layer by this amount. Default: 1.0.
|
channels in each layer by this amount. Default: 1.0.
|
||||||
|
|||||||
@ -271,6 +271,9 @@ class Bottleneck(_Bottleneck):
|
|||||||
class ResNeSt(ResNetV1d):
|
class ResNeSt(ResNetV1d):
|
||||||
"""ResNeSt backbone.
|
"""ResNeSt backbone.
|
||||||
|
|
||||||
|
This backbone is the implementation of `ResNeSt:
|
||||||
|
Split-Attention Networks <https://arxiv.org/abs/2004.08955>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
groups (int): Number of groups of Bottleneck. Default: 1
|
groups (int): Number of groups of Bottleneck. Default: 1
|
||||||
base_width (int): Base width of Bottleneck. Default: 4
|
base_width (int): Base width of Bottleneck. Default: 4
|
||||||
|
|||||||
@ -311,6 +311,9 @@ class Bottleneck(BaseModule):
|
|||||||
class ResNet(BaseModule):
|
class ResNet(BaseModule):
|
||||||
"""ResNet backbone.
|
"""ResNet backbone.
|
||||||
|
|
||||||
|
This backbone is the improved implementation of `Deep Residual Learning
|
||||||
|
for Image Recognition <https://arxiv.org/abs/1512.03385>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
|
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
|
||||||
in_channels (int): Number of input image channels. Default: 3.
|
in_channels (int): Number of input image channels. Default: 3.
|
||||||
@ -686,11 +689,10 @@ class ResNet(BaseModule):
|
|||||||
class ResNetV1c(ResNet):
|
class ResNetV1c(ResNet):
|
||||||
"""ResNetV1c variant described in [1]_.
|
"""ResNetV1c variant.
|
||||||
|
|
||||||
Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv
|
Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv in
|
||||||
in the input stem with three 3x3 convs.
|
the input stem with three 3x3 convs. For more details please refer to `Bag
|
||||||
|
of Tricks for Image Classification with Convolutional Neural Networks
|
||||||
References:
|
<https://arxiv.org/abs/1812.01187>`_.
|
||||||
.. [1] https://arxiv.org/pdf/1812.01187.pdf
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
|||||||
@ -88,6 +88,10 @@ class Bottleneck(_Bottleneck):
|
|||||||
class ResNeXt(ResNet):
|
class ResNeXt(ResNet):
|
||||||
"""ResNeXt backbone.
|
"""ResNeXt backbone.
|
||||||
|
|
||||||
|
This backbone is the implementation of `Aggregated
|
||||||
|
Residual Transformations for Deep Neural
|
||||||
|
Networks <https://arxiv.org/abs/1611.05431>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
|
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
|
||||||
in_channels (int): Number of input image channels. Normally 3.
|
in_channels (int): Number of input image channels. Normally 3.
|
||||||
|
|||||||
@ -522,13 +522,12 @@ class SwinBlockSequence(BaseModule):
|
|||||||
|
|
||||||
@BACKBONES.register_module()
|
@BACKBONES.register_module()
|
||||||
class SwinTransformer(BaseModule):
|
class SwinTransformer(BaseModule):
|
||||||
""" Swin Transformer
|
"""Swin Transformer backbone.
|
||||||
A PyTorch implement of : `Swin Transformer:
|
|
||||||
Hierarchical Vision Transformer using Shifted Windows` -
|
|
||||||
https://arxiv.org/abs/2103.14030
|
|
||||||
|
|
||||||
Inspiration from
|
This backbone is the implementation of `Swin Transformer:
|
||||||
https://github.com/microsoft/Swin-Transformer
|
Hierarchical Vision Transformer using Shifted
|
||||||
|
Windows <https://arxiv.org/abs/2103.14030>`_.
|
||||||
|
Inspiration from https://github.com/microsoft/Swin-Transformer.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
pretrain_img_size (int | tuple[int]): The size of input image when
|
pretrain_img_size (int | tuple[int]): The size of input image when
|
||||||
|
|||||||
@ -224,8 +224,9 @@ class InterpConv(nn.Module):
|
|||||||
@BACKBONES.register_module()
|
@BACKBONES.register_module()
|
||||||
class UNet(BaseModule):
|
class UNet(BaseModule):
|
||||||
"""UNet backbone.
|
"""UNet backbone.
|
||||||
U-Net: Convolutional Networks for Biomedical Image Segmentation.
|
|
||||||
https://arxiv.org/pdf/1505.04597.pdf
|
This backbone is the implementation of `U-Net: Convolutional Networks
|
||||||
|
for Biomedical Image Segmentation <https://arxiv.org/abs/1505.04597>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (int): Number of input image channels. Default: 3.
|
in_channels (int): Number of input image channels. Default: 3.
|
||||||
@ -277,7 +278,6 @@ class UNet(BaseModule):
|
|||||||
The input image size should be divisible by the whole downsample rate
|
The input image size should be divisible by the whole downsample rate
|
||||||
of the encoder. More detail of the whole downsample rate can be found
|
of the encoder. More detail of the whole downsample rate can be found
|
||||||
in UNet._check_input_divisible.
|
in UNet._check_input_divisible.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
|
|||||||
@ -98,9 +98,9 @@ class TransformerEncoderLayer(BaseModule):
|
|||||||
class VisionTransformer(BaseModule):
|
class VisionTransformer(BaseModule):
|
||||||
"""Vision Transformer.
|
"""Vision Transformer.
|
||||||
|
|
||||||
A PyTorch implement of : `An Image is Worth 16x16 Words:
|
This backbone is the implementation of `An Image is Worth 16x16 Words:
|
||||||
Transformers for Image Recognition at Scale` -
|
Transformers for Image Recognition at
|
||||||
https://arxiv.org/abs/2010.11929
|
Scale <https://arxiv.org/abs/2010.11929>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img_size (int | tuple): Input image size. Default: 224.
|
img_size (int | tuple): Input image size. Default: 224.
|
||||||
|
|||||||
@ -36,6 +36,8 @@ def calculate_uncertainty(seg_logits):
|
|||||||
class PointHead(BaseCascadeDecodeHead):
|
class PointHead(BaseCascadeDecodeHead):
|
||||||
"""A mask point head used in PointRend.
|
"""A mask point head used in PointRend.
|
||||||
|
|
||||||
|
This head is the implementation of `PointRend: Image Segmentation as
|
||||||
|
Rendering <https://arxiv.org/abs/1912.08193>`_.
|
||||||
``PointHead`` uses a shared multi-layer perceptron (equivalent to
|
``PointHead`` uses a shared multi-layer perceptron (equivalent to
|
||||||
nn.Conv1d) to predict the logit of input points. The fine-grained feature
|
nn.Conv1d) to predict the logit of input points. The fine-grained feature
|
||||||
and coarse feature will be concatenated together for prediction.
|
and coarse feature will be concatenated together for prediction.
|
||||||
|
|||||||
@ -10,7 +10,9 @@ class DepthwiseSeparableFCNHead(FCNHead):
|
|||||||
"""Depthwise-Separable Fully Convolutional Network for Semantic
|
"""Depthwise-Separable Fully Convolutional Network for Semantic
|
||||||
Segmentation.
|
Segmentation.
|
||||||
|
|
||||||
This head is implemented according to Fast-SCNN paper.
|
This head is implemented according to `Fast-SCNN: Fast Semantic
|
||||||
|
Segmentation Network <https://arxiv.org/abs/1902.04502>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels(int): Number of output channels of FFM.
|
in_channels(int): Number of output channels of FFM.
|
||||||
channels(int): Number of middle-stage channels in the decode head.
|
channels(int): Number of middle-stage channels in the decode head.
|
||||||
|
|||||||
@ -12,8 +12,8 @@ from ..builder import NECKS
|
|||||||
class FPN(BaseModule):
|
class FPN(BaseModule):
|
||||||
"""Feature Pyramid Network.
|
"""Feature Pyramid Network.
|
||||||
|
|
||||||
This is an implementation of - Feature Pyramid Networks for Object
|
This neck is the implementation of `Feature Pyramid Networks for Object
|
||||||
Detection (https://arxiv.org/abs/1612.03144)
|
Detection <https://arxiv.org/abs/1612.03144>`_.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (List[int]): Number of input channels per scale.
|
in_channels (List[int]): Number of input channels per scale.
|
||||||
|
|||||||
@ -63,8 +63,8 @@ class MLAModule(nn.Module):
|
|||||||
class MLANeck(nn.Module):
|
class MLANeck(nn.Module):
|
||||||
"""Multi-level Feature Aggregation.
|
"""Multi-level Feature Aggregation.
|
||||||
|
|
||||||
The Multi-level Feature Aggregation construction of SETR:
|
This neck is `The Multi-level Feature Aggregation construction of
|
||||||
https://arxiv.org/pdf/2012.15840.pdf
|
SETR <https://arxiv.org/abs/2012.15840>`_.
|
||||||
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
@ -11,6 +11,7 @@ class MultiLevelNeck(nn.Module):
|
|||||||
"""MultiLevelNeck.
|
"""MultiLevelNeck.
|
||||||
|
|
||||||
A neck structure that connects the ViT backbone and decoder_heads.
|
A neck structure that connects the ViT backbone and decoder_heads.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
in_channels (List[int]): Number of input channels per scale.
|
in_channels (List[int]): Number of input channels per scale.
|
||||||
out_channels (int): Number of output channels (used at each scale).
|
out_channels (int): Number of output channels (used at each scale).
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user