# Yolov5s/ai_training/detection/fcos/augmentor/transform.py

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import numpy as np
import cv2

# Shared 3 by 3 integer identity, returned by the random factories below when no transform is drawn.
identity_matrix = np.eye(3, dtype=int)


def colvec(*args):
    """
    Build a column vector from the given positional values.

    Args
        *args: the components of the vector, in order
    Returns
        a numpy array holding one component per row (shape (n, 1) for n scalar arguments)
    """
    row = np.array([args])
    return np.transpose(row)


def transform_aabb(transform_matrix, aabb):
    """
    Apply a transformation to an axis aligned bounding box.

    All four corners of the box are transformed, and the smallest axis aligned
    box containing the transformed corners is returned.

    Args
        transform_matrix: The homogeneous 3 by 3 transformation (numpy array) to apply.
        aabb: The box as a sequence (x1, y1, x2, y2).
    Returns
        The new AABB as list [x1, y1, x2, y2]
    """
    x1, y1, x2, y2 = aabb

    # One homogeneous corner per column: (x1, y1), (x2, y2), (x1, y2), (x2, y1).
    corners = [
        [x1, x2, x1, x2],
        [y1, y2, y2, y1],
        [1, 1, 1, 1],
    ]
    moved = transform_matrix.dot(corners)

    # Only the x and y rows matter; the homogeneous row is dropped.
    lower = moved[:2].min(axis=1)
    upper = moved[:2].max(axis=1)

    return [lower[0], lower[1], upper[0], upper[1]]


def random_value(min, max):
    """
    Draw a single value uniformly between min and max.

    Args
        min: the lower bound of the range
        max: the upper bound of the range
    Returns
        the sample produced by np.random.uniform
    """
    sample = np.random.uniform(low=min, high=max)
    return sample


def random_vector(min, max):
    """
    Draw a random vector whose components lie between min and max.

    Args
        min: the minimum value for each component, (n, )
        max: the maximum value for each component, (n, )
    Returns
        a numpy array of shape (n, ), each component sampled uniformly from its own range
    """
    lower = np.array(min)
    upper = np.array(max)
    # Both bounds must be 1-D and of equal length.
    assert lower.shape == upper.shape
    assert lower.ndim == 1
    return np.random.uniform(low=lower, high=upper)


def rotation(min=0, max=0, prob=0.5):
    """
    Randomly construct a homogeneous 2D rotation matrix.

    A uniform sample is drawn first; a rotation is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args
        min: a scalar for the minimum angle in radians
        max: a scalar for the maximum angle in radians
        prob: threshold for the random draw; the rotation is built when the draw exceeds it
    Returns
        the rotation matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # theta: the rotation angle in radians
        theta = random_value(min=min, max=max)
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        return np.array([
            [cos_t, -sin_t, 0],
            [sin_t, cos_t, 0],
            [0, 0, 1],
        ])
    return identity_matrix


def translation_x(min=0, max=0, prob=0.5):
    """
    Randomly construct a homogeneous 2D translation matrix along the x axis.

    A uniform sample is drawn first; a translation is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args:
        min: a scalar for the minimum translation for x axis
        max: a scalar for the maximum translation for x axis
        prob: threshold for the random draw; the translation is built when the draw exceeds it

    Returns:
        the translation matrix (or the identity) as 3 by 3 numpy array

    """
    if np.random.uniform() > prob:
        # shift: the translation along x, sampled uniformly between min and max
        shift = np.random.uniform(min, max)
        return np.array([
            [1, 0, shift],
            # The y row needs an explicit zero translation; a two-element row
            # would make the matrix ragged instead of 3 by 3.
            [0, 1, 0],
            [0, 0, 1],
        ])
    return identity_matrix


def translation_y(min=0, max=0, prob=0.5):
    """
    Randomly construct a homogeneous 2D translation matrix along the y axis.

    A uniform sample is drawn first; a translation is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args:
        min: a scalar for the minimum translation for y axis
        max: a scalar for the maximum translation for y axis
        prob: threshold for the random draw; the translation is built when the draw exceeds it

    Returns:
        the translation matrix (or the identity) as 3 by 3 numpy array

    """
    if np.random.uniform() > prob:
        # shift: the translation along y, sampled uniformly between min and max
        shift = np.random.uniform(min, max)
        return np.array([
            # The x row needs an explicit zero translation; a two-element row
            # would make the matrix ragged instead of 3 by 3.
            [1, 0, 0],
            [0, 1, shift],
            [0, 0, 1],
        ])
    return identity_matrix


def translation_xy(min=(0, 0), max=(0, 0), prob=0.5):
    """
    Randomly construct a homogeneous 2D translation matrix along both axes.

    A uniform sample is drawn first; here a translation is generated when that
    sample is less than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args:
        min: a 2D vector with the minimum translation for x and y
        max: a 2D vector with the maximum translation for x and y
        prob: threshold for the random draw; the translation is built when the draw is below it

    Returns:
        the translation matrix (or the identity) as 3 by 3 numpy array

    """
    if np.random.uniform() < prob:
        # offset: the translation 2D vector (x first, then y)
        offset = random_vector(min=min, max=max)
        shift_x = offset[0]
        shift_y = offset[1]
        return np.array([
            [1, 0, shift_x],
            [0, 1, shift_y],
            [0, 0, 1],
        ])
    return identity_matrix


def shear_x(min=0, max=0, prob=0.5):
    """
    Randomly construct a homogeneous 2D shear matrix along the x axis.

    A uniform sample is drawn first; a shear is generated only when that sample
    is greater than `prob`, otherwise the module-level identity matrix object
    is returned (not a copy).

    Args
        min:  the minimum shear angle in radians.
        max:  the maximum shear angle in radians.
        prob: threshold for the random draw; the shear is built when the draw exceeds it.
    Returns
        the shear matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # The tangent of the sampled angle is how much x grows per unit of y.
        shear_angle = random_value(min=min, max=max)
        slope = np.tan(shear_angle)
        return np.array([
            [1, slope, 0],
            [0, 1, 0],
            [0, 0, 1],
        ])
    return identity_matrix


def shear_y(min=0, max=0, prob=0.5):
    """
    Randomly construct a homogeneous 2D shear matrix along the y axis.

    A uniform sample is drawn first; a shear is generated only when that sample
    is greater than `prob`, otherwise the module-level identity matrix object
    is returned (not a copy).

    `min` and `max` default to 0, matching the other factories in this module,
    so the function can be called without arguments.

    Args
        min:  the minimum shear angle in radians.
        max:  the maximum shear angle in radians.
        prob: threshold for the random draw; the shear is built when the draw exceeds it.
    Returns
        the shear matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # The tangent of the sampled angle is how much y grows per unit of x.
        shear_angle = np.random.uniform(min, max)
        slope = np.tan(shear_angle)
        return np.array([
            [1, 0, 0],
            [slope, 1, 0],
            [0, 0, 1],
        ])
    return identity_matrix


def scaling_x(min=0.9, max=1.1, prob=0.5):
    """
    Randomly construct a homogeneous 2D scaling matrix for the x axis.

    A uniform sample is drawn first; a scaling is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args
        min: a scalar for the minimum scaling factor along x
        max: a scalar for the maximum scaling factor along x
        prob: threshold for the random draw; the scaling is built when the draw exceeds it
    Returns
        the zoom matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # scale: the factor applied to x; y is left untouched
        scale = random_value(min=min, max=max)
        return np.diag([scale, 1, 1])
    return identity_matrix


def scaling_y(min=0.9, max=1.1, prob=0.5):
    """
    Randomly construct a homogeneous 2D scaling matrix for the y axis.

    A uniform sample is drawn first; a scaling is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args
        min: a scalar for the minimum scaling factor along y
        max: a scalar for the maximum scaling factor along y
        prob: threshold for the random draw; the scaling is built when the draw exceeds it
    Returns
        the zoom matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # scale: the factor applied to y; x is left untouched
        scale = random_value(min=min, max=max)
        return np.diag([1, scale, 1])
    return identity_matrix


def scaling_xy(min=(0.9, 0.9), max=(1.1, 1.1), prob=0.5):
    """
    Randomly construct a homogeneous 2D scaling matrix for both axes.

    A uniform sample is drawn first; a scaling is generated only when that
    sample is greater than `prob`, otherwise the module-level identity matrix
    object is returned (not a copy).

    Args
        min: a 2D vector containing the minimum scaling factor for X and Y.
        max: a 2D vector containing the maximum scaling factor for X and Y.
        prob: threshold for the random draw; the scaling is built when the draw exceeds it.
    Returns
        the zoom matrix (or the identity) as 3 by 3 numpy array
    """
    if np.random.uniform() > prob:
        # scales: a 2D vector with the X factor first and the Y factor second
        scales = random_vector(min=min, max=max)
        return np.diag([scales[0], scales[1], 1])
    return identity_matrix


def flip_x(prob=0.8):
    """
    Randomly construct a transformation that negates the x coordinate.

    A uniform sample is drawn; the flip is returned only when that sample is
    greater than `prob`, otherwise the module-level identity matrix object is
    returned (not a copy).

    Args
        prob: threshold for the random draw; the flip is built when the draw exceeds it
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    if np.random.uniform() > prob:
        # -1 on the first diagonal entry mirrors x and leaves y unchanged.
        return np.diag([-1, 1, 1])
    return identity_matrix


def flip_y(prob=0.8):
    """
    Randomly construct a transformation that negates the y coordinate.

    A uniform sample is drawn; the flip is returned only when that sample is
    greater than `prob`, otherwise the module-level identity matrix object is
    returned (not a copy).

    Args
        prob: threshold for the random draw; the flip is built when the draw exceeds it
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    if np.random.uniform() > prob:
        # -1 on the second diagonal entry mirrors y and leaves x unchanged.
        return np.diag([1, -1, 1])
    return identity_matrix


def change_transform_origin(transform, center):
    """
    Re-express a transformation so that its linear part acts around a new origin.

    Args
        transform: the homogeneous 3 by 3 transformation matrix
        center: the new origin of the transformation; its first two components are used as (x, y)
    Returns
        translate(center) * transform * translate(-center)
    """
    center = np.array(center)
    cx, cy = center[0], center[1]

    # Move the new origin to (0, 0), apply the transform, then move it back.
    to_center = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
    from_center = np.array([[1, 0, -cx], [0, 1, -cy], [0, 0, 1]])

    return np.linalg.multi_dot([to_center, transform, from_center])


def random_transform(
        min_rotation=0,
        max_rotation=0,
        min_translation=(0, 0),
        max_translation=(0, 0),
        min_shear=0,
        max_shear=0,
        min_scaling=(1, 1),
        max_scaling=(1, 1),
):
    """
    Create a random transformation.

    The result is the matrix product of the following factors, in this order (from left to right):
      * rotation
      * translation (x and y)
      * shear (along x or along y, picked by a random draw)
      * scaling (x and y)
      * flip (along x or along y, picked by a random draw)

    Each factor comes from the matching helper in this module called with its default `prob`,
    so any of them may turn out to be the identity.

    Note that `adjust_transform_for_image` multiplies the translation by the image width and height
    when its `relative_translation` argument is true, in which case an X translation of 0.1 means
    10% of the image width.

    Args
        min_rotation:    The minimum rotation in radians for the transform as scalar.
        max_rotation:    The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear:       The minimum shear angle for the transform in radians.
        max_shear:       The maximum shear angle for the transform in radians.
        min_scaling:     The minimum scaling for the transform as 2D vector.
        max_scaling:     The maximum scaling for the transform as 2D vector.
    Returns
        the combined transformation as 3 by 3 numpy array
    """
    # The factors are built one by one, in product order, so random numbers are consumed
    # in a fixed sequence.
    rotate = rotation(min_rotation, max_rotation)
    translate = translation_xy(min_translation, max_translation)

    shear_factory = shear_x if np.random.uniform() > 0.5 else shear_y
    shear = shear_factory(min_shear, max_shear)

    scale = scaling_xy(min_scaling, max_scaling)

    flip_factory = flip_x if np.random.uniform() > 0.5 else flip_y
    flip = flip_factory()

    return np.linalg.multi_dot([rotate, translate, shear, scale, flip])


def random_transform_generator(**kwargs):
    """
    Create an endless generator of random transforms.

    Every item is produced by a fresh call to `random_transform` with the given keyword
    arguments, so each one is the product (from left to right) of:
      * rotation
      * translation
      * shear
      * scaling
      * flip along x or y

    Args
        min_rotation: The minimum rotation in radians for the transform as scalar.
        max_rotation: The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear: The minimum shear angle for the transform in radians.
        max_shear: The maximum shear angle for the transform in radians.
        min_scaling: The minimum scaling for the transform as 2D vector.
        max_scaling: The maximum scaling for the transform as 2D vector.
    Yields
        a homogeneous 3 by 3 transformation matrix per iteration; the generator never ends by itself
    """
    while True:
        transform = random_transform(**kwargs)
        yield transform


def adjust_transform_for_image(transform, image, relative_translation):
    """
    Adjust a transformation for a specific image.

    The translation of the matrix is scaled with the size of the image when requested, and the
    linear part of the transformation is adjusted so that its origin is at the center of the image.

    The input matrix is left untouched: all work happens on a float copy, so a matrix shared
    between calls (such as the module-level identity) is never modified in place.

    Args
        transform: the homogeneous 3 by 3 transformation matrix
        image: the image the transform is meant for; only the first two entries of its shape
               (height, width) are used, so an image without a channel axis is accepted too
        relative_translation: if true, the translation is multiplied by (width, height)
    Returns
        the adjusted transformation as a new 3 by 3 numpy array
    """
    height, width = image.shape[:2]

    # Copy first so the in-place scaling below cannot leak into the caller's matrix.
    result = np.array(transform, dtype=float)

    # Scale the translation with the image size if specified.
    if relative_translation:
        result[0:2, 2] *= [width, height]

    # Move the origin of transformation to the image center.
    return change_transform_origin(result, (0.5 * width, 0.5 * height))


class TransformParameters:
    """
    Plain container for the options that control how a transformation is applied to an image.

    Args
        fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap'
        interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4'
        cval: Fill value to use with fill_mode='constant'
        relative_translation:  If true (the default), interpret translation as a factor of the image size.
                               If false, interpret it as absolute pixels.
    """

    def __init__(
            self,
            fill_mode='nearest',
            interpolation='linear',
            cval=0,
            relative_translation=True,
    ):
        self.fill_mode = fill_mode
        self.interpolation = interpolation
        self.cval = cval
        self.relative_translation = relative_translation

    def cv_border_mode(self):
        """
        Translate `fill_mode` into the matching OpenCV border constant.

        Returns
            the cv2.BORDER_* value for the configured fill mode, or None when the mode is not recognised
        """
        border_modes = {
            'constant': cv2.BORDER_CONSTANT,
            'nearest': cv2.BORDER_REPLICATE,
            'reflect': cv2.BORDER_REFLECT_101,
            'wrap': cv2.BORDER_WRAP,
        }
        return border_modes.get(self.fill_mode)

    def cv_interpolation(self):
        """
        Translate `interpolation` into the matching OpenCV interpolation constant.

        Returns
            the cv2.INTER_* value for the configured interpolation, or None when it is not recognised
        """
        interpolations = {
            'nearest': cv2.INTER_NEAREST,
            'linear': cv2.INTER_LINEAR,
            'cubic': cv2.INTER_CUBIC,
            'area': cv2.INTER_AREA,
            'lanczos4': cv2.INTER_LANCZOS4,
        }
        return interpolations.get(self.interpolation)


def apply_transform(matrix, image, params):
    """
    Apply a transformation to an image.

    The origin of transformation is at the top left corner of the image.

    The first two rows of the matrix are handed to cv2.warpAffine, and the output has the same
    width and height as the input image. The matrix is interpreted such that a point (x, y) on
    the original image is moved to matrix * (x, y) in the generated image.

    Args
      matrix: A homogeneous 3 by 3 matrix representing the transformation to apply.
      image:  The image to transform.
      params: The transform parameters (see TransformParameters); its `cv_interpolation()`,
              `cv_border_mode()` and `cval` supply the interpolation flag, border mode and
              border value.
    Returns
      The transformed image as returned by cv2.warpAffine.
    """
    height, width = image.shape[:2]
    return cv2.warpAffine(
        image,
        matrix[:2, :],
        # OpenCV expects the output size as (width, height).
        dsize=(width, height),
        # TransformParameters names these accessors in snake_case.
        flags=params.cv_interpolation(),
        borderMode=params.cv_border_mode(),
        borderValue=params.cval,
    )