""" Copyright 2017-2018 Fizyr (https://fizyr.com) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import numpy as np import cv2 identity_matrix = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) def colvec(*args): """ Create a numpy array representing a column vector. """ return np.array([args]).T def transform_aabb(transform_matrix, aabb): """ Apply a transformation to an axis aligned bounding box. The result is a new AABB in the same coordinate system as the original AABB. The new AABB contains all corner points of the original AABB after applying the given transformation. Args transform: The transformation to apply. x1: The minimum x value of the AABB. y1: The minimum y value of the AABB. x2: The maximum x value of the AABB. y2: The maximum y value of the AABB. Returns The new AABB as tuple (x1, y1, x2, y2) """ x1, y1, x2, y2 = aabb # Transform all 4 corners of the AABB. points = transform_matrix.dot([ [x1, x2, x1, x2], [y1, y2, y2, y1], [1, 1, 1, 1], ]) # Extract the min and max corners again. # (3, ) (min_x, min_y, 1) min_corner = points.min(axis=1) # (3, ) (max_x, max_y, 1) max_corner = points.max(axis=1) return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] def random_value(min, max): return np.random.uniform(min, max) def random_vector(min, max): """ Construct a random vector between min and max. Args min: the minimum value for each component, (n, ) max: the maximum value for each component, (n, ) """ min = np.array(min) max = np.array(max) assert min.shape == max.shape assert len(min.shape) == 1 return np.random.uniform(min, max) def rotation(min=0, max=0, prob=0.5): """ Construct a homogeneous 2D rotation matrix. Args min: a scalar for the minimum absolute angle in radians max: a scalar for the maximum absolute angle in radians Returns the rotation matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # angle: the angle in radians angle = random_value(min=min, max=max) return np.array([ [np.cos(angle), -np.sin(angle), 0], [np.sin(angle), np.cos(angle), 0], [0, 0, 1] ]) else: return identity_matrix def translation_x(min=0, max=0, prob=0.5): """ Construct a homogeneous 2D translation matrix. Args: min: a scalar for the minimum translation for x axis max: a scalar for the maximum translation for x axis Returns: the translation matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # translation: the translation 2D vector translation = random_value(min=min, max=max) return np.array([ [1, 0, translation], [0, 1, ], [0, 0, 1] ]) else: return identity_matrix def translation_y(min=0, max=0, prob=0.5): """ Construct a homogeneous 2D translation matrix. Args: min: a scalar for the minimum translation for y axis max: a scalar for the maximum translation for y axis Returns: the translation matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # translation: the translation 2D vector translation = random_value(min=min, max=max) return np.array([ [1, 0], [0, 1, translation], [0, 0, 1] ]) else: return identity_matrix def translation_xy(min=(0, 0), max=(0, 0), prob=0.5): """ Construct a homogeneous 2D translation matrix. Args: min: a scalar for the minimum translation for y axis max: a scalar for the maximum translation for y axis Returns: the translation matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob < prob: # translation: the translation 2D vector translation = random_vector(min=min, max=max) return np.array([ [1, 0, translation[0]], [0, 1, translation[1]], [0, 0, 1] ]) else: return identity_matrix def shear_x(min=0, max=0, prob=0.5): """ Construct a homogeneous 2D shear matrix. Args min: the minimum shear angle in radians. max: the maximum shear angle in radians. Returns the shear matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # angle: the shear angle in radians angle = random_value(min=min, max=max) return np.array([ [1, np.tan(angle), 0], [0, 1, 0], [0, 0, 1] ]) else: return identity_matrix def shear_y(min, max, prob=0.5): """ Construct a homogeneous 2D shear matrix. Args min: the minimum shear angle in radians. max: the maximum shear angle in radians. Returns the shear matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # angle: the shear angle in radians angle = random_value(min=min, max=max) return np.array([ [1, 0, 0], [np.tan(angle), 1, 0], [0, 0, 1] ]) else: return identity_matrix def scaling_x(min=0.9, max=1.1, prob=0.5): """ Construct a homogeneous 2D scaling matrix. Args factor: a 2D vector for X and Y scaling Returns the zoom matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # angle: the shear angle in radians factor = random_value(min=min, max=max) return np.array([ [factor, 0, 0], [0, 1, 0], [0, 0, 1] ]) else: return identity_matrix def scaling_y(min=0.9, max=1.1, prob=0.5): """ Construct a homogeneous 2D scaling matrix. Args factor: a 2D vector for X and Y scaling Returns the zoom matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # angle: the shear angle in radians factor = random_value(min=min, max=max) return np.array([ [1, 0, 0], [0, factor, 0], [0, 0, 1] ]) else: return identity_matrix def scaling_xy(min=(0.9, 0.9), max=(1.1, 1.1), prob=0.5): """ Construct a homogeneous 2D scaling matrix. Args min: a 2D vector containing the minimum scaling factor for X and Y. min: a 2D vector containing The maximum scaling factor for X and Y. Returns the zoom matrix as 3 by 3 numpy array """ random_prob = np.random.uniform() if random_prob > prob: # factor: a 2D vector for X and Y scaling factor = random_vector(min=min, max=max) return np.array([ [factor[0], 0, 0], [0, factor[1], 0], [0, 0, 1] ]) else: return identity_matrix def flip_x(prob=0.8): """ Construct a transformation randomly containing X/Y flips (or not). Args flip_x_chance: The chance that the result will contain a flip along the X axis. flip_y_chance: The chance that the result will contain a flip along the Y axis. Returns a homogeneous 3 by 3 transformation matrix """ random_prob = np.random.uniform() if random_prob > prob: # 1 - 2 * bool gives 1 for False and -1 for True. return np.array([ [-1, 0, 0], [0, 1, 0], [0, 0, 1] ]) else: return identity_matrix def flip_y(prob=0.8): """ Construct a transformation randomly containing X/Y flips (or not). Args flip_x_chance: The chance that the result will contain a flip along the X axis. flip_y_chance: The chance that the result will contain a flip along the Y axis. Returns a homogeneous 3 by 3 transformation matrix """ random_prob = np.random.uniform() if random_prob > prob: # 1 - 2 * bool gives 1 for False and -1 for True. return np.array([ [1, 0, 0], [0, -1, 0], [0, 0, 1] ]) else: return identity_matrix def change_transform_origin(transform, center): """ Create a new transform representing the same transformation, only with the origin of the linear part changed. Args transform: the transformation matrix center: the new origin of the transformation Returns translate(center) * transform * translate(-center) """ center = np.array(center) return np.linalg.multi_dot([np.array([[1, 0, center[0]], [0, 1, center[1]], [0, 0, 1]]), transform, np.array([[1, 0, -center[0]], [0, 1, -center[1]], [0, 0, 1]])]) def random_transform( min_rotation=0, max_rotation=0, min_translation=(0, 0), max_translation=(0, 0), min_shear=0, max_shear=0, min_scaling=(1, 1), max_scaling=(1, 1), ): """ Create a random transformation. The transformation consists of the following operations in this order (from left to right): * rotation * translation * shear * scaling * flip x (if applied) * flip y (if applied) Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret the translation directly as pixel distances instead. Args min_rotation: The minimum rotation in radians for the transform as scalar. max_rotation: The maximum rotation in radians for the transform as scalar. min_translation: The minimum translation for the transform as 2D column vector. max_translation: The maximum translation for the transform as 2D column vector. min_shear: The minimum shear angle for the transform in radians. max_shear: The maximum shear angle for the transform in radians. min_scaling: The minimum scaling for the transform as 2D column vector. max_scaling: The maximum scaling for the transform as 2D column vector. """ return np.linalg.multi_dot([ rotation(min_rotation, max_rotation), translation_xy(min_translation, max_translation), shear_x(min_shear, max_shear) if np.random.uniform() > 0.5 else shear_y(min_shear, max_shear), scaling_xy(min_scaling, max_scaling), flip_x() if np.random.uniform() > 0.5 else flip_y(), ]) def random_transform_generator(**kwargs): """ Create a random transform generator. The transformation consists of the following operations in this order (from left to right): * rotation * translation * shear * scaling * flip x (if applied) * flip y (if applied) Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret the translation directly as pixel distances instead. Args min_rotation: The minimum rotation in radians for the transform as scalar. max_rotation: The maximum rotation in radians for the transform as scalar. min_translation: The minimum translation for the transform as 2D column vector. max_translation: The maximum translation for the transform as 2D column vector. min_shear: The minimum shear angle for the transform in radians. max_shear: The maximum shear angle for the transform in radians. min_scaling: The minimum scaling for the transform as 2D column vector. max_scaling: The maximum scaling for the transform as 2D column vector. """ while True: yield random_transform(**kwargs) def adjust_transform_for_image(transform, image, relative_translation): """ Adjust a transformation for a specific image. The translation of the matrix will be scaled with the size of the image. The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image. """ height, width, channels = image.shape result = transform # Scale the translation with the image size if specified. if relative_translation: result[0:2, 2] *= [width, height] # Move the origin of transformation. result = change_transform_origin(transform, (0.5 * width, 0.5 * height)) return result class TransformParameters: """ Struct holding parameters determining how to apply a transformation to an image. Args fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap' interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4' cval: Fill value to use with fill_mode='constant' relative_translation: If true (the default), interpret translation as a factor of the image size. If false, interpret it as absolute pixels. """ def __init__( self, fill_mode='nearest', interpolation='linear', cval=0, relative_translation=True, ): self.fill_mode = fill_mode self.cval = cval self.interpolation = interpolation self.relative_translation = relative_translation def cv_border_mode(self): if self.fill_mode == 'constant': return cv2.BORDER_CONSTANT if self.fill_mode == 'nearest': return cv2.BORDER_REPLICATE if self.fill_mode == 'reflect': return cv2.BORDER_REFLECT_101 if self.fill_mode == 'wrap': return cv2.BORDER_WRAP def cv_interpolation(self): if self.interpolation == 'nearest': return cv2.INTER_NEAREST if self.interpolation == 'linear': return cv2.INTER_LINEAR if self.interpolation == 'cubic': return cv2.INTER_CUBIC if self.interpolation == 'area': return cv2.INTER_AREA if self.interpolation == 'lanczos4': return cv2.INTER_LANCZOS4 def apply_transform(matrix, image, params): """ Apply a transformation to an image. The origin of transformation is at the top left corner of the image. The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image. Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space. Args matrix: A homogeneous 3 by 3 matrix holding representing the transformation to apply. image: The image to transform. params: The transform parameters (see TransformParameters) """ output = cv2.warpAffine( image, matrix[:2, :], dsize=(image.shape[1], image.shape[0]), flags=params.cvInterpolation(), borderMode=params.cvBorderMode(), borderValue=params.cval, ) return output