524 lines
16 KiB
Python
524 lines
16 KiB
Python
"""
|
|
Copyright 2017-2018 Fizyr (https://fizyr.com)
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
import numpy as np
|
|
import cv2
|
|
|
|
# 3x3 homogeneous identity transform (integer dtype): the "no-op" matrix.
identity_matrix = np.eye(3, dtype=int)
|
|
|
|
|
|
def colvec(*args):
    """ Build a column vector from the positional arguments.

    Args
        *args: the components of the vector, in order.
    Returns
        A numpy array holding one component per row (shape (n, 1) for n scalar arguments).
    """
    # Wrap the argument tuple as a single row first, then flip it into a column.
    as_row = np.array((args,))
    return np.transpose(as_row)
|
|
|
|
|
|
def transform_aabb(transform_matrix, aabb):
    """ Transform an axis aligned bounding box and return the AABB enclosing the result.

    All four corners of the input box are mapped through the transformation;
    the returned box is the smallest axis aligned box containing the mapped corners.

    Args
        transform_matrix: The homogeneous 3 by 3 transformation to apply.
        aabb: The box as a sequence (x1, y1, x2, y2) of minimum x, minimum y, maximum x, maximum y.
    Returns
        The new AABB as list [x1, y1, x2, y2].
    """
    left, top, right, bottom = aabb

    # One homogeneous column per corner of the box.
    corners = np.array([
        [left, right, left,   right],
        [top,  bottom, bottom, top],
        [1,    1,     1,      1],
    ])
    moved = transform_matrix.dot(corners)

    # Row 0 holds the transformed x coordinates, row 1 the transformed y coordinates.
    xs = moved[0]
    ys = moved[1]
    return [xs.min(), ys.min(), xs.max(), ys.max()]
|
|
|
|
|
|
def random_value(min, max):
    """ Draw a single value uniformly at random between min and max.

    Args
        min: the lower bound passed to np.random.uniform.
        max: the upper bound passed to np.random.uniform.
    Returns
        The sampled value.
    """
    return np.random.uniform(low=min, high=max)
|
|
|
|
|
|
def random_vector(min, max):
    """ Draw a random vector whose components lie between min and max.

    Args
        min: the minimum value for each component, (n, )
        max: the maximum value for each component, (n, )
    Returns
        A numpy array of shape (n, ) sampled component-wise with np.random.uniform.
    """
    lower = np.array(min)
    upper = np.array(max)

    # Both bounds must be one-dimensional and describe the same number of components.
    assert lower.shape == upper.shape
    assert lower.ndim == 1

    return np.random.uniform(low=lower, high=upper)
|
|
|
|
|
|
def rotation(min=0, max=0, prob=0.5):
    """ Randomly construct a homogeneous 2D rotation matrix, or the identity.

    Args
        min: a scalar for the minimum angle in radians
        max: a scalar for the maximum angle in radians
        prob: threshold for a uniform draw from [0, 1); the rotation is only
              generated when the draw is greater than this value.
    Returns
        the rotation matrix as 3 by 3 numpy array, or identity_matrix when no rotation is generated
    """
    if np.random.uniform() > prob:
        # theta: the rotation angle in radians
        theta = random_value(min=min, max=max)
        cos_t, sin_t = np.cos(theta), np.sin(theta)
        return np.array([
            [cos_t, -sin_t, 0],
            [sin_t, cos_t, 0],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def translation_x(min=0, max=0, prob=0.5):
    """ Randomly construct a homogeneous 2D translation matrix along the x axis, or the identity.

    Args
        min: a scalar for the minimum translation for x axis
        max: a scalar for the maximum translation for x axis
        prob: threshold for a uniform draw from [0, 1); the translation is only
              generated when the draw is greater than this value.
    Returns
        the translation matrix as 3 by 3 numpy array, or identity_matrix when no translation is generated
    """
    if np.random.uniform() > prob:
        # offset: the translation along x, sampled uniformly between min and max
        offset = np.random.uniform(min, max)
        return np.array([
            [1, 0, offset],
            # The y row must carry an explicit zero translation: with only two
            # entries the rows are ragged and no 3 by 3 matrix can be built.
            [0, 1, 0],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def translation_y(min=0, max=0, prob=0.5):
    """ Randomly construct a homogeneous 2D translation matrix along the y axis, or the identity.

    Args
        min: a scalar for the minimum translation for y axis
        max: a scalar for the maximum translation for y axis
        prob: threshold for a uniform draw from [0, 1); the translation is only
              generated when the draw is greater than this value.
    Returns
        the translation matrix as 3 by 3 numpy array, or identity_matrix when no translation is generated
    """
    if np.random.uniform() > prob:
        # offset: the translation along y, sampled uniformly between min and max
        offset = np.random.uniform(min, max)
        return np.array([
            # The x row must carry an explicit zero translation: with only two
            # entries the rows are ragged and no 3 by 3 matrix can be built.
            [1, 0, 0],
            [0, 1, offset],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def translation_xy(min=(0, 0), max=(0, 0), prob=0.5):
    """ Randomly construct a homogeneous 2D translation matrix for both axes, or the identity.

    Args
        min: a 2D vector with the minimum translation for X and Y
        max: a 2D vector with the maximum translation for X and Y
        prob: threshold for a uniform draw from [0, 1); the translation is only
              generated when the draw is smaller than this value.
    Returns
        the translation matrix as 3 by 3 numpy array, or identity_matrix when no translation is generated
    """
    if np.random.uniform() < prob:
        # shift: the sampled 2D translation vector (x, y)
        shift = random_vector(min=min, max=max)
        shift_x, shift_y = shift[0], shift[1]
        return np.array([
            [1, 0, shift_x],
            [0, 1, shift_y],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def shear_x(min=0, max=0, prob=0.5):
    """ Randomly construct a homogeneous 2D shear matrix along the x axis, or the identity.

    Args
        min: the minimum shear angle in radians.
        max: the maximum shear angle in radians.
        prob: threshold for a uniform draw from [0, 1); the shear is only
              generated when the draw is greater than this value.
    Returns
        the shear matrix as 3 by 3 numpy array, or identity_matrix when no shear is generated
    """
    if np.random.uniform() > prob:
        # shear_angle: the shear angle in radians
        shear_angle = random_value(min=min, max=max)
        # x picks up tan(angle) times y; y is left unchanged.
        slope = np.tan(shear_angle)
        return np.array([
            [1, slope, 0],
            [0, 1, 0],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def shear_y(min, max, prob=0.5):
    """ Randomly construct a homogeneous 2D shear matrix along the y axis, or the identity.

    Args
        min: the minimum shear angle in radians.
        max: the maximum shear angle in radians.
        prob: threshold for a uniform draw from [0, 1); the shear is only
              generated when the draw is greater than this value.
    Returns
        the shear matrix as 3 by 3 numpy array, or identity_matrix when no shear is generated
    """
    if np.random.uniform() > prob:
        # shear_angle: the shear angle in radians
        shear_angle = random_value(min=min, max=max)
        # y picks up tan(angle) times x; x is left unchanged.
        slope = np.tan(shear_angle)
        return np.array([
            [1, 0, 0],
            [slope, 1, 0],
            [0, 0, 1],
        ])

    return identity_matrix
|
|
|
|
|
|
def scaling_x(min=0.9, max=1.1, prob=0.5):
    """ Randomly construct a homogeneous 2D scaling matrix for the x axis, or the identity.

    Args
        min: a scalar for the minimum scaling factor for X.
        max: a scalar for the maximum scaling factor for X.
        prob: threshold for a uniform draw from [0, 1); the scaling is only
              generated when the draw is greater than this value.
    Returns
        the zoom matrix as 3 by 3 numpy array, or identity_matrix when no scaling is generated
    """
    if np.random.uniform() > prob:
        # scale: the sampled scaling factor for x; y is left at 1.
        scale = random_value(min=min, max=max)
        return np.diag([scale, 1, 1])

    return identity_matrix
|
|
|
|
|
|
def scaling_y(min=0.9, max=1.1, prob=0.5):
    """ Randomly construct a homogeneous 2D scaling matrix for the y axis, or the identity.

    Args
        min: a scalar for the minimum scaling factor for Y.
        max: a scalar for the maximum scaling factor for Y.
        prob: threshold for a uniform draw from [0, 1); the scaling is only
              generated when the draw is greater than this value.
    Returns
        the zoom matrix as 3 by 3 numpy array, or identity_matrix when no scaling is generated
    """
    if np.random.uniform() > prob:
        # scale: the sampled scaling factor for y; x is left at 1.
        scale = random_value(min=min, max=max)
        return np.diag([1, scale, 1])

    return identity_matrix
|
|
|
|
|
|
def scaling_xy(min=(0.9, 0.9), max=(1.1, 1.1), prob=0.5):
    """ Randomly construct a homogeneous 2D scaling matrix for both axes, or the identity.

    Args
        min: a 2D vector containing the minimum scaling factor for X and Y.
        max: a 2D vector containing the maximum scaling factor for X and Y.
        prob: threshold for a uniform draw from [0, 1); the scaling is only
              generated when the draw is greater than this value.
    Returns
        the zoom matrix as 3 by 3 numpy array, or identity_matrix when no scaling is generated
    """
    if np.random.uniform() > prob:
        # scale: the sampled 2D vector of scaling factors (x, y)
        scale = random_vector(min=min, max=max)
        return np.diag([scale[0], scale[1], 1])

    return identity_matrix
|
|
|
|
|
|
def flip_x(prob=0.8):
    """ Randomly construct a transformation that negates the x coordinate, or the identity.

    Args
        prob: threshold for a uniform draw from [0, 1); the flip is only
              generated when the draw is greater than this value.
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    if np.random.uniform() > prob:
        # Diagonal matrix with -1 on the x entry: x -> -x, y unchanged.
        return np.diag([-1, 1, 1])

    return identity_matrix
|
|
|
|
|
|
def flip_y(prob=0.8):
    """ Randomly construct a transformation that negates the y coordinate, or the identity.

    Args
        prob: threshold for a uniform draw from [0, 1); the flip is only
              generated when the draw is greater than this value.
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    if np.random.uniform() > prob:
        # Diagonal matrix with -1 on the y entry: y -> -y, x unchanged.
        return np.diag([1, -1, 1])

    return identity_matrix
|
|
|
|
|
|
def change_transform_origin(transform, center):
    """ Re-express a transformation so that its linear part acts around a different origin.

    Args
        transform: the transformation matrix
        center: the new origin of the transformation, as (x, y)
    Returns
        translate(center) * transform * translate(-center)
    """
    origin = np.array(center)

    # Move the new origin to (0, 0), apply the transform there, then move back.
    to_origin = np.array([
        [1, 0, -origin[0]],
        [0, 1, -origin[1]],
        [0, 0, 1],
    ])
    from_origin = np.array([
        [1, 0, origin[0]],
        [0, 1, origin[1]],
        [0, 0, 1],
    ])

    return np.linalg.multi_dot([from_origin, transform, to_origin])
|
|
|
|
|
|
def random_transform(
    min_rotation=0,
    max_rotation=0,
    min_translation=(0, 0),
    max_translation=(0, 0),
    min_shear=0,
    max_shear=0,
    min_scaling=(1, 1),
    max_scaling=(1, 1),
):
    """ Create a random transformation.

    The result is the matrix product of the following factors, in this order (from left to right):
      * rotation
      * translation (x and y)
      * shear along x or along y (one axis is picked at random)
      * scaling (x and y)
      * flip along x or along y (one axis is picked at random)

    Each factor is itself random and may come back as the identity, so any
    subset of the operations can end up being part of the result.

    Whether translation values are meant as a fraction of the image size or as
    pixels is not decided here; see `relative_translation` in `TransformParameters`.

    Args
        min_rotation:    The minimum rotation in radians for the transform as scalar.
        max_rotation:    The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear:       The minimum shear angle for the transform in radians.
        max_shear:       The maximum shear angle for the transform in radians.
        min_scaling:     The minimum scaling for the transform as 2D vector.
        max_scaling:     The maximum scaling for the transform as 2D vector.
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    # The factors are built strictly in product order so that the sequence of
    # random draws (and therefore the result for a given seed) stays the same.
    rotate = rotation(min_rotation, max_rotation)
    translate = translation_xy(min_translation, max_translation)

    if np.random.uniform() > 0.5:
        shear = shear_x(min_shear, max_shear)
    else:
        shear = shear_y(min_shear, max_shear)

    scale = scaling_xy(min_scaling, max_scaling)

    if np.random.uniform() > 0.5:
        flip = flip_x()
    else:
        flip = flip_y()

    return np.linalg.multi_dot([rotate, translate, shear, scale, flip])
|
|
|
|
|
|
def random_transform_generator(**kwargs):
    """ Create an endless generator of random transformations.

    Every item is produced by a fresh call to `random_transform(**kwargs)`,
    so the keyword arguments are the ones accepted by that function:

    Args
        min_rotation:    The minimum rotation in radians for the transform as scalar.
        max_rotation:    The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear:       The minimum shear angle for the transform in radians.
        max_shear:       The maximum shear angle for the transform in radians.
        min_scaling:     The minimum scaling for the transform as 2D vector.
        max_scaling:     The maximum scaling for the transform as 2D vector.
    Yields
        a homogeneous 3 by 3 transformation matrix per iteration; the generator never terminates on its own
    """
    while True:
        next_transform = random_transform(**kwargs)
        yield next_transform
|
|
|
|
|
|
def adjust_transform_for_image(transform, image, relative_translation):
    """ Adjust a transformation for a specific image.

    The translation of the matrix is optionally scaled with the size of the image,
    and the linear part of the transformation is adjusted so that the origin of
    the transformation is at the center of the image.

    The input matrix is left untouched; a new matrix is returned.

    Args
        transform: The homogeneous 3 by 3 transformation matrix to adjust.
        image: The image the transformation is meant for; only its shape is read,
               with height and width taken from the first two dimensions.
        relative_translation: If true, the translation column is multiplied by (width, height).
    Returns
        The adjusted transformation as a new 3 by 3 numpy array.
    """
    # Only the first two dimensions are needed, so images without a channel
    # axis (2D grayscale) are accepted as well.
    height, width = image.shape[:2]

    # Work on a float copy: scaling the translation in place would modify the
    # caller's matrix, so a second call with it would scale the translation again.
    result = np.array(transform, dtype=float)

    # Scale the translation with the image size if specified.
    if relative_translation:
        result[0:2, 2] *= [width, height]

    # Move the origin of transformation to the image center.
    return change_transform_origin(result, (0.5 * width, 0.5 * height))
|
|
|
|
|
|
class TransformParameters:
    """ Plain container for the settings that control how a transformation is applied to an image.

    Args
        fill_mode:            One of: 'constant', 'nearest', 'reflect', 'wrap'
        interpolation:        One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4'
        cval:                 Fill value to use with fill_mode='constant'
        relative_translation: If true (the default), interpret translation as a factor of the image size.
                              If false, interpret it as absolute pixels.
    """

    def __init__(self, fill_mode='nearest', interpolation='linear', cval=0, relative_translation=True):
        self.fill_mode = fill_mode
        self.interpolation = interpolation
        self.cval = cval
        self.relative_translation = relative_translation

    def cv_border_mode(self):
        """ Translate `fill_mode` into the matching OpenCV border constant.

        Returns
            The cv2.BORDER_* value for the configured fill mode, or None if the name is not recognised.
        """
        border_modes = (
            ('constant', cv2.BORDER_CONSTANT),
            ('nearest', cv2.BORDER_REPLICATE),
            ('reflect', cv2.BORDER_REFLECT_101),
            ('wrap', cv2.BORDER_WRAP),
        )
        for name, flag in border_modes:
            if self.fill_mode == name:
                return flag
        return None

    def cv_interpolation(self):
        """ Translate `interpolation` into the matching OpenCV interpolation constant.

        Returns
            The cv2.INTER_* value for the configured interpolation, or None if the name is not recognised.
        """
        interpolations = (
            ('nearest', cv2.INTER_NEAREST),
            ('linear', cv2.INTER_LINEAR),
            ('cubic', cv2.INTER_CUBIC),
            ('area', cv2.INTER_AREA),
            ('lanczos4', cv2.INTER_LANCZOS4),
        )
        for name, flag in interpolations:
            if self.interpolation == name:
                return flag
        return None
|
|
|
|
|
|
def apply_transform(matrix, image, params):
    """ Apply a transformation to an image.

    The origin of transformation is at the top left corner of the image.

    The top two rows of the matrix are handed to `cv2.warpAffine` without the
    inverse-map flag, so the matrix is used as the forward mapping: a point
    (x, y) on the original image is moved to matrix * (x, y) in the generated image.
    The output has the same width and height as the input image.

    Args
        matrix: A homogeneous 3 by 3 matrix representing the transformation to apply.
        image:  The image to transform.
        params: The transform parameters (see TransformParameters); its
                `cv_interpolation()`, `cv_border_mode()` and `cval` are used.
    Returns
        The transformed image.
    """
    # cv2 expects dsize as (width, height), i.e. the reverse of the array shape.
    output_size = (image.shape[1], image.shape[0])

    return cv2.warpAffine(
        image,
        matrix[:2, :],
        dsize=output_size,
        # TransformParameters names these methods in snake_case; the camelCase
        # spellings do not exist on it and raised AttributeError.
        flags=params.cv_interpolation(),
        borderMode=params.cv_border_mode(),
        borderValue=params.cval,
    )
|