179 lines
6.4 KiB
Python

import cv2
import numpy as np
from augmentor.transform import translation_xy, change_transform_origin, scaling_xy
# Angles (in degrees) that `rotate` picks from; all are multiples of 90.
ROTATE_DEGREE = [90, 180, 270]


def rotate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    """Randomly rotate an image and its boxes by an angle from ROTATE_DEGREE.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width)
            and the array is passed to ``cv2.warpAffine``.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.
        prob: Probability that the rotation is applied.
        border_value: Fill value for output pixels that the rotated source
            does not cover.

    Returns:
        ``(image, boxes)``. When the augmentation is skipped the inputs are
        returned as they are. Otherwise the image is warped onto a canvas
        sized to the rotated bounding dimensions and the boxes come back as
        a float array of shape ``(len(boxes), 4)`` (``(0, 4)`` when there
        are no boxes).
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    rotate_degree = ROTATE_DEGREE[np.random.randint(0, len(ROTATE_DEGREE))]
    h, w = image.shape[:2]
    # Rotation about the image centre, no scaling.
    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2),
                                angle=rotate_degree,
                                scale=1)
    # |cos| and |sin| of the angle are read straight from the matrix.
    abs_cos_angle = np.abs(M[0, 0])
    abs_sin_angle = np.abs(M[0, 1])
    # Bounding dimensions of the rotated image.
    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
    new_h = int(h * abs_cos_angle + w * abs_sin_angle)
    # Shift the transform so the result is positioned on the new canvas.
    M[0, 2] += new_w // 2 - w // 2
    M[1, 2] += new_h // 2 - h // 2
    image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC,
                           borderMode=cv2.BORDER_CONSTANT, borderValue=border_value)
    new_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Project the four corners of the box through the same transform.
        corners = M.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        # Sort each axis once; the new minimum corner is the mean of the two
        # smallest coordinates and the maximum corner the mean of the two
        # largest.
        ordered = np.sort(corners, axis=1)
        min_x, min_y = ordered[:, :2].mean(axis=1)
        max_x, max_y = ordered[:, 2:].mean(axis=1)
        new_boxes.append([min_x, min_y, max_x, max_y])
    # reshape keeps an (N, 4) layout even when there are no boxes, so that
    # column indexing on the result keeps working.
    boxes = np.array(new_boxes).reshape(-1, 4)
    return image, boxes
def crop(image, boxes, prob=0.5):
    """Randomly crop the image to a window that still contains every box.

    Args:
        image: Image array indexed as ``image[y, x]``.
        boxes: Array of shape ``(N, 4)`` holding ``(x1, y1, x2, y2)`` rows.
        prob: Probability that the crop is applied.

    Returns:
        ``(image, boxes)``. When the augmentation is skipped, or when there
        are no boxes, the inputs are returned as they are. Otherwise the
        image is a slice of the input and the boxes are a new array shifted
        into the cropped frame; the caller's array is left unmodified.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    # The min/max reductions below raise on an empty array, and without
    # boxes nothing constrains the crop, so leave the sample untouched.
    if len(boxes) == 0:
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    # The top-left corner is drawn from the first half of the margin in
    # front of the boxes; the bottom-right corner from the first half of the
    # margin behind them. The max(..., 1) / max(..., +1) terms keep the
    # randint ranges non-empty when a margin is missing.
    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))
    image = image[random_y1:random_y2, random_x1:random_x2]
    # Shift a copy so the caller's array is not modified in place.
    boxes = boxes.copy()
    boxes[:, [0, 2]] = boxes[:, [0, 2]] - random_x1
    boxes[:, [1, 3]] = boxes[:, [1, 3]] - random_y1
    return image, boxes
def flipx(image, boxes, prob=0.5):
    """Randomly mirror the image and its boxes along the x axis.

    Args:
        image: Image array indexed as ``image[y, x]``.
        boxes: Array of shape ``(N, 4)`` holding ``(x1, y1, x2, y2)`` rows.
        prob: Probability that the flip is applied.

    Returns:
        ``(image, boxes)``. When the augmentation is skipped the inputs are
        returned as they are. Otherwise the image is a column-reversed slice
        of the input and the boxes are a new array with mirrored x
        coordinates; the caller's array is left unmodified.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    image = image[:, ::-1]
    w = image.shape[1]
    # Write into a copy so the caller's array is not modified in place.
    flipped = boxes.copy()
    # Mirroring swaps the roles of the left and right edges.
    flipped[:, 0] = w - boxes[:, 2]
    flipped[:, 2] = w - boxes[:, 0]
    return image, flipped
def multi_scale(image, boxes, prob=1.):
    """Randomly resize the image and scale its boxes by the same factor.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        boxes: Box coordinates; multiplied element-wise by the scale factor.
        prob: Probability that the rescaling is applied.

    Returns:
        ``(image, boxes)``. When the augmentation is skipped the inputs are
        returned as they are. Otherwise the image is resized with bilinear
        interpolation and the boxes are rounded and cast to ``int32``.
    """
    skip_threshold = 1 - prob
    if np.random.uniform() < skip_threshold:
        return image, boxes
    height, width = image.shape[:2]
    # One factor is picked from the candidates np.arange(0.7, 1.4, 0.1).
    factor = np.random.choice(np.arange(0.7, 1.4, 0.1))
    # cv2.resize takes the target size as (width, height).
    target_size = (int(round(width * factor)), int(round(height * factor)))
    resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
    scaled_boxes = np.round(boxes * factor).astype(np.int32)
    return resized, scaled_boxes
def translate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    """Randomly shift the image and its boxes.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width)
            and the array is passed to ``cv2.warpAffine``.
        boxes: Array of shape ``(N, 4)`` holding ``(x1, y1, x2, y2)`` rows.
        prob: Probability that the translation is applied.
        border_value: Fill value for output pixels that the shifted source
            does not cover.

    Returns:
        ``(image, boxes)``. When the augmentation is skipped, or when there
        are no boxes, the inputs are returned as they are. Otherwise the
        image keeps its size and the boxes come back as a new array
        transformed by the same matrix as the image.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    # The min/max reductions below raise on an empty array, and without
    # boxes there is nothing to derive the shift bounds from.
    if len(boxes) == 0:
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    # Bounds handed to translation_xy are derived from the free margin
    # around the boxes: the lower bound is non-positive, the upper bound is
    # at least 1.
    # NOTE(review): presumably translation_xy samples an offset between
    # `min` and `max` and returns a 3x3 matrix - confirm in
    # augmentor.transform.
    translation_matrix = translation_xy(min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
                                        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)), prob=1.)
    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))
    image = cv2.warpAffine(
        image,
        translation_matrix[:2, :],
        dsize=(w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )
    new_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Project the four corners of the box through the same transform.
        points = translation_matrix.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        # Rows 0 and 1 hold x and y; the trailing row is dropped.
        min_x, min_y = np.min(points, axis=1)[:2]
        max_x, max_y = np.max(points, axis=1)[:2]
        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes
class MiscEffect:
    """Callable that chains the geometric augmentations of this module.

    Each ``*_prob`` argument is forwarded as the ``prob`` of the matching
    augmentation; ``border_value`` is forwarded to ``rotate`` and
    ``translate``.
    """

    def __init__(self, multi_scale_prob=0.5, rotate_prob=0.05, flip_prob=0.5, crop_prob=0.5, translate_prob=0.5,
                 border_value=(128, 128, 128)):
        """Store the per-augmentation probabilities and the border fill value."""
        self.border_value = border_value
        self.translate_prob = translate_prob
        self.crop_prob = crop_prob
        self.flip_prob = flip_prob
        self.rotate_prob = rotate_prob
        self.multi_scale_prob = multi_scale_prob

    def __call__(self, image, boxes):
        """Run every augmentation in order and return ``(image, boxes)``."""
        # Order matters: scale, rotate, flip, crop, then translate.
        stages = (
            (multi_scale, {'prob': self.multi_scale_prob}),
            (rotate, {'prob': self.rotate_prob, 'border_value': self.border_value}),
            (flipx, {'prob': self.flip_prob}),
            (crop, {'prob': self.crop_prob}),
            (translate, {'prob': self.translate_prob, 'border_value': self.border_value}),
        )
        for augment, options in stages:
            image, boxes = augment(image, boxes, **options)
        return image, boxes
if __name__ == '__main__':
    # Manual visual check: step through the dataset and show every image
    # before and after augmentation, one key press per image.
    from generators.pascal import PascalVocGenerator

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        batch_size=1,
        shuffle_groups=False
    )
    misc_effect = MiscEffect()
    for index in range(train_generator.size()):
        frame = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
        gt_boxes = train_generator.load_annotations(index)['bboxes']
        # Ground-truth boxes are drawn in red directly onto the frame, so
        # they go through the augmentation together with the pixels.
        for box in gt_boxes.astype(np.int32):
            cv2.rectangle(frame, tuple(box[:2]), tuple(box[2:4]), (0, 0, 255), 2)
        cv2.imshow('src_image', frame.copy())
        # Only multi_scale is previewed here; call misc_effect(frame, gt_boxes)
        # instead to preview the whole pipeline.
        augmented, augmented_boxes = multi_scale(frame, gt_boxes)
        # Draw on a private copy of the augmented frame.
        augmented = augmented.copy()
        # Transformed boxes are overlaid in green.
        for box in augmented_boxes.astype(np.int32):
            cv2.rectangle(augmented, tuple(box[:2]), tuple(box[2:4]), (0, 255, 0), 1)
        cv2.imshow('image', augmented)
        cv2.waitKey(0)