179 lines
6.4 KiB
Python
179 lines
6.4 KiB
Python
import cv2
|
|
import numpy as np
|
|
from augmentor.transform import translation_xy, change_transform_origin, scaling_xy
|
|
|
|
# Candidate rotation angles in degrees; rotate() picks one of them at random
# and passes it as the ``angle`` argument of cv2.getRotationMatrix2D.
ROTATE_DEGREE = [90, 180, 270]
|
|
|
|
|
|
def rotate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    """Randomly rotate an image and its boxes by an angle from ROTATE_DEGREE.

    With probability ``1 - prob`` the inputs are returned untouched. Otherwise
    the canvas is resized so the whole rotated image fits, the image is warped
    with ``cv2.warpAffine`` and every box is replaced by an axis-aligned box
    around its transformed corners.

    Args:
        image: Array whose first two dimensions are (height, width).
        boxes: Iterable of ``[x1, y1, x2, y2]`` rows.
        prob: Probability of applying the rotation.
        border_value: Fill value handed to ``cv2.warpAffine`` for destination
            pixels that have no source pixel.

    Returns:
        ``(image, boxes)``. When the rotation is applied, ``boxes`` is a new
        float array of shape ``(N, 4)`` holding ``[min_x, min_y, max_x, max_y]``
        rows; it keeps that 2-D shape even when ``N`` is 0 so that callers can
        still index columns.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes

    rotate_degree = ROTATE_DEGREE[np.random.randint(0, len(ROTATE_DEGREE))]
    h, w = image.shape[:2]

    # Rotation about the image centre, no scaling.
    # NOTE(review): OpenCV places pixel centres on integer coordinates, so
    # ((w - 1) / 2, (h - 1) / 2) may be the more exact centre - confirm whether
    # the possible one-pixel border matters for this pipeline.
    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2), angle=rotate_degree, scale=1)

    # |cos| and |sin| of the angle, read back from the matrix.
    abs_cos_angle = np.abs(M[0, 0])
    abs_sin_angle = np.abs(M[0, 1])

    # Size of the axis-aligned bounding rectangle of the rotated image.
    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
    new_h = int(h * abs_cos_angle + w * abs_sin_angle)

    # Move the old centre onto the centre of the new canvas. True division
    # matches the float centre used above; floor division would introduce a
    # half-pixel shift whenever the old and new sizes differ in parity.
    M[0, 2] += new_w / 2 - w / 2
    M[1, 2] += new_h / 2 - h / 2

    image = cv2.warpAffine(
        image,
        M=M,
        dsize=(new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )

    new_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Transform the four corners in homogeneous coordinates (M is 2x3).
        points = M.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        # Sort each coordinate row once; the new box uses the mean of the two
        # smallest and of the two largest values per axis. For right-angle
        # rotations those pairs coincide up to rounding.
        ordered = np.sort(points, axis=1)
        min_x, min_y = ordered[:, :2].mean(axis=1)
        max_x, max_y = ordered[:, 2:].mean(axis=1)
        new_boxes.append([min_x, min_y, max_x, max_y])

    # reshape keeps the (N, 4) layout when there are no boxes at all.
    boxes = np.array(new_boxes, dtype=np.float64).reshape(-1, 4)
    return image, boxes
|
|
|
|
|
|
def crop(image, boxes, prob=0.5):
    """Randomly crop the image around the region covered by the boxes.

    With probability ``1 - prob`` the inputs are returned untouched. Otherwise
    a crop window is drawn whose top-left corner lies between the origin and
    half-way to the nearest box edge, and whose bottom-right corner lies
    between the farthest box edge and half-way to the image border, so no box
    is cut. Box coordinates are shifted into the cropped frame.

    Args:
        image: Array whose first two dimensions are (height, width).
        boxes: Array of ``[x1, y1, x2, y2]`` rows.
        prob: Probability of applying the crop.

    Returns:
        ``(image, boxes)``. The image is a slice (view) of the input; the boxes
        are a shifted copy, so the caller's array is never modified. When there
        are no boxes the inputs are returned unchanged, because there is
        nothing to bound the crop window with.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    # np.min / np.max raise on an empty array, so bail out early.
    if len(boxes) == 0:
        return image, boxes

    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:4]

    # The max(..., 1) / max(..., max + 1) terms keep every randint range
    # non-empty when a box already touches the image border.
    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))

    image = image[random_y1:random_y2, random_x1:random_x2]

    # Work on a copy so the caller's annotations are left intact.
    boxes = boxes.copy()
    boxes[:, [0, 2]] = boxes[:, [0, 2]] - random_x1
    boxes[:, [1, 3]] = boxes[:, [1, 3]] - random_y1
    return image, boxes
|
|
|
|
|
|
def flipx(image, boxes, prob=0.5):
    """Randomly mirror the image and its boxes left-to-right.

    With probability ``1 - prob`` the inputs are returned untouched.

    Args:
        image: Array whose second dimension is the width.
        boxes: Array of ``[x1, y1, x2, y2]`` rows.
        prob: Probability of applying the flip.

    Returns:
        ``(image, boxes)``. The image is a reversed view of the input (not a
        contiguous copy); the boxes are a new array with ``x1``/``x2`` mirrored
        about the image width, so the caller's array is never modified.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes

    image = image[:, ::-1]
    # Nothing to mirror; also avoids column indexing on a shape-(0,) array.
    if boxes.size == 0:
        return image, boxes

    w = image.shape[1]
    flipped = boxes.copy()
    # The old right edge becomes the new left edge and vice versa.
    flipped[:, 0] = w - boxes[:, 2]
    flipped[:, 2] = w - boxes[:, 0]
    return image, flipped
|
|
|
|
|
|
def multi_scale(image, boxes, prob=1.):
    """Randomly rescale the image and its boxes by a common factor.

    With probability ``1 - prob`` the inputs are returned untouched. Otherwise
    a factor is drawn from the grid ``np.arange(0.7, 1.4, 0.1)``, the image is
    resized with bilinear interpolation and the boxes are multiplied by the
    same factor, rounded and cast to ``int32``.

    Args:
        image: Array whose first two dimensions are (height, width).
        boxes: Array of box coordinates.
        prob: Probability of applying the rescale.

    Returns:
        ``(image, boxes)`` after the optional rescale.
    """
    skip_threshold = 1 - prob
    if np.random.uniform() < skip_threshold:
        return image, boxes

    height, width = image.shape[:2]
    candidate_factors = np.arange(0.7, 1.4, 0.1)
    factor = np.random.choice(candidate_factors)

    target_h = int(round(height * factor))
    target_w = int(round(width * factor))
    # cv2.resize takes the destination size as (width, height).
    resized = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    scaled_boxes = np.round(boxes * factor).astype(np.int32)
    return resized, scaled_boxes
|
|
|
|
|
|
def translate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    """Randomly shift the image and its boxes inside a fixed-size canvas.

    With probability ``1 - prob`` the inputs are returned untouched. Otherwise
    a translation matrix is obtained from ``translation_xy`` with bounds
    derived from the free margin around the boxes, the image is warped with
    ``cv2.warpAffine`` onto a canvas of the original size, and each box is
    replaced by the axis-aligned box around its transformed corners.

    Args:
        image: Array whose first two dimensions are (height, width).
        boxes: Iterable of ``[x1, y1, x2, y2]`` rows.
        prob: Probability of applying the translation.
        border_value: Fill value handed to ``cv2.warpAffine`` for destination
            pixels that have no source pixel.

    Returns:
        ``(image, boxes)``. When the translation is applied, ``boxes`` is a new
        array of ``[min_x, min_y, max_x, max_y]`` rows. When there are no boxes
        the inputs are returned unchanged, because the translation bounds are
        computed from the boxes.
    """
    if np.random.uniform() < (1 - prob):
        return image, boxes
    # np.min / np.max raise on an empty array, so bail out early.
    if len(boxes) == 0:
        return image, boxes

    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:4]

    # Bounds: up to about half of the free margin on each side of the boxes.
    # NOTE(review): presumably translation_xy samples a shift between ``min``
    # and ``max`` and returns a 3x3 homogeneous matrix - confirm against
    # augmentor.transform.
    translation_matrix = translation_xy(
        min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)),
        prob=1.,
    )
    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))

    # warpAffine takes the top two rows of the matrix.
    image = cv2.warpAffine(
        image,
        translation_matrix[:2, :],
        dsize=(w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )

    new_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Transform the four corners in homogeneous coordinates.
        points = translation_matrix.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        # Only the x and y rows matter; the third row is the homogeneous one.
        min_x, min_y = np.min(points, axis=1)[:2]
        max_x, max_y = np.max(points, axis=1)[:2]
        new_boxes.append([min_x, min_y, max_x, max_y])

    boxes = np.array(new_boxes)
    return image, boxes
|
|
|
|
|
|
class MiscEffect:
    """Callable that chains the module's random geometric augmentations.

    Each constructor argument is the probability forwarded to the matching
    augmentation function; ``border_value`` is forwarded to the two functions
    that accept it (``rotate`` and ``translate``).
    """

    def __init__(self, multi_scale_prob=0.5, rotate_prob=0.05, flip_prob=0.5, crop_prob=0.5, translate_prob=0.5,
                 border_value=(128, 128, 128)):
        # Per-step probabilities, stored in the order the steps are applied.
        self.multi_scale_prob = multi_scale_prob
        self.rotate_prob = rotate_prob
        self.flip_prob = flip_prob
        self.crop_prob = crop_prob
        self.translate_prob = translate_prob
        # Fill value for the steps that warp the image.
        self.border_value = border_value

    def __call__(self, image, boxes):
        """Apply multi_scale, rotate, flipx, crop and translate, in that order.

        Returns the ``(image, boxes)`` pair produced by the last step.
        """
        steps = (
            (multi_scale, {'prob': self.multi_scale_prob}),
            (rotate, {'prob': self.rotate_prob, 'border_value': self.border_value}),
            (flipx, {'prob': self.flip_prob}),
            (crop, {'prob': self.crop_prob}),
            (translate, {'prob': self.translate_prob, 'border_value': self.border_value}),
        )
        for augment, options in steps:
            image, boxes = augment(image, boxes, **options)
        return image, boxes
|
|
|
|
|
|
if __name__ == '__main__':
    # Visual smoke test: show each Pascal VOC sample before and after
    # multi_scale, with its boxes drawn, and wait for a key press per sample.
    from generators.pascal import PascalVocGenerator

    def _draw_boxes(canvas, box_array, color, thickness):
        # Draw every box as a rectangle directly onto ``canvas``.
        for corner in box_array.astype(np.int32):
            cv2.rectangle(canvas, (corner[0], corner[1]), (corner[2], corner[3]), color, thickness)

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        batch_size=1,
        shuffle_groups=False
    )
    # Kept for switching the demo over to the full augmentation chain.
    misc_effect = MiscEffect()

    for index in range(train_generator.size()):
        # The conversion below treats the loaded image as RGB; OpenCV shows BGR.
        image = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
        boxes = train_generator.load_annotations(index)['bboxes']

        # Original boxes in red, drawn before the augmentation is applied.
        _draw_boxes(image, boxes, (0, 0, 255), 2)
        src_image = image.copy()
        cv2.imshow('src_image', src_image)

        image, boxes = multi_scale(image, boxes)
        # Fresh copy of the augmented image before drawing on it.
        image = image.copy()

        # Augmented boxes in green on top of the augmented image.
        _draw_boxes(image, boxes, (0, 255, 0), 1)
        cv2.imshow('image', image)
        cv2.waitKey(0)
|