Yolov5s/ai_training/detection/fcos/augmentor/misc.py

import cv2
import numpy as np
from augmentor.transform import translation_xy, change_transform_origin, scaling_xy

ROTATE_DEGREE = [90, 180, 270]


def rotate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    random_prob = np.random.uniform()
    if random_prob < (1 - prob):
        return image, boxes
    rotate_degree = ROTATE_DEGREE[np.random.randint(0, 3)]
    h, w = image.shape[:2]
    # Compute the rotation matrix.
    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2),
                                angle=rotate_degree,
                                scale=1)

    # Get the sine and cosine from the rotation matrix.
    abs_cos_angle = np.abs(M[0, 0])
    abs_sin_angle = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image.
    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
    new_h = int(h * abs_cos_angle + w * abs_sin_angle)

    # Adjust the rotation matrix to take into account the translation.
    M[0, 2] += new_w // 2 - w // 2
    M[1, 2] += new_h // 2 - h // 2

    # Rotate the image.
    image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
                           borderValue=border_value)

    new_boxes = []
    for box in boxes:
        x1, y1, x2, y2 = box
        points = M.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])

        # Extract the min and max corners again.
        min_xy = np.sort(points, axis=1)[:, :2]
        min_x = np.mean(min_xy[0])
        min_y = np.mean(min_xy[1])
        max_xy = np.sort(points, axis=1)[:, 2:]
        max_x = np.mean(max_xy[0])
        max_y = np.mean(max_xy[1])

        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes


def crop(image, boxes, prob=0.5):
    random_prob = np.random.uniform()
    if random_prob < (1 - prob):
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))
    image = image[random_y1:random_y2, random_x1:random_x2]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] - random_x1
    boxes[:, [1, 3]] = boxes[:, [1, 3]] - random_y1
    return image, boxes


def flipx(image, boxes, prob=0.5):
    random_prob = np.random.uniform()
    if random_prob < (1 - prob):
        return image, boxes
    image = image[:, ::-1]
    h, w = image.shape[:2]
    tmp = boxes[:, 0].copy()
    boxes[:, 0] = w - boxes[:, 2]
    boxes[:, 2] = w - tmp
    return image, boxes


def multi_scale(image, boxes, prob=1.):
    random_prob = np.random.uniform()
    if random_prob < (1 - prob):
        return image, boxes
    h, w = image.shape[:2]
    scale = np.random.choice(np.arange(0.7, 1.4, 0.1))
    nh, nw = int(round(h * scale)), int(round(w * scale))
    image = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR)
    boxes = np.round(boxes * scale).astype(np.int32)
    return image, boxes


def translate(image, boxes, prob=0.5, border_value=(128, 128, 128)):
    random_prob = np.random.uniform()
    if random_prob < (1 - prob):
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    translation_matrix = translation_xy(min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
                                        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)), prob=1.)
    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))
    image = cv2.warpAffine(
        image,
        translation_matrix[:2, :],
        dsize=(w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border_value,
    )
    new_boxes = []
    for box in boxes:
        x1, y1, x2, y2 = box
        points = translation_matrix.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        min_x, min_y = np.min(points, axis=1)[:2]
        max_x, max_y = np.max(points, axis=1)[:2]
        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes


class MiscEffect:
    def __init__(self, multi_scale_prob=0.5, rotate_prob=0.05, flip_prob=0.5, crop_prob=0.5, translate_prob=0.5,
                 border_value=(128, 128, 128)):
        self.multi_scale_prob = multi_scale_prob
        self.rotate_prob = rotate_prob
        self.flip_prob = flip_prob
        self.crop_prob = crop_prob
        self.translate_prob = translate_prob
        self.border_value = border_value

    def __call__(self, image, boxes):
        image, boxes = multi_scale(image, boxes, prob=self.multi_scale_prob)
        image, boxes = rotate(image, boxes, prob=self.rotate_prob, border_value=self.border_value)
        image, boxes = flipx(image, boxes, prob=self.flip_prob)
        image, boxes = crop(image, boxes, prob=self.crop_prob)
        image, boxes = translate(image, boxes, prob=self.translate_prob, border_value=self.border_value)
        return image, boxes


if __name__ == '__main__':
    from generators.pascal import PascalVocGenerator

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        batch_size=1,
        shuffle_groups=False
    )
    misc_effect = MiscEffect()
    for i in range(train_generator.size()):
        image = train_generator.load_image(i)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        annotations = train_generator.load_annotations(i)
        boxes = annotations['bboxes']
        for box in boxes.astype(np.int32):
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        src_image = image.copy()
        # cv2.namedWindow('src_image', cv2.WINDOW_NORMAL)
        cv2.imshow('src_image', src_image)
        # image, boxes = misc_effect(image, boxes)
        image, boxes = multi_scale(image, boxes)
        image = image.copy()
        for box in boxes.astype(np.int32):
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
        # cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.imshow('image', image)
        cv2.waitKey(0)