Yolov5s/ai_training/detection/fcos/utils/anchors.py

# -*- coding: utf-8 -*-
import numpy as np

import keras

from utils.compute_overlap import compute_overlap


# class AnchorParameters:
#     """
#     The parameters that define how anchors are generated.
#
#     Args
#         sizes : List of sizes to use. Each size corresponds to one feature level.
#         strides : List of strides to use. Each stride correspond to one feature level.
#         ratios : List of ratios to use per location in a feature map.
#         scales : List of scales to use per location in a feature map.
#     """
#
#     def __init__(self, sizes, strides, ratios, scales, interest_sizes):
#         self.sizes = sizes
#         self.strides = strides
#         self.ratios = ratios
#         self.scales = scales
#         self.interest_sizes = interest_sizes
#
#     def num_anchors(self):
#         return len(self.ratios) * len(self.scales)

class AnchorParameters:
    """
    The parameters that define how anchors are generated.

    Args
        strides : List of strides to use. Each stride correspond to one feature level.
        scales : List of scales to use per location in a feature map.
    """

    def __init__(self, strides, interest_sizes):
        self.strides = strides
        self.interest_sizes = interest_sizes

"""
The default anchor parameters.
"""
AnchorParameters.default = AnchorParameters(
    # sizes=[32, 64, 128, 256, 512],
    strides=[8, 16, 32, 64, 128],
    # ratios=np.array([0.5, 1, 2], keras.backend.floatx()),
    # scales=np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
    interest_sizes=[
        [-1, 64],
        [64, 128],
        [128, 256],
        [256, 512],
        [512, 1e8],
    ],
)


def anchor_targets_bbox(
        anchors,
        image_group,
        annotations_group,
        num_classes,
        negative_overlap=0.4,
        positive_overlap=0.5
):
    """
    Generate anchor targets for bbox detection.

    Args
        anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
        image_group: List of BGR images.
        annotations_group: List of annotations (np.array of shape (N, 5) for (x1, y1, x2, y2, label)).
        num_classes: Number of classes to predict.
        mask_shape: If the image is padded with zeros, mask_shape can be used to mark the relevant part of the image.
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
        positive_overlap: IoU overlap or positive anchors (all anchors with overlap > positive_overlap are positive).

    Returns
        labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1),
                      where N is the number of anchors for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg).
        regression_batch: batch that contains bounding-box regression targets for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
                      where N is the number of anchors for an image, the first 4 columns define regression targets for (x1, y1, x2, y2) and the
                      last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
    """

    assert (len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
    assert (len(annotations_group) > 0), "No data received to compute anchor targets for."
    for annotations in annotations_group:
        assert ('bboxes' in annotations), "Annotations should contain bboxes."
        assert ('labels' in annotations), "Annotations should contain labels."

    batch_size = len(image_group)

    regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
    labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())

    # compute labels and regression targets
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        if annotations['bboxes'].shape[0]:
            # obtain indices of gt annotations with the greatest overlap
            positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors,
                                                                                            annotations['bboxes'],
                                                                                            negative_overlap,
                                                                                            positive_overlap)
            labels_batch[index, ignore_indices, -1] = -1
            labels_batch[index, positive_indices, -1] = 1

            regression_batch[index, ignore_indices, -1] = -1
            regression_batch[index, positive_indices, -1] = 1

            # compute target class labels
            labels_batch[
                index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1

            regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])

        # ignore anchors outside of image
        if image.shape:
            anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T
            indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0])

            labels_batch[index, indices, -1] = -1
            regression_batch[index, indices, -1] = -1

    return regression_batch, labels_batch


def compute_gt_annotations(
        anchors,
        annotations,
        negative_overlap=0.4,
        positive_overlap=0.5
):
    """
    Obtain indices of gt annotations with the greatest overlap.

    Args
        anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
        annotations: np.array of shape (K, 5) for (x1, y1, x2, y2, label).
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
        positive_overlap: IoU overlap or positive anchors (all anchors with overlap > positive_overlap are positive).

    Returns
        positive_indices: indices of positive anchors
        ignore_indices: indices of ignored anchors
        argmax_overlaps_inds: ordered overlaps indices
    """

    overlaps = compute_overlap(anchors.astype(np.float64), annotations.astype(np.float64))
    argmax_overlaps_inds = np.argmax(overlaps, axis=1)
    max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

    # assign "dont care" labels
    positive_indices = max_overlaps >= positive_overlap
    ignore_indices = (max_overlaps > negative_overlap) & ~positive_indices

    return positive_indices, ignore_indices, argmax_overlaps_inds


def layer_shapes(image_shape, model):
    """
    Compute layer shapes given input image shape and the model.

    Args
        image_shape: The shape of the image.
        model: The model to use for computing how the image shape is transformed in the pyramid.

    Returns
        A dictionary mapping layer names to image shapes.
    """
    shape = {
        model.layers[0].name: (None,) + image_shape,
    }

    for layer in model.layers[1:]:
        nodes = layer._inbound_nodes
        for node in nodes:
            input_shapes = [shape[inbound_layer.name] for inbound_layer in node.inbound_layers]
            if not input_shapes:
                continue
            shape[layer.name] = layer.compute_output_shape(input_shapes[0] if len(input_shapes) == 1 else input_shapes)

    return shape


def make_shapes_callback(model):
    """
    Make a function for getting the shape of the pyramid levels.
    """

    def get_shapes(image_shape, pyramid_levels):
        shape = layer_shapes(image_shape, model)
        image_shapes = [shape["P{}".format(level)][1:3] for level in pyramid_levels]
        return image_shapes

    return get_shapes


def guess_shapes(image_shape, pyramid_levels=(3, 4, 5, 6, 7)):
    """
    Guess shapes based on pyramid levels.

    Args
         image_shape: The shape of the image.
         pyramid_levels: A list of what pyramid levels are used.

    Returns
        A list of image shapes at each pyramid level.
    """
    image_shape = np.array(image_shape[:2])
    feature_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
    return feature_shapes


def compute_locations_per_level(h, w, stride):
    # [0, 8, 16]
    shifts_x = np.arange(0, w * stride, step=stride, dtype=np.float32)
    # [0, 8, 16, 24]
    shifts_y = np.arange(0, h * stride, step=stride, dtype=np.float32)
    shift_x, shift_y = np.meshgrid(shifts_x, shifts_y)
    # (h * w, )
    shift_x = shift_x.reshape(-1)
    # (h * w, )
    shift_y = shift_y.reshape(-1)
    locations = np.stack((shift_x, shift_y), axis=1) + stride // 2
    return locations


def compute_locations(feature_shapes, anchor_param):
    """

    Args:
        feature_shapes: list of (h, w)
        anchor_params: instance of AnchorParameters

    Returns:
        locations: list of np.array (shape is (fh * fw, 2))

    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default
    fpn_strides = anchor_param.strides
    n_stage = len(fpn_strides)
    # print('fpn stides in generator', fpn_strides)
    locations = []
    for level, (feature_shape, fpn_stride) in enumerate(zip(feature_shapes[:n_stage], fpn_strides)):
        h, w = feature_shape
        locations_per_level = compute_locations_per_level(
            h, w, fpn_stride
        )
        locations.append(locations_per_level)
    return locations


def compute_interest_sizes(num_locations_each_level, anchor_param):
    """

    Args:
        num_locations_each_level: list of int
        anchor_param:

    Returns:
        interest_sizes (np.array): (sum(fh * fw), 2)

    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default
    interest_sizes = anchor_param.interest_sizes
    # print('interest_sizes in generator', interest_sizes)

    assert len(num_locations_each_level) == len(interest_sizes)
    tiled_interest_sizes = []
    for num_locations, interest_size in zip(num_locations_each_level, interest_sizes):
        interest_size = np.array(interest_size)
        interest_size = np.expand_dims(interest_size, axis=0)
        interest_size = np.tile(interest_size, (num_locations, 1))
        tiled_interest_sizes.append(interest_size)
    interest_sizes = np.concatenate(tiled_interest_sizes, axis=0)
    return interest_sizes

def get_sample_region(gt, anchor_param, num_points_per, cx, cy, radius=1.5):
    '''
    gt: (n, 4)
    strides: []
    num_points_per: []
    gt_xs: (m,)
    gt_ys: (m,)
    This code is from
    https://github.com/yqyao/FCOS_PLUS/blob/0d20ba34ccc316650d8c30febb2eb40cb6eaae37/
    maskrcnn_benchmark/modeling/rpn/fcos/loss.py#L42
    '''
    if anchor_param is None:
        anchor_param = AnchorParameters.default
    strides = anchor_param.strides
    n = gt.shape[0]
    # num of position
    m = len(cx)
    # (m, n, 4)
    gt = np.tile(gt[None], (m,1,1))
    assert gt.shape==(m,n,4)
    # (m, n)
    center_x = (gt[..., 0] + gt[..., 2]) / 2
    # (m, n)
    center_y = (gt[..., 1] + gt[..., 3]) / 2
    # (m, n, 4)
    center_gt = np.zeros(gt.shape)
    # no gt
    if center_x[..., 0].sum() == 0:
        return np.zeros((m, n), dtype=np.uint8)
    beg = 0
    for level, n_p in enumerate(num_points_per):
        end = beg + n_p
        stride = strides[level] * radius
        xmin = center_x[beg:end] - stride
        ymin = center_y[beg:end] - stride
        xmax = center_x[beg:end] + stride
        ymax = center_y[beg:end] + stride
        # limit sample region in gt
        center_gt[beg:end, :, 0] = np.where(
            xmin > gt[beg:end, :, 0], xmin, gt[beg:end, :, 0]
        )
        center_gt[beg:end, :, 1] = np.where(
            ymin > gt[beg:end, :, 1], ymin, gt[beg:end, :, 1]
        )
        center_gt[beg:end, :, 2] = np.where(
            xmax > gt[beg:end, :, 2],
            gt[beg:end, :, 2], xmax
        )
        center_gt[beg:end, :, 3] = np.where(
            ymax > gt[beg:end, :, 3],
            gt[beg:end, :, 3], ymax
        )
        beg = end
    # (m, n) - (1, n) --> (m, n)
    left = cx[:, None] - center_gt[..., 0]
    top = cy[:, None] - center_gt[..., 1]
    # (m, n) - (m, 1) --> (m, n)
    right = center_gt[..., 2] - cx[:, None]
    bottom = center_gt[..., 3] - cy[:, None]
    # (m,n,4)
    center_bbox = np.stack((left, top, right, bottom), -1)
    inside_gt_bbox_mask = center_bbox.min(axis=2) > 0
    return inside_gt_bbox_mask

def anchors_for_shape(
        image_shape,
        pyramid_levels=None,
        anchor_params=None,
        shapes_callback=None,
):
    """
    Generators anchors for a given shape.

    Args
        image_shape: The shape of the image.
        pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
        anchor_params: Struct containing anchor parameters. If None, default values are used.
        shapes_callback: Function to call for getting the shape of the image at different pyramid levels.

    Returns
        np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
    """

    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]

    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if shapes_callback is None:
        shapes_callback = guess_shapes
    feature_map_shapes = shapes_callback(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels
    all_anchors = np.zeros((0, 4))
    for idx, p in enumerate(pyramid_levels):
        anchors = generate_anchors(
            base_size=anchor_params.sizes[idx],
            ratios=anchor_params.ratios,
            scales=anchor_params.scales
        )
        shifted_anchors = shift(feature_map_shapes[idx], anchor_params.strides[idx], anchors)
        all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

    return all_anchors


def shift(feature_map_shape, stride, anchors):
    """
    Produce shifted anchors based on shape of the map and stride size.

    Args
        feature_map_shape : Shape to shift the anchors over.
        stride : Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.
    """

    # create a grid starting from half stride from the top left corner
    shift_x = (np.arange(0, feature_map_shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, feature_map_shape[0]) + 0.5) * stride

    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel(),
        shift_x.ravel(), shift_y.ravel()
    )).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))

    return all_anchors


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X scales w.r.t. a reference window.

    Args:
        base_size:
        ratios:
        scales:

    Returns:
        anchors: (num_anchors, 4), 4 为以 (0, 0) 为中心点的矩形坐标 (-w/2, -h/2, w/2, h/2)

    """
    if ratios is None:
        ratios = AnchorParameters.default.ratios

    if scales is None:
        scales = AnchorParameters.default.scales

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    # (num_anchors, )
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios
    # (num_anchors, )
    anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
    anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))

    # transform from (cx, cy, w, h) -> (x1, y1, x2, y2)
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors


def bbox_transform(anchors, gt_boxes, mean=None, std=None):
    """
    Args:
        anchors: (N, 4)
        gt_boxes: (N, 4)
        mean:
        std:

    Returns:

    """

    if mean is None:
        mean = np.array([0, 0, 0, 0])
    if std is None:
        std = np.array([0.2, 0.2, 0.2, 0.2])

    if isinstance(mean, (list, tuple)):
        mean = np.array(mean)
    elif not isinstance(mean, np.ndarray):
        raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

    if isinstance(std, (list, tuple)):
        std = np.array(std)
    elif not isinstance(std, np.ndarray):
        raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

    anchor_widths = anchors[:, 2] - anchors[:, 0]
    anchor_heights = anchors[:, 3] - anchors[:, 1]

    targets_dx1 = (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths
    targets_dy1 = (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights
    targets_dx2 = (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths
    targets_dy2 = (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights
    # (4, N)
    targets = np.stack((targets_dx1, targets_dy1, targets_dx2, targets_dy2))
    # (N, 4)
    targets = targets.T

    targets = (targets - mean) / std

    return targets