# -*- coding: utf-8 -*-
|
|
import numpy as np
|
|
|
|
import keras
|
|
|
|
from utils.compute_overlap import compute_overlap
|
|
|
|
|
|
# class AnchorParameters:
|
|
# """
|
|
# The parameters that define how anchors are generated.
|
|
#
|
|
# Args
|
|
# sizes : List of sizes to use. Each size corresponds to one feature level.
|
|
# strides : List of strides to use. Each stride correspond to one feature level.
|
|
# ratios : List of ratios to use per location in a feature map.
|
|
# scales : List of scales to use per location in a feature map.
|
|
# """
|
|
#
|
|
# def __init__(self, sizes, strides, ratios, scales, interest_sizes):
|
|
# self.sizes = sizes
|
|
# self.strides = strides
|
|
# self.ratios = ratios
|
|
# self.scales = scales
|
|
# self.interest_sizes = interest_sizes
|
|
#
|
|
# def num_anchors(self):
|
|
# return len(self.ratios) * len(self.scales)
|
|
|
|
class AnchorParameters:
    """
    Container for the configuration used when generating anchor locations.

    Args
        strides        : List of strides, one per feature pyramid level.
        interest_sizes : List of [min, max] object-size ranges, one per feature level.
    """

    def __init__(self, strides, interest_sizes):
        # One stride and one interest-size range per pyramid level.
        self.interest_sizes = interest_sizes
        self.strides = strides
|
|
|
|
"""
|
|
The default anchor parameters.
|
|
"""
|
|
AnchorParameters.default = AnchorParameters(
|
|
# sizes=[32, 64, 128, 256, 512],
|
|
strides=[8, 16, 32, 64, 128],
|
|
# ratios=np.array([0.5, 1, 2], keras.backend.floatx()),
|
|
# scales=np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
|
|
interest_sizes=[
|
|
[-1, 64],
|
|
[64, 128],
|
|
[128, 256],
|
|
[256, 512],
|
|
[512, 1e8],
|
|
],
|
|
)
|
|
|
|
|
|
def anchor_targets_bbox(
    anchors,
    image_group,
    annotations_group,
    num_classes,
    negative_overlap=0.4,
    positive_overlap=0.5
):
    """
    Generate anchor targets for bbox detection.

    Args
        anchors: np.array of anchors of shape (N, 4) for (x1, y1, x2, y2).
        image_group: List of BGR images.
        annotations_group: List of annotation dicts, each with keys 'bboxes'
            (np.array of shape (K, 4) for (x1, y1, x2, y2)) and 'labels'
            (np.array of shape (K,)).
        num_classes: Number of classes to predict.
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
        positive_overlap: IoU overlap for positive anchors (all anchors with overlap >= positive_overlap are positive).

    Returns
        regression_batch: batch that contains bounding-box regression targets for an image & anchor states
            (np.array of shape (batch_size, N, 4 + 1), where N is the number of anchors for an image,
            the first 4 columns define regression targets for (x1, y1, x2, y2) and the last column
            defines anchor states (-1 for ignore, 0 for bg, 1 for fg)).
        labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1),
            where N is the number of anchors for an image and the last column defines the anchor state
            (-1 for ignore, 0 for bg, 1 for fg)).
    """

    assert (len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
    assert (len(annotations_group) > 0), "No data received to compute anchor targets for."
    for annotations in annotations_group:
        assert ('bboxes' in annotations), "Annotations should contain bboxes."
        assert ('labels' in annotations), "Annotations should contain labels."

    batch_size = len(image_group)

    # Last column of each output is the anchor state: -1 ignore, 0 bg, 1 fg.
    regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
    labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())

    # compute labels and regression targets
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        if annotations['bboxes'].shape[0]:
            # obtain indices of gt annotations with the greatest overlap
            positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors,
                                                                                            annotations['bboxes'],
                                                                                            negative_overlap,
                                                                                            positive_overlap)
            # mark anchor states: -1 for anchors to ignore, 1 for foreground
            labels_batch[index, ignore_indices, -1] = -1
            labels_batch[index, positive_indices, -1] = 1

            regression_batch[index, ignore_indices, -1] = -1
            regression_batch[index, positive_indices, -1] = 1

            # compute target class labels
            labels_batch[
                index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1

            # regression targets are computed for every anchor against its
            # best-overlapping gt box; the state column selects which matter
            regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])

        # ignore anchors outside of image
        # NOTE(review): image.shape is a tuple and is truthy for any
        # non-scalar array, so this branch presumably always runs -- confirm.
        if image.shape:
            anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T
            indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0])

            labels_batch[index, indices, -1] = -1
            regression_batch[index, indices, -1] = -1

    return regression_batch, labels_batch
|
|
|
|
|
|
def compute_gt_annotations(
    anchors,
    annotations,
    negative_overlap=0.4,
    positive_overlap=0.5
):
    """
    Match every anchor to the ground-truth box it overlaps most, and classify
    anchors as positive / ignored from that best IoU.

    Args
        anchors: np.array of shape (N, 4) with (x1, y1, x2, y2) boxes.
        annotations: np.array of shape (K, 5) with (x1, y1, x2, y2, label).
        negative_overlap: anchors whose best IoU is <= this are background.
        positive_overlap: anchors whose best IoU is >= this are foreground.

    Returns
        positive_indices: boolean mask of positive anchors.
        ignore_indices: boolean mask of anchors to ignore (between thresholds).
        argmax_overlaps_inds: per-anchor index of the best-overlapping gt box.
    """
    iou = compute_overlap(anchors.astype(np.float64), annotations.astype(np.float64))

    # Index of the best-matching annotation for each anchor, and that IoU.
    best_inds = iou.argmax(axis=1)
    best_iou = iou[np.arange(iou.shape[0]), best_inds]

    # Anchors at or above the positive threshold are foreground; anchors that
    # fall strictly between the two thresholds are neither fg nor bg.
    positive_mask = best_iou >= positive_overlap
    ignore_mask = np.logical_and(best_iou > negative_overlap, np.logical_not(positive_mask))

    return positive_mask, ignore_mask, best_inds
|
|
|
|
|
|
def layer_shapes(image_shape, model):
    """
    Compute the output shape of every layer of a model for a given input shape.

    Args
        image_shape: The shape of the input image (without batch dimension).
        model: The model used to trace how the image shape is transformed.

    Returns
        A dictionary mapping layer names to output shapes.
    """
    shapes = {model.layers[0].name: (None,) + image_shape}

    # Walk the remaining layers in order, propagating shapes through each
    # inbound node. Layers whose inputs are not known yet are skipped.
    for layer in model.layers[1:]:
        for node in layer._inbound_nodes:
            inputs = [shapes[inbound.name] for inbound in node.inbound_layers]
            if inputs:
                shapes[layer.name] = layer.compute_output_shape(inputs[0] if len(inputs) == 1 else inputs)

    return shapes
|
|
|
|
|
|
def make_shapes_callback(model):
    """
    Build a callback that computes pyramid-level feature shapes for a model.
    """

    def get_shapes(image_shape, pyramid_levels):
        all_shapes = layer_shapes(image_shape, model)
        # Keep only (height, width) of each P{level} output, dropping batch.
        return [all_shapes["P{}".format(level)][1:3] for level in pyramid_levels]

    return get_shapes
|
|
|
|
|
|
def guess_shapes(image_shape, pyramid_levels=(3, 4, 5, 6, 7)):
    """
    Guess feature-map shapes from the image shape and pyramid levels.

    Args
        image_shape: The shape of the image (height, width, ...).
        pyramid_levels: Iterable of pyramid levels in use.

    Returns
        A list of (h, w) np.arrays, one per pyramid level, where each
        dimension is ceil(image_dim / 2**level).
    """
    hw = np.array(image_shape[:2])
    # Ceil-divide the image size by the stride (2**level) of each level.
    return [(hw + (1 << level) - 1) // (1 << level) for level in pyramid_levels]
|
|
|
|
|
|
def compute_locations_per_level(h, w, stride):
    """
    Compute the image-space center of every cell of an (h, w) feature map.

    Args
        h: feature-map height.
        w: feature-map width.
        stride: feature-map stride w.r.t. the input image.

    Returns
        np.array of shape (h * w, 2) with (x, y) centers, row-major order.
    """
    # Column x coordinates, e.g. [0, 8, 16] for w=3, stride=8.
    xs = np.arange(w, dtype=np.float32) * stride
    # Row y coordinates.
    ys = np.arange(h, dtype=np.float32) * stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    centers = np.stack((grid_x.ravel(), grid_y.ravel()), axis=1)
    # Shift from each cell's top-left corner to its center.
    return centers + stride // 2
|
|
|
|
|
|
def compute_locations(feature_shapes, anchor_param):
    """
    Compute location grids for every FPN level.

    Args:
        feature_shapes: list of (h, w) feature-map shapes.
        anchor_param: AnchorParameters instance, or None for the defaults.

    Returns:
        locations: list of np.array, each of shape (fh * fw, 2).
    """
    params = anchor_param if anchor_param is not None else AnchorParameters.default
    strides = params.strides

    # Only the first len(strides) feature shapes correspond to FPN levels.
    return [
        compute_locations_per_level(shape[0], shape[1], stride)
        for shape, stride in zip(feature_shapes[:len(strides)], strides)
    ]
|
|
|
|
|
|
def compute_interest_sizes(num_locations_each_level, anchor_param):
    """
    Tile each level's object-size range over all of that level's locations.

    Args:
        num_locations_each_level: list of int, number of locations per level.
        anchor_param: AnchorParameters instance, or None for the defaults.

    Returns:
        interest_sizes (np.array): shape (sum(fh * fw), 2); row i holds the
        [min, max] size range of the level that location i belongs to.
    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default
    per_level_sizes = anchor_param.interest_sizes

    # One size range is required per level of locations.
    assert len(num_locations_each_level) == len(per_level_sizes)

    tiled = [
        np.tile(np.asarray(size_range)[None, :], (count, 1))
        for count, size_range in zip(num_locations_each_level, per_level_sizes)
    ]
    return np.concatenate(tiled, axis=0)
|
|
|
|
def get_sample_region(gt, anchor_param, num_points_per, cx, cy, radius=1.5):
    '''
    Compute, for every (location, gt-box) pair, whether the location falls
    inside the "center sampling" region of the box: the gt box clipped to a
    square of half-size (radius * stride) around the box center, where stride
    is the pyramid level's stride.

    Args
        gt: (n, 4) ground-truth boxes as (x1, y1, x2, y2).
        anchor_param: AnchorParameters instance (None -> AnchorParameters.default);
            only its per-level strides are used here.
        num_points_per: list of int, number of locations per pyramid level
            (assumed to sum to m -- TODO confirm against callers).
        cx: (m,) x coordinates of the candidate locations.
        cy: (m,) y coordinates of the candidate locations.
        radius: half-size of the center region, in units of the level stride.

    Returns
        (m, n) boolean mask; True where location i lies strictly inside the
        center region of gt box j. Returns an all-zero uint8 mask when the
        "no gt" check below fires.

    This code is from
    https://github.com/yqyao/FCOS_PLUS/blob/0d20ba34ccc316650d8c30febb2eb40cb6eaae37/
    maskrcnn_benchmark/modeling/rpn/fcos/loss.py#L42
    '''
    if anchor_param is None:
        anchor_param = AnchorParameters.default
    strides = anchor_param.strides
    n = gt.shape[0]
    # num of position
    m = len(cx)
    # broadcast the boxes over every location: (m, n, 4)
    gt = np.tile(gt[None], (m,1,1))
    assert gt.shape==(m,n,4)
    # (m, n) box center x
    center_x = (gt[..., 0] + gt[..., 2]) / 2
    # (m, n) box center y
    center_y = (gt[..., 1] + gt[..., 3]) / 2
    # (m, n, 4) clipped "center region" boxes, filled per level below
    center_gt = np.zeros(gt.shape)
    # no gt
    # NOTE(review): this zero-sum test also fires when all first-box centers
    # happen to be exactly 0 -- presumably it is only meant to detect the
    # degenerate all-zero-boxes case; confirm against callers.
    if center_x[..., 0].sum() == 0:
        return np.zeros((m, n), dtype=np.uint8)
    beg = 0
    # Locations are processed level by level because the center-region size
    # depends on the stride of the level a location comes from.
    for level, n_p in enumerate(num_points_per):
        end = beg + n_p
        # half-size of the square center region, in image pixels
        stride = strides[level] * radius
        xmin = center_x[beg:end] - stride
        ymin = center_y[beg:end] - stride
        xmax = center_x[beg:end] + stride
        ymax = center_y[beg:end] + stride
        # limit sample region in gt: intersect the center square with the box
        center_gt[beg:end, :, 0] = np.where(
            xmin > gt[beg:end, :, 0], xmin, gt[beg:end, :, 0]
        )
        center_gt[beg:end, :, 1] = np.where(
            ymin > gt[beg:end, :, 1], ymin, gt[beg:end, :, 1]
        )
        center_gt[beg:end, :, 2] = np.where(
            xmax > gt[beg:end, :, 2],
            gt[beg:end, :, 2], xmax
        )
        center_gt[beg:end, :, 3] = np.where(
            ymax > gt[beg:end, :, 3],
            gt[beg:end, :, 3], ymax
        )
        beg = end
    # signed distances from each location to the region's four sides
    # (m, n) - (1, n) --> (m, n)
    left = cx[:, None] - center_gt[..., 0]
    top = cy[:, None] - center_gt[..., 1]
    # (m, n) - (m, 1) --> (m, n)
    right = center_gt[..., 2] - cx[:, None]
    bottom = center_gt[..., 3] - cy[:, None]
    # (m,n,4)
    center_bbox = np.stack((left, top, right, bottom), -1)
    # a location is inside the region iff all four distances are positive
    inside_gt_bbox_mask = center_bbox.min(axis=2) > 0
    return inside_gt_bbox_mask
|
|
|
|
def anchors_for_shape(
    image_shape,
    pyramid_levels=None,
    anchor_params=None,
    shapes_callback=None,
):
    """
    Generate anchors for a given image shape.

    Args
        image_shape: The shape of the image.
        pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
        anchor_params: Struct containing anchor parameters. If None, AnchorParameters.default is used.
            NOTE(review): this function reads anchor_params.sizes, .ratios and
            .scales, but the AnchorParameters class defined in this file only
            stores strides and interest_sizes, so calling this with the default
            parameters would raise AttributeError. Presumably this is legacy
            RetinaNet-style code kept for reference -- confirm before use.
        shapes_callback: Function to call for getting the shape of the image at different pyramid levels.

    Returns
        np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
    """

    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]

    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if shapes_callback is None:
        shapes_callback = guess_shapes
    feature_map_shapes = shapes_callback(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels
    all_anchors = np.zeros((0, 4))
    for idx, p in enumerate(pyramid_levels):
        # base anchors for this level, centered on the origin
        anchors = generate_anchors(
            base_size=anchor_params.sizes[idx],
            ratios=anchor_params.ratios,
            scales=anchor_params.scales
        )
        # replicate them at every cell of the level's feature map
        shifted_anchors = shift(feature_map_shapes[idx], anchor_params.strides[idx], anchors)
        all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

    return all_anchors
|
|
|
|
|
|
def shift(feature_map_shape, stride, anchors):
    """
    Produce shifted anchors for every cell of a feature map.

    Args
        feature_map_shape: (h, w) shape to shift the anchors over.
        stride: distance in image pixels between neighbouring cells.
        anchors: (A, 4) base anchors to apply at each location.

    Returns
        (h * w * A, 4) array of shifted anchors.
    """
    # Cell centers sit half a stride in from the top-left corner.
    xs = (np.arange(0, feature_map_shape[1]) + 0.5) * stride
    ys = (np.arange(0, feature_map_shape[0]) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(xs, ys)

    # One (x, y, x, y) offset per cell: (K, 4).
    shifts = np.stack(
        (grid_x.ravel(), grid_y.ravel(), grid_x.ravel(), grid_y.ravel()),
        axis=1,
    )

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K * A, 4).
    num_anchors = anchors.shape[0]
    num_cells = shifts.shape[0]
    shifted = anchors.reshape((1, num_anchors, 4)) + shifts.reshape((num_cells, 1, 4))
    return shifted.reshape((num_cells * num_anchors, 4))
|
|
|
|
|
|
def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios x scales
    w.r.t. a reference window.

    Args:
        base_size: reference window size in pixels.
        ratios: aspect ratios to enumerate; defaults to AnchorParameters.default.ratios.
        scales: scales to enumerate; defaults to AnchorParameters.default.scales.

    Returns:
        anchors: (num_anchors, 4) boxes centered on (0, 0), stored as
        (-w/2, -h/2, w/2, h/2).
    """
    if ratios is None:
        ratios = AnchorParameters.default.ratios
    if scales is None:
        scales = AnchorParameters.default.scales

    num_anchors = len(ratios) * len(scales)
    anchors = np.zeros((num_anchors, 4))

    # Start from square boxes of side base_size * scale.
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # Area of each square box before the ratio correction.
    areas = anchors[:, 2] * anchors[:, 3]

    # Rescale w and h so each box keeps its area but takes the target ratio.
    repeated_ratios = np.repeat(ratios, len(scales))
    anchors[:, 2] = np.sqrt(areas / repeated_ratios)
    anchors[:, 3] = anchors[:, 2] * repeated_ratios

    # Convert (0, 0, w, h) into a box centered on the origin.
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors
|
|
|
|
|
|
def bbox_transform(anchors, gt_boxes, mean=None, std=None):
    """
    Compute normalized per-corner regression targets from anchors to gt boxes.

    Args:
        anchors: (N, 4) anchor boxes as (x1, y1, x2, y2).
        gt_boxes: (N, 4) target boxes as (x1, y1, x2, y2).
        mean: normalization mean; np.ndarray, list or tuple (default zeros).
        std: normalization std; np.ndarray, list or tuple (default 0.2s).

    Returns:
        (N, 4) targets: each corner delta divided by the anchor's width or
        height, then normalized as (targets - mean) / std.

    Raises:
        ValueError: when mean or std is not a np.ndarray, list or tuple.
    """
    mean = np.array([0, 0, 0, 0]) if mean is None else mean
    std = np.array([0.2, 0.2, 0.2, 0.2]) if std is None else std

    if isinstance(mean, (list, tuple)):
        mean = np.array(mean)
    elif not isinstance(mean, np.ndarray):
        raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

    if isinstance(std, (list, tuple)):
        std = np.array(std)
    elif not isinstance(std, np.ndarray):
        raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]

    # Corner-wise deltas scaled by anchor size, assembled directly as (N, 4).
    deltas = np.stack(
        (
            (gt_boxes[:, 0] - anchors[:, 0]) / widths,
            (gt_boxes[:, 1] - anchors[:, 1]) / heights,
            (gt_boxes[:, 2] - anchors[:, 2]) / widths,
            (gt_boxes[:, 3] - anchors[:, 3]) / heights,
        ),
        axis=1,
    )

    return (deltas - mean) / std
|