# -*- coding: utf-8 -*-
import numpy as np
import keras

from utils.compute_overlap import compute_overlap


class AnchorParameters:
    """
    The parameters that define how anchors / per-level locations are generated.

    Args
        strides        : List of strides to use. Each stride corresponds to one feature level.
        interest_sizes : List of [low, high] pairs, one per feature level
                         (presumably the object-size range handled by each level,
                         FCOS-style -- confirm against the target-assignment code).
        sizes          : Optional list of anchor base sizes, one per feature level.
                         Only used by the anchor-based helpers (anchors_for_shape).
        ratios         : Optional list of aspect ratios to use per location in a feature map.
        scales         : Optional list of scales to use per location in a feature map.

    `sizes`, `ratios` and `scales` are optional so that existing two-argument
    construction keeps working; when omitted they fall back to the values this
    module historically used, which keeps `anchors_for_shape` and
    `generate_anchors` usable with any instance.
    """

    def __init__(self, strides, interest_sizes, sizes=None, ratios=None, scales=None):
        if sizes is None:
            sizes = [32, 64, 128, 256, 512]
        if ratios is None:
            ratios = np.array([0.5, 1, 2], keras.backend.floatx())
        if scales is None:
            scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx())

        self.strides = strides
        self.interest_sizes = interest_sizes
        self.sizes = sizes
        self.ratios = ratios
        self.scales = scales

    def num_anchors(self):
        """ Return the number of anchors generated per feature-map location. """
        return len(self.ratios) * len(self.scales)


# The default anchor parameters.
AnchorParameters.default = AnchorParameters(
    strides=[8, 16, 32, 64, 128],
    interest_sizes=[
        [-1, 64],
        [64, 128],
        [128, 256],
        [256, 512],
        [512, 1e8],
    ],
)


def anchor_targets_bbox(
        anchors,
        image_group,
        annotations_group,
        num_classes,
        negative_overlap=0.4,
        positive_overlap=0.5
):
    """
    Generate anchor targets for bbox detection.

    Args
        anchors: np.array of shape (N, 4) for (x1, y1, x2, y2).
        image_group: List of images. Only `image.shape` is read: shape[0] bounds the
            anchor center y and shape[1] bounds the anchor center x.
        annotations_group: List of annotation dicts, each with a 'bboxes' entry
            (np.array, one row of (x1, y1, x2, y2) per box) and a 'labels' entry
            (np.array of class indices, one per box).
        num_classes: Number of classes to predict.
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap <= negative_overlap are negative).
        positive_overlap: IoU overlap for positive anchors (all anchors with overlap >= positive_overlap are positive).

    Returns
        regression_batch: batch that contains bounding-box regression targets for an image & anchor states
            (np.array of shape (batch_size, N, 4 + 1), where N is the number of anchors for an image,
            the first 4 columns define regression targets for (x1, y1, x2, y2) and the last column
            defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
        labels_batch: batch that contains labels & anchor states
            (np.array of shape (batch_size, N, num_classes + 1), where N is the number of anchors
            for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg).

        Note the order: (regression_batch, labels_batch).
    """
    assert (len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
    assert (len(annotations_group) > 0), "No data received to compute anchor targets for."
    for annotations in annotations_group:
        assert ('bboxes' in annotations), "Annotations should contain bboxes."
        assert ('labels' in annotations), "Annotations should contain labels."

    batch_size = len(image_group)

    regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
    labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())

    # anchor centers do not depend on the image, so compute them once
    anchors_center_x = (anchors[:, 0] + anchors[:, 2]) / 2
    anchors_center_y = (anchors[:, 1] + anchors[:, 3]) / 2

    # compute labels and regression targets
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        if annotations['bboxes'].shape[0]:
            # obtain indices of gt annotations with the greatest overlap
            positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(
                anchors, annotations['bboxes'], negative_overlap, positive_overlap
            )

            labels_batch[index, ignore_indices, -1] = -1
            labels_batch[index, positive_indices, -1] = 1

            regression_batch[index, ignore_indices, -1] = -1
            regression_batch[index, positive_indices, -1] = 1

            # compute target class labels: one-hot on the class of the best-matching gt box
            positive_labels = annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)
            labels_batch[index, positive_indices, positive_labels] = 1

            regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])

        # ignore anchors whose center lies outside of the image
        if image.shape:
            indices = np.logical_or(anchors_center_x >= image.shape[1], anchors_center_y >= image.shape[0])

            labels_batch[index, indices, -1] = -1
            regression_batch[index, indices, -1] = -1

    return regression_batch, labels_batch


def compute_gt_annotations(
        anchors,
        annotations,
        negative_overlap=0.4,
        positive_overlap=0.5
):
    """
    Obtain indices of gt annotations with the greatest overlap.

    Args
        anchors: np.array of shape (N, 4) for (x1, y1, x2, y2).
        annotations: np.array of gt boxes, one row per box, passed as-is to
            `compute_overlap` (column requirements are defined by that helper).
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap <= negative_overlap are negative).
        positive_overlap: IoU overlap for positive anchors (all anchors with overlap >= positive_overlap are positive).

    Returns
        positive_indices: boolean mask of shape (N,) marking positive anchors
        ignore_indices: boolean mask of shape (N,) marking ignored anchors
            (overlap > negative_overlap but not positive)
        argmax_overlaps_inds: for every anchor, the index of the gt box with the greatest overlap
    """
    overlaps = compute_overlap(anchors.astype(np.float64), annotations.astype(np.float64))
    argmax_overlaps_inds = np.argmax(overlaps, axis=1)
    max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

    # assign "dont care" labels
    positive_indices = max_overlaps >= positive_overlap
    ignore_indices = (max_overlaps > negative_overlap) & ~positive_indices

    return positive_indices, ignore_indices, argmax_overlaps_inds


def layer_shapes(image_shape, model):
    """
    Compute layer shapes given input image shape and the model.

    Args
        image_shape: The shape of the image (a tuple; it is concatenated after a `None` batch dimension).
        model: The model to use for computing how the image shape is transformed in the pyramid.

    Returns
        A dictionary mapping layer names to image shapes.
    """
    shape = {
        model.layers[0].name: (None,) + image_shape,
    }

    for layer in model.layers[1:]:
        nodes = layer._inbound_nodes
        for node in nodes:
            input_shapes = [shape[inbound_layer.name] for inbound_layer in node.inbound_layers]
            if not input_shapes:
                continue
            # single-input layers expect a bare shape, multi-input layers a list of shapes
            shape[layer.name] = layer.compute_output_shape(
                input_shapes[0] if len(input_shapes) == 1 else input_shapes
            )

    return shape


def make_shapes_callback(model):
    """
    Make a function for getting the shape of the pyramid levels.

    The returned callable takes (image_shape, pyramid_levels) and looks up the
    output shapes of the layers named "P<level>" in `model`.
    """

    def get_shapes(image_shape, pyramid_levels):
        shape = layer_shapes(image_shape, model)
        # drop the batch dimension and keep the two dimensions that follow it
        image_shapes = [shape["P{}".format(level)][1:3] for level in pyramid_levels]
        return image_shapes

    return get_shapes


def guess_shapes(image_shape, pyramid_levels=(3, 4, 5, 6, 7)):
    """
    Guess shapes based on pyramid levels.

    Args
        image_shape: The shape of the image (only the first two entries are used).
        pyramid_levels: A list of what pyramid levels are used.

    Returns
        A list of image shapes at each pyramid level.
    """
    image_shape = np.array(image_shape[:2])
    # ceil(image_shape / 2 ** level) using integer arithmetic
    feature_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
    return feature_shapes


def compute_locations_per_level(h, w, stride):
    """
    Compute the image-space (x, y) location of every cell of one feature level.

    Args
        h: feature map height (number of rows).
        w: feature map width (number of columns).
        stride: stride of the feature level.

    Returns
        np.array of shape (h * w, 2), float32, in row-major order; each location is
        the top-left corner of the cell offset by `stride // 2`.
    """
    # e.g. [0, 8, 16] for w = 3, stride = 8
    shifts_x = np.arange(0, w * stride, step=stride, dtype=np.float32)
    # e.g. [0, 8, 16, 24] for h = 4, stride = 8
    shifts_y = np.arange(0, h * stride, step=stride, dtype=np.float32)
    shift_x, shift_y = np.meshgrid(shifts_x, shifts_y)
    # (h * w, )
    shift_x = shift_x.reshape(-1)
    # (h * w, )
    shift_y = shift_y.reshape(-1)
    locations = np.stack((shift_x, shift_y), axis=1) + stride // 2
    return locations


def compute_locations(feature_shapes, anchor_param):
    """
    Compute the locations of all feature levels.

    Args:
        feature_shapes: list of (h, w)
        anchor_param: instance of AnchorParameters, or None to use AnchorParameters.default

    Returns:
        locations: list of np.array (shape is (fh * fw, 2)), one per level.
            Only as many levels as there are strides are processed.
    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default

    fpn_strides = anchor_param.strides
    n_stage = len(fpn_strides)

    locations = []
    for feature_shape, fpn_stride in zip(feature_shapes[:n_stage], fpn_strides):
        h, w = feature_shape
        locations.append(compute_locations_per_level(h, w, fpn_stride))
    return locations


def compute_interest_sizes(num_locations_each_level, anchor_param):
    """
    Repeat the interest size of every level once per location of that level.

    Args:
        num_locations_each_level: list of int
        anchor_param: instance of AnchorParameters, or None to use AnchorParameters.default

    Returns:
        interest_sizes (np.array): (sum(fh * fw), 2)
    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default

    interest_sizes = anchor_param.interest_sizes
    assert len(num_locations_each_level) == len(interest_sizes)

    tiled_interest_sizes = []
    for num_locations, interest_size in zip(num_locations_each_level, interest_sizes):
        # (2,) -> (1, 2) -> (num_locations, 2)
        interest_size = np.expand_dims(np.array(interest_size), axis=0)
        tiled_interest_sizes.append(np.tile(interest_size, (num_locations, 1)))

    return np.concatenate(tiled_interest_sizes, axis=0)


def get_sample_region(gt, anchor_param, num_points_per, cx, cy, radius=1.5):
    """
    Mark, for every location, which gt boxes contain it inside their center region.

    The center region of a gt box on a level is the square of half-size
    `stride * radius` around the box center, clipped to the box itself.

    This code is from
    https://github.com/yqyao/FCOS_PLUS/blob/0d20ba34ccc316650d8c30febb2eb40cb6eaae37/
    maskrcnn_benchmark/modeling/rpn/fcos/loss.py#L42

    Args:
        gt: np.array of shape (n, 4) for (x1, y1, x2, y2).
        anchor_param: instance of AnchorParameters, or None to use AnchorParameters.default.
        num_points_per: list with the number of locations of each level, in level order.
        cx: np.array of shape (m,), x coordinates of all locations.
        cy: np.array of shape (m,), y coordinates of all locations.
        radius: size of the center region in units of the level stride.

    Returns:
        np.array of shape (m, n). Boolean mask in the regular case; an all-zero
        uint8 array when there is no gt box.
    """
    if anchor_param is None:
        anchor_param = AnchorParameters.default

    strides = anchor_param.strides
    n = gt.shape[0]
    # num of positions
    m = len(cx)
    # (m, n, 4)
    gt = np.tile(gt[None], (m, 1, 1))
    assert gt.shape == (m, n, 4)
    # (m, n)
    center_x = (gt[..., 0] + gt[..., 2]) / 2
    # (m, n)
    center_y = (gt[..., 1] + gt[..., 3]) / 2
    # (m, n, 4)
    center_gt = np.zeros(gt.shape)

    # no gt: n == 0 must be checked first, indexing column 0 would raise otherwise.
    # The sum test is the upstream heuristic (first box x-center summing to zero).
    if n == 0 or center_x[..., 0].sum() == 0:
        return np.zeros((m, n), dtype=np.uint8)

    beg = 0
    for level, n_p in enumerate(num_points_per):
        end = beg + n_p
        stride = strides[level] * radius
        xmin = center_x[beg:end] - stride
        ymin = center_y[beg:end] - stride
        xmax = center_x[beg:end] + stride
        ymax = center_y[beg:end] + stride
        # limit sample region to the gt box
        center_gt[beg:end, :, 0] = np.maximum(xmin, gt[beg:end, :, 0])
        center_gt[beg:end, :, 1] = np.maximum(ymin, gt[beg:end, :, 1])
        center_gt[beg:end, :, 2] = np.minimum(xmax, gt[beg:end, :, 2])
        center_gt[beg:end, :, 3] = np.minimum(ymax, gt[beg:end, :, 3])
        beg = end

    # (m, 1) - (m, n) --> (m, n)
    left = cx[:, None] - center_gt[..., 0]
    top = cy[:, None] - center_gt[..., 1]
    # (m, n) - (m, 1) --> (m, n)
    right = center_gt[..., 2] - cx[:, None]
    bottom = center_gt[..., 3] - cy[:, None]
    # (m, n, 4)
    center_bbox = np.stack((left, top, right, bottom), -1)
    # a location is inside when all four distances are strictly positive
    inside_gt_bbox_mask = center_bbox.min(axis=2) > 0
    return inside_gt_bbox_mask


def anchors_for_shape(
        image_shape,
        pyramid_levels=None,
        anchor_params=None,
        shapes_callback=None,
):
    """
    Generate anchors for a given shape.

    Args
        image_shape: The shape of the image.
        pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
        anchor_params: Struct containing anchor parameters. If None, default values are used.
        shapes_callback: Function to call for getting the shape of the image at different pyramid levels
            (defaults to guess_shapes).

    Returns
        np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
    """
    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]

    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if shapes_callback is None:
        shapes_callback = guess_shapes
    feature_map_shapes = shapes_callback(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels; the empty (0, 4) seed keeps the
    # result well-formed even when no pyramid level is requested
    anchors_per_level = [np.zeros((0, 4))]
    for idx in range(len(pyramid_levels)):
        anchors = generate_anchors(
            base_size=anchor_params.sizes[idx],
            ratios=anchor_params.ratios,
            scales=anchor_params.scales
        )
        anchors_per_level.append(shift(feature_map_shapes[idx], anchor_params.strides[idx], anchors))

    return np.concatenate(anchors_per_level, axis=0)


def shift(feature_map_shape, stride, anchors):
    """
    Produce shifted anchors based on shape of the map and stride size.

    Args
        feature_map_shape : Shape to shift the anchors over.
        stride : Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location, shape (A, 4).

    Returns
        np.array of shape (K * A, 4), where K is the number of feature-map cells.
    """
    # create a grid starting from half stride from the top left corner
    shift_x = (np.arange(0, feature_map_shape[1]) + 0.5) * stride
    shift_y = (np.arange(0, feature_map_shape[0]) + 0.5) * stride

    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    shifts = np.vstack((
        shift_x.ravel(), shift_y.ravel(),
        shift_x.ravel(), shift_y.ravel()
    )).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))

    return all_anchors


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X scales w.r.t. a reference window.

    Args:
        base_size: side length of the reference window before scaling.
        ratios: aspect ratios to enumerate (defaults to AnchorParameters.default.ratios).
        scales: scales to enumerate (defaults to AnchorParameters.default.scales).

    Returns:
        anchors: (num_anchors, 4); the 4 values are the coordinates of a rectangle
            centered at (0, 0): (-w/2, -h/2, w/2, h/2)
    """
    if ratios is None:
        ratios = AnchorParameters.default.ratios

    if scales is None:
        scales = AnchorParameters.default.scales

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size: columns 2 and 3 temporarily hold (w, h)
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    # (num_anchors, )
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios (area is preserved)
    # (num_anchors, )
    repeated_ratios = np.repeat(ratios, len(scales))
    anchors[:, 2] = np.sqrt(areas / repeated_ratios)
    anchors[:, 3] = anchors[:, 2] * repeated_ratios

    # transform from (cx, cy, w, h) -> (x1, y1, x2, y2)
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors


def bbox_transform(anchors, gt_boxes, mean=None, std=None):
    """
    Compute bounding-box regression targets of gt boxes relative to anchors.

    Each target is the corner offset (gt - anchor) divided by the anchor width
    (for x) or height (for y), then normalized as (target - mean) / std.

    Args:
        anchors: (N, 4) for (x1, y1, x2, y2)
        gt_boxes: (N, 4) for (x1, y1, x2, y2), row i is matched with anchor i
        mean: np.ndarray, list or tuple used to normalize the targets (defaults to zeros).
        std: np.ndarray, list or tuple used to normalize the targets (defaults to 0.2 for every coordinate).

    Returns:
        np.array of shape (N, 4) with the normalized (dx1, dy1, dx2, dy2) targets.

    Raises:
        ValueError: if mean or std is not a np.ndarray, list or tuple.
    """
    if mean is None:
        mean = np.array([0, 0, 0, 0])
    if std is None:
        std = np.array([0.2, 0.2, 0.2, 0.2])

    if isinstance(mean, (list, tuple)):
        mean = np.array(mean)
    elif not isinstance(mean, np.ndarray):
        raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

    if isinstance(std, (list, tuple)):
        std = np.array(std)
    elif not isinstance(std, np.ndarray):
        raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

    anchor_widths = anchors[:, 2] - anchors[:, 0]
    anchor_heights = anchors[:, 3] - anchors[:, 1]

    targets_dx1 = (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths
    targets_dy1 = (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights
    targets_dx2 = (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths
    targets_dy2 = (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights

    # (4, N)
    targets = np.stack((targets_dx1, targets_dy1, targets_dx2, targets_dy2))
    # (N, 4)
    targets = targets.T

    targets = (targets - mean) / std

    return targets