Yolov5s/ai_training/detection/fcos/losses.py

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow as tf
import keras.backend as K


def focal(alpha=0.25, gamma=2.0):
    """
    Create a functor for computing the focal loss.

    Args
        alpha: Scale the focal weight with alpha.
        gamma: Take the power of the focal weight with gamma.

    Returns
        A functor that computes the focal loss using the alpha and gamma.
    """

    def _focal(y_true, y_pred):
        """
        Compute the focal loss given the target tensor and the predicted tensor.

        As defined in https://arxiv.org/abs/1708.02002

        Args
            y_true: Tensor of target data from the generator with shape (B, N, num_classes).
            y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).

        Returns
            The focal loss of y_pred w.r.t. y_true.
        """
        # compute the focal loss
        location_state = y_true[:, :, -1]
        labels = y_true[:, :, :-1]
        alpha_factor = K.ones_like(labels) * alpha
        alpha_factor = tf.where(K.equal(labels, 1), alpha_factor, 1 - alpha_factor)
        # (1 - 0.99) ** 2 = 1e-4, (1 - 0.9) ** 2 = 1e-2
        focal_weight = tf.where(K.equal(labels, 1), 1 - y_pred, y_pred)
        focal_weight = alpha_factor * focal_weight ** gamma
        cls_loss = focal_weight * K.binary_crossentropy(labels, y_pred)

        # compute the normalizer: the number of positive anchors
        normalizer = tf.where(K.equal(location_state, 1))
        normalizer = K.cast(K.shape(normalizer)[0], K.floatx())
        normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

        return K.sum(cls_loss) / normalizer

    return _focal
# def focal(alpha=2, beta=4):
#     def _focal(hm_true, hm_pred):
#         location_state = hm_true[:, :, -1]
#         hm_true = hm_true[:, :, :-1]
#
#         pos_mask = tf.cast(tf.equal(hm_true, 1), tf.float32)
#         neg_mask = tf.cast(tf.less(hm_true, 1), tf.float32)
#         neg_weights = tf.pow(1 - hm_true, beta)
#
#         pos_loss = -tf.log(tf.clip_by_value(hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(1 - hm_pred, alpha) * pos_mask
#         neg_loss = -tf.log(tf.clip_by_value(1 - hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(hm_pred, alpha) * neg_weights * neg_mask
#
#         num_pos = tf.reduce_sum(pos_mask)
#         pos_loss = tf.reduce_sum(pos_loss)
#         neg_loss = tf.reduce_sum(neg_loss)
#
#         cls_loss = tf.cond(tf.greater(num_pos, 0), lambda: (pos_loss + neg_loss) / num_pos, lambda: neg_loss)
#         return cls_loss
#     return _focal

def iou():
    def iou_(y_true, y_pred):
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))
        if tf.size(indices) == 0:
            return tf.constant(0.0)
        y_regr_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_regr_true = y_true[:, :4]
        y_centerness_true = y_true[:, 4]

        # (num_pos, )
        pred_left = y_regr_pred[:, 0]
        pred_top = y_regr_pred[:, 1]
        pred_right = y_regr_pred[:, 2]
        pred_bottom = y_regr_pred[:, 3]

        # (num_pos, )
        target_left = y_regr_true[:, 0]
        target_top = y_regr_true[:, 1]
        target_right = y_regr_true[:, 2]
        target_bottom = y_regr_true[:, 3]

        target_area = (target_left + target_right) * (target_top + target_bottom)
        pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)
        w_intersect = tf.minimum(pred_left, target_left) + tf.minimum(pred_right, target_right)
        h_intersect = tf.minimum(pred_bottom, target_bottom) + tf.minimum(pred_top, target_top)

        area_intersect = w_intersect * h_intersect
        area_union = target_area + pred_area - area_intersect

        # (num_pos, )
        losses = -tf.log((area_intersect + 1.0) / (area_union + 1.0))
        losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
        return losses

    return iou_

def _calculate_giou(b1, b2, mode='giou'):
    """
    Args:
        b1: bounding box. The coordinates of the each bounding box in boxes are
            encoded as [x_min, y_min, x_max, y_max].
        b2: the other bounding box. The coordinates of the each bounding box
            in boxes are encoded as [y_min, x_min, y_max, x_max].
        mode: one of ['giou', 'iou'], decided to calculate GIoU or IoU loss.
    Returns:
        GIoU loss float `Tensor`.
    """
    zero = tf.convert_to_tensor(0.0, b1.dtype)
    # b1_ymin, b1_xmin, b1_ymax, b1_xmax = tf.unstack(b1, 4, axis=-1)
    # b2_ymin, b2_xmin, b2_ymax, b2_xmax = tf.unstack(b2, 4, axis=-1)
    b1_xmin, b1_ymin, b1_xmax, b1_ymax = tf.unstack(b1, 4, axis=-1)
    b2_xmin, b2_ymin, b2_xmax, b2_ymax = tf.unstack(b2, 4, axis=-1)

    b1_width = tf.maximum(zero, b1_xmax - b1_xmin)
    b1_height = tf.maximum(zero, b1_ymax - b1_ymin)
    b2_width = tf.maximum(zero, b2_xmax - b2_xmin)
    b2_height = tf.maximum(zero, b2_ymax - b2_ymin)
    b1_area = b1_width * b1_height
    b2_area = b2_width * b2_height

    intersect_ymin = tf.maximum(b1_ymin, b2_ymin)
    intersect_xmin = tf.maximum(b1_xmin, b2_xmin)
    intersect_ymax = tf.minimum(b1_ymax, b2_ymax)
    intersect_xmax = tf.minimum(b1_xmax, b2_xmax)
    intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
    intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
    intersect_area = intersect_width * intersect_height

    union_area = b1_area + b2_area - intersect_area
    iou = tf.math.divide(intersect_area, union_area+1e-5)
    if mode == "iou":
        return iou

    enclose_ymin = tf.minimum(b1_ymin, b2_ymin)
    enclose_xmin = tf.minimum(b1_xmin, b2_xmin)
    enclose_ymax = tf.maximum(b1_ymax, b2_ymax)
    enclose_xmax = tf.maximum(b1_xmax, b2_xmax)
    enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
    enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
    enclose_area = enclose_width * enclose_height
    giou = iou - tf.math.divide((enclose_area - union_area), enclose_area+1e-5)
    return giou

def giou_loss(bbox1, bbox2):
    return 1 - _calculate_giou(bbox1,bbox2)

def convertcorners(reg):
    b1_xmin, b1_ymin, b1_xmax, b1_ymax = tf.unstack(reg, 4, axis=-1)

    return tf.stack([-b1_xmin, -b1_ymin, b1_xmax, b1_ymax], axis=-1)
def giou(y_true, y_pred):
    location_state = y_true[:, :, -1]
    indices = tf.where(K.equal(location_state, 1))
    if tf.size(indices) == 0:
        return tf.constant(0.0)
    y_regr_pred = tf.gather_nd(y_pred, indices)
    y_true = tf.gather_nd(y_true, indices)
    y_regr_true = y_true[:, :4]
    y_centerness_true = y_true[:, 4]
    losses = giou_loss(convertcorners(y_regr_pred[:,:4]), convertcorners(y_regr_true))
    losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
    return losses


def bce():
    def bce_(y_true, y_pred):
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))
        if tf.size(indices) == 0:
            return tf.constant(0.0)
        y_centerness_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_centerness_true = y_true[:, 0:1]
        loss = K.switch(tf.size(y_centerness_true) > 0,
                        K.binary_crossentropy(target=y_centerness_true, output=y_centerness_pred),
                        tf.constant(0.0))
        loss = K.mean(loss)
        return loss

    return bce_