206 lines
7.8 KiB
Python

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow as tf
import keras.backend as K
def focal(alpha=0.25, gamma=2.0):
    """
    Build a focal-loss callable parameterised by alpha and gamma.

    Args
        alpha: Weight given to entries whose label equals 1; every other entry is weighted by 1 - alpha.
        gamma: Exponent applied to the modulating (focal) factor.

    Returns
        A function of (y_true, y_pred) that evaluates the focal loss.
    """
    def _focal(y_true, y_pred):
        """
        Evaluate the focal loss of y_pred w.r.t. y_true (https://arxiv.org/abs/1708.02002).

        Args
            y_true: Target tensor indexed as [batch, location, channel]. The last channel is read as the
                location state (1 marks a positive location); all preceding channels are the class labels.
            y_pred: Predicted tensor, combined element-wise with the class labels.

        Returns
            Scalar tensor: the summed focal loss divided by the number of positive locations (at least 1).
        """
        class_targets = y_true[:, :, :-1]
        state = y_true[:, :, -1]
        is_positive = K.equal(class_targets, 1)

        # alpha for positives, (1 - alpha) for everything else
        alpha_map = K.ones_like(class_targets) * alpha
        alpha_map = tf.where(is_positive, alpha_map, 1 - alpha_map)

        # modulating factor: small for well-classified entries,
        # e.g. (1 - 0.99) ** 2 = 1e-4, (1 - 0.9) ** 2 = 1e-2
        modulator = tf.where(is_positive, 1 - y_pred, y_pred)
        weight = alpha_map * modulator ** gamma

        per_entry_loss = weight * K.binary_crossentropy(class_targets, y_pred)

        # normalise by the count of positive locations, never by less than 1
        positive_indices = tf.where(K.equal(state, 1))
        num_positive = K.cast(K.shape(positive_indices)[0], K.floatx())
        num_positive = K.maximum(K.cast_to_floatx(1.0), num_positive)

        return K.sum(per_entry_loss) / num_positive

    return _focal
# def focal(alpha=2, beta=4):
# def _focal(hm_true, hm_pred):
# location_state = hm_true[:, :, -1]
# hm_true = hm_true[:, :, :-1]
#
# pos_mask = tf.cast(tf.equal(hm_true, 1), tf.float32)
# neg_mask = tf.cast(tf.less(hm_true, 1), tf.float32)
# neg_weights = tf.pow(1 - hm_true, beta)
#
# pos_loss = -tf.log(tf.clip_by_value(hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(1 - hm_pred, alpha) * pos_mask
# neg_loss = -tf.log(tf.clip_by_value(1 - hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(hm_pred, alpha) * neg_weights * neg_mask
#
# num_pos = tf.reduce_sum(pos_mask)
# pos_loss = tf.reduce_sum(pos_loss)
# neg_loss = tf.reduce_sum(neg_loss)
#
# cls_loss = tf.cond(tf.greater(num_pos, 0), lambda: (pos_loss + neg_loss) / num_pos, lambda: neg_loss)
# return cls_loss
# return _focal
def iou():
    """
    Create a functor for computing the centerness-weighted IoU regression loss.

    Returns
        A function of (y_true, y_pred) that evaluates the loss.
    """
    def iou_(y_true, y_pred):
        """
        Compute -log(IoU) between predicted and target boxes at the positive locations.

        Args
            y_true: Target tensor indexed as [batch, location, channel]. Channels 0-3 are read as the
                target (left, top, right, bottom) values, channel 4 as the centerness weight and the
                last channel as the location state (1 selects the location).
            y_pred: Predicted tensor indexed as [batch, location, channel]; channels 0-3 are read as
                (left, top, right, bottom).
                NOTE(review): presumably these are distances from the location to the box sides - confirm.

        Returns
            Scalar tensor: the centerness-weighted mean of the per-location losses
            (0 when no location is selected).
        """
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))
        # No Python-level `if tf.size(indices) == 0` here: on a symbolic tensor that comparison is
        # not evaluated per batch, so the branch was unreachable in graph mode. The empty case is
        # already handled below: the weighted sum over zero rows is 0 and the denominator is 1e-8.
        y_regr_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_regr_true = y_true[:, :4]
        y_centerness_true = y_true[:, 4]

        # (num_pos, )
        pred_left = y_regr_pred[:, 0]
        pred_top = y_regr_pred[:, 1]
        pred_right = y_regr_pred[:, 2]
        pred_bottom = y_regr_pred[:, 3]

        # (num_pos, )
        target_left = y_regr_true[:, 0]
        target_top = y_regr_true[:, 1]
        target_right = y_regr_true[:, 2]
        target_bottom = y_regr_true[:, 3]

        target_area = (target_left + target_right) * (target_top + target_bottom)
        pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)

        # both boxes share the same reference location, so the overlap on each side is the smaller value
        w_intersect = tf.minimum(pred_left, target_left) + tf.minimum(pred_right, target_right)
        h_intersect = tf.minimum(pred_bottom, target_bottom) + tf.minimum(pred_top, target_top)
        area_intersect = w_intersect * h_intersect
        area_union = target_area + pred_area - area_intersect

        # (num_pos, ); the +1.0 terms keep the ratio finite for degenerate boxes
        losses = -tf.math.log((area_intersect + 1.0) / (area_union + 1.0))
        losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
        return losses

    return iou_
def _calculate_giou(b1, b2, mode='giou'):
    """
    Compute the element-wise IoU or GIoU between two tensors of corner-encoded boxes.

    Args:
        b1: box tensor whose last axis holds [x_min, y_min, x_max, y_max].
        b2: the other box tensor; it is unpacked with the same
            [x_min, y_min, x_max, y_max] layout as b1.
        mode: 'iou' returns the plain IoU; any other value (default 'giou')
            returns the GIoU.
    Returns:
        Float `Tensor` of IoU or GIoU values (the score itself, not a loss).
    """
    zero = tf.convert_to_tensor(0.0, b1.dtype)

    def _clamped_area(x_lo, y_lo, x_hi, y_hi):
        # Area of a rectangle; negative extents are clamped to zero.
        width = tf.maximum(zero, x_hi - x_lo)
        height = tf.maximum(zero, y_hi - y_lo)
        return width * height

    x1_lo, y1_lo, x1_hi, y1_hi = tf.unstack(b1, 4, axis=-1)
    x2_lo, y2_lo, x2_hi, y2_hi = tf.unstack(b2, 4, axis=-1)

    area_1 = _clamped_area(x1_lo, y1_lo, x1_hi, y1_hi)
    area_2 = _clamped_area(x2_lo, y2_lo, x2_hi, y2_hi)

    # overlap rectangle: innermost corners of the two boxes
    overlap = _clamped_area(
        tf.maximum(x1_lo, x2_lo),
        tf.maximum(y1_lo, y2_lo),
        tf.minimum(x1_hi, x2_hi),
        tf.minimum(y1_hi, y2_hi),
    )
    union = area_1 + area_2 - overlap

    # the small constant guards against division by zero for empty boxes
    iou = tf.math.divide(overlap, union + 1e-5)
    if mode == "iou":
        return iou

    # smallest rectangle enclosing both boxes: outermost corners
    hull = _clamped_area(
        tf.minimum(x1_lo, x2_lo),
        tf.minimum(y1_lo, y2_lo),
        tf.maximum(x1_hi, x2_hi),
        tf.maximum(y1_hi, y2_hi),
    )
    return iou - tf.math.divide((hull - union), hull + 1e-5)
def giou_loss(bbox1, bbox2):
    """Return 1 - GIoU of the two box tensors, as computed by `_calculate_giou`."""
    giou_score = _calculate_giou(bbox1, bbox2)
    return 1 - giou_score
def convertcorners(reg):
    """
    Negate the first two of the four components on the last axis of `reg`.

    Maps [a, b, c, d] -> [-a, -b, c, d].
    NOTE(review): presumably this turns (left, top, right, bottom) distances into
    corner coordinates relative to the location - confirm against the callers.
    """
    left, top, right, bottom = tf.unstack(reg, 4, axis=-1)
    corners = [-left, -top, right, bottom]
    return tf.stack(corners, axis=-1)
def giou(y_true, y_pred):
    """
    Compute the centerness-weighted GIoU regression loss at the positive locations.

    Args
        y_true: Target tensor indexed as [batch, location, channel]. Channels 0-3 are read as the
            target regression values, channel 4 as the centerness weight and the last channel as
            the location state (1 selects the location).
        y_pred: Predicted tensor indexed as [batch, location, channel]; channels 0-3 are read as
            the predicted regression values.

    Returns
        Scalar tensor: the centerness-weighted mean of (1 - GIoU) over the selected locations
        (0 when no location is selected).
    """
    location_state = y_true[:, :, -1]
    indices = tf.where(K.equal(location_state, 1))
    # No Python-level `if tf.size(indices) == 0` here: on a symbolic tensor that comparison is
    # not evaluated per batch, so the branch was unreachable in graph mode. The empty case is
    # already handled below: the weighted sum over zero rows is 0 and the denominator is 1e-8.
    y_regr_pred = tf.gather_nd(y_pred, indices)
    y_true = tf.gather_nd(y_true, indices)
    y_regr_true = y_true[:, :4]
    y_centerness_true = y_true[:, 4]

    # both regressions are converted to corner form before the GIoU is taken
    losses = giou_loss(convertcorners(y_regr_pred[:, :4]), convertcorners(y_regr_true))
    losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
    return losses
def bce():
    """
    Create a functor for computing the centerness binary cross-entropy loss.

    Returns
        A function of (y_true, y_pred) that evaluates the loss.
    """
    def bce_(y_true, y_pred):
        """
        Compute the mean binary cross-entropy of the predictions at the positive locations.

        Args
            y_true: Target tensor indexed as [batch, location, channel]. Channel 0 is read as the
                centerness target and the last channel as the location state (1 selects the location).
            y_pred: Predicted tensor indexed as [batch, location, channel].

        Returns
            Scalar tensor: the mean cross-entropy over the selected locations
            (0 when no location is selected).
        """
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))
        # No Python-level `if tf.size(indices) == 0` here: on a symbolic tensor that comparison is
        # not evaluated per batch, so the branch was unreachable in graph mode. The graph-level
        # K.switch below is what actually protects K.mean from an empty selection.
        y_centerness_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_centerness_true = y_true[:, 0:1]
        loss = K.switch(tf.size(y_centerness_true) > 0,
                        K.binary_crossentropy(target=y_centerness_true, output=y_centerness_pred),
                        tf.constant(0.0))
        loss = K.mean(loss)
        return loss

    return bce_