206 lines
7.8 KiB
Python
206 lines
7.8 KiB
Python
"""
|
|
Copyright 2017-2018 Fizyr (https://fizyr.com)
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
import tensorflow as tf
|
|
import keras.backend as K
|
|
|
|
|
|
def focal(alpha=0.25, gamma=2.0):
    """
    Build a focal-loss function bound to the given alpha and gamma.

    Args
        alpha: Balancing weight. Entries whose label equals 1 are scaled by alpha, all others by 1 - alpha.
        gamma: Exponent applied to the focal (modulating) weight.

    Returns
        A function (y_true, y_pred) that computes the focal loss.
    """

    def _focal(y_true, y_pred):
        """
        Focal loss of y_pred w.r.t. y_true, as defined in https://arxiv.org/abs/1708.02002

        Args
            y_true: Target tensor indexed as (B, N, C + 1). The last channel is the location
                    state (1 marks a positive location); the first C channels are the class labels.
            y_pred: Predicted tensor with the same shape as the label part of y_true, i.e. (B, N, C).

        Returns
            The summed focal loss divided by max(1, number of locations whose state equals 1).
        """
        class_targets = y_true[:, :, :-1]
        state = y_true[:, :, -1]
        is_positive = K.equal(class_targets, 1)

        # alpha for positive labels, (1 - alpha) for everything else
        alpha_map = K.ones_like(class_targets) * alpha
        alpha_map = tf.where(is_positive, alpha_map, 1 - alpha_map)

        # well-classified entries are down-weighted:
        # (1 - 0.99) ** 2 = 1e-4, (1 - 0.9) ** 2 = 1e-2
        modulator = tf.where(is_positive, 1 - y_pred, y_pred)
        weight = alpha_map * modulator ** gamma

        per_entry_loss = weight * K.binary_crossentropy(class_targets, y_pred)

        # normalise by the number of positive locations, but never by less than 1
        positive_indices = tf.where(K.equal(state, 1))
        num_positive = K.cast(K.shape(positive_indices)[0], K.floatx())
        num_positive = K.maximum(K.cast_to_floatx(1.0), num_positive)

        return K.sum(per_entry_loss) / num_positive

    return _focal
|
|
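# NOTE: disabled alternative focal loss, kept for reference only (the hm_ prefix suggests heatmap targets).
# def focal(alpha=2, beta=4):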
|
|
# def _focal(hm_true, hm_pred):
|
|
# location_state = hm_true[:, :, -1]
|
|
# hm_true = hm_true[:, :, :-1]
|
|
#
|
|
# pos_mask = tf.cast(tf.equal(hm_true, 1), tf.float32)
|
|
# neg_mask = tf.cast(tf.less(hm_true, 1), tf.float32)
|
|
# neg_weights = tf.pow(1 - hm_true, beta)
|
|
#
|
|
# pos_loss = -tf.log(tf.clip_by_value(hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(1 - hm_pred, alpha) * pos_mask
|
|
# neg_loss = -tf.log(tf.clip_by_value(1 - hm_pred, 1e-4, 1. - 1e-4)) * tf.pow(hm_pred, alpha) * neg_weights * neg_mask
|
|
#
|
|
# num_pos = tf.reduce_sum(pos_mask)
|
|
# pos_loss = tf.reduce_sum(pos_loss)
|
|
# neg_loss = tf.reduce_sum(neg_loss)
|
|
#
|
|
# cls_loss = tf.cond(tf.greater(num_pos, 0), lambda: (pos_loss + neg_loss) / num_pos, lambda: neg_loss)
|
|
# return cls_loss
|
|
# return _focal
|
|
|
|
def iou():
    """
    Create the weighted IoU regression loss.

    Returns
        A function (y_true, y_pred) that computes the loss.
    """

    def iou_(y_true, y_pred):
        """
        Compute -log(IoU) between predicted and target boxes at positive locations.

        Boxes are described by four distances read from channels 0..3 as
        (left, top, right, bottom); width is left + right and height is top + bottom.

        Args
            y_true: Target tensor indexed as (B, N, C). Channels 0-3 hold the regression
                    targets, channel 4 a per-location weight (named centerness in the code)
                    and the last channel the location state (1 marks a positive location).
            y_pred: Predicted tensor indexed as (B, N, C'); channels 0-3 are read as the
                    predicted distances.

        Returns
            Scalar tensor: the weighted sum of the per-location losses divided by
            (sum of the weights + 1e-8). This evaluates to 0 when there is no positive location.
        """
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))

        # No Python-level early return for the "no positive location" case:
        # `tf.size(indices) == 0` is a symbolic tensor comparison, so a Python `if`
        # on it never fires in TF1 graph mode and raises in TF2 graph mode.
        # The epsilon-normalised sum below already yields 0 for empty input.
        y_regr_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_regr_true = y_true[:, :4]
        y_centerness_true = y_true[:, 4]

        # (num_pos, )
        pred_left = y_regr_pred[:, 0]
        pred_top = y_regr_pred[:, 1]
        pred_right = y_regr_pred[:, 2]
        pred_bottom = y_regr_pred[:, 3]

        # (num_pos, )
        target_left = y_regr_true[:, 0]
        target_top = y_regr_true[:, 1]
        target_right = y_regr_true[:, 2]
        target_bottom = y_regr_true[:, 3]

        target_area = (target_left + target_right) * (target_top + target_bottom)
        pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)
        w_intersect = tf.minimum(pred_left, target_left) + tf.minimum(pred_right, target_right)
        h_intersect = tf.minimum(pred_bottom, target_bottom) + tf.minimum(pred_top, target_top)

        area_intersect = w_intersect * h_intersect
        area_union = target_area + pred_area - area_intersect

        # (num_pos, ); the +1.0 keeps the ratio finite for zero-area boxes.
        # tf.math.log replaces the removed tf.log alias (the file already uses tf.math).
        losses = -tf.math.log((area_intersect + 1.0) / (area_union + 1.0))
        losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
        return losses

    return iou_
|
|
|
|
def _calculate_giou(b1, b2, mode='giou'):
    """
    Compute the IoU or generalized IoU (GIoU) between two box tensors.

    Args:
        b1: bounding boxes. The last axis holds 4 values that are unpacked
            here as [x_min, y_min, x_max, y_max].
        b2: the other bounding boxes, unpacked with the same
            [x_min, y_min, x_max, y_max] layout as b1.
        mode: "iou" returns the plain IoU; any other value (default 'giou')
            returns the GIoU.
    Returns:
        Float `Tensor` holding the IoU or GIoU value (the overlap measure
        itself, not a loss), with the last axis of the inputs removed.
    """
    zero = tf.convert_to_tensor(0.0, b1.dtype)
    a_x0, a_y0, a_x1, a_y1 = tf.unstack(b1, 4, axis=-1)
    b_x0, b_y0, b_x1, b_y1 = tf.unstack(b2, 4, axis=-1)

    # Areas of both boxes; inverted (negative-extent) boxes are clamped to zero.
    area_a = tf.maximum(zero, a_x1 - a_x0) * tf.maximum(zero, a_y1 - a_y0)
    area_b = tf.maximum(zero, b_x1 - b_x0) * tf.maximum(zero, b_y1 - b_y0)

    # Overlap rectangle: largest min corner to smallest max corner.
    overlap_w = tf.maximum(zero, tf.minimum(a_x1, b_x1) - tf.maximum(a_x0, b_x0))
    overlap_h = tf.maximum(zero, tf.minimum(a_y1, b_y1) - tf.maximum(a_y0, b_y0))
    overlap_area = overlap_w * overlap_h

    union_area = area_a + area_b - overlap_area
    # The 1e-5 guards against division by zero for empty boxes.
    iou = tf.math.divide(overlap_area, union_area + 1e-5)
    if mode == "iou":
        return iou

    # Smallest axis-aligned rectangle enclosing both boxes.
    hull_w = tf.maximum(zero, tf.maximum(a_x1, b_x1) - tf.minimum(a_x0, b_x0))
    hull_h = tf.maximum(zero, tf.maximum(a_y1, b_y1) - tf.minimum(a_y0, b_y0))
    hull_area = hull_w * hull_h

    # GIoU subtracts the fraction of the enclosing rectangle not covered by the union.
    return iou - tf.math.divide((hull_area - union_area), hull_area + 1e-5)
|
|
|
|
def giou_loss(bbox1, bbox2):
    """
    GIoU loss between two box tensors.

    Args
        bbox1: Box tensor, passed to _calculate_giou as its first box argument.
        bbox2: Box tensor, passed to _calculate_giou as its second box argument.

    Returns
        1 minus the GIoU computed by _calculate_giou in its default mode.
    """
    overlap = _calculate_giou(bbox1, bbox2)
    return 1 - overlap
|
|
|
|
def convertcorners(reg):
    """
    Negate the first two of the four values on the last axis of reg.

    Presumably this turns (left, top, right, bottom) distances from a location
    into corner coordinates relative to that location -- confirm against the caller.

    Args
        reg: Tensor whose last axis has exactly 4 entries.

    Returns
        Tensor of the same shape holding [-v0, -v1, v2, v3] on the last axis.
    """
    first, second, third, fourth = tf.unstack(reg, 4, axis=-1)
    flipped = [-first, -second, third, fourth]
    return tf.stack(flipped, axis=-1)
|
|
def giou(y_true, y_pred):
    """
    Weighted GIoU regression loss over the positive locations.

    Args
        y_true: Target tensor indexed as (B, N, C). Channels 0-3 hold the regression
                targets, channel 4 a per-location weight (named centerness in the code)
                and the last channel the location state (1 marks a positive location).
        y_pred: Predicted tensor indexed as (B, N, C'); channels 0-3 are used as the
                predicted regression values.

    Returns
        Scalar tensor: the weighted sum of the per-location GIoU losses divided by
        (sum of the weights + 1e-8). This evaluates to 0 when there is no positive location.
    """
    location_state = y_true[:, :, -1]
    indices = tf.where(K.equal(location_state, 1))

    # No Python-level early return for the "no positive location" case:
    # `tf.size(indices) == 0` is a symbolic tensor comparison, so a Python `if`
    # on it never fires in TF1 graph mode and raises in TF2 graph mode.
    # The epsilon-normalised sum below already yields 0 for empty input.
    y_regr_pred = tf.gather_nd(y_pred, indices)
    y_true = tf.gather_nd(y_true, indices)
    y_regr_true = y_true[:, :4]
    y_centerness_true = y_true[:, 4]

    # Both predictions and targets go through convertcorners before the GIoU loss.
    losses = giou_loss(convertcorners(y_regr_pred[:, :4]), convertcorners(y_regr_true))
    losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8)
    return losses
|
|
|
|
|
|
def bce():
    """
    Create the binary cross-entropy loss evaluated at positive locations.

    Returns
        A function (y_true, y_pred) that computes the loss.
    """

    def bce_(y_true, y_pred):
        """
        Mean binary cross-entropy between target and prediction at positive locations.

        Args
            y_true: Target tensor indexed as (B, N, C). Channel 0 is the target value
                    (named centerness in the code) and the last channel is the location
                    state (1 marks a positive location).
            y_pred: Predicted tensor indexed as (B, N, C'); presumably C' == 1 so that it
                    matches the single target channel -- confirm against the model output.

        Returns
            Scalar tensor: the mean loss over the positive locations, or 0.0 when there are none.
        """
        location_state = y_true[:, :, -1]
        indices = tf.where(K.equal(location_state, 1))

        # No Python-level early return for the "no positive location" case:
        # `tf.size(indices) == 0` is a symbolic tensor comparison, so a Python `if`
        # on it never fires in TF1 graph mode and raises in TF2 graph mode.
        # K.switch below handles the empty case inside the graph.
        y_centerness_pred = tf.gather_nd(y_pred, indices)
        y_true = tf.gather_nd(y_true, indices)
        y_centerness_true = y_true[:, 0:1]

        # Fall back to 0.0 for empty input so K.mean never averages an empty tensor (NaN).
        loss = K.switch(tf.size(y_centerness_true) > 0,
                        K.binary_crossentropy(target=y_centerness_true, output=y_centerness_pred),
                        tf.constant(0.0))
        loss = K.mean(loss)
        return loss

    return bce_
|