# (source-viewer metadata from the original paste: 430 lines, 21 KiB, Python)

import time
import numpy as np
def xywh2xyxy(x):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function : Convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2], where xy1=top-left,
    #            xy2=bottom-right and (xc, yc) is the center of the bbox.
    # input :
    # x : 2D array with shape(n, 4) dtype(float64). The boxes [xc, yc, w, h]
    # output :
    # y : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2]
    centers = x[:, 0:2]
    half_sizes = x[:, 2:4] / 2
    y = np.zeros_like(x)
    y[:, 0:2] = centers - half_sizes  # top-left corner = center - half size
    y[:, 2:4] = centers + half_sizes  # bottom-right corner = center + half size
    return y
def same_score_selecting_biggest_bbox_area(boxes, scores, areas):
    # maintainer : doris
    # function : Among boxes that share an identical score, reorder (in place) so the
    #            box with the larger area comes first. Input must already be sorted by
    #            score, so equal scores occupy a contiguous run of rows.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2] already sorted by scores. where xy1=top-left, xy2=bottom-right.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # areas : 1D array with shape(n) dtype(float64). The areas corresponds to boxes.
    # output :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes sorted by scores then by areas.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # areas : 1D array with shape(n) dtype(float64). The areas corresponds to boxes.
    # index : 1D array with shape(n) dtype(int64). Permutation mapping new position -> original position.
    from collections import Counter
    # identity permutation of the current bbox positions
    index = np.arange(scores.shape[0])
    # score values that occur more than once
    duplicated = [value for value, count in Counter(scores).items() if count > 1]
    # nothing to reorder when every score is unique
    if not duplicated:
        return boxes, scores, areas, index
    for dup_score in duplicated:
        # contiguous run of rows that share this score (input is score-sorted)
        group = np.where(scores == dup_score)[0]
        start = min(group)
        # descending-area order within the run, expressed as absolute row indexes
        # (argsort + [::-1] kept exactly as-is for bit-true tie ordering)
        order_desc = np.argsort(areas[group])[::-1] + start
        span = slice(start, start + len(order_desc))
        # rewrite the run in the new order (fancy-indexed RHS copies, so no aliasing)
        boxes[span] = boxes[order_desc]
        scores[span] = scores[order_desc]
        areas[span] = areas[order_desc]
        index[span] = index[order_desc]
    return boxes, scores, areas, index
def nms(boxes, scores, thresh):
    # reference : https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
    # maintainer : doris
    # function : Greedy non-maximum suppression. Remove bbox B if IOU(A,B) > thresh and score(A) > score(B).
    # NOTE: 'boxes' and 'scores' (and the derived 'areas') are reordered IN PLACE by
    # same_score_selecting_biggest_bbox_area so that equal-score boxes are ranked by area.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2] already sorted by scores (descending). where xy1=top-left, xy2=bottom-right.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # thresh : constant float. The threshold of the IOU(intersection over union).
    # output :
    # keep : 1D array with shape(m) dtype(int64). The indexes of the remained boxes, valid against the area-resorted arrays.
    # index : 1D array with shape(n). The equal-score permutation; the caller must reorder its own detections with 'index' before applying 'keep'.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # 'areas' = (width of bbox) * (height of bbox); the "+ 1" follows the original
    # fast-rcnn inclusive integer-pixel convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Warning: Using same_score_selecting_biggest_bbox_area function would slow speed.
    # The reason of using same_score_selecting_biggest_bbox_area function is for bit-true between system and firmware.
    # 'boxes' with the same score, the larger the area, the higher the front.
    boxes, scores, areas, index = same_score_selecting_biggest_bbox_area(boxes, scores, areas)
    # refresh the column views after the in-place reorder above
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # 'order' = indexes of the still-candidate boxes, highest score first
    order = np.arange(0, scores.shape[0])
    keep = []
    while order.size > 0:
        # 'i' = index of the current highest-scoring candidate; it is always kept
        i = order[0]
        keep.append(i)
        # Vectorized overlap of bbox_i against every remaining candidate:
        # top-left of the intersection = elementwise max of the two top-left corners
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        # bottom-right of the intersection = elementwise min of the two bottom-right corners
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        # clamp width/height at 0.0 so disjoint boxes contribute zero intersection
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # 'inter' : intersection area per remaining candidate, shape matches order[1:]
        inter = w * h
        # 'ovr' : IOU(intersection over union) = intersection / union
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # 'inds' : candidates whose IOU with bbox_i is <= thresh survive this round
        inds = np.where(ovr <= thresh)[0]
        # '+ 1' compensates for the order[1:] offset used in the comparisons above
        order = order[inds + 1]
    # 'keep' : The indexes of the remained boxes.
    keep = np.asarray(keep)
    return keep, index
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, top_k_num=3000, agnostic=False, only_person_class=False):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function :
    # (1) keep the bbox which 'obj_conf' is greater than 'conf_thres'
    # (2) compute 'conf' of all classes = obj_conf * cls_conf
    # (3) convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2]
    # (4) keep the class which 'conf' is greater than 'conf_thres'
    # (5) keep the 'top_k_num' highest-confidence bboxes
    # (6) nms : remove bbox B if IOU(A,B) > iou_thres and score(A) > score(B)
    # input :
    # prediction : 3D array with shape(bs, 'n', 'no') where 'n'= total number of anchors, 'no'= number of outputs per anchor [xc, yc, w, h, obj_conf, cls_conf...], ex: 'no'=85(COCO)
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. Keep at most this many boxes before nms.
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms (implemented via the max_wh coordinate-offset trick below).
    # only_person_class : boolean. True: use only column 5 as the confidence and force class id 0.
    #   NOTE(review): presumably column 5 is the 'person' class in this model family — confirm against the training labels.
    # output :
    # output : list (len bs) of 2D arrays [x1, y1, x2, y2, score, class_id]; entries stay None for images with no detections.
    # 'nc' : number of classes
    nc = prediction[0].shape[1] - 5
    xc = prediction[..., 4] > conf_thres # candidates: objectness above threshold
    # Settings
    max_wh = 4096 # (pixels) maximum box width and height; doubles as the per-class offset for batched nms
    max_det = 300 # maximum number of detections per image
    time_limit = 10.0 # seconds to quit after
    multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction): # image index, image inference
        # Apply constraints: drop anchors whose objectness failed the threshold
        x = x[xc[xi]] # confidence
        # If none remain process next image
        if not x.shape[0]:
            continue
        # Compute conf
        x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])
        # Detections matrix nx6 (xyxy, conf, cls)
        if only_person_class:
            n = x.shape[0] # number of boxes
            conf = x[:, 5].reshape(-1,1) # confidence of class 0 only
            j = np.zeros(n).reshape(-1,1) # class id fixed to 0
            x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        else:
            if multi_label:
                # one output row per (box, class) pair whose conf passes the threshold
                i, j = (x[:, 5:] > conf_thres).nonzero()
                i, j = i.T, j.T
                x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(float)), axis=1)
            else: # best class only
                conf = np.max(x[:, 5:],axis=1, keepdims=True)
                j = np.argmax(x[:, 5:],axis=1).reshape(-1,1)
                x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        # If none remain process next image
        n = x.shape[0] # number of boxes
        if not n:
            continue
        # Batched NMS: offset every box by class_id * max_wh so boxes of different
        # classes can never overlap (disabled when agnostic=True)
        c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
        boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
        # Sort by confidence (descending) and keep only the top_k_num boxes
        ind_Sort_by_confidence = np.argsort(x[:, 4])[::-1]
        boxes = boxes[ind_Sort_by_confidence][:top_k_num] #
        scores = scores[ind_Sort_by_confidence][:top_k_num] #
        x = x[ind_Sort_by_confidence][:top_k_num] #
        # nms reorders equal-score boxes in place and returns that permutation as
        # 'index'; apply it to x before indexing with the kept indexes 'i'
        i, index = nms(boxes, scores, iou_thres)
        x = x[index]
        if i.shape[0] > max_det: # limit detections
            i = i[:max_det]
        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break # time limit exceeded
    return output
def clip_coords(boxes, img_shape, rectangle):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function : Clamp bboxes [x1, y1, x2, y2] in place to a valid region. where xy1=top-left, xy2=bottom-right.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2]; modified in place.
    # img_shape : tuple : (image_height, image_width, 3). Used when no 4-element rectangle is given.
    # rectangle : tuple (left, top, width, height). When it has 4 elements, clip to that
    #             sub-region instead of the full image.
    if len(rectangle) == 4:
        # clip to the rectangle [left, left+width] x [top, top+height]
        x_lo, y_lo = rectangle[0], rectangle[1]
        x_hi, y_hi = rectangle[0] + rectangle[2], rectangle[1] + rectangle[3]
    else:
        # clip to the full image [0, image_width] x [0, image_height]
        x_lo, y_lo = 0, 0
        x_hi, y_hi = img_shape[1], img_shape[0]
    # columns 0/2 are x1/x2, columns 1/3 are y1/y2
    for col, lo, hi in ((0, x_lo, x_hi), (1, y_lo, y_hi), (2, x_lo, x_hi), (3, y_lo, y_hi)):
        boxes[:, col] = np.clip(boxes[:, col], a_min=lo, a_max=hi)
def scale_coords(padding, scale, rectangle, coords, img0_shape):
    # reference : http://59.125.118.185:8088/jenna/kneron_globalconstant/-/blob/master/base/preprocess.py
    # function : Map coords from the model-input image back to the original image, then clip.
    # x' = (x - padding[0]) * scale[0] + rectangle[0]
    # y' = (y - padding[2]) * scale[1] + rectangle[1]
    # input :
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height), or None for the full image
    # coords : 2D array with shape(n, 4) dtype(float64). The coords [x1, y1, x2, y2] in the img1; modified in place.
    # img0_shape : tuple : (image_h, image_w)
    # output :
    # coords : 2D array with shape(n, 4) dtype(float64). The coords [x1, y1, x2, y2] in the img0
    if rectangle is None:
        rectangle = (0.0, 0.0)
    # x columns (x1 at 0, x2 at 2): undo left padding, rescale, shift by ROI left
    coords[:, 0::2] = (coords[:, 0::2] - padding[0]) * scale[0] + rectangle[0]
    # y columns (y1 at 1, y2 at 3): undo top padding, rescale, shift by ROI top
    coords[:, 1::2] = (coords[:, 1::2] - padding[2]) * scale[1] + rectangle[1]
    # Clamp the mapped coords to the image (or to the ROI when rectangle has 4 elements).
    clip_coords(coords, img0_shape, rectangle)
    return coords
def Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle, conf_thres, iou_thres, top_k_num, vanish_point, filter_large_box_ratio=1.0, agnostic=False) :
    # maintainer : doris
    # function :
    # (1) non_max_suppression : keep the 'top_k_num' bboxes and remove the bboxes B if IOU(A,B) > thresh and score(A) > score(B)
    # (2) remove the bbox whose y2 is less than ('vanish_point' * img_h)
    # (3) remove the bbox whose (w,h) are greater than 'filter_large_box_ratio' * (img_w, img_h)
    # (4) shift class ids by +1 for the downstream classes mapping ('model_id' and 'detection_map' define the mapping)
    # input :
    # model_id : string. '237' strips the last channel of 'pred' (plate bbox) before nms.
    # pred: 3D array with shape(bs, 'n', 'no') where 'n'= total number of anchors, 'no'= number of outputs per anchor [xc, yc, w, h, conf, cls...], ex: 'no'=85(COCO)
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height)
    # im0_shape : tuple : (image_h, image_w)
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. The number of the top k
    # vanish_point : constant float. The y2 of bbox should be greater than (vanish_point * img_h)
    # filter_large_box_ratio : constant float. The (width, height) of bbox should be less than filter_large_box_ratio * (img_w, img_h)
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms.
    # output :
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    img_h, img_w = im0_shape[:2]
    # The y2 of bbox should be greater than (vanish_point * img_h), to remove the bbox floating in the air.
    vanish_y2 = vanish_point * float(img_h)
    # The (width, height) of bbox should be less than filter_large_box_ratio * (img_w, img_h), to remove the bbox occupying the entire image.
    filter_large_box_h = filter_large_box_ratio * float(img_h)
    filter_large_box_w = filter_large_box_ratio * float(img_w)
    # remove plate bbox (model '237' carries an extra trailing channel)
    if model_id in ['237'] :
        pred = pred[:,:,:-1]
    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, top_k_num, agnostic)
    dets = []
    for i, det in enumerate(pred): # detections per image
        if det is not None and len(det):
            # Rescale boxes from model-input size to im0 size, then round to integer pixels
            det[:, :4] = np.around(scale_coords(padding, scale, rectangle, det[:, :4], im0_shape))
            # filter bbox which has y2 < vanish_y2
            det = det[det[:,3]>=vanish_y2]
            # (x1,y1,x2,y2) -> (x1,y1,w,h) for public_field.py
            det[:, 2] = det[:, 2] - det[:, 0]
            det[:, 3] = det[:, 3] - det[:, 1]
            # filter bbox which has w > filter_large_box_w
            det = det[det[:,2]<=filter_large_box_w]
            # filter bbox which has h > filter_large_box_h
            det = det[det[:,3]<=filter_large_box_h]
            # shift class ids by +1 (downstream consumer's 1-based class convention)
            det[:, 5] = det[:, 5] + 1.0
            dets.append(det)
    # NOTE(review): np.squeeze(..., axis=0) only works when exactly one image produced
    # detections (bs == 1); with more images np.asarray may build a ragged array and
    # squeeze would raise. Confirm bs == 1 upstream.
    if dets and len(dets) > 0:
        dets = np.asarray(dets)
        dets = np.squeeze(dets, axis=0) # remove outer []
        dets = dets.tolist()
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    return dets
def postprocess_(out, h_ori, w_ori, padding, scale, rectangle, model_id, conf_thres, iou_thres, top_k_num, grids, num_classes, anchors, vanish_point, filter_large_box_ratio, agnostic, **kwargs) :
    # reference : https://github.com/ultralytics/yolov5/blob/master/models/yolo.py
    # maintainer : doris
    # function :
    # (1) manipulate the shape of the model output
    # (2) decode the bboxes (xc,yc,w,h) from the raw head outputs. where (xc,yc) - center of bbox
    # (3) Yolov5_postprocess: non_max_suppression
    # (4) Yolov5_postprocess: remove the bbox which y2 is less than ('vanish_point' * img_h)
    # (5) Yolov5_postprocess: remove the bbox which (w,h) are greater than 'filter_large_box_ratio' * (img_w, img_h)
    # (6) Yolov5_postprocess: 'model_id' and 'detection_map' define the classes_mapping
    #
    # input :
    # out : list of 4D array with shape('bs', 'ny', 'nx', 3 * 'no') dtype(float32) # ex:(1, 80, 80, 255)
    # where 'bs' : batch size
    # where 'no' : number of outputs per anchor [xc, yc, w, h, conf, cls...]
    # where 'ny' and 'nx' are the height and width of the feature map
    # h_ori : Integer. Height of the image
    # w_ori : Integer. Width of the image
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height)
    # model_id : string
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. The number of the top k
    # grids : list of 5D array with shape(1, 1, ny, nx, 2) dtype(float32) # ex:(1, 1, 80, 80, 2)
    # num_classes : number of classes
    # anchors : [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    # [layer1[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w],
    # layer2[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w],
    # layer3[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w] ]
    # vanish_point : constant float. The y2 of bbox >= (vanish_point * img_h)
    # filter_large_box_ratio : constant float. The (width, height) of bbox <= filter_large_box_ratio * (img_w, img_h)
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms.
    # output :
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    im0_shape = (h_ori, w_ori)
    # 'nc' : number of classes, ex: 'nc'=80(COCO)
    nc = num_classes
    # 'no' : number of outputs per anchor [xc, yc, w, h, conf, cls...], ex: 'no'=85(COCO)
    no = nc + 5
    # 'nl' : number of detection layers, ex: 'nl'=3
    nl = len(anchors)
    # 'na' : number of anchors per layer, ex: 'na'=3
    na = len(anchors[0]) // 2
    # NOTE(review): reshape(3, ...) below and the stride table hard-code nl == 3
    # detection layers with strides 8/16/32 — confirm for any variant with a
    # different layer count before reuse.
    # 'a' : 3D array with shape('nl', 'na', 2) # ex: (3, 3, 2)
    a = np.asarray(anchors).astype(float).reshape(3, -1, 2)
    # 'anchor_grid' : 6D array with shape('nl', 1, 'na', 1, 1, 2) # ex: (3, 1, 3, 1, 1, 2)
    anchor_grid = a.reshape(3, 1, -1, 1, 1, 2)
    stride = np.asarray([ 8., 16., 32.])
    z = []
    for i in range(nl):
        # 'i' : index of the detection layers
        # out[i] : 4D array with shape(bs, 'ny', 'nx', 'na' * 'no') # ex:(1, 80, 80, 255)
        # 'x': 4D array with shape(bs, 'na' * 'no', 'ny', 'nx') (channels-first)
        x = out[i].transpose([0, 3, 1, 2])
        bs, _, ny, nx = x.shape
        # 'x': 5D array with shape(bs, 'na', 'ny', 'nx', 'no') # ex:(1, 3, 80, 80, 85)
        x = x.reshape((bs, na, no, ny, nx)).transpose([0, 1, 3, 4, 2])
        # 'grid': 5D array with shape(1, 1, ny, nx, 2)
        grid = grids[i]
        # xc,yc : yolov5 v4+ decode; assumes sigmoid was already applied upstream — TODO confirm
        x[..., 0:2] = (x[..., 0:2] * 2. - 0.5 + grid) * stride[i]
        # w,h : (2*sig)^2 * anchor, same yolov5 decode convention
        x[..., 2:4] = (x[..., 2:4] * 2) ** 2 * anchor_grid[i]
        # z[i]: 3D array with shape(bs, 'n_i', 'no') # ex:(1, 19200, 85), (1, 4800, 85), (1, 1200, 85)
        # where 'n_i' = number of anchors in layer_i = 'na'*'ny'*'nx'
        z.append(x.reshape(bs, -1, no))
    # pred: 3D array with shape(bs, 'n', 'no') where 'n' = 'n_0' + 'n_1' + 'n_2' # ex:(1, 25200, 85)
    pred = np.concatenate(z, axis=1)
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    dets = Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle, conf_thres, iou_thres, top_k_num, vanish_point, filter_large_box_ratio, agnostic)
    return dets