import time
from collections import Counter

import numpy as np


def xywh2xyxy(x):
    """Convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2].

    reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    maintainer : doris

    Args:
        x: 2D array, shape (n, 4), dtype float64. Boxes [xc, yc, w, h] where
           (xc, yc) is the center of the bbox.

    Returns:
        y: 2D array, shape (n, 4), dtype float64. Boxes [x1, y1, x2, y2] where
           xy1 = top-left and xy2 = bottom-right.
    """
    y = np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top-left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top-left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom-right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom-right y
    return y


def same_score_selecting_biggest_bbox_area(boxes, scores, areas):
    """Among boxes sharing the same score, put the bigger area first.

    maintainer : doris
    Inputs are assumed already sorted by score, so all boxes with an equal
    score occupy a contiguous slice of the arrays.

    Args:
        boxes:  2D array (n, 4) float64. Boxes [x1, y1, x2, y2] sorted by score;
                xy1 = top-left, xy2 = bottom-right.
        scores: 1D array (n,) float64. Scores corresponding to `boxes`.
        areas:  1D array (n,) float64. Areas corresponding to `boxes`.

    Returns:
        (boxes, scores, areas, index): same arrays, re-ordered in place within
        each equal-score group by area (descending). `index` maps the new order
        back to the original input positions.
    """
    # Score values that appear more than once in 'scores'.
    same_scores = [item for item, count in Counter(scores).items() if count > 1]
    # Current index of each bbox.
    index = np.arange(scores.shape[0])
    # No boxes with the same score: nothing to reorder.
    if len(same_scores) == 0:
        return boxes, scores, areas, index
    for score_i in same_scores:
        # Indices of the bboxes sharing the score value 'score_i'; contiguous
        # because the inputs are sorted by score.
        inds_ii = np.where(scores == score_i)[0]
        # First index of the equal-score group.
        inds_ii_min = min(inds_ii)
        # Areas of the bboxes in the group.
        areas_ii = areas[inds_ii]
        # Group-local order by area, biggest first.
        ind_Sort_by_area = np.argsort(areas_ii)[::-1]
        # Convert group-local positions to absolute indices into 'boxes'.
        ind_Sort_by_area += inds_ii_min
        ll = len(ind_Sort_by_area)
        # Rewrite the group slice in area-descending order.
        boxes[inds_ii_min:inds_ii_min + ll] = boxes[ind_Sort_by_area]
        scores[inds_ii_min:inds_ii_min + ll] = scores[ind_Sort_by_area]
        areas[inds_ii_min:inds_ii_min + ll] = areas[ind_Sort_by_area]
        index[inds_ii_min:inds_ii_min + ll] = index[ind_Sort_by_area]
    return boxes, scores, areas, index


def nms(boxes, scores, thresh):
    """Greedy non-maximum suppression.

    reference : https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
    maintainer : doris
    Removes bbox B if IOU(A, B) > thresh and score(A) > score(B).

    Args:
        boxes:  2D array (n, 4) float64. Boxes [x1, y1, x2, y2] already sorted
                by score; xy1 = top-left, xy2 = bottom-right.
        scores: 1D array (n,) float64. Scores corresponding to `boxes`.
        thresh: float. Threshold of the IOU (intersection over union).

    Returns:
        keep:  1D int array, indexes (into the re-sorted boxes) of survivors.
        index: 1D int array mapping the re-sorted order back to the input order
               (from same_score_selecting_biggest_bbox_area).
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # '+ 1' follows the Fast R-CNN pixel-area convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Warning: this extra sorting step slows nms down; it is kept for bit-true
    # agreement between system and firmware (equal scores -> bigger area first).
    boxes, scores, areas, index = same_score_selecting_biggest_bbox_area(boxes, scores, areas)
    # Re-read the coordinates after the reorder.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # Indexes of boxes still in play, highest score first.
    order = np.arange(0, scores.shape[0])
    keep = []
    while order.size > 0:
        i = order[0]  # current highest-scoring box
        keep.append(i)
        # Overlap rectangle between box i and every remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        # Width / height of the overlap, clamped to be non-negative.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # Intersection area, shape (remaining,).
        inter = w * h
        # IOU = intersection / union.
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose IOU with box i is <= thresh.
        inds = np.where(ovr <= thresh)[0]
        # '+ 1' because 'ovr' skips order[0].
        order = order[inds + 1]
    keep = np.asarray(keep)
    return keep, index


def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, top_k_num=3000,
                        agnostic=False, only_person_class=False):
    """Filter raw predictions by confidence and run (batched) NMS.

    reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    maintainer : doris
    Steps:
        (1) keep boxes whose 'obj_conf' is greater than 'conf_thres'
        (2) compute per-class conf = obj_conf * cls_conf
        (3) convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2]
        (4) keep classes whose 'conf' is greater than 'conf_thres'
        (5) keep the 'top_k_num' boxes
        (6) nms: remove bbox B if IOU(A, B) > thresh and score(A) > score(B)

    Args:
        prediction: 3D array (bs, n, no) where n = total number of anchors and
                    no = outputs per anchor [x, y, w, h, conf, cls...],
                    e.g. no = 85 for COCO.
        conf_thres: float. Confidence threshold.
        iou_thres:  float. IOU threshold.
        top_k_num:  int. Number of top-scoring boxes kept before NMS.
        agnostic:   bool. True -> one NMS over all classes; False -> per-class NMS
                    (implemented by offsetting boxes per class).
        only_person_class: bool. True -> use only class channel 5 with class id 0.

    Returns:
        output: list (length bs) of arrays [x1, y1, x2, y2, score, class_id],
                or None per image with no detections.
    """
    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096       # (pixels) maximum box width and height
    max_det = 300       # maximum number of detections per image
    time_limit = 10.0   # seconds to quit after
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints: objectness confidence.
        x = x[xc[xi]]
        # If none remain, process next image.
        if not x.shape[0]:
            continue
        # Compute conf = obj_conf * cls_conf.
        x[:, 5:] *= x[:, 4:5]
        # Box (center x, center y, width, height) to (x1, y1, x2, y2).
        box = xywh2xyxy(x[:, :4])
        # Detections matrix n x 6 (xyxy, conf, cls).
        if only_person_class:
            n = x.shape[0]  # number of boxes
            conf = x[:, 5].reshape(-1, 1)
            j = np.zeros(n).reshape(-1, 1)
            x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        else:
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero()
                i, j = i.T, j.T
                x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(float)), axis=1)
            else:  # best class only
                conf = np.max(x[:, 5:], axis=1, keepdims=True)
                j = np.argmax(x[:, 5:], axis=1).reshape(-1, 1)
                x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        # If none remain, process next image.
        n = x.shape[0]  # number of boxes
        if not n:
            continue
        # Batched NMS: offset boxes by class so different classes never overlap.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        # Sort by confidence.
        ind_Sort_by_confidence = np.argsort(x[:, 4])[::-1]
        boxes = boxes[ind_Sort_by_confidence][:top_k_num]
        # NOTE(review): the matching reorders of 'scores' and 'x' are disabled
        # in the original source, so 'scores'/'x' are NOT re-sorted (or top-k
        # truncated) alongside 'boxes'. Kept unchanged here -- confirm this is
        # intentional (e.g. firmware bit-true behaviour) before enabling.
        # scores = scores[ind_Sort_by_confidence][:top_k_num]
        # x = x[ind_Sort_by_confidence][:top_k_num]
        i, index = nms(boxes, scores, iou_thres)
        x = x[index]
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded
    return output


def clip_coords(boxes, img_shape, rectangle):
    """Clip boxes [x1, y1, x2, y2] in place to the image or an ROI rectangle.

    reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    maintainer : doris

    Args:
        boxes:     2D array (n, 4) float64. Boxes [x1, y1, x2, y2];
                   xy1 = top-left, xy2 = bottom-right.
        img_shape: tuple (image_height, image_width, 3).
        rectangle: tuple (left, top, width, height) to clip against; any tuple
                   whose length is not 4 falls back to clipping to the image.
    """
    if len(rectangle) == 4:
        # Clip to the ROI rectangle (left, top, width, height).
        boxes[:, 0] = np.clip(boxes[:, 0], a_min=rectangle[0], a_max=rectangle[0] + rectangle[2])
        boxes[:, 1] = np.clip(boxes[:, 1], a_min=rectangle[1], a_max=rectangle[1] + rectangle[3])
        boxes[:, 2] = np.clip(boxes[:, 2], a_min=rectangle[0], a_max=rectangle[0] + rectangle[2])
        boxes[:, 3] = np.clip(boxes[:, 3], a_min=rectangle[1], a_max=rectangle[1] + rectangle[3])
    else:
        # Clip to the full image: 0 <= x <= width, 0 <= y <= height.
        boxes[:, 0] = np.clip(boxes[:, 0], a_min=0, a_max=img_shape[1])
        boxes[:, 1] = np.clip(boxes[:, 1], a_min=0, a_max=img_shape[0])
        boxes[:, 2] = np.clip(boxes[:, 2], a_min=0, a_max=img_shape[1])
        boxes[:, 3] = np.clip(boxes[:, 3], a_min=0, a_max=img_shape[0])


def scale_coords(padding, scale, rectangle, coords, img0_shape):
    """Map coords from the model-input image back to the original image.

    function : mapping for postprocess
        x' = (x - padding[0]) * scale[0] + rectangle[0]
        y' = (y - padding[2]) * scale[1] + rectangle[1]

    Args:
        padding:    tuple (left, right, top, bottom).
        scale:      tuple (scale_w, scale_h).
        rectangle:  tuple (left, top, width, height), or None for no ROI.
        coords:     2D array (n, 4) float64. Coords [x1, y1, x2, y2] in img1;
                    modified in place.
        img0_shape: tuple (image_h, image_w).

    Returns:
        coords: 2D array (n, 4) float64. Coords [x1, y1, x2, y2] in img0,
                clipped to the image / rectangle.
    """
    if rectangle is None:
        # Length-2 tuple so clip_coords falls through to whole-image clipping.
        rectangle = (0.0, 0.0)
    # x1 and x2: undo left padding, rescale, shift into the rectangle.
    coords[:, 0] = (coords[:, 0] - padding[0]) * scale[0] + rectangle[0]
    coords[:, 2] = (coords[:, 2] - padding[0]) * scale[0] + rectangle[0]
    # y1 and y2: undo top padding, rescale, shift into the rectangle.
    coords[:, 1] = (coords[:, 1] - padding[2]) * scale[1] + rectangle[1]
    coords[:, 3] = (coords[:, 3] - padding[2]) * scale[1] + rectangle[1]
    # Clip coords to (0, 0, image_width, image_height) or the rectangle.
    clip_coords(coords, img0_shape, rectangle)
    return coords


def Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle,
                       conf_thres, iou_thres, top_k_num, vanish_point,
                       filter_large_box_ratio=1.0, agnostic=False):
    """Run NMS and geometric filtering on decoded YOLOv5 predictions.

    maintainer : doris
    Steps:
        (1) non_max_suppression: keep 'top_k_num' boxes, suppress by IOU
        (2) remove boxes whose y2 < ('vanish_point' * img_h)
        (3) remove boxes whose (w, h) > 'filter_large_box_ratio' * (img_w, img_h)
        (4) class id shift (+1) for the downstream classes mapping

    Args:
        model_id:   string. '237' drops the last prediction channel (plate bbox).
        pred:       3D array (bs, n, no), n = total anchors,
                    no = outputs per anchor [x, y, w, h, conf, cls...].
        im0_shape:  tuple (image_h, image_w).
        padding:    tuple (left, right, top, bottom).
        scale:      tuple (scale_w, scale_h).
        rectangle:  tuple (left, top, width, height) or None.
        conf_thres: float. Confidence threshold.
        iou_thres:  float. IOU threshold.
        top_k_num:  int. Number of top-scoring boxes kept before NMS.
        vanish_point: float. bbox y2 must be >= vanish_point * img_h.
        filter_large_box_ratio: float. bbox (w, h) must be <= ratio * (img_w, img_h).
        agnostic:   bool. True -> class-agnostic NMS.

    Returns:
        dets: list of boxes [x1, y1, w, h, score, class_id]; xy1 = top-left.
    """
    img_h, img_w = im0_shape[:2]
    # bbox y2 must be >= vanish_y2, to remove boxes "floating in the air".
    vanish_y2 = vanish_point * float(img_h)
    # bbox (w, h) must be <= ratio * (img_w, img_h), to remove boxes that
    # occupy the entire image.
    filter_large_box_h = filter_large_box_ratio * float(img_h)
    filter_large_box_w = filter_large_box_ratio * float(img_w)
    # Remove plate bbox channel for this model.
    if model_id in ['237']:
        pred = pred[:, :, :-1]
    # Apply NMS.
    pred = non_max_suppression(pred, conf_thres, iou_thres, top_k_num, agnostic)
    dets = []
    for det in pred:  # detections per image
        if det is not None and len(det):
            # Rescale boxes from model-input size to im0 size.
            det[:, :4] = np.around(scale_coords(padding, scale, rectangle, det[:, :4], im0_shape))
            # Drop boxes whose y2 < vanish_y2.
            det = det[det[:, 3] >= vanish_y2]
            # (x1, y1, x2, y2) -> (x1, y1, w, h) for public_field.py.
            det[:, 2] = det[:, 2] - det[:, 0]
            det[:, 3] = det[:, 3] - det[:, 1]
            # Drop boxes that are too wide / too tall.
            det = det[det[:, 2] <= filter_large_box_w]
            det = det[det[:, 3] <= filter_large_box_h]
            # Shift class ids by +1 for the downstream classes mapping.
            det[:, 5] = det[:, 5] + 1.0
            dets.append(det)
    if dets and len(dets) > 0:
        dets = np.asarray(dets)
        # NOTE(review): squeeze(axis=0) assumes a batch size of 1 -- confirm
        # callers never pass multi-image batches here.
        dets = np.squeeze(dets, axis=0)  # remove outer []
        dets = dets.tolist()
    # dets: list of boxes [x1, y1, w, h, score, class_id]; xy1 = top-left.
    return dets


def postprocess_(out, h_ori, w_ori, padding, scale, rectangle, model_id, conf_thres,
                 iou_thres, top_k_num, grids, num_classes, anchors, vanish_point,
                 filter_large_box_ratio, agnostic, **kwargs):
    """Decode raw YOLOv5 feature maps and run the full post-processing chain.

    reference : https://github.com/ultralytics/yolov5/blob/master/models/yolo.py
    maintainer : doris
    Steps:
        (1) reshape the model outputs per detection layer
        (2) decode boxes (xc, yc, w, h) from grid/anchor space
        (3) Yolov5_postprocess: NMS + vanish-point + large-box filtering
            + classes mapping

    Args:
        out:        list of 4D arrays (bs, ny, nx, 3 * no) float32,
                    e.g. (1, 80, 80, 255); ny, nx = feature-map height/width.
        h_ori:      int. Height of the original image.
        w_ori:      int. Width of the original image.
        padding:    tuple (left, right, top, bottom).
        scale:      tuple (scale_w, scale_h).
        rectangle:  tuple (left, top, width, height) or None.
        model_id:   string.
        conf_thres: float. Confidence threshold.
        iou_thres:  float. IOU threshold.
        top_k_num:  int. Number of top-scoring boxes kept before NMS.
        grids:      list of 5D arrays (1, 1, ny, nx, 2) float32.
        num_classes: int. Number of classes.
        anchors:    per-layer flat anchor list, e.g.
                    [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119],
                     [116, 90, 156, 198, 373, 326]].
        vanish_point: float. bbox y2 must be >= vanish_point * img_h.
        filter_large_box_ratio: float. bbox (w, h) <= ratio * (img_w, img_h).
        agnostic:   bool. True -> class-agnostic NMS.

    Returns:
        dets: list of boxes [x1, y1, w, h, score, class_id]; xy1 = top-left.
    """
    im0_shape = (h_ori, w_ori)
    nc = num_classes          # number of classes, ex: 80 (COCO)
    no = nc + 5               # outputs per anchor [x, y, w, h, conf, cls...], ex: 85
    nl = len(anchors)         # number of detection layers, ex: 3
    na = len(anchors[0]) // 2  # number of anchors per layer, ex: 3
    # (nl, na, 2) anchor sizes. Generalized from a hard-coded 3 to 'nl'.
    a = np.asarray(anchors).astype(float).reshape(nl, -1, 2)
    # (nl, 1, na, 1, 1, 2) so it broadcasts against (bs, na, ny, nx, 2).
    anchor_grid = a.reshape(nl, 1, -1, 1, 1, 2)
    # NOTE(review): strides are hard-coded for the usual P3/P4/P5 heads --
    # confirm they match the model when nl != 3.
    stride = np.asarray([8., 16., 32.])
    z = []
    for i in range(nl):  # index of the detection layer
        # out[i]: (bs, ny, nx, na * no) -> x: (bs, na * no, ny, nx).
        x = out[i].transpose([0, 3, 1, 2])
        bs, _, ny, nx = x.shape
        # -> (bs, na, ny, nx, no), ex: (1, 3, 80, 80, 85).
        x = x.reshape((bs, na, no, ny, nx)).transpose([0, 1, 3, 4, 2])
        # grid: (1, 1, ny, nx, 2).
        grid = grids[i]
        # Decode centers (xc, yc) from grid space into pixels.
        x[..., 0:2] = (x[..., 0:2] * 2. - 0.5 + grid) * stride[i]
        # Decode sizes (w, h) against the layer anchors.
        x[..., 2:4] = (x[..., 2:4] * 2) ** 2 * anchor_grid[i]
        # Flatten layer i to (bs, na*ny*nx, no), ex: (1, 19200, 85).
        z.append(x.reshape(bs, -1, no))
    # pred: (bs, total anchors over all layers, no), ex: (1, 25200, 85).
    pred = np.concatenate(z, axis=1)
    # dets: list of boxes [x1, y1, w, h, score, class_id]; xy1 = top-left.
    dets = Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle,
                              conf_thres, iou_thres, top_k_num, vanish_point,
                              filter_large_box_ratio, agnostic)
    return dets