# (source-viewer metadata from the original paste: 430 lines, 21 KiB, Python)

import time
import numpy as np
def xywh2xyxy(x):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function : Convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2], where xy1=top-left,
    #            xy2=bottom-right and (xc, yc) is the center of the bbox.
    # input :
    # x : 2D array with shape(n, 4) dtype(float64). The boxes [xc, yc, w, h]
    # output :
    # y : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2]
    centers = x[:, 0:2]
    half_sizes = x[:, 2:4] / 2
    y = np.zeros_like(x)
    y[:, 0:2] = centers - half_sizes  # top-left corner = center - half size
    y[:, 2:4] = centers + half_sizes  # bottom-right corner = center + half size
    return y
def same_score_selecting_biggest_bbox_area(boxes, scores, areas):
    # maintainer : doris
    # function : Among boxes that share an identical score, reorder (in place) so the
    #            box with the larger area comes first. Input must already be sorted by
    #            score, so equal scores occupy a contiguous run of rows.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2] already sorted by scores. where xy1=top-left, xy2=bottom-right.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # areas : 1D array with shape(n) dtype(float64). The areas corresponds to boxes.
    # output :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes sorted by scores then by areas.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # areas : 1D array with shape(n) dtype(float64). The areas corresponds to boxes.
    # index : 1D array with shape(n) dtype(int64). Permutation mapping new position -> original position.
    from collections import Counter
    # identity permutation of the current bbox positions
    index = np.arange(scores.shape[0])
    # score values that occur more than once
    duplicated = [value for value, count in Counter(scores).items() if count > 1]
    # nothing to reorder when every score is unique
    if not duplicated:
        return boxes, scores, areas, index
    for dup_score in duplicated:
        # contiguous run of rows that share this score (input is score-sorted)
        group = np.where(scores == dup_score)[0]
        start = min(group)
        # descending-area order within the run, expressed as absolute row indexes
        # (argsort + [::-1] kept exactly as-is for bit-true tie ordering)
        order_desc = np.argsort(areas[group])[::-1] + start
        span = slice(start, start + len(order_desc))
        # rewrite the run in the new order (fancy-indexed RHS copies, so no aliasing)
        boxes[span] = boxes[order_desc]
        scores[span] = scores[order_desc]
        areas[span] = areas[order_desc]
        index[span] = index[order_desc]
    return boxes, scores, areas, index
def nms(boxes, scores, thresh):
    # reference : https://github.com/rbgirshick/fast-rcnn/blob/master/lib/utils/nms.py
    # maintainer : doris
    # function : Greedy non-maximum suppression. Remove bbox B if IOU(A,B) > thresh and score(A) > score(B).
    # NOTE: 'boxes' and 'scores' (and the derived 'areas') are reordered IN PLACE by
    # same_score_selecting_biggest_bbox_area so that equal-score boxes are ranked by area.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2] already sorted by scores (descending). where xy1=top-left, xy2=bottom-right.
    # scores : 1D array with shape(n) dtype(float64). The scores corresponds to boxes.
    # thresh : constant float. The threshold of the IOU(intersection over union).
    # output :
    # keep : 1D array with shape(m) dtype(int64). The indexes of the remained boxes, valid against the area-resorted arrays.
    # index : 1D array with shape(n). The equal-score permutation; the caller must reorder its own detections with 'index' before applying 'keep'.
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # 'areas' = (width of bbox) * (height of bbox); the "+ 1" follows the original
    # fast-rcnn inclusive integer-pixel convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Warning: Using same_score_selecting_biggest_bbox_area function would slow speed.
    # The reason of using same_score_selecting_biggest_bbox_area function is for bit-true between system and firmware.
    # 'boxes' with the same score, the larger the area, the higher the front.
    boxes, scores, areas, index = same_score_selecting_biggest_bbox_area(boxes, scores, areas)
    # refresh the column views after the in-place reorder above
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # 'order' = indexes of the still-candidate boxes, highest score first
    order = np.arange(0, scores.shape[0])
    keep = []
    while order.size > 0:
        # 'i' = index of the current highest-scoring candidate; it is always kept
        i = order[0]
        keep.append(i)
        # Vectorized overlap of bbox_i against every remaining candidate:
        # top-left of the intersection = elementwise max of the two top-left corners
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        # bottom-right of the intersection = elementwise min of the two bottom-right corners
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        # clamp width/height at 0.0 so disjoint boxes contribute zero intersection
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # 'inter' : intersection area per remaining candidate, shape matches order[1:]
        inter = w * h
        # 'ovr' : IOU(intersection over union) = intersection / union
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # 'inds' : candidates whose IOU with bbox_i is <= thresh survive this round
        inds = np.where(ovr <= thresh)[0]
        # '+ 1' compensates for the order[1:] offset used in the comparisons above
        order = order[inds + 1]
    # 'keep' : The indexes of the remained boxes.
    keep = np.asarray(keep)
    return keep, index
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, top_k_num=3000, agnostic=False, only_person_class=False):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function :
    # (1) keep the bbox which 'obj_conf' is greater than 'conf_thres'
    # (2) compute 'conf' of all classes = obj_conf * cls_conf
    # (3) convert boxes from [xc, yc, w, h] to [x1, y1, x2, y2]
    # (4) keep the class which 'conf' is greater than 'conf_thres'
    # (5) keep the 'top_k_num' highest-confidence bboxes
    # (6) nms : remove bbox B if IOU(A,B) > iou_thres and score(A) > score(B)
    # input :
    # prediction : 3D array with shape(bs, 'n', 'no') where 'n'= total number of anchors, 'no'= number of outputs per anchor [xc, yc, w, h, obj_conf, cls_conf...], ex: 'no'=85(COCO)
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. Keep at most this many boxes before nms.
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms (implemented via the max_wh coordinate-offset trick below).
    # only_person_class : boolean. True: use only column 5 as the confidence and force class id 0.
    #   NOTE(review): presumably column 5 is the 'person' class in this model family — confirm against the training labels.
    # output :
    # output : list (len bs) of 2D arrays [x1, y1, x2, y2, score, class_id]; entries stay None for images with no detections.
    # 'nc' : number of classes
    nc = prediction[0].shape[1] - 5
    xc = prediction[..., 4] > conf_thres # candidates: objectness above threshold
    # Settings
    max_wh = 4096 # (pixels) maximum box width and height; doubles as the per-class offset for batched nms
    max_det = 300 # maximum number of detections per image
    time_limit = 10.0 # seconds to quit after
    multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction): # image index, image inference
        # Apply constraints: drop anchors whose objectness failed the threshold
        x = x[xc[xi]] # confidence
        # If none remain process next image
        if not x.shape[0]:
            continue
        # Compute conf
        x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])
        # Detections matrix nx6 (xyxy, conf, cls)
        if only_person_class:
            n = x.shape[0] # number of boxes
            conf = x[:, 5].reshape(-1,1) # confidence of class 0 only
            j = np.zeros(n).reshape(-1,1) # class id fixed to 0
            x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        else:
            if multi_label:
                # one output row per (box, class) pair whose conf passes the threshold
                i, j = (x[:, 5:] > conf_thres).nonzero()
                i, j = i.T, j.T
                x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(float)), axis=1)
            else: # best class only
                conf = np.max(x[:, 5:],axis=1, keepdims=True)
                j = np.argmax(x[:, 5:],axis=1).reshape(-1,1)
                x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.reshape(-1) > conf_thres]
        # If none remain process next image
        n = x.shape[0] # number of boxes
        if not n:
            continue
        # Batched NMS: offset every box by class_id * max_wh so boxes of different
        # classes can never overlap (disabled when agnostic=True)
        c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
        boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
        # Sort by confidence (descending) and keep only the top_k_num boxes
        ind_Sort_by_confidence = np.argsort(x[:, 4])[::-1]
        boxes = boxes[ind_Sort_by_confidence][:top_k_num] #
        scores = scores[ind_Sort_by_confidence][:top_k_num] #
        x = x[ind_Sort_by_confidence][:top_k_num] #
        # nms reorders equal-score boxes in place and returns that permutation as
        # 'index'; apply it to x before indexing with the kept indexes 'i'
        i, index = nms(boxes, scores, iou_thres)
        x = x[index]
        if i.shape[0] > max_det: # limit detections
            i = i[:max_det]
        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break # time limit exceeded
    return output
def clip_coords(boxes, img_shape, rectangle):
    # reference : https://github.com/ultralytics/yolov5/blob/master/utils/general.py
    # maintainer : doris
    # function : Clamp bboxes [x1, y1, x2, y2] in place to a valid region. where xy1=top-left, xy2=bottom-right.
    # input :
    # boxes : 2D array with shape(n, 4) dtype(float64). The boxes [x1, y1, x2, y2]; modified in place.
    # img_shape : tuple : (image_height, image_width, 3). Used when no 4-element rectangle is given.
    # rectangle : tuple (left, top, width, height). When it has 4 elements, clip to that
    #             sub-region instead of the full image.
    if len(rectangle) == 4:
        # clip to the rectangle [left, left+width] x [top, top+height]
        x_lo, y_lo = rectangle[0], rectangle[1]
        x_hi, y_hi = rectangle[0] + rectangle[2], rectangle[1] + rectangle[3]
    else:
        # clip to the full image [0, image_width] x [0, image_height]
        x_lo, y_lo = 0, 0
        x_hi, y_hi = img_shape[1], img_shape[0]
    # columns 0/2 are x1/x2, columns 1/3 are y1/y2
    for col, lo, hi in ((0, x_lo, x_hi), (1, y_lo, y_hi), (2, x_lo, x_hi), (3, y_lo, y_hi)):
        boxes[:, col] = np.clip(boxes[:, col], a_min=lo, a_max=hi)
def scale_coords(padding, scale, rectangle, coords, img0_shape):
    # reference : http://59.125.118.185:8088/jenna/kneron_globalconstant/-/blob/master/base/preprocess.py
    # function : Map coords from the model-input image back to the original image, then clip.
    # x' = (x - padding[0]) * scale[0] + rectangle[0]
    # y' = (y - padding[2]) * scale[1] + rectangle[1]
    # input :
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height), or None for the full image
    # coords : 2D array with shape(n, 4) dtype(float64). The coords [x1, y1, x2, y2] in the img1; modified in place.
    # img0_shape : tuple : (image_h, image_w)
    # output :
    # coords : 2D array with shape(n, 4) dtype(float64). The coords [x1, y1, x2, y2] in the img0
    if rectangle is None:
        rectangle = (0.0, 0.0)
    # x columns (x1 at 0, x2 at 2): undo left padding, rescale, shift by ROI left
    coords[:, 0::2] = (coords[:, 0::2] - padding[0]) * scale[0] + rectangle[0]
    # y columns (y1 at 1, y2 at 3): undo top padding, rescale, shift by ROI top
    coords[:, 1::2] = (coords[:, 1::2] - padding[2]) * scale[1] + rectangle[1]
    # Clamp the mapped coords to the image (or to the ROI when rectangle has 4 elements).
    clip_coords(coords, img0_shape, rectangle)
    return coords
def Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle, conf_thres, iou_thres, top_k_num, vanish_point, filter_large_box_ratio=1.0, agnostic=False) :
    # maintainer : doris
    # function :
    # (1) non_max_suppression : keep the 'top_k_num' bboxes and remove the bboxes B if IOU(A,B) > thresh and score(A) > score(B)
    # (2) remove the bbox whose y2 is less than ('vanish_point' * img_h)
    # (3) remove the bbox whose (w,h) are greater than 'filter_large_box_ratio' * (img_w, img_h)
    # (4) shift class ids by +1 for the downstream classes mapping ('model_id' and 'detection_map' define the mapping)
    # input :
    # model_id : string. '237' strips the last channel of 'pred' (plate bbox) before nms.
    # pred: 3D array with shape(bs, 'n', 'no') where 'n'= total number of anchors, 'no'= number of outputs per anchor [xc, yc, w, h, conf, cls...], ex: 'no'=85(COCO)
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height)
    # im0_shape : tuple : (image_h, image_w)
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. The number of the top k
    # vanish_point : constant float. The y2 of bbox should be greater than (vanish_point * img_h)
    # filter_large_box_ratio : constant float. The (width, height) of bbox should be less than filter_large_box_ratio * (img_w, img_h)
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms.
    # output :
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    img_h, img_w = im0_shape[:2]
    # The y2 of bbox should be greater than (vanish_point * img_h), to remove the bbox floating in the air.
    vanish_y2 = vanish_point * float(img_h)
    # The (width, height) of bbox should be less than filter_large_box_ratio * (img_w, img_h), to remove the bbox occupying the entire image.
    filter_large_box_h = filter_large_box_ratio * float(img_h)
    filter_large_box_w = filter_large_box_ratio * float(img_w)
    # remove plate bbox (model '237' carries an extra trailing channel)
    if model_id in ['237'] :
        pred = pred[:,:,:-1]
    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, top_k_num, agnostic)
    dets = []
    for i, det in enumerate(pred): # detections per image
        if det is not None and len(det):
            # Rescale boxes from model-input size to im0 size, then round to integer pixels
            det[:, :4] = np.around(scale_coords(padding, scale, rectangle, det[:, :4], im0_shape))
            # filter bbox which has y2 < vanish_y2
            det = det[det[:,3]>=vanish_y2]
            # (x1,y1,x2,y2) -> (x1,y1,w,h) for public_field.py
            det[:, 2] = det[:, 2] - det[:, 0]
            det[:, 3] = det[:, 3] - det[:, 1]
            # filter bbox which has w > filter_large_box_w
            det = det[det[:,2]<=filter_large_box_w]
            # filter bbox which has h > filter_large_box_h
            det = det[det[:,3]<=filter_large_box_h]
            # shift class ids by +1 (downstream consumer's 1-based class convention)
            det[:, 5] = det[:, 5] + 1.0
            dets.append(det)
    # NOTE(review): np.squeeze(..., axis=0) only works when exactly one image produced
    # detections (bs == 1); with more images np.asarray may build a ragged array and
    # squeeze would raise. Confirm bs == 1 upstream.
    if dets and len(dets) > 0:
        dets = np.asarray(dets)
        dets = np.squeeze(dets, axis=0) # remove outer []
        dets = dets.tolist()
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    return dets
def postprocess_(out, h_ori, w_ori, padding, scale, rectangle, model_id, conf_thres, iou_thres, top_k_num, grids, num_classes, anchors, vanish_point, filter_large_box_ratio, agnostic, **kwargs) :
    # reference : https://github.com/ultralytics/yolov5/blob/master/models/yolo.py
    # maintainer : doris
    # function :
    # (1) manipulate the shape of the model output
    # (2) decode the bboxes (xc,yc,w,h) from the raw head outputs. where (xc,yc) - center of bbox
    # (3) Yolov5_postprocess: non_max_suppression
    # (4) Yolov5_postprocess: remove the bbox which y2 is less than ('vanish_point' * img_h)
    # (5) Yolov5_postprocess: remove the bbox which (w,h) are greater than 'filter_large_box_ratio' * (img_w, img_h)
    # (6) Yolov5_postprocess: 'model_id' and 'detection_map' define the classes_mapping
    #
    # input :
    # out : list of 4D array with shape('bs', 'ny', 'nx', 3 * 'no') dtype(float32) # ex:(1, 80, 80, 255)
    # where 'bs' : batch size
    # where 'no' : number of outputs per anchor [xc, yc, w, h, conf, cls...]
    # where 'ny' and 'nx' are the height and width of the feature map
    # h_ori : Integer. Height of the image
    # w_ori : Integer. Width of the image
    # padding : tuple : (left, right, top, bottom)
    # scale : tuple : (scale_w, scale_h)
    # rectangle : tuple : (left, top, width, height)
    # model_id : string
    # conf_thres : constant float. The threshold of the confidence.
    # iou_thres : constant float. The threshold of the IOU(intersection over union).
    # top_k_num : constant integer. The number of the top k
    # grids : list of 5D array with shape(1, 1, ny, nx, 2) dtype(float32) # ex:(1, 1, 80, 80, 2)
    # num_classes : number of classes
    # anchors : [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    # [layer1[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w],
    # layer2[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w],
    # layer3[anchor1_h,anchor1_w, anchor2_h,anchor2_w, anchor3_h,anchor3_w] ]
    # vanish_point : constant float. The y2 of bbox >= (vanish_point * img_h)
    # filter_large_box_ratio : constant float. The (width, height) of bbox <= filter_large_box_ratio * (img_w, img_h)
    # agnostic : boolean: True, if all classes of bboxes have a nms. False, if 'N' classes of bboxes have 'N' nms.
    # output :
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    im0_shape = (h_ori, w_ori)
    # 'nc' : number of classes, ex: 'nc'=80(COCO)
    nc = num_classes
    # 'no' : number of outputs per anchor [xc, yc, w, h, conf, cls...], ex: 'no'=85(COCO)
    no = nc + 5
    # 'nl' : number of detection layers, ex: 'nl'=3
    nl = len(anchors)
    # 'na' : number of anchors per layer, ex: 'na'=3
    na = len(anchors[0]) // 2
    # NOTE(review): reshape(3, ...) below and the stride table hard-code nl == 3
    # detection layers with strides 8/16/32 — confirm for any variant with a
    # different layer count before reuse.
    # 'a' : 3D array with shape('nl', 'na', 2) # ex: (3, 3, 2)
    a = np.asarray(anchors).astype(float).reshape(3, -1, 2)
    # 'anchor_grid' : 6D array with shape('nl', 1, 'na', 1, 1, 2) # ex: (3, 1, 3, 1, 1, 2)
    anchor_grid = a.reshape(3, 1, -1, 1, 1, 2)
    stride = np.asarray([ 8., 16., 32.])
    z = []
    for i in range(nl):
        # 'i' : index of the detection layers
        # out[i] : 4D array with shape(bs, 'ny', 'nx', 'na' * 'no') # ex:(1, 80, 80, 255)
        # 'x': 4D array with shape(bs, 'na' * 'no', 'ny', 'nx') (channels-first)
        x = out[i].transpose([0, 3, 1, 2])
        bs, _, ny, nx = x.shape
        # 'x': 5D array with shape(bs, 'na', 'ny', 'nx', 'no') # ex:(1, 3, 80, 80, 85)
        x = x.reshape((bs, na, no, ny, nx)).transpose([0, 1, 3, 4, 2])
        # 'grid': 5D array with shape(1, 1, ny, nx, 2)
        grid = grids[i]
        # xc,yc : yolov5 v4+ decode; assumes sigmoid was already applied upstream — TODO confirm
        x[..., 0:2] = (x[..., 0:2] * 2. - 0.5 + grid) * stride[i]
        # w,h : (2*sig)^2 * anchor, same yolov5 decode convention
        x[..., 2:4] = (x[..., 2:4] * 2) ** 2 * anchor_grid[i]
        # z[i]: 3D array with shape(bs, 'n_i', 'no') # ex:(1, 19200, 85), (1, 4800, 85), (1, 1200, 85)
        # where 'n_i' = number of anchors in layer_i = 'na'*'ny'*'nx'
        z.append(x.reshape(bs, -1, no))
    # pred: 3D array with shape(bs, 'n', 'no') where 'n' = 'n_0' + 'n_1' + 'n_2' # ex:(1, 25200, 85)
    pred = np.concatenate(z, axis=1)
    # dets : List of bboxes[x1, y1, w, h, score, class_id]. where xy1=top-left
    dets = Yolov5_postprocess(model_id, pred, im0_shape, padding, scale, rectangle, conf_thres, iou_thres, top_k_num, vanish_point, filter_large_box_ratio, agnostic)
    return dets