63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
import numpy as np
|
|
'''
|
|
Get maximum predictions from output heatmaps
|
|
:param heatmaps: network output heatmaps of size (1, 64, 48, 17)
|
|
:return: 17 x 2 kpts, 17 max values
|
|
'''
|
|
def _get_max_preds(heatmaps):
|
|
N, W, H, K = heatmaps.shape
|
|
preds = np.zeros((N,K,2))
|
|
maxvals = np.zeros((N,K,2))
|
|
for n in range(N):
|
|
for k in range(K):
|
|
heatmap = heatmaps[n,:,:,k]
|
|
h_idx = np.unravel_index(np.argmax(heatmap),(W,H)) #is reverse of original actually
|
|
max_val = np.amax(heatmap)
|
|
maxvals[n] =[max_val,max_val]
|
|
preds[n][k][0],preds[n][k][1] = h_idx[1],h_idx[0]
|
|
preds = np.where(maxvals > 0.0, preds, -1)
|
|
return preds, maxvals
|
|
'''
|
|
Convert heatmap coords and scale back to original image
|
|
:param coords. 17 x 2 heatmap coords
|
|
:param center preprocessing bbox center for transforming heatmap coords back to original image coords
|
|
:param scale preprocessing scale for transforming heatmap coords back to original image coords
|
|
:param output_size model heatmap output size
|
|
:return: 17 x 2 kpts
|
|
'''
|
|
def transform_preds(coords, center, scale, output_size):
    """Map heatmap-space keypoint coords back to original-image coords.

    :param coords: (K, 2) array of (x, y) heatmap coordinates.
    :param center: per-axis bbox center used during preprocessing.
    :param scale: per-axis scale used during preprocessing.
    :param output_size: (width, height) of the model's heatmap output.
    :return: (K, 2) array of coordinates in the original image frame.
    """
    mapped = np.ones_like(coords)
    # Same affine map on each axis: scale up, then shift by center - scale/2.
    for axis in range(2):
        factor = scale[axis] / output_size[axis]
        mapped[:, axis] = coords[:, axis] * factor + center[axis] - scale[axis] * 0.5

    return mapped
|
|
'''
|
|
Convert network output heatmaps of dimension output shape to 17 x 2 kpts. Will also scale back to original image size
|
|
:param outputs outputs from network of shape. size is (1, 1, 64, 48, 17)
|
|
:param centers preprocessing bbox centers for transforming heatmap coords back to original image coords
|
|
:param scales preprocessing scales for transforming heatmap coords back to original image coords
|
|
:return: 17 x 2 kpts as flattened list
|
|
'''
|
|
def postprocess_(outputs, centers, scales, **kwargs):
    """Turn raw network heatmaps into flattened keypoints in image coords.

    :param outputs: network output of shape (1, 1, 64, 48, 17); only the
        first element (the heatmap batch) is used.
    :param centers: per-sample bbox centers from preprocessing.
    :param scales: per-sample scales from preprocessing.
    :return: keypoints of the first sample as a flat [x0, y0, x1, y1, ...] list.
    """
    heatmap_batch = outputs[0]
    N, H, W, K = heatmap_batch.shape
    preds, _ = _get_max_preds(heatmap_batch)

    # Quarter-pixel refinement: nudge each peak toward the larger neighbour.
    for n in range(N):
        for k in range(K):
            hm = heatmap_batch[n, :, :, k]
            px = int(preds[n][k][0])
            py = int(preds[n][k][1])
            # Only refine peaks with a full neighbourhood inside the map.
            if 1 < px < W - 1 and 1 < py < H - 1:
                grad = np.array([
                    hm[py][px + 1] - hm[py][px - 1],
                    hm[py + 1][px] - hm[py - 1][px],
                ])
                preds[n][k] += np.sign(grad) * .25

    # Map every sample's keypoints back to original-image coordinates.
    for i in range(N):
        preds[i] = transform_preds(preds[i], centers[i], scales[i], [W, H])

    return preds[0].flatten().tolist()