63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
import numpy as np
|
|
'''
|
|
Get maximum predictions from output heatmaps
|
|
:param heatmaps: network output heatmaps of size (1, 64, 48, 17)
|
|
:return: 17 x 2 kpts, 17 max values
|
|
'''
|
|
def _get_max_preds(heatmaps):
|
|
N, W, H, K = heatmaps.shape
|
|
preds = np.zeros((N,K,2))
|
|
maxvals = np.zeros((N,K,2))
|
|
for n in range(N):
|
|
for k in range(K):
|
|
heatmap = heatmaps[n,:,:,k]
|
|
h_idx = np.unravel_index(np.argmax(heatmap),(W,H)) #is reverse of original actually
|
|
max_val = np.amax(heatmap)
|
|
maxvals[n] =[max_val,max_val]
|
|
preds[n][k][0],preds[n][k][1] = h_idx[1],h_idx[0]
|
|
preds = np.where(maxvals > 0.0, preds, -1)
|
|
return preds, maxvals
|
|
'''
|
|
Convert heatmap coords and scale back to original image
|
|
:param coords. 17 x 2 heatmap coords
|
|
:param center preprocessing bbox center for transforming heatmap coords back to original image coords
|
|
:param scale preprocessing scale for transforming heatmap coords back to original image coords
|
|
:param output_size model heatmap output size
|
|
:return: 17 x 2 kpts
|
|
'''
|
|
def transform_preds(coords, center, scale, output_size):
    """Map heatmap-space keypoint coords back to original-image coords.

    :param coords: (K, 2) array of (x, y) heatmap coordinates.
    :param center: per-axis bbox center used during preprocessing.
    :param scale: per-axis scale used during preprocessing.
    :param output_size: (width, height) of the model's heatmap output.
    :return: (K, 2) array of coordinates in the original image frame.
    """
    mapped = np.ones_like(coords)
    # Same affine map on each axis: scale up, then shift by center - scale/2.
    for axis in range(2):
        factor = scale[axis] / output_size[axis]
        mapped[:, axis] = coords[:, axis] * factor + center[axis] - scale[axis] * 0.5

    return mapped
|
|
'''
|
|
Convert network output heatmaps of dimension output shape to 17 x 2 kpts. Will also scale back to original image size
|
|
:param outputs outputs from network of shape. size is (1, 1, 64, 48, 17)
|
|
:param centers preprocessing bbox centers for transforming heatmap coords back to original image coords
|
|
:param scales preprocessing scales for transforming heatmap coords back to original image coords
|
|
:return: 17 x 2 kpts as flattened list
|
|
'''
|
|
def postprocess_(outputs, centers, scales, **kwargs):
    """Turn raw network heatmaps into flattened keypoints in image coords.

    :param outputs: network output of shape (1, 1, 64, 48, 17); only the
        first element (the heatmap batch) is used.
    :param centers: per-sample bbox centers from preprocessing.
    :param scales: per-sample scales from preprocessing.
    :return: keypoints of the first sample as a flat [x0, y0, x1, y1, ...] list.
    """
    heatmap_batch = outputs[0]
    N, H, W, K = heatmap_batch.shape
    preds, _ = _get_max_preds(heatmap_batch)

    # Quarter-pixel refinement: nudge each peak toward the larger neighbour.
    for n in range(N):
        for k in range(K):
            hm = heatmap_batch[n, :, :, k]
            px = int(preds[n][k][0])
            py = int(preds[n][k][1])
            # Only refine peaks with a full neighbourhood inside the map.
            if 1 < px < W - 1 and 1 < py < H - 1:
                grad = np.array([
                    hm[py][px + 1] - hm[py][px - 1],
                    hm[py + 1][px] - hm[py - 1][px],
                ])
                preds[n][k] += np.sign(grad) * .25

    # Map every sample's keypoints back to original-image coordinates.
    for i in range(N):
        preds[i] = transform_preds(preds[i], centers[i], scales[i], [W, H])

    return preds[0].flatten().tolist()