- Move test scripts to tests/ directory for better organization
- Add improved YOLOv5 postprocessing with reference implementation
- Update .gitignore to exclude *.mflow files and include main.spec
- Add debug capabilities and coordinate-scaling improvements
- Enhance multi-series support with proper validation
- Add AGENTS.md documentation and example utilities

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
147 lines · 6.6 KiB · Python
import numpy as np
|
|
|
|
# Constants based on Kneron example_utils implementation

# Number of anchor boxes predicted per grid cell.
YOLO_V3_CELL_BOX_NUM = 3

# YOLOv5 anchor sizes (width, height) in model-input pixels;
# one row of three anchors per output feature map, small to large.
YOLO_V5_ANCHORS = np.array([
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]],
])

# IoU threshold used by the per-class greedy NMS.
NMS_THRESH_YOLOV5 = 0.5

# Hard cap on surviving detections per class.
YOLO_MAX_DETECTION_PER_CLASS = 100
|
|
|
|
|
|
def _sigmoid(x):
|
|
return 1.0 / (1.0 + np.exp(-x))
|
|
|
|
|
|
def _iou(box_src, boxes_dst):
|
|
max_x1 = np.maximum(box_src[0], boxes_dst[:, 0])
|
|
max_y1 = np.maximum(box_src[1], boxes_dst[:, 1])
|
|
min_x2 = np.minimum(box_src[2], boxes_dst[:, 2])
|
|
min_y2 = np.minimum(box_src[3], boxes_dst[:, 3])
|
|
|
|
area_intersection = np.maximum(0, (min_x2 - max_x1)) * np.maximum(0, (min_y2 - max_y1))
|
|
area_src = (box_src[2] - box_src[0]) * (box_src[3] - box_src[1])
|
|
area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 1] - boxes_dst[:, 1] + (boxes_dst[:, 3] - boxes_dst[:, 1]))
|
|
# Correct dst area computation
|
|
area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 3] - boxes_dst[:, 1])
|
|
area_union = area_src + area_dst - area_intersection
|
|
iou = area_intersection / np.maximum(area_union, 1e-6)
|
|
return iou
|
|
|
|
|
|
def _boxes_scale(boxes, hw):
|
|
"""Rollback padding and scale to original image size using HwPreProcInfo."""
|
|
ratio_w = hw.img_width / max(1, float(getattr(hw, 'resized_img_width', hw.img_width)))
|
|
ratio_h = hw.img_height / max(1, float(getattr(hw, 'resized_img_height', hw.img_height)))
|
|
|
|
pad_left = int(getattr(hw, 'pad_left', 0))
|
|
pad_top = int(getattr(hw, 'pad_top', 0))
|
|
|
|
boxes[..., :4] = boxes[..., :4] - np.array([pad_left, pad_top, pad_left, pad_top])
|
|
boxes[..., :4] = boxes[..., :4] * np.array([ratio_w, ratio_h, ratio_w, ratio_h])
|
|
return boxes
|
|
|
|
|
|
def post_process_yolo_v5_reference(inf_list, hw_preproc_info, thresh_value=0.5):
    """
    Reference YOLOv5 postprocess copied and adapted from Kneron example_utils.

    Decodes raw YOLOv5 head outputs into corner boxes in original-image pixel
    coordinates, folds objectness into the class scores, thresholds, and runs
    per-class greedy NMS.

    Args:
        inf_list: list of outputs; each item has .ndarray or is ndarray of shape [1, 255, H, W]
        hw_preproc_info: kp.HwPreProcInfo providing model input and resize/pad info
        thresh_value: confidence threshold (0.0~1.0)

    Returns:
        List of tuples: (x1, y1, x2, y2, score, class_num)
    """
    feature_map_list = []
    candidate_boxes_list = []

    for i in range(len(inf_list)):
        arr = inf_list[i].ndarray if hasattr(inf_list[i], 'ndarray') else inf_list[i]
        # Expect shape [1, 255, H, W]: 255 = 3 anchors * (5 + num_classes).
        anchor_offset = int(arr.shape[1] / YOLO_V3_CELL_BOX_NUM)
        feature_map = arr.transpose((0, 2, 3, 1))
        feature_map = _sigmoid(feature_map)
        feature_map = feature_map.reshape((feature_map.shape[0],
                                           feature_map.shape[1],
                                           feature_map.shape[2],
                                           YOLO_V3_CELL_BOX_NUM,
                                           anchor_offset))

        # Stride of this feature map: model input size vs output grid size.
        ratio_w = float(getattr(hw_preproc_info, 'model_input_width', arr.shape[3])) / arr.shape[3]
        ratio_h = float(getattr(hw_preproc_info, 'model_input_height', arr.shape[2])) / arr.shape[2]
        nrows = arr.shape[2]
        ncols = arr.shape[3]
        # grids[..., 0] is the column (x) index, grids[..., 1] the row (y) index.
        grids = np.expand_dims(np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0)

        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # YOLOv5 center decode: (2*sigmoid - 0.5 + cell) * stride.
            # BUGFIX: the stride vector must match the (x, y) channel order of
            # `grids`; the original multiplied x by ratio_h and y by ratio_w,
            # which is wrong for non-square model inputs (identical when the
            # input is square, which is why it went unnoticed).
            feature_map[..., anchor_idx, 0:2] = (feature_map[..., anchor_idx, 0:2] * 2. - 0.5 + grids) * np.array(
                [ratio_w, ratio_h])
            # YOLOv5 size decode: (2*sigmoid)^2 * anchor; anchors are already
            # expressed in model-input pixels, so no stride factor here.
            feature_map[..., anchor_idx, 2:4] = (feature_map[..., anchor_idx, 2:4] * 2) ** 2 * YOLO_V5_ANCHORS[i][anchor_idx]

            # Convert center/size to corner form (x1, y1, x2, y2).
            feature_map[..., anchor_idx, 0:2] = feature_map[..., anchor_idx, 0:2] - (feature_map[..., anchor_idx, 2:4] / 2.)
            feature_map[..., anchor_idx, 2:4] = feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]

        # Rollback padding and resize to original img size.
        feature_map = _boxes_scale(boxes=feature_map, hw=hw_preproc_info)
        feature_map_list.append(feature_map)

    # Flatten all scales into one (N, 5 + num_classes) table and fold the
    # objectness score into every per-class probability (broadcast, replacing
    # the original's equivalent np.repeat construction).
    predict_bboxes = np.concatenate(
        [np.reshape(fm, (-1, fm.shape[-1])) for fm in feature_map_list], axis=0)
    predict_bboxes[..., 5:] = predict_bboxes[..., 4:5] * predict_bboxes[..., 5:]

    # Keep only rows where at least one class clears the threshold.
    predict_bboxes = predict_bboxes[(predict_bboxes[..., 5:] > thresh_value).any(axis=1)]

    # Original-image bounds used to clamp the emitted integer coordinates.
    H = int(getattr(hw_preproc_info, 'img_height', 0))
    W = int(getattr(hw_preproc_info, 'img_width', 0))

    def _emit(bb, class_idx):
        # Round to nearest integer and clamp inside the image, then append
        # as (x1, y1, x2, y2, score, class_num).
        candidate_boxes_list.append((
            int(max(0, min(bb[0] + 0.5, W - 1))),
            int(max(0, min(bb[1] + 0.5, H - 1))),
            int(max(0, min(bb[2] + 0.5, W - 1))),
            int(max(0, min(bb[3] + 0.5, H - 1))),
            float(bb[class_idx]),
            class_idx - 5
        ))

    # Per-class greedy NMS: zero out the class score of any box overlapping a
    # higher-scoring kept box by more than NMS_THRESH_YOLOV5.
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        class_good_box_count = int(candidate_boxes_mask.sum())

        if class_good_box_count == 1:
            # Single survivor: no suppression needed.
            _emit(predict_bboxes[candidate_boxes_mask][0], class_idx)
        elif class_good_box_count > 1:
            candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
            # Sort descending by this class's score.
            candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]

            for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
                # Skip boxes already suppressed (score zeroed) in earlier passes.
                if candidate_boxes[candidate_box_idx][class_idx] != 0:
                    ious = _iou(candidate_boxes[candidate_box_idx], candidate_boxes[candidate_box_idx + 1:])
                    remove_mask = ious > NMS_THRESH_YOLOV5
                    candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0

            good_count = 0
            for candidate_box_idx in range(candidate_boxes.shape[0]):
                if candidate_boxes[candidate_box_idx, class_idx] > 0:
                    _emit(candidate_boxes[candidate_box_idx], class_idx)
                    good_count += 1
                    # Cap survivors per class, mirroring the Kneron reference.
                    if good_count == YOLO_MAX_DETECTION_PER_CLASS:
                        break

    return candidate_boxes_list
|
|
|