KNEO-Academy/src/models/custom_inference_worker.py
abin 7e323cf3e1 Fix: resolve 5 bugs found during project onboarding health check
- custom_inference_worker: reuse existing device_group from DeviceController
  to avoid double kp.connect_devices() conflict on same USB port
- custom_inference_worker: add TYPE_CHECKING guard for kp type annotations
  to prevent potential NameError at import time
- utilities_screen: replace missing back_arrow.png with text arrow (←)
- utilities_screen: add set_device_controller() so AppController can inject
  MainWindow's shared DeviceController instance
- main.py: wire UtilitiesScreen to share MainWindow's DeviceController
- video_thread: emit camera_error_signal on failure and max-retry exhaustion
- media_controller: connect camera_error_signal and display error on canvas
- media_panel: fix pause button using wrong delete icon; use video_normal SVG

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 14:33:37 +08:00

572 lines
22 KiB
Python

"""
custom_inference_worker.py - Custom Inference Worker
This module provides a worker thread for running inference using user-uploaded
custom models. It uses YOLO V5 pre/post-processing logic for object detection.
"""
from __future__ import annotations
import os
import time
import queue
import cv2
import numpy as np
from typing import List, TYPE_CHECKING
from PyQt5.QtCore import QThread, pyqtSignal
if TYPE_CHECKING:
import kp
# COCO dataset class names (80 classes); list index == YOLO class id.
# Used as the fallback label set when the user supplies no custom labels
# (see CustomInferenceWorkerThread.run_single_inference).
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
    'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
class ExampleBoundingBox:
    """Single detection box: corner coordinates, confidence score and class id."""

    def __init__(self,
                 x1: int = 0,
                 y1: int = 0,
                 x2: int = 0,
                 y2: int = 0,
                 score: float = 0,
                 class_num: int = 0):
        # Top-left corner (x1, y1) and bottom-right corner (x2, y2).
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        # Detection confidence and zero-based class index.
        self.score = score
        self.class_num = class_num

    def get_member_variable_dict(self) -> dict:
        """Return the box fields as a plain dict (useful for logging/serializing)."""
        return dict(
            x1=self.x1,
            y1=self.y1,
            x2=self.x2,
            y2=self.y2,
            score=self.score,
            class_num=self.class_num,
        )
class ExampleYoloResult:
    """Aggregate YOLO output: class count, box count and the list of boxes."""

    def __init__(self,
                 class_count: int = 0,
                 box_count: int = 0,
                 box_list: List[ExampleBoundingBox] = None):
        self.class_count = class_count
        self.box_count = box_count
        # None sentinel avoids the shared-mutable-default pitfall: each
        # instance created without a box_list gets its own fresh list.
        self.box_list = [] if box_list is None else box_list

    def get_member_variable_dict(self) -> dict:
        """Return the result as a dict; box_list becomes an index-keyed dict of box dicts."""
        return {
            'class_count': self.class_count,
            'box_count': self.box_count,
            'box_list': {
                idx: box.get_member_variable_dict()
                for idx, box in enumerate(self.box_list)
            },
        }
# YOLO constants
YOLO_V3_CELL_BOX_NUM = 3  # anchor boxes predicted per grid cell
NMS_THRESH_YOLOV5 = 0.5  # IoU threshold for greedy non-maximum suppression
YOLO_MAX_DETECTION_PER_CLASS = 100  # cap on boxes kept per class after NMS
# Per-scale anchor (width, height) pairs, one row per YOLOv5 output stride.
# NOTE(review): name is misspelled ("ANCHERS" -> "ANCHORS") but it is
# referenced by post_process_yolo_v5, so renaming needs a coordinated change.
YOLO_V5_ANCHERS = np.array([
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]]
])
def _sigmoid(x):
return 1. / (1. + np.exp(-x))
def _iou(box_src, boxes_dst):
max_x1 = np.maximum(box_src[0], boxes_dst[:, 0])
max_y1 = np.maximum(box_src[1], boxes_dst[:, 1])
min_x2 = np.minimum(box_src[2], boxes_dst[:, 2])
min_y2 = np.minimum(box_src[3], boxes_dst[:, 3])
area_intersection = np.maximum(0, (min_x2 - max_x1)) * np.maximum(0, (min_y2 - max_y1))
area_src = (box_src[2] - box_src[0]) * (box_src[3] - box_src[1])
area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 3] - boxes_dst[:, 1])
area_union = area_src + area_dst - area_intersection
iou = area_intersection / area_union
return iou
def _boxes_scale(boxes, hardware_preproc_info: kp.HwPreProcInfo):
"""Scale boxes based on hardware preprocessing info."""
ratio_w = hardware_preproc_info.img_width / hardware_preproc_info.resized_img_width
ratio_h = hardware_preproc_info.img_height / hardware_preproc_info.resized_img_height
boxes[..., :4] = boxes[..., :4] - np.array([
hardware_preproc_info.pad_left, hardware_preproc_info.pad_top,
hardware_preproc_info.pad_left, hardware_preproc_info.pad_top
])
boxes[..., :4] = boxes[..., :4] * np.array([ratio_w, ratio_h, ratio_w, ratio_h])
return boxes
def post_process_yolo_v5(inference_float_node_output_list: List[kp.InferenceFloatNodeOutput],
                         hardware_preproc_info: kp.HwPreProcInfo,
                         thresh_value: float,
                         with_sigmoid: bool = True) -> ExampleYoloResult:
    """YOLO V5 post-processing function.

    Decodes the raw float output nodes (one per YOLO scale) into bounding
    boxes in original-image coordinates, applies the per-class confidence
    threshold and greedy NMS, then clips surviving boxes to the image bounds.

    Args:
        inference_float_node_output_list: One float output node per scale,
            each shaped (N, C, H, W) with C == YOLO_V3_CELL_BOX_NUM * (5 + classes).
        hardware_preproc_info: Device pre-processing info used to undo
            padding/resizing when mapping boxes back to the source image.
        thresh_value: Per-class confidence threshold.
        with_sigmoid: Apply sigmoid to the raw outputs first (True for model
            heads that do not already include it).

    Returns:
        ExampleYoloResult containing the kept boxes.
    """
    feature_map_list = []
    candidate_boxes_list = []
    for i in range(len(inference_float_node_output_list)):
        # Channels per anchor: 5 (x, y, w, h, objectness) + num_classes.
        anchor_offset = int(inference_float_node_output_list[i].shape[1] / YOLO_V3_CELL_BOX_NUM)
        # NCHW -> NHWC, then split channels into (anchor, per-anchor fields).
        feature_map = inference_float_node_output_list[i].ndarray.transpose((0, 2, 3, 1))
        feature_map = _sigmoid(feature_map) if with_sigmoid else feature_map
        feature_map = feature_map.reshape((
            feature_map.shape[0],
            feature_map.shape[1],
            feature_map.shape[2],
            YOLO_V3_CELL_BOX_NUM,
            anchor_offset
        ))
        # Stride of this scale relative to the model input resolution.
        ratio_w = hardware_preproc_info.model_input_width / inference_float_node_output_list[i].shape[3]
        ratio_h = hardware_preproc_info.model_input_height / inference_float_node_output_list[i].shape[2]
        nrows = inference_float_node_output_list[i].shape[2]
        ncols = inference_float_node_output_list[i].shape[3]
        # (1, H, W, 2) grid of (col, row) cell coordinates for the decode step.
        grids = np.expand_dims(np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0)
        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # YOLOv5 decode: xy = (2*sigmoid(t) - 0.5 + grid) * stride.
            # NOTE(review): the stride pair is applied as [ratio_h, ratio_w]
            # onto the (x, y) columns, which looks swapped; it is harmless for
            # square feature maps (ratio_h == ratio_w) -- confirm against the
            # reference Kneron YOLOv5 example for non-square inputs.
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] * 2. - 0.5 + grids
            ) * np.array([ratio_h, ratio_w])
            # wh = (2*sigmoid(t))^2 * anchor size for this scale.
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 2:4] * 2
            ) ** 2 * YOLO_V5_ANCHERS[i][anchor_idx]
            # Convert center/size form to corner form: (x1, y1), then (x2, y2).
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] - (feature_map[..., anchor_idx, 2:4] / 2.)
            )
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]
            )
        # Undo device padding/resizing: back to original-image coordinates.
        feature_map = _boxes_scale(boxes=feature_map, hardware_preproc_info=hardware_preproc_info)
        feature_map_list.append(feature_map)
    # Flatten every scale into one (N, 5 + num_classes) candidate matrix.
    predict_bboxes = np.concatenate([
        np.reshape(feature_map, (-1, feature_map.shape[-1])) for feature_map in feature_map_list
    ], axis=0)
    # Final class scores = objectness (col 4) * per-class probability (cols 5..).
    predict_bboxes[..., 5:] = np.repeat(
        predict_bboxes[..., 4][..., np.newaxis],
        predict_bboxes[..., 5:].shape[1],
        axis=1
    ) * predict_bboxes[..., 5:]
    # Keep only candidates where at least one class clears the threshold.
    predict_bboxes_mask = (predict_bboxes[..., 5:] > thresh_value).sum(axis=1)
    predict_bboxes = predict_bboxes[predict_bboxes_mask >= 1]
    # NMS
    # Greedy per-class NMS: suppressed boxes get their class score zeroed.
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        class_good_box_count = candidate_boxes_mask.sum()
        if class_good_box_count == 1:
            # Single survivor: no suppression needed, emit it directly.
            candidate_boxes_list.append(
                ExampleBoundingBox(
                    x1=round(float(predict_bboxes[candidate_boxes_mask, 0][0]), 4),
                    y1=round(float(predict_bboxes[candidate_boxes_mask, 1][0]), 4),
                    x2=round(float(predict_bboxes[candidate_boxes_mask, 2][0]), 4),
                    y2=round(float(predict_bboxes[candidate_boxes_mask, 3][0]), 4),
                    score=round(float(predict_bboxes[candidate_boxes_mask, class_idx][0]), 4),
                    class_num=class_idx - 5
                )
            )
        elif class_good_box_count > 1:
            # Sort descending by this class's score, then suppress overlaps.
            candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
            candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]
            for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
                if 0 != candidate_boxes[candidate_box_idx][class_idx]:
                    remove_mask = _iou(
                        box_src=candidate_boxes[candidate_box_idx],
                        boxes_dst=candidate_boxes[candidate_box_idx + 1:]
                    ) > NMS_THRESH_YOLOV5
                    candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0
            good_count = 0
            for candidate_box_idx in range(candidate_boxes.shape[0]):
                if candidate_boxes[candidate_box_idx, class_idx] > 0:
                    candidate_boxes_list.append(
                        ExampleBoundingBox(
                            x1=round(float(candidate_boxes[candidate_box_idx, 0]), 4),
                            y1=round(float(candidate_boxes[candidate_box_idx, 1]), 4),
                            x2=round(float(candidate_boxes[candidate_box_idx, 2]), 4),
                            y2=round(float(candidate_boxes[candidate_box_idx, 3]), 4),
                            score=round(float(candidate_boxes[candidate_box_idx, class_idx]), 4),
                            class_num=class_idx - 5
                        )
                    )
                    good_count += 1
                    if YOLO_MAX_DETECTION_PER_CLASS == good_count:
                        break
    # Round-and-clip every kept box to [0, img_width-1] x [0, img_height-1].
    # (Loop variable `candidate_boxes` is unused; indexing is by `idx`.)
    for idx, candidate_boxes in enumerate(candidate_boxes_list):
        candidate_boxes_list[idx].x1 = 0 if (candidate_boxes_list[idx].x1 + 0.5 < 0) else int(
            candidate_boxes_list[idx].x1 + 0.5)
        candidate_boxes_list[idx].y1 = 0 if (candidate_boxes_list[idx].y1 + 0.5 < 0) else int(
            candidate_boxes_list[idx].y1 + 0.5)
        candidate_boxes_list[idx].x2 = int(hardware_preproc_info.img_width - 1) if (
            candidate_boxes_list[idx].x2 + 0.5 > hardware_preproc_info.img_width - 1
        ) else int(candidate_boxes_list[idx].x2 + 0.5)
        candidate_boxes_list[idx].y2 = int(hardware_preproc_info.img_height - 1) if (
            candidate_boxes_list[idx].y2 + 0.5 > hardware_preproc_info.img_height - 1
        ) else int(candidate_boxes_list[idx].y2 + 0.5)
    return ExampleYoloResult(
        class_count=predict_bboxes.shape[1] - 5 if len(predict_bboxes) > 0 else 0,
        box_count=len(candidate_boxes_list),
        box_list=candidate_boxes_list
    )
def preprocess_frame(frame, target_size=640):
    """
    Preprocess image frame for YOLO inference.

    Args:
        frame: Original BGR image (numpy array).
        target_size (int): Target size for resizing (default 640 for YOLO).

    Returns:
        tuple: (processed_frame in BGR565 format, original_width, original_height)

    Raises:
        ValueError: If input frame is None.
    """
    if frame is None:
        # ValueError is more precise than a bare Exception and stays
        # backward compatible with callers that catch Exception.
        raise ValueError("Input frame is None")
    original_height, original_width = frame.shape[:2]
    # Square-resize to the model input size (aspect ratio is NOT preserved;
    # postprocess() rescales boxes with the matching width/height ratios).
    resized_frame = cv2.resize(frame, (target_size, target_size))
    # Convert to the BGR565 format expected by the Kneron device pipeline.
    frame_bgr565 = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2BGR565)
    return frame_bgr565, original_width, original_height
def postprocess(output_list, hw_preproc_info, original_width, original_height, target_size=640, thresh=0.2):
    """
    Post-process YOLO model output.

    Runs YOLO V5 decoding/NMS, then rescales the resulting boxes from the
    square model frame back to the original image dimensions (in place).

    Args:
        output_list: List of model output nodes.
        hw_preproc_info: Hardware preprocessing info from Kneron device.
        original_width (int): Original image width.
        original_height (int): Original image height.
        target_size (int): Resize target size used during preprocessing.
        thresh (float): Detection confidence threshold.

    Returns:
        ExampleYoloResult: YOLO detection results with bounding boxes.
    """
    detections = post_process_yolo_v5(
        inference_float_node_output_list=output_list,
        hardware_preproc_info=hw_preproc_info,
        thresh_value=thresh
    )
    # Undo the square resize done in preprocess_frame (truncating to int,
    # matching the original behavior).
    scale_x = original_width / target_size
    scale_y = original_height / target_size
    for det in detections.box_list:
        det.x1, det.y1, det.x2, det.y2 = (
            int(det.x1 * scale_x),
            int(det.y1 * scale_y),
            int(det.x2 * scale_x),
            int(det.y2 * scale_y),
        )
    return detections
class CustomInferenceWorkerThread(QThread):
    """
    Custom model inference worker thread.

    Uses user-uploaded model and firmware to perform inference on video frames.

    Attributes:
        inference_result_signal: Signal emitted with inference results
        frame_queue: Queue containing frames to process
        device_group: Connected Kneron device group
        model_descriptor: Loaded model descriptor
        custom_labels: Custom class labels (optional)
    """
    # Emitted with the dict produced by run_single_inference() (keys:
    # "num_boxes", "bounding boxes", "results").
    inference_result_signal = pyqtSignal(object)

    def __init__(self, frame_queue, min_interval=0.5, mse_threshold=500):
        """
        Initialize the CustomInferenceWorkerThread.

        Args:
            frame_queue: Queue containing frames to process.
            min_interval (float): Minimum seconds between inferences.
            mse_threshold (float): MSE threshold for detecting frame changes.
        """
        super().__init__()
        self.frame_queue = frame_queue
        self.min_interval = min_interval
        self.mse_threshold = mse_threshold
        self._running = True
        # Throttling and frame-change-detection state (see run()).
        self.last_inference_time = 0
        self.last_frame = None
        self.cached_result = None
        # Populated by the caller before start(); read in initialize_device():
        # custom_model_path, custom_scpu_path, custom_ncpu_path, usb_port_id,
        # custom_labels and optionally a pre-connected device_group.
        self.input_params = {}
        # Device and model related
        self.device_group = None
        self.model_descriptor = None
        self.is_initialized = False
        # Custom labels
        self.custom_labels = None

    def initialize_device(self):
        """
        Initialize device, upload firmware and model.

        If a device_group is already provided in input_params (connected by
        DeviceController), reuse it and skip connect_devices to avoid double
        connection conflicts with the Kneron SDK.

        Returns:
            bool: True if initialization successful, False otherwise.
        """
        try:
            model_path = self.input_params.get("custom_model_path")
            scpu_path = self.input_params.get("custom_scpu_path")
            ncpu_path = self.input_params.get("custom_ncpu_path")
            port_id = self.input_params.get("usb_port_id", 0)
            # Load custom labels
            self.custom_labels = self.input_params.get("custom_labels")
            if self.custom_labels:
                print(f'[Custom Labels] Loaded {len(self.custom_labels)} classes')
            else:
                print('[Custom Labels] Not provided, using default COCO classes')
            if not all([model_path, scpu_path, ncpu_path]):
                print("Missing required file paths")
                return False
            # Imported here (not at module top) so the module can be imported
            # without the Kneron SDK installed; see TYPE_CHECKING guard above.
            import kp
            # Reuse existing device_group if provided to avoid double connection
            existing_device_group = self.input_params.get("device_group")
            if existing_device_group is not None:
                print('[Reusing existing device connection]')
                self.device_group = existing_device_group
            else:
                print('[Connecting device]')
                self.device_group = kp.core.connect_devices(usb_port_ids=[port_id])
                print(' - Connection successful')
            kp.core.set_timeout(device_group=self.device_group, milliseconds=5000)
            # Upload firmware
            print('[Uploading firmware]')
            kp.core.load_firmware_from_file(self.device_group, scpu_path, ncpu_path)
            print(' - Firmware upload successful')
            # Upload model
            print('[Uploading model]')
            self.model_descriptor = kp.core.load_model_from_file(
                self.device_group,
                file_path=model_path
            )
            print(' - Model upload successful')
            self.is_initialized = True
            return True
        except Exception as e:
            # Broad catch is deliberate: any SDK/IO failure is reported and
            # turned into a False return so the worker degrades gracefully.
            print(f"Error initializing device: {e}")
            import traceback
            print(traceback.format_exc())
            return False

    def run_single_inference(self, frame):
        """
        Execute a single inference on the given frame.

        Args:
            frame: Input image frame (numpy array in BGR format).

        Returns:
            dict: Inference results with bounding boxes and labels, or None on error.
        """
        try:
            # Lazy device setup on the first call from the worker thread.
            if not self.is_initialized:
                if not self.initialize_device():
                    return None
            # Preprocess the frame into the device's expected input format.
            img_processed, original_width, original_height = preprocess_frame(frame)
            # Build the generic inference descriptor.
            import kp
            descriptor = kp.GenericImageInferenceDescriptor(
                model_id=self.model_descriptor.models[0].id,
                inference_number=0,
                input_node_image_list=[
                    kp.GenericInputNodeImage(
                        image=img_processed,
                        image_format=kp.ImageFormat.KP_IMAGE_FORMAT_RGB565,
                        resize_mode=kp.ResizeMode.KP_RESIZE_ENABLE,
                        padding_mode=kp.PaddingMode.KP_PADDING_CORNER,
                        normalize_mode=kp.NormalizeMode.KP_NORMALIZE_KNERON
                    )
                ]
            )
            # Run inference: send the frame, then block for the result.
            kp.inference.generic_image_inference_send(self.device_group, descriptor)
            result = kp.inference.generic_image_inference_receive(self.device_group)
            # Retrieve each float output node in CHW channel ordering.
            output_list = []
            for node_idx in range(result.header.num_output_node):
                node_output = kp.inference.generic_inference_retrieve_float_node(
                    node_idx=node_idx,
                    generic_raw_result=result,
                    channels_ordering=kp.ChannelOrdering.KP_CHANNEL_ORDERING_CHW
                )
                output_list.append(node_output)
            # Post-process into boxes scaled back to the original frame size.
            yolo_result = postprocess(
                output_list,
                result.header.hw_pre_proc_info_list[0],
                original_width,
                original_height
            )
            # Convert to standard format
            bounding_boxes = [
                [box.x1, box.y1, box.x2, box.y2] for box in yolo_result.box_list
            ]
            # Use custom labels or default COCO classes
            labels_to_use = self.custom_labels if self.custom_labels else COCO_CLASSES
            results = []
            for box in yolo_result.box_list:
                if 0 <= box.class_num < len(labels_to_use):
                    results.append(labels_to_use[box.class_num])
                else:
                    # Class id outside the label set: fall back to a synthetic name.
                    results.append(f"class_{box.class_num}")
            return {
                "num_boxes": len(yolo_result.box_list),
                "bounding boxes": bounding_boxes,
                "results": results
            }
        except Exception as e:
            # Report and return None so run() keeps processing later frames.
            print(f"Error during inference: {e}")
            import traceback
            print(traceback.format_exc())
            return None

    def run(self):
        """
        Main execution loop.

        Continuously processes frames from the queue, runs inference,
        and emits results. Uses MSE-based frame change detection.
        """
        while self._running:
            try:
                frame = self.frame_queue.get(timeout=0.1)
            except queue.Empty:
                # Short timeout keeps the loop responsive to stop().
                continue
            current_time = time.time()
            if current_time - self.last_inference_time < self.min_interval:
                # Drop this frame to honor the minimum inference interval.
                continue
            # MSE detection to optimize performance
            if self.last_frame is not None:
                if frame.shape != self.last_frame.shape:
                    # Resolution changed: cached comparison is invalid.
                    self.last_frame = None
                    self.cached_result = None
                else:
                    try:
                        mse = np.mean((frame.astype(np.float32) - self.last_frame.astype(np.float32)) ** 2)
                        if mse < self.mse_threshold and self.cached_result is not None:
                            # Frame barely changed: re-emit the cached result
                            # instead of running a new device inference.
                            self.inference_result_signal.emit(self.cached_result)
                            continue
                    except Exception as e:
                        print(f"Error calculating MSE: {e}")
                        self.last_frame = None
                        self.cached_result = None
            # Execute inference
            result = self.run_single_inference(frame)
            self.last_inference_time = current_time
            self.last_frame = frame.copy()
            self.cached_result = result
            if result is not None:
                self.inference_result_signal.emit(result)
        # Disconnect device
        self.cleanup()
        self.quit()

    def cleanup(self):
        """Clean up resources.

        Only disconnects device if this worker created the connection itself
        (i.e. no device_group was provided via input_params).
        """
        try:
            # Idempotent: device_group is cleared below, so a second call
            # (run() exit followed by stop()) is a no-op.
            if self.device_group is not None:
                owned_by_worker = self.input_params.get("device_group") is None
                if owned_by_worker:
                    import kp
                    kp.core.disconnect_devices(self.device_group)
                    print('[Device disconnected]')
                else:
                    print('[Device connection owned by DeviceController, skipping disconnect]')
                self.device_group = None
        except Exception as e:
            print(f"Error cleaning up resources: {e}")

    def stop(self):
        """Stop the worker thread and clean up resources."""
        self._running = False
        # wait() blocks until run() finishes (which also calls cleanup());
        # the extra cleanup() below is a safe no-op fallback.
        self.wait()
        self.cleanup()