"""
custom_inference_worker.py - Custom Inference Worker

This module provides a worker thread for running inference using user-uploaded
custom models. It uses YOLO V5 pre/post-processing logic for object detection.
"""
from __future__ import annotations

import os
import queue
import time
import traceback
from typing import List, Optional, TYPE_CHECKING

import cv2
import numpy as np
from PyQt5.QtCore import QThread, pyqtSignal

if TYPE_CHECKING:
    import kp

# COCO dataset class names (80 classes)
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush'
]


class ExampleBoundingBox:
    """Bounding box descriptor.

    Holds the two corners (x1, y1) / (x2, y2), the detection score and the
    zero-based class index of one detection.
    """

    def __init__(self,
                 x1: int = 0,
                 y1: int = 0,
                 x2: int = 0,
                 y2: int = 0,
                 score: float = 0,
                 class_num: int = 0):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        self.score = score
        self.class_num = class_num

    def get_member_variable_dict(self) -> dict:
        """Return the box fields as a plain dictionary."""
        return {
            'x1': self.x1,
            'y1': self.y1,
            'x2': self.x2,
            'y2': self.y2,
            'score': self.score,
            'class_num': self.class_num
        }


class ExampleYoloResult:
    """YOLO output result descriptor.

    Args:
        class_count: Number of classes the model predicts.
        box_count: Number of boxes in ``box_list``.
        box_list: Detected boxes; a fresh empty list is used when omitted so
            instances never share one list object.
    """

    def __init__(self,
                 class_count: int = 0,
                 box_count: int = 0,
                 box_list: Optional[List[ExampleBoundingBox]] = None):
        self.class_count = class_count
        self.box_count = box_count
        self.box_list = box_list if box_list is not None else []

    def get_member_variable_dict(self) -> dict:
        """Return the result as nested dictionaries.

        ``box_list`` is emitted as a dict keyed by the box's index.
        """
        member_variable_dict = {
            'class_count': self.class_count,
            'box_count': self.box_count,
            'box_list': {}
        }
        for idx, box_element in enumerate(self.box_list):
            member_variable_dict['box_list'][idx] = box_element.get_member_variable_dict()
        return member_variable_dict


# YOLO constants
YOLO_V3_CELL_BOX_NUM = 3
NMS_THRESH_YOLOV5 = 0.5
YOLO_MAX_DETECTION_PER_CLASS = 100
# One row of three (width, height) anchors per output node, indexed by the
# node's position in the output list.
YOLO_V5_ANCHERS = np.array([
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]]
])


def _sigmoid(x):
    """Element-wise logistic sigmoid."""
    return 1. / (1. + np.exp(-x))


def _iou(box_src, boxes_dst):
    """Intersection-over-union of one box against a batch of boxes.

    Args:
        box_src: 1-D array whose first four entries are x1, y1, x2, y2.
        boxes_dst: 2-D array, one box per row, same column layout.

    Returns:
        1-D array with one IoU value per row of ``boxes_dst``. Pairs whose
        union area is not positive yield 0 instead of NaN.
    """
    max_x1 = np.maximum(box_src[0], boxes_dst[:, 0])
    max_y1 = np.maximum(box_src[1], boxes_dst[:, 1])
    min_x2 = np.minimum(box_src[2], boxes_dst[:, 2])
    min_y2 = np.minimum(box_src[3], boxes_dst[:, 3])

    area_intersection = np.maximum(0, (min_x2 - max_x1)) * np.maximum(0, (min_y2 - max_y1))
    area_src = (box_src[2] - box_src[0]) * (box_src[3] - box_src[1])
    area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 3] - boxes_dst[:, 1])
    area_union = area_src + area_dst - area_intersection

    # Two zero-area boxes give a zero union; dividing would produce NaN and a
    # RuntimeWarning, so substitute a harmless denominator and report 0.
    has_area = area_union > 0
    safe_union = np.where(has_area, area_union, 1)
    return np.where(has_area, area_intersection / safe_union, 0.0)


def _boxes_scale(boxes, hardware_preproc_info: kp.HwPreProcInfo):
    """Scale boxes based on hardware preprocessing info.

    Subtracts the left/top padding from the first four entries of the last
    axis and multiplies them by ``img_* / resized_img_*``. ``boxes`` is
    modified in place and also returned.
    """
    ratio_w = hardware_preproc_info.img_width / hardware_preproc_info.resized_img_width
    ratio_h = hardware_preproc_info.img_height / hardware_preproc_info.resized_img_height

    boxes[..., :4] = boxes[..., :4] - np.array([
        hardware_preproc_info.pad_left, hardware_preproc_info.pad_top,
        hardware_preproc_info.pad_left, hardware_preproc_info.pad_top
    ])
    boxes[..., :4] = boxes[..., :4] * np.array([ratio_w, ratio_h, ratio_w, ratio_h])

    return boxes


def post_process_yolo_v5(inference_float_node_output_list: List[kp.InferenceFloatNodeOutput],
                         hardware_preproc_info: kp.HwPreProcInfo,
                         thresh_value: float,
                         with_sigmoid: bool = True) -> ExampleYoloResult:
    """YOLO V5 post-processing function.

    Decodes every output node into corner-format boxes, rescales them with
    ``_boxes_scale``, multiplies class scores by objectness, and runs a
    per-class greedy NMS.

    Args:
        inference_float_node_output_list: Output nodes; each exposes ``shape``
            (indexed as N, C, H, W here) and ``ndarray``. The node index
            selects the anchor row in ``YOLO_V5_ANCHERS``.
        hardware_preproc_info: Pre-processing info reported with the result.
        thresh_value: Minimum fused class score for a box to be kept.
        with_sigmoid: Apply a sigmoid to the raw node output first.

    Returns:
        ExampleYoloResult: Boxes rounded to integers and clamped to
        ``[0, img_width - 1] x [0, img_height - 1]``.
    """
    feature_map_list = []
    candidate_boxes_list = []

    for i, node_output in enumerate(inference_float_node_output_list):
        anchor_offset = int(node_output.shape[1] / YOLO_V3_CELL_BOX_NUM)
        feature_map = node_output.ndarray.transpose((0, 2, 3, 1))
        feature_map = _sigmoid(feature_map) if with_sigmoid else feature_map
        feature_map = feature_map.reshape((
            feature_map.shape[0],
            feature_map.shape[1],
            feature_map.shape[2],
            YOLO_V3_CELL_BOX_NUM,
            anchor_offset
        ))

        nrows = node_output.shape[2]
        ncols = node_output.shape[3]
        ratio_w = hardware_preproc_info.model_input_width / ncols
        ratio_h = hardware_preproc_info.model_input_height / nrows
        # Last axis of ``grids`` is (column, row), i.e. (x, y).
        grids = np.expand_dims(
            np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0
        )

        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # Centre: x scales with the width stride, y with the height
            # stride (order matters for non-square model inputs).
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] * 2. - 0.5 + grids
            ) * np.array([ratio_w, ratio_h])
            # Width / height from the anchor prior.
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 2:4] * 2
            ) ** 2 * YOLO_V5_ANCHERS[i][anchor_idx]

            # Convert centre/size to top-left and bottom-right corners.
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] - (feature_map[..., anchor_idx, 2:4] / 2.)
            )
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]
            )

        feature_map = _boxes_scale(boxes=feature_map,
                                   hardware_preproc_info=hardware_preproc_info)
        feature_map_list.append(feature_map)

    predict_bboxes = np.concatenate([
        np.reshape(feature_map, (-1, feature_map.shape[-1]))
        for feature_map in feature_map_list
    ], axis=0)

    # Fuse objectness (column 4) into every class score (columns 5+).
    predict_bboxes[..., 5:] = predict_bboxes[..., 4:5] * predict_bboxes[..., 5:]

    # Keep only rows with at least one class above the threshold.
    predict_bboxes = predict_bboxes[(predict_bboxes[..., 5:] > thresh_value).any(axis=1)]

    # NMS, one class at a time
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        if not candidate_boxes_mask.any():
            continue

        # Sort this class's candidates by descending score.
        candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
        candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]

        # Greedy suppression: a surviving box zeroes the score of every
        # lower-ranked box that overlaps it too much.
        for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
            if 0 != candidate_boxes[candidate_box_idx][class_idx]:
                remove_mask = _iou(
                    box_src=candidate_boxes[candidate_box_idx],
                    boxes_dst=candidate_boxes[candidate_box_idx + 1:]
                ) > NMS_THRESH_YOLOV5
                candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0

        good_count = 0
        for candidate_box_idx in range(candidate_boxes.shape[0]):
            if candidate_boxes[candidate_box_idx, class_idx] > 0:
                candidate_boxes_list.append(
                    ExampleBoundingBox(
                        x1=round(float(candidate_boxes[candidate_box_idx, 0]), 4),
                        y1=round(float(candidate_boxes[candidate_box_idx, 1]), 4),
                        x2=round(float(candidate_boxes[candidate_box_idx, 2]), 4),
                        y2=round(float(candidate_boxes[candidate_box_idx, 3]), 4),
                        score=round(float(candidate_boxes[candidate_box_idx, class_idx]), 4),
                        class_num=class_idx - 5
                    )
                )
                good_count += 1

                if YOLO_MAX_DETECTION_PER_CLASS == good_count:
                    break

    # Round to integer pixels and clamp to the image bounds.
    max_x = hardware_preproc_info.img_width - 1
    max_y = hardware_preproc_info.img_height - 1
    for box in candidate_boxes_list:
        box.x1 = 0 if (box.x1 + 0.5 < 0) else int(box.x1 + 0.5)
        box.y1 = 0 if (box.y1 + 0.5 < 0) else int(box.y1 + 0.5)
        box.x2 = int(max_x) if (box.x2 + 0.5 > max_x) else int(box.x2 + 0.5)
        box.y2 = int(max_y) if (box.y2 + 0.5 > max_y) else int(box.y2 + 0.5)

    return ExampleYoloResult(
        class_count=predict_bboxes.shape[1] - 5 if len(predict_bboxes) > 0 else 0,
        box_count=len(candidate_boxes_list),
        box_list=candidate_boxes_list
    )


def preprocess_frame(frame, target_size=640):
    """
    Preprocess image frame for YOLO inference.

    Args:
        frame: Original BGR image (numpy array).
        target_size (int): Target size for resizing (default 640 for YOLO).

    Returns:
        tuple: (processed_frame in BGR565 format, original_width, original_height)

    Raises:
        ValueError: If input frame is None.
    """
    if frame is None:
        raise ValueError("Input frame is None")

    original_height, original_width = frame.shape[:2]

    # Resize to a square target_size x target_size image (aspect ratio is
    # not preserved).
    resized_frame = cv2.resize(frame, (target_size, target_size))

    # Convert to BGR565 format
    frame_bgr565 = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2BGR565)

    return frame_bgr565, original_width, original_height


def postprocess(output_list, hw_preproc_info, original_width, original_height,
                target_size=640, thresh=0.2):
    """
    Post-process YOLO model output.

    Args:
        output_list: List of model output nodes.
        hw_preproc_info: Hardware preprocessing info from Kneron device.
        original_width (int): Original image width.
        original_height (int): Original image height.
        target_size (int): Resize target size used during preprocessing.
        thresh (float): Detection confidence threshold.

    Returns:
        ExampleYoloResult: YOLO detection results with bounding boxes.
    """
    yolo_result = post_process_yolo_v5(
        inference_float_node_output_list=output_list,
        hardware_preproc_info=hw_preproc_info,
        thresh_value=thresh
    )

    # Adjust bounding box coordinates to match original dimensions.
    # Assumes the image sent to the device was the target_size x target_size
    # frame produced by preprocess_frame.
    width_ratio = original_width / target_size
    height_ratio = original_height / target_size

    for box in yolo_result.box_list:
        box.x1 = int(box.x1 * width_ratio)
        box.y1 = int(box.y1 * height_ratio)
        box.x2 = int(box.x2 * width_ratio)
        box.y2 = int(box.y2 * height_ratio)

    return yolo_result


class CustomInferenceWorkerThread(QThread):
    """
    Custom model inference worker thread.

    Uses user-uploaded model and firmware to perform inference on video frames.

    Attributes:
        inference_result_signal: Signal emitted with inference results
        frame_queue: Queue containing frames to process
        device_group: Connected Kneron device group
        model_descriptor: Loaded model descriptor
        custom_labels: Custom class labels (optional)
    """

    inference_result_signal = pyqtSignal(object)

    def __init__(self, frame_queue, min_interval=0.5, mse_threshold=500):
        """
        Initialize the CustomInferenceWorkerThread.

        Args:
            frame_queue: Queue containing frames to process.
            min_interval (float): Minimum seconds between inferences.
            mse_threshold (float): MSE threshold for detecting frame changes.
        """
        super().__init__()
        self.frame_queue = frame_queue
        self.min_interval = min_interval
        self.mse_threshold = mse_threshold
        self._running = True
        self.last_inference_time = 0
        self.last_frame = None
        self.cached_result = None
        self.input_params = {}

        # Device and model related
        self.device_group = None
        self.model_descriptor = None
        self.is_initialized = False

        # Custom labels
        self.custom_labels = None

    def initialize_device(self):
        """
        Initialize device, upload firmware and model.

        If a device_group is already provided in input_params (connected by
        DeviceController), reuse it and skip connect_devices to avoid double
        connection conflicts with the Kneron SDK.

        Reads ``custom_model_path``, ``custom_scpu_path``, ``custom_ncpu_path``,
        ``usb_port_id``, ``custom_labels`` and ``device_group`` from
        ``self.input_params``.

        Returns:
            bool: True if initialization successful, False otherwise.
        """
        try:
            model_path = self.input_params.get("custom_model_path")
            scpu_path = self.input_params.get("custom_scpu_path")
            ncpu_path = self.input_params.get("custom_ncpu_path")
            port_id = self.input_params.get("usb_port_id", 0)

            # Load custom labels
            self.custom_labels = self.input_params.get("custom_labels")
            if self.custom_labels:
                print(f'[Custom Labels] Loaded {len(self.custom_labels)} classes')
            else:
                print('[Custom Labels] Not provided, using default COCO classes')

            if not all([model_path, scpu_path, ncpu_path]):
                print("Missing required file paths")
                return False

            # Imported lazily so the module can be loaded without the SDK.
            import kp

            # Reuse existing device_group if provided to avoid double connection
            existing_device_group = self.input_params.get("device_group")
            if existing_device_group is not None:
                print('[Reusing existing device connection]')
                self.device_group = existing_device_group
            else:
                print('[Connecting device]')
                self.device_group = kp.core.connect_devices(usb_port_ids=[port_id])
                print(' - Connection successful')
                kp.core.set_timeout(device_group=self.device_group, milliseconds=5000)

            # Upload firmware
            print('[Uploading firmware]')
            kp.core.load_firmware_from_file(self.device_group, scpu_path, ncpu_path)
            print(' - Firmware upload successful')

            # Upload model
            print('[Uploading model]')
            self.model_descriptor = kp.core.load_model_from_file(
                self.device_group,
                file_path=model_path
            )
            print(' - Model upload successful')

            self.is_initialized = True
            return True

        except Exception as e:
            print(f"Error initializing device: {e}")
            print(traceback.format_exc())
            return False

    def run_single_inference(self, frame):
        """
        Execute a single inference on the given frame.

        Initializes the device on first use.

        Args:
            frame: Input image frame (numpy array in BGR format).

        Returns:
            dict: Inference results with keys ``"num_boxes"``,
            ``"bounding boxes"`` and ``"results"``, or None on error.
        """
        try:
            if not self.is_initialized:
                if not self.initialize_device():
                    return None

            # Preprocess
            img_processed, original_width, original_height = preprocess_frame(frame)

            # Build the inference descriptor
            import kp
            # NOTE(review): the frame is converted with COLOR_BGR2BGR565 but
            # declared as KP_IMAGE_FORMAT_RGB565 - presumably the SDK's
            # expected pairing; confirm against the Kneron documentation.
            descriptor = kp.GenericImageInferenceDescriptor(
                model_id=self.model_descriptor.models[0].id,
                inference_number=0,
                input_node_image_list=[
                    kp.GenericInputNodeImage(
                        image=img_processed,
                        image_format=kp.ImageFormat.KP_IMAGE_FORMAT_RGB565,
                        resize_mode=kp.ResizeMode.KP_RESIZE_ENABLE,
                        padding_mode=kp.PaddingMode.KP_PADDING_CORNER,
                        normalize_mode=kp.NormalizeMode.KP_NORMALIZE_KNERON
                    )
                ]
            )

            # Run inference
            kp.inference.generic_image_inference_send(self.device_group, descriptor)
            result = kp.inference.generic_image_inference_receive(self.device_group)

            # Retrieve output nodes
            output_list = []
            for node_idx in range(result.header.num_output_node):
                node_output = kp.inference.generic_inference_retrieve_float_node(
                    node_idx=node_idx,
                    generic_raw_result=result,
                    channels_ordering=kp.ChannelOrdering.KP_CHANNEL_ORDERING_CHW
                )
                output_list.append(node_output)

            # Post-process
            yolo_result = postprocess(
                output_list,
                result.header.hw_pre_proc_info_list[0],
                original_width,
                original_height
            )

            # Convert to standard format
            bounding_boxes = [
                [box.x1, box.y1, box.x2, box.y2]
                for box in yolo_result.box_list
            ]

            # Use custom labels or default COCO classes
            labels_to_use = self.custom_labels if self.custom_labels else COCO_CLASSES

            # Fall back to a synthetic name when the class index has no label.
            results = []
            for box in yolo_result.box_list:
                if 0 <= box.class_num < len(labels_to_use):
                    results.append(labels_to_use[box.class_num])
                else:
                    results.append(f"class_{box.class_num}")

            return {
                "num_boxes": len(yolo_result.box_list),
                "bounding boxes": bounding_boxes,
                "results": results
            }

        except Exception as e:
            print(f"Error during inference: {e}")
            print(traceback.format_exc())
            return None

    def run(self):
        """
        Main execution loop.

        Continuously processes frames from the queue, runs inference, and
        emits results. Uses MSE-based frame change detection: when a frame
        differs from the last inferred one by less than ``mse_threshold``,
        the cached result is re-emitted instead of running inference.
        """
        while self._running:
            try:
                frame = self.frame_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            # A None item would raise AttributeError below and kill the
            # thread; skip it instead.
            if frame is None:
                continue

            # Throttle: drop frames arriving sooner than min_interval.
            current_time = time.time()
            if current_time - self.last_inference_time < self.min_interval:
                continue

            # MSE detection to optimize performance
            if self.last_frame is not None:
                if frame.shape != self.last_frame.shape:
                    # Shapes differ, so the cached comparison frame is useless.
                    self.last_frame = None
                    self.cached_result = None
                else:
                    try:
                        mse = np.mean(
                            (frame.astype(np.float32) - self.last_frame.astype(np.float32)) ** 2
                        )
                        if mse < self.mse_threshold and self.cached_result is not None:
                            self.inference_result_signal.emit(self.cached_result)
                            continue
                    except Exception as e:
                        print(f"Error calculating MSE: {e}")
                        self.last_frame = None
                        self.cached_result = None

            # Execute inference
            result = self.run_single_inference(frame)

            self.last_inference_time = current_time
            self.last_frame = frame.copy()
            self.cached_result = result

            if result is not None:
                self.inference_result_signal.emit(result)

        # Disconnect device
        self.cleanup()
        self.quit()

    def cleanup(self):
        """Clean up resources.

        Only disconnects device if this worker created the connection itself
        (i.e. no device_group was provided via input_params). After the
        device group is released the worker is marked uninitialized so a
        later inference re-initializes instead of using a ``None`` device
        group. Safe to call more than once.
        """
        try:
            if self.device_group is not None:
                owned_by_worker = self.input_params.get("device_group") is None
                if owned_by_worker:
                    import kp
                    kp.core.disconnect_devices(self.device_group)
                    print('[Device disconnected]')
                else:
                    print('[Device connection owned by DeviceController, skipping disconnect]')
                self.device_group = None
                self.model_descriptor = None
                self.is_initialized = False
        except Exception as e:
            print(f"Error cleaning up resources: {e}")

    def stop(self):
        """Stop the worker thread and clean up resources."""
        self._running = False
        self.wait()
        self.cleanup()