"""
custom_inference_worker.py - Custom Inference Worker

This module provides a worker thread for running inference using user-uploaded
custom models. It uses YOLO V5 pre/post-processing logic for object detection.
"""
from __future__ import annotations
import os
import time
import queue
import cv2
import numpy as np
from typing import List, TYPE_CHECKING
from PyQt5.QtCore import QThread, pyqtSignal

if TYPE_CHECKING:
    import kp


# COCO dataset class names (80 classes).
# The position of a name in this list is the class index it labels; the
# trailing comment on each row gives the index of the row's first entry.
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',                      # 0
    'bus', 'train', 'truck', 'boat', 'traffic light',                          # 5
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',             # 10
    'cat', 'dog', 'horse', 'sheep', 'cow',                                     # 15
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',                        # 20
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',                       # 25
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',                # 30
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',    # 35
    'wine glass', 'cup', 'fork', 'knife', 'spoon',                             # 40
    'bowl', 'banana', 'apple', 'sandwich', 'orange',                           # 45
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',                         # 50
    'cake', 'chair', 'couch', 'potted plant', 'bed',                           # 55
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',                         # 60
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',                   # 65
    'toaster', 'sink', 'refrigerator', 'book', 'clock',                        # 70
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',              # 75
]


class ExampleBoundingBox:
    """Plain container for one detection: two corners, a score and a class index.

    Attributes:
        x1, y1: First corner of the box.
        x2, y2: Second corner of the box.
        score: Confidence score attached to the box.
        class_num: Index of the detected class.
    """

    def __init__(self,
                 x1: int = 0,
                 y1: int = 0,
                 x2: int = 0,
                 y2: int = 0,
                 score: float = 0,
                 class_num: int = 0):
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        self.score = score
        self.class_num = class_num

    def get_member_variable_dict(self) -> dict:
        """Return the box fields as a dict keyed by attribute name."""
        field_names = ('x1', 'y1', 'x2', 'y2', 'score', 'class_num')
        return {field_name: getattr(self, field_name) for field_name in field_names}


class ExampleYoloResult:
    """Container for the detections produced by YOLO post-processing.

    Attributes:
        class_count: Number of classes reported with the result.
        box_count: Number of boxes reported with the result.
        box_list: The bounding-box objects themselves.
    """

    def __init__(self,
                 class_count: int = 0,
                 box_count: int = 0,
                 box_list: List[ExampleBoundingBox] = None):
        self.class_count = class_count
        self.box_count = box_count
        # A fresh list per instance so results never share a default container.
        if box_list is None:
            box_list = []
        self.box_list = box_list

    def get_member_variable_dict(self) -> dict:
        """Return the result as a dict; boxes are keyed by their list position."""
        boxes_by_position = {
            position: box.get_member_variable_dict()
            for position, box in enumerate(self.box_list)
        }
        return {
            'class_count': self.class_count,
            'box_count': self.box_count,
            'box_list': boxes_by_position
        }


# YOLO post-processing constants
YOLO_V3_CELL_BOX_NUM = 3            # anchor boxes decoded per grid cell
NMS_THRESH_YOLOV5 = 0.5             # IoU above which a lower-scoring box is suppressed
YOLO_MAX_DETECTION_PER_CLASS = 100  # upper bound on boxes kept for one class

# Anchor sizes indexed as [output node][anchor]; each pair scales the decoded
# (width, height) of a box. The identifier's spelling is kept as-is because
# other code refers to it by this name.
YOLO_V5_ANCHERS = np.array((
    ((10, 13), (16, 30), (33, 23)),
    ((30, 61), (62, 45), (59, 119)),
    ((116, 90), (156, 198), (373, 326)),
))


def _sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))`` of *x*."""
    exp_of_negated = np.exp(-x)
    return 1. / (1. + exp_of_negated)


def _iou(box_src, boxes_dst):
    """Compute the intersection-over-union of one box against many boxes.

    Args:
        box_src: 1-D sequence whose first four entries are ``x1, y1, x2, y2``.
        boxes_dst: 2-D array with one box per row; the first four columns
            are ``x1, y1, x2, y2``. Extra columns are ignored.

    Returns:
        1-D array with one IoU value per row of ``boxes_dst``. Pairs whose
        union area is zero (e.g. both boxes have zero area) yield ``0.0``
        instead of NaN.
    """
    # Corners of the overlap rectangle between box_src and every dst box.
    max_x1 = np.maximum(box_src[0], boxes_dst[:, 0])
    max_y1 = np.maximum(box_src[1], boxes_dst[:, 1])
    min_x2 = np.minimum(box_src[2], boxes_dst[:, 2])
    min_y2 = np.minimum(box_src[3], boxes_dst[:, 3])

    # Negative extents mean the boxes do not overlap on that axis.
    area_intersection = np.maximum(0, (min_x2 - max_x1)) * np.maximum(0, (min_y2 - max_y1))
    area_src = (box_src[2] - box_src[0]) * (box_src[3] - box_src[1])
    area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 3] - boxes_dst[:, 1])
    area_union = area_src + area_dst - area_intersection

    # Dividing by a zero union would produce NaN/inf plus a RuntimeWarning;
    # such pairs carry no overlap information, so report them as 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = area_intersection / area_union
    return np.where(area_union != 0, iou, 0.0)


def _boxes_scale(boxes, hardware_preproc_info: kp.HwPreProcInfo):
    """Undo padding and resizing on box coordinates, in place.

    The first four entries along the last axis of ``boxes`` are treated as
    ``x1, y1, x2, y2``. The left/top padding is subtracted, then the result
    is multiplied by the image-to-resized-image size ratio.

    Args:
        boxes: Array modified in place; only ``[..., :4]`` is touched.
        hardware_preproc_info: Object providing ``img_width``, ``img_height``,
            ``resized_img_width``, ``resized_img_height``, ``pad_left`` and
            ``pad_top``.

    Returns:
        The same ``boxes`` array that was passed in.
    """
    info = hardware_preproc_info
    scale_x = info.img_width / info.resized_img_width
    scale_y = info.img_height / info.resized_img_height

    pad_offsets = np.array([info.pad_left, info.pad_top, info.pad_left, info.pad_top])
    scale_factors = np.array([scale_x, scale_y, scale_x, scale_y])

    # Two separate write-backs: each result is stored into ``boxes`` before
    # the next step reads it.
    boxes[..., :4] = np.subtract(boxes[..., :4], pad_offsets)
    boxes[..., :4] = np.multiply(boxes[..., :4], scale_factors)

    return boxes


def post_process_yolo_v5(inference_float_node_output_list: List[kp.InferenceFloatNodeOutput],
                         hardware_preproc_info: kp.HwPreProcInfo,
                         thresh_value: float,
                         with_sigmoid: bool = True) -> ExampleYoloResult:
    """Decode YOLO V5 output nodes into thresholded, NMS-filtered boxes.

    Every output node is read as a ``(batch, anchors * fields, rows, cols)``
    array. Per anchor, fields ``0:2`` are decoded into the box centre,
    ``2:4`` into width/height, field ``4`` is the objectness score and
    fields ``5:`` are per-class scores. Boxes are converted to corner form,
    mapped back to image coordinates with ``_boxes_scale``, filtered by
    ``objectness * class score > thresh_value`` and de-duplicated per class
    with IoU-based non-maximum suppression.

    Args:
        inference_float_node_output_list: Output nodes exposing ``shape`` and
            ``ndarray``. Node ``i`` is decoded with ``YOLO_V5_ANCHERS[i]``.
            The node arrays are not modified.
        hardware_preproc_info: Pre-processing info providing the model input
            size, the padding/resize values used by ``_boxes_scale`` and the
            image size used to clamp the final coordinates.
        thresh_value: Minimum combined score for a box to be kept.
        with_sigmoid: Apply the sigmoid to the raw node output before decoding.

    Returns:
        ExampleYoloResult: The kept boxes with integer, clamped coordinates.
        ``class_count`` is 0 when no box passes the threshold.
    """
    feature_map_list = []
    candidate_boxes_list = []

    for i, node_output in enumerate(inference_float_node_output_list):
        anchor_offset = int(node_output.shape[1] / YOLO_V3_CELL_BOX_NUM)
        feature_map = node_output.ndarray.transpose((0, 2, 3, 1))
        # Without the sigmoid the transpose is only a view of the node's
        # buffer; copy it so the in-place decoding below never alters the
        # caller's data.
        feature_map = _sigmoid(feature_map) if with_sigmoid else feature_map.copy()
        feature_map = feature_map.reshape((
            feature_map.shape[0],
            feature_map.shape[1],
            feature_map.shape[2],
            YOLO_V3_CELL_BOX_NUM,
            anchor_offset
        ))

        nrows = node_output.shape[2]
        ncols = node_output.shape[3]
        # Size of one grid cell in model-input pixels.
        ratio_w = hardware_preproc_info.model_input_width / ncols
        ratio_h = hardware_preproc_info.model_input_height / nrows
        # grids[0, row, col] == (col, row), i.e. (x, y) order.
        grids = np.expand_dims(np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0)

        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # Centre: the grid is in (x, y) order, so x is scaled by the cell
            # width and y by the cell height.
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] * 2. - 0.5 + grids
            ) * np.array([ratio_w, ratio_h])
            # Size: squared activation times the anchor's (width, height).
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 2:4] * 2
            ) ** 2 * YOLO_V5_ANCHERS[i][anchor_idx]

            # Convert (centre, size) to (x1, y1, x2, y2).
            feature_map[..., anchor_idx, 0:2] = (
                feature_map[..., anchor_idx, 0:2] - (feature_map[..., anchor_idx, 2:4] / 2.)
            )
            feature_map[..., anchor_idx, 2:4] = (
                feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]
            )

        feature_map = _boxes_scale(boxes=feature_map, hardware_preproc_info=hardware_preproc_info)
        feature_map_list.append(feature_map)

    # Flatten every node to rows of [x1, y1, x2, y2, objectness, class scores...].
    predict_bboxes = np.concatenate([
        np.reshape(feature_map, (-1, feature_map.shape[-1])) for feature_map in feature_map_list
    ], axis=0)
    # Combined score per class = objectness * class score.
    predict_bboxes[..., 5:] = np.repeat(
        predict_bboxes[..., 4][..., np.newaxis],
        predict_bboxes[..., 5:].shape[1],
        axis=1
    ) * predict_bboxes[..., 5:]
    # Keep only rows where at least one class passes the threshold.
    predict_bboxes_mask = (predict_bboxes[..., 5:] > thresh_value).sum(axis=1)
    predict_bboxes = predict_bboxes[predict_bboxes_mask >= 1]

    # NMS, one class column at a time
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        class_good_box_count = candidate_boxes_mask.sum()
        if class_good_box_count == 1:
            # A single candidate cannot overlap anything; keep it directly.
            candidate_boxes_list.append(
                ExampleBoundingBox(
                    x1=round(float(predict_bboxes[candidate_boxes_mask, 0][0]), 4),
                    y1=round(float(predict_bboxes[candidate_boxes_mask, 1][0]), 4),
                    x2=round(float(predict_bboxes[candidate_boxes_mask, 2][0]), 4),
                    y2=round(float(predict_bboxes[candidate_boxes_mask, 3][0]), 4),
                    score=round(float(predict_bboxes[candidate_boxes_mask, class_idx][0]), 4),
                    class_num=class_idx - 5
                )
            )
        elif class_good_box_count > 1:
            # Sort candidates by descending score for this class.
            candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
            candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]

            # A suppressed box is marked by zeroing its score; surviving boxes
            # suppress every lower-scoring box that overlaps them too much.
            for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
                if 0 != candidate_boxes[candidate_box_idx][class_idx]:
                    remove_mask = _iou(
                        box_src=candidate_boxes[candidate_box_idx],
                        boxes_dst=candidate_boxes[candidate_box_idx + 1:]
                    ) > NMS_THRESH_YOLOV5
                    candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0

            good_count = 0
            for candidate_box_idx in range(candidate_boxes.shape[0]):
                if candidate_boxes[candidate_box_idx, class_idx] > 0:
                    candidate_boxes_list.append(
                        ExampleBoundingBox(
                            x1=round(float(candidate_boxes[candidate_box_idx, 0]), 4),
                            y1=round(float(candidate_boxes[candidate_box_idx, 1]), 4),
                            x2=round(float(candidate_boxes[candidate_box_idx, 2]), 4),
                            y2=round(float(candidate_boxes[candidate_box_idx, 3]), 4),
                            score=round(float(candidate_boxes[candidate_box_idx, class_idx]), 4),
                            class_num=class_idx - 5
                        )
                    )
                    good_count += 1

                    if YOLO_MAX_DETECTION_PER_CLASS == good_count:
                        break

    # Round to the nearest pixel and clamp: x1/y1 at 0, x2/y2 at the last
    # pixel of the image.
    max_x = hardware_preproc_info.img_width - 1
    max_y = hardware_preproc_info.img_height - 1
    for box in candidate_boxes_list:
        box.x1 = 0 if (box.x1 + 0.5 < 0) else int(box.x1 + 0.5)
        box.y1 = 0 if (box.y1 + 0.5 < 0) else int(box.y1 + 0.5)
        box.x2 = int(max_x) if (box.x2 + 0.5 > max_x) else int(box.x2 + 0.5)
        box.y2 = int(max_y) if (box.y2 + 0.5 > max_y) else int(box.y2 + 0.5)

    return ExampleYoloResult(
        class_count=predict_bboxes.shape[1] - 5 if len(predict_bboxes) > 0 else 0,
        box_count=len(candidate_boxes_list),
        box_list=candidate_boxes_list
    )


def preprocess_frame(frame, target_size=640):
    """
    Resize a frame to a square and convert it to BGR565 for inference.

    Args:
        frame: Original BGR image (numpy array).
        target_size (int): Edge length of the square output (default 640).

    Returns:
        tuple: (frame converted with cv2.COLOR_BGR2BGR565, original_width,
        original_height)

    Raises:
        Exception: If input frame is None.
    """
    if frame is None:
        raise Exception("Input frame is None")

    # The first two shape entries are (height, width); remember them so the
    # caller can map detections back onto the source image.
    original_height, original_width = frame.shape[:2]

    # The aspect ratio is not preserved: both edges become target_size.
    output_size = (target_size, target_size)
    square_frame = cv2.resize(frame, output_size)

    return (
        cv2.cvtColor(square_frame, cv2.COLOR_BGR2BGR565),
        original_width,
        original_height,
    )


def postprocess(output_list, hw_preproc_info, original_width, original_height, target_size=640, thresh=0.2):
    """
    Run YOLO V5 post-processing and rescale boxes to the original frame.

    Args:
        output_list: List of model output nodes.
        hw_preproc_info: Hardware preprocessing info forwarded to
            post_process_yolo_v5.
        original_width (int): Original image width.
        original_height (int): Original image height.
        target_size (int): Resize target size used during preprocessing.
        thresh (float): Detection confidence threshold.

    Returns:
        ExampleYoloResult: Detection results whose box coordinates have been
        multiplied by original size / target_size and truncated to int.
    """
    detections = post_process_yolo_v5(
        inference_float_node_output_list=output_list,
        hardware_preproc_info=hw_preproc_info,
        thresh_value=thresh
    )

    # Factors that map target_size-space coordinates onto the original frame.
    scale_x = original_width / target_size
    scale_y = original_height / target_size

    for detection in detections.box_list:
        detection.x1, detection.y1 = int(detection.x1 * scale_x), int(detection.y1 * scale_y)
        detection.x2, detection.y2 = int(detection.x2 * scale_x), int(detection.y2 * scale_y)

    return detections


class CustomInferenceWorkerThread(QThread):
    """
    Custom model inference worker thread.

    Uses user-uploaded model and firmware to perform inference on video frames.
    Frames are pulled from ``frame_queue``; results are delivered through
    ``inference_result_signal``.

    Attributes:
        inference_result_signal: Signal emitted with inference results
        frame_queue: Queue containing frames to process
        device_group: Connected Kneron device group
        model_descriptor: Loaded model descriptor
        custom_labels: Custom class labels (optional)
        input_params: Dict read for the model/firmware paths, USB port id,
            optional labels and optional pre-connected device group
        is_initialized: True once firmware and model have been uploaded
    """

    inference_result_signal = pyqtSignal(object)

    def __init__(self, frame_queue, min_interval=0.5, mse_threshold=500):
        """
        Initialize the CustomInferenceWorkerThread.

        Args:
            frame_queue: Queue containing frames to process.
            min_interval (float): Minimum seconds between inferences.
            mse_threshold (float): MSE threshold for detecting frame changes.
        """
        super().__init__()
        self.frame_queue = frame_queue
        self.min_interval = min_interval
        self.mse_threshold = mse_threshold
        self._running = True
        self.last_inference_time = 0
        self.last_frame = None
        self.cached_result = None
        self.input_params = {}

        # Device and model related
        self.device_group = None
        self.model_descriptor = None
        self.is_initialized = False

        # Custom labels
        self.custom_labels = None

    def initialize_device(self):
        """
        Initialize device, upload firmware and model.

        If a device_group is already provided in input_params (connected by
        DeviceController), reuse it and skip connect_devices to avoid double
        connection conflicts with the Kneron SDK. For the same reason, a
        connection this worker opened during an earlier, failed attempt is
        reused instead of connecting again.

        Returns:
            bool: True if initialization successful, False otherwise.
        """
        try:
            model_path = self.input_params.get("custom_model_path")
            scpu_path = self.input_params.get("custom_scpu_path")
            ncpu_path = self.input_params.get("custom_ncpu_path")
            port_id = self.input_params.get("usb_port_id", 0)

            # Load custom labels
            self.custom_labels = self.input_params.get("custom_labels")
            if self.custom_labels:
                print(f'[Custom Labels] Loaded {len(self.custom_labels)} classes')
            else:
                print('[Custom Labels] Not provided, using default COCO classes')

            if not all([model_path, scpu_path, ncpu_path]):
                print("Missing required file paths")
                return False

            # Imported lazily so the module can be loaded without the SDK.
            import kp

            # Reuse existing device_group if provided to avoid double connection
            existing_device_group = self.input_params.get("device_group")
            if existing_device_group is not None:
                print('[Reusing existing device connection]')
                self.device_group = existing_device_group
            elif self.device_group is None:
                print('[Connecting device]')
                self.device_group = kp.core.connect_devices(usb_port_ids=[port_id])
                print(' - Connection successful')
            # Otherwise self.device_group was connected by a previous attempt
            # that failed later (firmware/model upload); keep that connection
            # rather than connecting to the same port a second time.

            kp.core.set_timeout(device_group=self.device_group, milliseconds=5000)

            # Upload firmware
            print('[Uploading firmware]')
            kp.core.load_firmware_from_file(self.device_group, scpu_path, ncpu_path)
            print(' - Firmware upload successful')

            # Upload model
            print('[Uploading model]')
            self.model_descriptor = kp.core.load_model_from_file(
                self.device_group,
                file_path=model_path
            )
            print(' - Model upload successful')

            self.is_initialized = True
            return True

        except Exception as e:
            print(f"Error initializing device: {e}")
            import traceback
            print(traceback.format_exc())
            return False

    def run_single_inference(self, frame):
        """
        Execute a single inference on the given frame.

        Initializes the device first if that has not happened yet.

        Args:
            frame: Input image frame (numpy array in BGR format).

        Returns:
            dict: ``{"num_boxes", "bounding boxes", "results"}`` holding the
            box count, ``[x1, y1, x2, y2]`` lists and one label per box, or
            None on error.
        """
        try:
            if not self.is_initialized:
                if not self.initialize_device():
                    return None

            # Preprocess
            img_processed, original_width, original_height = preprocess_frame(frame)

            # Build the inference descriptor
            import kp
            descriptor = kp.GenericImageInferenceDescriptor(
                model_id=self.model_descriptor.models[0].id,
                inference_number=0,
                input_node_image_list=[
                    kp.GenericInputNodeImage(
                        image=img_processed,
                        image_format=kp.ImageFormat.KP_IMAGE_FORMAT_RGB565,
                        resize_mode=kp.ResizeMode.KP_RESIZE_ENABLE,
                        padding_mode=kp.PaddingMode.KP_PADDING_CORNER,
                        normalize_mode=kp.NormalizeMode.KP_NORMALIZE_KNERON
                    )
                ]
            )

            # Run inference
            kp.inference.generic_image_inference_send(self.device_group, descriptor)
            result = kp.inference.generic_image_inference_receive(self.device_group)

            # Collect the output nodes
            output_list = []
            for node_idx in range(result.header.num_output_node):
                node_output = kp.inference.generic_inference_retrieve_float_node(
                    node_idx=node_idx,
                    generic_raw_result=result,
                    channels_ordering=kp.ChannelOrdering.KP_CHANNEL_ORDERING_CHW
                )
                output_list.append(node_output)

            # Post-process
            yolo_result = postprocess(
                output_list,
                result.header.hw_pre_proc_info_list[0],
                original_width,
                original_height
            )

            # Convert to standard format
            bounding_boxes = [
                [box.x1, box.y1, box.x2, box.y2] for box in yolo_result.box_list
            ]

            # Use custom labels or default COCO classes
            labels_to_use = self.custom_labels if self.custom_labels else COCO_CLASSES

            # Class indices outside the label list get a generic name.
            results = []
            for box in yolo_result.box_list:
                if 0 <= box.class_num < len(labels_to_use):
                    results.append(labels_to_use[box.class_num])
                else:
                    results.append(f"class_{box.class_num}")

            return {
                "num_boxes": len(yolo_result.box_list),
                "bounding boxes": bounding_boxes,
                "results": results
            }

        except Exception as e:
            print(f"Error during inference: {e}")
            import traceback
            print(traceback.format_exc())
            return None

    def run(self):
        """
        Main execution loop.

        Continuously processes frames from the queue, runs inference,
        and emits results. Uses MSE-based frame change detection: when a
        frame differs little from the last inferred one, the cached result
        is re-emitted instead of running inference again. Frames arriving
        sooner than ``min_interval`` after the last inference are dropped.
        """
        while self._running:
            try:
                # Short timeout so the loop notices a stop request promptly.
                frame = self.frame_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            # A None entry carries no image; skipping it keeps the attribute
            # accesses below (frame.shape / frame.copy()) from raising and
            # ending the thread.
            if frame is None:
                continue

            current_time = time.time()
            if current_time - self.last_inference_time < self.min_interval:
                continue

            # MSE detection to optimize performance
            if self.last_frame is not None:
                if frame.shape != self.last_frame.shape:
                    # Frames of different size cannot be compared; start over.
                    self.last_frame = None
                    self.cached_result = None
                else:
                    try:
                        mse = np.mean((frame.astype(np.float32) - self.last_frame.astype(np.float32)) ** 2)
                        if mse < self.mse_threshold and self.cached_result is not None:
                            self.inference_result_signal.emit(self.cached_result)
                            continue
                    except Exception as e:
                        print(f"Error calculating MSE: {e}")
                        self.last_frame = None
                        self.cached_result = None

            # Execute inference
            result = self.run_single_inference(frame)

            self.last_inference_time = current_time
            self.last_frame = frame.copy()
            self.cached_result = result

            if result is not None:
                self.inference_result_signal.emit(result)

        # Disconnect device
        self.cleanup()
        self.quit()

    def cleanup(self):
        """Clean up resources.

        Only disconnects device if this worker created the connection itself
        (i.e. no device_group was provided via input_params). Afterwards the
        worker is marked as uninitialized so a later inference re-initializes
        instead of using the released device group. Safe to call repeatedly.
        """
        try:
            if self.device_group is not None:
                owned_by_worker = self.input_params.get("device_group") is None
                if owned_by_worker:
                    import kp
                    kp.core.disconnect_devices(self.device_group)
                    print('[Device disconnected]')
                else:
                    print('[Device connection owned by DeviceController, skipping disconnect]')
                self.device_group = None
                # The uploaded model belongs to the device group just released.
                self.model_descriptor = None
                self.is_initialized = False
        except Exception as e:
            print(f"Error cleaning up resources: {e}")

    def stop(self):
        """Stop the worker thread and clean up resources."""
        self._running = False
        # Block until run() has left its loop before releasing the device.
        self.wait()
        self.cleanup()