debug: Add comprehensive logging to diagnose pipeline hanging issue

- Add pipeline activity logging every 10 results to track processing
- Add queue size monitoring in InferencePipeline coordinator
- Add camera frame capture logging every 100 frames
- Add MultiDongle send/receive thread logging every 100 operations
- Add error handling for repeated callback failures in camera source

This will help identify where the pipeline stops processing:
- Camera capture stopping
- MultiDongle threads blocking
- Pipeline coordinator hanging
- Queue capacity issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Masonmason 2025-07-24 19:49:00 +08:00
parent 4b8fb7fead
commit cde1aac908
3 changed files with 28 additions and 1 deletions

View File

@ -522,6 +522,11 @@ class InferencePipeline:
# Record output timestamp for FPS calculation # Record output timestamp for FPS calculation
self._record_output_timestamp() self._record_output_timestamp()
# Debug: Log pipeline activity every 10 results
if self.completed_counter % 10 == 0:
print(f"[{self.pipeline_name}] Processed {self.completed_counter} results")
print(f"[{self.pipeline_name}] Queue sizes - Input: {self.pipeline_input_queue.qsize()}, Output: {self.pipeline_output_queue.qsize()}")
# Call result callback # Call result callback
if self.result_callback: if self.result_callback:
self.result_callback(current_data) self.result_callback(current_data)

View File

@ -409,6 +409,7 @@ class MultiDongle:
def _send_thread_func(self): def _send_thread_func(self):
"""Internal function run by the send thread, gets images from input queue.""" """Internal function run by the send thread, gets images from input queue."""
print("Send thread started.") print("Send thread started.")
send_count = 0
while not self._stop_event.is_set(): while not self._stop_event.is_set():
if self.generic_inference_input_descriptor is None: if self.generic_inference_input_descriptor is None:
# Wait for descriptor to be ready or stop # Wait for descriptor to be ready or stop
@ -434,6 +435,12 @@ class MultiDongle:
# Configure and send the image # Configure and send the image
self._inference_counter += 1 # Increment counter for each image self._inference_counter += 1 # Increment counter for each image
send_count += 1
# Debug: Log send activity every 100 images
if send_count % 100 == 0:
print(f"[MultiDongle] Sent {send_count} images to inference")
self.generic_inference_input_descriptor.inference_number = self._inference_counter self.generic_inference_input_descriptor.inference_number = self._inference_counter
self.generic_inference_input_descriptor.input_node_image_list = [kp.GenericInputNodeImage( self.generic_inference_input_descriptor.input_node_image_list = [kp.GenericInputNodeImage(
image=image_data, image=image_data,
@ -445,7 +452,6 @@ class MultiDongle:
kp.inference.generic_image_inference_send(device_group=self.device_group, kp.inference.generic_image_inference_send(device_group=self.device_group,
generic_inference_input_descriptor=self.generic_inference_input_descriptor) generic_inference_input_descriptor=self.generic_inference_input_descriptor)
# print("Image sent.") # Optional: add log
# No need for sleep here usually, as queue.get is blocking # No need for sleep here usually, as queue.get is blocking
except kp.ApiKPException as exception: except kp.ApiKPException as exception:
print(f' - Error in send thread: inference send failed, error = {exception}') print(f' - Error in send thread: inference send failed, error = {exception}')
@ -460,10 +466,16 @@ class MultiDongle:
def _receive_thread_func(self): def _receive_thread_func(self):
"""Internal function run by the receive thread, puts results into output queue.""" """Internal function run by the receive thread, puts results into output queue."""
print("Receive thread started.") print("Receive thread started.")
receive_count = 0
while not self._stop_event.is_set(): while not self._stop_event.is_set():
try: try:
generic_inference_output_descriptor = kp.inference.generic_image_inference_receive(device_group=self.device_group) generic_inference_output_descriptor = kp.inference.generic_image_inference_receive(device_group=self.device_group)
self._output_queue.put(generic_inference_output_descriptor) self._output_queue.put(generic_inference_output_descriptor)
receive_count += 1
# Debug: Log receive activity every 100 results
if receive_count % 100 == 0:
print(f"[MultiDongle] Received {receive_count} inference results")
except kp.ApiKPException as exception: except kp.ApiKPException as exception:
if not self._stop_event.is_set(): # Avoid printing error if we are already stopping if not self._stop_event.is_set(): # Avoid printing error if we are already stopping
print(f' - Error in receive thread: inference receive failed, error = {exception}') print(f' - Error in receive thread: inference receive failed, error = {exception}')

View File

@ -95,6 +95,7 @@ class CameraSource:
""" """
The main loop for capturing frames from the camera. The main loop for capturing frames from the camera.
""" """
frame_count = 0
while self.running and not self._stop_event.is_set(): while self.running and not self._stop_event.is_set():
ret, frame = self.cap.read() ret, frame = self.cap.read()
if not ret: if not ret:
@ -104,12 +105,21 @@ class CameraSource:
self.initialize() self.initialize()
continue continue
frame_count += 1
# Debug: Log camera activity every 100 frames
if frame_count % 100 == 0:
print(f"[Camera] Captured {frame_count} frames")
if self.data_callback: if self.data_callback:
try: try:
# Assuming the callback is thread-safe or handles its own locking # Assuming the callback is thread-safe or handles its own locking
self.data_callback(frame) self.data_callback(frame)
except Exception as e: except Exception as e:
print(f"Error in data_callback: {e}") print(f"Error in data_callback: {e}")
# If callback fails repeatedly, camera might need to stop
if frame_count > 10: # Allow some initial failures
print("Too many callback failures, stopping camera")
break
if self.frame_callback: if self.frame_callback:
try: try: