debug: Add comprehensive logging to diagnose pipeline hanging issue

- Add pipeline activity logging every 10 results to track processing
- Add queue size monitoring in InferencePipeline coordinator
- Add camera frame capture logging every 100 frames
- Add MultiDongle send/receive thread logging every 100 operations
- Add error handling in camera source: stop the capture loop when the data callback raises after the first 10 frames

This will help identify where the pipeline stops processing:
- Camera capture stopping
- MultiDongle threads blocking
- Pipeline coordinator hanging
- Queue capacity issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Masonmason 2025-07-24 19:49:00 +08:00
parent 4b8fb7fead
commit cde1aac908
3 changed files with 28 additions and 1 deletions

View File

@@ -522,6 +522,11 @@ class InferencePipeline:
# Record output timestamp for FPS calculation
self._record_output_timestamp()
# Debug: Log pipeline activity every 10 results
if self.completed_counter % 10 == 0:
print(f"[{self.pipeline_name}] Processed {self.completed_counter} results")
print(f"[{self.pipeline_name}] Queue sizes - Input: {self.pipeline_input_queue.qsize()}, Output: {self.pipeline_output_queue.qsize()}")
# Call result callback
if self.result_callback:
self.result_callback(current_data)

View File

@@ -409,6 +409,7 @@ class MultiDongle:
def _send_thread_func(self):
"""Internal function run by the send thread, gets images from input queue."""
print("Send thread started.")
send_count = 0
while not self._stop_event.is_set():
if self.generic_inference_input_descriptor is None:
# Wait for descriptor to be ready or stop
@@ -434,6 +435,12 @@ class MultiDongle:
# Configure and send the image
self._inference_counter += 1 # Increment counter for each image
send_count += 1
# Debug: Log send activity every 100 images
if send_count % 100 == 0:
print(f"[MultiDongle] Sent {send_count} images to inference")
self.generic_inference_input_descriptor.inference_number = self._inference_counter
self.generic_inference_input_descriptor.input_node_image_list = [kp.GenericInputNodeImage(
image=image_data,
@@ -445,7 +452,6 @@ class MultiDongle:
kp.inference.generic_image_inference_send(device_group=self.device_group,
generic_inference_input_descriptor=self.generic_inference_input_descriptor)
# print("Image sent.") # Optional: add log
# No need for sleep here usually, as queue.get is blocking
except kp.ApiKPException as exception:
print(f' - Error in send thread: inference send failed, error = {exception}')
@@ -460,10 +466,16 @@ class MultiDongle:
def _receive_thread_func(self):
"""Internal function run by the receive thread, puts results into output queue."""
print("Receive thread started.")
receive_count = 0
while not self._stop_event.is_set():
try:
generic_inference_output_descriptor = kp.inference.generic_image_inference_receive(device_group=self.device_group)
self._output_queue.put(generic_inference_output_descriptor)
receive_count += 1
# Debug: Log receive activity every 100 results
if receive_count % 100 == 0:
print(f"[MultiDongle] Received {receive_count} inference results")
except kp.ApiKPException as exception:
if not self._stop_event.is_set(): # Avoid printing error if we are already stopping
print(f' - Error in receive thread: inference receive failed, error = {exception}')

View File

@@ -95,6 +95,7 @@ class CameraSource:
"""
The main loop for capturing frames from the camera.
"""
frame_count = 0
while self.running and not self._stop_event.is_set():
ret, frame = self.cap.read()
if not ret:
@@ -104,12 +105,21 @@ class CameraSource:
self.initialize()
continue
frame_count += 1
# Debug: Log camera activity every 100 frames
if frame_count % 100 == 0:
print(f"[Camera] Captured {frame_count} frames")
if self.data_callback:
try:
# Assuming the callback is thread-safe or handles its own locking
self.data_callback(frame)
except Exception as e:
print(f"Error in data_callback: {e}")
# NOTE(review): the check below tests frame_count (frames captured), not a failure count, so any single callback failure after frame 10 stops the camera
if frame_count > 10: # Allow some initial failures
print("Too many callback failures, stopping camera")
break
if self.frame_callback:
try: