debug: Add comprehensive logging to diagnose pipeline hanging issue
- Add pipeline activity logging every 10 results to track processing
- Add queue size monitoring in InferencePipeline coordinator
- Add camera frame capture logging every 100 frames
- Add MultiDongle send/receive thread logging every 100 operations
- Add error handling for repeated callback failures in camera source

This will help identify where the pipeline stops processing:
- Camera capture stopping
- MultiDongle threads blocking
- Pipeline coordinator hanging
- Queue capacity issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
4b8fb7fead
commit
cde1aac908
@ -522,6 +522,11 @@ class InferencePipeline:
|
|||||||
# Record output timestamp for FPS calculation
|
# Record output timestamp for FPS calculation
|
||||||
self._record_output_timestamp()
|
self._record_output_timestamp()
|
||||||
|
|
||||||
|
# Debug: Log pipeline activity every 10 results
|
||||||
|
if self.completed_counter % 10 == 0:
|
||||||
|
print(f"[{self.pipeline_name}] Processed {self.completed_counter} results")
|
||||||
|
print(f"[{self.pipeline_name}] Queue sizes - Input: {self.pipeline_input_queue.qsize()}, Output: {self.pipeline_output_queue.qsize()}")
|
||||||
|
|
||||||
# Call result callback
|
# Call result callback
|
||||||
if self.result_callback:
|
if self.result_callback:
|
||||||
self.result_callback(current_data)
|
self.result_callback(current_data)
|
||||||
|
|||||||
@ -409,6 +409,7 @@ class MultiDongle:
|
|||||||
def _send_thread_func(self):
|
def _send_thread_func(self):
|
||||||
"""Internal function run by the send thread, gets images from input queue."""
|
"""Internal function run by the send thread, gets images from input queue."""
|
||||||
print("Send thread started.")
|
print("Send thread started.")
|
||||||
|
send_count = 0
|
||||||
while not self._stop_event.is_set():
|
while not self._stop_event.is_set():
|
||||||
if self.generic_inference_input_descriptor is None:
|
if self.generic_inference_input_descriptor is None:
|
||||||
# Wait for descriptor to be ready or stop
|
# Wait for descriptor to be ready or stop
|
||||||
@ -434,6 +435,12 @@ class MultiDongle:
|
|||||||
|
|
||||||
# Configure and send the image
|
# Configure and send the image
|
||||||
self._inference_counter += 1 # Increment counter for each image
|
self._inference_counter += 1 # Increment counter for each image
|
||||||
|
send_count += 1
|
||||||
|
|
||||||
|
# Debug: Log send activity every 100 images
|
||||||
|
if send_count % 100 == 0:
|
||||||
|
print(f"[MultiDongle] Sent {send_count} images to inference")
|
||||||
|
|
||||||
self.generic_inference_input_descriptor.inference_number = self._inference_counter
|
self.generic_inference_input_descriptor.inference_number = self._inference_counter
|
||||||
self.generic_inference_input_descriptor.input_node_image_list = [kp.GenericInputNodeImage(
|
self.generic_inference_input_descriptor.input_node_image_list = [kp.GenericInputNodeImage(
|
||||||
image=image_data,
|
image=image_data,
|
||||||
@ -445,7 +452,6 @@ class MultiDongle:
|
|||||||
|
|
||||||
kp.inference.generic_image_inference_send(device_group=self.device_group,
|
kp.inference.generic_image_inference_send(device_group=self.device_group,
|
||||||
generic_inference_input_descriptor=self.generic_inference_input_descriptor)
|
generic_inference_input_descriptor=self.generic_inference_input_descriptor)
|
||||||
# print("Image sent.") # Optional: add log
|
|
||||||
# No need for sleep here usually, as queue.get is blocking
|
# No need for sleep here usually, as queue.get is blocking
|
||||||
except kp.ApiKPException as exception:
|
except kp.ApiKPException as exception:
|
||||||
print(f' - Error in send thread: inference send failed, error = {exception}')
|
print(f' - Error in send thread: inference send failed, error = {exception}')
|
||||||
@ -460,10 +466,16 @@ class MultiDongle:
|
|||||||
def _receive_thread_func(self):
|
def _receive_thread_func(self):
|
||||||
"""Internal function run by the receive thread, puts results into output queue."""
|
"""Internal function run by the receive thread, puts results into output queue."""
|
||||||
print("Receive thread started.")
|
print("Receive thread started.")
|
||||||
|
receive_count = 0
|
||||||
while not self._stop_event.is_set():
|
while not self._stop_event.is_set():
|
||||||
try:
|
try:
|
||||||
generic_inference_output_descriptor = kp.inference.generic_image_inference_receive(device_group=self.device_group)
|
generic_inference_output_descriptor = kp.inference.generic_image_inference_receive(device_group=self.device_group)
|
||||||
self._output_queue.put(generic_inference_output_descriptor)
|
self._output_queue.put(generic_inference_output_descriptor)
|
||||||
|
receive_count += 1
|
||||||
|
|
||||||
|
# Debug: Log receive activity every 100 results
|
||||||
|
if receive_count % 100 == 0:
|
||||||
|
print(f"[MultiDongle] Received {receive_count} inference results")
|
||||||
except kp.ApiKPException as exception:
|
except kp.ApiKPException as exception:
|
||||||
if not self._stop_event.is_set(): # Avoid printing error if we are already stopping
|
if not self._stop_event.is_set(): # Avoid printing error if we are already stopping
|
||||||
print(f' - Error in receive thread: inference receive failed, error = {exception}')
|
print(f' - Error in receive thread: inference receive failed, error = {exception}')
|
||||||
|
|||||||
@ -95,6 +95,7 @@ class CameraSource:
|
|||||||
"""
|
"""
|
||||||
The main loop for capturing frames from the camera.
|
The main loop for capturing frames from the camera.
|
||||||
"""
|
"""
|
||||||
|
frame_count = 0
|
||||||
while self.running and not self._stop_event.is_set():
|
while self.running and not self._stop_event.is_set():
|
||||||
ret, frame = self.cap.read()
|
ret, frame = self.cap.read()
|
||||||
if not ret:
|
if not ret:
|
||||||
@ -104,12 +105,21 @@ class CameraSource:
|
|||||||
self.initialize()
|
self.initialize()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
frame_count += 1
|
||||||
|
# Debug: Log camera activity every 100 frames
|
||||||
|
if frame_count % 100 == 0:
|
||||||
|
print(f"[Camera] Captured {frame_count} frames")
|
||||||
|
|
||||||
if self.data_callback:
|
if self.data_callback:
|
||||||
try:
|
try:
|
||||||
# Assuming the callback is thread-safe or handles its own locking
|
# Assuming the callback is thread-safe or handles its own locking
|
||||||
self.data_callback(frame)
|
self.data_callback(frame)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error in data_callback: {e}")
|
print(f"Error in data_callback: {e}")
|
||||||
|
# If callback fails repeatedly, camera might need to stop
|
||||||
|
if frame_count > 10: # Allow some initial failures
|
||||||
|
print("Too many callback failures, stopping camera")
|
||||||
|
break
|
||||||
|
|
||||||
if self.frame_callback:
|
if self.frame_callback:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user