Mason ee4d1a3e4a Add comprehensive TODO planning and new camera/video source implementations
- Add detailed TODO.md with complete project roadmap and implementation priorities
- Implement CameraSource class with multi-camera support and real-time capture
- Add VideoFileSource class with batch processing and frame control capabilities
- Create foundation for complete input/output data flow integration
- Document current auto-resize preprocessing implementation status
- Establish clear development phases and identify key missing components

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-16 23:19:00 +08:00


"""
Camera input source for the Cluster4NPU inference pipeline.
This module provides camera input capabilities with support for multiple cameras,
resolution configuration, and seamless integration with the InferencePipeline.
"""
import cv2
import numpy as np
import threading
import time
from typing import Optional, Callable, Tuple, Dict, Any
from dataclasses import dataclass
from abc import ABC, abstractmethod
@dataclass
class CameraConfig:
"""Configuration for camera input source."""
camera_index: int = 0
width: int = 640
height: int = 480
fps: int = 30
    format: str = 'BGR'  # frames are delivered as OpenCV BGR; no conversion is applied in this module
auto_exposure: bool = True
brightness: float = 0.5
contrast: float = 0.5
saturation: float = 0.5
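

# --- Illustrative example (editor addition, not part of the original API) ---
# CameraConfig is a plain dataclass, so per-deployment tweaks only need to name
# the fields that differ from the defaults.  The index/resolution/FPS values
# below are arbitrary placeholders, not recommendations.
def _example_secondary_camera_config() -> CameraConfig:
    """Sketch: a 720p / 15 FPS configuration for a second USB camera."""
    return CameraConfig(camera_index=1, width=1280, height=720, fps=15,
                        auto_exposure=False)
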
class DataSourceBase(ABC):
"""Abstract base class for data sources."""
@abstractmethod
def start(self) -> bool:
"""Start the data source."""
pass
@abstractmethod
def stop(self) -> None:
"""Stop the data source."""
pass
@abstractmethod
def is_running(self) -> bool:
"""Check if the data source is running."""
pass
@abstractmethod
def get_frame(self) -> Optional[np.ndarray]:
"""Get the next frame from the source."""
pass
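

# --- Illustrative sketch (editor addition, not part of the original module) ---
# Any input can be plugged into the pipeline by implementing the four
# DataSourceBase methods.  The minimal in-memory source below only shows the
# contract; the real VideoFileSource mentioned in the commit message lives in
# its own module and may differ from this sketch.
class _ExampleListSource(DataSourceBase):
    """Serve pre-loaded frames one by one; get_frame() returns None when exhausted."""

    def __init__(self, frames):
        self._frames = list(frames)
        self._running = False

    def start(self) -> bool:
        self._running = True
        return True

    def stop(self) -> None:
        self._running = False

    def is_running(self) -> bool:
        return self._running

    def get_frame(self) -> Optional[np.ndarray]:
        if self._running and self._frames:
            return self._frames.pop(0)
        return None
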
class CameraSource(DataSourceBase):
"""
Camera input source for real-time video capture.
Features:
- Multiple camera index support
- Resolution and FPS configuration
    - Format configuration (BGR capture; conversion to the model input format is left to downstream preprocessing)
- Error handling for camera disconnection
- Thread-safe frame capture
"""
def __init__(self, config: CameraConfig, frame_callback: Optional[Callable[[np.ndarray], None]] = None):
"""
Initialize camera source.
Args:
config: Camera configuration
frame_callback: Optional callback for each captured frame
"""
self.config = config
self.frame_callback = frame_callback
# Camera capture object
self.cap: Optional[cv2.VideoCapture] = None
# Threading control
self._capture_thread: Optional[threading.Thread] = None
self._stop_event = threading.Event()
self._frame_lock = threading.Lock()
# Current frame storage
self._current_frame: Optional[np.ndarray] = None
self._frame_count = 0
self._fps_counter = 0
self._last_fps_time = time.time()
self._actual_fps = 0.0
# Error handling
self._connection_lost = False
self._last_error: Optional[str] = None
def start(self) -> bool:
"""
Start camera capture.
Returns:
bool: True if camera started successfully, False otherwise
"""
if self.is_running():
return True
try:
# Initialize camera
self.cap = cv2.VideoCapture(self.config.camera_index)
if not self.cap.isOpened():
self._last_error = f"Failed to open camera {self.config.camera_index}"
return False
# Configure camera properties
self._configure_camera()
# Test camera capture
ret, frame = self.cap.read()
if not ret or frame is None:
self._last_error = "Failed to read initial frame from camera"
self.cap.release()
self.cap = None
return False
print(f"[CameraSource] Camera {self.config.camera_index} opened successfully")
print(f"[CameraSource] Resolution: {self.config.width}x{self.config.height}, FPS: {self.config.fps}")
# Start capture thread
self._stop_event.clear()
self._connection_lost = False
self._capture_thread = threading.Thread(target=self._capture_loop, daemon=True)
self._capture_thread.start()
return True
except Exception as e:
self._last_error = f"Camera initialization error: {str(e)}"
if self.cap:
self.cap.release()
self.cap = None
return False
def stop(self) -> None:
"""Stop camera capture."""
        # Note: don't bail out on is_running() here; if the capture thread has
        # died (e.g. the camera was disconnected) the device still needs to be
        # released below.
        if self.cap is None:
            return
print("[CameraSource] Stopping camera capture...")
# Signal stop
self._stop_event.set()
# Wait for capture thread to finish
if self._capture_thread and self._capture_thread.is_alive():
self._capture_thread.join(timeout=2.0)
# Release camera
if self.cap:
self.cap.release()
self.cap = None
# Clear current frame
with self._frame_lock:
self._current_frame = None
print("[CameraSource] Camera capture stopped")
def is_running(self) -> bool:
"""Check if camera is currently running."""
return (self.cap is not None and
self.cap.isOpened() and
self._capture_thread is not None and
self._capture_thread.is_alive() and
not self._stop_event.is_set())
def get_frame(self) -> Optional[np.ndarray]:
"""
Get the latest captured frame.
Returns:
Optional[np.ndarray]: Latest frame or None if no frame available
"""
with self._frame_lock:
if self._current_frame is not None:
return self._current_frame.copy()
return None
def get_stats(self) -> Dict[str, Any]:
"""
Get camera statistics.
Returns:
Dict[str, Any]: Statistics including FPS, frame count, etc.
"""
return {
'frame_count': self._frame_count,
'actual_fps': self._actual_fps,
'target_fps': self.config.fps,
'resolution': (self.config.width, self.config.height),
'camera_index': self.config.camera_index,
'connection_lost': self._connection_lost,
'last_error': self._last_error,
'is_running': self.is_running()
}
def _configure_camera(self) -> None:
"""Configure camera properties."""
if not self.cap:
return
# Set resolution
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.config.width)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.config.height)
# Set FPS
self.cap.set(cv2.CAP_PROP_FPS, self.config.fps)
# Set other properties
        if hasattr(cv2, 'CAP_PROP_AUTO_EXPOSURE'):
            # The accepted value range for CAP_PROP_AUTO_EXPOSURE is
            # backend-dependent (V4L2 commonly expects 0.25/0.75), so this
            # 0/1 toggle may be ignored on some platforms.
            self.cap.set(cv2.CAP_PROP_AUTO_EXPOSURE, 1 if self.config.auto_exposure else 0)
self.cap.set(cv2.CAP_PROP_BRIGHTNESS, self.config.brightness)
self.cap.set(cv2.CAP_PROP_CONTRAST, self.config.contrast)
self.cap.set(cv2.CAP_PROP_SATURATION, self.config.saturation)
# Verify actual settings
actual_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
actual_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
actual_fps = self.cap.get(cv2.CAP_PROP_FPS)
print(f"[CameraSource] Actual resolution: {actual_width}x{actual_height}, FPS: {actual_fps}")
def _capture_loop(self) -> None:
"""Main capture loop running in separate thread."""
print("[CameraSource] Capture loop started")
frame_interval = 1.0 / self.config.fps
last_capture_time = time.time()
while not self._stop_event.is_set():
try:
# Control frame rate
current_time = time.time()
time_since_last = current_time - last_capture_time
if time_since_last < frame_interval:
sleep_time = frame_interval - time_since_last
time.sleep(sleep_time)
continue
last_capture_time = current_time
# Capture frame
if not self.cap or not self.cap.isOpened():
self._connection_lost = True
break
ret, frame = self.cap.read()
if not ret or frame is None:
print("[CameraSource] Failed to read frame from camera")
self._connection_lost = True
break
# Update frame
with self._frame_lock:
self._current_frame = frame
# Update statistics
self._frame_count += 1
self._fps_counter += 1
# Calculate actual FPS
if current_time - self._last_fps_time >= 1.0:
self._actual_fps = self._fps_counter / (current_time - self._last_fps_time)
self._fps_counter = 0
self._last_fps_time = current_time
# Call frame callback if provided
if self.frame_callback:
try:
self.frame_callback(frame)
except Exception as e:
print(f"[CameraSource] Frame callback error: {e}")
except Exception as e:
print(f"[CameraSource] Capture loop error: {e}")
self._last_error = str(e)
time.sleep(0.1) # Brief pause before retrying
print("[CameraSource] Capture loop ended")
def __enter__(self):
"""Context manager entry."""
if not self.start():
raise RuntimeError(f"Failed to start camera: {self._last_error}")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.stop()
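

# --- Illustrative usage (editor addition, not part of the original module) ---
# CameraSource implements the context-manager protocol above, so a capture
# session can be scoped with `with`, and a failed start() surfaces as a
# RuntimeError.  The camera index and polling interval are arbitrary
# assumptions for the sketch.
def _example_capture_session(duration_s: float = 2.0) -> int:
    """Sketch: poll the latest frame for a short period, return the hit count."""
    hits = 0
    with CameraSource(CameraConfig(camera_index=0)) as cam:
        deadline = time.time() + duration_s
        while time.time() < deadline:
            if cam.get_frame() is not None:
                hits += 1
            time.sleep(0.05)  # capture runs in its own thread; this just polls
    return hits
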
class CameraPipelineFeeder:
"""
Helper class to feed camera frames to InferencePipeline.
This class bridges the CameraSource and InferencePipeline,
handling frame format conversion and pipeline data feeding.
"""
def __init__(self, camera_source: CameraSource, pipeline, feed_rate: float = 30.0):
"""
Initialize camera pipeline feeder.
Args:
camera_source: CameraSource instance
pipeline: InferencePipeline instance
feed_rate: Rate at which to feed frames to pipeline (FPS)
"""
self.camera_source = camera_source
self.pipeline = pipeline
self.feed_rate = feed_rate
# Threading control
self._feed_thread: Optional[threading.Thread] = None
self._stop_event = threading.Event()
self._is_feeding = False
# Statistics
self._frames_fed = 0
self._last_feed_time = 0.0
def start_feeding(self) -> bool:
"""
Start feeding camera frames to pipeline.
Returns:
bool: True if feeding started successfully
"""
if self._is_feeding:
return True
if not self.camera_source.is_running():
print("[CameraPipelineFeeder] Camera is not running")
return False
print("[CameraPipelineFeeder] Starting frame feeding...")
self._stop_event.clear()
self._is_feeding = True
self._feed_thread = threading.Thread(target=self._feed_loop, daemon=True)
self._feed_thread.start()
return True
def stop_feeding(self) -> None:
"""Stop feeding frames to pipeline."""
if not self._is_feeding:
return
print("[CameraPipelineFeeder] Stopping frame feeding...")
self._stop_event.set()
self._is_feeding = False
if self._feed_thread and self._feed_thread.is_alive():
self._feed_thread.join(timeout=2.0)
print("[CameraPipelineFeeder] Frame feeding stopped")
def _feed_loop(self) -> None:
"""Main feeding loop."""
feed_interval = 1.0 / self.feed_rate
last_feed_time = time.time()
while not self._stop_event.is_set():
try:
current_time = time.time()
# Control feed rate
if current_time - last_feed_time < feed_interval:
time.sleep(0.001) # Small sleep to prevent busy waiting
continue
# Get frame from camera
frame = self.camera_source.get_frame()
if frame is None:
time.sleep(0.01)
continue
# Feed frame to pipeline
try:
                    # Imported lazily; after the first iteration this is a
                    # cached module lookup, so the per-frame cost is negligible.
                    from InferencePipeline import PipelineData
pipeline_data = PipelineData(
data=frame,
metadata={
'source': 'camera',
'camera_index': self.camera_source.config.camera_index,
'timestamp': current_time,
'frame_id': self._frames_fed
}
)
# Put data into pipeline
self.pipeline.put_data(pipeline_data)
self._frames_fed += 1
last_feed_time = current_time
except Exception as e:
print(f"[CameraPipelineFeeder] Error feeding frame to pipeline: {e}")
time.sleep(0.1)
except Exception as e:
print(f"[CameraPipelineFeeder] Feed loop error: {e}")
time.sleep(0.1)
def get_stats(self) -> Dict[str, Any]:
"""Get feeding statistics."""
return {
'frames_fed': self._frames_fed,
'feed_rate': self.feed_rate,
'is_feeding': self._is_feeding,
'camera_stats': self.camera_source.get_stats()
}
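

# --- Illustrative wiring sketch (editor addition, not part of the original module) ---
# Typical use of CameraPipelineFeeder: start the camera, then let the feeder
# push frames into an already-running InferencePipeline via put_data().  The
# pipeline object is passed in because its construction/start-up API is defined
# elsewhere and is not assumed here; the durations and rates are placeholders.
def _example_feed_pipeline(pipeline, seconds: float = 5.0) -> Dict[str, Any]:
    """Sketch: feed camera frames to `pipeline` for a few seconds, return stats."""
    camera = CameraSource(CameraConfig(camera_index=0, fps=30))
    if not camera.start():
        raise RuntimeError(f"Camera failed to start: {camera.get_stats()['last_error']}")
    feeder = CameraPipelineFeeder(camera, pipeline, feed_rate=15.0)
    try:
        if not feeder.start_feeding():
            raise RuntimeError("Feeder failed to start")
        time.sleep(seconds)
        return feeder.get_stats()
    finally:
        feeder.stop_feeding()
        camera.stop()
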
def list_available_cameras() -> Dict[int, Dict[str, Any]]:
"""
List all available camera devices.
Returns:
Dict[int, Dict[str, Any]]: Dictionary of camera index to camera info
"""
cameras = {}
for i in range(10): # Check first 10 camera indices
cap = cv2.VideoCapture(i)
if cap.isOpened():
# Get camera properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
cameras[i] = {
'index': i,
'width': width,
'height': height,
'fps': fps,
'backend': cap.getBackendName() if hasattr(cap, 'getBackendName') else 'Unknown'
}
cap.release()
return cameras
# Example usage and testing
if __name__ == "__main__":
# List available cameras
print("Available cameras:")
cameras = list_available_cameras()
for idx, info in cameras.items():
print(f" Camera {idx}: {info['width']}x{info['height']} @ {info['fps']} FPS ({info['backend']})")
if not cameras:
print("No cameras found!")
        raise SystemExit(1)
# Test camera capture
config = CameraConfig(
camera_index=0,
width=640,
height=480,
fps=30
)
def frame_callback(frame):
print(f"Frame captured: {frame.shape}")
camera = CameraSource(config, frame_callback)
try:
if camera.start():
print("Camera started successfully")
# Capture for 5 seconds
time.sleep(5)
# Print statistics
stats = camera.get_stats()
print(f"Statistics: {stats}")
else:
print("Failed to start camera")
finally:
camera.stop()