jim800121chen c54f16fca0 Initial commit: visionA monorepo with local-tool subproject
local-tool/: visionA-local desktop app
- M1: Wails shell + Go server + Next.js UI + Mock mode (macOS dmg ready)
- M2: i18n (zh-TW/en) + Settings 4-tab refactor
- M3: Embedded Python 3.12 runtime (python-build-standalone) + KneronPLUS wheels
- M4: Windows Inno Setup script (build on Windows runner)
- M5: Linux AppImage script + udev rule (build on Linux runner)
- M6: ffmpeg (GPL, pending legal review) + yt-dlp bundled
- Lifecycle: watchServer health check, fatal native dialog,
            Wails IPC raise endpoint, stale process cleanup

.autoflow/: full PRD / Design Spec / Architecture / Testing docs
            (4 rounds tri-party discussion + cross review)
.github/workflows/: macOS / Windows / Linux build CI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 22:10:38 +08:00

189 lines
4.5 KiB
Go

package camera
import (
"bufio"
"fmt"
"io"
"os/exec"
"runtime"
"sync"
)
// FFmpegCamera grabs webcam frames by running ffmpeg as a child process.
// Capture uses AVFoundation on macOS and DirectShow on Windows.
// ffmpeg writes a continuous MJPEG stream to its stdout; a background
// goroutine splits that stream into individual images by looking for the
// JPEG SOI (0xFFD8) and EOI (0xFFD9) markers and keeps only the newest one.
type FFmpegCamera struct {
	cmd    *exec.Cmd     // the ffmpeg child process
	stdout io.ReadCloser // pipe connected to ffmpeg's stdout
	done   chan struct{} // closed when the read loop goroutine returns

	mu          sync.Mutex // guards latestFrame and err
	latestFrame []byte     // most recent complete JPEG, nil until the first one arrives
	err         error      // set once the stream ends; reported by ReadFrame
}
// NewFFmpegCamera launches ffmpeg to capture from a camera chosen by index.
// It is shorthand for NewFFmpegCameraWithName with an empty camera name.
//
// On Windows the index is not consulted: with no name given, the first device
// reported by ListFFmpegDevices is used, or "Integrated Camera" if none is
// found. Elsewhere cameraIndex selects the device (0 is the first camera).
func NewFFmpegCamera(cameraIndex, width, height, framerate int) (*FFmpegCamera, error) {
	const unnamed = ""
	cam, err := NewFFmpegCameraWithName(cameraIndex, unnamed, width, height, framerate)
	return cam, err
}
// NewFFmpegCameraWithName launches ffmpeg for the given camera and begins
// collecting frames in the background. cameraName is what Windows DirectShow
// needs to address a device; see buildCaptureArgs for how the index and name
// are turned into ffmpeg arguments.
//
// An error is returned if the stdout pipe cannot be created or ffmpeg cannot
// be started. On success the caller should eventually call Close.
func NewFFmpegCameraWithName(cameraIndex int, cameraName string, width, height, framerate int) (*FFmpegCamera, error) {
	proc := exec.Command("ffmpeg", buildCaptureArgs(cameraIndex, cameraName, width, height, framerate)...)

	// Keep ffmpeg's banner and log output out of the way.
	proc.Stderr = nil

	pipe, pipeErr := proc.StdoutPipe()
	if pipeErr != nil {
		return nil, fmt.Errorf("failed to get stdout pipe: %w", pipeErr)
	}

	if startErr := proc.Start(); startErr != nil {
		return nil, fmt.Errorf("failed to start ffmpeg: %w", startErr)
	}

	cam := new(FFmpegCamera)
	cam.cmd = proc
	cam.stdout = pipe
	cam.done = make(chan struct{})

	// The reader goroutine closes cam.done when the stream ends.
	go cam.readLoop()

	return cam, nil
}
// buildCaptureArgs returns the ffmpeg command-line arguments used to capture
// from a webcam on the current OS and emit MJPEG frames on stdout.
//
// The input device is chosen per platform:
//   - windows: DirectShow ("-f dshow -i video=<name>"). cameraName is used when
//     non-empty; otherwise the first device from ListFFmpegDevices, and as a
//     last resort "Integrated Camera". cameraIndex is not used.
//   - linux: Video4Linux2 ("-f v4l2 -i /dev/video<index>"). AVFoundation does
//     not exist on Linux, so falling through to the macOS arguments would make
//     ffmpeg fail immediately.
//   - anything else (macOS): AVFoundation ("-f avfoundation -i <index>:none").
//
// width, height and framerate are passed as the requested capture mode. The
// output half of the command is identical on every platform.
func buildCaptureArgs(cameraIndex int, cameraName string, width, height, framerate int) []string {
	videoSize := fmt.Sprintf("%dx%d", width, height)
	fps := fmt.Sprintf("%d", framerate)

	var inputFormat, input string
	switch runtime.GOOS {
	case "windows":
		inputName := cameraName
		if inputName == "" {
			// No explicit name: fall back to the first detected camera.
			if devices := ListFFmpegDevices(); len(devices) > 0 {
				inputName = devices[0].Name
			} else {
				inputName = "Integrated Camera"
			}
		}
		inputFormat = "dshow"
		input = fmt.Sprintf("video=%s", inputName)
	case "linux":
		inputFormat = "v4l2"
		input = fmt.Sprintf("/dev/video%d", cameraIndex)
	default:
		// "<video index>:none" selects a video device and no audio device.
		inputFormat = "avfoundation"
		input = fmt.Sprintf("%d:none", cameraIndex)
	}

	return []string{
		"-f", inputFormat,
		"-framerate", fps,
		"-video_size", videoSize,
		"-i", input,
		"-f", "image2pipe", // stream images back-to-back on the output
		"-vcodec", "mjpeg",
		"-q:v", "5",
		"-an", // no audio
		"-",   // write to stdout
	}
}
// readLoop consumes ffmpeg's stdout until the stream ends, splitting it into
// JPEG images and publishing the newest complete one in c.latestFrame.
//
// A frame starts at an SOI marker (0xFF 0xD8) and ends at the next EOI marker
// (0xFF 0xD9); bytes between frames are discarded. When reading fails (including
// EOF) the wrapped error is stored in c.err and the loop exits. c.done is
// closed on return so Close can wait for the goroutine.
func (c *FFmpegCamera) readLoop() {
	defer close(c.done)

	reader := bufio.NewReaderSize(c.stdout, 1024*1024) // 1MB read buffer
	buf := make([]byte, 0, 512*1024)                   // frame under construction, reused between frames
	inFrame := false

	// prev is the previous byte seen while searching for SOI. Tracking it,
	// rather than reading and discarding the byte after every 0xFF, means a
	// run such as FF FF D8 (a fill byte before the marker) is still
	// recognised as a frame start.
	var prev byte

	for {
		b, err := reader.ReadByte()
		if err != nil {
			c.mu.Lock()
			c.err = fmt.Errorf("ffmpeg stream ended: %w", err)
			c.mu.Unlock()
			return
		}

		if !inFrame {
			if prev == 0xFF && b == 0xD8 {
				// Start of JPEG: restart the buffer with the marker itself.
				buf = append(buf[:0], 0xFF, 0xD8)
				inFrame = true
				prev = 0 // the next SOI search must not reuse stale state
				continue
			}
			prev = b
			continue
		}

		// Inside a frame: collect bytes until EOI. buf always holds at least
		// the two SOI bytes here, so indexing len(buf)-2 is safe.
		buf = append(buf, b)
		if b == 0xD9 && buf[len(buf)-2] == 0xFF {
			// Publish a private copy; buf is overwritten by the next frame.
			frame := make([]byte, len(buf))
			copy(frame, buf)

			c.mu.Lock()
			c.latestFrame = frame
			c.mu.Unlock()

			inFrame = false
		}
	}
}
// ReadFrame hands back the newest complete JPEG captured so far.
//
// If the capture stream has already ended, the stored stream error is
// returned instead, even when an earlier frame exists. Before the first frame
// has arrived an error is returned as well. The returned slice is a private
// copy, so callers may modify it freely.
func (c *FFmpegCamera) ReadFrame() ([]byte, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	switch {
	case c.err != nil:
		return nil, c.err
	case c.latestFrame == nil:
		return nil, fmt.Errorf("no frame available yet")
	}

	// Copy under the lock so the caller never shares memory with the reader goroutine.
	snapshot := append(make([]byte, 0, len(c.latestFrame)), c.latestFrame...)
	return snapshot, nil
}
// Close terminates the ffmpeg process, if one was started, and then blocks
// until the background read loop has exited. Errors from killing or waiting
// on the process are discarded, so the result is always nil.
func (c *FFmpegCamera) Close() error {
	if cmd := c.cmd; cmd != nil {
		if proc := cmd.Process; proc != nil {
			// Best effort: both results are deliberately ignored.
			_ = proc.Kill()
			_ = cmd.Wait()
		}
	}

	// The read loop closes done when it returns.
	<-c.done
	return nil
}