local-tool/: visionA-local desktop app
- M1: Wails shell + Go server + Next.js UI + Mock mode (macOS dmg ready)
- M2: i18n (zh-TW/en) + Settings 4-tab refactor
- M3: Embedded Python 3.12 runtime (python-build-standalone) + KneronPLUS wheels
- M4: Windows Inno Setup script (build on Windows runner)
- M5: Linux AppImage script + udev rule (build on Linux runner)
- M6: ffmpeg (GPL, pending legal review) + yt-dlp bundled
- Lifecycle: watchServer health check, fatal native dialog,
Wails IPC raise endpoint, stale process cleanup
.autoflow/: full PRD / Design Spec / Architecture / Testing docs
(4 rounds tri-party discussion + cross review)
.github/workflows/: macOS / Windows / Linux build CI
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
189 lines
4.5 KiB
Go
189 lines
4.5 KiB
Go
package camera
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"os/exec"
|
|
"runtime"
|
|
"sync"
|
|
)
|
|
|
|
// FFmpegCamera grabs webcam frames by running ffmpeg as a child process.
//
// The capture arguments are chosen per operating system: DirectShow on
// Windows, AVFoundation (macOS) for everything else. ffmpeg writes an endless
// MJPEG stream to its stdout; a background goroutine splits that stream into
// individual images by looking for the JPEG SOI (0xFFD8) and EOI (0xFFD9)
// markers and keeps only the newest complete frame.
type FFmpegCamera struct {
	// Process plumbing; set once by the constructor.
	cmd    *exec.Cmd     // the ffmpeg child process
	stdout io.ReadCloser // ffmpeg's stdout pipe carrying the MJPEG stream
	done   chan struct{} // closed when the read loop goroutine returns

	// mu guards the fields below, which are written by the read loop and
	// read by ReadFrame.
	mu          sync.Mutex
	latestFrame []byte // most recent complete JPEG, nil until the first one arrives
	err         error  // set once the stream has ended
}
|
|
|
|
// NewFFmpegCamera starts an ffmpeg process to capture from the given camera.
|
|
// On macOS, cameraIndex is used (e.g. 0 for first camera).
|
|
// On Windows, cameraName from device detection is used; cameraIndex is ignored
|
|
// unless no name is provided.
|
|
func NewFFmpegCamera(cameraIndex, width, height, framerate int) (*FFmpegCamera, error) {
|
|
return NewFFmpegCameraWithName(cameraIndex, "", width, height, framerate)
|
|
}
|
|
|
|
// NewFFmpegCameraWithName starts ffmpeg with explicit camera name (needed for Windows dshow).
|
|
func NewFFmpegCameraWithName(cameraIndex int, cameraName string, width, height, framerate int) (*FFmpegCamera, error) {
|
|
args := buildCaptureArgs(cameraIndex, cameraName, width, height, framerate)
|
|
|
|
cmd := exec.Command("ffmpeg", args...)
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get stdout pipe: %w", err)
|
|
}
|
|
|
|
// Suppress ffmpeg's stderr banner/logs
|
|
cmd.Stderr = nil
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
return nil, fmt.Errorf("failed to start ffmpeg: %w", err)
|
|
}
|
|
|
|
cam := &FFmpegCamera{
|
|
cmd: cmd,
|
|
stdout: stdout,
|
|
done: make(chan struct{}),
|
|
}
|
|
|
|
go cam.readLoop()
|
|
|
|
return cam, nil
|
|
}
|
|
|
|
// buildCaptureArgs returns the ffmpeg arguments for the current OS.
|
|
func buildCaptureArgs(cameraIndex int, cameraName string, width, height, framerate int) []string {
|
|
videoSize := fmt.Sprintf("%dx%d", width, height)
|
|
fps := fmt.Sprintf("%d", framerate)
|
|
|
|
switch runtime.GOOS {
|
|
case "windows":
|
|
// DirectShow on Windows: -f dshow -i video="Camera Name"
|
|
inputName := cameraName
|
|
if inputName == "" {
|
|
// Fallback: try to detect first camera
|
|
devices := ListFFmpegDevices()
|
|
if len(devices) > 0 {
|
|
inputName = devices[0].Name
|
|
} else {
|
|
inputName = "Integrated Camera"
|
|
}
|
|
}
|
|
return []string{
|
|
"-f", "dshow",
|
|
"-framerate", fps,
|
|
"-video_size", videoSize,
|
|
"-i", fmt.Sprintf("video=%s", inputName),
|
|
"-f", "image2pipe",
|
|
"-vcodec", "mjpeg",
|
|
"-q:v", "5",
|
|
"-an",
|
|
"-",
|
|
}
|
|
default:
|
|
// AVFoundation on macOS: -f avfoundation -i "index:none"
|
|
return []string{
|
|
"-f", "avfoundation",
|
|
"-framerate", fps,
|
|
"-video_size", videoSize,
|
|
"-i", fmt.Sprintf("%d:none", cameraIndex),
|
|
"-f", "image2pipe",
|
|
"-vcodec", "mjpeg",
|
|
"-q:v", "5",
|
|
"-an",
|
|
"-",
|
|
}
|
|
}
|
|
}
|
|
|
|
// readLoop continuously reads ffmpeg's stdout and extracts JPEG frames.
|
|
func (c *FFmpegCamera) readLoop() {
|
|
defer close(c.done)
|
|
|
|
reader := bufio.NewReaderSize(c.stdout, 1024*1024) // 1MB buffer
|
|
buf := make([]byte, 0, 512*1024) // 512KB initial frame buffer
|
|
inFrame := false
|
|
|
|
for {
|
|
b, err := reader.ReadByte()
|
|
if err != nil {
|
|
c.mu.Lock()
|
|
c.err = fmt.Errorf("ffmpeg stream ended: %w", err)
|
|
c.mu.Unlock()
|
|
return
|
|
}
|
|
|
|
if !inFrame {
|
|
// Look for SOI marker: 0xFF 0xD8
|
|
if b == 0xFF {
|
|
next, err := reader.ReadByte()
|
|
if err != nil {
|
|
c.mu.Lock()
|
|
c.err = fmt.Errorf("ffmpeg stream ended: %w", err)
|
|
c.mu.Unlock()
|
|
return
|
|
}
|
|
if next == 0xD8 {
|
|
// Start of JPEG
|
|
buf = buf[:0]
|
|
buf = append(buf, 0xFF, 0xD8)
|
|
inFrame = true
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Inside a frame, collect bytes
|
|
buf = append(buf, b)
|
|
|
|
// Look for EOI marker: 0xFF 0xD9
|
|
if b == 0xD9 && len(buf) >= 2 && buf[len(buf)-2] == 0xFF {
|
|
// Complete JPEG frame
|
|
frame := make([]byte, len(buf))
|
|
copy(frame, buf)
|
|
|
|
c.mu.Lock()
|
|
c.latestFrame = frame
|
|
c.mu.Unlock()
|
|
|
|
inFrame = false
|
|
}
|
|
}
|
|
}
|
|
|
|
// ReadFrame returns the most recently captured JPEG frame.
|
|
func (c *FFmpegCamera) ReadFrame() ([]byte, error) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if c.err != nil {
|
|
return nil, c.err
|
|
}
|
|
if c.latestFrame == nil {
|
|
return nil, fmt.Errorf("no frame available yet")
|
|
}
|
|
|
|
// Return a copy to avoid data races
|
|
frame := make([]byte, len(c.latestFrame))
|
|
copy(frame, c.latestFrame)
|
|
return frame, nil
|
|
}
|
|
|
|
// Close stops the ffmpeg process and cleans up resources.
|
|
func (c *FFmpegCamera) Close() error {
|
|
if c.cmd != nil && c.cmd.Process != nil {
|
|
_ = c.cmd.Process.Kill()
|
|
_ = c.cmd.Wait()
|
|
}
|
|
<-c.done
|
|
return nil
|
|
}
|