jim800121chen 3f0175f1a9 feat(local-agent): Phase 0.5 visionA Agent — Wails 桌面 + tunnel client + 配對 UI
從 local-tool 複製出獨立的「visionA Agent」桌面應用(A3 純橋樑:
tunnel client + 配對 UI + 設定,不開 HTTP port、不做本機裝置/推論 UI)。
Bundle ID 與 local-tool 不同(com.innovedus.visiona-agent vs visiona-local),
雙 app 可共存。fork 後不主動 sync,需要時手動 cherry-pick。

Backend / Wails Go(AB1-AB13):
- internal/tunnel:6 狀態機(Idle/Connecting/Connected/Reconnecting/Failed/Stopped)
  + Pair/Unpair/Reconnect/Disconnect binding + ClientHooks event
- internal/auth:encrypted file token store(AES-GCM + scrypt + machineID
  fallback salt + 13 tests)
- internal/config:YAML validation + atomic write + 11 tests
- internal/log:ring buffer + ExportLog 升級 zip
- visionA-backend /api/pairing/exchange:SessionTokenStore + 17 new tests
- 三平台 build 驗證(macOS DMG 160 MB / Windows EXE / Linux AppImage)
- end-to-end 5 milestone 全綠(pairing → tunnel → forward → reuse 防護
  → tunnel drop failover)

Frontend / Next.js(AF1-AF7,沿用 visionA-frontend 基礎):
- AppShell + Header + TabNav(StatusView / PairView / SettingsView 三 tab)
- ConnectionStatusBadge 5 種狀態
- TokenInput regex 驗證 + 7 種錯誤 + 0.5s auto-switch 到狀態頁
- 設定頁 4 區塊(含重新配對 AlertDialog)
- agent-api.ts 封裝 Wails bindings(mock/real 雙實作)+ 90 tests

Phase 0.7 review-driven fix(Round 2):
- A1 Session fixation 防護(RotateSessionID)
- A3 mock pairing 預設改 false(必須明確 opt-in)+ startup log
- A4 Pair 失敗後 state 清理矩陣(exchange/Save/Start fail 各自終態)
- A5 Pair/Unpair/Reconnect lifecycleMu + 50 goroutine race test
- F1 重新配對次按鈕 / F2 PairView Esc cancel / F3 Wails BrowserOpenURL
  / F4 Settings draft 持久 + 未儲存 badge

驗證:agent backend go test -race -count=3 ./... 4 packages 全綠 /
agent frontend pnpm test 119 tests 全綠

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 11:22:01 +08:00

189 lines
4.5 KiB
Go

package camera
import (
"bufio"
"fmt"
"io"
"os/exec"
"runtime"
"sync"
)
// FFmpegCamera grabs webcam frames by running ffmpeg as a child process.
// Capture goes through AVFoundation on macOS and DirectShow on Windows.
// ffmpeg writes an endless MJPEG stream to its stdout; individual frames are
// cut out of that stream by looking for the JPEG SOI (0xFFD8) and EOI (0xFFD9)
// markers.
type FFmpegCamera struct {
	// cmd is the ffmpeg child process and stdout is the pipe attached to
	// its standard output.
	cmd    *exec.Cmd
	stdout io.ReadCloser

	// done is closed when the stream-reading goroutine returns.
	done chan struct{}

	// mu guards latestFrame and err.
	mu          sync.Mutex
	latestFrame []byte // most recent complete JPEG frame; nil until one arrives
	err         error  // error that ended the stream; nil while it is running
}
// NewFFmpegCamera launches ffmpeg to capture from a camera chosen by index.
// It is shorthand for NewFFmpegCameraWithName with an empty camera name.
//
// On macOS cameraIndex selects the AVFoundation device (0 is the first
// camera). On Windows the empty name means the device is picked by the name
// fallback in buildCaptureArgs, and cameraIndex is not consulted.
func NewFFmpegCamera(cameraIndex, width, height, framerate int) (*FFmpegCamera, error) {
	// An empty name defers device selection to the per-OS defaults.
	const unnamed = ""
	return NewFFmpegCameraWithName(cameraIndex, unnamed, width, height, framerate)
}
// NewFFmpegCameraWithName launches ffmpeg with an explicit camera name, which
// the Windows DirectShow input needs. The command line comes from
// buildCaptureArgs. Once the process is running, a background goroutine
// starts consuming its stdout.
//
// It returns an error if the stdout pipe cannot be created or if ffmpeg
// cannot be started.
func NewFFmpegCameraWithName(cameraIndex int, cameraName string, width, height, framerate int) (*FFmpegCamera, error) {
	captureArgs := buildCaptureArgs(cameraIndex, cameraName, width, height, framerate)
	proc := exec.Command("ffmpeg", captureArgs...)

	// Leaving Stderr nil drops ffmpeg's banner and log output.
	proc.Stderr = nil

	pipe, pipeErr := proc.StdoutPipe()
	if pipeErr != nil {
		return nil, fmt.Errorf("failed to get stdout pipe: %w", pipeErr)
	}

	if startErr := proc.Start(); startErr != nil {
		return nil, fmt.Errorf("failed to start ffmpeg: %w", startErr)
	}

	camera := &FFmpegCamera{cmd: proc, stdout: pipe, done: make(chan struct{})}
	go camera.readLoop()
	return camera, nil
}
// buildCaptureArgs assembles the ffmpeg command line for the OS this binary
// is running on.
//
// On Windows the input is DirectShow, addressed by camera name. An empty
// cameraName falls back to the first device reported by ListFFmpegDevices,
// or to "Integrated Camera" when none is reported; cameraIndex is unused.
// On every other OS the input is AVFoundation, addressed by cameraIndex with
// audio disabled ("index:none").
//
// In both cases ffmpeg is told to write MJPEG frames to stdout.
func buildCaptureArgs(cameraIndex int, cameraName string, width, height, framerate int) []string {
	var inputFormat, inputSpec string

	if runtime.GOOS == "windows" {
		// DirectShow on Windows: -f dshow -i video="Camera Name"
		device := cameraName
		if device == "" {
			// No name supplied: try the first detected camera.
			if found := ListFFmpegDevices(); len(found) > 0 {
				device = found[0].Name
			} else {
				device = "Integrated Camera"
			}
		}
		inputFormat, inputSpec = "dshow", "video="+device
	} else {
		// AVFoundation on macOS: -f avfoundation -i "index:none"
		inputFormat, inputSpec = "avfoundation", fmt.Sprintf("%d:none", cameraIndex)
	}

	// Only the input format and input specifier differ between platforms;
	// the rest of the command line is shared.
	return []string{
		"-f", inputFormat,
		"-framerate", fmt.Sprint(framerate),
		"-video_size", fmt.Sprintf("%dx%d", width, height),
		"-i", inputSpec,
		"-f", "image2pipe",
		"-vcodec", "mjpeg",
		"-q:v", "5",
		"-an",
		"-",
	}
}
// readLoop pulls bytes from ffmpeg's stdout and carves the MJPEG stream into
// individual JPEG frames, publishing each completed frame as c.latestFrame.
// It runs until a read fails (EOF included), then records the wrapped error
// in c.err and closes c.done.
//
// Frame boundaries are found purely by marker scanning: a frame starts at an
// SOI marker (0xFF 0xD8) and ends at the first EOI marker (0xFF 0xD9) after
// it. Bytes between frames are discarded.
func (c *FFmpegCamera) readLoop() {
	defer close(c.done)

	reader := bufio.NewReaderSize(c.stdout, 1024*1024) // 1MB read buffer
	buf := make([]byte, 0, 512*1024)                   // 512KB initial frame buffer, reused across frames
	inFrame := false

	// sawFF reports whether the previous byte seen outside a frame was 0xFF.
	// Keeping this as state, rather than consuming a lookahead byte, means
	// an SOI preceded by a 0xFF fill byte (0xFF 0xFF 0xD8) is still found;
	// reading in pairs would swallow both 0xFF bytes and miss the frame.
	sawFF := false

	for {
		b, err := reader.ReadByte()
		if err != nil {
			c.mu.Lock()
			c.err = fmt.Errorf("ffmpeg stream ended: %w", err)
			c.mu.Unlock()
			return
		}

		if !inFrame {
			if sawFF && b == 0xD8 {
				// Start of JPEG: reset the buffer and re-add the marker.
				buf = append(buf[:0], 0xFF, 0xD8)
				inFrame = true
				sawFF = false
			} else {
				sawFF = b == 0xFF
			}
			continue
		}

		// Inside a frame: collect the byte, then check for EOI. buf always
		// starts with the two SOI bytes, so len(buf)-2 is a valid index.
		buf = append(buf, b)
		if b == 0xD9 && buf[len(buf)-2] == 0xFF {
			// Publish a private copy so buf can be reused for the next
			// frame without changing what readers see.
			frame := make([]byte, len(buf))
			copy(frame, buf)

			c.mu.Lock()
			c.latestFrame = frame
			c.mu.Unlock()

			inFrame = false
		}
	}
}
// ReadFrame hands back the newest JPEG frame captured so far.
//
// If the stream has already ended, the stored stream error is returned, even
// when an earlier frame is still held. If no frame has been completed yet,
// an error saying so is returned. On success the result is a fresh copy that
// the caller may modify freely.
func (c *FFmpegCamera) ReadFrame() ([]byte, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	switch {
	case c.err != nil:
		return nil, c.err
	case c.latestFrame == nil:
		return nil, fmt.Errorf("no frame available yet")
	}

	// Copy so the caller never shares memory with the stored frame.
	snapshot := make([]byte, len(c.latestFrame))
	copy(snapshot, c.latestFrame)
	return snapshot, nil
}
// Close stops the ffmpeg process and cleans up resources. It always returns
// nil.
//
// The steps run in a fixed order: kill the process, wait for the reading
// goroutine to finish (c.done is closed), and only then call Wait to reap
// the process. os/exec documents that Wait must not be called before all
// reads from a StdoutPipe have completed, because Wait closes the pipe.
//
// A camera whose done channel was never created (for example a zero-value
// FFmpegCamera) has no reader to wait for, so that step is skipped instead
// of blocking forever on a nil channel.
func (c *FFmpegCamera) Close() error {
	started := c.cmd != nil && c.cmd.Process != nil

	if started {
		// Best effort: a Kill error typically means the process has already
		// exited, so there is nothing left to stop.
		_ = c.cmd.Process.Kill()
	}

	if c.done != nil {
		<-c.done
	}

	if started {
		// Wait reports the kill as a non-zero exit status; that is the
		// expected outcome here, so the error is intentionally dropped.
		_ = c.cmd.Wait()
	}
	return nil
}