jim800121chen 8cd5751ce3 feat(local-tool): M8 重構 — Wails 控制台 + 瀏覽器 Web UI(R5 決策)
依 R5 五輪決策把 visionA-local 從「Wails 內嵌 Next.js」重構為「Wails
本機伺服器控制台 + 瀏覽器 Web UI」模式(類比 Docker Desktop / Ollama)。

程式碼變動
  - M8-1 砍 yt-dlp 全套(後端 resolver / URL handler / 前端 URL tab /
    Makefile vendor / installer / bootstrap / CI workflow,-555 行)
  - M8-2 砍 Mock 模式全套(driver/mock、mock_camera、Settings runtimeMode、
    VISIONA_MOCK 環境變數,-528 行)
  - M8-3 ffmpeg 從 GPL 切換到 LGPL 混合方案:Windows/Linux 用 BtbN 現成
    LGPL binary,macOS 自 build minimal decoder-only 進 git
    (vendor/ffmpeg/macos/ffmpeg 5.7MB + ffprobe 5.6MB,比 GPL 版省 85% 空間)
  - M8-4 Wails Server Controller:state machine、log ring buffer 2000 行、
    preferences.json atomic write、boot-id、Gin SkipPaths、shutdown 7+1 秒、
    notify_*.go 三平台 OS 通知、watchServer 改 Error state 不 os.Exit
  - M8-4b 啟動階段管線 R5-E:6 階段進度 event、20s soft / 60s hard timeout、
    stage 5/6 skip 規則、sentinel file、RestartStartupSequence 5 步驟
  - M8-5 Wails 控制台 vanilla HTML/JS/CSS(9 檔 ~2012 行)取代 M7-B splash:
    state 視覺、log panel、startup progress panel、Stage 6 manual CTA
    pulse、shutdown modal、Settings、Dark Mode、i18n 中英雙語
  - M8-6 上傳影片副檔名擴充(mp4/avi/mov/mpeg/mpg)
  - M8-7 Web UI Server Offline Overlay(role=alertdialog + focus trap +
    wsEverConnected 容錯 + Page Visibility)
  - M8-8 CORS middleware(127.0.0.1/localhost only + suffix attack 防護)+
    ws/origin.go 獨立 WebSocket CheckOrigin 避 package cycle
  - MAJ-4 server:shutdown-imminent WebSocket broadcast 機制
    (/ws/system endpoint + notifyShutdownImminent helper)
  - M8-9 Boot-ID + 瀏覽器 tab 自動重連(sessionStorage loop guard)

品質
  - ~105+ 新 unit test + race detector (-count=2) 全綠
  - 10 個 milestone 全部通過 Reviewer 審查
  - 三方 v2 + v2.1 文件(PRD / Design Spec / TDD)+ 交叉互審紀錄
    收錄在 .autoflow/

交付前待處理(M8-10)
  - 重跑 make payload-macos 把舊 GPL 77MB binary 換成新 LGPL
  - 三平台 end-to-end build 驗證

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 17:57:54 +08:00

623 lines
16 KiB
Go

package handlers
import (
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"visiona-local/server/internal/api/ws"
"visiona-local/server/internal/camera"
"visiona-local/server/internal/device"
"visiona-local/server/internal/driver"
"visiona-local/server/internal/inference"
"github.com/gin-gonic/gin"
)
// CameraHandler wires live-camera, image, video, and batch-image frame
// sources into the inference pipeline, streaming frames over MJPEG and
// inference results over per-device WebSocket rooms.
//
// NOTE(review): handler methods mutate these fields without a mutex —
// confirm the router serializes camera-control requests.
type CameraHandler struct {
	cameraMgr    *camera.Manager           // enumerates and opens local cameras
	deviceMgr    *device.Manager           // resolves deviceID -> driver session
	inferenceSvc *inference.Service        // NOTE(review): unused in this chunk — verify against rest of file
	wsHub        *ws.Hub                   // broadcasts inference results to "inference:<deviceID>" rooms
	streamer     *camera.MJPEGStreamer     // serves the /api/camera/stream MJPEG endpoint
	pipeline     *camera.InferencePipeline // currently running pipeline, nil when idle
	activeSource camera.FrameSource        // source currently feeding the pipeline
	sourceType   camera.SourceType         // kind of activeSource (camera/image/video/batch)
	// Video seek state — preserved across seek operations
	videoPath      string           // original file path
	videoFPS       float64          // target FPS
	videoInfo      camera.VideoInfo // duration, total frames
	activeDeviceID string           // device ID for current video session
}
// NewCameraHandler builds a CameraHandler around the given managers and
// hub, and launches the shared MJPEG streamer goroutine that fans frames
// out to connected HTTP clients.
//
// NOTE(review): the streamer goroutine has no stop mechanism here; it
// appears to live for the process lifetime — confirm that is intended.
func NewCameraHandler(
	cameraMgr *camera.Manager,
	deviceMgr *device.Manager,
	inferenceSvc *inference.Service,
	wsHub *ws.Hub,
) *CameraHandler {
	h := &CameraHandler{
		cameraMgr:    cameraMgr,
		deviceMgr:    deviceMgr,
		inferenceSvc: inferenceSvc,
		wsHub:        wsHub,
		streamer:     camera.NewMJPEGStreamer(),
	}
	go h.streamer.Run()
	return h
}
// ListCameras responds with every camera device the camera manager
// currently knows about.
func (h *CameraHandler) ListCameras(c *gin.Context) {
	c.JSON(200, gin.H{
		"success": true,
		"data":    gin.H{"cameras": h.cameraMgr.ListCameras()},
	})
}
// StartPipeline opens the local camera and starts a continuous inference
// pipeline, streaming frames over MJPEG and broadcasting results to the
// device's WebSocket room.
//
// Request body: {cameraId, deviceId, width, height}. Width/height default
// to 640x480 when omitted.
//
// NOTE(review): req.CameraID is currently ignored — the camera manager is
// always opened with index 0. Confirm whether multi-camera selection is
// intended.
func (h *CameraHandler) StartPipeline(c *gin.Context) {
	var req struct {
		CameraID string `json:"cameraId"`
		DeviceID string `json:"deviceId"`
		Width    int    `json:"width"`
		Height   int    `json:"height"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": err.Error()}})
		return
	}
	// Default capture resolution.
	if req.Width == 0 {
		req.Width = 640
	}
	if req.Height == 0 {
		req.Height = 480
	}
	// Clean up any existing pipeline before starting a new one.
	h.stopActivePipeline()
	// Open camera (always index 0 — see NOTE above).
	if err := h.cameraMgr.Open(0, req.Width, req.Height); err != nil {
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "CAMERA_OPEN_FAILED", "message": err.Error()}})
		return
	}
	// Resolve the device driver. Fix: release the camera on failure —
	// previously it was left open with no pipeline attached.
	session, err := h.deviceMgr.GetDevice(req.DeviceID)
	if err != nil {
		h.cameraMgr.Close()
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}})
		return
	}
	// Forward inference results to the device's WebSocket room,
	// enriching each result with the device ID.
	resultCh := make(chan *driver.InferenceResult, 10)
	go func() {
		room := "inference:" + req.DeviceID
		for result := range resultCh {
			result.DeviceID = req.DeviceID
			h.wsHub.BroadcastToRoom(room, result)
		}
	}()
	// Start pipeline with the camera as frame source.
	h.activeSource = h.cameraMgr
	h.sourceType = camera.SourceCamera
	pipeline := camera.NewInferencePipeline(
		h.cameraMgr,
		camera.SourceCamera,
		session.Driver,
		h.streamer.FrameChannel(),
		resultCh,
	)
	h.pipeline = pipeline
	pipeline.Start()
	// Fix: close resultCh when the pipeline finishes so the forwarder
	// goroutine exits (it previously leaked — every other handler in this
	// file closes its channel on Done). Using the local pipeline variable
	// avoids racing on h.pipeline if a later request reassigns it.
	go func() {
		<-pipeline.Done()
		close(resultCh)
	}()
	c.JSON(200, gin.H{
		"success": true,
		"data": gin.H{
			"streamUrl":  "/api/camera/stream",
			"sourceType": "camera",
		},
	})
}
// StopPipeline tears down the active inference pipeline (if any) and
// releases its frame source, then reports success.
func (h *CameraHandler) StopPipeline(c *gin.Context) {
h.stopActivePipeline()
c.JSON(200, gin.H{"success": true})
}
// StreamMJPEG delegates the request to the shared MJPEG streamer, which
// serves the multipart frame stream for the active source.
func (h *CameraHandler) StreamMJPEG(c *gin.Context) {
h.streamer.ServeHTTP(c.Writer, c.Request)
}
// UploadImage handles image file upload for single-shot inference.
//
// Form fields: deviceId (required) and file (JPG/JPEG/PNG only). The image
// is persisted to a temp file, decoded into an ImageSource, and run through
// the inference pipeline; results are broadcast to "inference:<deviceId>"
// and the rendered frame is available at the MJPEG stream URL.
//
// NOTE(review): the temp file is removed here only on error paths —
// presumably ImageSource.Close removes it on success; confirm.
func (h *CameraHandler) UploadImage(c *gin.Context) {
	h.stopActivePipeline()
	deviceID := c.PostForm("deviceId")
	if deviceID == "" {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "deviceId is required"}})
		return
	}
	file, header, err := c.Request.FormFile("file")
	if err != nil {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "file is required"}})
		return
	}
	defer file.Close()
	// Validate extension before touching disk.
	ext := strings.ToLower(filepath.Ext(header.Filename))
	if ext != ".jpg" && ext != ".jpeg" && ext != ".png" {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "only JPG/PNG files are supported"}})
		return
	}
	// Save upload to a temp file.
	tmpFile, err := os.CreateTemp("", "edge-ai-image-*"+ext)
	if err != nil {
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": err.Error()}})
		return
	}
	if _, err := io.Copy(tmpFile, file); err != nil {
		tmpFile.Close()
		os.Remove(tmpFile.Name())
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": err.Error()}})
		return
	}
	tmpFile.Close()
	// Decode into an ImageSource.
	imgSource, err := camera.NewImageSource(tmpFile.Name())
	if err != nil {
		os.Remove(tmpFile.Name())
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "IMAGE_DECODE_FAILED", "message": err.Error()}})
		return
	}
	// Resolve device driver.
	session, err := h.deviceMgr.GetDevice(deviceID)
	if err != nil {
		imgSource.Close()
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}})
		return
	}
	// Forward inference results to the device's WebSocket room.
	resultCh := make(chan *driver.InferenceResult, 10)
	go func() {
		room := "inference:" + deviceID
		for result := range resultCh {
			result.DeviceID = deviceID
			h.wsHub.BroadcastToRoom(room, result)
		}
	}()
	h.activeSource = imgSource
	h.sourceType = camera.SourceImage
	pipeline := camera.NewInferencePipeline(
		imgSource,
		camera.SourceImage,
		session.Driver,
		h.streamer.FrameChannel(),
		resultCh,
	)
	h.pipeline = pipeline
	pipeline.Start()
	// Close resultCh after the pipeline completes so the forwarder exits.
	// Fix: wait on the local pipeline, not h.pipeline — reading the field
	// here raced with reassignment by a subsequent request.
	go func() {
		<-pipeline.Done()
		close(resultCh)
	}()
	w, ht := imgSource.Dimensions()
	c.JSON(200, gin.H{
		"success": true,
		"data": gin.H{
			"streamUrl":  "/api/camera/stream",
			"sourceType": "image",
			"width":      w,
			"height":     ht,
			"filename":   header.Filename,
		},
	})
}
// UploadVideo handles video file upload for frame-by-frame inference.
//
// Form fields: deviceId (required) and file (MP4/AVI/MOV/MPEG/MPG). The
// video is saved to a temp file, probed for duration/frame count, decoded
// at a fixed FPS, and run through the inference pipeline. Seek state
// (path, FPS, info, device ID) is recorded on the handler so SeekVideo
// can restart playback at a new position. A "pipeline_complete" event is
// broadcast when playback finishes.
func (h *CameraHandler) UploadVideo(c *gin.Context) {
	// Fixed decode/inference rate for uploaded videos.
	const uploadFPS float64 = 15
	h.stopActivePipeline()
	deviceID := c.PostForm("deviceId")
	if deviceID == "" {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "deviceId is required"}})
		return
	}
	file, header, err := c.Request.FormFile("file")
	if err != nil {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "file is required"}})
		return
	}
	defer file.Close()
	ext := strings.ToLower(filepath.Ext(header.Filename))
	if ext != ".mp4" && ext != ".avi" && ext != ".mov" && ext != ".mpeg" && ext != ".mpg" {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "only MP4/AVI/MOV/MPEG/MPG files are supported"}})
		return
	}
	// Save upload to a temp file.
	tmpFile, err := os.CreateTemp("", "edge-ai-video-*"+ext)
	if err != nil {
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": err.Error()}})
		return
	}
	if _, err := io.Copy(tmpFile, file); err != nil {
		tmpFile.Close()
		os.Remove(tmpFile.Name())
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": err.Error()}})
		return
	}
	tmpFile.Close()
	// Probe video info (duration, frame count) before starting the pipeline.
	videoInfo := camera.ProbeVideoInfo(tmpFile.Name(), uploadFPS)
	// Create the decoding source.
	videoSource, err := camera.NewVideoSource(tmpFile.Name(), uploadFPS)
	if err != nil {
		os.Remove(tmpFile.Name())
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "VIDEO_DECODE_FAILED", "message": err.Error()}})
		return
	}
	if videoInfo.TotalFrames > 0 {
		videoSource.SetTotalFrames(videoInfo.TotalFrames)
	}
	// Resolve device driver.
	session, err := h.deviceMgr.GetDevice(deviceID)
	if err != nil {
		videoSource.Close()
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}})
		return
	}
	// Forward inference results to the device's WebSocket room.
	resultCh := make(chan *driver.InferenceResult, 10)
	go func() {
		room := "inference:" + deviceID
		for result := range resultCh {
			result.DeviceID = deviceID
			h.wsHub.BroadcastToRoom(room, result)
		}
	}()
	// Record seek state so SeekVideo can restart this session.
	h.activeSource = videoSource
	h.sourceType = camera.SourceVideo
	h.videoPath = tmpFile.Name()
	h.videoFPS = uploadFPS
	h.videoInfo = videoInfo
	h.activeDeviceID = deviceID
	pipeline := camera.NewInferencePipeline(
		videoSource,
		camera.SourceVideo,
		session.Driver,
		h.streamer.FrameChannel(),
		resultCh,
	)
	h.pipeline = pipeline
	pipeline.Start()
	// Notify frontend when playback completes. Fix: wait on the local
	// pipeline rather than re-reading h.pipeline, which races with
	// reassignment by a later request.
	go func() {
		<-pipeline.Done()
		close(resultCh)
		h.wsHub.BroadcastToRoom("inference:"+deviceID, map[string]interface{}{
			"type":       "pipeline_complete",
			"sourceType": "video",
		})
	}()
	c.JSON(200, gin.H{
		"success": true,
		"data": gin.H{
			"streamUrl":       "/api/camera/stream",
			"sourceType":      "video",
			"filename":        header.Filename,
			"totalFrames":     videoInfo.TotalFrames,
			"durationSeconds": videoInfo.DurationSec,
		},
	})
}
// UploadBatchImages handles multiple image files for sequential batch inference.
//
// Form fields: deviceId (required) and files (1–50 JPG/JPEG/PNG entries).
// Every file is written to a temp file; a MultiImageSource feeds them
// through the inference pipeline in order. A "pipeline_complete" event
// carrying the generated batchId is broadcast when the batch finishes.
func (h *CameraHandler) UploadBatchImages(c *gin.Context) {
	h.stopActivePipeline()
	deviceID := c.PostForm("deviceId")
	if deviceID == "" {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "deviceId is required"}})
		return
	}
	form, err := c.MultipartForm()
	if err != nil {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "multipart form required"}})
		return
	}
	files := form.File["files"]
	if len(files) == 0 {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "at least one file is required"}})
		return
	}
	if len(files) > 50 {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "maximum 50 images per batch"}})
		return
	}
	// Save all files to temp storage, collecting paths + original names.
	filePaths := make([]string, 0, len(files))
	filenames := make([]string, 0, len(files))
	// cleanup removes every temp file written so far (used on error paths).
	cleanup := func() {
		for _, fp := range filePaths {
			os.Remove(fp)
		}
	}
	for _, fh := range files {
		ext := strings.ToLower(filepath.Ext(fh.Filename))
		if ext != ".jpg" && ext != ".jpeg" && ext != ".png" {
			cleanup()
			c.JSON(400, gin.H{"success": false, "error": gin.H{
				"code":    "BAD_REQUEST",
				"message": fmt.Sprintf("unsupported file: %s (only JPG/PNG)", fh.Filename),
			}})
			return
		}
		f, openErr := fh.Open()
		if openErr != nil {
			cleanup()
			c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": openErr.Error()}})
			return
		}
		tmpFile, tmpErr := os.CreateTemp("", "edge-ai-batch-*"+ext)
		if tmpErr != nil {
			f.Close()
			cleanup()
			c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": tmpErr.Error()}})
			return
		}
		// Fix: the copy error was previously ignored, which could admit a
		// truncated image into the batch.
		_, copyErr := io.Copy(tmpFile, f)
		tmpFile.Close()
		f.Close()
		if copyErr != nil {
			os.Remove(tmpFile.Name())
			cleanup()
			c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "STORAGE_ERROR", "message": copyErr.Error()}})
			return
		}
		filePaths = append(filePaths, tmpFile.Name())
		filenames = append(filenames, fh.Filename)
	}
	// Decode the batch into a MultiImageSource.
	batchSource, err := camera.NewMultiImageSource(filePaths, filenames)
	if err != nil {
		cleanup()
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "IMAGE_DECODE_FAILED", "message": err.Error()}})
		return
	}
	// Resolve device driver.
	session, err := h.deviceMgr.GetDevice(deviceID)
	if err != nil {
		batchSource.Close()
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}})
		return
	}
	batchID := fmt.Sprintf("batch-%d", time.Now().UnixNano())
	// Forward inference results to the device's WebSocket room.
	resultCh := make(chan *driver.InferenceResult, 10)
	go func() {
		room := "inference:" + deviceID
		for result := range resultCh {
			result.DeviceID = deviceID
			h.wsHub.BroadcastToRoom(room, result)
		}
	}()
	h.activeSource = batchSource
	h.sourceType = camera.SourceBatchImage
	pipeline := camera.NewInferencePipeline(
		batchSource,
		camera.SourceBatchImage,
		session.Driver,
		h.streamer.FrameChannel(),
		resultCh,
	)
	h.pipeline = pipeline
	pipeline.Start()
	// Notify frontend when the batch completes. Fix: wait on the local
	// pipeline rather than re-reading h.pipeline (race with reassignment).
	go func() {
		<-pipeline.Done()
		close(resultCh)
		h.wsHub.BroadcastToRoom("inference:"+deviceID, map[string]interface{}{
			"type":       "pipeline_complete",
			"sourceType": "batch_image",
			"batchId":    batchID,
		})
	}()
	// Build the per-image manifest for the response.
	imageList := make([]gin.H, len(batchSource.Images()))
	for i, entry := range batchSource.Images() {
		imageList[i] = gin.H{
			"index":    i,
			"filename": entry.Filename,
			"width":    entry.Width,
			"height":   entry.Height,
		}
	}
	c.JSON(200, gin.H{
		"success": true,
		"data": gin.H{
			"streamUrl":   "/api/camera/stream",
			"sourceType":  "batch_image",
			"batchId":     batchID,
			"totalImages": len(files),
			"images":      imageList,
		},
	})
}
// GetBatchImageFrame serves one image of the active batch, selected by the
// zero-based ":index" path parameter, as raw JPEG bytes.
func (h *CameraHandler) GetBatchImageFrame(c *gin.Context) {
	if h.activeSource == nil || h.sourceType != camera.SourceBatchImage {
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "NO_BATCH", "message": "no batch image source active"}})
		return
	}
	idx, convErr := strconv.Atoi(c.Param("index"))
	if convErr != nil || idx < 0 {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": "invalid index"}})
		return
	}
	batch, ok := h.activeSource.(*camera.MultiImageSource)
	if !ok {
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "INTERNAL_ERROR", "message": "source type mismatch"}})
		return
	}
	frame, err := batch.GetImageByIndex(idx)
	if err != nil {
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "NOT_FOUND", "message": err.Error()}})
		return
	}
	c.Data(200, "image/jpeg", frame)
}
// stopPipelineForSeek halts the running pipeline and the video decoder
// while leaving the temp video file on disk, so a new VideoSource can
// reopen it at a different position. Seek state (videoPath, videoFPS,
// videoInfo, activeDeviceID) is deliberately preserved.
func (h *CameraHandler) stopPipelineForSeek() {
	if p := h.pipeline; p != nil {
		p.Stop()
		h.pipeline = nil
	}
	// A type assertion on a nil interface simply yields ok=false, so no
	// separate nil check is needed here.
	if vs, ok := h.activeSource.(*camera.VideoSource); ok {
		vs.CloseWithoutRemove()
	}
	h.activeSource = nil
}
// stopActivePipeline halts the running pipeline (if any), releases the
// active frame source, and clears all per-session state on the handler.
func (h *CameraHandler) stopActivePipeline() {
	if p := h.pipeline; p != nil {
		p.Stop()
		h.pipeline = nil
	}
	switch {
	case h.sourceType == camera.SourceCamera:
		// The camera's lifecycle is owned by the camera manager, not by
		// the FrameSource interface.
		h.cameraMgr.Close()
	case h.activeSource != nil:
		h.activeSource.Close()
	}
	h.activeSource = nil
	h.sourceType = ""
	h.videoPath = ""
	h.activeDeviceID = ""
}
// SeekVideo seeks to a specific position in the current video and restarts
// inference from there.
//
// Request body: {timeSeconds}. The value is clamped to [0, duration]. The
// current pipeline is stopped without deleting the temp video file, a new
// seek-positioned VideoSource is created, and results are re-broadcast to
// the original device's WebSocket room with a matching frame offset.
func (h *CameraHandler) SeekVideo(c *gin.Context) {
	var req struct {
		TimeSeconds float64 `json:"timeSeconds"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "BAD_REQUEST", "message": err.Error()}})
		return
	}
	if h.videoPath == "" || h.sourceType != camera.SourceVideo {
		c.JSON(400, gin.H{"success": false, "error": gin.H{"code": "NO_VIDEO", "message": "no video is currently playing"}})
		return
	}
	// Clamp the seek target to the known video duration.
	if req.TimeSeconds < 0 {
		req.TimeSeconds = 0
	}
	if h.videoInfo.DurationSec > 0 && req.TimeSeconds > h.videoInfo.DurationSec {
		req.TimeSeconds = h.videoInfo.DurationSec
	}
	// Fix: capture the device ID before any goroutine is spawned. The
	// goroutines below previously re-read h.activeDeviceID on every
	// iteration, racing with stopActivePipeline resetting it to "".
	deviceID := h.activeDeviceID
	// Stop the current pipeline without deleting the video file.
	h.stopPipelineForSeek()
	// Create a new VideoSource positioned at the seek target.
	videoSource, err := camera.NewVideoSourceWithSeek(h.videoPath, h.videoFPS, req.TimeSeconds)
	if err != nil {
		c.JSON(500, gin.H{"success": false, "error": gin.H{"code": "SEEK_FAILED", "message": err.Error()}})
		return
	}
	if h.videoInfo.TotalFrames > 0 {
		videoSource.SetTotalFrames(h.videoInfo.TotalFrames)
	}
	// Resolve the device driver for the ongoing session.
	session, err := h.deviceMgr.GetDevice(deviceID)
	if err != nil {
		videoSource.Close()
		c.JSON(404, gin.H{"success": false, "error": gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}})
		return
	}
	// Frame offset corresponding to the seek position, so result frame
	// numbers stay aligned with the original timeline.
	frameOffset := int(req.TimeSeconds * h.videoFPS)
	resultCh := make(chan *driver.InferenceResult, 10)
	go func() {
		room := "inference:" + deviceID
		for result := range resultCh {
			result.DeviceID = deviceID
			h.wsHub.BroadcastToRoom(room, result)
		}
	}()
	h.activeSource = videoSource
	pipeline := camera.NewInferencePipelineWithOffset(
		videoSource,
		camera.SourceVideo,
		session.Driver,
		h.streamer.FrameChannel(),
		resultCh,
		frameOffset,
	)
	h.pipeline = pipeline
	pipeline.Start()
	// Fix: wait on the local pipeline rather than re-reading h.pipeline,
	// which races with reassignment by a later request.
	go func() {
		<-pipeline.Done()
		close(resultCh)
		h.wsHub.BroadcastToRoom("inference:"+deviceID, map[string]interface{}{
			"type":       "pipeline_complete",
			"sourceType": "video",
		})
	}()
	c.JSON(200, gin.H{
		"success": true,
		"data": gin.H{
			"seekTo":      req.TimeSeconds,
			"frameOffset": frameOffset,
		},
	})
}