jim800121chen ff9bbc81ed feat(local-tool): M9-4.5 — server SIGTERM + Wails OnBeforeClose firmware-aware shutdown
A 階段尾端 milestone、雙層防護避免使用者在 firmware 升級進行中關閉 app 造成 dongle brick。

Server 端 (3 改):
- main.go: SIGTERM/SIGINT goroutine 加 firmware-aware preamble
- server/internal/firmware/shutdown.go: 新 211 行(AwaitActiveTasksOrTimeout + 3 interfaces + shutdownBroadcastTask minimal struct + toBroadcastTasks helper)
- server/internal/firmware/shutdown_test.go: 新 384 行、8 tests

Wails 端 (3 新 + 2 改):
- visiona-local/main.go: OnBeforeClose 從 inline → app.OnBeforeClose
- visiona-local/app.go: App struct 加 firmwareCloseGuard
- visiona-local/firmware_close_guard.go: 新 244 行(CloseGuard + OnBeforeClose + ConfirmForceClose)
- visiona-local/firmware_close_guard_test.go: 新 280 行、8 tests
- visiona-local/query_firmware_active_tasks.go: 新 111 行(HTTP helper、fail-open、1s timeout)
- visiona-local/query_firmware_active_tasks_test.go: 新 250 行、7 tests

行為:
- Server SIGTERM 有 active task → broadcast `server:shutdown-pending` to "system" room → RequestShutdown + WaitForActiveTasks(220s) → 走原本 shutdownFn
- Wails OnBeforeClose 有 active task → emit Wails event `app:firmware-in-progress` + return true 擋住關閉
- ConfirmForceClose binding 給 frontend 第二層 FORCE 確認用、走 graceful 7+1s shutdown(不是 SIGKILL bypass、雙層防護)

Reviewer 兩輪審查:
- Round 1: 0 Critical / 1 Major / 3 Minor / 4 Suggestion
- 第 2 輪修法(3 sub-agent 平行):
  - Architect: TDD §8.6 改 event 名 `firmware:shutdown-rejected` → `server:shutdown-pending`、標題「拒絕」→「延遲」、補 payload schema 註明 tasks 不含 startTs
  - Design: control-panel.md §6a 改「SIGKILL bypass」→「graceful 7+1s 雙層防護」、補「為何不採 SIGKILL」5 點設計理由、§6a.11 IPC 規格對齊
  - Backend: MaxShutdownWait 180s → 220s(KL720 200s upgrade + 20s buffer)+ broadcast 過濾 startTs(shutdownBroadcastTask minimal struct + toBroadcastTasks helper)

測試:
- server: go test ./... -race 全綠(firmware 2.7s + api/ws/handlers)
- wails: go test ./... -race 全綠(visiona-local 11.2s、21 tests)
- 合計新增 23 unit tests race-clean、0 regression

下一步: M9-5 三平台實機驗證 + 順手修 MJ3(backend smoke test schema phase→stage / firmware:progress→firmware_progress)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 15:07:29 +08:00

415 lines
15 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"context"
"errors"
"fmt"
"log"
"net"
"net/http"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"visiona-local/server/internal/api"
"visiona-local/server/internal/api/handlers"
"visiona-local/server/internal/api/ws"
"visiona-local/server/internal/camera"
"visiona-local/server/internal/config"
"visiona-local/server/internal/deps"
"visiona-local/server/internal/device"
"visiona-local/server/internal/firmware"
"visiona-local/server/internal/flash"
"visiona-local/server/internal/inference"
"visiona-local/server/internal/model"
pkglogger "visiona-local/server/pkg/logger"
"visiona-local/server/web"
)
// Build metadata. Both values are logged at startup and handed to the system
// handler created in main.
// NOTE(review): the "dev"/"unknown" defaults are presumably overridden at
// build time (e.g. via -ldflags "-X main.Version=..."); confirm against the
// build scripts.
var (
// Version identifies the server build.
Version = "dev"
// BuildTime records when the binary was built.
BuildTime = "unknown"
)
// baseDir picks the directory against which data/ and scripts/ are resolved.
//
// With devMode set (e.g. `go run`) the working directory "." is used. Otherwise
// the directory containing the running executable is used, so a compiled
// binary behaves the same no matter where it is launched from. If the
// executable path cannot be determined, "." is returned as a fallback.
func baseDir(devMode bool) string {
	const workingDir = "."

	if !devMode {
		if exePath, err := os.Executable(); err == nil {
			return filepath.Dir(exePath)
		}
	}
	return workingDir
}
// findFirstExisting walks candidates in order and returns the absolute path of
// the first directory that contains sentinel as a regular file (a directory
// named sentinel does not count).
//
// The second return value lists every candidate probed so far, in absolute
// form where that could be computed and verbatim otherwise, so callers can log
// it when nothing matched. On a miss the first return value is "" and the
// caller is expected to supply its own fallback.
func findFirstExisting(candidates []string, sentinel string) (string, []string) {
	tried := make([]string, 0, len(candidates))

	for _, candidate := range candidates {
		absDir, absErr := filepath.Abs(candidate)
		if absErr != nil {
			// Could not absolutize: record the raw candidate and move on.
			tried = append(tried, candidate)
			continue
		}
		tried = append(tried, absDir)

		info, statErr := os.Stat(filepath.Join(absDir, sentinel))
		if statErr != nil || info.IsDir() {
			continue
		}
		return absDir, tried
	}

	return "", tried
}
// resolveBridgeScript returns the absolute path to kneron_bridge.py, probing
// the scripts/ directory of each supported packaging layout.
//
// Directories probed, in order:
//  1. <env VISIONA_BUNDLE_LIB_DIR>/scripts — Linux AppImage (AppRun exports this)
//  2. <base>/scripts — dev mode or flat layout
//  3. <base>/../scripts — Windows/Linux installer: {app}/bin/<exe>, {app}/scripts/
//  4. <base>/../Resources/scripts — macOS app bundle: Contents/Resources/bin/<exe>, Contents/Resources/scripts/
//  5. <base>/../lib/visiona-local/scripts — Linux AppImage FHS: usr/bin/<exe>, usr/lib/visiona-local/scripts/
//  6. ./scripts — cwd fallback
//
// When none of them contains the script, a warning listing the probed
// directories is logged and the default <base>/scripts/kneron_bridge.py path
// is returned so that downstream code reports a clear error.
func resolveBridgeScript(base string) string {
	const scriptName = "kneron_bridge.py"

	var searchDirs []string
	if bundleLib := os.Getenv("VISIONA_BUNDLE_LIB_DIR"); bundleLib != "" {
		searchDirs = append(searchDirs, filepath.Join(bundleLib, "scripts"))
	}
	searchDirs = append(searchDirs,
		filepath.Join(base, "scripts"),
		filepath.Join(base, "..", "scripts"),
		filepath.Join(base, "..", "Resources", "scripts"),
		filepath.Join(base, "..", "lib", "visiona-local", "scripts"),
		filepath.Join(".", "scripts"),
	)

	found, attempted := findFirstExisting(searchDirs, scriptName)
	if found != "" {
		return filepath.Join(found, scriptName)
	}
	log.Printf("warn: kneron_bridge.py not found. Tried: %v", attempted)

	// Nothing matched: hand back the default location, absolute if possible.
	fallback := filepath.Join(base, "scripts", scriptName)
	if resolved, err := filepath.Abs(fallback); err == nil {
		return resolved
	}
	return fallback
}
// resolveBuiltInDataDir locates the data/ directory that ships inside the
// bundle. That directory is read-only at runtime and holds the built-in model
// catalog (models.json + nef/kl520/ + nef/kl720/).
//
// It is distinct from the writable user data directory (lock, ipc-port, logs,
// custom-models, preferences.json, sentinel file) that lives under the
// OS-specific app-data location; main() documents how the two are split.
//
// Directories probed, in order:
//  1. <env VISIONA_BUNDLE_LIB_DIR>/data — Linux AppImage (AppRun exports this)
//  2. <base>/data — dev mode or flat layout (cwd == repo/server/)
//  3. <base>/../data — Windows/Linux installer: {app}/bin/<exe>, {app}/data/
//  4. <base>/../Resources/data — macOS app bundle: Contents/Resources/bin/<exe>, Contents/Resources/data/
//  5. <base>/../lib/visiona-local/data — Linux AppImage FHS: usr/bin/<exe>, usr/lib/visiona-local/data/
//
// A directory only counts as a hit when it contains models.json as a regular
// file, which avoids false positives from the empty `data/` directories that
// Wails sometimes leaves behind in build artifacts.
//
// When nothing matches, a warning listing the probed directories is logged and
// the default <base>/data path is returned so that downstream code reports a
// clear error.
func resolveBuiltInDataDir(base string) string {
	var searchDirs []string
	if bundleLib := os.Getenv("VISIONA_BUNDLE_LIB_DIR"); bundleLib != "" {
		searchDirs = append(searchDirs, filepath.Join(bundleLib, "data"))
	}
	searchDirs = append(searchDirs,
		filepath.Join(base, "data"),
		filepath.Join(base, "..", "data"),
		filepath.Join(base, "..", "Resources", "data"),
		filepath.Join(base, "..", "lib", "visiona-local", "data"),
	)

	found, attempted := findFirstExisting(searchDirs, "models.json")
	if found != "" {
		return found
	}
	log.Printf("warn: built-in data dir (models.json) not found. Tried: %v", attempted)

	// Nothing matched: hand back the default location, absolute if possible.
	fallback := filepath.Join(base, "data")
	if resolved, err := filepath.Abs(fallback); err == nil {
		return resolved
	}
	return fallback
}
// main wires up and runs the visionA-local server: it loads configuration,
// resolves the built-in and user data directories, constructs the model,
// device, camera, flash, inference and firmware services, serves the HTTP API,
// and coordinates graceful shutdown (OS signals) and self-restart (exec).
func main() {
	cfg := config.Load()
	logger := pkglogger.New(cfg.LogLevel)
	logger.Info("Starting visionA-local Server %s (built: %s)", Version, BuildTime)
	logger.Info("Dev mode: %v, Python mode: %s", cfg.DevMode, cfg.PythonMode)

	// Prepend VISIONA_BUNDLE_BIN_DIR to PATH so that exec.Command("ffmpeg") /
	// exec.Command("ffprobe") can find the bundled binaries through LookPath
	// (Go 1.19+ no longer searches the current directory on Windows).
	if bundleBin := os.Getenv("VISIONA_BUNDLE_BIN_DIR"); bundleBin != "" {
		sep := string(os.PathListSeparator)
		if err := os.Setenv("PATH", bundleBin+sep+os.Getenv("PATH")); err != nil {
			logger.Warn("Failed to add VISIONA_BUNDLE_BIN_DIR to PATH: %v", err)
		} else {
			logger.Info("Added VISIONA_BUNDLE_BIN_DIR to PATH: %s", bundleBin)
		}
	}

	// Check external dependencies
	deps.PrintStartupReport(logger)

	// Resolve base directory.
	base := baseDir(cfg.DevMode)

	// Resolve built-in data directory (read-only, ships with the binary).
	// Holds models.json + nef/kl520/ + nef/kl720/. Auto-detected across
	// dev / installer / macOS-bundle layouts; see resolveBuiltInDataDir().
	builtInDataDir := resolveBuiltInDataDir(base)
	logger.Info("Built-in data dir: %s", builtInDataDir)

	// Resolve user data directory (writable). Holds lock, ipc-port, logs,
	// custom-models, preferences.json, sentinel. Wails passes this via
	// --data-dir pointing at the OS app-data location.
	//
	// Standalone fallback: when no --data-dir is given we reuse builtInDataDir
	// so `go run ./server` and direct binary launches keep working for local
	// development. In *production*, Wails always passes --data-dir, so this
	// branch never lands on a read-only bundle path. If someone does run the
	// packaged binary with no --data-dir, the writable operations (sentinel,
	// logs, custom-models) will fail against the read-only bundle dir and the
	// affected code paths log warnings — they don't crash the server.
	dataDir := cfg.DataDir
	if dataDir == "" {
		dataDir = builtInDataDir
	}

	// Initialize model repository (built-in models from JSON).
	// Always read from the built-in data dir — not the user data dir —
	// so Wails passing --data-dir doesn't accidentally blank out the catalog.
	modelRepo := model.NewRepository(filepath.Join(builtInDataDir, "models.json"))
	logger.Info("Loaded %d built-in models", modelRepo.Count())

	// Initialize model store (custom uploaded models) — writable, user dataDir.
	customModelDir := cfg.ModelDir
	if customModelDir == "" {
		customModelDir = filepath.Join(dataDir, "custom-models")
	}
	modelStore := model.NewModelStore(customModelDir)
	customModels, err := modelStore.LoadCustomModels()
	if err != nil {
		logger.Warn("Failed to load custom models: %v", err)
	}
	for _, m := range customModels {
		modelRepo.Add(m)
	}
	if len(customModels) > 0 {
		logger.Info("Loaded %d custom models", len(customModels))
	}

	// Initialize WebSocket hub (before device manager so log broadcaster is ready)
	wsHub := ws.NewHub()
	// M8-4b: hand dataDir to the Hub. When the first WebSocket client connects
	// it writes the sentinel file <dataDir>/.first-ws-connected, which tells
	// the Wails-side StartupPipeline that stage 6 (Wait for Web UI WebSocket)
	// has completed.
	// See .autoflow/04-architecture/v2/startup-pipeline.md §3, stage 6.
	wsHub.SetStartupSentinel(dataDir)
	go wsHub.Run()

	// Initialize log broadcaster for real-time log streaming
	logBroadcaster := pkglogger.NewBroadcaster(500, func(entry pkglogger.LogEntry) {
		wsHub.BroadcastToRoom("server-logs", entry)
	})
	logger.SetBroadcaster(logBroadcaster)

	// Initialize device manager
	registry := device.NewRegistry()
	bridgeScript := resolveBridgeScript(base)
	logger.Info("Kneron bridge script: %s", bridgeScript)
	deviceMgr := device.NewManager(registry, bridgeScript)
	deviceMgr.SetLogBroadcaster(logBroadcaster)
	deviceMgr.Start()

	// Initialize camera manager
	cameraMgr := camera.NewManager()

	// Initialize services.
	// flash.Service resolves relative `.nef` paths from models.json against
	// builtInDataDir (not dataDir), since the .nef files ship alongside
	// models.json in the read-only bundle, not in the writable user dataDir.
	flashSvc := flash.NewService(deviceMgr, modelRepo, builtInDataDir)
	inferenceSvc := inference.NewService(deviceMgr)

	// M9-3: firmware service (upgrade/downgrade orchestrator).
	// firmwareDir is the firmware/ subdirectory of the scripts/ directory that
	// holds the bridge script, e.g.
	// scripts/kneron_bridge.py → scripts/firmware/<chip>/{fw_*.bin, VERSION}
	firmwareDir := filepath.Join(filepath.Dir(bridgeScript), "firmware")
	logger.Info("Firmware bundle dir: %s", firmwareDir)
	firmwareSvc := firmware.NewService(
		handlers.NewDeviceManagerAdapter(deviceMgr),
		firmware.FirmwareDir{Root: firmwareDir},
	)

	// Determine static file system for embedded frontend
	var staticFS http.FileSystem
	if !cfg.DevMode {
		staticFS = web.StaticFS()
		logger.Info("Serving embedded frontend static files")
	} else {
		logger.Info("Dev mode: frontend static serving disabled (use Wails dev server)")
	}

	// Build HTTP server (needed for graceful shutdown and restart)
	var httpServer *http.Server
	restartRequested := make(chan struct{}, 1)

	// shutdownStarted admits exactly one caller into the cleanup sequence (the
	// restart handler and the signal goroutine may both call shutdownFn);
	// shutdownDone is closed once that sequence has finished.
	shutdownStarted := make(chan struct{}, 1)
	shutdownDone := make(chan struct{})
	shutdownFn := func() {
		select {
		case shutdownStarted <- struct{}{}:
			// First caller: perform the cleanup below.
		default:
			// Cleanup is already running elsewhere; wait for it instead of
			// stopping services and closing the camera a second time.
			<-shutdownDone
			return
		}
		defer close(shutdownDone)

		// MAJ-3 fix: the timeout must stay ≤ the Wails shutdownGracePeriod
		// (7s), leaving a 1s buffer. Per TDD §8.1 (Wails side: 7s grace + 1s
		// modal) the server has to finish cleanup within 6s; otherwise the
		// Wails SIGKILL at the 7s mark would interrupt the server while it is
		// still syncing files.
		ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
		defer cancel()
		inferenceSvc.StopAll()
		cameraMgr.Close()
		if httpServer != nil {
			if err := httpServer.Shutdown(ctx); err != nil {
				logger.Warn("HTTP server shutdown did not complete cleanly: %v", err)
			}
		}
	}
	restartFn := func() {
		// Signal the main goroutine to perform exec after server shutdown
		select {
		case restartRequested <- struct{}{}:
		default:
		}
		shutdownFn()
	}

	// Resolve python bin (used by InstallDriver handler on Windows).
	// Priority: VISIONA_PYTHON env var (set by Wails shell) → cfg.PythonBin (--python flag)
	pythonBinForSystem := os.Getenv("VISIONA_PYTHON")
	if pythonBinForSystem == "" {
		pythonBinForSystem = cfg.PythonBin
	}

	// Create system handler with injected version and restart function
	systemHandler := handlers.NewSystemHandler(Version, BuildTime, pythonBinForSystem, restartFn, wsHub)

	// Create router
	r := api.NewRouter(modelRepo, modelStore, deviceMgr, cameraMgr, flashSvc, inferenceSvc, firmwareSvc, firmwareDir, wsHub, staticFS, logBroadcaster, systemHandler)

	// Configure HTTP server (bind to localhost only)
	addr := cfg.Addr()
	httpServer = &http.Server{
		Addr:    addr,
		Handler: r,
	}

	// Handle OS signals for graceful shutdown.
	//
	// M9-4.5 (TDD §8.6.1 + §8.6.3): firmware-aware shutdown.
	//  1. If a firmware task is active (upgrade/downgrade in progress, flash
	//     being written):
	//     - broadcast `server:shutdown-pending` to WebSocket clients
	//     - call firmwareSvc.RequestShutdown() to reject new tasks
	//     - wait for the existing tasks to finish, up to a hard timeout owned
	//       by the firmware package (220s as of M9-4.5: KL720 200s upgrade +
	//       20s buffer)
	//     - whether or not the wait ended cleanly, continue to shutdownFn so
	//       the process cannot hang forever
	//  2. If no task is active: go straight to the regular shutdownFn flow.
	//
	// The logic itself lives in firmware.AwaitActiveTasksOrTimeout (which
	// keeps it testable); this file only does the wiring. While waiting, the
	// server keeps running (no new firmware tasks are accepted, but other HTTP
	// requests are served as usual) so the Wails window can still query the
	// firmware status it needs to display its modal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-quit
		logger.Info("Received signal %v, evaluating firmware lifecycle before shutdown...", sig)
		// Wait for in-flight firmware tasks to finish (or for the helper's
		// hard timeout to expire). The result is deliberately ignored: clean
		// or not, shutdown proceeds, and the helper has already logged a
		// warning for the unclean case.
		_ = firmware.AwaitActiveTasksOrTimeout(
			context.Background(),
			firmwareSvc,
			wsHub,
			logger,
		)
		logger.Info("Proceeding with graceful shutdown after firmware checks")
		shutdownFn()
		os.Exit(0)
	}()

	// Kill existing process on the port if occupied
	killExistingProcess(addr, logger)

	// Start server
	logger.Info("Server listening on %s", addr)
	if err := httpServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
		log.Fatalf("Failed to start server: %v", err)
	}

	// ListenAndServe returns ErrServerClosed as soon as Shutdown is *called*,
	// not when it has finished. Shutdown is only invoked from shutdownFn, so
	// block here until that cleanup completes (bounded by its 6s context);
	// otherwise the process would exit — or exec its replacement — while
	// in-flight responses are still being drained.
	<-shutdownDone

	// If restart was requested, exec the same binary to replace this process
	select {
	case <-restartRequested:
		logger.Info("Performing self-restart via exec...")
		exe, err := os.Executable()
		if err != nil {
			log.Fatalf("Failed to get executable path: %v", err)
		}
		exe, err = filepath.EvalSymlinks(exe)
		if err != nil {
			log.Fatalf("Failed to resolve executable symlinks: %v", err)
		}
		// syscall.Exec only returns when it failed; surface the reason.
		err = syscall.Exec(exe, os.Args, os.Environ())
		log.Fatalf("syscall.Exec failed: %v", err)
	default:
		// Normal shutdown, just exit
	}
}
// killExistingProcess frees the TCP port in addr when another process is
// already listening on it, so the server can bind cleanly.
//
// It first probes the port with net.Listen; when that succeeds the port is
// free and nothing else happens. Otherwise it shells out to the platform's
// tools to force-kill the occupant, then polls the port for a short while and
// logs whether it actually became available. Failures are logged, never
// returned: the subsequent ListenAndServe call reports the definitive error.
//
// addr must be in host:port form; anything net.SplitHostPort rejects is
// silently ignored.
func killExistingProcess(addr string, logger *pkglogger.Logger) {
	// Extract port from addr (e.g. "127.0.0.1:3721" → "3721")
	_, port, err := net.SplitHostPort(addr)
	if err != nil {
		return
	}

	// Quick check: try to listen — if it works, port is free
	ln, err := net.Listen("tcp", addr)
	if err == nil {
		_ = ln.Close() // probe listener only; a close error is not actionable
		return
	}

	// Port is occupied, find and kill the process
	logger.Info("Port %s is in use, killing existing process...", port)
	var cmd *exec.Cmd
	if runtime.GOOS == "windows" {
		// NOTE(review): `findstr :<port>` matches the port text anywhere on
		// the netstat line (remote addresses, longer ports sharing the same
		// prefix), so this may target more PIDs than the listener — consider
		// tightening after verifying on Windows.
		cmd = exec.Command("cmd", "/C", fmt.Sprintf("for /f \"tokens=5\" %%a in ('netstat -ano ^| findstr :%s') do taskkill /F /PID %%a", port))
	} else {
		// Restrict lsof to TCP sockets in LISTEN state. A bare `-i:<port>`
		// also matches *clients* connected to that port (e.g. a browser or
		// the desktop shell's web view), which would then be SIGKILLed too.
		cmd = exec.Command("sh", "-c", fmt.Sprintf("lsof -t -iTCP:%s -sTCP:LISTEN | xargs kill -9 2>/dev/null", port))
	}
	output, err := cmd.CombinedOutput()
	if err != nil {
		logger.Warn("Failed to kill process on port %s: %v (%s)", port, err, strings.TrimSpace(string(output)))
		return
	}

	// Wait for the port to be released, verifying instead of assuming: the
	// kill is asynchronous and the kernel may take a moment to free the socket.
	const (
		releaseTimeout = 2 * time.Second
		releasePoll    = 100 * time.Millisecond
	)
	deadline := time.Now().Add(releaseTimeout)
	for {
		probe, probeErr := net.Listen("tcp", addr)
		if probeErr == nil {
			_ = probe.Close()
			logger.Info("Previous process killed, port %s is now free", port)
			return
		}
		if !time.Now().Before(deadline) {
			logger.Warn("Port %s is still in use after killing the previous process: %v", port, probeErr)
			return
		}
		time.Sleep(releasePoll)
	}
}