jim800121chen c03eb6fd0e feat(local-tool): M9-2 — Go driver UpgradeFirmware + firmware service module
A 階段第二個 milestone、銜接 M9-1 bridge.py、暴露 service layer 給 M9-3 API/WebSocket。

New module `server/internal/firmware/`:
- types.go: 123 行(FirmwareVersion / FirmwareProgress / ActiveTaskInfo / UpgradeDriver interface / 8 reason const)
- progress.go: 147 行(仿 flash pattern 的 Tracker、Task.cancel 預留 SIGTERM force-cancel godoc)
- service.go: 373 行(核心 service:UpgradeFirmware / HasActiveTask / GetActiveTaskInfo / RequestShutdown / WaitForActiveTasks / ListBundledVersions / GetCurrentVersion)
- service_test.go: 676 行、13 個 test 含 MultiDeviceParallel

Driver layer:
- kl720_driver.go: 697 → 1054 行(+357、新 UpgradeFirmware method + tryRouteFirmwareEvent + sendCommandForUpgrade snapshot pattern)
- kl720_driver_test.go: 360 行、11 個 test(含 InfoNotBlockedDuringUpgrade / CtxCancelReleasesBridge / StderrEventAfterCtxCancel 100 round stress)

關鍵設計:
- flash 與 firmware 模組分離(不 import flash)
- UpgradeDriver interface 隔離 driver 細節、DeviceLookup interface 隔離 device manager
- 中介 channel pattern(service ↔ driver)方便 service 補欄位(DeviceID / Direction / BeforeVersion)
- timeout 雙保險:chip timeout + 30s margin
- 8 reason enum 對齊 bridge.py、stage 採 Design 命名

Concurrency race 修復(M9-2 Reviewer round 1 → round 2):
- Major 1(mutex deadlock):新 fwUpgradeMu 獨立鎖 + sendCommandForUpgrade snapshot stdin/stdout pattern、避開 d.mu field-level race + 升級期間 Info/Disconnect 不被卡 + timeout 路徑無死鎖
- Major 2(close-channel race):tryRouteFirmwareEvent 持 fwMu 整段、配合 defer setFirmwareProgressCh(nil) 提供 happen-before、絕無 send on closed channel panic

Reviewer 兩輪審查:
- Round 1: 0 Critical / 2 Major / 5 Minor / 5 Suggestion
- Round 2: 0 Critical / 0 Major / 2 Minor / 2 Suggestion(11/12 issue 修到位、Suggestion 4 留 follow-up)

M9-1 follow-up 順手清:
- m5(test 死碼 _firmware_upgrade_start_ts 殘留兩行)已清
- s5(test 註解 idempotent shape 說明)已加

測試:
- go test ./... -race -count=1: 全綠(28s、無 regression)
- Python: 36 tests + 22 subtests 全綠(0.31s)
- go vet / build: 0 output

下一步:M9-3 API handler + WebSocket progress(CI 建議 `go test -race -count=3` 提升 race 偵測強度)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 11:27:36 +08:00

361 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package kneron
import (
"bufio"
"context"
"io"
"sync"
"testing"
"time"
"visiona-local/server/internal/driver"
"visiona-local/server/internal/firmware"
)
// testCtx returns a context for use in tests that expires after five seconds.
// The matching cancel function is registered with t.Cleanup, so callers do
// not have to release the context themselves.
func testCtx(t *testing.T) context.Context {
	t.Helper()

	const limit = 5 * time.Second
	ctx, release := context.WithTimeout(context.Background(), limit)
	t.Cleanup(release)

	return ctx
}
// shortTimeout returns a channel that receives the current time once 500ms
// have elapsed. Tests select on it to detect a call that unexpectedly blocks.
func shortTimeout() <-chan time.Time {
	const wait = 500 * time.Millisecond
	return time.After(wait)
}
// TestTryRouteFirmwareEvent_ValidProgress feeds a well-formed
// firmware_progress JSON line to tryRouteFirmwareEvent and checks that the
// decoded event arrives on the registered progress channel with its stage,
// percent, message, elapsed and ETA fields intact.
func TestTryRouteFirmwareEvent_ValidProgress(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")
	events := make(chan firmware.FirmwareProgress, 4)
	drv.setFirmwareProgressCh(events)

	payload := `{"event":"firmware_progress","percent":50,"stage":"flashing","message":"writing KDP2","elapsed_ms":1500,"eta_ms":3000}`
	if routed := drv.tryRouteFirmwareEvent(payload); !routed {
		t.Fatalf("expected tryRouteFirmwareEvent=true")
	}

	// Non-blocking receive: the event is expected to be on the channel by the
	// time the routing call has returned.
	var got firmware.FirmwareProgress
	select {
	case got = <-events:
	default:
		t.Fatalf("no event on channel")
	}

	if got.Stage != firmware.StageFlashing {
		t.Errorf("Stage = %q, want flashing", got.Stage)
	}
	if got.Percent != 50 {
		t.Errorf("Percent = %d, want 50", got.Percent)
	}
	if got.Message != "writing KDP2" {
		t.Errorf("Message = %q", got.Message)
	}
	if got.ElapsedMs != 1500 {
		t.Errorf("ElapsedMs = %d, want 1500", got.ElapsedMs)
	}
	if got.EtaMs != 3000 {
		t.Errorf("EtaMs = %d, want 3000", got.EtaMs)
	}
}
// TestTryRouteFirmwareEvent_ErrorEvent verifies that an error-stage
// firmware_progress line keeps its reason, raw_error and before_version
// fields when it is routed to the registered progress channel.
func TestTryRouteFirmwareEvent_ErrorEvent(t *testing.T) {
	d := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")
	ch := make(chan firmware.FirmwareProgress, 4)
	d.setFirmwareProgressCh(ch)
	line := `{"event":"firmware_progress","percent":-1,"stage":"error","message":"loader missing","elapsed_ms":800,"eta_ms":0,"error":"loader missing","reason":"loader_write_failed","raw_error":"_FwError loader","before_version":"KDP"}`
	if !d.tryRouteFirmwareEvent(line) {
		t.Fatalf("expected tryRouteFirmwareEvent=true")
	}

	// Receive without blocking, the same way the valid-progress test does.
	// A bare `<-ch` would hang until the global go test timeout if the event
	// were reported as routed but never delivered (the channel-full test shows
	// that "true but dropped" is a possible outcome of the routing call).
	var ev firmware.FirmwareProgress
	select {
	case ev = <-ch:
	default:
		t.Fatalf("no event on channel")
	}

	if ev.Stage != firmware.StageError {
		t.Errorf("Stage = %q, want error", ev.Stage)
	}
	if ev.Reason != firmware.ReasonLoaderWriteFailed {
		t.Errorf("Reason = %q, want loader_write_failed", ev.Reason)
	}
	if ev.RawError != "_FwError loader" {
		t.Errorf("RawError = %q", ev.RawError)
	}
	if ev.BeforeVersion != "KDP" {
		t.Errorf("BeforeVersion = %q, want KDP", ev.BeforeVersion)
	}
}
// TestTryRouteFirmwareEvent_NoChannel checks that tryRouteFirmwareEvent
// reports false when no progress channel has been registered. Per the original
// note, the caller is then expected to fall back to the broadcaster.
func TestTryRouteFirmwareEvent_NoChannel(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")

	// Deliberately no setFirmwareProgressCh call here.
	routed := drv.tryRouteFirmwareEvent(`{"event":"firmware_progress","stage":"done","percent":100}`)
	if routed {
		t.Errorf("expected false when no channel registered")
	}
}
// TestTryRouteFirmwareEvent_NonFirmwareEvent checks that lines which are not
// firmware_progress events are not routed, even with a channel registered.
func TestTryRouteFirmwareEvent_NonFirmwareEvent(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")
	events := make(chan firmware.FirmwareProgress, 4)
	drv.setFirmwareProgressCh(events)

	rejected := []struct {
		line    string
		failure string
	}{
		// A different event type (shutdown_rejected).
		{`{"event":"shutdown_rejected","reason":"firmware_upgrade_in_progress"}`, "expected false for non-firmware event"},
		// Brace-delimited but invalid JSON.
		{`{not json}`, "expected false for malformed JSON"},
		// Not JSON at all.
		{`[kneron_bridge] regular log line`, "expected false for plain log line"},
	}

	for _, tc := range rejected {
		if drv.tryRouteFirmwareEvent(tc.line) {
			t.Error(tc.failure)
		}
	}
}
// TestTryRouteFirmwareEvent_ChannelFull checks that routing into a progress
// channel whose buffer is already full does not block, and that the call still
// reports true because the line was a firmware_progress event.
func TestTryRouteFirmwareEvent_ChannelFull(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")

	// Fill the single-slot buffer before registering the channel.
	events := make(chan firmware.FirmwareProgress, 1)
	events <- firmware.FirmwareProgress{Stage: "x"}
	drv.setFirmwareProgressCh(events)

	payload := `{"event":"firmware_progress","stage":"flashing","percent":50}`

	// Run the call on its own goroutine so a blocking implementation fails the
	// test through the timeout branch instead of hanging it.
	result := make(chan bool, 1)
	go func() { result <- drv.tryRouteFirmwareEvent(payload) }()

	select {
	case routed := <-result:
		if !routed {
			t.Errorf("expected true (event was a firmware_progress event)")
		}
	case <-shortTimeout():
		t.Fatalf("tryRouteFirmwareEvent should not block when channel is full")
	}
}
// TestSetFirmwareProgressCh_Unregister checks that registering a channel and
// then setting it back to nil leaves the driver in the unregistered state, so
// firmware_progress lines are no longer routed.
func TestSetFirmwareProgressCh_Unregister(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test"}, "/dev/null")

	events := make(chan firmware.FirmwareProgress, 1)
	drv.setFirmwareProgressCh(events)
	drv.setFirmwareProgressCh(nil)

	routed := drv.tryRouteFirmwareEvent(`{"event":"firmware_progress","stage":"done","percent":100}`)
	if routed {
		t.Errorf("expected false after unregister")
	}
}
// TestUpgradeFirmware_UnsupportedChip checks that UpgradeFirmware returns an
// error for the chip name "KL630". Per the original note, phase A is limited
// to KL520 / KL720.
func TestUpgradeFirmware_UnsupportedChip(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test", Type: "KL720"}, "/dev/null")

	// Force pythonReady so the call is not rejected early for a missing bridge;
	// the intent is to reach the chip check.
	drv.pythonReady = true
	defer func() { drv.pythonReady = false }()

	progress := make(chan firmware.FirmwareProgress, 4)
	if err := drv.UpgradeFirmware(testCtx(t), "KL630", progress); err == nil {
		t.Fatalf("expected error for unsupported chip")
	}
}
// TestUpgradeFirmware_NoPythonBridge checks that UpgradeFirmware returns an
// error on a freshly constructed driver that was never connected.
func TestUpgradeFirmware_NoPythonBridge(t *testing.T) {
	// pythonReady is left at whatever NewKneronDriver sets; the original note
	// states that the default is false.
	drv := NewKneronDriver(driver.DeviceInfo{ID: "test", Type: "KL720"}, "/dev/null")

	progress := make(chan firmware.FirmwareProgress, 4)
	if err := drv.UpgradeFirmware(testCtx(t), "KL720", progress); err == nil {
		t.Fatalf("expected error when python bridge not running")
	}
}
// ──────────────────────────────────────────────────────────────────────
// The tests below use in-memory pipes to stand in for the bridge's stdin and
// stdout instead of launching a real Python process.
//
// setupFakeBridge connects a driver to a pair of io.Pipe and returns a
// *fakeBridge holding the test-side ends. A test can read the command JSON
// the driver writes from stdinR, and can write to stdoutW to simulate a
// bridge response, or leave it silent to simulate a stuck bridge.
// ──────────────────────────────────────────────────────────────────────

// fakeBridge holds the test-side ends of the two pipes wired into a driver.
type fakeBridge struct {
stdinR *io.PipeReader // read end of the pipe the driver writes commands to (test side)
stdoutW *io.PipeWriter // write end of the pipe the driver reads responses from (test side)
}
// setupFakeBridge wires d to two in-memory pipes: d.stdin becomes the write
// end of one, and d.stdout becomes a bufio.Scanner over the read end of the
// other. It also marks the driver as pythonReady. The returned fakeBridge
// exposes the opposite ends for the test to use.
//
// A cleanup registered on t closes all four pipe ends and resets pythonReady
// to false.
func setupFakeBridge(t *testing.T, d *KneronDriver) *fakeBridge {
	t.Helper()

	cmdReader, cmdWriter := io.Pipe()
	respReader, respWriter := io.Pipe()

	// 64 KiB initial buffer, lines of up to 1 MiB.
	scanner := bufio.NewScanner(respReader)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	d.stdin = cmdWriter
	d.stdout = scanner
	d.pythonReady = true

	t.Cleanup(func() {
		// Close errors are irrelevant for in-memory pipes during teardown.
		for _, end := range []io.Closer{cmdReader, cmdWriter, respReader, respWriter} {
			_ = end.Close()
		}
		d.pythonReady = false
	})

	return &fakeBridge{stdinR: cmdReader, stdoutW: respWriter}
}
// TestUpgradeFirmware_InfoNotBlockedDuringUpgrade is the regression test for
// review item "Major 1".
//
// While an upgrade is in flight against a fake bridge that never answers,
// Info() and IsConnected() must still return promptly. Per the original note,
// the upgrade's command goroutine holds fwUpgradeMu while it waits and must
// not block operations that take d.mu.
//
// NOTE(review): io.Pipe writes block until a reader consumes them, and nothing
// reads the fake stdin here. The driver may therefore be parked in its stdin
// write, not in stdout.Scan — confirm against sendCommandForUpgrade.
func TestUpgradeFirmware_InfoNotBlockedDuringUpgrade(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "dev-x", Type: "KL720", Port: "USB"}, "/dev/null")
	setupFakeBridge(t, drv)
	progress := make(chan firmware.FirmwareProgress, 16)

	// Run the upgrade in the background; the fake bridge never replies.
	upgradeCtx, cancelUpgrade := context.WithCancel(context.Background())
	t.Cleanup(cancelUpgrade)
	upgradeResult := make(chan error, 1)
	go func() {
		upgradeResult <- drv.UpgradeFirmware(upgradeCtx, "KL720", progress)
	}()

	// Give the upgrade goroutine a moment to reach its blocking point.
	time.Sleep(50 * time.Millisecond)

	// Info must come back right away.
	infoResult := make(chan driver.DeviceInfo, 1)
	go func() { infoResult <- drv.Info() }()
	select {
	case got := <-infoResult:
		if got.ID != "dev-x" {
			t.Errorf("Info().ID = %q, want dev-x", got.ID)
		}
	case <-time.After(500 * time.Millisecond):
		t.Fatalf("Info() blocked during firmware upgrade — Major 1 deadlock not fixed")
	}

	// IsConnected must not hang either; only its promptness is checked, not
	// the returned value.
	connResult := make(chan bool, 1)
	go func() { connResult <- drv.IsConnected() }()
	select {
	case <-connResult:
	case <-time.After(500 * time.Millisecond):
		t.Fatalf("IsConnected() blocked during firmware upgrade")
	}

	// Wrap up: cancel the context and expect UpgradeFirmware to return.
	cancelUpgrade()
	select {
	case <-upgradeResult:
	case <-time.After(2 * time.Second):
		t.Fatalf("UpgradeFirmware did not return after ctx cancel")
	}
}
// TestUpgradeFirmware_CtxCancelReleasesBridge is the regression test for
// review item "Minor 2".
//
// After the context is cancelled, UpgradeFirmware must return a non-nil error
// within two seconds. It must also have pushed an error-stage event with the
// timeout reason onto the progress channel. Per the original note, the failure
// being guarded against is a deadlock between the command goroutine's lock and
// stopPython taking d.mu.
func TestUpgradeFirmware_CtxCancelReleasesBridge(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "dev-x", Type: "KL520", Port: "USB"}, "/dev/null")
	setupFakeBridge(t, drv)
	progress := make(chan firmware.FirmwareProgress, 16)

	ctx, cancel := context.WithCancel(context.Background())
	upgradeResult := make(chan error, 1)
	go func() {
		upgradeResult <- drv.UpgradeFirmware(ctx, "KL520", progress)
	}()

	// Give the upgrade goroutine a moment to reach its blocking point.
	time.Sleep(50 * time.Millisecond)

	// Cancelling should make UpgradeFirmware take its ctx.Done path and return.
	cancel()
	select {
	case upgradeErr := <-upgradeResult:
		if upgradeErr == nil {
			t.Errorf("expected ctx-cancel error, got nil")
		}
	case <-time.After(2 * time.Second):
		t.Fatalf("UpgradeFirmware did not return within 2s after ctx cancel (deadlock?)")
	}

	// The first event on the progress channel should be the timeout error.
	select {
	case first := <-progress:
		if first.Stage != firmware.StageError {
			t.Errorf("first event.Stage = %q, want error", first.Stage)
		}
		if first.Reason != firmware.ReasonTimeout {
			t.Errorf("first event.Reason = %q, want timeout", first.Reason)
		}
	case <-time.After(500 * time.Millisecond):
		t.Fatalf("no timeout event pushed after ctx cancel")
	}
}
// TestUpgradeFirmware_StderrEventAfterCtxCancel is the regression test for
// review item "Major 2".
//
// Eight goroutines call tryRouteFirmwareEvent in a tight loop, standing in for
// in-flight stderr events. Meanwhile the main goroutine runs 100 rounds of
// "unregister the channel, close it, register a fresh one". The test passes
// if no goroutine panics with a send on a closed channel. Per the original
// note, fwMu together with setFirmwareProgressCh(nil) is what should guarantee
// this.
func TestUpgradeFirmware_StderrEventAfterCtxCancel(t *testing.T) {
	drv := NewKneronDriver(driver.DeviceInfo{ID: "dev-x", Type: "KL520"}, "/dev/null")
	current := make(chan firmware.FirmwareProgress, 4)
	drv.setFirmwareProgressCh(current)

	const (
		rounds  = 100
		routers = 8
	)

	// Routers: keep trying to route until told to stop.
	var routersDone sync.WaitGroup
	halt := make(chan struct{})
	routersDone.Add(routers)
	for r := 0; r < routers; r++ {
		go func() {
			defer routersDone.Done()
			for {
				select {
				case <-halt:
					return
				default:
				}
				_ = drv.tryRouteFirmwareEvent(`{"event":"firmware_progress","stage":"flashing","percent":50}`)
			}
		}()
	}

	// Main flow: unregister, close, then swap in a new channel for the next round.
	for round := 0; round < rounds; round++ {
		// Brief pause so the routers get a chance to pick up the channel.
		time.Sleep(10 * time.Microsecond)
		drv.setFirmwareProgressCh(nil) // must happen before the close
		close(current)
		current = make(chan firmware.FirmwareProgress, 4)
		drv.setFirmwareProgressCh(current)
	}

	// Shut the routers down; reaching the end without a panic is a pass.
	close(halt)
	routersDone.Wait()
}