jim800121chen 30d0ff5695 fix(local-tool): 推論 bbox 標註不顯示 — 前端 canvas 尺寸 + KL520 reset + 延長 timeout
症狀:Mac 版上傳單張圖推論,畫面完全沒有 bbox 標註。實測追根因後發現
兩層獨立問題疊加(前端 + 後端),擇一修復都無法解決。

## Layer 1: 前端 canvas 尺寸對不上 img 顯示尺寸
- camera-inference-view.tsx renderedSize 初始值硬寫 {w:640, h:480}
- ResizeObserver 理應在 <img> load 後 fire,但實測沒 fire 或時機不對
- 結果 overlay canvas 永遠用 640×480 畫,bbox 嚴重偏位或跑出 canvas

修法(camera-feed.tsx + camera-inference-view.tsx):
- <img> 加 onLoad handler,decode 完立刻用 getBoundingClientRect 回報
- ResizeObserver effect 進來先檢查 img.complete && naturalWidth > 0,
  是就立刻 report(cover HMR / cached image)
- effect 依賴加 streamUrl / batchImageUrl,換圖會重觀察
- renderedSize 初始值改 null,overlay 改為拿到真實尺寸才 render
- setState callback 用 prev 比對,同尺寸不觸發 render
- camera-overlay.tsx 加 [bbox-debug] console.log 保留(debug 成本低,
  對未來排查有幫助)

## Layer 2: KL520 推論炸 ApiKPException Error 15
- kp.inference.generic_image_inference_send 回 SEND_DATA_TOO_LARGE
- 試過 image 尺寸(516×640 / 640×794 / 640×640 host pad)、numpy vs
  bytes、明確傳 width/height — 全部炸
- Python bridge 直接測試(/tmp/test_bridge.py)做完整
  `connect → reset → reconnect → load_model → inference` 序列 → 11 個
  detection 正常回傳
- Go driver 走 `connect → load_model → inference` 跳過 reset

根因:commit ddf0eb8(2026-04-16)「KL520 首次 connect 跳過 reset」當時
為解 Windows 60s HTTP timeout 的優化。但副作用:KL520 若 session 間
firmware 殘留(fw=KDP2 Comp/U),直接 load_model + inference 100% 炸
Error 15。必須走完整 reset → 退回 Loader → 重新載 firmware → Comp/U
流程才能得到能 inference 的 session。

修法(kl720_driver.go):
- 移除「KL520 跳過 reset」特例,讓 KL520 和 KL720 都走 needsReset → restartBridge
- 註解記錄 trade-off:KL520 connect 時間 ~2s → ~15-20s(macOS),
  Windows 可能 60s+

## HTTP timeout 配套調整
- device_handler.go ConnectDevice timeout 60s → 120s
- Windows worst-case(~65s:Loader reconnect 16s + firmware load 31s +
  reboot 8s + reconnect 5s)留 buffer,避免 504 CONNECT_TIMEOUT

## Bridge 清理
- kneron_bridge.py 清掉中途試驗遺留的 `_host_preproc` 死碼
  (還原成原版 _correct_bbox_for_letterbox)
- 加了 debug log(Inference: sending / parse done / EXCEPTION with
  traceback)保留,未來排查 inference 路徑很有用

## 驗證(function 層)
/tmp/test_bridge.py 三種尺寸全通過:
- 516×640 直式 → 11 detections (person×8, tie×3) latency 308ms
- 1920×1080 橫式 → 0 detections(合成圖,正常)
- 512×512 正方 → 0 detections

## 待使用者驗證
- Mac UI 實測:上傳 ~/Downloads/000000000459.jpg 應見 11 個 bbox 精準框住
- Windows 實測 connect 耗時 + timeout 是否足夠
- Linux 實測

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 01:12:10 +08:00

197 lines
4.8 KiB
Go

package handlers
import (
"context"
"fmt"
"os"
"runtime"
"time"
"visiona-local/server/internal/api/ws"
"visiona-local/server/internal/device"
"visiona-local/server/internal/driver"
"visiona-local/server/internal/flash"
"visiona-local/server/internal/inference"
"github.com/gin-gonic/gin"
)
// udevRuleInstalled reports whether the Kneron udev rule file
// /etc/udev/rules.d/99-kneron.rules can be stat'ed. Any stat error
// (missing file, permission problem, ...) is treated as "not installed".
func udevRuleInstalled() bool {
	if _, statErr := os.Stat("/etc/udev/rules.d/99-kneron.rules"); statErr != nil {
		return false
	}
	return true
}
// DeviceHandler groups the HTTP handlers for device-related endpoints
// (scan/list/get, connect/disconnect, flash, and inference start/stop).
type DeviceHandler struct {
// deviceMgr is used to rescan, list, look up, connect and disconnect devices.
deviceMgr *device.Manager
// flashSvc starts flash tasks and cleans them up once their progress channel drains.
flashSvc *flash.Service
// inferenceSvc starts and stops inference for a device.
inferenceSvc *inference.Service
// wsHub broadcasts flash progress and inference results to WebSocket rooms.
wsHub *ws.Hub
}
// NewDeviceHandler builds a DeviceHandler that stores the given device
// manager, flash service, inference service and WebSocket hub as-is.
func NewDeviceHandler(
	deviceMgr *device.Manager,
	flashSvc *flash.Service,
	inferenceSvc *inference.Service,
	wsHub *ws.Hub,
) *DeviceHandler {
	handler := new(DeviceHandler)
	handler.deviceMgr = deviceMgr
	handler.flashSvc = flashSvc
	handler.inferenceSvc = inferenceSvc
	handler.wsHub = wsHub
	return handler
}
// ScanDevices asks the device manager to rescan and responds with HTTP 200
// and the resulting device list under data.devices. On Linux, when the
// rescan finds no devices and the Kneron udev rule is not installed,
// data.udevHint is also set to true.
func (h *DeviceHandler) ScanDevices(c *gin.Context) {
	found := h.deviceMgr.Rescan()

	payload := gin.H{}
	payload["devices"] = found

	// Linux with zero devices and no udev rule: hint that the user should
	// install the USB permission rule.
	needsHint := runtime.GOOS == "linux" && len(found) == 0 && !udevRuleInstalled()
	if needsHint {
		payload["udevHint"] = true
	}

	c.JSON(200, gin.H{"success": true, "data": payload})
}
// ListDevices responds with HTTP 200 and the device manager's current
// device list under data.devices. On Linux, when that list is empty and the
// Kneron udev rule is not installed, data.udevHint is also set to true.
func (h *DeviceHandler) ListDevices(c *gin.Context) {
	known := h.deviceMgr.ListDevices()
	body := gin.H{"devices": known}

	// Only probe for the udev rule when it could explain an empty list.
	if runtime.GOOS == "linux" && len(known) == 0 {
		if !udevRuleInstalled() {
			body["udevHint"] = true
		}
	}

	c.JSON(200, gin.H{"success": true, "data": body})
}
// GetDevice looks up the device named by the "id" route parameter.
// It responds with HTTP 200 and the driver's Info() on success, or HTTP 404
// with error code DEVICE_NOT_FOUND when the lookup returns an error.
func (h *DeviceHandler) GetDevice(c *gin.Context) {
	session, err := h.deviceMgr.GetDevice(c.Param("id"))
	if err == nil {
		c.JSON(200, gin.H{"success": true, "data": session.Driver.Info()})
		return
	}

	failure := gin.H{"code": "DEVICE_NOT_FOUND", "message": err.Error()}
	c.JSON(404, gin.H{"success": false, "error": failure})
}
// ConnectDevice connects the device named by the "id" route parameter.
//
// The connect call runs in its own goroutine and is raced against a
// timeout derived from the request context. Responses:
//   - 200 {"success": true} when Connect returns nil,
//   - 400 CONNECT_FAILED when Connect returns an error,
//   - 504 CONNECT_TIMEOUT when the context is done before Connect returns.
func (h *DeviceHandler) ConnectDevice(c *gin.Context) {
	id := c.Param("id")

	// KL520 USB Boot flow now includes mandatory reset + firmware reload on
	// first connect (required for inference to work — see kl720_driver.go
	// needsReset block). Worst-case path on Windows: Loader-mode reconnect
	// retry (16s) + firmware load (~31s) + reboot wait + second reconnect
	// (~13s) = ~60-65s. Use 120s to leave headroom and avoid spurious 504s.
	//
	// The same constant feeds both the context and the timeout message so
	// the reported duration cannot drift from the real one.
	const connectTimeout = 120 * time.Second

	ctx, cancel := context.WithTimeout(c.Request.Context(), connectTimeout)
	defer cancel()

	// Connect takes no context, so after a timeout the goroutine keeps
	// running until Connect returns. The one-slot buffer lets it deliver its
	// result and exit even though nobody is receiving anymore.
	errCh := make(chan error, 1)
	go func() {
		errCh <- h.deviceMgr.Connect(id)
	}()

	select {
	case err := <-errCh:
		if err != nil {
			c.JSON(400, gin.H{
				"success": false,
				"error":   gin.H{"code": "CONNECT_FAILED", "message": err.Error()},
			})
			return
		}
		c.JSON(200, gin.H{"success": true})
	case <-ctx.Done():
		c.JSON(504, gin.H{
			"success": false,
			"error": gin.H{
				"code":    "CONNECT_TIMEOUT",
				"message": fmt.Sprintf("device connect timed out after %ds for %s", int(connectTimeout/time.Second), id),
			},
		})
	}
}
// DisconnectDevice disconnects the device named by the "id" route
// parameter. It responds with HTTP 200 on success, or HTTP 400 with error
// code DISCONNECT_FAILED when the device manager returns an error.
func (h *DeviceHandler) DisconnectDevice(c *gin.Context) {
	err := h.deviceMgr.Disconnect(c.Param("id"))
	if err == nil {
		c.JSON(200, gin.H{"success": true})
		return
	}

	failure := gin.H{"code": "DISCONNECT_FAILED", "message": err.Error()}
	c.JSON(400, gin.H{"success": false, "error": failure})
}
// FlashDevice starts flashing a model onto the device named by the "id"
// route parameter. The JSON request body supplies the model as "modelId".
//
// Responses:
//   - 400 BAD_REQUEST when the body cannot be bound as JSON,
//   - 400 FLASH_FAILED when StartFlash returns an error,
//   - 200 with data.taskId once the flash task has been started.
//
// Progress values are relayed to the WebSocket room "flash:<id>" in the
// background until the progress channel is closed.
func (h *DeviceHandler) FlashDevice(c *gin.Context) {
	deviceID := c.Param("id")

	var body struct {
		ModelID string `json:"modelId"`
	}
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		failure := gin.H{"code": "BAD_REQUEST", "message": "modelId is required"}
		c.JSON(400, gin.H{"success": false, "error": failure})
		return
	}

	taskID, updates, err := h.flashSvc.StartFlash(deviceID, body.ModelID)
	if err != nil {
		failure := gin.H{"code": "FLASH_FAILED", "message": err.Error()}
		c.JSON(400, gin.H{"success": false, "error": failure})
		return
	}

	// Forward progress to WebSocket, then cleanup task (M2 fix).
	go func(room string) {
		for update := range updates {
			h.wsHub.BroadcastToRoom(room, update)
		}
		h.flashSvc.CleanupTask(taskID)
	}("flash:" + deviceID)

	c.JSON(200, gin.H{"success": true, "data": gin.H{"taskId": taskID}})
}
// StartInference starts inference on the device named by the "id" route
// parameter. It responds with HTTP 400 and error code INFERENCE_ERROR when
// the inference service fails to start, otherwise with HTTP 200.
//
// Results are received on a channel with capacity 10, stamped with the
// device ID, and relayed to the WebSocket room "inference:<id>" in the
// background until the channel is closed.
func (h *DeviceHandler) StartInference(c *gin.Context) {
	deviceID := c.Param("id")
	results := make(chan *driver.InferenceResult, 10)

	err := h.inferenceSvc.Start(deviceID, results)
	if err != nil {
		failure := gin.H{"code": "INFERENCE_ERROR", "message": err.Error()}
		c.JSON(400, gin.H{"success": false, "error": failure})
		return
	}

	// Forward results to WebSocket, enriching each with the device ID.
	go func(room string) {
		for res := range results {
			res.DeviceID = deviceID
			h.wsHub.BroadcastToRoom(room, res)
		}
	}("inference:" + deviceID)

	c.JSON(200, gin.H{"success": true})
}
// StopInference stops inference on the device named by the "id" route
// parameter. It responds with HTTP 200 on success, or HTTP 400 with error
// code INFERENCE_ERROR when the inference service returns an error.
func (h *DeviceHandler) StopInference(c *gin.Context) {
	err := h.inferenceSvc.Stop(c.Param("id"))
	if err == nil {
		c.JSON(200, gin.H{"success": true})
		return
	}

	failure := gin.H{"code": "INFERENCE_ERROR", "message": err.Error()}
	c.JSON(400, gin.H{"success": false, "error": failure})
}