diff --git a/local-tool/.autoflow/progress.md b/local-tool/.autoflow/progress.md index a8e7a65..6aabac9 100644 --- a/local-tool/.autoflow/progress.md +++ b/local-tool/.autoflow/progress.md @@ -64,10 +64,36 @@ Mac 版 app 上傳單張圖推論,畫面上完全沒有 bbox 標註。 三種尺寸(516×640 直式 / 1920×1080 横式 / 512×512 正方)全通過。 -### 待使用者驗證 -- [ ] Mac UI 端實測:上傳 `~/Downloads/000000000459.jpg` 應見 11 個 bbox 精準框住 person + tie -- [ ] Windows 實測首次 connect 耗時 + 是否還踩 HTTP timeout(現已放寬到 120s) +### 已驗證(2026-04-21) +- [x] Mac UI Comp/U 殘留路徑:reset 後推論 11 個 bbox 正確 +- [x] Mac UI Loader cold-boot 路徑(拔插 USB):skip reset 後推論 11 個 bbox 正確 +- [x] Windows 實測首次 connect:106s 成功(< 120s timeout),推論正確 + +### 後續優化:Windows connect 106s → 預期 ~40s(方案 C) + +Windows 實測發現即使 timeout 120s 夠用,使用者要等 106s 體感太久。拆解 +瓶頸發現走了兩次 firmware load(第一次 connect 進來 Loader → load fw +→ Comp/U ~35s / reset → 回 Loader / reconnect → load fw → Comp/U ~30s), +reset 流程中第二次 firmware load 是白做工。 + +**條件性 reset(方案 C)**: +- `kneron_bridge.py connect` 回報 `fresh_firmware_loaded` flag + - `True`:本次 connect 內部剛做過 firmware load(原本是 Loader) + - `False`:進來就是 Comp/U(上次 session 殘留,需要 reset 清乾淨) +- `kl720_driver.go` 判 flag 決定要不要做 restartBridge reset + +**驗證兩條路徑都 OK(2026-04-21)**: +- Loader cold-boot → skip reset → 推論 11 bbox ✓ +- Comp/U 殘留 → 做 reset → 推論 11 bbox ✓ + +**預期效益**: +- Windows cold-boot(最常見):106s → **~40s**(省 65s) +- Mac 跨 session(最常見):~15-20s 不變 +- 極少數情境(Windows 但 device 未斷電):維持走完整 reset 流程 + +### 待驗證 - [ ] Linux 實測 +- [ ] Windows 實測方案 C 效益(預期 cold-boot 降到 ~40s) ### 前端 debug log 去留 `camera-overlay.tsx` 的 `console.log('[bbox-debug] ...')` 驗證完成後**可清可留**。保留成本低,對未來 debug 有幫助。 diff --git a/local-tool/server/internal/driver/kneron/kl720_driver.go b/local-tool/server/internal/driver/kneron/kl720_driver.go index df13df6..28217c4 100644 --- a/local-tool/server/internal/driver/kneron/kl720_driver.go +++ b/local-tool/server/internal/driver/kneron/kl720_driver.go @@ -286,32 +286,37 @@ func (d *KneronDriver) Connect() error { if fw, ok := resp["firmware"].(string); ok { 
d.info.FirmwareVer = fw } + // Bridge reports whether firmware was freshly loaded during this connect. + // Freshly loaded firmware = clean state → no reset needed. + // Firmware already present (殘留 from previous session) → must reset to + // avoid Error 15 SEND_DATA_TOO_LARGE on first inference. + freshFirmware, _ := resp["fresh_firmware_loaded"].(bool) d.mu.Unlock() - // First connect after server start: reset device to clear stale models. + // First connect after server start: reset device to clear stale session. // - // BOTH KL520 and KL720 需要 reset: + // Why reset is needed: + // - KL720: flash-based,firmware 和 model 保留在 flash,reset 清 stale + // model 才有意義。 + // - KL520: USB Boot / RAM-based。若 session 間 firmware 殘留(不是剛載 + // 的 Comp/U),直接 load_model + inference 100% 炸 Error 15。必須 + // reset → Loader → reload firmware → Comp/U 得到乾淨 session。 // - // - KL720 是 flash-based 裝置,firmware 和 model 會保留在 flash,reset - // 清 stale model 才有意義。 - // - // - KL520 雖然是 USB Boot 裝置(RAM-based firmware,斷電即清),理論上 - // 每次 connect 是 clean state。但實測發現若 session 間 firmware 殘留 - // (fw=KDP2 Comp/U 而非 Loader),直接走 load_model + inference 會 - // 100% 炸 ApiKPException Error 15 (SEND_DATA_TOO_LARGE)。只有走 - // reset → reboot 到 Loader → 重新載 firmware 到 Comp/U 的完整流程, - // 才能得到能正常 inference 的 session。 - // - // 成本:KL520 reset + firmware load + reconnect ~15-20s(macOS 實測)。 - // Windows 上可能更久;若 HTTP connect timeout 60s 不夠,需調高或改 - // 非同步 connect pattern。 - if needsReset { - d.driverLog("INFO", "[kneron] first connect after server start — resetting %s to clear stale session...", d.chipType) + // Why we skip reset when freshFirmware=true: + // - 這次 connect 內部剛做過完整 firmware load → Comp/U 是新鮮乾淨的。 + // 再做 reset 會再砍掉 reload 一次,浪費 30-60s 沒意義。 + // - Windows cold boot 情境最常見(device 斷電後第一次 connect)— + // 省下 restartBridge 的 ~65s 代價。 + skipReset := freshFirmware + if needsReset && !skipReset { + d.driverLog("INFO", "[kneron] first connect — resetting %s to clear stale session (firmware was already present)...", 
d.chipType) if err := d.restartBridge(); err != nil { d.driverLog("WARN", "[kneron] reset on connect failed (non-fatal): %v", err) } else { d.driverLog("INFO", "[kneron] device reset complete — clean state ready") } + } else if needsReset && skipReset { + d.driverLog("INFO", "[kneron] %s: skipping reset — firmware just loaded, session already clean", d.chipType) } return nil diff --git a/local-tool/server/scripts/kneron_bridge.py b/local-tool/server/scripts/kneron_bridge.py index 62ab4a8..f567461 100644 --- a/local-tool/server/scripts/kneron_bridge.py +++ b/local-tool/server/scripts/kneron_bridge.py @@ -864,7 +864,11 @@ def handle_connect(params): kp.core.set_timeout(device_group=_device_group, milliseconds=_timeout_ms) _log(f"set_timeout succeeded") - # Firmware handling — chip-dependent + # Firmware handling — chip-dependent. + # fresh_firmware_loaded is used by Go driver to decide whether to + # skip the post-connect reset (freshly loaded firmware is already + # in a clean state — reset would just waste 30-60s reloading it). + fresh_firmware_loaded = False if "Loader" in fw_str: # Device is in USB Boot (Loader) mode and needs firmware if _device_chip == "KL720": @@ -904,11 +908,14 @@ def handle_connect(params): device_group=_device_group, milliseconds=_timeout_ms ) fw_str = str(target_dev.firmware) + fresh_firmware_loaded = True _log(f"Reconnected after firmware load, firmware: {fw_str}") else: _log(f"WARNING: {_device_chip} firmware files not found, skipping firmware load") else: - # Not in Loader mode — firmware already present + # Not in Loader mode — firmware already present from a previous + # session. This is the state that triggers Error 15 on inference + # without reset, per observed bug. _log(f"{_device_chip}: firmware already present (normal). 
fw={fw_str}") return { @@ -916,6 +923,7 @@ def handle_connect(params): "firmware": fw_str, "kn_number": f"0x{target_dev.kn_number:08X}", "chip": _device_chip, + "fresh_firmware_loaded": fresh_firmware_loaded, } except Exception as e: