From d7b5a2398a92df3daa704cdd81b2ddf17930e635 Mon Sep 17 00:00:00 2001 From: jim800121chen Date: Mon, 25 May 2026 08:10:46 +0800 Subject: [PATCH] =?UTF-8?q?feat(local-tool):=20M9-1=20=E2=80=94=20bridge.p?= =?UTF-8?q?y=20firmware=5Fupgrade=20handler=EF=BC=88KL520+KL720=20KDP1?= =?UTF-8?q?=E2=86=92KDP2=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A 階段第一個 milestone、純 bridge.py 層 + ctypes 直接呼叫 KneronPLUS C symbol。 Source: - server/scripts/kneron_bridge.py: 1207 → 2058 行(+851) - server/scripts/test_kneron_bridge_firmware.py: 新檔 840 行、36 unit tests 全綠 0.076s Firmware bundled: - server/scripts/firmware/KL520/fw_loader.bin(90112 bytes、MD5 aef7cca17bc023abbd6152c46c18e774、與 warrenchen 一致) - server/scripts/firmware/{KL520,KL720}/VERSION(v2.2.0) 實作對齊 TDD §6.1 規格(98% 對齊度): - handler input/output schema 100% - stage enum: preparing/loading/flashing/verifying/done/error(採 Design 命名) - reason enum 7/8(disconnect_during_op 留 M9-5 實機測試) - ctypes binding 1:1 對齊 warrenchen legacy_plus121_runner.py - 4 個情境 stage 序列驗證通過(KL520 KDP1+loader / KL520 KDP1 缺 loader / KL720 legacy / 已 KDP2) - timeout 60s/200s、USB stable 5-8s wait、SIGTERM 拒絕邏輯 - progress event schema 完整(percent/stage/message/elapsed_ms/eta_ms/extra) Reviewer 兩輪審查: - 第 1 輪:0 Critical / 3 Major / 4 Minor / 4 Suggestion - 第 2 輪:通過 with 1 Minor + 1 Suggestion(m5 test 死碼 / s5 test 註解、留 M9-2 順手清) - M3 firmware 字串覆蓋從 substring → 顯式 enumeration + KDP3+ forward-compat(防未來 brick 風險) - M2 控制流重構(needs_loader/should_run_loader_stage/loader_required_but_missing 三個顯式 bool) - m3 single-owner disconnect 原則完整落地 既有 6 個 handler(scan/connect/disconnect/reset/load_model/inference)零改動、無 spillover risk。 下一步:M9-2 Go driver UpgradeFirmware + firmware/service.go Co-Authored-By: Claude Opus 4.7 (1M context) --- ...1-bridge-firmware-upgrade-review-round2.md | 145 +++ .../m9-1-bridge-firmware-upgrade-review.md | 231 +++++ local-tool/.autoflow/progress.md | 41 +- 
.../server/scripts/firmware/KL520/VERSION | 1 + .../scripts/firmware/KL520/fw_loader.bin | Bin 0 -> 90112 bytes .../server/scripts/firmware/KL720/VERSION | 1 + local-tool/server/scripts/kneron_bridge.py | 853 +++++++++++++++++- .../scripts/test_kneron_bridge_firmware.py | 840 +++++++++++++++++ 8 files changed, 2109 insertions(+), 3 deletions(-) create mode 100644 local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review-round2.md create mode 100644 local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md create mode 100644 local-tool/server/scripts/firmware/KL520/VERSION create mode 100644 local-tool/server/scripts/firmware/KL520/fw_loader.bin create mode 100644 local-tool/server/scripts/firmware/KL720/VERSION create mode 100644 local-tool/server/scripts/test_kneron_bridge_firmware.py diff --git a/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review-round2.md b/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review-round2.md new file mode 100644 index 0000000..84a2fcd --- /dev/null +++ b/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review-round2.md @@ -0,0 +1,145 @@ +# M9-1 Reviewer Round 2 — bridge.py firmware_upgrade(第 2 輪修改驗證) + +> 審查日期:2026-05-25 +> 範圍:backend 第 2 輪修改驗證、不重審第 1 輪細節 +> 第 1 輪報告:`m9-1-bridge-firmware-upgrade-review.md` + +## TL;DR + +**通過(with 1 Minor + 1 Suggestion)。建議解除 M9-2 阻擋、可啟動。** + +backend 第 2 輪修改紀律高、所有 Major / Minor 全部正確落地、4 個情境 stage 序列邏輯清晰可讀、ctypes 簽名測試補上、firmware 字串覆蓋從 substring → 顯式 enumeration + forward-compat。第 2 輪僅發現 1 個 Minor regression(test 檔留下 `_firmware_upgrade_start_ts` 死碼、不影響 prod 但 cleanup 不徹底)+ 1 個 Suggestion。**第 1 輪 8 項 issue 修了 8 項、新發現 0 Critical / 0 Major / 1 Minor / 1 Suggestion**。s1 / s2 follow-up 評估合理、不影響 M9-2 啟動。 + +--- + +## 1. 
第 1 輪 issue 修改驗證(逐項) + +| # | 第 1 輪 Issue | 第 2 輪修改位置 | 驗證結果 | +|---|--------------|--------------|---------| +| **M1** | `_FwError` / `_FwTimeoutError` / `_fw_handle_failure` 宣告位於 handler 之後、readability + 防禦問題 | `kneron_bridge.py:1535 / 1545 / 1553` 全部移到 handler(1587)之前 | ✅ 完全到位 | +| **M2** | `needs_loader` 控制流隱式 | 1717-1722:三個顯式 bool(`needs_loader` / `should_run_loader_stage` / `loader_required_but_missing`)+ 註解四情境 | ✅ 完全到位、M9-2 Go driver 可對照註解 | +| **M3** | substring match 對 KDP3 forward-compat 脆弱 | `_fw_classify_legacy` 重寫(1463-1508):`legacy_exact` set + `startswith("KDP1.")` + KDP2-9 prefix forward-compat | ✅ 完全到位 | +| **m1** | `_FW_ALLOWED_CHIPS` 雙重防護 | 1180 constant + 1201 內部過 + 1204 字元防護 | ✅ 真的雙重防護、非冗餘 | +| **m2** | libkplus fallback non-deterministic | 1324 `sorted()` + 1330 WARNING log | ✅ 完全到位 | +| **m3** | double-disconnect risk | `_fw_handle_failure` 不再 disconnect、success 路徑兩處設 `dg=None`、finally 用 `if dg is not None` | ✅ Single owner 原則完整 | +| **m4** | `_firmware_upgrade_start_ts` 全域變數 | prod code 全砍、SIGTERM closure capture | ⚠️ prod 完全到位、但 test 檔殘留死碼(標 m5) | +| **s4 (1-4)** | 4 個 test case | 4 個新 test 全補(line 405-487 + 500-568) | ✅ 完全到位 | + +第 1 輪 8 項 issue 修了 8 項、其中 7 項完全到位、1 項(m4)prod 完全到位但 test 殘留(標 Minor m5)。 + +--- + +## 2. 
第 2 輪新發現(regression risk) + +### Critical / Major +**無**。 + +### 🟡 Minor + +| # | 軸 | 檔案:行 | 問題 | 建議修法 | +|---|---|---------|------|---------| +| **m5** | Correctness / Test hygiene | `test_kneron_bridge_firmware.py:616, 632` | m4 在 bridge.py 已砍 `_firmware_upgrade_start_ts`、但 test 仍 `bridge._firmware_upgrade_start_ts = 0.0`、`bridge._firmware_upgrade_start_ts = start_ts`。Python 動態 setattr 不會拋錯、但留下死碼掩蓋 m4 修改徹底性 | 刪除 line 616 + 632 兩行賦值、保留 615 `_firmware_upgrade_in_progress = False` | + +### 💡 Suggestion + +| # | 軸 | 檔案:行 | 建議 | +|---|---|---------|------| +| s5 | Test | `test_kneron_bridge_firmware.py:680` | `test_sigterm_handler_unregistered_after_upgrade` 第二次呼叫 register 會覆蓋第一次的 `_fw_original_sigterm_handler`、測試邏輯能過但意圖不清。建議補註解「測試 register 的 idempotence」或拆兩個 test | + +--- + +## 3. 4 個情境 stage 序列驗證(M2 重構後) + +| 情境 | 預期 stage 序列 | 實作確認 | 結果 | +|------|---------------|---------|------| +| 1. KL520 KDP1 legacy + loader.bin | preparing → loading → flashing → verifying → done | 1739-1783 loader stage + 1799-1812 `kp_load_firmware_from_file`;test `test_kl520_kdp1_legacy_full_5_stages`(line 214-247) | ✅ | +| 2. KL520 KDP1 legacy 缺 loader.bin | preparing → error(loader_write_failed) | 1730-1736 raise `_FwError("loading", "loader_write_failed", ...)`;test `test_loader_write_failed`(line 346-358) | ✅ | +| 3. KL720 KDP1 legacy 無 loader.bin(warrenchen 模式)| preparing → flashing → verifying → done | 1784-1788 `elif needs_loader` skip loading + 1813-1828 `kp_update_kdp_firmware_from_files(scpu, ncpu, True)`;test `test_kl720_kdp_legacy`(line 271-288) | ✅ | +| 4. 已 KDP2(KL520 / KL720)| preparing → flashing → verifying → done | `_fw_classify_legacy` KDP2 prefix 命中 return False、走 warrenchen 模式;test `test_kl520_already_kdp2_short_circuit`(line 249-269) | ✅ | + +**四情境 stage 序列邏輯與 test 驗證皆完全對齊、M2 重構成功。** + +--- + +## 4. 
s1 / s2 follow-up 評估 + +| follow-up | 留 follow-up 理由 | 對 M9-2 影響 | +|-----------|----------------|------------| +| **s1**(handler ~300 行抽 helper)| 抽 helper 需傳大量 shared state、closure 設計細節多、M2 已讓 main flow 可讀性大幅提升、ROI 不高 | ❌ 無影響、M9-2 看 bridge 介面而非內部結構 | +| **s2**(poll loop 用 exponential backoff)| 實測 5s 已穩、上界 8s、最多 6 次 poll、CPU 開銷可忽略、純 micro-optimization | ❌ 無影響、Go driver 不參與 polling | + +兩個 follow-up 都不阻擋 M9-2。 + +--- + +## 5. TDD §6.1 對齊度 + +| 項目 | round 1 | round 2 | +|------|---------|---------| +| Handler input/output schema | 100% | 100% | +| stage enum | 100% | 100% | +| reason enum 8 種 | 7/8 | 7/8(`validate_failed` downgrade-only、A 階段不需) | +| progress event schema | 100% | 100% | +| Stage 觸發點 | ✅ | ✅ M2 重構後流程更清晰 | +| Timeout 60s/200s | ✅ | ✅ | +| USB stable 5-8s | ✅ | ✅ | +| Graceful shutdown 拒絕 | ✅ bridge 端 | ✅ bridge 端 | +| ctypes 走法 | ✅ | ✅ + binding 簽名 test | +| MAGIC 值 | ✅ | ✅ | +| 防 firmware 字串脆弱 | ⚠️ substring | ✅ 顯式 enumeration + forward-compat | + +對齊度 **98% → 98%**(preparing stage 細分 scan/connect sub-message 留 M9-5);但 M2 + M3 對「未來 KDP3+ device 不會誤觸 loader」這個未來相容性問題顯著改善。 + +--- + +## 6. 既有 6 個 handler 零改動驗證 + +| Handler | round 1 行為 | round 2 行為 | 結果 | +|---------|------------|------------|------| +| `handle_scan` / `handle_connect` / `handle_disconnect` / `handle_reset` / `handle_load_model` / `handle_inference` | 693-1202 | 693-1202 | ✅ 6/6 零改動、無 spillover | + +第 2 輪修改完全限制在 firmware_upgrade 區段(1205-1983)+ `_resolve_firmware_paths_full`(1183-1240)+ 1180 `_FW_ALLOWED_CHIPS` constant。 + +--- + +## 7. 
9 個新增 tests 品質評估 + +| Test | 對應 | 品質 | +|------|------|------| +| `test_kl520_legacy_empty_firmware_string` | M3/s3 | ✅ 邊界完整 | +| `test_kl520_legacy_usb_boot_strings` | M3/s3 | ✅ subTest 7 變體 | +| `test_kl520_legacy_kdp1_variants` | M3/s3 | ✅ KDP1.x / 大小寫 | +| `test_kdp3_kdp4_not_legacy` | s3 | ✅ **最關鍵**、KDP3-9 forward-compat | +| `test_unknown_firmware_default_not_legacy` | M3 | ✅ 保守 default | +| `test_loading_stage_disconnect_during_op` | s4(1) | ✅ 覆蓋 1764-1775 | +| `test_loading_stage_reconnect_failed` | s4(2) | ✅ `call_count` 攔截 | +| `test_failure_event_full_extra_fields` | s4(3) | ✅ 4 必填欄位 | +| `test_libkplus_binding_signatures` | s4(4) | ✅ MockCDLL 設計巧妙 | + +9 個新 test 都覆蓋盲點、命名清楚、docstring 帶 line ref、品質高。 + +--- + +## 8. 是否阻擋 M9-2 + +**否、解除阻擋、可啟動 M9-2**。 + +理由: +1. 第 1 輪 3 Major 全部完全到位、其中 M2 控制流重構對 M9-2 Go driver 開發直接有幫助 +2. M3 firmware 字串覆蓋解決未來 KDP3+ device brick 風險、長期穩定性關鍵改善 +3. m5 是 test hygiene 問題、不阻擋功能 +4. s1 / s2 follow-up 對 M9-2 介面層完全無影響 + +--- + +## 9. 是否需 backend 第 3 輪修改 + +**否**。僅 1 Minor + 1 Suggestion、可在 M9-2 期間順手清掉或併入 M9-1 PR 收尾 cleanup commit。 + +--- + +## 10. 
結論 + +- **通過 with Minor**:m5 + s5 不阻擋 M9-2 +- **M9-2 啟動建議**:立即派 backend +- **m5 + s5 處理**:M9-2 期間順手清、不必為此單獨派 backend round 3 diff --git a/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md b/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md new file mode 100644 index 0000000..e86c7b3 --- /dev/null +++ b/local-tool/.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md @@ -0,0 +1,231 @@ +# Reviewer Report — M9-1 bridge.py firmware_upgrade handler + +> 審查日期:2026-05-25 +> Reviewer:Autoflow Reviewer Agent +> 對應任務:M9-1(A 階段、KL520/KL720 自動升級 KDP1 → KDP2) + +--- + +## TL;DR + +整體實作品質**高**、規格對齊度**高**。核心 5 stage 流程(preparing / loading / flashing / verifying / done)+ 8 種 reason enum 7 個都正確落地、ctypes 路徑與 warrenchen reference 對齊、SIGTERM 拒絕邏輯 + timeout 護欄都有寫到。但有 **3 個 Major 瑕疵**(M1:`_FwError` class 在 caller 之後才宣告、屬 readability + 防禦性問題、happy path 不會拋 `NameError`;M2:`needs_loader` 控制流隱式、KL720 KDP1 無 loader.bin 時會隱式跳過 loading stage、容易誤讀、但非功能 bug;M3:`_fw_classify_legacy` 對 firmware 字串的判斷覆蓋不足)、**4 個 Minor 安全 / 健壯性 / 架構問題**、**4 個 Suggestion**。建議:**通過 with Major fixes**、阻擋 M9-2 啟動直到 Major #1 + #2 修完。 + +--- + +## 審查範圍 + +| 檔案 | 行數 | 性質 | +|------|------|------| +| `server/scripts/kneron_bridge.py` | +767 行(既有 1207 → 1973) | 修改、新增 firmware_upgrade handler 與 helpers | +| `server/scripts/test_kneron_bridge_firmware.py` | 622 行(新檔)| 27 unit tests、mock-based | +| `server/scripts/firmware/KL520/fw_loader.bin` | 90112 bytes(新)| binary、MD5 `aef7cca17bc023abbd6152c46c18e774` | +| `server/scripts/firmware/KL520/VERSION` | single-line | metadata | +| `server/scripts/firmware/KL720/VERSION` | single-line | metadata | + +--- + +## 🔴 Critical(必修、阻擋 merge) + +| # | 軸 | 檔案:行 | 問題 | 建議修法 | +|---|---|---------|------|---------| + +**無 Critical 發現**。沒有導致升級流程 brick / 資料洩漏 / 永久 hang 的問題。 + +--- + +## 🟠 Major(強烈建議修、建議阻擋 M9-2) + +| # | 軸 | 檔案:行 | 問題 | 建議修法 | +|---|---|---------|------|---------| +| M1 | Correctness | `kneron_bridge.py:1550, 1565, 1575, 1583, 1604, 1628, 
1644, 1651, 1675, 1690, 1719, 1730` vs `1784` | `_FwError` / `_FwTimeoutError` class 宣告位於 `handle_firmware_upgrade()` **之後**(1784 / 1794 行)。Python 在 import 時類別宣告會被執行、但**只在那一行之後可用**。當 caller 從另一個 module 先 import `kneron_bridge` 完整跑完整個檔(module-level 順序執行)後再呼叫 `handle_firmware_upgrade()`、運作 OK;但若有任何單元測試或 lint 工具觸發 `handle_firmware_upgrade.__code__.co_consts` 預編譯類型檢查、或在 1207–1799 之間任何時點觸發 reload,就會踩到 `NameError`。更現實的問題:**讀者 / Reviewer 邏輯流動不順**——handler 拋 `_FwError` 的程式碼出現在 class 定義「之前」。**Python module 載入順序**理論上 import 結束後類別已宣告、handler 才被呼叫、實務上應該不會 fail;但測試 `test_kneron_bridge_firmware.py` 在 `setUp` 階段做 `bridge._fw_register_sigterm_handler` 等才呼叫 handler、邏輯 OK。**雖然在 happy-path 不會 fail、屬於 readability + 防禦性問題(一旦有人在 1500-1700 之間插入 module-level code 觸發呼叫就會炸)**。 | 把 `class _FwError` / `class _FwTimeoutError` 移到 `handle_firmware_upgrade()` **之前**(建議放在 1476 行、緊鄰 `def handle_firmware_upgrade` 之上)。同步把 `_fw_handle_failure()` 也移上去。或者把 firmware 相關所有 helpers + classes + handler 集中重組為一個明確的「FW 區段」、加分隔 comment block。**這是 readability + 防禦的 Major、不是 P0 bug**、但既然 reviewer 看到了就請補。 | +| M2 | Correctness | `kneron_bridge.py:1599-1613` | KL520 走 KDP1 legacy 但缺 `fw_loader.bin` 時、走 `_FwError("loading", "loader_write_failed", ...)`——正確。但 **KL720 KDP1 legacy (pid=0x0200) 走 `_fw_classify_legacy → True`、進入 `if needs_loader:` 分支**、loader path 是 `None`、走進 `else` 分支(1614 行)寫 `_log(...)` 並**沒有 ctypes call**。然後流程直接掉到 1657 行的 `# ── flashing:寫入 KDP2 firmware ──`。問題:**flashing 分支條件是 `if needs_loader and fw_paths["loader"] is not None:`**(1666 行)—KL720 needs_loader=True 但 `loader=None` → 走 `else`、用 `kp_update_kdp_firmware_from_files(scpu, ncpu, True)`、這就是 warrenchen 模式、OK。**但這條 KL720 KDP1 legacy → flashing 走 `kp_update_kdp_firmware_from_files` 的 path、對應的測試 `test_kl720_kdp_legacy` 在 270-289 行驗 `["preparing", "flashing", "verifying", "done"]` 是符合預期的**。**所以 M2 不是 bug、是 deeply nested control flow 的 readability 問題**——`needs_loader=True` 但 `loader is None` 進入「skip loading stage」是隱式行為、容易誤讀。建議重構 `needs_loader` 
邏輯讓「actually do loading」明確一些。 | 抽出 `should_run_loader_stage = needs_loader and fw_paths["loader"] is not None`、在 1596 行用這個 bool 判斷、移除 1599-1613 行的 nested `if loader_path is None / else`。重構後讀者一眼看到「KL520 KDP1 沒 loader.bin → fail」「KL720 KDP1 → skip loader → 直接 flashing」邏輯。 | +| M3 | Correctness | `kneron_bridge.py:1666` flashing 分支條件 | flashing 分支 `if needs_loader and fw_paths["loader"] is not None:`。如果 device 是 **KL520 already KDP2**(`needs_loader=False`、`loader` 可能存在或 None)走 `else` 分支用 `kp_update_kdp_firmware_from_files(scpu, ncpu, True)`——這符合 test `test_kl520_already_kdp2_short_circuit`(249 行)。問題:當 KL520 KDP2 device + loader.bin 存在時、條件 `needs_loader=False and loader is not None` → 仍走 `else`、不寫 loader——OK。但若改成 **KL520 KDP1 legacy 但偵測誤判 needs_loader=False**(極端 edge case、`firmware="KDP2"` 但 device 其實是 legacy state)→ 走 `else` 直接 ctypes 升、device 可能拒收(或 brick)。這屬於 `_fw_classify_legacy()` 的判斷品質問題、不是分支本身錯。 | 增加 `_fw_classify_legacy` 的測試 case 覆蓋更多 firmware 字串值(如 `"USB Boot"`、`"Loader"`、空字串、含特殊字元)。或者在 verifying 階段失敗時加 rollback hint(log「device may be in inconsistent state、suggest re-plug + re-scan」)。 | + +--- + +## 🟡 Minor(建議修、不阻擋) + +| # | 軸 | 檔案:行 | 問題 | 建議修法 | +|---|---|---------|------|---------| +| m1 | Security / Correctness | `kneron_bridge.py:165, 193, 1567` | `_resolve_firmware_paths_full(chip)` 的 `chip` 參數直接 `os.path.join(base, "firmware", chip)`、`chip` 來源是 JSON-RPC stdin(攻擊面:bridge 程式被 spawn 出來的 parent process 注 `{"cmd":"firmware_upgrade","chip":"../../etc/passwd"}`)。雖然 1505 行有 `if chip not in ("KL520", "KL720")` 的 allow-list 護欄、實際上 path traversal 不可能、但**這個防護依賴 allow-list 一個地方**。若未來有人為了支援 KL630/KL730 拓寬 chip 列表時、必須維持 allow-list 嚴格度。 | 在 `_resolve_firmware_paths_full()` 內部再 enforce 一次 `if not re.match(r'^KL\d+$', chip): return ...`、或 `if chip not in ALLOWED_CHIPS:`。雙重防護、避免單點失守。 | +| m2 | Security | `kneron_bridge.py:1306` libkplus fallback `os.listdir(lib_dir)` | `_fw_load_libkplus()` fallback 路徑:當 `libkplus.dylib/.so/.dll` 找不到時 grep `lib_dir` 找任何 `libkplus*` 檔。理論上 
`lib_dir` 是 `kp./lib`、被 pip 安裝的、應該乾淨。但若使用者環境有被 supply chain attack 注入惡意 lib、grep 第一個 match 並 `CDLL()`、攻擊面存在。優先級低。 | `candidates.sort()` 後取第一個(確保 deterministic),或加 hash whitelist 驗證 lib 完整性。最少加 `_log(f"WARNING: fallback to {candidates[0]}")` 讓上游 server log 看得到。 | +| m3 | Correctness | `kneron_bridge.py:1773` finally cleanup vs disconnect call 順序 | `finally` block 順序:(1) reset 旗標 (2) 清 dg lib (3) unregister sigterm。但 `_fw_handle_failure()` (1824 行) 在 raise path 已經 disconnect 過一次了——若 `_fw_handle_failure()` disconnect 成功、`finally` 再 disconnect 一次拋 `OSError: access violation`、被外層 `except Exception` 吞——表面 OK 但每次 fail path 都 double-disconnect。實際 KneronPLUS SDK 對 already-disconnected handle 行為未定(warrenchen 沒這樣做)。 | `_fw_handle_failure` 內 disconnect 後把 caller 的 `dg` 設 `None`(透過 return + caller 收)、或 finally 內檢查 `dg is not None` 改為 try/except 包嚴。建議:在 `_fw_handle_failure` 內 disconnect 後 caller 不要再 disconnect、把 dg disconnect 責任交給單一 owner。 | +| m4 | Architecture | `kneron_bridge.py:1228-1229, 1857` 全域變數 | `_firmware_upgrade_in_progress` + `_firmware_upgrade_start_ts` 兩個 module-level 全域變數。SIGTERM handler closure 內存 `start_ts`、但同時 module 也存 `_firmware_upgrade_start_ts`。**為什麼存兩份**?handler closure 已抓 `start_ts`、module 全域變數只在 register / unregister 期間用、似乎可以砍掉 `_firmware_upgrade_start_ts`、保留 `_firmware_upgrade_in_progress` 即可。雙重來源容易未來 desync。 | 砍 `_firmware_upgrade_start_ts`、SIGTERM handler 用 closure capture 的 `start_ts`。或者反過來、SIGTERM handler 讀全域、不用 closure。**一個 source of truth**。 | + +--- + +## 💡 Suggestion(純改善建議、不必處理) + +| # | 軸 | 檔案:行 | 建議 | +|---|---|---------|------| +| s1 | Readability | `kneron_bridge.py:1477-1782` `handle_firmware_upgrade` 整個函式 ~300 行 | 抽 helper:`_fw_prepare_phase(chip, port)` / `_fw_loading_phase(...)` / `_fw_flashing_phase(...)` / `_fw_verifying_phase(...)`。讓 main handler 只看流程順序、各 phase 細節在 helper。但要小心 closure / shared state(`dg`, `lib`, `before_fw`, `target_pid` 等)的傳遞。重構成本不低、可留 M9-2 整合 driver layer 時一起做。 | +| s2 | Performance | `kneron_bridge.py:1432-1438` 
poll loop | `while waited < max_wait_s` 用 `time.sleep(0.5)` 輪詢。實測 5 秒已穩、上界 8s 合理。Suggestion:可考慮 exponential backoff(0.5s → 1s → 1.5s)減少 polling 次數;但 stable 7s 多輪詢 14 次也沒什麼大不了。 | +| s3 | Correctness | `kneron_bridge.py:1454` `KDP` substring match | `if "KDP" in fw and "KDP2" not in fw:` 用 substring match 判斷 legacy。若 firmware 字串為 `"KDP3.0"`(未來版本)→ contain "KDP" + not contain "KDP2" → True(被判 legacy)、會誤觸 loader stage、可能升不上去甚至 brick KDP3 device。雖然 KDP3 還沒出、但 substring match 對未來不穩。 | 改成正則或顯式 enumeration:`fw.startswith("KDP") and not (fw.startswith("KDP2") or fw.startswith("KDP3"))`、或更好——對外 source-of-truth 為 product_id 加 firmware 字串顯式比對表 (`("USB Boot", 0x100): legacy`, 等等)。 | +| s4 | Test | `test_kneron_bridge_firmware.py` 全檔 | 缺以下測試 case:(1) `_fw_emit_progress` extra dict 含 `device_id` / `before_version` / `error_code` 等 TDD §4.2 完整失敗欄位(目前只測 `reason` + `raw_error`);(2) connect after loader stage 失敗(loading→connect_failed reason 路徑)目前未驗(1648-1654);(3) `disconnect_during_op` reason(device 在 loading 階段消失、目前測試只覆蓋 `verify_not_found`);(4) ctypes binding 簽名測試(驗 `argtypes / restype` 設對)。 | + +--- + +## 對 TDD §6.1 規格的對齊評估 + +| TDD §6.1 規格項目 | 實作狀態 | 證據 | +|------------------|---------|------| +| Handler input `{port:str, chip:"KL520"\|"KL720"\|"KL630"\|"KL730"}` | ✅ + 安全防護 | 1502-1508 行明示拒絕 KL630/KL730(A 階段範圍)、回 `scan_not_found` reason | +| Handler output (success) `{status:"upgraded", before_firmware, after_firmware, method, duration_ms}` | ✅ 完全對齊 | 1745-1751 行 | +| Handler output (failure) `{error, stage, reason, raw_error}` | ✅ 完全對齊 | `_fw_handle_failure` 1828-1834 行 | +| stage enum `preparing / loading / flashing / verifying / done / error` | ✅ 完全對齊 | `_FW_STAGE_PERCENT` 1218-1225 行 | +| reason enum 8 種 | 7/8 實作 | 已實作:`scan_not_found / connect_failed / loader_write_failed / upgrade_mid_failed / disconnect_during_op / timeout / verify_mismatch / verify_not_found`、**未實作**:無——重新檢查:1500 / 1576 / 1584 / 1604-1607 / 1645 / 1652 / 1676 / 1690 / 1720 / 1731 / 1768、發現 
`connect_failed` 用於 libkplus 載入失敗(1576)+ 真正 connect failed(1584)+ reconnect after loader failed(1652)—`connect_failed` 三個來源、OK。**`disconnect_during_op` 已在 1644-1647 行(loading 階段 device 失蹤)有用、不是只留給 M9-5**。 | +| progress event schema `{percent, stage, message, elapsed_ms, eta_ms, extra}` | ✅ 完全對齊 | `_fw_emit_progress` 1246-1273 行 | +| Stage `preparing` 觸發點:scan + connect | ✅ | 1537-1542 行(scan)+ 跨越 connect (1582)、單一 `preparing` event 涵蓋 scan + connect 兩個動作。**問題**:使用者體驗上 `preparing` 5% 顯示 7 秒(scan + connect 加總)會卡。建議拆 `preparing` 為兩個 sub-message("scanning" → "connecting")保持 5% 但 message 更新。 | +| Stage `loading` 觸發點:KDP1→KDP2 走 SDK loader | ✅ | 1615-1621 行 | +| Stage `flashing` 觸發點:寫入 KDP2 | ✅ | 1659-1664 行 | +| Stage `verifying` 觸發點:rescan + 驗證版本字串 | ✅ | 1705-1710 行 | +| Stage `done` 觸發點:完成 | ✅ | 1738-1743 行 | +| `_FW_STAGE_PERCENT`:preparing=5/loading=20/flashing=50/verifying=90/done=100/error=-1 | ✅ 完全對齊 TDD §4.3 | 1218-1225 行 | +| timeout 護欄 KL520=60s / KL720=200s | ✅ 完全對齊 AC-FW-1.7 | 1214-1215 行常數 | +| USB stable 5-8s wait(AC-FW-1.6) | ✅ 完全對齊 | 1414-1438 行 `_fw_rescan_and_wait`(initial=5s、max=8s)+ test `test_kl520_kdp1_legacy_full_5_stages` 驗 | +| Graceful shutdown 拒絕(AC-FW-1.9、TDD §8.6) | ✅ 部分實作(bridge 端)| 1848-1898 行 SIGTERM handler、push `shutdown_rejected` event;server-side lock 由 M9-2 / M9-3 實作(檔頭 1840 行明示這是預期)| +| ctypes 走 `kp_update_kdp_firmware_from_files`(KneronPLUS Python 沒 public API、56-m9-6 強驗證結論) | ✅ 完全對齊 | 1336-1342 行 binding、1621 / 1683 行 call | +| KDP MAGIC = 536173391(warrenchen reference 一致) | ✅ | 1207 行常數 | + +**TDD §6.1 對齊度評估**:**98%**。1 個欠缺(`preparing` stage 應該細分 scan / connect 兩個 sub-message)屬於 Minor 體驗問題、非規格錯誤。其他全部對齊。 + +--- + +## 對 27 個單元測試的評估 + +### 測試覆蓋率 + +| 範疇 | 測試數 | 覆蓋程度 | +|------|--------|---------| +| 成功路徑 | 5 | KL520 KDP1 legacy、KL520 already KDP2、KL720 legacy、progress schema、duration_ms 對齊 | +| 失敗路徑 | 7 | scan_not_found / connect_failed / loader_write_failed / verify_mismatch / verify_not_found / error event schema / unsupported 
chip | +| Timeout | 2 | KL520 60s、KL720 常數驗證 | +| Graceful shutdown | 2 | SIGTERM rejected during upgrade、SIGTERM handler 還原 | +| `_fw_classify_legacy` | 4 | KL720 by pid、KL520 by string、KDP2 not legacy(KL520+KL720)| +| `_fw_eta_ms` | 2 | ETA 遞減、KL720 > KL520 | +| `_resolve_firmware_paths_full` | 3 | KL520 含 loader、KL720 含 scpu/ncpu、unknown chip | +| `_fw_emit_progress` JSON schema | 2 | 正常 / 含 extra dict | + +合計:**27 個 unit tests**。 + +### 測試品質評估 + +| 維度 | 評分 | 說明 | +|------|------|------| +| 覆蓋廣度 | 良好 | 5 個成功 stage + 7 個失敗 reason + timeout + sigterm,主要場景全覆蓋 | +| Mock 合理性 | 良好 | FakeLib 模仿 ctypes 介面、FakeDeviceDescriptor 模仿 SDK descriptor、time.sleep no-op;mock 邊界清楚不過度 | +| Edge case 覆蓋 | **不足** | 缺:`_fw_classify_legacy` 對空字串 / `"USB Boot"` / `"Loader"` 等真實字串測試;缺:scan call 拋 exception(非空回傳);缺:loader stage connect_failed 路徑(1648-1654);缺:disconnect_during_op 在 loading stage(1644-1647)| +| Determinism | 良好 | sleep no-op、time.monotonic mock;無 race condition | +| Reviewability | 良好 | 每個 test 有 docstring 說明驗哪一條 AC、stage 序列檢查清楚 | + +### 缺漏項目(建議補但不阻擋) + +1. **`disconnect_during_op` reason 在 `loading` stage**(1644-1647 行的失敗路徑)目前無測試 +2. **連線失敗在 `loading` stage**(1648-1654 行 reconnect after loader)目前無測試 +3. **`_fw_emit_progress` extra dict 完整失敗欄位**(TDD §4.2 列 `device_id`、`error_code` 等、目前 caller 沒填、測試也沒驗) +4. 
**ctypes binding 簽名測試**(`argtypes / restype` 設對)目前 mock 跳過、實機跑才驗 + +--- + +## 安全軸特別評估 + +| 重點 | 評估 | 細節 | +|------|------|------| +| ctypes 接受的 path 是否有 path traversal / unicode normalization 風險 | ⚠️ **m1 標記** | `chip` 參數來自 stdin、`_resolve_firmware_paths_full(chip)` 走 `os.path.join(base, "firmware", chip)`、依賴 caller (1505 行) 的 allow-list 護欄、未來 chip 列表拓寬時容易破防。**已標 Minor m1、建議雙重防護**。**目前實作下不可能 traversal**(allow-list 在 firmware path 解析前)、所以 Reviewer 評為 Minor + 不升級給 security agent。 | +| firmware 檔案完整性驗證(MD5 / SHA / size check) | ❌ **未實作** | bridge.py 載 firmware 前不驗 hash。攻擊面:使用者環境若被換 `fw_scpu.bin` 為惡意 binary → ctypes 餵給 device → brick 風險。**但**這個攻擊路徑要求攻擊者已能改使用者本機檔案、屬於 "post-compromise" 場景、Bundle 進 dmg 已有 codesign 簽章保護、加 MD5 比對價值不高。**Minor、可後續加(如 build time embed SHA256 + runtime verify)**。 | +| 升級失敗時是否會留下 device 處於可被 brick 的狀態 | ✅ 設計有考慮 | `verify_not_found` + `verify_mismatch` reason 區分;UI 提示「重新插拔」;無自動 rollback(rollback flash 也會 brick)。屬接受的取捨、Design Spec R-FW-11 已聲明。 | +| SIGTERM handler 是否會跟 Python signal handler 衝突 | ✅ 設計正確 | `_fw_register_sigterm_handler` 1880 行 save 原 handler 到 `_fw_original_sigterm_handler`、`_fw_unregister_sigterm_handler` 1893 行還原;Windows 不註冊(platform check)。Test 驗了 unregister 後 `_fw_original_sigterm_handler` 為 None(519 行)。 | +| firmware/ 目錄是否有路徑注入風險(chip 參數來自外部) | ⚠️ **m1 標記** | 同上、依賴 allow-list、Minor | + +**安全軸結論**:**5 個重點 4 個明確過、1 個有 Minor 改善建議**。**無需升級給 Security Auditor**——攻擊面都需要 attacker 已能修改本機 firmware 檔(post-compromise)、不涉及 auth / OAuth / 第三方整合 / PII,超出 §3.4 的「升級給 Security Auditor 的情境」清單。 + +--- + +## 跨檔案 / 跨端一致性檢查 + +| 比對項目 | 狀態 | 證據 | +|---------|------|------| +| handler 名稱 `firmware_upgrade`(cmd dispatch)| ✅ | 1942 行 main loop | +| stage 命名與 TDD §4.3 / Design 一致 | ✅ | `_FW_STAGE_PERCENT` 1218-1225 | +| reason enum 與 TDD §3.4 一致 | ✅ | 8 種 reason 7 個實作(缺 `validate_failed`、屬 downgrade-only、A 階段無需) | +| Stage % 對照 TDD §4.3(5/20/50/90/100/-1)| ✅ | 1218-1225 | +| Timeout 常數 60s/200s 對齊 AC-FW-1.7 | ✅ | 1214-1215 | +| MAGIC 值 536173391 對齊 
warrenchen | ✅ | 1207 | +| firmware 目錄結構 `firmware/<chip>/{fw_scpu, fw_ncpu, fw_loader}.bin` | ✅ | 已新增 KL520/fw_loader.bin + VERSION | +| ctypes binding 與 56-m9-6 強驗證對齊 | ✅ | argtypes / restype 都明示設、與 warrenchen `legacy_plus121_runner.py` 一致 | + +--- + +## 對 M9-2(Go driver `UpgradeFirmware()` + `firmware/service.go`)的影響評估 + +| M9-2 依賴的 bridge.py 介面 | 已就緒 | 備註 | +|------------------------|--------|------| +| cmd `firmware_upgrade` 接受 `{port, chip}` | ✅ | | +| 成功回 `{status, before_firmware, after_firmware, method, duration_ms}` | ✅ | | +| 失敗回 `{error, stage, reason, raw_error}` | ✅ | | +| stderr push `firmware_progress` JSON event line | ✅ | | +| stderr push `shutdown_rejected` event line | ✅ | | +| 行為穩定性 | **建議先修 M1 + M2** | M1(class 順序)不致命但 readability、M2(needs_loader 邏輯重構)讓 M9-2 寫 driver tests 時更容易理解、避免在 driver 端複製混淆 | + +--- + +## 結論 + +### 審查結果 + +**通過 with Major fixes — 阻擋 M9-2 啟動直到 M1 + M2 修復。** + +- ✅ 程式碼品質高、TDD §6.1 規格對齊度 98% +- ✅ 27 個 unit tests 涵蓋成功 + 失敗 + timeout + sigterm 主要場景 +- ✅ ctypes 走法與 warrenchen reference 對齊、56-m9-6 強驗證結論落地 +- ⚠️ 3 個 Major、其中 M1 + M2(class 順序 + needs_loader 邏輯)建議修完再啟 M9-2 +- ⚠️ 4 個 Minor(雙重 chip allow-list / libkplus fallback sort / double-disconnect / 全域變數冗餘)建議跟 M9-2 整合時順手修 +- 💡 4 個 Suggestion(拆函式 / poll 用 backoff / KDP 字串 match 改正則 / 補測試)可後續迭代 + +### 是否阻擋 M9-2 啟動 + +**建議**:阻擋、要求 backend 先修 Major M1 + M2、預估 0.2 人天內可完成。 + +理由:M9-2 寫 Go driver 時會基於 bridge.py 行為設計 retry / progress parser、若 bridge.py M2 的 `needs_loader` 控制流不清、driver 端容易誤判 stage 完成度。class 順序(M1)雖不致命但 fix 成本 5 分鐘、值得一起修。 + +### 是否需升級給 Security Auditor + +**否**。本次審查的 security 軸僅有 Minor(chip allow-list 雙重防護、libkplus fallback 排序)、不涉及 auth / OAuth / 第三方整合 / PII / 金融資料、不在 §3.4 升級情境清單。 + +### 建議 Orchestrator 派 backend 修以下項目 + +優先級(高 → 低): + +1. **M1**:移 `_FwError` / `_FwTimeoutError` / `_fw_handle_failure` 到 `handle_firmware_upgrade` **之前**(檔案 1476 行附近) +2. **M2**:抽 `should_run_loader_stage` bool、重構 1596-1655 行的 nested 邏輯 +3. **m1**:在 `_resolve_firmware_paths_full()` 內部加雙重 allow-list 防護 +4. 
**m3**:解決 finally double-disconnect 問題(單一 owner 原則) +5. **m4**:砍 `_firmware_upgrade_start_ts` 全域變數 +6. **s4**:補 4 個欠缺的 test case(loading-stage disconnect / loading-stage connect_failed / 完整失敗欄位 schema / ctypes binding) + +--- + +## Verification(Reviewer 自評) + +| 項目 | 狀態 | Evidence | +|------|------|---------| +| **R-A1:5 軸 + 測試軸全跑過** | ✅ | Correctness(M1-M3 + m3 + s3)/ Readability(s1)/ Architecture(m4)/ Security(5 重點逐項評 + m1 m2)/ Performance(s2)/ Test(27 tests + s4 缺漏)—全部有實質判斷、無單軸用「OK」結案 | +| **R-A2:文件符合性 checklist 完整** | ✅ | TDD §6.1 對齊評估表 17 項、安全軸 5 項、跨檔案 8 項、M9-2 依賴 6 項——四張比對表都填滿 | +| **R-A3:每個 Critical / Major 都附 line number + 規則 + 建議修法** | ✅ | M1 列了 12 個 line ref + Python module load 規則討論 + 移動方案;M2 列 1599-1613 / 1666 + nested control flow 規則 + 抽 bool 方案;M3 列 1666 + classify 判斷品質規則 + 增測試方案;4 個 Minor 都附 file:line + 具體建議 | +| **R-A4:至少寫一項「優點」** | ✅ | TL;DR 段「核心 5 stage 流程 + 8 種 reason enum 7 個都正確落地、ctypes 路徑與 warrenchen reference 對齊、SIGTERM 拒絕邏輯 + timeout 護欄都有寫到」;TDD 對齊度 98% 評估;測試覆蓋廣度評「良好」;mock 合理性評「良好」;安全軸 5 項重點過 4 項 | +| **R-A5:不確定的點明寫「Needs investigation」或明示無** | ✅ | 本次審查無 Needs investigation 項目——所有判斷都有規格 / line ref / 規則三方支持;明示於本欄 | +| **R-A6:§12.2 通用退出條件 6 條已標示狀態** | ✅ | No silent failures:`try/except: pass` 在 1402-1404 / 1267-1273 / 1633-1637 / 1697-1700 等多處、皆為「best-effort cleanup」場景、有 _log 或意圖明確(disconnect 失敗預期、progress emit 失敗不影響升級)—**Minor 但合理、不升 Major**。No dead code:未發現。No hardcoded secrets:未發現(MAGIC 是公開常數 not secret)。No unsafe HTML/SQL:N/A(不適用 bridge.py)。Doc 同步:N/A(bridge.py 程式碼註解詳細、TDD 已寫好)。Working tree clean:未檢查 git status、reviewer 不動 source、不影響審查結論 | + +| Verification 條件 | 結果 | +|------------------|------| +| 是否真的讀完整份 source(不是只看 diff) | ✅ 讀了 1200-1900 主體 + 1-200 + 200-300(context)+ 整份 test 檔 | +| 規格對照是否完整 | ✅ TDD §3.4 / §4.2 / §4.3 / §6.1 / §8.6 / AC-FW-1.6 / 1.7 / 1.9 全部對照 | +| 5 軸是否真的都過 | ✅ 全跑、各軸都有具體 finding 或明示無發現 | + +--- + +> **完成回報**: +> - 報告路徑:`.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md` +> - Critical: **0** / 
Major: **3** / Minor: **4** / Suggestion: **4** +> - 是否阻擋 M9-2 啟動:**建議阻擋直到 M1 + M2 修復**(預估 0.2 人天) +> - 是否升級給 Security Auditor:**否**(安全軸僅有 Minor、不在升級情境清單) diff --git a/local-tool/.autoflow/progress.md b/local-tool/.autoflow/progress.md index 45f369d..225d211 100644 --- a/local-tool/.autoflow/progress.md +++ b/local-tool/.autoflow/progress.md @@ -173,8 +173,45 @@ - 建議 A 階段 M9-3 或 M9-4 完成後啟動實機驗證、避開 bridge.py 改檔衝突 - **派工前要確認**:KL630/KL730 dongle 硬體狀態(有/多久能拿到/沒有)→ 決定走強驗證或弱驗證 -**Track 3(開發、待 Track 1 + Track 2 完成)**: -- [ ] M9-1 ~ M9-5(A 階段 MVP) +**Track 3(開發)**: +- [x] **M9-1 bridge.py firmware_upgrade handler 完成**(2026-05-25) + - `server/scripts/kneron_bridge.py`:1207 → 1973 行(+767) + - `server/scripts/test_kneron_bridge_firmware.py`:622 行(新檔、27 unit tests 0.069s 全綠) + - `server/scripts/firmware/KL520/fw_loader.bin`:90112 bytes(從 warrenchen 複製、MD5 `aef7cca17bc023abbd6152c46c18e774` 一致) + - `server/scripts/firmware/{KL520,KL720}/VERSION`:v2.2.0 + - **完全對齊 TDD §6.1**:stage 採 Design 命名 / 8 種 reason enum 7 覆蓋(disconnect_during_op 留 M9-5 實機)/ ctypes 走法 1:1 warrenchen / progress event schema 含 percent/stage/message/elapsed_ms/eta_ms/extra + - **唯一邊界決定**:KL720 legacy 無 fw_loader.bin、走 flash-based 模式(不過 loading stage、直接 `kp_update_kdp_firmware_from_files(scpu, ncpu)`)、保守 + 向前相容 + - **既有功能無 regress**:scan/connect/disconnect/reset/load_model/inference 一行沒改 + - **待 M9-2/3/5 解決**:Go driver stderr 解析 / Service mutex / HasActiveTask / disconnect_during_op 實機 / 三平台 ctypes 實機驗證 +- [x] **M9-1 Reviewer 第 1 輪審查完成**(2026-05-25)→ `.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review.md` + - 結論:**0 Critical / 3 Major / 4 Minor / 4 Suggestion** + - 建議**阻擋 M9-2 直到 M1+M2 修完**(0.2 人天) + - Major M1:`_FwError` / `_FwTimeoutError` / `_fw_handle_failure` class 宣告在 handler 之後(讀者邏輯流動問題、5 分鐘可修) + - Major M2:`needs_loader` 控制流隱式、需抽 `should_run_loader_stage` bool(M9-2 Go driver 易誤判 stage 完成度) + - Major M3:`_fw_classify_legacy` substring match 對 firmware 字串覆蓋不足 + - 4 Minor + 4 
Suggestion 可留 M9-1 修改一起處理或 M9-5 follow-up + - TDD §6.1 規格對齊度 98% + - 不升級給 Security Auditor(5 軸 security 重點 4 過、1 Minor) +- [x] **M9-1 Backend 第 2 輪修改完成**(2026-05-25) + - `kneron_bridge.py`:1973 → 2058 行(+85) + - `test_kneron_bridge_firmware.py`:622 → 840 行(+218、27 → 36 tests、0.076s、0 regression) + - 3 Major M1+M2+M3 全修 + - 4 Minor m1+m2+m3+m4 全修 + - s3 firmware 字串覆蓋擴展(legacy_exact set + KDP3+ forward-compat) + - s4 補 4 個 test case + - **留 follow-up**:s1 handler ~330 行抽 phase helper / s2 rescan exponential backoff +- [x] **M9-1 Reviewer 第 2 輪審查完成**(2026-05-25)→ `.autoflow/05-implementation/review/m9-1-bridge-firmware-upgrade-review-round2.md` + - 結論:**通過 with 1 Minor + 1 Suggestion、解除 M9-2 阻擋** + - 第 1 輪 8 項 issue 修了 8 項(M1/M2/M3/m1/m2/m3/m4-prod/s4 全到位) + - **第 2 輪新發現**:0 Critical / 0 Major / 1 Minor m5(test 檔 `_firmware_upgrade_start_ts` 死碼)/ 1 Suggestion s5(test 註解) + - **不需 backend 第 3 輪**、m5+s5 可在 M9-2 期間順手清 + - TDD §6.1 對齊度維持 98%、M3 forward-compat 對未來 KDP3+ device brick 風險顯著改善 + - 既有 6 handler 零改動驗證通過 +- [x] **M9-1 整體完成**(2026-05-25)→ 通過 with Suggestions、可進 M9-2 +- [ ] M9-2 Go driver UpgradeFirmware + firmware/service.go +- [ ] M9-3 API handler + WebSocket progress +- [ ] M9-4 Frontend FW badge + 升級 modal +- [ ] M9-5 三平台實機驗證 - [ ] M9-6 ~ M9-13(B 階段擴展) --- diff --git a/local-tool/server/scripts/firmware/KL520/VERSION b/local-tool/server/scripts/firmware/KL520/VERSION new file mode 100644 index 0000000..a4b6ac3 --- /dev/null +++ b/local-tool/server/scripts/firmware/KL520/VERSION @@ -0,0 +1 @@ +v2.2.0 diff --git a/local-tool/server/scripts/firmware/KL520/fw_loader.bin b/local-tool/server/scripts/firmware/KL520/fw_loader.bin new file mode 100644 index 0000000000000000000000000000000000000000..51ca844c879cd374c6b4db401d4952f4c604ae24 GIT binary patch literal 90112 zcmeI2dwf*Yz3A84d(R`2NwOyi5JC)lh9ElubO0Z*qVAbVb|w#;fO-lAbwUD8Kp;HS zpdOvzD{5^g_-MfPHhOG})}E7anig%N0c;CW+dVv5gz$0O+n&gbz3w5w*?G?Wtvvy$ 
zpPu`N`|wUf;FWZ$0*Oy9EB+9G?FhxIu8|!NtM-9US`F-xm;2=iBpVfIxEt7NFwqq(!K{ao4TaaMKc_+|ud zMe)e-ma60Xu?nI2)x~2FLar|d`QAL)CUf#kIhE8>g3R~1doyJ$BSCQCk_^>3%3S<` z*K~pfSv`w`r&uwq`1J&45(DnOh3B<*ZB4 z-ix%iNBFoEHF!|TGcm`CCu4)#3w@G;_OAU;ycd@RVwM%t2GFdAID-Qk@-h(q=sfbG zBG1LJW3fCR@_T!P?S?jdm(+1&Fs*C6&qD>PJwn0j7txHWIE$s&sg_^N+~jeno2-G@ z4d%*N{{tldD^fVB;f))inae#SZzJCT((Zo;bz+x<&>SVsFpY6_JL+`Gftb4EKpdfl zIET*cKYPwsMfr1eP49p$bA0y##g71a7|i80PL#R=~T}3Xr?=;E?UE1r={x z7`hI>)HTLHI1hVOkym-u4&cLZYGV*Gty%*?)#uyf2y==(tY=Sb7_ zG-o7W;0$AO*;E^Ap^cUKGPKbKZS>T~OpKu18N0{I_ z8fNa`3*%-~7q>3w<2K|0K2Y8=r0qBWHC;jaLb{MzLi%H`moe(oQYBqj;@j(e{lXG2 zGqA#ow511-Hva%Z?q05VYXWiHS;)Ua@;1dW&M)2$F{ZOcgpF^7w=vGKd%V{7BVKo( zxz4i8tg0bN!|Y3+Sx{#cf8E>OFHfYB`zC~&c|HhEDC5ow-$0{G0tqcu9nztMzzU=6 z&`e)MYN<-U1Ai*9lkeRN9!4> z$df>rP%QM}sGwoQ6T7pF*c?Ys&uH5PM(rLAUl0~Yz=rKladM6<#eiBY6BFxW_2nrJ z`HmPyGnFI8Oiv~MJ!O)!jhXI+G@U8Z7g*p_LB46KXH7mirarj9ccahN*=YK!tqW`c za!tTkue#IbW=@eYaDEV*_?gX~bLXv<2qfvF*9TjAzg@uVc=a^R!s00SZZ30}iJZ@> z@Z5;Otu7vk&bOFOqB$#yvramn zX!eKQIOG{_!8mBDW+4XbI6?x?TmOz!sr6s6;1Kv5d-G+hY_8n^mIrAAIwv=Sl}6g; zaY4Qn@|v-P=NZ*{1lEazsBARaQU$pPSj7i(XQQ$)2qU!ceCpm^-Nnl(yzU1p)zmQt&^-arnbYgL~@}qbrl8Ww)AsdV(^X-98^zx2wU_0&9hcL$SRM0A^ z`8O=$=oaV^rqi*v|2$gMf1dH0iFN$pu>?Y&401AZN;TC5-Z)nevzpIURf|PgEZ;0k zay9hP>l0!5S($`+W3AddVTOKg;?aUw6l#diokw%th1qF7lS+Pj!YkK8n)V5kno1_8 zLPHZA^f_{gwW;I-lkDk$mU3g+0`qQetpF=dDtUA~l?+Vsuvm8s-+#@Sch ziwo~+Duy}CT!=Omv3oyAIN1CO2do0Df?ZZ$R@A0ieVhXJKyzd|BHIT4EuLxGRGg)* zuH?=E6~>gnCQfC~5dV+BkIFt*fW#i403=Z5&OzuylrJHq-3C@3{xly#>TS>mftR~q zg_fITG&kojbH#_EsAQh3*I=jVC8Tn`={mkmxEYVK8t+X;-K}M!L)oHT%eq^;5^g4= zZ2D=_-U^N3nD{P+je8ls;=v+Qb(`i=uT6FXgyfuZoNjF9)(xv11rUcl#Br+KA=>JUn;UV ztW=4G(I0$Tpf-xvD^21~Wu2HCefiTCal2w2xKB(d&EghiGpV6yj`phC#RbX(kh)de zp?pi8u_scsspx@EKNw=_88Jg)LGNQ7@f^skU`B$=xbIUEj?FlD9?nlWW2qgKetLj5wnRv(i(8|uK7;iz3p*&!FJ%Oaj? 
z5zPvE^$CqjX4x zeY|+3$SK!#v61Y3Oe9Oap=)ID4ml{Jy{Y8SMxT_~&RuJmPN|rQFfEWCX;G%r{HC=K zB?Md0!kj7@4qt~ZbAM9@LPLH+6i>fNWJgc(yQ(x)*5cD|JEnFIpXpg@bMNsL*0jtV%GJC!yqt@yo zrX!sL*f0E60^1S;>pn#&L&V6vXz`NkuD{{N@|zmgHa4wWPb|Y%aUQR)qEaStNLvXd zk-3D#$A>@v+01T{tYhC@Ye6T?^I(K9F3n;e|BRXQsmyA~e{alcXOx@^Eeo>Ldt=-h z=2Xsw&;kqiKX{bH2Nz)VKQ6RDf2GRjzz*Rfy}o~)Xoopo33EvnuY$RGOY|0Xy;z{! zCq5P}SCP|kEk5fIklI=V_8Xhu%-)wuo*O@RJ_l#-WjkP>zuN+B<2sh9FbX8}y^qdg zKCSg3(PG%$*{?eGAlEtF-i2P|c01nLS%l))_Dkpw?qq)=NQOz@6Hb*(KNvA!4+J`TXC56s;3BYIQN{)?U4$lA{-$tVIqQsJBd5%EAiQn zbS_M+MgTFLS&4w9JAndomH9$;gy{@uj1`IYGm@8yEQA;fnP;7;WYvg!5ficK_$t_4 z;(cHNE(|c8Nc?78zSZuhe1P^YwJ#vZKB*h9u{q>Ae# zE0|QoMKH`F9O0tZTprVxM(w!)lRD$FCEHYVsWrm`m*&Gy|7#%()%oBAaIlB-z^XsD zSe_xbbKFUjdXp>&-C(twVBcem-?p!GBJvRypK{MbsxS|wuSB}$_(-? zEmiK`7rFz(!gVgWz!`!a(5qvC;XlkRgOGV2*xxbcRoLygtWH=1qt;GlHx^OU+JOb@ zt8fTW5RCClDStb5Db>NTtZsK7(`g0mFQQDx-(k(>d(Cy@TKOcSj%(LXHWl}S6-5j5 zC^<)WJ&jR$4;F0-BU)koF)Ix0m03UDz}7f?crDpE!)!hjgxO2lnJr^jG1F@m1G+GO z+qk)b;c!qY0c!z1)FPCyDkgC)d`Wf>oYG<5@d1w(-YRL~mUlx~+U2wP-VFwZM_VS7 zD@U_?N)4R@_lr`ycPyIS1N%Wp!_^$uyO98KqfsOrg_gJvAyj_I-3zCAC<8i3+o?Ad zmEMg?^{Bfq=jZO;gWZ*Hh2)AosiZQBN;b)9t@W3+Jl*bJ->v75X}v{t=gyn{aHlQ= zFNVVNl0opb92~M-{TT5DkM3U-G-(nxmE|1&l&7>nOUq_;@pgyL0 zIcQ1Egf!8y_o6vg7ubn8g|OdyD`oEsoAD`3dLrE$y_pDhiApMYI%QsVmtKO{pl!Yx z`ZJaMV+z82mS7FK`$%3zy}T0%spP5Dg!ZOBq1BFjw=0#rC-ovcSEo|RzR8#Cz&=<5 z!zGKIp^$t~N8y8D9}enPaQ5&=5VkrEi=owyKJQ0++PtRdbJe)B-HW3(uxOcrGsL*i zTx;;DI@{L3aMuks*qaHVt_#OKrc(BV1kxF_s8W%1;gm-RLb?+e(n&HB{mM?L$Bfit zT^~R>sbp?yYc+B$UG!R_98GAJ5iCq-_lzK|X!Mb~Pjs!PM}|@LP4vp`GC4Ca0@N-t zp|K;*(5lLRjdq2)E(#6in@kFkdv02wv~<0uJ_IHF*QDg)5(PNypG>|tIk)-B)PL2T zvIuMNWU^;+XIHV>wyX_Od^8ytmKHA}Z?r#-1cntCl)NaQ3H3<(-AIA@80h?J^5l6o z_gU~AniQJ5y}pkl;P0MX@84UEX5%1&+NX5Sg?9I3@^AVp@b2|VBzTM45KgGhgL;@a zmsHfh-zpBD2xugZ55MS#laq$F;$XxtxN*oitU7VVilo~)p>5M2??NyqHcz5XyNEx=PBOoc2`SY@ zq_t}&(f_!o=RGg__*QuZoH7Jx_4h|mC)`9u5`(0zm6H{NPjrnA+HSe!7NNGHez8!u zrJ-rvHUYx06G5wM7t3{b3lBof;%nSCbRCR6xz2Nkf1&rz{>PpC)cbAg8n=MXPGRkq 
zEjzAA_rUhG3rUo)X-m^O5|7*hpZC#yKb=a~yt#49Hb}ffXxy@CQ^V#a!Rd5fk$(G* zhHX0pP*~UeVDsktggboocM0ns5_W{PZF%6nkO27$*IaWAtkHYG`WOCb(PY zwk|N#)(P92A6_TSZ4%b*+_7%^741o@ldPt62(^XxH*bQ7CP7>(l)G!arI6rHKmC{S zsrncUE;k+;OY-C3yv<1%0h$Ld7bL9TvMpV|g+>vem~Ldcz{V{PY~BINSJWr{B3&!O zh`hSg=bNG18ou5yT5qO%+A=}2U{sP{lcTbs=e?OHp7)wQzNgSvP!&3S9^HpjUnnog z^m8h66(2(4=U~B*$VA1EwFe1WpSi~3=Yy7-+(OtFGH?Pd!Gak4^PmWK2PNQso-6R( z#R5OO7teu{kels*T@e$K$F&GpwkOADgUxmK7W$vC$N@-ZKe3Qdl6D^T;#Y*D&`Ul zr=1d9%`yuX<>m1VSaS&x;ZQw{`xaQA?DBN5^W)l$P?Oun=ff@XBDmWa*X~I5ebyqS zlGpzlA#z6r!;v3`OPh)^p1-q#e9IE}fy{K+T~FQ%`~r;7_u>Bh*AQL+?S9gM(4C-< z=CC^s1%XQ*D&>`$kgdlC>zzN?9^}+Q`Na^LKNgJy{UM}=f$q3*U*hh~u0`p<`Ti{NZ|8ybdYx1`f@Mp|hqtu&Q(WbDea`Kh#9drsdEH-G4K z6RF?e7^yXe6htz}Y}B4H!sI3{-HMY_Ih7wmy%~Nq$Jz7YPETm$uup@c6 z1!?b(70Afw&wtQuUOVI=yX$7B#$ZoFTKCO=s@pb|Z<~>C+f=?U{!?8K*pBs7!sc0GiGY5m$UQEur|Lxj;q*j?nbnLt<`sfO_(5^~HnwN07U5+c-^_4CLUD6L2*kX*Q*p7|UVf<`{`TMMK0Q?j>2e+Zc!lo&k?lq@s?-=b>kImA#?>>L;nY( z2HTBaXZux~Nxxg2Q|;J#sU2-s)a@U2tEY6)W!;Te=<>aLYBs@5)8c{Y;wB|$;Fa3x zq7QbiFsE<*rf;?X-o|CkD>iNQrvKW%vmSKw2W}NN!)?|Qu|ZiXHY&G>Wzl-*?SkM2 zwF&O1)`_lY1j4V>zop(+ZWd=5`E7sbk;k8SW^?&0G0zA;^Za*T{NAfwZ|r-!`{xIK z-T%Rn-zz6i52%0r?DGVw4>b%SR3GH~B8D#lKE?1U;Nu~0IGr>=+(zP48u>m_z6X`5 zMD9XY8(Z z$||^@lvU8g_i^egDauKLA>%&))3ki1l8$nUtXHsmt5%aC?=u_2!m z3JqB;OgE%e$Tj5Ce6}Ggd7B|Q-fTz@%pXz?LXn1F4JzQfyCt9!S=3T%$g{yJL+%aw z47n$(CJ1I&TEd5)5fei-541D53scvBVbYB%($|F$S_DA zEN}UtC#4oHzMX$>+vztS+y?iF`!X_3QjMK)mQqB_A7KRDE5IxG4SRq48_{ zi{MQ?yxBTFzxvvm`l&Y?#^+TpG(sZe&^TUFT~^Zs??`CPxKJ$`p`{SIYy7I}xiziu zj)ZO>pXr|iIUa!={o~WCoi#_Ma`cSnRTmo}lE*j3i$JRgYI^_!L7>W`Io`Lr+5AVaJ}FT zLU?8#{DuZ(B}g~yfqFrH3N8Wfk;93QvwsM0W6Dr;ISanE4BTsahS75HuSomD%h9lu zyZ>VNwoGOJMRc19p5bdPpe1ePRM6cDC)?rhR!FhfNC78d@CyulAMt2B#(un(oL0*Z z@ioxTj=YT+V&}k3X{OJoso2rT!VN!i+G=>&(IU0u9?6R0{WucFapVYmnQB2X4^qM{ zRoMACj4Jn`4`82zovGxj(ED7qsU{=TGj<{2_eoZ%cmjR}Ai%yy^5!6OE$T(CJtlI7 z-amn~stGugN#PA-z2*=y!L7(yq|IiqvqO?H!U!c)p zubt7Qt)J*`bTlt(C3om%xVPn&o;st~!B@I7`cg-`@1P}LIjnOHCv?H#gj4@vkgpXV 
z*2^6ybgRPy>(^nBhxIzoQJo8h%~zc|q2K8|4DJ)X?jREE3Bi6u=0;J%;muI&@i49q z;kXBz;%zv4+Y#L^!3~!9zs?E*@{--P=iw%l?9kPw$BP@=fxw8f;rdra2IBOvp% z;+(4$@^y)^1%{8@Y=aig^cE`Cc)xu>x8~VwGovUjS?<#3@vp%xUxybRx5>{IRzlbm ze+CCaGb#c>F3vcA588ymxfA5e8>BJV1VZl?)&#S+9R)&Pz?k9tKZh|BhVc)57eI5{ z0_&(<_s2{obPZO_GFAgIG(8YIXx*2vTFs+Y-8^dlQjW1~Al7F0$2bmtyJ4}% zGtJq`Z}l}>{{*fIZYD1I=S}&u)Zgll!rMou-lqNOw~ubE(P6G87{tKuRQBi`-{JlD zfwNO%=orXjdY9*zzHj+4{q054g*x!_j)0g*yB!1^*|D#j! zk3#&TQ}K^F$PLt`GJ$&)mcy;QmdcqMb=Dkh=R=B~D+Sxzp>HvK59|m<nHV$`m0+Hgvwgo6{Qe!l`giT5_iRK_aBfQDogGapE1Rsg!*N) zoQ9M)$=?7?CYhm^w>A|KJ#lB3_wNJMGI@)%V>%Z83ZsII30CHNv;2#Ca4FnomU0SP z3SSFU)C_ll%_!c6Q0U`!ru8|fBmCN@2WdZpw0s{6E}h;~E%Zo&M}IQMA0*b{px%y% zwIiGt`T5>@{oAvNb;Ln(Qxk@7+;B?+7BVe`DG656h__Bj^OS`BZIIl^{XR9@&pG?` z4|uTGy4%qYGrVm^KiH8inv?0*mpU!3e!ax0=*a2Sc!)vZw}Kyo-#Qg981eo3lYGBk z0eRFxZy^{V9L!kcz$E89i{D(0LjC&hd2_WDT7z(UjV0~5+aws@$a4ts@326Rhn%4(};n@Vl4U&R`0%g7K>L;K^U%xS2TgAw$;we8XUxi+qB1T+|p zVY70F2Ma=fX~(T#rQkPVur5KqdY=H_t8^f->%B|9qwjUKvU6buAVwh;wott%cwFZ& zFhGzyX6B}nizai_g3u~5HxL8gaTw>SAdG87elLK5@9!p=xgz9Fc#!c6^U71_uZ2FV ztLxWQUI^7L6N9z>U{D0@?~IdY=lG9jtY7=%wb;oBIQa`5?$U!eIiMqfNqQ#T@9zs? 
zaUx32s5lwbOGAO-_F%iO8SHZ?h+ynBL8{;A)8Y9Zq3=SCR)q&$z&LS^Do3(5Bf zEW9-Mo6Ph$CVL4W#$&|7*joZAZr7K>vsS+qo_?q^(w2{q@eFm->fSNIK6Yk9L6CWe ztNB(#DwzW}w8mJ2JnjN7v2oAJv#XJogJ4y=4lHPp4^5N1zgq584!^qh&>8Qr>-(t8;XQ50O|8Td?O7g)FDjipLhDw9w4}WOd zpFodWqHiWR#!~y-%Dr&6o-w;LNUR`2#NLv1j*O3rB`ycdW-+SgV@4sTORSO=_#lMS zUkCVpcP}eDG!{l=e}Y4}0%lkRDxIm2JyYfg5=RZ6eFSdeKeo#76&>PDBN>Gz^`wrL zbE0r~@C2HT#J?(E4!X&xggHlO<&2nWkqfLN=nw0JA<{mA{VM~PRxCpJ)?WB#Bcl%7 z!E{I=CPL<8&SW#(Ca02ro4B=4gM6-nd$AlNx6gEYbr8O5*lUq$huh>#fLiM(;ae$orIOoINSmjdx{=`Ss}0$VU+A2BpYQeTWOoL%@HRNLcrtzX5$4eh zvnS?tcpu?Hp4cJ0>k&_E7d`|}FGK2P57vU+YY%=1XIKbJo()GW;|g|qU{6fm`#1EH zu;7O<-wStg@cT8P!l5GVKKSZ$uO0#JJnS)$wo1<}WL4%EeEo0`;v)m7EFLA}FQ*!d z*|1;Z;hS8p!ikn>`(|M{T;IOFeKVYsK(V^tH|9|D*4OQ z-8vUX8U}XIdzbgl`8tVDC4cgMdf!ai5pM1ul3=AHerZ$xkVObXi0>t}7lIAJ*Mt9a zs>RYKWDaMgHQSm=>y0u&GrZN>1Lde>r0~%1s!=|$hjO6L%-}e-n*2t^zm}=SLOWSu zr{?T5i-}>bVX(#2W|@=0`IyvIFbgeQTYd&3I_CLYIF;l}ix%hM_lq&Z z;(3MnxvcvrgZW~P&EUdig|Bmkw{!W~Y;Er5+;zFz3b$Bs{=0>)Tp_nMw=VbkcNeeO zW}hP+EUf1WgIr+?R~Y7SSz!ue{web7G(`U>0ZM=ppaduZN`Mle1SkPYfD)htC;>`< z5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)ht zC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPY zfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle z1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZ zN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=p zpaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq z0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`< z5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)ht zC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPY rfD)htC;>`<5}*Vq0ZM=ppaduZN`Mle1SkPYfD-ushQNV$k}v&lQZ9}4 literal 0 HcmV?d00001 diff --git a/local-tool/server/scripts/firmware/KL720/VERSION b/local-tool/server/scripts/firmware/KL720/VERSION new file mode 100644 index 0000000..a4b6ac3 --- /dev/null +++ b/local-tool/server/scripts/firmware/KL720/VERSION @@ -0,0 +1 @@ +v2.2.0 diff --git 
a/local-tool/server/scripts/kneron_bridge.py b/local-tool/server/scripts/kneron_bridge.py index f567461..a879b6e 100644 --- a/local-tool/server/scripts/kneron_bridge.py +++ b/local-tool/server/scripts/kneron_bridge.py @@ -155,7 +155,12 @@ def _log(msg): def _resolve_firmware_paths(chip="KL520"): - """Resolve firmware paths relative to this script's directory.""" + """Resolve firmware paths relative to this script's directory. + + Returns (scpu_path, ncpu_path) tuple for backward compat with existing + handle_connect() callers. Use _resolve_firmware_paths_full(chip) to get + loader path additionally (only KL520 has fw_loader.bin in A 階段). + """ base = os.path.dirname(os.path.abspath(__file__)) fw_dir = os.path.join(base, "firmware", chip) scpu = os.path.join(fw_dir, "fw_scpu.bin") @@ -172,6 +177,69 @@ def _resolve_firmware_paths(chip="KL520"): return None, None +_FW_ALLOWED_CHIPS = ("KL520", "KL720") # A 階段範圍、Reviewer m1 雙重防護用 + + +def _resolve_firmware_paths_full(chip="KL520"): + """Resolve scpu / ncpu / loader paths. + + A 階段:只有 KL520 有 fw_loader.bin(用於 KDP1 legacy → KDP2 升級的 SDK + loader stage)。KL720 不需要 loader(不走 SDK loader path、直接 ctypes + 呼叫 kp_update_kdp_firmware_from_files 也不需要 loader 檔)。 + + Reviewer m1:對 chip 參數做雙重 allow-list 防護。chip 來自 JSON-RPC stdin、 + 雖然 caller (handle_firmware_upgrade) 已 enforce allow-list、但這裡再過一道 + 避免未來 caller 拓寬時破防。額外拒絕含 path separator / 父目錄 / 絕對路徑 + 的非法輸入、確保 os.path.join 絕不 traverse。 + + Returns: + dict: {"scpu": , "ncpu": , "loader": , + "version": } + 若 scpu/ncpu 任一缺檔、scpu/ncpu 為 None。 + """ + # 雙重 allow-list 防護(caller 已過一次、這裡再過一次防 path traversal) + if not isinstance(chip, str) or chip not in _FW_ALLOWED_CHIPS: + return {"scpu": None, "ncpu": None, "loader": None, "version": None} + # 額外字元防護(即使 _FW_ALLOWED_CHIPS 拓寬到不安全字串也擋) + if "/" in chip or "\\" in chip or ".." 
in chip or os.path.isabs(chip): + return {"scpu": None, "ncpu": None, "loader": None, "version": None} + + base = os.path.dirname(os.path.abspath(__file__)) + fw_dir = os.path.join(base, "firmware", chip) + scpu = os.path.join(fw_dir, "fw_scpu.bin") + ncpu = os.path.join(fw_dir, "fw_ncpu.bin") + loader = os.path.join(fw_dir, "fw_loader.bin") + version_file = os.path.join(fw_dir, "VERSION") + + result = {"scpu": None, "ncpu": None, "loader": None, "version": None} + if os.path.exists(scpu) and os.path.exists(ncpu): + result["scpu"] = scpu + result["ncpu"] = ncpu + if os.path.exists(loader): + result["loader"] = loader + if os.path.exists(version_file): + try: + with open(version_file, "r", encoding="utf-8") as f: + result["version"] = f.read().strip() + except Exception: + pass + + # Fallback: KNERON_FW_DIR env var + if result["scpu"] is None or result["ncpu"] is None: + env_dir = os.environ.get("KNERON_FW_DIR", "") + if env_dir: + scpu2 = os.path.join(env_dir, "fw_scpu.bin") + ncpu2 = os.path.join(env_dir, "fw_ncpu.bin") + if os.path.exists(scpu2) and os.path.exists(ncpu2): + result["scpu"] = scpu2 + result["ncpu"] = ncpu2 + loader2 = os.path.join(env_dir, "fw_loader.bin") + if os.path.exists(loader2): + result["loader"] = loader2 + + return result + + def _detect_model_type(model_id, nef_path): """Detect model type and input size from model ID or .nef filename.""" global _model_type, _model_input_size @@ -1134,6 +1202,787 @@ def handle_inference(params): return {"error": str(e)} +# ── Firmware upgrade (A 階段 M9-1) ─────────────────────────────────── +# +# 對應 TDD v2/firmware-management.md §5.1 / §6.1: +# - 自動升級 KDP1 legacy → KDP2,含 KL520(USB Boot mode + loader stage) +# 與 KL720(含 KDP legacy pid=0x0200)。 +# - Stage 命名採 Design:preparing / loading / flashing / verifying / done / error +# (TDD §4.3 為 source of truth)。 +# - 失敗 reason enum(TDD §3.4):scan_not_found / connect_failed / +# loader_write_failed / upgrade_mid_failed / disconnect_during_op / +# timeout / 
verify_mismatch / verify_not_found。 +# +# 為什麼走 ctypes:KneronPLUS Python wrapper 沒 export +# `kp_update_kdp_firmware_from_files`(見 research-kl520-fw-management/ +# 56-m9-6-strong-validation-result.md 附帶發現 1),warrenchen reference +# 實作 `LocalAPI/legacy_plus121_runner.py` 直接 ctypes 打 C symbol,本檔 +# 沿用該模式。 + +KDP_MAGIC_CONNECTION_PASS = 536173391 # 與 warrenchen reference 一致 +KP_SUCCESS = 0 +USB_WAIT_AFTER_REBOOT_MS = 2000 # SDK loader 階段 reboot 等待 +USB_WAIT_AFTER_UPGRADE_MS = 5000 # AC-FW-1.6:升級後 5-8s USB stable +USB_WAIT_RETRY_CONNECT_MS = 200 +MAX_RECONNECT_RETRIES = 15 # 5s sleep + 15 * 200ms = 8s 上界 + +KL520_UPGRADE_TIMEOUT_S = 60 # AC-FW-1.7 +KL720_UPGRADE_TIMEOUT_S = 200 # AC-FW-1.7 + +# 進度事件 stage % 對照(TDD §4.3) +_FW_STAGE_PERCENT = { + "preparing": 5, + "loading": 20, + "flashing": 50, + "verifying": 90, + "done": 100, + "error": -1, +} + +# 升級進行中旗標(SIGTERM handler 用、AC-FW-1.9 graceful shutdown 拒絕) +# Reviewer m4:原本還有 _firmware_upgrade_start_ts 全域變數、與 SIGTERM handler +# closure capture 的 start_ts 重複、容易未來 desync → 砍掉、單一 source of truth +# 走 closure。 +_firmware_upgrade_in_progress = False + + +def _fw_normalize_code(code): + """Convert int8-like unsigned (e.g. 253 for -3) to signed. + + 與 warrenchen reference 一致:某些 legacy 路徑回 unsigned int8 值。 + """ + try: + c = int(code) + except Exception: + return code + if c > 127: + return c - 256 + return c + + +def _fw_emit_progress(stage, message="", elapsed_ms=0, eta_ms=0, extra=None): + """Push a progress event to stderr as a JSON-RPC notification line. 
+ + Go driver 抓 stderr line-by-line、轉成 WebSocket FirmwareProgress 給前端。 + Schema 對齊 TDD §4.2 `FirmwareProgress`: + {"event": "firmware_progress", "percent": int, "stage": str, + "message": str, "elapsed_ms": int, "eta_ms": int, ...} + + Stage `error` 時 caller 應 push 額外 reason / raw_error / before_version + 透過 extra dict。 + """ + payload = { + "event": "firmware_progress", + "percent": _FW_STAGE_PERCENT.get(stage, 0), + "stage": stage, + "message": message, + "elapsed_ms": int(elapsed_ms), + "eta_ms": int(eta_ms), + } + if extra: + payload.update(extra) + try: + # 寫到 stderr、與既有 _log() 同 fd、但用 JSON 格式(不加 [kneron_bridge] prefix) + # 方便 Go driver 區分「progress event JSON」vs「自由文字 log」。 + print(json.dumps(payload), file=sys.stderr, flush=True) + except Exception: + # progress emit 失敗不該影響升級流程本身 + pass + + +def _fw_load_libkplus(): + """Load libkplus shared library via ctypes、bind needed C symbol signatures. + + 跨平台:macOS .dylib / Linux .so / Windows .dll。優先用 `kp` module 已載 + 入的 lib path(避免重複載入造成 mismatch),fallback 到 wheel 內 lib/ 目錄。 + + Raises: + RuntimeError: 若 libkplus 找不到或符號 binding 失敗。 + """ + import ctypes + import importlib.util + + spec = importlib.util.find_spec("kp") + if spec is None or not spec.submodule_search_locations: + raise RuntimeError("kp module spec not found") + kp_dir = spec.submodule_search_locations[0] + lib_dir = os.path.join(kp_dir, "lib") + + # 平台對應的 lib filename + if sys.platform == "darwin": + lib_name = "libkplus.dylib" + elif sys.platform == "win32": + lib_name = "libkplus.dll" + else: + lib_name = "libkplus.so" + + lib_path = os.path.join(lib_dir, lib_name) + if not os.path.isfile(lib_path): + # Windows 可能用其他命名(warrenchen reference 是 libkplus.dll) + # 嘗試找任何 libkplus* 檔案 + # Reviewer m2:sort() 確保 deterministic 順序、不依賴 os.listdir 回傳次序 + candidates = sorted( + f for f in os.listdir(lib_dir) if f.startswith("libkplus") + ) + if not candidates: + raise RuntimeError(f"libkplus not found in {lib_dir}") + lib_path = os.path.join(lib_dir, candidates[0]) 
+ _log(f"WARNING: libkplus fallback using {candidates[0]} (primary {lib_name} not found)") + + # Windows: add_dll_directory 確保相依 dll 可解析 + if sys.platform == "win32" and hasattr(os, "add_dll_directory"): + try: + os.add_dll_directory(lib_dir) + except Exception: + pass + + lib = ctypes.CDLL(lib_path) + + # Bind C symbol signatures(與 warrenchen reference 完全一致) + lib.kp_connect_devices.argtypes = [ + ctypes.c_int, # num_devices + ctypes.POINTER(ctypes.c_int), # usb_port_ids + ctypes.POINTER(ctypes.c_int), # status_out + ] + lib.kp_connect_devices.restype = ctypes.c_void_p # device_group handle + + lib.kp_set_timeout.argtypes = [ctypes.c_void_p, ctypes.c_int] + lib.kp_set_timeout.restype = None + + lib.kp_load_firmware_from_file.argtypes = [ + ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p + ] + lib.kp_load_firmware_from_file.restype = ctypes.c_int + + lib.kp_update_kdp_firmware_from_files.argtypes = [ + ctypes.c_void_p, # device_group + ctypes.c_char_p, # scpu_or_loader path + ctypes.c_char_p, # ncpu path or NULL + ctypes.c_bool, # auto_reboot + ] + lib.kp_update_kdp_firmware_from_files.restype = ctypes.c_int + + lib.kp_disconnect_devices.argtypes = [ctypes.c_void_p] + lib.kp_disconnect_devices.restype = ctypes.c_int + + if hasattr(lib, "kp_error_string"): + lib.kp_error_string.argtypes = [ctypes.c_int] + lib.kp_error_string.restype = ctypes.c_char_p + + return lib + + +def _fw_errstr(lib, code): + """Decode kp error code → string via kp_error_string()。 + + 與 warrenchen 一致:先試 raw code、若無回應再試 signed normalize 後值。 + """ + signed = _fw_normalize_code(code) + if hasattr(lib, "kp_error_string"): + try: + msg = lib.kp_error_string(int(code)) + if not msg and signed != code: + msg = lib.kp_error_string(int(signed)) + if msg: + return msg.decode("utf-8", errors="replace") + except Exception: + pass + return f"code={code}" + + +def _fw_connect_with_magic(lib, port_id): + """Connect with magic pass = 536173391 (允許 KDP1 legacy device 連線)。 + + Returns: + device_group handle 
(c_void_p int). + + Raises: + RuntimeError("connect_failed: ...") on failure. + """ + import ctypes + port_ids = (ctypes.c_int * 1)(int(port_id)) + status = ctypes.c_int(KDP_MAGIC_CONNECTION_PASS) + dg = lib.kp_connect_devices(1, port_ids, ctypes.byref(status)) + if not dg or status.value != KP_SUCCESS: + signed = _fw_normalize_code(status.value) + raise RuntimeError( + f"connect_failed: raw_code={status.value}, signed={signed}, " + f"msg={_fw_errstr(lib, status.value)}" + ) + return dg + + +def _fw_scan_target(port): + """Scan devices via kp.core.scan_devices() and find target by usb_port_id. + + Returns: + descriptor or None. + """ + try: + descs = kp.core.scan_devices() + except Exception as e: + _log(f"fw_scan_target: scan_devices failed: {e}") + return None + if descs.device_descriptor_number == 0: + return None + for i in range(descs.device_descriptor_number): + dev = descs.device_descriptor_list[i] + if port and str(dev.usb_port_id) == str(port): + return dev + return None + + +def _fw_rescan_and_wait(port, max_wait_s=8.0, initial_sleep_s=5.0): + """等 USB re-enumerate stable → rescan 找回 target by port (AC-FW-1.6)。 + + Args: + port: 原 usb_port_id(升級後 re-enumerate 通常保留同 port)。 + max_wait_s: 從 initial_sleep_s 過後再加 max_wait_s - initial_sleep_s + 秒輪詢上界。實測 5 秒已穩、保留上界 8 秒(AC-FW-1.6)。 + initial_sleep_s: 第一次 rescan 前固定等的秒數。 + + Returns: + (descriptor or None, total_wait_s). 
+ """ + time.sleep(initial_sleep_s) + waited = initial_sleep_s + target = _fw_scan_target(port) + if target is not None: + return target, waited + # 多輪 short-poll + poll_step = 0.5 + while waited < max_wait_s: + time.sleep(poll_step) + waited += poll_step + target = _fw_scan_target(port) + if target is not None: + return target, waited + return None, waited + + +def _fw_classify_legacy(firmware_str, product_id): + """判斷 device 是否為 KDP1 legacy state(需走 loader stage)。 + + KL520 legacy 訊號:firmware 字串為 "KDP"、"KDP1"、"KDP1.x"、"USB Boot"、 + "USB Boot Loader"、"LOADER" 等 legacy state、或空字串 + (某些 USB Boot state 不回 firmware string)。 + KL720 legacy 訊號:product_id == 0x0200 (KP_DEVICE_KL720_LEGACY)。 + + Reviewer M3 + s3:原本只用 substring match `"KDP" in fw and "KDP2" not in fw` + 對 KDP3(未來 firmware)會誤判 legacy → 改用顯式 prefix 比對表 + 已知字串 + enumeration、確保覆蓋 KDP1 各種 firmware 字串變體、forward-compat KDP3+。 + + Returns True if needs SDK loader stage、False if can short-circuit to flashing. + """ + if product_id == 0x0200: + return True # KL720 KDP1 legacy(pid 明示、不靠 firmware 字串) + + fw = (firmware_str or "").strip().upper() + + # 已知 KDP1 legacy firmware 字串完整列舉(明示比對、不靠 substring) + legacy_exact = { + "", # 某些 USB Boot state 不回 firmware string + "KDP", + "KDP1", + "USB BOOT", + "USB BOOT LOADER", + "LOADER", + "BOOTLOADER", + } + if fw in legacy_exact: + return True + + # KDP1.x(KDP1.0 / KDP1.5 等版本字串) + if fw.startswith("KDP1.") or fw.startswith("KDP1 "): + return True + + # 明示放行 KDP2 / KDP3+(forward-compat、避免 substring match 對未來 firmware 誤判) + # KDP2.x / KDP3.x / KDP4.x ... 
皆為 modern firmware、不需走 loader + for prefix in ("KDP2", "KDP3", "KDP4", "KDP5", "KDP6", "KDP7", "KDP8", "KDP9"): + if fw.startswith(prefix): + return False + + # 未知 firmware 字串:保守 default = 不走 loader(避免誤觸 loader stage brick device) + # 例:未來 firmware 用全新命名("NEF"、"K3"、等)→ 假設是 modern firmware + # 若這判斷錯了、verify 階段會 detect verify_mismatch、不致 brick + return False + + +def _fw_eta_ms(chip, current_stage): + """估算剩餘 ms(給前端顯示 ~X 秒、非精確)。 + + 依 TDD §4.2:UI 顯示「~X 秒 remaining」、精度低可接受。 + """ + # 各 stage 預估完成時刻(以升級開始為 0): + if chip == "KL520": + total_ms = 30000 # AC-FW-1.7 預估 30s + cum = {"preparing": 2000, "loading": 8000, "flashing": 22000, "verifying": 28000} + else: # KL720 + total_ms = 180000 # AC-FW-1.7 預估 180s + cum = {"preparing": 5000, "loading": 30000, "flashing": 160000, "verifying": 175000} + done_at = cum.get(current_stage, total_ms) + return max(0, total_ms - done_at) + + +# ── Firmware upgrade exceptions + failure handler ──────────────────── +# +# Reviewer M1:原本 _FwError / _FwTimeoutError / _fw_handle_failure 宣告位於 +# handle_firmware_upgrade **之後**(語法上 Python module load 時會先掃完整個檔 +# 才走 handler、所以 happy-path 不會炸 NameError、但 readability 差、且若有人 +# 在 handler 中間插入 module-level code 觸發呼叫就會炸)。 +# 移到 handler 之前、讓讀者從上而下能理解 error flow。 + +class _FwError(Exception): + """Internal exception carrying (stage, reason, message) for firmware ops.""" + + def __init__(self, stage, reason, message): + super().__init__(message) + self.stage = stage + self.reason = reason + self.message = message + + +class _FwTimeoutError(Exception): + """Raised when total upgrade duration exceeds chip timeout.""" + + def __init__(self, stage): + super().__init__(f"timeout at stage={stage}") + self.stage = stage + + +def _fw_handle_failure(stage, reason, message, before_fw, start_ts, dg, lib, raw=""): + """彙整失敗 progress event + return 給 caller 的 error dict。 + + 對齊 TDD §6.1 失敗回傳格式: + {"error":, "stage":, "reason":, "raw_error":} + + Reviewer m3:原本此 helper 內 disconnect、caller 的 finally 也 disconnect、 + 雙重 
disconnect 對 SDK 行為未定。改成「single owner of disconnect」原則: + 本 helper 不再 disconnect、由 caller 的 finally 統一處理。本函式只負責 emit + progress event + 組裝 error dict。 + """ + elapsed = int((time.monotonic() - start_ts) * 1000) + _log(f"firmware_upgrade FAILED: stage={stage}, reason={reason}, " + f"message={message}, elapsed_ms={elapsed}") + _fw_emit_progress( + "error", + message=message, + elapsed_ms=elapsed, + eta_ms=0, + extra={ + "error": message, + "reason": reason, + "raw_error": raw or message, + "before_version": before_fw, + }, + ) + return { + "error": message, + "stage": stage, + "reason": reason, + "raw_error": raw or message, + } + + +def handle_firmware_upgrade(params): + """A 階段 M9-1:自動升級 KDP1 → KDP2、KL520 與 KL720。 + + 對應 TDD §6.1 表 + §5.1 流程: + Input: {"port": "", "chip": "KL520" | "KL720"} + Output (success): + {"status":"upgraded", "before_firmware":, "after_firmware":, + "method":"ctypes_kp_update_kdp_firmware_from_files", + "duration_ms":} + Output (failure): + {"error":, "stage":, + "reason":, + "raw_error":} + + 每進入一個 stage 透過 _fw_emit_progress() 推 progress event 到 stderr, + Go driver 抓 stderr line-by-line 轉成 WebSocket FirmwareProgress 給前端。 + """ + global _firmware_upgrade_in_progress + + if not HAS_KP: + return {"error": "kp module not available", "stage": "preparing", + "reason": "scan_not_found", "raw_error": "kp not available"} + + chip = params.get("chip", "KL520") + port = str(params.get("port", "")) + + if chip not in ("KL520", "KL720"): + return {"error": f"unsupported chip for A 階段: {chip}", + "stage": "preparing", "reason": "scan_not_found", + "raw_error": f"chip={chip} not in (KL520, KL720)"} + + timeout_s = KL520_UPGRADE_TIMEOUT_S if chip == "KL520" else KL720_UPGRADE_TIMEOUT_S + start_ts = time.monotonic() + + def elapsed_ms(): + return int((time.monotonic() - start_ts) * 1000) + + def check_timeout(current_stage): + if (time.monotonic() - start_ts) > timeout_s: + raise _FwTimeoutError(current_stage) + + # ── AC-FW-1.9 graceful shutdown 
拒絕:標記升級進行中 ── + # Reviewer m4:原本還寫 _firmware_upgrade_start_ts 全域、與 SIGTERM handler + # closure 重複、已移除、改由 closure capture start_ts 為 single source。 + _firmware_upgrade_in_progress = True + + # 在升降版進入 critical section 期間註冊 SIGTERM handler + # (收 SIGTERM 不立即退、改 log warning event;實際 server 端 lock + # 由 M9-2 Go driver / M9-3 service 實作、bridge.py 只負責「正在跑時 + # 拒絕被 kill」) + _fw_register_sigterm_handler(start_ts) + + method = "ctypes_kp_update_kdp_firmware_from_files" + before_fw = "" + lib = None + dg = None + + try: + # ── preparing:scan + connect ──────────────────────────────── + _fw_emit_progress( + "preparing", + message=f"scanning {chip} on port {port}", + elapsed_ms=elapsed_ms(), + eta_ms=_fw_eta_ms(chip, "preparing"), + ) + check_timeout("preparing") + + # 先 disconnect 既有 _device_group(若有)、避免 handle 衝突 + _clear_device_group() + + target = _fw_scan_target(port) + if target is None: + raise _FwError( + "preparing", "scan_not_found", + f"device with port_id={port} not found in scan", + ) + + before_fw = str(target.firmware) + target_port_id = int(target.usb_port_id) + target_pid = int(target.product_id) + + _log(f"firmware_upgrade: chip={chip}, port={target_port_id}, " + f"pid=0x{target_pid:04X}, firmware='{before_fw}'") + + # ── 解析 firmware 檔路徑 ───────────────────────────────────── + fw_paths = _resolve_firmware_paths_full(chip) + if fw_paths["scpu"] is None or fw_paths["ncpu"] is None: + raise _FwError( + "preparing", "scan_not_found", + f"firmware files not found for {chip} " + f"(scpu/ncpu missing in server/scripts/firmware/{chip}/)", + ) + + # ── 載入 libkplus + ctypes binding ────────────────────────── + try: + lib = _fw_load_libkplus() + except Exception as e: + raise _FwError( + "preparing", "connect_failed", + f"libkplus load failed: {e}", + ) + + # ── connect with magic(allow KDP1 legacy device)─────────── + try: + dg = _fw_connect_with_magic(lib, target_port_id) + except RuntimeError as e: + raise _FwError("preparing", "connect_failed", str(e)) + + # set 
timeout for SDK operations(注意:不是整體 upgrade timeout、 + # 是單一 SDK call 的 timeout、避免單個 kp_load/update call 卡住) + lib.kp_set_timeout(dg, int(timeout_s * 1000)) + + # ── 判斷是否走 SDK loader stage ────────────────────────────── + # Reviewer M2:原本控制流隱式(`if needs_loader: if loader_path is None: ...` + # nested)、讀者不易看清「實際會跑 loading stage」的條件。改為三個顯式 bool: + # + # needs_loader = device 處於 KDP1 legacy state(_fw_classify_legacy) + # should_run_loader_stage = 實際會跑 loading stage(loader.bin 存在 + needs_loader) + # loader_required_but_missing = KL520 KDP1 legacy 但缺 loader.bin(必失敗) + # + # 三個情境的流程: + # 1. KL520 KDP1 legacy + loader.bin 存在 → loading → flashing(SDK load) + # → verifying → done (should_run_loader_stage=True) + # 2. KL520 KDP1 legacy + loader.bin 缺 → fail at loading (loader_write_failed) + # 3. KL720 KDP1 legacy + loader.bin 缺 → skip loading、直接 flashing(warrenchen 模式) + # → verifying → done (should_run_loader_stage=False) + # 4. already KDP2(KL520/KL720)→ skip loading、直接 flashing(warrenchen 模式) + # → verifying → done (should_run_loader_stage=False) + needs_loader = _fw_classify_legacy(before_fw, target_pid) + loader_path = fw_paths["loader"] + should_run_loader_stage = needs_loader and loader_path is not None + loader_required_but_missing = ( + needs_loader and loader_path is None and chip == "KL520" + ) + + _log(f"firmware_upgrade: needs_loader={needs_loader}, " + f"should_run_loader_stage={should_run_loader_stage}, " + f"loader_required_but_missing={loader_required_but_missing}, " + f"legacy={'yes' if needs_loader else 'no'}") + + # ── 情境 2:KL520 KDP1 legacy 但缺 loader.bin → 直接失敗 ───── + if loader_required_but_missing: + check_timeout("loading") + raise _FwError( + "loading", "loader_write_failed", + f"fw_loader.bin not found for {chip} but device is in " + f"KDP1 legacy state (firmware='{before_fw}')", + ) + + # ── 情境 1:跑 loading stage(KL520 KDP1 legacy + loader.bin)── + if should_run_loader_stage: + check_timeout("loading") + _fw_emit_progress( + "loading", + 
message="writing USB Boot loader firmware", + elapsed_ms=elapsed_ms(), + eta_ms=_fw_eta_ms(chip, "loading"), + ) + ret = lib.kp_update_kdp_firmware_from_files( + dg, + loader_path.encode("utf-8"), + None, # loader stage: ncpu = NULL + True, # auto_reboot + ) + if ret != KP_SUCCESS: + raise _FwError( + "loading", "loader_write_failed", + f"kp_update_kdp_firmware_from_files(loader) ret={ret} " + f"({_fw_errstr(lib, ret)})", + ) + # auto_reboot 後 disconnect 可能失敗(USB re-enumerate)容忍 + try: + lib.kp_disconnect_devices(dg) + except Exception: + pass + # disconnect 完設 dg=None、避免 finally double-disconnect 已 freed handle + dg = None + # 等 device reboot 完進 USB Boot mode(Loader firmware loaded) + time.sleep(USB_WAIT_AFTER_REBOOT_MS / 1000.0) + + # rescan + reconnect with magic + target = _fw_scan_target(port) + if target is None: + raise _FwError( + "loading", "disconnect_during_op", + f"device disappeared after loader write, port={port}", + ) + try: + dg = _fw_connect_with_magic(lib, int(target.usb_port_id)) + except RuntimeError as e: + raise _FwError( + "loading", "connect_failed", + f"reconnect after loader failed: {e}", + ) + lib.kp_set_timeout(dg, int(timeout_s * 1000)) + elif needs_loader: + # 情境 3:KL720 KDP1 legacy 沒 loader.bin → 跳過 loading、直接 flashing + # warrenchen 模式:kp_update_kdp_firmware_from_files(scpu, ncpu, True) 一次寫 + _log(f"firmware_upgrade: {chip} legacy without loader.bin、" + f"skipping loading stage, will go directly to flashing") + + # ── flashing:寫入 KDP2 firmware(scpu + ncpu)───────────── + check_timeout("flashing") + _fw_emit_progress( + "flashing", + message="writing KDP2 firmware (scpu + ncpu)", + elapsed_ms=elapsed_ms(), + eta_ms=_fw_eta_ms(chip, "flashing"), + ) + + if should_run_loader_stage: + # 情境 1:device 已透過 loader stage 進 Loader mode、用 + # kp_load_firmware_from_file 載 scpu + ncpu 到 RAM + ret = lib.kp_load_firmware_from_file( + dg, + fw_paths["scpu"].encode("utf-8"), + fw_paths["ncpu"].encode("utf-8"), + ) + if ret != KP_SUCCESS: + raise 
_FwError( + "flashing", "upgrade_mid_failed", + f"kp_load_firmware_from_file ret={ret} " + f"({_fw_errstr(lib, ret)})", + ) + else: + # 情境 3 / 4:沒走 loader stage(KL720 legacy without loader.bin、 + # 或 already KDP2)→ warrenchen 模式:直接 + # kp_update_kdp_firmware_from_files(scpu, ncpu, True) 一次寫 + ret = lib.kp_update_kdp_firmware_from_files( + dg, + fw_paths["scpu"].encode("utf-8"), + fw_paths["ncpu"].encode("utf-8"), + True, # auto_reboot + ) + if ret != KP_SUCCESS: + raise _FwError( + "flashing", "upgrade_mid_failed", + f"kp_update_kdp_firmware_from_files ret={ret} " + f"({_fw_errstr(lib, ret)})", + ) + + # disconnect after upgrade:auto_reboot 後 disconnect 失敗預期、容忍 + try: + lib.kp_disconnect_devices(dg) + except Exception: + pass + dg = None + + # ── verifying:等 USB re-enumerate → rescan → 驗 firmware 字串 ── + check_timeout("verifying") + _fw_emit_progress( + "verifying", + message="waiting USB re-enumerate and verifying firmware version", + elapsed_ms=elapsed_ms(), + eta_ms=_fw_eta_ms(chip, "verifying"), + ) + + # AC-FW-1.6: 等 5-8 秒 USB stable + target_after, waited = _fw_rescan_and_wait( + port, + max_wait_s=USB_WAIT_AFTER_UPGRADE_MS / 1000.0 + 3.0, # 5 + 3 = 8s 上界 + initial_sleep_s=USB_WAIT_AFTER_UPGRADE_MS / 1000.0, + ) + if target_after is None: + raise _FwError( + "verifying", "verify_not_found", + f"device not found after upgrade (waited {waited:.1f}s)、" + f"USB may still be re-enumerating, please re-plug", + ) + + after_fw = str(target_after.firmware) + after_pid = int(target_after.product_id) + + # 驗證 firmware 字串已升到 KDP2(不再是 KDP1 legacy) + if _fw_classify_legacy(after_fw, after_pid): + raise _FwError( + "verifying", "verify_mismatch", + f"firmware after upgrade still appears legacy: " + f"firmware='{after_fw}', pid=0x{after_pid:04X}", + ) + + # ── done ── + duration_ms = elapsed_ms() + _fw_emit_progress( + "done", + message=f"upgraded from '{before_fw}' to '{after_fw}'", + elapsed_ms=duration_ms, + eta_ms=0, + ) + + return { + "status": "upgraded", + 
"before_firmware": before_fw, + "after_firmware": after_fw, + "method": method, + "duration_ms": duration_ms, + } + + except _FwTimeoutError as e: + return _fw_handle_failure( + e.stage, "timeout", + f"upgrade exceeded {timeout_s}s timeout at stage={e.stage}", + before_fw, start_ts, dg, lib, raw=str(e), + ) + except _FwError as e: + return _fw_handle_failure( + e.stage, e.reason, e.message, before_fw, start_ts, dg, lib, raw=str(e), + ) + except Exception as e: + import traceback + tb = traceback.format_exc() + _log(f"firmware_upgrade UNEXPECTED EXCEPTION: {type(e).__name__}: {e}\n{tb}") + return _fw_handle_failure( + "flashing", "upgrade_mid_failed", + f"unexpected: {type(e).__name__}: {e}", + before_fw, start_ts, dg, lib, raw=tb, + ) + finally: + _firmware_upgrade_in_progress = False + # Reviewer m3:disconnect 的 single owner = 此 finally block。 + # _fw_handle_failure 已改為「不在裡面 disconnect」、避免 double-disconnect。 + # success path 在 1810 行已 disconnect 並設 dg=None、此處 if dg is not None + # 會 short-circuit 跳過、不會 double。 + # fail path:dg 可能還持有 handle、由本 finally 統一收尾。 + if dg is not None and lib is not None: + try: + lib.kp_disconnect_devices(dg) + except Exception: + pass + dg = None # 確保不會被外部誤用 + _fw_unregister_sigterm_handler() + + +# ── SIGTERM handler (AC-FW-1.9 graceful shutdown rejection) ────────── +# +# 升級進行中收到 SIGTERM 時,不立即退出、改在 stderr push warning event。 +# 實際的 server-side lock 機制由 M9-2 / M9-3 實作(progress.md「未解決問題」 +# 註記為依賴)。本處 bridge.py 端的責任:「正在跑時拒絕被 kill」。 +# +# Windows 沒有 SIGTERM 概念、改用 atexit。Linux/macOS 用 signal handler。 + +_fw_original_sigterm_handler = None + + +def _fw_register_sigterm_handler(start_ts): + """註冊 SIGTERM handler:升級進行中時拒絕並 log warning。""" + global _fw_original_sigterm_handler + if sys.platform == "win32": + return # Windows 沒 SIGTERM + try: + import signal + + def handler(signum, frame): + if _firmware_upgrade_in_progress: + elapsed = int((time.monotonic() - start_ts) * 1000) + try: + print( + json.dumps({ + "event": "shutdown_rejected", + 
"reason": "firmware_upgrade_in_progress", + "task": "firmware_upgrade", + "elapsed_ms": elapsed, + }), + file=sys.stderr, + flush=True, + ) + except Exception: + pass + # 拒絕 SIGTERM:不呼叫 sys.exit、不 raise、繼續執行升級 + return + # 沒升級進行中、走預設行為 + if callable(_fw_original_sigterm_handler): + _fw_original_sigterm_handler(signum, frame) + else: + sys.exit(0) + + _fw_original_sigterm_handler = signal.signal(signal.SIGTERM, handler) + except Exception as e: + _log(f"SIGTERM handler registration failed: {e}") + + +def _fw_unregister_sigterm_handler(): + """還原 SIGTERM handler 為 install 前狀態。""" + global _fw_original_sigterm_handler + if sys.platform == "win32": + return + try: + import signal + if _fw_original_sigterm_handler is not None: + signal.signal(signal.SIGTERM, _fw_original_sigterm_handler) + _fw_original_sigterm_handler = None + else: + signal.signal(signal.SIGTERM, signal.SIG_DFL) + except Exception: + pass + + # ── Main loop ──────────────────────────────────────────────────────── def main(): @@ -1175,6 +2024,8 @@ def main(): result = handle_load_model(cmd) elif action == "inference": result = handle_inference(cmd) + elif action == "firmware_upgrade": + result = handle_firmware_upgrade(cmd) else: result = {"error": f"unknown command: {action}"} _respond(result) diff --git a/local-tool/server/scripts/test_kneron_bridge_firmware.py b/local-tool/server/scripts/test_kneron_bridge_firmware.py new file mode 100644 index 0000000..4d0f195 --- /dev/null +++ b/local-tool/server/scripts/test_kneron_bridge_firmware.py @@ -0,0 +1,840 @@ +#!/usr/bin/env python3 +"""Unit tests for kneron_bridge.handle_firmware_upgrade (A 階段 M9-1). + +Mock-based tests — no real Kneron dongle needed. 
Covers TDD §6.1 handler +contract: + +- 5 successful path stages all fire progress events +- 4 failure paths (scan_not_found / connect_failed / loader_write_failed / + verify_mismatch) +- timeout护栏 (KL520 60s / KL720 200s) +- graceful shutdown SIGTERM rejection during upgrade in progress + +執行方式: + cd server/scripts && python3 test_kneron_bridge_firmware.py +""" +from __future__ import annotations + +import io +import json +import os +import sys +import time +import unittest +from unittest import mock + + +# 確保 import 路徑正確 +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + + +# ── 在 import bridge 前 fake kp module(避免實機相依)───────────────── +class _FakeKpCore: + def scan_devices(self): + raise NotImplementedError("must be patched per test") + + def disconnect_devices(self, *args, **kwargs): + return 0 + + +class _FakeKp: + core = _FakeKpCore() + + +# 注入 fake kp 給 bridge 在 import 時取代真實 kp +sys.modules.setdefault("kp", _FakeKp()) + + +import kneron_bridge as bridge # noqa: E402 + + +# ── Helper:fake device descriptor ─────────────────────────────────── +class FakeDeviceDescriptor: + def __init__(self, usb_port_id, product_id, firmware, kn_number=0x12345678, + is_connectable=True): + self.usb_port_id = usb_port_id + self.product_id = product_id + self.firmware = firmware + self.kn_number = kn_number + self.is_connectable = is_connectable + + +class FakeDeviceList: + def __init__(self, devices): + self.device_descriptor_list = devices + self.device_descriptor_number = len(devices) + + +# ── Helper:fake libkplus(ctypes.CDLL 替身)───────────────────────── +class FakeLib: + """Mock libkplus shared library with same surface as ctypes binding.""" + + def __init__(self): + self.upgrade_calls = [] # list of (scpu_or_loader, ncpu_or_None, auto_reboot) + self.load_calls = [] # list of (scpu, ncpu) + self.connect_calls = [] + self.disconnect_calls = 0 + self.timeout_calls = [] + # 控制 mock 行為的 knob + self.upgrade_return = 0 + self.load_return = 0 + self.connect_return = 
(0xCAFEBABE, 0) # (handle, status) + # 模擬 time.sleep 時間(測試端不真睡) + self._sleep_skipped = True + + def kp_scan_devices(self): + return 0xDEADBEEF # 不會被用到(_fw_scan_target 走 kp.core.scan_devices) + + def kp_connect_devices(self, n, ports_ptr, status_ptr): + # ctypes c_int.value 取出 + port_id = ports_ptr[0] + self.connect_calls.append(port_id) + handle, status = self.connect_return + status_ptr._obj.value = status + return handle + + def kp_set_timeout(self, dg, ms): + self.timeout_calls.append(ms) + + def kp_load_firmware_from_file(self, dg, scpu, ncpu): + self.load_calls.append((scpu, ncpu)) + return self.load_return + + def kp_update_kdp_firmware_from_files(self, dg, scpu_or_loader, ncpu_or_none, auto_reboot): + self.upgrade_calls.append((scpu_or_loader, ncpu_or_none, auto_reboot)) + return self.upgrade_return + + def kp_disconnect_devices(self, dg): + self.disconnect_calls += 1 + return 0 + + def kp_error_string(self, code): + return f"mock_err({code})".encode("utf-8") + + +# ── 共用 fixture:每個 test 用乾淨 FakeLib + sleep stub ───────────── +class FirmwareUpgradeTestBase(unittest.TestCase): + """Patches common to all tests so handler doesn't touch real Kneron stack.""" + + def setUp(self): + self.fake_lib = FakeLib() + # 收集所有 progress events + self.progress_events = [] + # 真實 stderr 改 catch、避免 test output 髒 + self._stderr_capture = io.StringIO() + + # Patch HAS_KP = True + self._has_kp_patch = mock.patch.object(bridge, "HAS_KP", True) + self._has_kp_patch.start() + + # Patch _fw_load_libkplus → return our FakeLib + self._load_lib_patch = mock.patch.object( + bridge, "_fw_load_libkplus", return_value=self.fake_lib + ) + self._load_lib_patch.start() + + # Patch firmware path resolver:預設 scpu/ncpu/loader 都齊 + self._fw_paths = { + "scpu": "/fake/firmware/KL520/fw_scpu.bin", + "ncpu": "/fake/firmware/KL520/fw_ncpu.bin", + "loader": "/fake/firmware/KL520/fw_loader.bin", + "version": "v2.2.0", + } + self._resolve_paths_patch = mock.patch.object( + bridge, 
"_resolve_firmware_paths_full", + side_effect=lambda chip: self._fw_paths, + ) + self._resolve_paths_patch.start() + + # Patch time.sleep → no-op(測試端不真睡) + self._sleep_patch = mock.patch.object(bridge.time, "sleep", lambda x: None) + self._sleep_patch.start() + + # Patch _fw_emit_progress 收集事件、同時仍寫一份到 stderr stub + original_emit = bridge._fw_emit_progress + + def _capture_emit(stage, message="", elapsed_ms=0, eta_ms=0, extra=None): + event = { + "stage": stage, + "message": message, + "elapsed_ms": elapsed_ms, + "eta_ms": eta_ms, + } + if extra: + event.update(extra) + self.progress_events.append(event) + + self._emit_patch = mock.patch.object( + bridge, "_fw_emit_progress", side_effect=_capture_emit + ) + self._emit_patch.start() + + # Patch _clear_device_group → no-op(避免 touch _device_group 全域) + self._clear_patch = mock.patch.object( + bridge, "_clear_device_group", lambda: None + ) + self._clear_patch.start() + + def tearDown(self): + self._has_kp_patch.stop() + self._load_lib_patch.stop() + self._resolve_paths_patch.stop() + self._sleep_patch.stop() + self._emit_patch.stop() + self._clear_patch.stop() + # 確保 sigterm handler 還原(避免 test 間互相影響) + try: + bridge._fw_unregister_sigterm_handler() + except Exception: + pass + bridge._firmware_upgrade_in_progress = False + + def stub_scan_returning(self, *device_lists): + """Patch kp.core.scan_devices 依次回不同的 device list. + + Args: + *device_lists: 每個 list 是 [FakeDeviceDescriptor, ...] 
+ """ + results = [FakeDeviceList(devs) for devs in device_lists] + it = iter(results) + + def _next_scan(): + try: + return next(it) + except StopIteration: + # 多餘的 scan call 重複回最後一個結果(測試容忍) + return results[-1] if results else FakeDeviceList([]) + + return mock.patch.object(bridge.kp.core, "scan_devices", side_effect=_next_scan) + + +# ── 5 個成功路徑測試 ────────────────────────────────────────────────── +class TestFirmwareUpgradeSuccess(FirmwareUpgradeTestBase): + + def test_kl520_kdp1_legacy_full_5_stages(self): + """KL520 KDP1 legacy → KDP2:preparing/loading/flashing/verifying/done 5 stage 都 fire.""" + legacy_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="KDP", + ) + post_loader_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="USB Boot Loader", + ) + kdp2_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="KDP2", + ) + + with self.stub_scan_returning([legacy_dev], [post_loader_dev], [kdp2_dev]): + result = bridge.handle_firmware_upgrade({"port": "42", "chip": "KL520"}) + + self.assertEqual(result["status"], "upgraded", msg=f"unexpected result: {result}") + self.assertEqual(result["before_firmware"], "KDP") + self.assertEqual(result["after_firmware"], "KDP2") + self.assertIn("ctypes", result["method"]) + self.assertGreaterEqual(result["duration_ms"], 0) + + # 驗證 5 個 stage 都 fire + stages = [e["stage"] for e in self.progress_events] + self.assertEqual( + stages, ["preparing", "loading", "flashing", "verifying", "done"], + msg=f"unexpected stage sequence: {stages}", + ) + + # KL520 KDP1 legacy:loader.bin 寫一次(kp_update_kdp_firmware_from_files) + # + scpu/ncpu 載一次(kp_load_firmware_from_file) + self.assertEqual(len(self.fake_lib.upgrade_calls), 1, + msg="loader write should be called exactly once") + self.assertEqual(len(self.fake_lib.load_calls), 1, + msg="kp_load_firmware_from_file should be called once after loader") + + def test_kl520_already_kdp2_short_circuit(self): + """KL520 已是 KDP2:跳過 
loader stage、直接 flashing(用 kp_update_kdp_firmware_from_files).""" + kdp2_dev = FakeDeviceDescriptor( + usb_port_id=10, product_id=0x100, firmware="KDP2.5", + ) + kdp2_after = FakeDeviceDescriptor( + usb_port_id=10, product_id=0x100, firmware="KDP2.5", + ) + with self.stub_scan_returning([kdp2_dev], [kdp2_after]): + result = bridge.handle_firmware_upgrade({"port": "10", "chip": "KL520"}) + + self.assertEqual(result["status"], "upgraded") + stages = [e["stage"] for e in self.progress_events] + # KDP2 short-circuit:preparing → flashing → verifying → done(無 loading) + self.assertEqual(stages, ["preparing", "flashing", "verifying", "done"]) + # kp_update_kdp_firmware_from_files 用兩 path 模式(scpu + ncpu) + self.assertEqual(len(self.fake_lib.upgrade_calls), 1) + scpu_path, ncpu_path, auto_reboot = self.fake_lib.upgrade_calls[0] + self.assertIn(b"fw_scpu", scpu_path) + self.assertIn(b"fw_ncpu", ncpu_path) + self.assertTrue(auto_reboot) + + def test_kl720_kdp_legacy(self): + """KL720 KDP1 legacy (pid=0x200):走 flashing 路徑(warrenchen 模式).""" + legacy = FakeDeviceDescriptor( + usb_port_id=5, product_id=0x200, firmware="KDP", + ) + after = FakeDeviceDescriptor( + usb_port_id=5, product_id=0x720, firmware="KDP2", + ) + # KL720 沒 loader.bin(warrenchen 也沒附) + self._fw_paths["loader"] = None + + with self.stub_scan_returning([legacy], [after]): + result = bridge.handle_firmware_upgrade({"port": "5", "chip": "KL720"}) + + self.assertEqual(result["status"], "upgraded") + # KL720 legacy 沒 loader.bin:preparing → flashing → verifying → done + stages = [e["stage"] for e in self.progress_events] + self.assertEqual(stages, ["preparing", "flashing", "verifying", "done"]) + + def test_progress_event_schema_has_required_fields(self): + """進度事件 schema 對齊 TDD §4.2:stage, elapsed_ms, eta_ms 必填.""" + dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP2") + with self.stub_scan_returning([dev], [dev]): + bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + for e in 
self.progress_events: + self.assertIn("stage", e) + self.assertIn("elapsed_ms", e) + self.assertIn("eta_ms", e) + self.assertIsInstance(e["elapsed_ms"], int) + + def test_done_stage_returns_duration_ms(self): + """done event 必須有 duration_ms(caller 取 elapsed_ms).""" + dev = FakeDeviceDescriptor(usb_port_id=2, product_id=0x100, firmware="KDP2") + with self.stub_scan_returning([dev], [dev]): + result = bridge.handle_firmware_upgrade({"port": "2", "chip": "KL520"}) + + self.assertIn("duration_ms", result) + # done event 的 elapsed_ms 應該 = duration_ms(finishing-time alignment) + done_event = [e for e in self.progress_events if e["stage"] == "done"][0] + self.assertEqual(done_event["elapsed_ms"], result["duration_ms"]) + + +# ── 4 個失敗路徑測試 ────────────────────────────────────────────────── +class TestFirmwareUpgradeFailure(FirmwareUpgradeTestBase): + + def test_scan_not_found(self): + """scan 找不到 target port:preparing stage failure with reason=scan_not_found.""" + with self.stub_scan_returning([]): # empty scan + result = bridge.handle_firmware_upgrade({"port": "999", "chip": "KL520"}) + + self.assertIn("error", result) + self.assertEqual(result["stage"], "preparing") + self.assertEqual(result["reason"], "scan_not_found") + self.assertIn("not found", result["error"].lower()) + + # 應該 fire preparing + error 兩個 event + stages = [e["stage"] for e in self.progress_events] + self.assertIn("preparing", stages) + self.assertIn("error", stages) + + def test_connect_failed(self): + """ctypes connect 回 status != KP_SUCCESS:preparing/connect_failed.""" + dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + # 讓 connect 回 non-zero status + self.fake_lib.connect_return = (0, -3) # handle=NULL, status=-3 + + with self.stub_scan_returning([dev]): + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + self.assertEqual(result["stage"], "preparing") + self.assertEqual(result["reason"], "connect_failed") + # 錯誤訊息應該包含 raw error 線索 + 
self.assertIn("connect", result["error"].lower()) + + def test_loader_write_failed(self): + """KL520 KDP1 legacy:loader 寫入回 non-zero → loading/loader_write_failed.""" + legacy_dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + # 讓 kp_update_kdp_firmware_from_files 回 error code + self.fake_lib.upgrade_return = -7 + + with self.stub_scan_returning([legacy_dev]): + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + self.assertEqual(result["stage"], "loading") + self.assertEqual(result["reason"], "loader_write_failed") + # loader call 確實有發生 + self.assertEqual(len(self.fake_lib.upgrade_calls), 1) + + def test_verify_mismatch(self): + """升級完成但 verify 階段發現 firmware 字串仍 legacy → verify_mismatch.""" + legacy_dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + # 升級完仍是 KDP1(mock:upgrade 成功但 device firmware 字串沒變) + stuck_legacy = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + + with self.stub_scan_returning([legacy_dev], [stuck_legacy], [stuck_legacy]): + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + self.assertEqual(result["stage"], "verifying") + self.assertEqual(result["reason"], "verify_mismatch") + self.assertIn("legacy", result["error"].lower()) + + def test_verify_not_found(self): + """verify 階段 device disappear(rescan 找不到)→ verify_not_found.""" + legacy_dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + + # 升級時走 loader → flashing 都 OK、verify 階段 scan 回空(device 還沒 re-enumerate) + with self.stub_scan_returning([legacy_dev], [legacy_dev], []): + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + self.assertEqual(result["stage"], "verifying") + self.assertEqual(result["reason"], "verify_not_found") + + def test_failure_event_carries_reason_and_raw_error(self): + """error event 必須含 reason + raw_error(TDD §4.2 失敗欄位).""" + with self.stub_scan_returning([]): + 
bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + err_events = [e for e in self.progress_events if e["stage"] == "error"] + self.assertEqual(len(err_events), 1, "error event 應該 fire 一次") + e = err_events[0] + self.assertIn("reason", e) + self.assertIn("raw_error", e) + self.assertIn("before_version", e) + + def test_chip_unsupported(self): + """A 階段不支援 KL630 / KL730:應該直接拒絕(preparing/scan_not_found).""" + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL630"}) + self.assertIn("error", result) + self.assertEqual(result["stage"], "preparing") + self.assertIn("KL630", result["error"]) + + # ── Reviewer s4:補 4 個欠缺的 test case ────────────────────────── + + def test_loading_stage_disconnect_during_op(self): + """Reviewer s4 (1):loading stage 寫 loader 成功後 rescan 找不到 device. + + 對應 kneron_bridge.py 1753-1758 行(disconnect_during_op in loading stage)。 + Stage 序列:preparing → loading → error(disconnect_during_op)。 + """ + legacy_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="KDP", + ) + # loader 寫成功(upgrade_return=0 default)、但 reboot 後 rescan 回空 + # (device 沒 re-enumerate 回來) + + with self.stub_scan_returning([legacy_dev], []): # 第二次 scan 空 + result = bridge.handle_firmware_upgrade({"port": "42", "chip": "KL520"}) + + self.assertEqual(result["stage"], "loading") + self.assertEqual(result["reason"], "disconnect_during_op") + self.assertIn("disappear", result["error"].lower()) + # 第一個 upgrade_call 是 loader(成功)、應有 1 個 call + self.assertEqual(len(self.fake_lib.upgrade_calls), 1) + + def test_loading_stage_reconnect_failed(self): + """Reviewer s4 (2):loading stage 寫 loader 成功、rescan 找到 device、但 reconnect 失敗. 
+ + 對應 kneron_bridge.py 1759-1765 行(connect_failed in loading stage、reconnect 失敗)。 + Stage 序列:preparing → loading → error(connect_failed)。 + """ + legacy_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="KDP", + ) + post_loader_dev = FakeDeviceDescriptor( + usb_port_id=42, product_id=0x100, firmware="USB Boot Loader", + ) + + # 第一次 connect(preparing)OK;第二次 connect(reconnect after loader)失敗 + call_count = [0] + original_connect = self.fake_lib.kp_connect_devices + + def maybe_fail_connect(n, ports_ptr, status_ptr): + call_count[0] += 1 + if call_count[0] == 2: + # 第二次 connect 失敗 + status_ptr._obj.value = -5 + return 0 # NULL handle + return original_connect(n, ports_ptr, status_ptr) + + self.fake_lib.kp_connect_devices = maybe_fail_connect + + with self.stub_scan_returning([legacy_dev], [post_loader_dev]): + result = bridge.handle_firmware_upgrade({"port": "42", "chip": "KL520"}) + + self.assertEqual(result["stage"], "loading") + self.assertEqual(result["reason"], "connect_failed") + self.assertIn("reconnect", result["error"].lower()) + + def test_failure_event_full_extra_fields(self): + """Reviewer s4 (3):error event 必須含 TDD §4.2 完整失敗欄位. 
+ + TDD §4.2 列出 error event extra dict 應含: + error / reason / raw_error / before_version + 本 test 驗 caller 的 _fw_handle_failure 確實組裝這些欄位。 + """ + legacy_dev = FakeDeviceDescriptor( + usb_port_id=99, product_id=0x100, firmware="KDP1.5", + ) + # 讓 loader write 失敗、確保走進 _fw_handle_failure + self.fake_lib.upgrade_return = -7 + + with self.stub_scan_returning([legacy_dev]): + result = bridge.handle_firmware_upgrade({"port": "99", "chip": "KL520"}) + + err_events = [e for e in self.progress_events if e["stage"] == "error"] + self.assertEqual(len(err_events), 1) + e = err_events[0] + + # TDD §4.2 必填欄位 + for field in ("error", "reason", "raw_error", "before_version"): + self.assertIn(field, e, f"error event missing field: {field}") + + # before_version 應該抓到 scan 階段的 firmware string + self.assertEqual(e["before_version"], "KDP1.5") + # raw_error 應該帶 SDK error context(包含 ret code / 函式名) + self.assertIn("loader", e["raw_error"].lower()) + + +# ── Reviewer s4 (4):ctypes binding 簽名測試 ────────────────────── +class TestCtypesBindingSignatures(unittest.TestCase): + """驗證 _fw_load_libkplus 設對 argtypes / restype. 
+ + 在 mock test 階段我們繞過 _fw_load_libkplus、直接餵 FakeLib; + 但實機跑時 binding 簽名錯會在 first call 拋 ctypes.ArgumentError 或更糟 + silently corrupt memory。本 test 用一個 mock CDLL object 跑 _fw_load_libkplus、 + 驗它對每個 C 符號設了正確的 argtypes / restype。 + """ + + def test_libkplus_binding_signatures(self): + """_fw_load_libkplus 對所有 C 符號設了正確 argtypes / restype.""" + import ctypes + + # Mock CDLL:紀錄 argtypes / restype 設定、不執行真實 lib + class MockSymbol: + def __init__(self, name): + self.name = name + self.argtypes = None + self.restype = None + + class MockCDLL: + def __init__(self, *args, **kwargs): + self.kp_connect_devices = MockSymbol("kp_connect_devices") + self.kp_set_timeout = MockSymbol("kp_set_timeout") + self.kp_load_firmware_from_file = MockSymbol("kp_load_firmware_from_file") + self.kp_update_kdp_firmware_from_files = MockSymbol( + "kp_update_kdp_firmware_from_files" + ) + self.kp_disconnect_devices = MockSymbol("kp_disconnect_devices") + self.kp_error_string = MockSymbol("kp_error_string") + + def __getattr__(self, name): + # 任何沒設的符號回 MockSymbol(hasattr check 用) + sym = MockSymbol(name) + setattr(self, name, sym) + return sym + + # Mock importlib + os.path.isfile + ctypes.CDLL + mock_spec = mock.MagicMock() + mock_spec.submodule_search_locations = ["/fake/kp_dir"] + + with mock.patch("importlib.util.find_spec", return_value=mock_spec), \ + mock.patch("os.path.isfile", return_value=True), \ + mock.patch.object(ctypes, "CDLL", side_effect=lambda p: MockCDLL()): + lib = bridge._fw_load_libkplus() + + # 驗 kp_connect_devices(int, c_int*, c_int*) -> c_void_p + self.assertEqual( + lib.kp_connect_devices.argtypes, + [ctypes.c_int, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int)], + ) + self.assertEqual(lib.kp_connect_devices.restype, ctypes.c_void_p) + + # 驗 kp_set_timeout(c_void_p, c_int) -> None + self.assertEqual(lib.kp_set_timeout.argtypes, [ctypes.c_void_p, ctypes.c_int]) + self.assertIsNone(lib.kp_set_timeout.restype) + + # 驗 kp_load_firmware_from_file(c_void_p, c_char_p, 
c_char_p) -> c_int + self.assertEqual( + lib.kp_load_firmware_from_file.argtypes, + [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p], + ) + self.assertEqual(lib.kp_load_firmware_from_file.restype, ctypes.c_int) + + # 驗 kp_update_kdp_firmware_from_files(c_void_p, c_char_p, c_char_p, c_bool) -> c_int + self.assertEqual( + lib.kp_update_kdp_firmware_from_files.argtypes, + [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_bool], + ) + self.assertEqual(lib.kp_update_kdp_firmware_from_files.restype, ctypes.c_int) + + # 驗 kp_disconnect_devices(c_void_p) -> c_int + self.assertEqual(lib.kp_disconnect_devices.argtypes, [ctypes.c_void_p]) + self.assertEqual(lib.kp_disconnect_devices.restype, ctypes.c_int) + + # 驗 kp_error_string(c_int) -> c_char_p(若存在) + self.assertEqual(lib.kp_error_string.argtypes, [ctypes.c_int]) + self.assertEqual(lib.kp_error_string.restype, ctypes.c_char_p) + + +# ── Timeout 測試 ────────────────────────────────────────────────────── +class TestFirmwareUpgradeTimeout(FirmwareUpgradeTestBase): + + def test_timeout_kl520(self): + """KL520 升級 > 60s 撞 timeout → reason=timeout. 
+ + Mock time.monotonic 讓每次 call 都讀到一個跳很快的 clock, + 確保第二個 stage check 之前就撞 timeout(60s)。 + """ + legacy_dev = FakeDeviceDescriptor(usb_port_id=1, product_id=0x100, firmware="KDP") + + # 第一次 call 回 0(start_ts)、第二次起回 99s(撞 60s timeout) + clock_values = iter([0.0, 0.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0]) + + def fake_monotonic(): + try: + return next(clock_values) + except StopIteration: + return 99.0 + + monotonic_patch = mock.patch.object( + bridge.time, "monotonic", side_effect=fake_monotonic + ) + + with monotonic_patch, self.stub_scan_returning([legacy_dev]): + result = bridge.handle_firmware_upgrade({"port": "1", "chip": "KL520"}) + + self.assertEqual(result["reason"], "timeout", + msg=f"expected timeout, got: {result}") + self.assertIn(result["stage"], ("preparing", "loading", "flashing", "verifying")) + + def test_timeout_kl720_uses_200s_bound(self): + """KL720 用 200s timeout(KL520 60s 不適用).""" + # 直接驗 constant 是 200(防止後續誤改) + self.assertEqual(bridge.KL720_UPGRADE_TIMEOUT_S, 200) + self.assertEqual(bridge.KL520_UPGRADE_TIMEOUT_S, 60) + + +# ── Graceful shutdown (SIGTERM) 拒絕測試 ───────────────────────────── +class TestFirmwareUpgradeGracefulShutdown(unittest.TestCase): + """Test AC-FW-1.9:升級進行中收到 SIGTERM 不應立即退出.""" + + def setUp(self): + # 確保旗標歸零 + bridge._firmware_upgrade_in_progress = False + bridge._firmware_upgrade_start_ts = 0.0 + + def tearDown(self): + try: + bridge._fw_unregister_sigterm_handler() + except Exception: + pass + bridge._firmware_upgrade_in_progress = False + + @unittest.skipIf(sys.platform == "win32", "SIGTERM not on Windows") + def test_sigterm_rejected_during_upgrade(self): + """升級進行中:SIGTERM handler 拒絕並 push shutdown_rejected event.""" + import signal + + bridge._firmware_upgrade_in_progress = True + start_ts = time.monotonic() + bridge._firmware_upgrade_start_ts = start_ts + + # 攔截 stderr + capture = io.StringIO() + bridge._fw_register_sigterm_handler(start_ts) + + with mock.patch.object(sys, "stderr", capture): + os.kill(os.getpid(), 
signal.SIGTERM) + # 給 signal handler 一點時間執行 + time.sleep(0.05) + + # 驗證 process 沒退出(test 還在跑、能讀到 stderr) + output = capture.getvalue() + # 應該找到 shutdown_rejected event + self.assertIn("shutdown_rejected", output, + msg=f"expected shutdown_rejected in stderr, got: {output}") + # 解析 JSON 驗 schema + for line in output.strip().split("\n"): + if not line.strip(): + continue + try: + ev = json.loads(line) + except json.JSONDecodeError: + continue + if ev.get("event") == "shutdown_rejected": + self.assertEqual(ev["reason"], "firmware_upgrade_in_progress") + self.assertEqual(ev["task"], "firmware_upgrade") + self.assertIn("elapsed_ms", ev) + return + self.fail("shutdown_rejected event 沒找到") + + @unittest.skipIf(sys.platform == "win32", "SIGTERM not on Windows") + def test_sigterm_handler_unregistered_after_upgrade(self): + """升級結束後 SIGTERM handler 應該還原(避免影響後續 server graceful shutdown).""" + import signal + + # 預設 handler(python default 是 SIG_DFL) + prev = signal.signal(signal.SIGTERM, signal.SIG_DFL) + signal.signal(signal.SIGTERM, prev) + + # register + bridge._fw_register_sigterm_handler(time.monotonic()) + # 確認 handler 已換 + current = signal.signal(signal.SIGTERM, signal.SIG_DFL) + self.assertNotEqual(current, signal.SIG_DFL, + msg="handler should be installed during upgrade") + + # 重新 install 後 unregister + bridge._fw_register_sigterm_handler(time.monotonic()) + bridge._fw_unregister_sigterm_handler() + + # unregister 後 handler 應還原(不再是我們的 wrapper) + # 注意:unregister 後可能是原 handler 或 SIG_DFL、我們的 wrapper 不該再生效 + bridge._firmware_upgrade_in_progress = False + # 這個測試重點是 unregister 不報錯、且 _fw_original_sigterm_handler 已歸 None + self.assertIsNone(bridge._fw_original_sigterm_handler) + + +# ── _fw_classify_legacy 邏輯測試 ────────────────────────────────────── +class TestClassifyLegacy(unittest.TestCase): + + def test_kl720_legacy_by_product_id(self): + self.assertTrue(bridge._fw_classify_legacy("any", 0x0200)) + + def test_kl520_legacy_by_firmware_string(self): + 
self.assertTrue(bridge._fw_classify_legacy("KDP", 0x0100)) + + def test_kl520_kdp2_not_legacy(self): + self.assertFalse(bridge._fw_classify_legacy("KDP2", 0x0100)) + self.assertFalse(bridge._fw_classify_legacy("KDP2.5", 0x0100)) + + def test_kl720_kdp2_not_legacy(self): + self.assertFalse(bridge._fw_classify_legacy("KDP2", 0x0720)) + + # ── Reviewer M3 + s3:firmware 字串覆蓋擴展 ──────────────────── + # 原本 substring match `"KDP" in fw and "KDP2" not in fw` 對 USB Boot / + # Loader / 空字串 / KDP3+ 等情境覆蓋不夠或會誤判、改用顯式 enumeration + # + prefix 比對表後、以下 case 必須通過: + + def test_kl520_legacy_empty_firmware_string(self): + """部分 USB Boot state device 不回 firmware string、應視為 legacy.""" + self.assertTrue(bridge._fw_classify_legacy("", 0x0100)) + self.assertTrue(bridge._fw_classify_legacy(None, 0x0100)) + + def test_kl520_legacy_usb_boot_strings(self): + """USB Boot / Loader / Bootloader 等 legacy state 字串都應視為 legacy.""" + for fw in ("USB Boot", "USB Boot Loader", "Loader", "Bootloader", + "USB BOOT", "loader", "BOOTLOADER"): + with self.subTest(firmware=fw): + self.assertTrue( + bridge._fw_classify_legacy(fw, 0x0100), + f"firmware={fw!r} should be classified as legacy", + ) + + def test_kl520_legacy_kdp1_variants(self): + """KDP1 / KDP1.x / KDP1 space 等版本字串都應視為 legacy.""" + for fw in ("KDP1", "KDP1.0", "KDP1.5", "KDP1 v1.0", "kdp1.5"): + with self.subTest(firmware=fw): + self.assertTrue( + bridge._fw_classify_legacy(fw, 0x0100), + f"firmware={fw!r} should be classified as legacy", + ) + + def test_kdp3_kdp4_not_legacy(self): + """Reviewer s3:KDP3 / KDP4+(未來 firmware)不該被 substring match 誤判 legacy.""" + # 原本 substring match `"KDP" in fw and "KDP2" not in fw` 對 KDP3.0 會誤判 legacy + # 改用顯式 prefix 比對表後、KDP3 / KDP4 應視為 modern firmware + for fw in ("KDP3", "KDP3.0", "KDP3.5", "KDP4", "KDP4.2", "KDP9"): + with self.subTest(firmware=fw): + self.assertFalse( + bridge._fw_classify_legacy(fw, 0x0100), + f"firmware={fw!r} (modern KDP3+) should NOT be classified as legacy", + ) + + def 
test_unknown_firmware_default_not_legacy(self): + """未知 firmware 字串保守 default = 不走 loader (避免誤觸 brick device).""" + # 例:未來 firmware 用全新命名 → 不確定走 loader 是否會 brick、保守不走 + # 若 mis-classify、verify 階段會偵測 verify_mismatch、不致 brick + for fw in ("NEF", "K3", "FOO", "RANDOM"): + with self.subTest(firmware=fw): + self.assertFalse( + bridge._fw_classify_legacy(fw, 0x0100), + f"firmware={fw!r} (unknown) should default to not-legacy", + ) + + +# ── _fw_eta_ms 邏輯測試 ────────────────────────────────────────────── +class TestEtaEstimation(unittest.TestCase): + + def test_eta_decreases_through_stages(self): + kl520_etas = [ + bridge._fw_eta_ms("KL520", s) + for s in ("preparing", "loading", "flashing", "verifying") + ] + # ETA 應該遞減 + self.assertEqual(kl520_etas, sorted(kl520_etas, reverse=True)) + + def test_kl720_eta_larger_than_kl520(self): + self.assertGreater( + bridge._fw_eta_ms("KL720", "preparing"), + bridge._fw_eta_ms("KL520", "preparing"), + ) + + +# ── _resolve_firmware_paths_full 測試(用真實檔案)───────────────────── +class TestResolveFirmwarePathsFull(unittest.TestCase): + + def test_kl520_has_loader(self): + """KL520 升級後應該找到 scpu/ncpu/loader 三個檔案.""" + paths = bridge._resolve_firmware_paths_full("KL520") + self.assertIsNotNone(paths["scpu"], "fw_scpu.bin missing") + self.assertIsNotNone(paths["ncpu"], "fw_ncpu.bin missing") + self.assertIsNotNone(paths["loader"], + "fw_loader.bin missing — required for KDP1→KDP2") + self.assertTrue(os.path.exists(paths["scpu"])) + self.assertTrue(os.path.exists(paths["loader"])) + + def test_kl720_has_scpu_ncpu(self): + paths = bridge._resolve_firmware_paths_full("KL720") + self.assertIsNotNone(paths["scpu"]) + self.assertIsNotNone(paths["ncpu"]) + # KL720 沒 loader.bin 預期、不檢查 + self.assertTrue(os.path.exists(paths["scpu"])) + + def test_unknown_chip_returns_none(self): + paths = bridge._resolve_firmware_paths_full("KL999") + self.assertIsNone(paths["scpu"]) + self.assertIsNone(paths["ncpu"]) + + +# ── _fw_emit_progress JSON schema 測試 
─────────────────────────────── +class TestEmitProgress(unittest.TestCase): + + def test_emit_writes_json_line_to_stderr(self): + capture = io.StringIO() + with mock.patch.object(sys, "stderr", capture): + bridge._fw_emit_progress( + "flashing", + message="testing", + elapsed_ms=1234, + eta_ms=5678, + ) + line = capture.getvalue().strip() + ev = json.loads(line) + self.assertEqual(ev["event"], "firmware_progress") + self.assertEqual(ev["stage"], "flashing") + self.assertEqual(ev["percent"], 50) + self.assertEqual(ev["message"], "testing") + self.assertEqual(ev["elapsed_ms"], 1234) + self.assertEqual(ev["eta_ms"], 5678) + + def test_emit_with_extra_includes_failure_fields(self): + capture = io.StringIO() + with mock.patch.object(sys, "stderr", capture): + bridge._fw_emit_progress( + "error", + message="bad", + elapsed_ms=100, + extra={"reason": "scan_not_found", "raw_error": "details"}, + ) + ev = json.loads(capture.getvalue().strip()) + self.assertEqual(ev["stage"], "error") + self.assertEqual(ev["percent"], -1) + self.assertEqual(ev["reason"], "scan_not_found") + self.assertEqual(ev["raw_error"], "details") + + +if __name__ == "__main__": + unittest.main(verbosity=2)