fix(local-tool): DeviceGroup.__del__ access violation 全面修復

上一個 commit (a6a121a) 只修了 script 結束時的 cleanup,但使用者仍在
connect 重試路徑看到 access violation:

  connect attempt 1 failed → 新 connect attempt 2 → GC 回收 attempt 1
  的舊 DeviceGroup → __del__ → kp_disconnect_devices 對已失效的 native
  handle → OSError: access violation

根因:`_device_group = None` 只是清掉 Python reference,舊物件的 __del__
會延遲到下一次 GC cycle(可能發生在新 connect call 的 allocation 時),
此時 native handle 已 invalid。

修法:
- 新增 `_clear_device_group()` helper:先 kp.core.disconnect_devices 把
  native handle 正常釋放(errors silenced),再設 None
- 全檔搜 `_device_group = None` 共 12 處,除了初始宣告(L40)和兩個 helper
  自身(_clear_device_group / _cleanup)以外全部替換為 _clear_device_group()
- 涵蓋所有 code path:connect retry / firmware load reconnect / disconnect
  handler / reset handler / error fallback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
jim800121chen 2026-04-16 17:20:12 +08:00
parent a6a121ae86
commit abbe9d4c0b

View File

@ -38,6 +38,24 @@ except ImportError:
# ── Global state ──────────────────────────────────────────────────────
_device_group = None
def _clear_device_group():
    """Disconnect the global ``_device_group`` (best effort) and reset it to None.

    KneronPLUS SDK's ``DeviceGroup.__del__`` calls ``kp_disconnect_devices`` on
    the native handle. If that handle is already invalid (failed connect /
    stale state) the call fails with 'OSError: access violation'. Merely
    rebinding the global to None defers that call to whenever the old object
    is finalized, so the handle is released explicitly here first, with the
    intent that the later ``__del__`` has nothing left to tear down.

    This function never raises an ``Exception``: a failed disconnect is
    reported through ``_log`` and otherwise ignored, because callers use this
    on error/retry paths where cleanup must not mask the original problem.
    The global is reset to None on every exit path.
    """
    global _device_group
    if _device_group is None:
        # Nothing connected; avoid touching the SDK at all.
        return
    try:
        kp.core.disconnect_devices(_device_group)
    except Exception as disconnect_err:
        # Best-effort cleanup: record the failure for diagnosis instead of
        # swallowing it silently, but do not propagate it.
        _log(f"_clear_device_group: disconnect failed (ignored): {disconnect_err}")
    finally:
        # Always drop the reference, even if the disconnect was interrupted,
        # so no later code path reuses a stale device group.
        _device_group = None
_model_id = None
_model_nef = None
_model_input_size = 224 # updated on model load
@ -732,7 +750,7 @@ def handle_connect(params):
_log(f"KL720: Reconnected after firmware load, pid=0x{target_dev.product_id:04X}, fw={fw_str}")
else:
_log("WARNING: KL720 firmware files not found. Cannot operate with KDP legacy device.")
_device_group = None
_clear_device_group()
return {"error": "KL720 has legacy KDP firmware but KDP2 firmware files not found. "
"Run update_kl720_firmware.py to flash KDP2 permanently."}
@ -756,9 +774,8 @@ def handle_connect(params):
last_err = None
for attempt in range(max_retries):
try:
# Clear any stale device group from previous failed attempt
# to prevent DeviceGroup.__del__ access violation during GC.
_device_group = None
# Clear any stale device group from previous failed attempt.
_clear_device_group()
if use_without_check:
_log(f"{_device_chip}: connect_devices_without_check(usb_port_id={target_dev.usb_port_id}, connectable={target_dev.is_connectable}) attempt {attempt+1}/{max_retries}...")
@ -774,7 +791,7 @@ def handle_connect(params):
last_err = None
break
except Exception as conn_err:
_device_group = None # prevent __del__ crash on stale handle
_clear_device_group()
last_err = conn_err
_log(f"connect attempt {attempt+1} failed: {conn_err}")
if attempt < max_retries - 1:
@ -822,7 +839,7 @@ def handle_connect(params):
time.sleep(5)
# Reconnect after firmware load (with retry)
_device_group = None
_clear_device_group()
for retry in range(3):
try:
descs = kp.core.scan_devices()
@ -861,7 +878,7 @@ def handle_connect(params):
}
except Exception as e:
_device_group = None
_clear_device_group()
return {"error": str(e)}
@ -870,7 +887,7 @@ def handle_disconnect(params):
global _device_group, _model_id, _model_nef, _firmware_loaded
global _model_type, _model_input_size, _device_chip
_device_group = None
_clear_device_group()
_model_id = None
_model_nef = None
_model_type = "tiny_yolov3"
@ -906,7 +923,7 @@ def handle_reset(params):
# Even if it throws, the device usually does reset.
# Clear all state — the device is gone until it re-enumerates.
_device_group = None
_clear_device_group()
_model_id = None
_model_nef = None
_model_type = "tiny_yolov3"