restore: add back local_service_win to repo root

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jim800121chen 2026-03-07 03:15:45 +08:00
parent 5324408b84
commit 74fb9ce6aa
100 changed files with 5683 additions and 0 deletions

140
local_service_win/.gitignore vendored Normal file
View File

@ -0,0 +1,140 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff
instance/
.webassets-cache
# Scrapy stuff
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
#Pipfile.lock
# poetry
#poetry.lock
# pdm
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# IDE
.vscode/
.idea/

Binary file not shown.

View File

@ -0,0 +1 @@
# LocalAPI package

View File

@ -0,0 +1,297 @@
from __future__ import annotations
import ctypes
import hashlib
import json
import os
import sys
import time
from pathlib import Path
from typing import Any, Dict
# Status value pre-loaded into the kp_connect_devices out-parameter; used by
# _connect_with_magic so the connect succeeds even on incompatible (KDP) firmware.
KDP_MAGIC_CONNECTION_PASS = 536173391
KP_SUCCESS = 0  # libkplus success return code
KP_RESET_REBOOT = 0  # reset mode passed to kp_reset_device (reboot)
# USB timing/retry knobs (milliseconds / retry count) used by _reboot_and_reconnect.
USB_WAIT_CONNECT_DELAY_MS = 100
USB_WAIT_AFTER_REBOOT_MS = 2000
USB_WAIT_RETRY_CONNECT_MS = 10
MAX_RETRY_CONNECT_TIMES = 10
def _normalize_code(code: int) -> int:
# Some legacy paths may return int8-like unsigned values (e.g. 253 for -3).
if code > 127:
return code - 256
return code
def _load_libkplus(dist_root: Path) -> ctypes.CDLL:
    """Load libkplus.dll from the bundled dist tree and declare ctypes signatures.

    Raises RuntimeError when the DLL is missing from <dist_root>/kp/lib.
    """
    lib_dir = dist_root / "kp" / "lib"
    dll_path = lib_dir / "libkplus.dll"
    if not dll_path.is_file():
        raise RuntimeError(f"libkplus.dll not found: {dll_path}")
    # On Python 3.8+/Windows, dependent DLLs must be made resolvable explicitly.
    if hasattr(os, "add_dll_directory"):
        os.add_dll_directory(str(lib_dir))
    lib = ctypes.CDLL(str(dll_path))

    # (argtypes, restype) per exported function used by this runner.
    signatures = {
        "kp_connect_devices": (
            [ctypes.c_int, ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_int)],
            ctypes.c_void_p,
        ),
        "kp_set_timeout": ([ctypes.c_void_p, ctypes.c_int], None),
        "kp_reset_device": ([ctypes.c_void_p, ctypes.c_int], ctypes.c_int),
        "kp_load_firmware_from_file": (
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p],
            ctypes.c_int,
        ),
        "kp_update_kdp_firmware_from_files": (
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_bool],
            ctypes.c_int,
        ),
        "kp_disconnect_devices": ([ctypes.c_void_p], ctypes.c_int),
        "kp_scan_devices": ([], ctypes.c_void_p),
    }
    for fn_name, (argtypes, restype) in signatures.items():
        fn = getattr(lib, fn_name)
        fn.argtypes = argtypes
        fn.restype = restype

    # kp_error_string is optional in some library builds.
    if hasattr(lib, "kp_error_string"):
        lib.kp_error_string.argtypes = [ctypes.c_int]
        lib.kp_error_string.restype = ctypes.c_char_p
    return lib
def _errstr(lib: ctypes.CDLL, code: int) -> str:
    """Best-effort human-readable message for a libkplus return code.

    Falls back to the numeric code as a string when the library exposes no
    kp_error_string, or when the lookup fails/returns nothing.
    """
    signed_code = _normalize_code(code)
    if not hasattr(lib, "kp_error_string"):
        return str(code)
    try:
        message = lib.kp_error_string(int(code))
        # Retry with the signed interpretation when the raw value has no entry.
        if not message and signed_code != code:
            message = lib.kp_error_string(int(signed_code))
        if message:
            return message.decode("utf-8", errors="replace")
    except Exception:
        pass
    return str(code)
def _find_port_id_with_kp(dist_root: Path, port_id: int | None, scan_index: int | None) -> int:
    """Resolve the target USB port id.

    An explicit port_id wins; otherwise the bundled legacy kp package is used
    to scan devices and pick one by scan_index (default 0).
    """
    if port_id is not None:
        return int(port_id)
    # Make the bundled KneronPLUS 1.2.1 dist importable, then scan.
    sys.path.insert(0, str(dist_root))
    import kp

    device_list = kp.core.scan_devices()
    if device_list.device_descriptor_number == 0:
        raise RuntimeError("NO_DEVICE: no device found")
    idx = int(scan_index) if scan_index is not None else 0
    if not (0 <= idx < device_list.device_descriptor_number):
        raise RuntimeError(f"INVALID_SCAN_INDEX: {idx}")
    return int(device_list.device_descriptor_list[idx].usb_port_id)
def _file_diag(path_str: str) -> Dict[str, Any]:
p = Path(path_str)
info: Dict[str, Any] = {
"path": str(p),
"name": p.name,
"exists": p.is_file(),
}
if not p.is_file():
return info
data = p.read_bytes()
info["size_bytes"] = len(data)
info["sha256"] = hashlib.sha256(data).hexdigest()
return info
def _scan_diag_with_kp(dist_root: Path) -> Dict[str, Any]:
    """Snapshot every scanned device into a JSON-serializable dict.

    Returns {"count": <n>, "devices": [...]} with per-device scan details,
    using the bundled legacy kp package from dist_root.
    """
    sys.path.insert(0, str(dist_root))
    import kp

    dev_list = kp.core.scan_devices()
    total = int(dev_list.device_descriptor_number)
    devices = []
    for i in range(total):
        desc = dev_list.device_descriptor_list[i]
        devices.append(
            {
                "scan_index": i,
                "usb_port_id": int(desc.usb_port_id),
                "vendor_id": int(desc.vendor_id),
                "product_id": f"0x{int(desc.product_id):04X}",
                "link_speed": str(desc.link_speed),
                "usb_port_path": str(desc.usb_port_path),
                "is_connectable": bool(desc.is_connectable),
                "firmware": str(desc.firmware),
            }
        )
    return {"count": total, "devices": devices}
def _firmware_from_scan(scan_diag: Dict[str, Any], port_id: int) -> str:
for d in scan_diag.get("devices", []):
if int(d.get("usb_port_id", -1)) == int(port_id):
return str(d.get("firmware", "")).upper()
return ""
def _product_id_from_scan(scan_diag: Dict[str, Any], port_id: int) -> int | None:
for d in scan_diag.get("devices", []):
if int(d.get("usb_port_id", -1)) != int(port_id):
continue
raw = d.get("product_id")
if raw is None:
return None
text = str(raw).strip()
try:
if text.lower().startswith("0x"):
return int(text, 16)
return int(text)
except Exception:
return None
return None
def _connect_with_magic(lib: ctypes.CDLL, port_id: int) -> ctypes.c_void_p:
    """Connect to a single device using the magic pass-through status.

    The status out-parameter is pre-loaded with KDP_MAGIC_CONNECTION_PASS
    before calling kp_connect_devices, which lets the connection succeed on
    legacy-firmware devices. Raises RuntimeError on failure.
    """
    ports = (ctypes.c_int * 1)(int(port_id))
    status = ctypes.c_int(KDP_MAGIC_CONNECTION_PASS)
    group = lib.kp_connect_devices(1, ports, ctypes.byref(status))
    if group and status.value == KP_SUCCESS:
        return group
    signed = _normalize_code(status.value)
    raise RuntimeError(
        f"CONNECT_FAILED: raw_code={status.value}, signed_code={signed}, msg={_errstr(lib, status.value)}"
    )
def _reboot_and_reconnect(lib: ctypes.CDLL, device_group: ctypes.c_void_p, port_id: int) -> ctypes.c_void_p:
    """Reboot the device, drop the stale handle, then reconnect with retries.

    Returns a fresh device-group handle; raises RuntimeError when the reset
    fails or every reconnect attempt is exhausted.
    """
    time.sleep(USB_WAIT_CONNECT_DELAY_MS / 1000.0)
    ret = lib.kp_reset_device(device_group, KP_RESET_REBOOT)
    if ret != KP_SUCCESS:
        raise RuntimeError(
            f"RESET_FAILED: raw_code={ret}, signed_code={_normalize_code(ret)}, msg={_errstr(lib, ret)}"
        )
    # Give the device time to reboot and re-enumerate before reconnecting.
    time.sleep(USB_WAIT_AFTER_REBOOT_MS / 1000.0)
    lib.kp_disconnect_devices(device_group)
    for _attempt in range(MAX_RETRY_CONNECT_TIMES + 1):
        try:
            return _connect_with_magic(lib, port_id)
        except RuntimeError:
            time.sleep(USB_WAIT_RETRY_CONNECT_MS / 1000.0)
    raise RuntimeError("RECONNECT_FAILED: max retry exceeded")
def main() -> None:
    """Entry point for the legacy (KneronPLUS 1.2.1) firmware-load subprocess.

    Expects exactly one argv: a JSON payload with keys legacy_dist_root,
    scpu_path, ncpu_path, and optional port_id / scan_index / timeout_ms /
    loader_path. Prints one JSON object to stdout: {"ok": true, ...} on
    success, or {"ok": false, "stage": ..., "error": ..., "diag": ...} and
    exits with status 1 on failure.
    """
    stage = "init"  # tracks progress so failures report where they happened
    diag: Dict[str, Any] = {}
    try:
        if len(sys.argv) != 2:
            raise RuntimeError("missing json payload argument")
        req: Dict[str, Any] = json.loads(sys.argv[1])
        dist_root = Path(req["legacy_dist_root"])
        lib = _load_libkplus(dist_root)
        stage = "resolve_port"
        port_id = _find_port_id_with_kp(dist_root, req.get("port_id"), req.get("scan_index"))
        timeout_ms = req.get("timeout_ms", 5000)
        scpu_path = req["scpu_path"]
        ncpu_path = req["ncpu_path"]
        # Default loader sits next to the SCPU firmware file.
        loader_path = req.get("loader_path") or str(Path(scpu_path).with_name("fw_loader.bin"))
        scan_diag = _scan_diag_with_kp(dist_root)
        detected_firmware = _firmware_from_scan(scan_diag, int(port_id))
        selected_product_id = _product_id_from_scan(scan_diag, int(port_id))
        # Diagnostics are built up-front so they are reported even on failure.
        diag = {
            "selected_port_id": int(port_id),
            "selected_product_id": (
                f"0x{int(selected_product_id):04X}" if selected_product_id is not None else None
            ),
            "timeout_ms": int(timeout_ms) if timeout_ms is not None else None,
            "firmware_files": {
                "loader": _file_diag(loader_path),
                "scpu": _file_diag(scpu_path),
                "ncpu": _file_diag(ncpu_path),
            },
            "scan": scan_diag,
            "detected_firmware": detected_firmware,
        }
        stage = "connect"
        device_group = _connect_with_magic(lib, port_id)
        stage = "set_timeout"
        if timeout_ms is not None:
            lib.kp_set_timeout(device_group, int(timeout_ms))
        method = ""
        if detected_firmware == "KDP":
            # Old KDP firmware: flash the USB-boot loader first, then load the
            # KDP2 scpu/ncpu images (two-step recovery sequence).
            if not Path(loader_path).is_file():
                raise RuntimeError(f"LOADER_NOT_FOUND: {loader_path}")
            stage = "fw_switch_to_usb_boot_loader"
            ret = lib.kp_update_kdp_firmware_from_files(
                device_group,
                loader_path.encode("utf-8"),
                None,
                True,
            )
            method = "kp_update_kdp_firmware_from_files(loader)->kp_load_firmware_from_file"
            if ret != KP_SUCCESS:
                stage = "disconnect_after_fw_fail"
                lib.kp_disconnect_devices(device_group)
                raise RuntimeError(
                    f"FW_LOAD_FAILED: method={method}, raw_code={ret}, msg={_errstr(lib, ret)}"
                )
            stage = "fw_load_kdp2_after_loader"
            # Re-apply the timeout before the second (potentially long) call.
            if timeout_ms is not None:
                lib.kp_set_timeout(device_group, int(timeout_ms))
            ret = lib.kp_load_firmware_from_file(
                device_group,
                scpu_path.encode("utf-8"),
                ncpu_path.encode("utf-8"),
            )
        else:
            # Non-KDP firmware: load the scpu/ncpu images directly.
            stage = "fw_load_kdp2_direct"
            method = "kp_load_firmware_from_file_direct"
            ret = lib.kp_load_firmware_from_file(
                device_group,
                scpu_path.encode("utf-8"),
                ncpu_path.encode("utf-8"),
            )
        if ret != KP_SUCCESS:
            stage = "disconnect_after_fw_fail"
            lib.kp_disconnect_devices(device_group)
            raise RuntimeError(
                f"FW_LOAD_FAILED: method={method}, raw_code={ret}, msg={_errstr(lib, ret)}"
            )
        stage = "disconnect_after_fw_success"
        # After firmware update with auto_reboot, disconnect may fail due to USB re-enumeration.
        disc = lib.kp_disconnect_devices(device_group)
        if disc != KP_SUCCESS:
            disc_info = f"disconnect_nonzero_raw={disc},signed={_normalize_code(disc)}"
        else:
            disc_info = "disconnect_ok"
        print(
            json.dumps(
                {
                    "ok": True,
                    "port_id": int(port_id),
                    "connect_mode": "kp_connect_devices_with_magic_pass",
                    "firmware_method": method,
                    "disconnect_info": disc_info,
                    "diag": diag,
                }
            )
        )
    except Exception as exc:
        # Report the failing stage plus whatever diagnostics were collected.
        print(json.dumps({"ok": False, "stage": stage, "error": str(exc), "diag": diag}))
        sys.exit(1)


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,293 @@
from __future__ import annotations
import base64
import math
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Sequence, Tuple
import numpy as np
# Default (width, height) anchor pairs, one inner list per detection level.
# NOTE(review): these look like the standard Tiny-YOLOv3 anchor set — confirm
# they match the anchors the deployed models were trained with.
YOLO_DEFAULT_ANCHORS: List[List[Tuple[float, float]]] = [
    [(10.0, 14.0), (23.0, 27.0), (37.0, 58.0)],
    [(81.0, 82.0), (135.0, 169.0), (344.0, 319.0)],
]
@dataclass
class Box:
    """One detection box in input-image pixel coordinates."""

    # Predicted class index.
    cls: int
    # Confidence score (meaning depends on the decoder's score_mode).
    score: float
    # Corner coordinates: (x1, y1) top-left, (x2, y2) bottom-right.
    x1: float
    y1: float
    x2: float
    y2: float
def _sigmoid(v: np.ndarray | float) -> np.ndarray | float:
return 1.0 / (1.0 + np.exp(-v))
def decode_outputs(raw_outputs: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Decode base64 little-endian float32 payloads into numpy arrays.

    Each input entry carries "shape" and "data_base64"; the decoded element
    count must match prod(shape) (when a shape is given) or a RuntimeError
    is raised. "node_idx" defaults to the positional index.
    """
    decoded: List[Dict[str, Any]] = []
    for idx, node in enumerate(raw_outputs):
        shape = list(node.get("shape") or [])
        blob = base64.b64decode(str(node.get("data_base64") or ""))
        arr = np.frombuffer(blob, dtype="<f4")
        expected = int(np.prod(shape)) if shape else arr.size
        if expected != arr.size:
            raise RuntimeError(f"Output node {idx} size mismatch: expected={expected}, got={arr.size}")
        decoded.append(
            {
                "idx": idx,
                "node_idx": int(node.get("node_idx", idx)),
                "shape": shape,
                "data": arr,
            }
        )
    return decoded
def _pick_yolo_nodes(all_nodes: Sequence[Dict[str, Any]], num_classes: int) -> List[Dict[str, Any]]:
picked: List[Dict[str, Any]] = []
for o in all_nodes:
shape = o["shape"]
if len(shape) != 4 or shape[0] != 1:
continue
ch = int(shape[1])
if ch % (5 + num_classes) != 0:
continue
picked.append(o)
picked.sort(key=lambda n: int(n["shape"][2]), reverse=True)
return picked
def decode_yolo_common(
    all_nodes: Sequence[Dict[str, Any]],
    mode: str,
    num_classes: int,
    input_w: int,
    input_h: int,
    conf_th: float,
    use_sigmoid: bool = True,
    use_xy_sigmoid: bool = True,
    score_mode: str = "obj_cls",
    anchors_by_level: Optional[List[List[Tuple[float, float]]]] = None,
) -> List[Box]:
    """Decode anchor-based YOLO head outputs into Box detections.

    all_nodes: decoded output nodes (see decode_outputs); nodes are filtered
        to [1,C,H,W] tensors with C divisible by (5 + num_classes).
    mode: "yolov5" uses the v5 xy/wh transform; anything else uses the
        v2/v3-style transform (exp on wh, cell-offset xy).
    input_w/input_h: network input size in pixels; boxes are returned in
        this coordinate space.
    conf_th: detections scoring below this threshold are dropped.
    use_sigmoid/use_xy_sigmoid: apply sigmoid to objectness+class / xy terms.
    score_mode: "obj" (objectness only), "cls" (best class prob only), or
        anything else for obj * cls (the default).
    anchors_by_level: per-level (w, h) anchor lists; defaults to
        YOLO_DEFAULT_ANCHORS. Levels/anchors beyond the list reuse the last
        entry (min() clamping below).
    Raises RuntimeError when no YOLO-shaped nodes are found.
    """
    nodes = _pick_yolo_nodes(all_nodes, num_classes)
    if not nodes:
        raise RuntimeError("No YOLO-like [1,C,H,W] output nodes found")
    anchors_levels = anchors_by_level or YOLO_DEFAULT_ANCHORS
    boxes: List[Box] = []
    # Channels per anchor: x, y, w, h, objectness, then class scores.
    attrs = 5 + num_classes
    for lv, o in enumerate(nodes):
        _, ch, gh, gw = o["shape"]
        na = int(ch // attrs)  # anchors at this level
        data: np.ndarray = o["data"]
        # Clamp so extra levels reuse the last anchor set.
        anchors = anchors_levels[min(lv, len(anchors_levels) - 1)]

        def at(channel_idx: int, y: int, x: int) -> float:
            # Flat CHW indexing into the 1-D decoded buffer.
            return float(data[channel_idx * gh * gw + y * gw + x])

        for a in range(na):
            aw, ah = anchors[min(a, len(anchors) - 1)]
            base = a * attrs
            for y in range(gh):
                for x in range(gw):
                    tx = at(base + 0, y, x)
                    ty = at(base + 1, y, x)
                    tw = at(base + 2, y, x)
                    th = at(base + 3, y, x)
                    to = at(base + 4, y, x)
                    obj = float(_sigmoid(to) if use_sigmoid else to)
                    # Argmax over class channels.
                    best_cls = -1
                    best_prob = -1e9
                    for k in range(num_classes):
                        p = at(base + 5 + k, y, x)
                        p = float(_sigmoid(p) if use_sigmoid else p)
                        if p > best_prob:
                            best_prob = p
                            best_cls = k
                    if score_mode == "obj":
                        score = obj
                    elif score_mode == "cls":
                        score = best_prob
                    else:
                        score = obj * best_prob
                    if score < conf_th:
                        continue
                    if mode == "yolov5":
                        # YOLOv5 transform: xy scaled by 2 with -0.5 offset,
                        # wh is (2*sigmoid)^2 times the anchor.
                        sx = input_w / gw
                        sy = input_h / gh
                        txv = float(_sigmoid(tx) if use_xy_sigmoid else tx)
                        tyv = float(_sigmoid(ty) if use_xy_sigmoid else ty)
                        bx = (txv * 2.0 - 0.5 + x) * sx
                        by = (tyv * 2.0 - 0.5 + y) * sy
                        bw = (float(_sigmoid(tw)) * 2.0) ** 2 * aw
                        bh = (float(_sigmoid(th)) * 2.0) ** 2 * ah
                    else:
                        # Classic v2/v3 transform: cell offset + exp(wh)*anchor.
                        txv = float(_sigmoid(tx) if use_xy_sigmoid else tx)
                        tyv = float(_sigmoid(ty) if use_xy_sigmoid else ty)
                        bx = (txv + x) / gw * input_w
                        by = (tyv + y) / gh * input_h
                        bw = aw * math.exp(tw)
                        bh = ah * math.exp(th)
                    boxes.append(
                        Box(
                            cls=best_cls,
                            score=score,
                            x1=bx - bw / 2.0,
                            y1=by - bh / 2.0,
                            x2=bx + bw / 2.0,
                            y2=by + bh / 2.0,
                        )
                    )
    return boxes
def _auto_fcos_indices(all_nodes: Sequence[Dict[str, Any]], num_classes: int) -> List[Tuple[int, int, int, int]]:
valid = [o for o in all_nodes if len(o["shape"]) == 4 and o["shape"][0] == 1]
cls_nodes = [o for o in valid if int(o["shape"][1]) == num_classes]
reg_nodes = [o for o in valid if int(o["shape"][1]) == 4]
ctr_nodes = [o for o in valid if int(o["shape"][1]) == 1]
by_hw: Dict[Tuple[int, int], Dict[str, Dict[str, Any]]] = {}
for n in cls_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["cls"] = n
for n in reg_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["reg"] = n
for n in ctr_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["ctr"] = n
levels: List[Tuple[int, int, int, int]] = []
for (h, _w), items in by_hw.items():
if not {"cls", "reg", "ctr"}.issubset(items.keys()):
continue
levels.append(
(
h,
int(items["cls"]["node_idx"]),
int(items["reg"]["node_idx"]),
int(items["ctr"]["node_idx"]),
)
)
levels.sort(key=lambda x: x[0], reverse=True)
strides = [8, 16, 32, 64, 128]
return [
(cls_i, reg_i, ctr_i, strides[min(i, len(strides) - 1)])
for i, (_h, cls_i, reg_i, ctr_i) in enumerate(levels)
]
def decode_fcos(
    all_nodes: Sequence[Dict[str, Any]],
    num_classes: int,
    input_w: int,
    input_h: int,
    conf_th: float,
    use_sigmoid: bool = True,
    score_mode: str = "obj_cls",
) -> List[Box]:
    """Decode FCOS-style cls/reg/ctr head outputs into Box detections.

    all_nodes: decoded output nodes (see decode_outputs). Heads are
        auto-matched by _auto_fcos_indices (cls/reg/ctr per grid size).
    input_w/input_h: network input size; boxes are clipped to this range.
    conf_th: detections scoring below this threshold are dropped.
    use_sigmoid: apply sigmoid to class scores and centerness.
    score_mode: "obj" uses centerness, "cls" uses best class prob, anything
        else (default) uses sqrt(cls * centerness).
    Raises RuntimeError when no complete cls/reg/ctr level can be matched.
    """
    levels = _auto_fcos_indices(all_nodes, num_classes)
    if not levels:
        raise RuntimeError("Cannot auto match FCOS cls/reg/ctr nodes")
    boxes: List[Box] = []
    by_idx = {int(n["node_idx"]): n for n in all_nodes}
    for cls_idx, reg_idx, ctr_idx, stride in levels:
        cls_node = by_idx.get(cls_idx)
        reg_node = by_idx.get(reg_idx)
        ctr_node = by_idx.get(ctr_idx)
        if not cls_node or not reg_node or not ctr_node:
            continue
        gh = int(cls_node["shape"][2])
        gw = int(cls_node["shape"][3])
        cls_data: np.ndarray = cls_node["data"]
        reg_data: np.ndarray = reg_node["data"]
        ctr_data: np.ndarray = ctr_node["data"]

        def at(node_data: np.ndarray, channel_idx: int, y: int, x: int) -> float:
            # Flat CHW indexing into the 1-D decoded buffer.
            return float(node_data[channel_idx * gh * gw + y * gw + x])

        cls_channels = int(cls_node["shape"][1])
        for y in range(gh):
            for x in range(gw):
                ctr = at(ctr_data, 0, y, x)
                ctr = float(_sigmoid(ctr) if use_sigmoid else ctr)
                # Argmax over the available class channels.
                best_cls = -1
                best_prob = -1e9
                for k in range(min(num_classes, cls_channels)):
                    p = at(cls_data, k, y, x)
                    p = float(_sigmoid(p) if use_sigmoid else p)
                    if p > best_prob:
                        best_prob = p
                        best_cls = k
                if score_mode == "obj":
                    score = ctr
                elif score_mode == "cls":
                    score = best_prob
                else:
                    # Geometric mean of class prob and centerness.
                    score = math.sqrt(max(0.0, best_prob * ctr))
                if score < conf_th:
                    continue
                # l/t/r/b: distances from the cell center to the box edges.
                l = max(0.0, at(reg_data, 0, y, x))
                t = max(0.0, at(reg_data, 1, y, x))
                r = max(0.0, at(reg_data, 2, y, x))
                b = max(0.0, at(reg_data, 3, y, x))
                cx = (x + 0.5) * stride
                cy = (y + 0.5) * stride
                x1 = max(0.0, min(input_w, cx - l))
                y1 = max(0.0, min(input_h, cy - t))
                x2 = max(0.0, min(input_w, cx + r))
                y2 = max(0.0, min(input_h, cy + b))
                if x2 <= x1 or y2 <= y1:
                    continue
                boxes.append(Box(cls=best_cls, score=score, x1=x1, y1=y1, x2=x2, y2=y2))
    return boxes
def _iou(a: Box, b: Box) -> float:
xx1 = max(a.x1, b.x1)
yy1 = max(a.y1, b.y1)
xx2 = min(a.x2, b.x2)
yy2 = min(a.y2, b.y2)
w = max(0.0, xx2 - xx1)
h = max(0.0, yy2 - yy1)
inter = w * h
if inter <= 0:
return 0.0
area_a = max(0.0, a.x2 - a.x1) * max(0.0, a.y2 - a.y1)
area_b = max(0.0, b.x2 - b.x1) * max(0.0, b.y2 - b.y1)
return inter / max(1e-9, area_a + area_b - inter)
def nms(boxes: Sequence[Box], iou_th: float, max_out: int) -> List[Box]:
by_cls: Dict[int, List[Box]] = {}
for b in boxes:
by_cls.setdefault(b.cls, []).append(b)
kept: List[Box] = []
for cls_boxes in by_cls.values():
cls_boxes = sorted(cls_boxes, key=lambda b: b.score, reverse=True)
picked: List[Box] = []
while cls_boxes:
cur = cls_boxes.pop(0)
picked.append(cur)
cls_boxes = [b for b in cls_boxes if _iou(cur, b) <= iou_th]
kept.extend(picked)
kept.sort(key=lambda b: b.score, reverse=True)
return kept[:max_out]

Binary file not shown.

View File

@ -0,0 +1,544 @@
# Kneron Dongle PoC (Windows) - Strategy
## Scope (PoC)
- OS: Windows only.
- Devices: KL520, KL720.
- Control path: Browser -> localhost HTTP service -> KneronPLUS (kp wrapper + DLL).
- Non-goals: macOS/Linux support, production hardening, installer automation for all platforms.
## Required Installation (Windows)
Before running the local service, install Python dependencies and the KneronPLUS wheel.
### 1. Install dependencies from requirements
```powershell
cd local_service_win
python -m pip install -r requirements.txt
```
### 2. Install KneronPLUS wheel
```powershell
cd local_service_win
python -m pip install .\KneronPLUS-3.1.2-py3-none-any.whl
```
### 3. (Optional) Force reinstall KneronPLUS wheel
Use this when switching versions or seeing package mismatch issues.
```powershell
cd local_service_win
python -m pip install --force-reinstall .\KneronPLUS-3.1.2-py3-none-any.whl
```
## Dependency Strategy
- Open-source packages installed by pip:
- `fastapi`, `uvicorn`, `numpy`, `PyQt5`, `opencv-python`, `pyinstaller`, `pyarmor`
- Non-pip dependency:
- `KneronPLUS` (installed from local wheel)
- Bundled runtime (not pip):
- `third_party/Kneron_DFUT` is copied into this repo and used by LocalAPI to recover old firmware in one tool.
- `third_party/kneron_plus_1_2_1/dist` is extracted from `KneronPLUS-1.2.1` wheel and used by a subprocess runner for old-device firmware update experiments.
## Cross-Project Workflow
This repo is the main PoC implementation. If additional references are required, we can switch to
other repos during the same conversation and return here as needed. This is workable.
## High-Level Architecture
- Browser UI
- Talks to localhost HTTP service for control APIs.
- Uses WebSocket for streaming inference.
- No direct USB access from browser.
- Local Service (Windows)
- Owns Kneron device lifecycle and IO.
- Uses Python `kp` high-level API (backed by `libkplus.dll`).
- Exposes HTTP endpoints for scan/connect/model/firmware/inference.
- KneronPLUS Runtime
- `kp` Python wrapper + DLLs + required USB driver.
- Version pinned inside installer to avoid mismatches.
## API Spec (PoC)
### Conventions
- Base URL: `http://127.0.0.1:4398`
- WebSocket URL: `ws://127.0.0.1:4398/ws`
- Response envelope:
```json
{
"ok": true,
"data": {},
"error": null
}
```
```json
{
"ok": false,
"data": null,
"error": { "code": "KP_ERROR_CONNECT_FAILED", "message": "..." }
}
```
### `GET /health`
Response
```json
{ "ok": true, "data": { "status": "up" }, "error": null }
```
### `GET /version`
Response
```json
{
"ok": true,
"data": {
"service_version": "0.1.0",
"kneronplus_version": "3.0.0"
},
"error": null
}
```
### `GET /devices`
Response
```json
{
"ok": true,
"data": {
"devices": [
{
"scan_index": 0,
"usb_port_id": 32,
"product_id": 0x520,
"link_speed": "High-Speed",
"usb_port_path": "1-3",
"kn_number": 12345,
"is_connectable": true,
"firmware": "KDP2"
}
]
},
"error": null
}
```
### `POST /devices/connect`
Request
```json
{ "port_id": 32 }
```
Response
```json
{
"ok": true,
"data": {
"connected": true,
"port_id": 32
},
"error": null
}
```
### `POST /devices/connect_force`
Notes
- Force connection without firmware validation.
- Use this when firmware is incompatible and you need to call `/firmware/load` first.
Request
```json
{ "port_id": 32 }
```
Response
```json
{
"ok": true,
"data": {
"connected": true,
"port_id": 32,
"forced": true
},
"error": null
}
```
### `POST /devices/disconnect`
Response
```json
{ "ok": true, "data": { "connected": false }, "error": null }
```
### `GET /driver/check`
Notes
- Query currently connected Kneron USB devices from Windows PnP.
- Reports whether each entry is bound to WinUSB.
Response
```json
{
"ok": true,
"data": {
"entries": [
{
"pnp_device_id": "USB\\VID_3231&PID_0720\\...",
"service": "WinUSB",
"pid_hex": "0x0720",
"product_name": "KL720",
"is_winusb": true
}
],
"all_connected_kneron_are_winusb": true
},
"error": null
}
```
### `POST /driver/install`
Notes
- Install/replace driver using `kp.core.install_driver_for_windows`.
- Requires Administrator privilege on Windows.
Request
```json
{ "target": "KL720", "force": false }
```
### `POST /driver/ensure`
Notes
- Check connected device driver binding, auto install if not WinUSB (or `force=true`).
- `target`: `ALL` | `KL520` | `KL720` | `KL630` | `KL730` | `KL830`
Request
```json
{ "target": "ALL", "force": false }
```
### `POST /firmware/load`
Request
```json
{
"scpu_path": "C:\\path\\fw_scpu.bin",
"ncpu_path": "C:\\path\\fw_ncpu.bin"
}
```
Response
```json
{ "ok": true, "data": { "loaded": true }, "error": null }
```
### `POST /firmware/legacy-plus121/load`
Notes
- Experimental endpoint for old hardware/firmware path.
- Independent route from DFUT.
- Runs a subprocess with bundled `KneronPLUS 1.2.1` package and calls `libkplus.dll` directly by `ctypes`.
- Single-endpoint auto flow:
- Scan target device firmware state.
- If firmware is `KDP`: first call loader (`fw_loader.bin`) to switch USB-boot, then call `kp_load_firmware_from_file(scpu,ncpu)`.
- If firmware is not `KDP`: call `kp_load_firmware_from_file(scpu,ncpu)` directly.
- Finally disconnect.
- Diagnostics include selected port, detected firmware, scan snapshot, and firmware file metadata (path/size/sha256).
Request
```json
{
"port_id": 32,
"loader_path": "C:\\path\\fw_loader.bin",
"scpu_path": "C:\\path\\fw_scpu.bin",
"ncpu_path": "C:\\path\\fw_ncpu.bin"
}
```
Response
```json
{
"ok": true,
"data": {
"loaded": true,
"legacy_plus_version": "1.2.1"
},
"error": null
}
```
### `POST /firmware/legacy-upgrade/kl520`
Notes
- Used for old KL520 firmware recovery path.
- Runs bundled DFUT console with `--kl520-update`.
Request
```json
{ "port_id": 32 }
```
Response
```json
{
"ok": true,
"data": {
"upgraded": true,
"target": "KL520",
"port_id": 32
},
"error": null
}
```
### `POST /firmware/legacy-upgrade/kl720`
Notes
- Used for old KL720 / KL720 legacy recovery path.
- Runs bundled DFUT console with `--kl720-update`.
Request
```json
{ "port_id": 32 }
```
Response
```json
{
"ok": true,
"data": {
"upgraded": true,
"target": "KL720",
"port_id": 32
},
"error": null
}
```
### `POST /models/load`
Request
```json
{ "nef_path": "C:\\path\\model.nef" }
```
Response
```json
{
"ok": true,
"data": {
"model_id": 1,
"input_tensor_count": 1,
"output_tensor_count": 1
},
"error": null
}
```
### `POST /models/clear`
Notes
- PoC uses device reset to clear RAM model.
Response
```json
{ "ok": true, "data": { "cleared": true }, "error": null }
```
### `POST /models/reset`
Notes
- Alias of `/models/clear`, uses device reset to clear RAM model.
Response
```json
{ "ok": true, "data": { "reset": true }, "error": null }
```
### `POST /inference/run`
Request (image inference, single image)
```json
{
"model_id": 1,
"image_format": "RGB888",
"width": 224,
"height": 224,
"image_base64": "..."
}
```
Response
```json
{
"ok": true,
"data": {
"outputs": [
{ "node_idx": 0, "dtype": "float", "shape": [1, 1000], "data_base64": "..." }
]
},
"error": null
}
```
### `POST /inference/run_video`
Notes
- Video file upload endpoint for continuous inference in PoC.
- Response is NDJSON stream (`application/x-ndjson`), one JSON object per processed frame.
- ByteTrack-specific tracking output is out of scope for current PoC; this endpoint returns raw model outputs per frame.
Request (`multipart/form-data`)
- `file`: video file (`.mp4/.avi/...`)
- `model_id`: integer
- `image_format`: `RGB565` | `RGBA8888` | `RAW8`
- `channels_ordering`: optional, default `DEFAULT`
- `output_dtype`: optional, default `float32`
- `sample_every_n`: optional, default `1`
- `max_frames`: optional
Response line example (NDJSON)
```json
{
"ok": true,
"data": {
"frame_index": 0,
"width": 640,
"height": 640,
"outputs": [
{ "node_idx": 0, "dtype": "float32", "shape": [1, 255, 80, 80], "data_base64": "..." }
]
},
"error": null
}
```
### `GET /tools/video-inference`
Notes
- Serves a single-page visual test tool from LocalAPI.
- Supports two input sources:
- Video file
- Webcam (browser `getUserMedia`)
- Frontend calls `POST /inference/run` frame-by-frame and draws decoded boxes on canvas.
- Purpose: PoC visual validation for YOLOv5/FCOS/TinyYOLO style models.
- ByteTrack visualization/tracking is intentionally excluded in current phase.
### `WS /ws` (streaming inference)
Notes
- For camera/video stream, use WebSocket for low-latency send/receive.
- HTTP endpoints remain for control operations during PoC.
Message (client -> server)
```json
{
"type": "inference_frame",
"model_id": 1,
"image_format": "RGB888",
"width": 224,
"height": 224,
"image_base64": "..."
}
```
Message (server -> client)
```json
{
"type": "inference_result",
"outputs": [
{ "node_idx": 0, "dtype": "float", "shape": [1, 1000], "data_base64": "..." }
]
}
```
### `POST /firmware/update`
- Reserved for flash update (later; may need C wrapper).
## Packaging (PoC)
- Single Windows installer:
- Includes driver, `kp` wrapper, DLLs, and service.
- Ensures fixed versions (no external Kneron tools required).
- Reference from `C:\Users\user\Documents\KNEOX\README.md`:
- Install KneronPLUS wheel from `external/kneron_plus_{version}/package/{platform}/`
- `pip install KneronPLUS-{version}-py3-none-any.whl` (use `--force-reinstall` if needed)
- PyInstaller must bundle `kp\lib` with the app.
- Example:
```shell
pyinstaller --onefile --windowed main.py --additional-hooks-dir=hooks --add-data "uxui;uxui" --add-data "src;src" --add-data "C:\path\to\venv\Lib\site-packages\kp\lib;kp\lib"
```
## Risks / Constraints
- Flash model update / flash firmware update may not be exposed in Python.
- Use C library or request Kneron to expose in wrapper if required.
- Browser security model prevents direct USB access; local service is required.
- Driver install/update on Windows may require Administrator privileges (`install_driver_for_windows` can fail without elevation).
- MEMO: define production approach for privilege handling (installer-time elevation, helper process with UAC prompt, or enterprise pre-install policy) so end-user flow does not get blocked.
## API Test Progress (Windows PoC)
Updated: 2026-03-04
### Completed
- `GET /health`
- `GET /version`
- `GET /devices`
- `POST /devices/connect`
- `POST /devices/connect_force`
- `POST /devices/disconnect`
- `GET /driver/check`
- `POST /driver/install`
- `POST /driver/ensure`
- `POST /firmware/load`
- `POST /firmware/legacy-plus121/load`
- `POST /firmware/legacy-upgrade/kl520`
- `POST /firmware/legacy-downgrade/kl520`
- `POST /firmware/legacy-upgrade/kl720`
- `POST /models/load`
- `POST /models/clear`
- `POST /models/reset`
- `POST /inference/run`
- `POST /inference/run_video`
- `GET /tools/video-inference`
### Pending
- None (for currently implemented HTTP endpoints).
### Not Implemented Yet (API spec)
- `WS /ws`
- `POST /firmware/update`
### Paired Test Requirement
- `POST /models/load` and `POST /inference/run` must be tested as a pair in the same flow.
- Test pairs are defined in `local_service_win/TestRes/TEST_PAIRS.md`.
### Video/Webcam PoC Test Flow
1. Start LocalAPI service.
2. Connect device and load model:
- `POST /devices/connect`
- `POST /models/load`
3. Visual tool path:
- Open `http://127.0.0.1:4398/tools/video-inference`
- Select source (`Video File` or `Webcam`)
- Use default model presets (YOLOv5=20005, FCOS=20004, TinyYOLO=19), then click `Start`
4. API-only path:
- Use `POST /inference/run_video` with `multipart/form-data`
- Start with small values: `sample_every_n=3`, `max_frames=30`
5. Expected:
- Continuous frame-wise inference results are returned.
- Visual page overlays detection boxes on displayed frames.
6. Current scope note:
- ByteTrack tracking output (`track_id` continuity) is not covered in this PoC phase.
### Model/Inference Test Pairs
#### KL520
1. YOLOv5 (model zoo)
- Model: `kl520_20005_yolov5-noupsample_w640h640.nef`
- Image: `one_bike_many_cars_800x800` (Base64)
2. FCOS (model zoo)
- Model: `kl520_20004_fcos-drk53s_w512h512.nef`
- Image: `one_bike_many_cars_800x800` (Base64)
3. Tiny YOLO v3 (generic demo)
- Model: `models_520.nef`
- Image: `bike_cars_street_224x224` (Base64)
4. Tiny YOLO v3 (multithread demo)
- Model: `models_520.nef`
- Image: `bike_cars_street_224x224` (Base64)
#### KL720
1. YOLOv5 (model zoo)
- Model: `kl720_20005_yolov5-noupsample_w640h640.nef`
- Image: `one_bike_many_cars_800x800` (Base64)
2. FCOS (model zoo)
- Model: `kl720_20004_fcos-drk53s_w512h512.nef`
- Image: `one_bike_many_cars_800x800` (Base64)
## Next Steps (After Strategy)
- Confirm endpoint payloads (JSON schema).
- Decide service framework (FastAPI/Flask).
- Define error model and device state machine.
- Plan installer workflow (driver + service).
## Legacy Firmware Story And Recommended Handling
- Background:
- Many shipped devices are still on old KDP firmware or KL720 legacy states.
- In that state, `kp.core.connect_devices` and even `connect_devices_without_check` may still return `KP_ERROR_INVALID_FIRMWARE_24`.
- Goal:
- Keep user operations in one tool without requiring a separate DFUT install flow.
- Recommended handling:
1. User scans devices via `GET /devices`.
2. If normal connect fails with `KP_ERROR_INVALID_FIRMWARE_24`, call:
- `POST /firmware/legacy-upgrade/kl520` or
- `POST /firmware/legacy-upgrade/kl720`
3. Re-scan and reconnect using `POST /devices/connect`.
4. Continue with `POST /firmware/load` (if needed), `POST /models/load`, and inference.
- Experimental alternative:
- If DFUT route is blocked on specific old-device states, test `POST /firmware/legacy-plus121/load` as an independent non-DFUT legacy SDK compatibility path.
- Notes:
- Recovery endpoints use bundled `third_party/Kneron_DFUT/bin/KneronDFUT.exe`.
- This keeps firmware recovery and inference service in the same product boundary.
## Validation Memo (Next)
- Record and verify on newer KneronPLUS versions:
- For KL520 old KDP state, `loader -> load_firmware_from_file(scpu,ncpu)` sequence works in legacy-plus121 path.
- Hypothesis: the same sequence may also work on newer PLUS runtime.
- Action later: add an explicit validation task on current PLUS branch and capture pass/fail matrix by device FW state.

View File

@ -0,0 +1,271 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>BMP to RGB565</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 16px;
background: #f7f7f7;
}
.container {
max-width: 1000px;
margin: 0 auto;
}
.controls {
margin: 12px 0;
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
}
textarea {
width: 100%;
min-height: 140px;
padding: 12px;
box-sizing: border-box;
font-family: Consolas, monospace;
}
img {
max-width: 100%;
border: 1px solid #ddd;
background: #fff;
display: block;
margin: 8px 0;
}
.hint {
color: #555;
font-size: 13px;
}
.meta {
margin: 8px 0;
font-family: Consolas, monospace;
background: #fff;
border: 1px solid #ddd;
padding: 8px;
}
.row {
margin-top: 10px;
}
button {
padding: 6px 10px;
}
label {
font-size: 13px;
}
input[type="number"] {
width: 120px;
}
</style>
</head>
<body>
<div class="container">
<h1>BMP to RGB565 (Raw)</h1>
<p class="hint">
Select a BMP file, convert pixels to RGB565 raw bytes (little-endian), then copy Base64 for
<code>/inference/run</code>.
</p>
<div class="controls">
<input id="fileInput" type="file" accept=".bmp,image/bmp" />
<label>Model Preset:</label>
<select id="modelPreset">
<option value="tiny_yolo">TinyYOLO (KL520)</option>
<option value="yolov5">YOLOv5 (KL520/KL720)</option>
<option value="fcos">FCOS (KL520/KL720)</option>
</select>
<label>Target W:</label>
<input id="targetWidth" type="number" min="1" placeholder="original" />
<label>Target H:</label>
<input id="targetHeight" type="number" min="1" placeholder="original" />
<button id="convertBtn">Convert</button>
<button id="clearBtn">Clear</button>
</div>
<img id="preview" alt="Preview will appear here" />
<div class="meta" id="meta">No file loaded.</div>
<div class="row">
<label for="base64Output">RGB565 Base64 (raw bytes)</label>
<textarea id="base64Output" placeholder="RGB565 base64 output..."></textarea>
</div>
<div class="controls">
<label>model_id:</label>
<input id="modelId" type="number" value="19" />
<button id="copyB64Btn">Copy Base64</button>
<button id="copyPayloadBtn">Copy Payload JSON</button>
</div>
<div class="row">
<label for="payloadOutput">Payload sample</label>
<textarea id="payloadOutput" placeholder="Payload JSON..."></textarea>
</div>
</div>
<script>
// --- DOM handles for the converter UI (IDs match the markup above) ---
const fileInput = document.getElementById("fileInput");
const convertBtn = document.getElementById("convertBtn");
const clearBtn = document.getElementById("clearBtn");
const copyB64Btn = document.getElementById("copyB64Btn");
const copyPayloadBtn = document.getElementById("copyPayloadBtn");
const modelIdInput = document.getElementById("modelId");
const modelPresetSelect = document.getElementById("modelPreset");
const targetWidthInput = document.getElementById("targetWidth");
const targetHeightInput = document.getElementById("targetHeight");
const preview = document.getElementById("preview");
const meta = document.getElementById("meta");
const base64Output = document.getElementById("base64Output");
const payloadOutput = document.getElementById("payloadOutput");
// Per-model defaults applied by the "Model Preset" dropdown.
// NOTE(review): modelId values (19 / 20005 / 20004) appear to be Kneron
// model-zoo IDs — confirm against the service's model table.
const modelPresets = {
  tiny_yolo: { modelId: 19, width: 224, height: 224, label: "TinyYOLO (KL520)" },
  yolov5: { modelId: 20005, width: 640, height: 640, label: "YOLOv5" },
  fcos: { modelId: 20004, width: 512, height: 512, label: "FCOS" }
};
// Dimensions of the most recent conversion output (0 = nothing converted yet).
let currentWidth = 0;
let currentHeight = 0;
// Encode a Uint8Array as Base64. Bytes are turned into a binary string in
// 32 KiB slices so String.fromCharCode never receives an argument list long
// enough to overflow the call stack.
function toBase64(uint8Array) {
  const SLICE = 0x8000;
  const pieces = [];
  for (let offset = 0; offset < uint8Array.length; offset += SLICE) {
    pieces.push(String.fromCharCode(...uint8Array.subarray(offset, offset + SLICE)));
  }
  return btoa(pieces.join(""));
}
// Pack 8-bit R/G/B channels into one 16-bit RGB565 value:
// red in the top 5 bits, green in the middle 6, blue in the low 5.
function rgbTo565(r, g, b) {
  const red5 = (r >> 3) & 0x1f;
  const green6 = (g >> 2) & 0x3f;
  const blue5 = (b >> 3) & 0x1f;
  return (red5 << 11) + (green6 << 5) + blue5;
}
// Fill the "Payload sample" textarea with a ready-to-send /inference/run
// JSON body built from the current model_id input and the given image data.
function buildPayload(base64Value, width, height) {
  const payload = {
    model_id: Number(modelIdInput.value || 0),
    image_format: "RGB565",
    width: width,
    height: height,
    image_base64: base64Value,
    channels_ordering: "DEFAULT",
    output_dtype: "float32"
  };
  payloadOutput.value = JSON.stringify(payload, null, 2);
}
// Show a status line in the meta panel.
function setMeta(message) {
  meta.textContent = message;
}
// Apply a preset from `modelPresets`: set model_id and target W/H inputs,
// report the choice, and — if an image was already converted — rebuild the
// payload so it picks up the new model_id.
function applyModelPreset(presetKey) {
  const preset = modelPresets[presetKey];
  if (!preset) return;
  modelIdInput.value = String(preset.modelId);
  targetWidthInput.value = String(preset.width);
  targetHeightInput.value = String(preset.height);
  setMeta(
    `preset=${preset.label}, model_id=${preset.modelId}, target=${preset.width}x${preset.height}`
  );
  if (base64Output.value && currentWidth > 0 && currentHeight > 0) {
    buildPayload(base64Output.value, currentWidth, currentHeight);
  }
}
// Convert the selected BMP to raw RGB565 bytes and publish the result.
// The image is drawn onto a canvas (optionally resized to Target W/H), the
// RGBA pixels are repacked as little-endian RGB565, and the result is shown
// as Base64 plus a ready-to-send /inference/run payload.
async function convert() {
  const file = fileInput.files && fileInput.files[0];
  if (!file) {
    setMeta("Please select a BMP file first.");
    return;
  }
  const objectUrl = URL.createObjectURL(file);
  const img = new Image();
  img.onload = () => {
    const srcWidth = img.width;
    const srcHeight = img.height;
    // A target size is used only when both W and H are positive numbers;
    // otherwise the original image dimensions are kept.
    const tw = Number(targetWidthInput.value);
    const th = Number(targetHeightInput.value);
    const hasTarget = Number.isFinite(tw) && Number.isFinite(th) && tw > 0 && th > 0;
    currentWidth = hasTarget ? Math.floor(tw) : srcWidth;
    currentHeight = hasTarget ? Math.floor(th) : srcHeight;
    const canvas = document.createElement("canvas");
    canvas.width = currentWidth;
    canvas.height = currentHeight;
    const ctx = canvas.getContext("2d");
    ctx.drawImage(img, 0, 0, currentWidth, currentHeight);
    const imageData = ctx.getImageData(0, 0, currentWidth, currentHeight).data;
    const out = new Uint8Array(currentWidth * currentHeight * 2);
    let p = 0;
    for (let i = 0; i < imageData.length; i += 4) {
      // Alpha (imageData[i + 3]) is intentionally dropped.
      const v565 = rgbTo565(imageData[i], imageData[i + 1], imageData[i + 2]);
      // little-endian bytes
      out[p++] = v565 & 0xff;
      out[p++] = (v565 >> 8) & 0xff;
    }
    const b64 = toBase64(out);
    base64Output.value = b64;
    buildPayload(b64, currentWidth, currentHeight);
    // Fix: release the previous preview's blob URL before replacing it, so
    // repeated conversions do not accumulate unreleased object URLs.
    if (preview.src && preview.src.startsWith("blob:")) {
      URL.revokeObjectURL(preview.src);
    }
    preview.src = objectUrl;
    setMeta(
      `file=${file.name}, src=${srcWidth}x${srcHeight}, out=${currentWidth}x${currentHeight}, rgb565_bytes=${out.length}, expected=${
        currentWidth * currentHeight * 2
      }`
    );
  };
  img.onerror = () => {
    setMeta("Failed to decode image. Ensure the file is a valid BMP.");
    URL.revokeObjectURL(objectUrl);
  };
  img.src = objectUrl;
}
// Reset the converter UI to its initial empty state.
function clearAll() {
  currentWidth = 0;
  currentHeight = 0;
  fileInput.value = "";
  base64Output.value = "";
  payloadOutput.value = "";
  preview.removeAttribute("src");
  setMeta("No file loaded.");
}
// --- Event wiring + initial preset ---
convertBtn.addEventListener("click", convert);
clearBtn.addEventListener("click", clearAll);
modelPresetSelect.addEventListener("change", () => {
  applyModelPreset(modelPresetSelect.value);
});
// Rebuild the payload when model_id changes after a conversion is done.
modelIdInput.addEventListener("change", () => {
  if (base64Output.value && currentWidth > 0 && currentHeight > 0) {
    buildPayload(base64Output.value, currentWidth, currentHeight);
  }
});
copyB64Btn.addEventListener("click", async () => {
  if (!base64Output.value) return;
  await navigator.clipboard.writeText(base64Output.value);
});
copyPayloadBtn.addEventListener("click", async () => {
  if (!payloadOutput.value) return;
  await navigator.clipboard.writeText(payloadOutput.value);
});
// Seed the form from whichever preset the <select> starts on.
applyModelPreset(modelPresetSelect.value);
</script>
</body>
</html>

Binary file not shown.

View File

@ -0,0 +1,955 @@
<!DOCTYPE html>
<html lang="zh-Hant">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Payload Detection Viewer (YOLO/TinyYOLO/FCOS)</title>
<style>
:root {
--bg: #f5f6f8;
--card: #ffffff;
--text: #1f2937;
--muted: #6b7280;
--line: #d1d5db;
--accent: #2563eb;
}
body {
margin: 0;
background: var(--bg);
color: var(--text);
font-family: Arial, sans-serif;
}
.wrap {
max-width: 1300px;
margin: 20px auto;
padding: 0 12px 24px;
}
h1 {
margin: 0 0 8px;
}
.hint {
color: var(--muted);
font-size: 13px;
margin: 0 0 12px;
}
.panel {
background: var(--card);
border: 1px solid var(--line);
border-radius: 10px;
padding: 12px;
margin-bottom: 12px;
}
textarea {
width: 100%;
min-height: 200px;
box-sizing: border-box;
padding: 12px;
border: 1px solid var(--line);
border-radius: 8px;
background: #fff;
font-family: Consolas, monospace;
}
.row {
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
margin-bottom: 10px;
}
.row label {
font-size: 13px;
}
input[type="number"], input[type="text"], select {
padding: 4px 6px;
}
input[type="number"] {
width: 95px;
}
input.wide {
width: 360px;
}
.btn {
border: 1px solid var(--line);
background: #fff;
color: var(--text);
padding: 8px 12px;
border-radius: 8px;
cursor: pointer;
}
.btn.primary {
background: var(--accent);
border-color: var(--accent);
color: #fff;
}
.error {
color: #b91c1c;
background: #fef2f2;
border: 1px solid #fecaca;
border-radius: 8px;
padding: 8px 10px;
white-space: pre-wrap;
}
.grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 10px;
}
.subcard {
border: 1px solid var(--line);
border-radius: 8px;
background: #fff;
padding: 8px;
}
.subcard-title {
font-size: 13px;
font-weight: 700;
margin-bottom: 6px;
}
canvas {
border: 1px solid var(--line);
border-radius: 6px;
max-width: 100%;
height: auto;
background: #fff;
}
.stats {
color: var(--muted);
font-size: 12px;
margin-top: 6px;
white-space: pre-wrap;
}
table {
width: 100%;
border-collapse: collapse;
font-size: 12px;
}
th, td {
border: 1px solid var(--line);
padding: 4px 6px;
text-align: left;
}
@media (max-width: 980px) {
.grid {
grid-template-columns: 1fr;
}
}
</style>
</head>
<body>
<div class="wrap">
<h1>Payload Detection Viewer</h1>
<p class="hint">
專為 POC手動選擇模型 (YOLOv5 / TinyYOLOv3 / FCOS) 後處理,將 payload 推論結果畫成框 + 類別 + 分數。
</p>
<div class="panel">
<div class="row">
<button id="parseBtn" class="btn primary">Parse Payload</button>
<button id="clearBtn" class="btn">Clear</button>
<label>原圖:</label>
<input id="imgInput" type="file" accept="image/*" />
<button id="clearImgBtn" class="btn">Clear Image</button>
</div>
<textarea id="payloadInput" placeholder="貼上完整 payload JSON..."></textarea>
</div>
<div id="errorBox" class="error" style="display:none"></div>
<div class="panel">
<div class="row">
<label>模型類型:</label>
<select id="modelType">
<option value="tinyyolo">TinyYOLOv3</option>
<option value="yolov5">YOLOv5</option>
<option value="fcos">FCOS</option>
</select>
<label>class 數:</label>
<input id="numClasses" type="number" min="1" value="80" />
<label>score 閾值:</label>
<input id="scoreTh" type="number" min="0" max="1" step="0.01" value="0.25" />
<label>NMS IoU:</label>
<input id="nmsTh" type="number" min="0" max="1" step="0.01" value="0.45" />
<label>max boxes:</label>
<input id="maxBoxes" type="number" min="1" value="200" />
</div>
<div class="row">
<label>score mode:</label>
<select id="scoreMode">
<option value="obj_cls">obj * cls</option>
<option value="obj">obj only</option>
<option value="cls">cls only</option>
</select>
<label><input id="objClsSigmoid" type="checkbox" checked /> obj/cls sigmoid</label>
<label><input id="yoloXySigmoid" type="checkbox" checked /> YOLO x/y sigmoid</label>
<button id="presetTinyBtn" class="btn">Preset TinyYOLO</button>
<button id="presetYolo5Btn" class="btn">Preset YOLOv5</button>
<button id="presetFcosBtn" class="btn">Preset FCOS</button>
</div>
<div class="row">
<label>模型輸入寬:</label>
<input id="inW" type="number" min="1" value="224" />
<label>模型輸入高:</label>
<input id="inH" type="number" min="1" value="224" />
<label>YOLO anchors:</label>
<input id="anchors" class="wide" type="text" value="10,14|23,27|37,58;81,82|135,169|344,319" />
</div>
<div class="row">
<label>FCOS class node idx(逗號):</label>
<input id="fcosClsIdx" class="wide" type="text" value="" placeholder="例如: 0,3,6" />
<label>FCOS reg node idx(逗號):</label>
<input id="fcosRegIdx" class="wide" type="text" value="" placeholder="例如: 1,4,7" />
<label>FCOS ctr node idx(逗號):</label>
<input id="fcosCtrIdx" class="wide" type="text" value="" placeholder="例如: 2,5,8" />
<button id="autoFcosIdxBtn" class="btn">Auto Fill FCOS idx</button>
</div>
<div class="row">
<label>FCOS strides:</label>
<input id="fcosStrides" class="wide" type="text" value="8,16,32,64,128" />
<button id="runBtn" class="btn primary">Decode + Draw</button>
</div>
</div>
<div class="grid">
<div class="subcard">
<div class="subcard-title">Detection Overlay</div>
<canvas id="overlayCanvas" width="1" height="1"></canvas>
<div id="overlayStats" class="stats"></div>
</div>
<div class="subcard">
<div class="subcard-title">Top Boxes</div>
<div style="max-height:420px; overflow:auto">
<table>
<thead>
<tr>
<th>#</th><th>cls</th><th>score</th><th>x1</th><th>y1</th><th>x2</th><th>y2</th>
</tr>
</thead>
<tbody id="boxTableBody"></tbody>
</table>
</div>
<div id="decodeStats" class="stats"></div>
<div id="debugStats" class="stats"></div>
</div>
</div>
</div>
<script>
// --- DOM handles for the payload viewer UI (IDs match the markup above) ---
const parseBtn = document.getElementById("parseBtn");
const clearBtn = document.getElementById("clearBtn");
const runBtn = document.getElementById("runBtn");
const clearImgBtn = document.getElementById("clearImgBtn");
const payloadInput = document.getElementById("payloadInput");
const imgInput = document.getElementById("imgInput");
const errorBox = document.getElementById("errorBox");
const overlayCanvas = document.getElementById("overlayCanvas");
const overlayStats = document.getElementById("overlayStats");
const decodeStats = document.getElementById("decodeStats");
const debugStats = document.getElementById("debugStats");
const boxTableBody = document.getElementById("boxTableBody");
const modelType = document.getElementById("modelType");
const numClasses = document.getElementById("numClasses");
const scoreTh = document.getElementById("scoreTh");
const nmsTh = document.getElementById("nmsTh");
const maxBoxes = document.getElementById("maxBoxes");
const inW = document.getElementById("inW");
const inH = document.getElementById("inH");
const anchorsInput = document.getElementById("anchors");
const fcosClsIdx = document.getElementById("fcosClsIdx");
const fcosRegIdx = document.getElementById("fcosRegIdx");
const fcosCtrIdx = document.getElementById("fcosCtrIdx");
const fcosStrides = document.getElementById("fcosStrides");
const autoFcosIdxBtn = document.getElementById("autoFcosIdxBtn");
const scoreMode = document.getElementById("scoreMode");
const objClsSigmoid = document.getElementById("objClsSigmoid");
const yoloXySigmoid = document.getElementById("yoloXySigmoid");
const presetTinyBtn = document.getElementById("presetTinyBtn");
const presetYolo5Btn = document.getElementById("presetYolo5Btn");
const presetFcosBtn = document.getElementById("presetFcosBtn");
// Parsed payload output nodes (filled by parsePayloadText).
let outputs = [];
// Optional background image for the overlay (null = white canvas).
let srcImg = null;
// Show a message in the error banner.
function showError(msg) {
  errorBox.textContent = msg;
  errorBox.style.display = "block";
}
// Hide and empty the error banner.
function clearError() {
  errorBox.textContent = "";
  errorBox.style.display = "none";
}
// Logistic function.
function sigmoid(x) {
  return 1 / (1 + Math.exp(-x));
}
// Apply sigmoid only when `on` is truthy; otherwise pass the value through.
function maybeSigmoid(v, on) {
  if (on) return sigmoid(v);
  return v;
}
// Format a number to 4 decimal places; non-finite values render as "nan".
function fmt(v) {
  return Number.isFinite(v) ? Number(v).toFixed(4) : "nan";
}
// Decode a Base64 string into an array of little-endian float32 values.
// Trailing bytes that do not complete a 4-byte float are ignored.
function decodeBase64Float32(base64String) {
  const raw = atob(String(base64String || "").trim());
  const bytes = Uint8Array.from(raw, (ch) => ch.charCodeAt(0));
  const count = Math.floor(bytes.byteLength / 4);
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const floats = new Float32Array(count);
  for (let i = 0; i < count; i++) {
    floats[i] = view.getFloat32(4 * i, true);
  }
  return floats;
}
// Product of all elements; an empty array yields 1.
function arrProduct(a) {
  return a.reduce((acc, v) => acc * v, 1);
}
// Parse the textarea JSON into normalized output records and cache them in
// the module-level `outputs`. Accepts either payload.data.outputs or
// payload.outputs. Each record: { idx, node_idx, shape, data (Float32Array),
// expected (element count implied by shape, or data length when shape is
// absent) }. Throws a user-facing error on empty input, bad JSON, or a
// missing/empty outputs array.
function parsePayloadText() {
  clearError();
  const text = payloadInput.value.trim();
  if (!text) throw new Error("請先貼 payload JSON");
  let obj;
  try {
    obj = JSON.parse(text);
  } catch (e) {
    throw new Error("JSON parse failed: " + e.message);
  }
  const arr = obj?.data?.outputs || obj?.outputs;
  if (!Array.isArray(arr) || arr.length === 0) {
    throw new Error("找不到 outputs預期 payload.data.outputs");
  }
  const parsed = arr.map((o, i) => {
    const shape = Array.isArray(o.shape) ? o.shape : [];
    const data = decodeBase64Float32(o.data_base64);
    const expected = shape.length ? arrProduct(shape) : data.length;
    return {
      idx: i,
      // Fall back to the array position when node_idx is absent.
      node_idx: Number(o.node_idx ?? i),
      shape,
      data,
      expected
    };
  });
  outputs = parsed;
  return parsed;
}
// Parse an anchor spec like "10,14|23,27;81,82|135,169" into
// [[[10,14],[23,27]], [[81,82],[135,169]]] — levels are separated by ";",
// anchors within a level by "|", and width/height by ",".
function parseAnchors(text) {
  const result = [];
  for (const rawLevel of String(text || "").split(";")) {
    const level = rawLevel.trim();
    if (!level) continue;
    result.push(level.split("|").map((pair) => {
      const nums = pair.split(",").map((v) => Number(v.trim()));
      return [nums[0], nums[1]];
    }));
  }
  return result;
}
// Select candidate YOLO head nodes: 4-D [1, C, H, W] tensors whose channel
// count is a multiple of (5 + numClasses), i.e. anchors * (x,y,w,h,obj,cls…).
// Results are sorted by grid height descending (largest grid first).
// NOTE(review): this assumes anchor groups in the anchors input are listed
// finest-level first, matching the default anchor string — confirm when
// changing anchors.
function pickYoloNodes(all) {
  const c = Number(numClasses.value) || 80;
  const picked = [];
  for (const o of all) {
    if (o.shape.length !== 4) continue;
    if (o.shape[0] !== 1) continue;
    const ch = o.shape[1];
    if (ch % (5 + c) !== 0) continue;
    picked.push(o);
  }
  picked.sort((a, b) => b.shape[2] - a.shape[2]);
  return picked;
}
// Decode raw YOLO head outputs ([1, C, H, W] nodes, C = anchors * (5 + classes))
// into candidate boxes in model-input pixel coordinates.
// `mode` selects the box equations: "yolov5" (scaled-sigmoid xywh) or the
// classic YOLOv2/v3 form. Returns { boxes, debug }, where debug carries
// per-level max obj/cls/score values for threshold tuning.
// Fix: the "no nodes" error message was mojibake ("??? YOLO ?? node?????…");
// restored as a readable description of the shape requirement.
function decodeYoloCommon(all, mode) {
  const c = Number(numClasses.value) || 80;
  const confTh = Number(scoreTh.value) || 0.25;
  const inpW = Number(inW.value) || 224;
  const inpH = Number(inH.value) || 224;
  const useSig = objClsSigmoid.checked;
  const useXySig = yoloXySigmoid.checked;
  const scoreModeValue = scoreMode.value;
  const nodes = pickYoloNodes(all);
  if (nodes.length === 0) throw new Error("No YOLO output nodes found (expected [1,C,H,W] with C a multiple of 5+classes).");
  const anchorLv = parseAnchors(anchorsInput.value);
  const boxes = [];
  const perLevel = [];
  for (let lv = 0; lv < nodes.length; lv++) {
    const o = nodes[lv];
    const [, ch, gh, gw] = o.shape;
    const attrs = 5 + c;
    const na = Math.floor(ch / attrs);
    // Reuse the last anchor group when fewer groups than levels were given.
    const anchors = anchorLv[lv] || anchorLv[anchorLv.length - 1] || [];
    let maxObj = -Infinity;
    let maxCls = -Infinity;
    let maxScore = -Infinity;
    for (let a = 0; a < na; a++) {
      const anchor = anchors[a] || [10, 10];
      const aw = anchor[0];
      const ah = anchor[1];
      const baseA = a * attrs;
      for (let y = 0; y < gh; y++) {
        for (let x = 0; x < gw; x++) {
          // Channel-major (CHW) offset of attribute `ci` for this anchor/cell.
          const idx = (ci) => {
            const cidx = baseA + ci;
            return cidx * gh * gw + y * gw + x;
          };
          const tx = o.data[idx(0)];
          const ty = o.data[idx(1)];
          const tw = o.data[idx(2)];
          const th = o.data[idx(3)];
          const to = o.data[idx(4)];
          const obj = maybeSigmoid(to, useSig);
          if (obj > maxObj) maxObj = obj;
          // Best class probability across all classes at this cell.
          let bestCls = -1;
          let bestProb = -Infinity;
          for (let k = 0; k < c; k++) {
            const p = maybeSigmoid(o.data[idx(5 + k)], useSig);
            if (p > bestProb) {
              bestProb = p;
              bestCls = k;
            }
          }
          if (bestProb > maxCls) maxCls = bestProb;
          let score;
          if (scoreModeValue === "obj") score = obj;
          else if (scoreModeValue === "cls") score = bestProb;
          else score = obj * bestProb;
          if (score > maxScore) maxScore = score;
          if (score < confTh) continue;
          let bx, by, bw, bh;
          if (mode === "yolov5") {
            // YOLOv5: xy = (2*sig(t) - 0.5 + cell) * stride,
            //         wh = (2*sig(t))^2 * anchor
            const sx = inpW / gw;
            const sy = inpH / gh;
            const txv = useXySig ? sigmoid(tx) : tx;
            const tyv = useXySig ? sigmoid(ty) : ty;
            bx = (txv * 2 - 0.5 + x) * sx;
            by = (tyv * 2 - 0.5 + y) * sy;
            bw = Math.pow(sigmoid(tw) * 2, 2) * aw;
            bh = Math.pow(sigmoid(th) * 2, 2) * ah;
          } else {
            // YOLOv2/v3: xy = (sig(t) + cell)/grid * input,
            //            wh = anchor * exp(t)
            const txv = useXySig ? sigmoid(tx) : tx;
            const tyv = useXySig ? sigmoid(ty) : ty;
            bx = (txv + x) / gw * inpW;
            by = (tyv + y) / gh * inpH;
            bw = aw * Math.exp(tw);
            bh = ah * Math.exp(th);
          }
          boxes.push({
            cls: bestCls,
            score,
            x1: bx - bw / 2,
            y1: by - bh / 2,
            x2: bx + bw / 2,
            y2: by + bh / 2
          });
        }
      }
    }
    perLevel.push({
      lv,
      shape: `[${o.shape.join(",")}]`,
      maxObj,
      maxCls,
      maxScore
    });
  }
  return {
    boxes,
    debug: {
      type: mode,
      scoreMode: scoreModeValue,
      useSigmoid: useSig,
      useXySigmoid: useXySig,
      levels: perLevel
    }
  };
}
// Parse "1, 2,3" into [1, 2, 3]. Empty tokens are skipped, and tokens that
// do not parse as a finite number are dropped (fix: previously NaN entries
// leaked into node-idx/stride lists and caused confusing downstream
// lookup failures).
function parseIntList(text) {
  return String(text || "")
    .split(",")
    .map(s => s.trim())
    .filter(Boolean)
    .map(v => Number(v))
    .filter(v => Number.isFinite(v));
}
// Return [index, value] of the maximum among arr[start .. start+len-1].
// The index is relative to `start`; [-1, -Infinity] when len <= 0.
function argmax(arr, start, len) {
  let bestIdx = -1;
  let bestVal = -Infinity;
  for (let off = 0; off < len; off++) {
    const cur = arr[start + off];
    if (cur > bestVal) {
      bestVal = cur;
      bestIdx = off;
    }
  }
  return [bestIdx, bestVal];
}
// Decode FCOS head outputs into candidate boxes in model-input pixels.
// Each pyramid level pairs a class node [1,classes,H,W], a regression node
// [1,4,H,W] (l,t,r,b distances) and a centerness node [1,1,H,W], matched via
// the user-entered node_idx lists; each level has a stride mapping grid
// cells back to input pixels. Boxes are clipped to the input size.
// Returns { boxes, debug } with per-level max ctr/cls/score for tuning.
// Fix: five error messages were mojibake ("FCOS ??????…") — restored as
// readable English.
function decodeFCOS(all) {
  const c = Number(numClasses.value) || 80;
  const confTh = Number(scoreTh.value) || 0.25;
  const inpW = Number(inW.value) || 512;
  const inpH = Number(inH.value) || 512;
  const useSig = objClsSigmoid.checked;
  const scoreModeValue = scoreMode.value;
  const clsIdx = parseIntList(fcosClsIdx.value);
  const regIdx = parseIntList(fcosRegIdx.value);
  const ctrIdx = parseIntList(fcosCtrIdx.value);
  const strides = parseIntList(fcosStrides.value);
  if (clsIdx.length === 0 || regIdx.length === 0 || ctrIdx.length === 0) {
    throw new Error("FCOS requires class/reg/centerness node idx lists (comma-separated).");
  }
  if (!(clsIdx.length === regIdx.length && regIdx.length === ctrIdx.length)) {
    throw new Error("FCOS node idx lists must be the same length (one class/reg/ctr per level).");
  }
  const boxes = [];
  const perLevel = [];
  for (let lv = 0; lv < clsIdx.length; lv++) {
    const clsNode = all.find(o => o.node_idx === clsIdx[lv]);
    const regNode = all.find(o => o.node_idx === regIdx[lv]);
    const ctrNode = all.find(o => o.node_idx === ctrIdx[lv]);
    // Reuse the last stride when fewer strides than levels were given.
    const stride = strides[lv] || strides[strides.length - 1] || 8;
    if (!clsNode || !regNode || !ctrNode) {
      throw new Error(`FCOS level ${lv}: node_idx not found in outputs`);
    }
    if (clsNode.shape.length !== 4 || regNode.shape.length !== 4 || ctrNode.shape.length !== 4) {
      throw new Error(`FCOS level ${lv}: shapes must be [1,C,H,W]`);
    }
    const gh = clsNode.shape[2];
    const gw = clsNode.shape[3];
    const clsC = clsNode.shape[1];
    const regC = regNode.shape[1];
    const ctrC = ctrNode.shape[1];
    if (regC < 4 || ctrC < 1) {
      throw new Error(`FCOS level ${lv}: reg needs >= 4 channels and ctr >= 1`);
    }
    let maxCtr = -Infinity;
    let maxCls = -Infinity;
    let maxScore = -Infinity;
    for (let y = 0; y < gh; y++) {
      for (let x = 0; x < gw; x++) {
        // CHW read of channel `ch` at this grid cell.
        const at = (node, ch) => node.data[ch * gh * gw + y * gw + x];
        const ctrRaw = at(ctrNode, 0);
        const center = maybeSigmoid(ctrRaw, useSig);
        if (center > maxCtr) maxCtr = center;
        let bestCls = -1;
        let bestProb = -Infinity;
        for (let k = 0; k < Math.min(c, clsC); k++) {
          const p = maybeSigmoid(at(clsNode, k), useSig);
          if (p > bestProb) {
            bestProb = p;
            bestCls = k;
          }
        }
        if (bestProb > maxCls) maxCls = bestProb;
        let score;
        if (scoreModeValue === "obj") score = center;
        else if (scoreModeValue === "cls") score = bestProb;
        // Default: geometric mean of class prob and centerness.
        else score = Math.sqrt(Math.max(0, bestProb * center));
        if (score > maxScore) maxScore = score;
        if (score < confTh) continue;
        // l/t/r/b distances from the cell center, clamped non-negative.
        const l = Math.max(0, at(regNode, 0));
        const t = Math.max(0, at(regNode, 1));
        const r = Math.max(0, at(regNode, 2));
        const b = Math.max(0, at(regNode, 3));
        const cx = (x + 0.5) * stride;
        const cy = (y + 0.5) * stride;
        const x1 = cx - l;
        const y1 = cy - t;
        const x2 = cx + r;
        const y2 = cy + b;
        if (x2 <= x1 || y2 <= y1) continue;
        boxes.push({ cls: bestCls, score, x1, y1, x2, y2 });
      }
    }
    perLevel.push({
      lv,
      clsShape: `[${clsNode.shape.join(",")}]`,
      regShape: `[${regNode.shape.join(",")}]`,
      ctrShape: `[${ctrNode.shape.join(",")}]`,
      stride,
      maxCtr,
      maxCls,
      maxScore
    });
  }
  // Clip all boxes to the model input rectangle.
  const clipped = boxes.map(b => ({
    cls: b.cls,
    score: b.score,
    x1: Math.max(0, Math.min(inpW, b.x1)),
    y1: Math.max(0, Math.min(inpH, b.y1)),
    x2: Math.max(0, Math.min(inpW, b.x2)),
    y2: Math.max(0, Math.min(inpH, b.y2))
  }));
  return {
    boxes: clipped,
    debug: {
      type: "fcos",
      scoreMode: scoreModeValue,
      useSigmoid: useSig,
      levels: perLevel
    }
  };
}
// Intersection-over-union of two boxes {x1,y1,x2,y2}; 0 when disjoint.
function iou(a, b) {
  const ix = Math.min(a.x2, b.x2) - Math.max(a.x1, b.x1);
  const iy = Math.min(a.y2, b.y2) - Math.max(a.y1, b.y1);
  if (ix <= 0 || iy <= 0) return 0;
  const inter = ix * iy;
  const areaA = Math.max(0, a.x2 - a.x1) * Math.max(0, a.y2 - a.y1);
  const areaB = Math.max(0, b.x2 - b.x1) * Math.max(0, b.y2 - b.y1);
  // Epsilon guards against division by zero for degenerate boxes.
  return inter / Math.max(1e-9, areaA + areaB - inter);
}
// Per-class greedy non-maximum suppression. Within each class, boxes are
// taken in descending score order; any remaining box whose IoU with the kept
// box exceeds iouTh is discarded. The union of survivors is returned sorted
// by score and truncated to maxOut.
function nms(boxes, iouTh, maxOut) {
  const groups = new Map();
  for (const box of boxes) {
    const bucket = groups.get(box.cls);
    if (bucket) bucket.push(box);
    else groups.set(box.cls, [box]);
  }
  const survivors = [];
  for (const bucket of groups.values()) {
    bucket.sort((a, b) => b.score - a.score);
    while (bucket.length > 0) {
      const top = bucket.shift();
      survivors.push(top);
      for (let i = bucket.length - 1; i >= 0; i--) {
        if (iou(top, bucket[i]) > iouTh) bucket.splice(i, 1);
      }
    }
  }
  survivors.sort((a, b) => b.score - a.score);
  return survivors.slice(0, maxOut);
}
// Draw detection boxes on the overlay canvas. Box coordinates are in
// model-input pixels (inW x inH); they are scaled to the source image size
// when one is loaded, otherwise drawn 1:1 on a white canvas.
function drawDetections(boxes) {
  const ctx = overlayCanvas.getContext("2d");
  const iw = Number(inW.value) || 224;
  const ih = Number(inH.value) || 224;
  const drawW = srcImg ? (srcImg.naturalWidth || srcImg.width) : iw;
  const drawH = srcImg ? (srcImg.naturalHeight || srcImg.height) : ih;
  overlayCanvas.width = drawW;
  overlayCanvas.height = drawH;
  ctx.clearRect(0, 0, drawW, drawH);
  if (srcImg) {
    ctx.drawImage(srcImg, 0, 0, drawW, drawH);
  } else {
    ctx.fillStyle = "#fff";
    ctx.fillRect(0, 0, drawW, drawH);
  }
  // Scale factors from model-input space to draw space.
  const sx = drawW / iw;
  const sy = drawH / ih;
  ctx.lineWidth = 2;
  ctx.font = "12px Arial";
  for (const b of boxes) {
    const x1 = b.x1 * sx;
    const y1 = b.y1 * sy;
    const x2 = b.x2 * sx;
    const y2 = b.y2 * sy;
    const w = Math.max(1, x2 - x1);
    const h = Math.max(1, y2 - y1);
    // Deterministic per-class hue so the same class always gets the same color.
    const hue = (b.cls * 47) % 360;
    const color = `hsl(${hue} 90% 45%)`;
    ctx.strokeStyle = color;
    ctx.fillStyle = color;
    ctx.strokeRect(x1, y1, w, h);
    const tag = `${b.cls}:${b.score.toFixed(3)}`;
    const tw = ctx.measureText(tag).width + 6;
    // Keep the label on-canvas when the box touches the top edge.
    const ty = Math.max(12, y1 - 3);
    ctx.fillRect(x1, ty - 12, tw, 12);
    ctx.fillStyle = "#fff";
    ctx.fillText(tag, x1 + 3, ty - 2);
  }
}
// Render up to the first 200 boxes into the results table.
// Rows are built via innerHTML; all interpolated values are numbers, so no
// HTML escaping is required here.
function fillTable(boxes) {
  boxTableBody.innerHTML = "";
  const top = boxes.slice(0, 200);
  for (let i = 0; i < top.length; i++) {
    const b = top[i];
    const tr = document.createElement("tr");
    tr.innerHTML = `<td>${i + 1}</td><td>${b.cls}</td><td>${b.score.toFixed(4)}</td><td>${b.x1.toFixed(1)}</td><td>${b.y1.toFixed(1)}</td><td>${b.x2.toFixed(1)}</td><td>${b.y2.toFixed(1)}</td>`;
    boxTableBody.appendChild(tr);
  }
}
// Run the selected decoder over the parsed outputs, apply NMS, then draw the
// overlay, fill the box table, and print decode/debug statistics.
// Fix: the "no payload parsed yet" message was mojibake ("?? Parse Payload");
// restored as readable English.
function runDecode() {
  clearError();
  if (outputs.length === 0) {
    showError("Run Parse Payload first.");
    return;
  }
  try {
    const type = modelType.value;
    let result;
    if (type === "yolov5") {
      result = decodeYoloCommon(outputs, "yolov5");
    } else if (type === "tinyyolo") {
      result = decodeYoloCommon(outputs, "tinyyolo");
    } else {
      result = decodeFCOS(outputs);
    }
    const raw = result.boxes;
    const iouTh = Number(nmsTh.value) || 0.45;
    const maxOut = Number(maxBoxes.value) || 200;
    const finalBoxes = nms(raw, iouTh, maxOut);
    drawDetections(finalBoxes);
    fillTable(finalBoxes);
    overlayStats.textContent = `draw size: ${overlayCanvas.width}x${overlayCanvas.height}`;
    decodeStats.textContent = `decoded raw: ${raw.length}
final after NMS: ${finalBoxes.length}`;
    // One summary line per pyramid level for threshold tuning.
    const lvLines = (result.debug?.levels || []).map((lv) => {
      if (result.debug.type === "fcos") {
        return `L${lv.lv} stride=${lv.stride} cls=${lv.clsShape} reg=${lv.regShape} ctr=${lv.ctrShape} maxCtr=${fmt(lv.maxCtr)} maxCls=${fmt(lv.maxCls)} maxScore=${fmt(lv.maxScore)}`;
      }
      return `L${lv.lv} shape=${lv.shape} maxObj=${fmt(lv.maxObj)} maxCls=${fmt(lv.maxCls)} maxScore=${fmt(lv.maxScore)}`;
    });
    debugStats.textContent = [
      `decoder: ${result.debug?.type || type}`,
      `scoreMode: ${result.debug?.scoreMode || scoreMode.value}`,
      `obj/cls sigmoid: ${result.debug?.useSigmoid ?? objClsSigmoid.checked}`,
      `yolo x/y sigmoid: ${result.debug?.useXySigmoid ?? yoloXySigmoid.checked}`,
      ...lvLines
    ].join("\n");
  } catch (e) {
    showError("Decode failed: " + e.message);
  }
}
// Preset: TinyYOLOv3 on KL520 (224x224 input, tiny-yolo anchor pairs).
function applyTinyPreset() {
  modelType.value = "tinyyolo";
  numClasses.value = 80;
  scoreTh.value = 0.25;
  nmsTh.value = 0.45;
  maxBoxes.value = 200;
  inW.value = 224;
  inH.value = 224;
  anchorsInput.value = "10,14|23,27|37,58;81,82|135,169|344,319";
  scoreMode.value = "obj_cls";
  objClsSigmoid.checked = true;
  yoloXySigmoid.checked = true;
}
// Preset: YOLOv5 (640x640 input).
// NOTE(review): this reuses the TinyYOLO anchor string; stock YOLOv5 uses a
// different 3-level anchor set — confirm which anchors the deployed NEF
// expects before relying on this preset.
function applyYolo5Preset() {
  modelType.value = "yolov5";
  numClasses.value = 80;
  scoreTh.value = 0.25;
  nmsTh.value = 0.45;
  maxBoxes.value = 200;
  inW.value = 640;
  inH.value = 640;
  anchorsInput.value = "10,14|23,27|37,58;81,82|135,169|344,319";
  scoreMode.value = "obj_cls";
  objClsSigmoid.checked = true;
  yoloXySigmoid.checked = true;
}
// Preset: FCOS (512x512 input, standard 5-level strides). If a payload is
// already parsed, also auto-fills the cls/reg/ctr node-idx lists.
function applyFcosPreset() {
  modelType.value = "fcos";
  numClasses.value = 80;
  scoreTh.value = 0.25;
  nmsTh.value = 0.45;
  maxBoxes.value = 200;
  inW.value = 512;
  inH.value = 512;
  fcosStrides.value = "8,16,32,64,128";
  scoreMode.value = "obj_cls";
  objClsSigmoid.checked = true;
  if (outputs.length > 0) {
    autoFillFcosIndices();
  }
}
// Infer the FCOS cls/reg/ctr node-idx lists from the parsed outputs.
// Heuristic: among [1,C,H,W] nodes, a class node has C == numClasses, a
// regression node C == 4, a centerness node C == 1; nodes sharing the same
// HxW form one pyramid level. Levels are ordered coarse... actually by grid
// height descending (largest grid first), and default strides 8,16,32,...
// are assigned in that order. Within a level the lowest node_idx wins.
function autoFillFcosIndices() {
  clearError();
  if (!Array.isArray(outputs) || outputs.length === 0) {
    showError("請先 Parse Payload才能自動填 FCOS idx");
    return;
  }
  const c = Number(numClasses.value) || 80;
  const valid = outputs.filter(o => o.shape.length === 4 && o.shape[0] === 1);
  const clsNodes = valid.filter(o => o.shape[1] === c);
  const regNodes = valid.filter(o => o.shape[1] === 4);
  const ctrNodes = valid.filter(o => o.shape[1] === 1);
  // Group nodes by their "HxW" spatial size.
  const byHW = (arr) => {
    const m = new Map();
    for (const o of arr) {
      const key = `${o.shape[2]}x${o.shape[3]}`;
      if (!m.has(key)) m.set(key, []);
      m.get(key).push(o);
    }
    return m;
  };
  const clsMap = byHW(clsNodes);
  const regMap = byHW(regNodes);
  const ctrMap = byHW(ctrNodes);
  // Keep only sizes where all three node kinds exist.
  const keys = [];
  for (const key of clsMap.keys()) {
    if (regMap.has(key) && ctrMap.has(key)) keys.push(key);
  }
  // Sort by grid height descending (finest level first).
  keys.sort((a, b) => {
    const ah = Number(a.split("x")[0]);
    const bh = Number(b.split("x")[0]);
    return bh - ah;
  });
  const levels = keys.map((key) => {
    // Tie-break duplicates by the smallest node_idx.
    const pickMinNode = (arr) => arr.slice().sort((x, y) => x.node_idx - y.node_idx)[0];
    return {
      key,
      cls: pickMinNode(clsMap.get(key)),
      reg: pickMinNode(regMap.get(key)),
      ctr: pickMinNode(ctrMap.get(key))
    };
  });
  if (levels.length === 0) {
    showError("找不到可配對的 FCOS cls/reg/ctr node同 HxW");
    return;
  }
  fcosClsIdx.value = levels.map(l => l.cls.node_idx).join(",");
  fcosRegIdx.value = levels.map(l => l.reg.node_idx).join(",");
  fcosCtrIdx.value = levels.map(l => l.ctr.node_idx).join(",");
  const defaultStrides = [8, 16, 32, 64, 128];
  fcosStrides.value = defaultStrides.slice(0, levels.length).join(",");
  debugStats.textContent = [
    "auto filled FCOS idx:",
    `class: ${fcosClsIdx.value}`,
    `reg: ${fcosRegIdx.value}`,
    `ctr: ${fcosCtrIdx.value}`,
    `levels(HxW): ${levels.map(l => l.key).join(",")}`
  ].join("\n");
}
// --- Event wiring ---
parseBtn.addEventListener("click", () => {
  clearError();
  try {
    const arr = parsePayloadText();
    decodeStats.textContent = `parsed outputs: ${arr.length}`;
  } catch (e) {
    showError(e.message);
  }
});
// Clear the payload, parsed state and all result panels.
clearBtn.addEventListener("click", () => {
  payloadInput.value = "";
  outputs = [];
  boxTableBody.innerHTML = "";
  decodeStats.textContent = "";
  debugStats.textContent = "";
  overlayStats.textContent = "";
  clearError();
});
// Load the chosen background image; redraw immediately if results exist.
imgInput.addEventListener("change", (evt) => {
  const file = evt.target.files && evt.target.files[0];
  if (!file) return;
  const reader = new FileReader();
  reader.onload = () => {
    const img = new Image();
    img.onload = () => {
      srcImg = img;
      if (outputs.length > 0) runDecode();
    };
    img.src = String(reader.result || "");
  };
  reader.readAsDataURL(file);
});
// Drop the background image and redraw on a blank canvas.
clearImgBtn.addEventListener("click", () => {
  srcImg = null;
  imgInput.value = "";
  if (outputs.length > 0) runDecode();
});
runBtn.addEventListener("click", runDecode);
presetTinyBtn.addEventListener("click", applyTinyPreset);
presetYolo5Btn.addEventListener("click", applyYolo5Preset);
presetFcosBtn.addEventListener("click", applyFcosPreset);
autoFcosIdxBtn.addEventListener("click", autoFillFcosIndices);
</script>
</body>
</html>

View File

@ -0,0 +1,624 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Payload Tensor Viewer</title>
<style>
:root {
--bg: #f5f6f8;
--card: #ffffff;
--text: #1f2937;
--muted: #6b7280;
--line: #d1d5db;
--accent: #2563eb;
}
body {
margin: 0;
font-family: Arial, sans-serif;
background: var(--bg);
color: var(--text);
}
.wrap {
max-width: 1200px;
margin: 20px auto;
padding: 0 12px 24px;
}
h1 {
margin: 0 0 8px;
}
.hint {
margin: 0 0 12px;
color: var(--muted);
font-size: 13px;
}
.panel {
background: var(--card);
border: 1px solid var(--line);
border-radius: 10px;
padding: 12px;
margin-bottom: 12px;
}
.panel h2 {
margin: 0 0 8px;
font-size: 18px;
}
.panel .hint {
margin: 0 0 10px;
}
textarea {
width: 100%;
min-height: 220px;
box-sizing: border-box;
padding: 12px;
border: 1px solid var(--line);
border-radius: 8px;
font-family: Consolas, monospace;
background: #fff;
}
.controls {
margin: 10px 0 16px;
display: flex;
flex-wrap: wrap;
gap: 8px;
}
button {
border: 1px solid var(--line);
background: var(--card);
color: var(--text);
padding: 8px 12px;
border-radius: 8px;
cursor: pointer;
}
button.primary {
background: var(--accent);
border-color: var(--accent);
color: #fff;
}
.error {
color: #b91c1c;
background: #fef2f2;
border: 1px solid #fecaca;
border-radius: 8px;
padding: 8px 10px;
margin-bottom: 12px;
white-space: pre-wrap;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(360px, 1fr));
gap: 12px;
}
.card {
background: var(--card);
border: 1px solid var(--line);
border-radius: 10px;
padding: 12px;
}
.title {
font-weight: 700;
margin-bottom: 4px;
}
.meta {
color: var(--muted);
font-size: 12px;
margin-bottom: 10px;
word-break: break-all;
}
.row {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
margin-bottom: 10px;
}
.row label {
font-size: 13px;
}
.row input[type="number"] {
width: 90px;
padding: 4px 6px;
}
.row input[type="range"] {
width: 160px;
}
.row select {
padding: 4px 6px;
}
.preview-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
gap: 10px;
}
.subcard {
border: 1px solid var(--line);
border-radius: 8px;
padding: 8px;
background: #fff;
}
.subcard-title {
font-weight: 700;
font-size: 13px;
margin-bottom: 6px;
}
canvas {
border: 1px solid var(--line);
border-radius: 6px;
background: #fff;
image-rendering: pixelated;
image-rendering: crisp-edges;
max-width: 100%;
height: auto;
}
.stats {
margin-top: 8px;
color: var(--muted);
font-size: 12px;
}
</style>
</head>
<body>
<div class="wrap">
<h1>Payload Tensor Viewer</h1>
<p class="hint">
Paste full JSON payload and click <b>Parse & Render</b>. Supports base64 float32 tensors in NCHW shape (e.g. [1,255,7,7]).
</p>
<textarea id="payloadInput" placeholder="Paste full payload JSON here..."></textarea>
<div class="controls">
<button id="parseBtn" class="primary">Parse & Render</button>
<button id="clearBtn">Clear</button>
</div>
<div id="errorBox" class="error" style="display:none"></div>
<div class="panel">
<h2>Overlay Viewer</h2>
<p class="hint">
Upload original image, pick output/channel, then overlay activation heatmap on top.
</p>
<div class="row">
<label>Image:</label>
<input id="imageInput" type="file" accept="image/*" />
<button id="clearImageBtn">Clear Image</button>
</div>
<div class="row">
<label>Output:</label>
<select id="overlayOutputSelect"></select>
<label>batch:</label>
<input id="overlayBatchInput" type="number" min="0" value="0" />
<label>channel:</label>
<input id="overlayChannelInput" type="number" min="0" value="0" />
<label>colormap:</label>
<select id="overlayMapSelect">
<option value="jet">jet</option>
<option value="gray">gray</option>
</select>
<label>alpha:</label>
<input id="overlayAlphaInput" type="range" min="0" max="1" step="0.01" value="0.45" />
<span id="overlayAlphaText">0.45</span>
<button id="overlayRenderBtn" class="primary">Render Overlay</button>
</div>
<div class="preview-grid">
<div class="subcard">
<div class="subcard-title">Overlay</div>
<canvas id="overlayCanvas" width="1" height="1"></canvas>
<div id="overlayStats" class="stats"></div>
</div>
<div class="subcard">
<div class="subcard-title">Heatmap Only</div>
<canvas id="heatmapCanvas" width="1" height="1"></canvas>
<div id="heatmapStats" class="stats"></div>
</div>
</div>
</div>
<div id="outputGrid" class="grid"></div>
</div>
<script>
const payloadInput = document.getElementById("payloadInput");
const parseBtn = document.getElementById("parseBtn");
const clearBtn = document.getElementById("clearBtn");
const errorBox = document.getElementById("errorBox");
const outputGrid = document.getElementById("outputGrid");
const imageInput = document.getElementById("imageInput");
const clearImageBtn = document.getElementById("clearImageBtn");
const overlayOutputSelect = document.getElementById("overlayOutputSelect");
const overlayBatchInput = document.getElementById("overlayBatchInput");
const overlayChannelInput = document.getElementById("overlayChannelInput");
const overlayMapSelect = document.getElementById("overlayMapSelect");
const overlayAlphaInput = document.getElementById("overlayAlphaInput");
const overlayAlphaText = document.getElementById("overlayAlphaText");
const overlayRenderBtn = document.getElementById("overlayRenderBtn");
const overlayCanvas = document.getElementById("overlayCanvas");
const heatmapCanvas = document.getElementById("heatmapCanvas");
const overlayStats = document.getElementById("overlayStats");
const heatmapStats = document.getElementById("heatmapStats");
let parsedOutputs = [];
let sourceImage = null;
// Show the error banner with the given message text.
function showError(message) {
  errorBox.textContent = message;
  errorBox.style.display = "block";
}
// Hide the error banner and wipe any previous message.
function clearError() {
  errorBox.textContent = "";
  errorBox.style.display = "none";
}
// Decode a base64 string into a Float32Array, reading each 4-byte group
// as a little-endian IEEE-754 float. Trailing bytes (< 4) are ignored.
function decodeBase64Float32(base64String) {
  const text = String(base64String || "").trim();
  const raw = atob(text);
  const buf = new Uint8Array(raw.length);
  for (let idx = 0; idx < raw.length; idx++) {
    buf[idx] = raw.charCodeAt(idx);
  }
  const count = Math.floor(buf.byteLength / 4);
  const dv = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
  const result = new Float32Array(count);
  for (let idx = 0; idx < count; idx++) {
    result[idx] = dv.getFloat32(idx * 4, true);
  }
  return result;
}
// Multiply all elements of arr together; an empty array yields 1.
function product(arr) {
  let total = 1;
  for (const v of arr) total *= v;
  return total;
}
// Smallest value in a numeric array/typed array (+Infinity when empty).
function minArray(values) {
  let best = Number.POSITIVE_INFINITY;
  for (const v of values) {
    if (v < best) best = v;
  }
  return best;
}
// Largest value in a numeric array/typed array (-Infinity when empty).
function maxArray(values) {
  let best = Number.NEGATIVE_INFINITY;
  for (const v of values) {
    if (v > best) best = v;
  }
  return best;
}
// Extract one HxW plane from a flat tensor laid out as NCHW, CHW or HW.
// Returns {w, h, values}; throws on unsupported shapes or out-of-range indices.
function slice2D(data, shape, batchIndex, channelIndex) {
  if (!Array.isArray(shape) || shape.length < 2) {
    throw new Error("Unsupported shape");
  }
  // Copy h*w contiguous values starting at `base` into a fresh Float32Array.
  const copyPlane = (base, h, w) => {
    const total = h * w;
    const plane = new Float32Array(total);
    for (let i = 0; i < total; i++) plane[i] = data[base + i];
    return { w, h, values: plane };
  };
  if (shape.length === 4) {
    const [n, c, h, w] = shape;
    if (batchIndex < 0 || batchIndex >= n) throw new Error("Batch out of range");
    if (channelIndex < 0 || channelIndex >= c) throw new Error("Channel out of range");
    return copyPlane((batchIndex * c + channelIndex) * h * w, h, w);
  }
  if (shape.length === 3) {
    const [c, h, w] = shape;
    if (channelIndex < 0 || channelIndex >= c) throw new Error("Channel out of range");
    return copyPlane(channelIndex * h * w, h, w);
  }
  if (shape.length === 2) {
    // No batch/channel axes: the whole tensor is the plane.
    const [h, w] = shape;
    return { w, h, values: data.slice(0, h * w) };
  }
  throw new Error("Unsupported shape length: " + shape.length);
}
// Render a flat HxW value array as a min/max-normalized grayscale image on
// `canvas`, upscaled with nearest-neighbour so small feature maps stay visible.
// Returns the {min, max} range used for normalization.
function drawGrayscale(canvas, values, w, h) {
const min = minArray(values);
const max = maxArray(values);
const span = max - min || 1; // guard against divide-by-zero on constant input
// Draw at native resolution into an offscreen canvas first.
const temp = document.createElement("canvas");
temp.width = w;
temp.height = h;
const tctx = temp.getContext("2d");
const img = tctx.createImageData(w, h);
for (let i = 0; i < values.length; i++) {
const v = Math.round(((values[i] - min) / span) * 255);
const p = i * 4; // RGBA stride
img.data[p] = v;
img.data[p + 1] = v;
img.data[p + 2] = v;
img.data[p + 3] = 255;
}
tctx.putImageData(img, 0, 0);
// Integer upscale so the longest side is roughly 300px, minimum 1x.
const targetScale = Math.max(1, Math.floor(300 / Math.max(w, h)));
canvas.width = w * targetScale;
canvas.height = h * targetScale;
const ctx = canvas.getContext("2d");
ctx.imageSmoothingEnabled = false; // keep hard pixel edges
ctx.clearRect(0, 0, canvas.width, canvas.height);
ctx.drawImage(temp, 0, 0, canvas.width, canvas.height);
return { min, max };
}
// Map t in [0,1] to an [r,g,b] triple (0-255) approximating the "jet" colormap.
function jetColor(t) {
  const clamp01 = (v) => Math.max(0, Math.min(1, v));
  const x = clamp01(t);
  // Each channel is a clamped triangular ramp centred at a different point.
  const channel = (center) => Math.round(clamp01(1.5 - Math.abs(4 * x - center)) * 255);
  return [channel(3), channel(2), channel(1)];
}
// Render a flat HxW value array as a colormapped heatmap ("jet" or "gray")
// on `canvas`. Returns {min, max, temp} where `temp` is the native-resolution
// offscreen canvas, reused by renderOverlay for alpha compositing.
function drawHeatmapToCanvas(canvas, values, w, h, mapMode) {
const min = minArray(values);
const max = maxArray(values);
const span = max - min || 1; // guard against divide-by-zero on constant input
const temp = document.createElement("canvas");
temp.width = w;
temp.height = h;
const tctx = temp.getContext("2d");
const img = tctx.createImageData(w, h);
for (let i = 0; i < values.length; i++) {
const t = (values[i] - min) / span; // normalized 0..1
const p = i * 4; // RGBA stride
let color;
if (mapMode === "gray") {
const g = Math.round(t * 255);
color = [g, g, g];
} else {
color = jetColor(t);
}
img.data[p] = color[0];
img.data[p + 1] = color[1];
img.data[p + 2] = color[2];
img.data[p + 3] = 255;
}
tctx.putImageData(img, 0, 0);
// Integer upscale so the longest side is roughly 300px, minimum 1x.
const targetScale = Math.max(1, Math.floor(300 / Math.max(w, h)));
canvas.width = w * targetScale;
canvas.height = h * targetScale;
const ctx = canvas.getContext("2d");
ctx.imageSmoothingEnabled = false; // keep hard pixel edges
ctx.clearRect(0, 0, canvas.width, canvas.height);
ctx.drawImage(temp, 0, 0, canvas.width, canvas.height);
return { min, max, temp };
}
// Render the currently selected output channel as a heatmap, and composite it
// over the uploaded source image (if any) with the chosen alpha. Falls back to
// showing the heatmap alone when no image is loaded. All failures are reported
// in the stats labels rather than thrown.
function renderOverlay() {
if (parsedOutputs.length === 0) {
overlayStats.textContent = "No payload parsed yet.";
heatmapStats.textContent = "";
return;
}
const outIndex = Number(overlayOutputSelect.value) || 0;
const out = parsedOutputs[outIndex];
if (!out) {
overlayStats.textContent = "Output not found.";
heatmapStats.textContent = "";
return;
}
try {
const b = Number(overlayBatchInput.value) || 0;
const ch = Number(overlayChannelInput.value) || 0;
const mapMode = overlayMapSelect.value || "jet";
const alpha = Number(overlayAlphaInput.value);
// Extract the selected plane and draw the standalone heatmap first.
const slice = slice2D(out.data, out.shape, b, ch);
const h = drawHeatmapToCanvas(heatmapCanvas, Array.from(slice.values), slice.w, slice.h, mapMode);
heatmapStats.textContent = `Heatmap ${slice.w}x${slice.h}, min=${h.min.toFixed(6)}, max=${h.max.toFixed(6)}`;
const ctx = overlayCanvas.getContext("2d");
if (sourceImage) {
// Match the overlay canvas to the source image's native size.
overlayCanvas.width = sourceImage.naturalWidth || sourceImage.width;
overlayCanvas.height = sourceImage.naturalHeight || sourceImage.height;
ctx.imageSmoothingEnabled = true;
ctx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);
ctx.globalAlpha = 1;
ctx.drawImage(sourceImage, 0, 0, overlayCanvas.width, overlayCanvas.height);
// Composite the native-resolution heatmap (h.temp) on top with alpha;
// save/restore isolates the alpha + smoothing changes.
ctx.save();
ctx.globalAlpha = isNaN(alpha) ? 0.45 : alpha;
ctx.imageSmoothingEnabled = false;
ctx.drawImage(h.temp, 0, 0, overlayCanvas.width, overlayCanvas.height);
ctx.restore();
overlayStats.textContent = `Overlay on image ${overlayCanvas.width}x${overlayCanvas.height}, alpha=${(isNaN(alpha) ? 0.45 : alpha).toFixed(2)}, output#${outIndex}, batch=${b}, channel=${ch}`;
} else {
// No source image: mirror the heatmap canvas into the overlay slot.
overlayCanvas.width = heatmapCanvas.width;
overlayCanvas.height = heatmapCanvas.height;
ctx.imageSmoothingEnabled = false;
ctx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height);
ctx.drawImage(heatmapCanvas, 0, 0);
overlayStats.textContent = `No source image loaded, showing heatmap only. output#${outIndex}, batch=${b}, channel=${ch}`;
}
} catch (e) {
overlayStats.textContent = "Overlay render failed: " + e.message;
heatmapStats.textContent = "";
}
}
// Rebuild the overlay output <select> so it lists one option per parsed
// output, labelled with its index, node_idx and shape.
function refreshOverlayOutputOptions() {
overlayOutputSelect.innerHTML = "";
parsedOutputs.forEach((o, idx) => {
const opt = document.createElement("option");
opt.value = String(idx);
opt.textContent = `#${idx} node_idx=${o.item.node_idx} shape=${JSON.stringify(o.shape)}`;
overlayOutputSelect.appendChild(opt);
});
}
// Build one per-output DOM card: title, shape metadata, batch/channel inputs,
// a grayscale preview canvas and a stats line. Renders channel 0 immediately
// and re-renders on button click. Returns the card element (not attached).
function buildOutputCard(item, idx) {
const shape = Array.isArray(item.shape) ? item.shape : [];
const data = decodeBase64Float32(item.data_base64);
// Element count implied by the shape, used to spot truncated payloads.
const expected = shape.length ? product(shape) : data.length;
const card = document.createElement("div");
card.className = "card";
const title = document.createElement("div");
title.className = "title";
title.textContent = `Output #${idx} node_idx=${item.node_idx}`;
const meta = document.createElement("div");
meta.className = "meta";
meta.textContent = `dtype=${item.dtype || "unknown"}, shape=${JSON.stringify(shape)}, decoded=${data.length}, expected=${expected}`;
// Batch/channel selectors plus the manual render button.
const row = document.createElement("div");
row.className = "row";
const batchLabel = document.createElement("label");
batchLabel.textContent = "batch:";
const batchInput = document.createElement("input");
batchInput.type = "number";
batchInput.value = 0;
batchInput.min = 0;
const channelLabel = document.createElement("label");
channelLabel.textContent = "channel:";
const channelInput = document.createElement("input");
channelInput.type = "number";
channelInput.value = 0;
channelInput.min = 0;
const renderBtn = document.createElement("button");
renderBtn.textContent = "Render Channel";
row.append(batchLabel, batchInput, channelLabel, channelInput, renderBtn);
const canvas = document.createElement("canvas");
canvas.width = 1;
canvas.height = 1;
const stats = document.createElement("div");
stats.className = "stats";
card.append(title, meta, row, canvas, stats);
// Slice the selected plane and paint it; errors land in the stats line.
const doRender = () => {
try {
const b = Number(batchInput.value) || 0;
const ch = Number(channelInput.value) || 0;
const slice = slice2D(data, shape, b, ch);
const st = drawGrayscale(canvas, Array.from(slice.values), slice.w, slice.h);
stats.textContent = `Rendered ${slice.w}x${slice.h}, min=${st.min.toFixed(6)}, max=${st.max.toFixed(6)}`;
} catch (e) {
stats.textContent = "Render failed: " + e.message;
}
};
renderBtn.addEventListener("click", doRender);
doRender(); // initial preview of batch 0 / channel 0
return card;
}
// Parse the pasted JSON payload, decode every output tensor, build one card
// per output, and refresh + auto-render the overlay selector. All user-facing
// failures are surfaced through the error banner.
function parsePayload() {
clearError();
outputGrid.innerHTML = "";
parsedOutputs = [];
overlayOutputSelect.innerHTML = "";
const text = payloadInput.value.trim();
if (!text) {
showError("Please paste payload JSON first.");
return;
}
let json;
try {
json = JSON.parse(text);
} catch (e) {
showError("JSON parse failed: " + e.message);
return;
}
// Accept either a wrapped ({data:{outputs}}) or flat ({outputs}) payload.
const outputs = json?.data?.outputs || json?.outputs;
if (!Array.isArray(outputs) || outputs.length === 0) {
showError("No outputs found. Expected payload.data.outputs array.");
return;
}
try {
outputs.forEach((o, i) => {
parsedOutputs.push({
item: o,
data: decodeBase64Float32(o.data_base64),
shape: Array.isArray(o.shape) ? o.shape : []
});
outputGrid.appendChild(buildOutputCard(o, i));
});
refreshOverlayOutputOptions();
// Default the overlay to the first output and render it immediately.
if (parsedOutputs.length > 0) {
overlayOutputSelect.value = "0";
renderOverlay();
}
} catch (e) {
showError("Render pipeline failed: " + e.message);
}
}
// --- Event wiring for the Payload Tensor Viewer page ---

parseBtn.addEventListener("click", parsePayload);
// Clear button: resets payload, parsed state, overlay selector and stats.
clearBtn.addEventListener("click", () => {
payloadInput.value = "";
outputGrid.innerHTML = "";
parsedOutputs = [];
overlayOutputSelect.innerHTML = "";
clearError();
overlayStats.textContent = "";
heatmapStats.textContent = "";
});
// Image picker: load the chosen file as a data URL, then redraw the overlay.
imageInput.addEventListener("change", (evt) => {
const file = evt.target.files && evt.target.files[0];
if (!file) return;
const reader = new FileReader();
reader.onload = () => {
const img = new Image();
img.onload = () => {
sourceImage = img;
renderOverlay();
};
img.src = String(reader.result || "");
};
reader.readAsDataURL(file);
});
// Drop the source image and fall back to heatmap-only rendering.
clearImageBtn.addEventListener("click", () => {
sourceImage = null;
imageInput.value = "";
renderOverlay();
});
// Any overlay control change re-renders immediately.
overlayRenderBtn.addEventListener("click", renderOverlay);
overlayOutputSelect.addEventListener("change", renderOverlay);
overlayBatchInput.addEventListener("change", renderOverlay);
overlayChannelInput.addEventListener("change", renderOverlay);
overlayMapSelect.addEventListener("change", renderOverlay);
// Alpha slider: keep the numeric label in sync and re-render live.
overlayAlphaInput.addEventListener("input", () => {
overlayAlphaText.textContent = overlayAlphaInput.value;
renderOverlay();
});
</script>
</body>
</html>

View File

@ -0,0 +1,83 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Pic64View</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 16px;
background: #f7f7f7;
}
.container {
max-width: 1000px;
margin: 0 auto;
}
textarea {
width: 100%;
min-height: 160px;
padding: 12px;
box-sizing: border-box;
font-family: Consolas, monospace;
}
.controls {
margin: 12px 0;
display: flex;
gap: 8px;
align-items: center;
}
img {
max-width: 100%;
border: 1px solid #ddd;
background: #fff;
}
.hint {
color: #555;
font-size: 13px;
}
</style>
</head>
<body>
<div class="container">
<h1>Pic64View</h1>
<p class="hint">
Paste a Base64 image string (with or without data URL prefix) and click "Render".
</p>
<textarea id="base64Input" placeholder="Paste Base64 here..."></textarea>
<div class="controls">
<button id="renderBtn">Render</button>
<button id="clearBtn">Clear</button>
</div>
<img id="preview" alt="Preview will appear here" />
</div>
<script>
const input = document.getElementById("base64Input");
const preview = document.getElementById("preview");
const renderBtn = document.getElementById("renderBtn");
const clearBtn = document.getElementById("clearBtn");
// Ensure the input is an image data URL; bare base64 gets a PNG prefix.
function normalizeBase64(value) {
  const cleaned = value.trim();
  return cleaned.startsWith("data:image")
    ? cleaned
    : "data:image/png;base64," + cleaned;
}
// Render button: show the pasted base64 as an image (no-op when input is blank).
renderBtn.addEventListener("click", () => {
const value = input.value;
if (!value.trim()) {
return;
}
preview.src = normalizeBase64(value);
});
// Clear button: wipe the textarea and remove the preview image source.
clearBtn.addEventListener("click", () => {
input.value = "";
preview.removeAttribute("src");
});
</script>
</body>
</html>

View File

@ -0,0 +1,627 @@
<!doctype html>
<html lang="zh-Hant">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Video Inference Viewer</title>
<style>
:root {
--bg: #0f172a;
--panel: #111827;
--panel-2: #1f2937;
--text: #e5e7eb;
--muted: #9ca3af;
--danger: #ef4444;
}
* { box-sizing: border-box; }
body {
margin: 0;
background: radial-gradient(circle at 10% 10%, #1e293b, var(--bg));
color: var(--text);
font-family: "Segoe UI", "Noto Sans TC", sans-serif;
}
.wrap {
max-width: 1400px;
margin: 16px auto;
padding: 0 16px;
display: grid;
grid-template-columns: 420px 1fr;
gap: 16px;
}
.panel {
background: linear-gradient(180deg, var(--panel), var(--panel-2));
border: 1px solid #334155;
border-radius: 12px;
padding: 14px;
}
.row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 8px;
margin-bottom: 8px;
}
.row-1 { display: grid; grid-template-columns: 1fr; gap: 8px; margin-bottom: 8px; }
label { font-size: 12px; color: var(--muted); display: block; margin-bottom: 4px; }
input, select, button {
width: 100%;
padding: 8px;
border-radius: 8px;
border: 1px solid #475569;
background: #0b1220;
color: var(--text);
}
button { cursor: pointer; font-weight: 600; }
button.primary { background: #14532d; border-color: #15803d; }
button.warn { background: #7f1d1d; border-color: #b91c1c; }
.status {
margin-top: 10px;
padding: 8px;
background: #0b1220;
border: 1px solid #334155;
border-radius: 8px;
font-size: 12px;
line-height: 1.5;
white-space: pre-wrap;
}
.canvas-wrap {
position: relative;
width: 100%;
background: #000;
border-radius: 12px;
overflow: hidden;
border: 1px solid #334155;
}
canvas { width: 100%; height: auto; display: block; }
.hint { font-size: 12px; color: var(--muted); margin-top: 8px; }
.error { color: var(--danger); }
@media (max-width: 1100px) { .wrap { grid-template-columns: 1fr; } }
</style>
</head>
<body>
<div class="wrap">
<section class="panel">
<h3 style="margin-top:0;">Video Inference (API)</h3>
<div class="row-1">
<div>
<label>Source</label>
<select id="sourceType">
<option value="file" selected>Video File</option>
<option value="webcam">Webcam</option>
</select>
</div>
<div id="videoFileWrap">
<label>Video File</label>
<input id="videoFile" type="file" accept="video/*" />
</div>
<div id="webcamControls" style="display:none;">
<label>Webcam Device</label>
<div class="row" style="margin-bottom:0;">
<select id="webcamDevice"></select>
<button id="refreshCamBtn" type="button">Refresh</button>
</div>
</div>
<div>
<label>API Base URL</label>
<input id="baseUrl" type="text" value="http://127.0.0.1:4398" />
</div>
</div>
<div class="row">
<div>
<label>Model Type</label>
<select id="modelType">
<option value="yolov5">YOLOv5</option>
<option value="fcos">FCOS</option>
<option value="tinyyolo">TinyYOLO</option>
</select>
</div>
<div>
<label>Model ID</label>
<input id="modelId" type="number" value="20005" />
</div>
</div>
<div class="row">
<div>
<label>Input Width</label>
<input id="inW" type="number" value="640" />
</div>
<div>
<label>Input Height</label>
<input id="inH" type="number" value="640" />
</div>
</div>
<div class="row">
<div>
<label>Image Format</label>
<select id="imageFormat">
<option value="RGBA8888" selected>RGBA8888</option>
<option value="RAW8">RAW8</option>
</select>
</div>
<div>
<label>Infer Every N Frames</label>
<input id="sampleEveryN" type="number" value="3" min="1" />
</div>
</div>
<div class="row">
<div>
<label>Num Classes</label>
<input id="numClasses" type="number" value="80" />
</div>
<div>
<label>Score Threshold</label>
<input id="scoreTh" type="number" step="0.01" value="0.25" />
</div>
</div>
<div class="row">
<div>
<label>NMS IoU</label>
<input id="nmsTh" type="number" step="0.01" value="0.45" />
</div>
<div>
<label>Max Boxes</label>
<input id="maxBoxes" type="number" value="200" />
</div>
</div>
<div class="row">
<button id="startBtn" class="primary">Start</button>
<button id="stopBtn" class="warn">Stop</button>
</div>
<div id="status" class="status">Ready.</div>
<div class="hint">預設值可直接測 YOLOv5。先確認 LocalAPI 已啟動,並完成 connect + load model。</div>
</section>
<section class="panel">
<div class="canvas-wrap">
<canvas id="displayCanvas" width="960" height="540"></canvas>
</div>
<video id="video" style="display:none;"></video>
<canvas id="inferCanvas" width="640" height="640" style="display:none;"></canvas>
</section>
</div>
<script>
const videoEl = document.getElementById("video");
const displayCanvas = document.getElementById("displayCanvas");
const inferCanvas = document.getElementById("inferCanvas");
const dctx = displayCanvas.getContext("2d");
const ictx = inferCanvas.getContext("2d");
const sourceType = document.getElementById("sourceType");
const videoFileWrap = document.getElementById("videoFileWrap");
const videoFile = document.getElementById("videoFile");
const webcamControls = document.getElementById("webcamControls");
const webcamDevice = document.getElementById("webcamDevice");
const refreshCamBtn = document.getElementById("refreshCamBtn");
const baseUrl = document.getElementById("baseUrl");
const modelType = document.getElementById("modelType");
const modelId = document.getElementById("modelId");
const inW = document.getElementById("inW");
const inH = document.getElementById("inH");
const imageFormat = document.getElementById("imageFormat");
const sampleEveryN = document.getElementById("sampleEveryN");
const numClasses = document.getElementById("numClasses");
const scoreTh = document.getElementById("scoreTh");
const nmsTh = document.getElementById("nmsTh");
const maxBoxes = document.getElementById("maxBoxes");
const startBtn = document.getElementById("startBtn");
const stopBtn = document.getElementById("stopBtn");
const statusEl = document.getElementById("status");
const YOLO_ANCHORS = [
[[10,14],[23,27],[37,58]],
[[81,82],[135,169],[344,319]]
];
const DEFAULT_MODEL_ID = { yolov5: 20005, fcos: 20004, tinyyolo: 19 };
let running = false;
let inFlight = false;
let frameIndex = -1;
let inferCount = 0;
let lastBoxes = [];
let startTs = 0;
let webcamStream = null;
let currentBlobUrl = "";
// When the model type changes, prefill the matching input resolution and the
// default model id for that type (falls back to 1 for unknown types).
modelType.addEventListener("change", () => {
if (modelType.value === "fcos") { inW.value = 512; inH.value = 512; }
else if (modelType.value === "tinyyolo") { inW.value = 224; inH.value = 224; }
else { inW.value = 640; inH.value = 640; }
modelId.value = DEFAULT_MODEL_ID[modelType.value] || 1;
});
// Update the status panel; isError toggles the red error styling.
function setStatus(text, isError=false) {
  statusEl.className = isError ? "status error" : "status";
  statusEl.textContent = text;
}
// Show the file picker or the webcam controls depending on the chosen source.
function updateSourceUI() {
  const webcamChosen = sourceType.value === "webcam";
  webcamControls.style.display = webcamChosen ? "block" : "none";
  videoFileWrap.style.display = webcamChosen ? "none" : "block";
}
// Repopulate the webcam <select> with all available video input devices.
// Shows a placeholder option when none are found; enumeration failures are
// reported through setStatus instead of thrown.
async function listWebcams() {
webcamDevice.innerHTML = "";
try {
const devices = await navigator.mediaDevices.enumerateDevices();
const cams = devices.filter(d => d.kind === "videoinput");
if (!cams.length) {
const opt = document.createElement("option");
opt.value = "";
opt.textContent = "No webcam found";
webcamDevice.appendChild(opt);
return;
}
cams.forEach((d, idx) => {
const opt = document.createElement("option");
opt.value = d.deviceId;
// Device labels may be empty before camera permission is granted.
opt.textContent = d.label || `Camera ${idx + 1}`;
webcamDevice.appendChild(opt);
});
} catch (e) {
setStatus(`List webcam failed:\n${String(e)}`, true);
}
}
// Logistic sigmoid: maps any real v into the open interval (0, 1).
function sigmoid(v) {
  return 1 / (1 + Math.exp(-v));
}
// Encode a Uint8Array as base64. Conversion runs in 32 KiB chunks so the
// String.fromCharCode.apply call never exceeds the engine's argument limit.
function bytesToBase64(bytes) {
  const step = 0x8000;
  const pieces = [];
  for (let off = 0; off < bytes.length; off += step) {
    pieces.push(String.fromCharCode.apply(null, bytes.subarray(off, off + step)));
  }
  return btoa(pieces.join(""));
}
// Decode base64 into a Float32Array (4 bytes per value, little-endian).
// Any trailing partial group of bytes is dropped.
function decodeBase64Float32(base64String) {
  const bin = atob(String(base64String || "").trim());
  const n = bin.length;
  const u8 = new Uint8Array(n);
  for (let i = 0; i < n; i++) u8[i] = bin.charCodeAt(i);
  const floats = new Float32Array(Math.floor(n / 4));
  const view = new DataView(u8.buffer);
  for (let i = 0; i < floats.length; i++) floats[i] = view.getFloat32(i * 4, true);
  return floats;
}
// Normalize raw API output entries into {node_idx, shape, data} records,
// decoding each base64 tensor and defaulting node_idx to the array position.
function parseOutputs(rawOutputs) {
  return (rawOutputs || []).map((entry, position) => {
    const shape = Array.isArray(entry.shape) ? entry.shape : [];
    return {
      node_idx: Number(entry.node_idx ?? position),
      shape,
      data: decodeBase64Float32(entry.data_base64)
    };
  });
}
// Keep only batch-1 NCHW outputs whose channel count is a multiple of
// (5 + clsCount) — i.e. plausible YOLO heads — sorted largest grid first.
function pickYoloNodes(outputs, clsCount) {
  const attrs = 5 + clsCount;
  const candidates = outputs.filter(
    (o) => o.shape.length === 4 && o.shape[0] === 1 && o.shape[1] % attrs === 0
  );
  return candidates.sort((a, b) => b.shape[2] - a.shape[2]);
}
// Decode raw YOLO head tensors into candidate boxes (input-image pixel coords,
// xyxy). `mode` selects the box transform: "yolov5" uses the v5-style
// (2*sigmoid-0.5) center / squared-sigmoid size form; anything else uses the
// classic sigmoid+exp form. Boxes below confTh are dropped; NMS is NOT applied.
function decodeYolo(outputs, mode, clsCount, iw, ih, confTh) {
const nodes = pickYoloNodes(outputs, clsCount);
if (!nodes.length) return [];
const boxes = [];
const attrs = 5 + clsCount; // x, y, w, h, objectness + class scores
for (let lv = 0; lv < nodes.length; lv++) {
const o = nodes[lv];
const [, ch, gh, gw] = o.shape;
const na = Math.floor(ch / attrs); // anchors per level
// Level index maps onto the anchor set; clamped to the last set when
// there are more levels than anchor groups.
const anchors = YOLO_ANCHORS[Math.min(lv, YOLO_ANCHORS.length - 1)];
const data = o.data;
// CHW indexing helper for this level's flat tensor.
const at = (ci, y, x) => data[ci * gh * gw + y * gw + x];
for (let a = 0; a < na; a++) {
const [aw, ah] = anchors[Math.min(a, anchors.length - 1)];
const base = a * attrs;
for (let y = 0; y < gh; y++) {
for (let x = 0; x < gw; x++) {
const tx = at(base + 0, y, x);
const ty = at(base + 1, y, x);
const tw = at(base + 2, y, x);
const th = at(base + 3, y, x);
const obj = sigmoid(at(base + 4, y, x));
// Pick the best-scoring class at this cell/anchor.
let bestCls = -1;
let bestProb = -Infinity;
for (let k = 0; k < clsCount; k++) {
const p = sigmoid(at(base + 5 + k, y, x));
if (p > bestProb) { bestProb = p; bestCls = k; }
}
const score = obj * bestProb;
if (score < confTh) continue;
let bx, by, bw, bh;
if (mode === "yolov5") {
// YOLOv5 decode: center offset in (-0.5, 1.5) per cell, size as
// (2*sigmoid)^2 times the anchor.
const sx = iw / gw, sy = ih / gh;
bx = (sigmoid(tx) * 2 - 0.5 + x) * sx;
by = (sigmoid(ty) * 2 - 0.5 + y) * sy;
bw = Math.pow(sigmoid(tw) * 2, 2) * aw;
bh = Math.pow(sigmoid(th) * 2, 2) * ah;
} else {
// Classic YOLO decode: sigmoid center within the cell, exp size.
bx = (sigmoid(tx) + x) / gw * iw;
by = (sigmoid(ty) + y) / gh * ih;
bw = aw * Math.exp(tw);
bh = ah * Math.exp(th);
}
boxes.push({ cls: bestCls, score, x1: bx - bw / 2, y1: by - bh / 2, x2: bx + bw / 2, y2: by + bh / 2 });
}
}
}
}
return boxes;
}
// Decode FCOS head tensors into candidate boxes (input-image pixel coords,
// xyxy, clamped to the image). Per HxW level it pairs a class map (C channels),
// a regression map (4: l/t/r/b distances) and a centerness map (1), identified
// purely by channel count. Score = sqrt(clsProb * centerness); NMS is NOT applied.
function decodeFcos(outputs, clsCount, iw, ih, confTh) {
const valid = outputs.filter(o => o.shape.length === 4 && o.shape[0] === 1);
const clsNodes = valid.filter(o => o.shape[1] === clsCount);
const regNodes = valid.filter(o => o.shape[1] === 4);
const ctrNodes = valid.filter(o => o.shape[1] === 1);
// Group the three node kinds by their spatial key "HxW".
const map = new Map();
for (const n of clsNodes) map.set(`${n.shape[2]}x${n.shape[3]}`, { ...(map.get(`${n.shape[2]}x${n.shape[3]}`)||{}), cls:n });
for (const n of regNodes) map.set(`${n.shape[2]}x${n.shape[3]}`, { ...(map.get(`${n.shape[2]}x${n.shape[3]}`)||{}), reg:n });
for (const n of ctrNodes) map.set(`${n.shape[2]}x${n.shape[3]}`, { ...(map.get(`${n.shape[2]}x${n.shape[3]}`)||{}), ctr:n });
// Keep only fully paired levels, largest grid first so strides line up 8..128.
const keys = [...map.keys()].filter(k => { const v = map.get(k); return v.cls && v.reg && v.ctr; })
.sort((a,b) => Number(b.split("x")[0]) - Number(a.split("x")[0]));
const strides = [8,16,32,64,128];
const boxes = [];
for (let lv = 0; lv < keys.length; lv++) {
const v = map.get(keys[lv]);
const clsNode = v.cls, regNode = v.reg, ctrNode = v.ctr;
const gh = clsNode.shape[2], gw = clsNode.shape[3], stride = strides[Math.min(lv, strides.length-1)];
// CHW indexing helper shared by all three node tensors of this level.
const at = (node, ci, y, x) => node.data[ci * gh * gw + y * gw + x];
for (let y = 0; y < gh; y++) {
for (let x = 0; x < gw; x++) {
const ctr = sigmoid(at(ctrNode, 0, y, x));
// Best-scoring class at this location.
let bestCls = -1, bestProb = -Infinity;
for (let k = 0; k < Math.min(clsCount, clsNode.shape[1]); k++) {
const p = sigmoid(at(clsNode, k, y, x));
if (p > bestProb) { bestProb = p; bestCls = k; }
}
const score = Math.sqrt(Math.max(0, bestProb * ctr));
if (score < confTh) continue;
// l/t/r/b are distances from the anchor point; negatives clamped to 0.
const l = Math.max(0, at(regNode, 0, y, x));
const t = Math.max(0, at(regNode, 1, y, x));
const r = Math.max(0, at(regNode, 2, y, x));
const b = Math.max(0, at(regNode, 3, y, x));
// Anchor point is the cell centre projected back to input pixels.
const cx = (x + 0.5) * stride, cy = (y + 0.5) * stride;
const x1 = Math.max(0, Math.min(iw, cx - l));
const y1 = Math.max(0, Math.min(ih, cy - t));
const x2 = Math.max(0, Math.min(iw, cx + r));
const y2 = Math.max(0, Math.min(ih, cy + b));
if (x2 <= x1 || y2 <= y1) continue; // degenerate after clamping
boxes.push({ cls: bestCls, score, x1, y1, x2, y2 });
}
}
}
return boxes;
}
// Intersection-over-union of two axis-aligned boxes {x1, y1, x2, y2}.
// Returns 0 when the boxes do not overlap.
function iou(a, b) {
  const ix = Math.min(a.x2, b.x2) - Math.max(a.x1, b.x1);
  const iy = Math.min(a.y2, b.y2) - Math.max(a.y1, b.y1);
  if (ix <= 0 || iy <= 0) return 0;
  const inter = ix * iy;
  const areaOf = (r) => Math.max(0, r.x2 - r.x1) * Math.max(0, r.y2 - r.y1);
  // Epsilon in the denominator guards against zero-area unions.
  return inter / Math.max(1e-9, areaOf(a) + areaOf(b) - inter);
}
// Per-class non-maximum suppression: within each class, greedily keep the
// highest-scoring box and drop overlaps above iouTh. The survivors from all
// classes are merged, sorted by descending score and capped at maxOutCount.
function nms(boxes, iouTh, maxOutCount) {
  const grouped = new Map();
  for (const box of boxes) {
    const bucket = grouped.get(box.cls);
    if (bucket) bucket.push(box);
    else grouped.set(box.cls, [box]);
  }
  const survivors = [];
  for (const bucket of grouped.values()) {
    const queue = [...bucket].sort((p, q) => q.score - p.score);
    while (queue.length) {
      const best = queue.shift();
      survivors.push(best);
      // Remove everything that overlaps the winner too strongly.
      for (let i = queue.length - 1; i >= 0; i--) {
        if (iou(best, queue[i]) > iouTh) queue.splice(i, 1);
      }
    }
  }
  return survivors.sort((p, q) => q.score - p.score).slice(0, maxOutCount);
}
// Draw the current video frame onto the display canvas and overlay the given
// detection boxes. Box coordinates are in model-input space and are rescaled
// to the video's native resolution; each class gets a deterministic hue.
function drawFrameWithBoxes(boxes) {
const vw = videoEl.videoWidth || 960;
const vh = videoEl.videoHeight || 540;
// Resize the canvas only when the video resolution changes (resizing clears it).
if (displayCanvas.width !== vw || displayCanvas.height !== vh) {
displayCanvas.width = vw;
displayCanvas.height = vh;
}
dctx.drawImage(videoEl, 0, 0, vw, vh);
// Scale factors: model-input coords -> display coords.
const iw = Number(inW.value), ih = Number(inH.value);
const sx = vw / iw, sy = vh / ih;
for (const b of boxes) {
const x1 = b.x1 * sx, y1 = b.y1 * sy, x2 = b.x2 * sx, y2 = b.y2 * sy;
const w = Math.max(1, x2 - x1), h = Math.max(1, y2 - y1);
const hue = (b.cls * 47) % 360; // spread class colours around the hue wheel
const color = `hsl(${hue} 90% 50%)`;
dctx.strokeStyle = color;
dctx.lineWidth = 2;
dctx.strokeRect(x1, y1, w, h);
// Label: "class:score" on a filled tag above the box (clamped to the top edge).
const txt = `${b.cls}:${b.score.toFixed(3)}`;
dctx.font = "12px sans-serif";
dctx.fillStyle = color;
dctx.fillRect(x1, Math.max(0, y1 - 14), dctx.measureText(txt).width + 8, 14);
dctx.fillStyle = "#fff";
dctx.fillText(txt, x1 + 4, Math.max(10, y1 - 3));
}
}
// Capture the current video frame, encode it in the selected wire format,
// POST it to the service's /inference/run endpoint, decode the returned
// tensors with the decoder matching the selected model family, and return
// the NMS-filtered boxes (in model-input coordinates).
async function inferCurrentFrame() {
  const iw = Number(inW.value), ih = Number(inH.value);
  // Downscale the frame to the model input size on the hidden canvas.
  inferCanvas.width = iw;
  inferCanvas.height = ih;
  ictx.drawImage(videoEl, 0, 0, iw, ih);
  // getImageData yields tightly packed RGBA bytes.
  const raw = ictx.getImageData(0, 0, iw, ih).data;
  let bytes;
  if (imageFormat.value === "RAW8") {
    // Grayscale: one byte per pixel via the classic 0.299/0.587/0.114
    // luma weights, clamped to [0, 255].
    bytes = new Uint8Array(iw * ih);
    for (let i = 0, j = 0; i < raw.length; i += 4, j++) {
      const r = raw[i], g = raw[i + 1], b = raw[i + 2];
      bytes[j] = Math.max(0, Math.min(255, Math.round(0.299 * r + 0.587 * g + 0.114 * b)));
    }
  } else {
    // Other formats: ship the RGBA buffer as-is (copied so the view is exact).
    bytes = new Uint8Array(raw.buffer.slice(raw.byteOffset, raw.byteOffset + raw.byteLength));
  }
  const payload = {
    model_id: Number(modelId.value),
    image_format: imageFormat.value,
    width: iw,
    height: ih,
    image_base64: bytesToBase64(bytes),
    channels_ordering: "DEFAULT",
    output_dtype: "float32"
  };
  // Strip a trailing slash from the base URL before appending the path.
  const res = await fetch(`${baseUrl.value.replace(/\/$/, "")}/inference/run`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload)
  });
  const parsed = await res.json();
  // The service wraps results in an { ok, data, error } envelope.
  if (!parsed.ok) throw new Error(JSON.stringify(parsed.error));
  const outputs = parseOutputs(parsed.data.outputs || []);
  const clsCount = Number(numClasses.value);
  const confTh = Number(scoreTh.value);
  let rawBoxes = [];
  // Pick the decoder for the selected model family.
  if (modelType.value === "fcos") rawBoxes = decodeFcos(outputs, clsCount, iw, ih, confTh);
  else rawBoxes = decodeYolo(outputs, modelType.value === "yolov5" ? "yolov5" : "tinyyolo", clsCount, iw, ih, confTh);
  return nms(rawBoxes, Number(nmsTh.value), Number(maxBoxes.value));
}
// Per-frame driver scheduled via requestAnimationFrame. Redraws on every
// frame, but only issues a new inference request every N frames, and never
// while a previous request is still in flight (the inFlight guard).
async function loop() {
  if (!running) return;
  if (videoEl.paused || videoEl.ended) {
    // Video is not advancing: keep showing the last result and reschedule.
    drawFrameWithBoxes(lastBoxes);
    requestAnimationFrame(loop);
    return;
  }
  frameIndex++;
  if (frameIndex % Math.max(1, Number(sampleEveryN.value || 1)) === 0 && !inFlight) {
    inFlight = true;
    inferCount++;
    try { lastBoxes = await inferCurrentFrame(); }
    catch (e) { setStatus(`Inference failed:\n${String(e)}`, true); }
    finally { inFlight = false; }
  }
  drawFrameWithBoxes(lastBoxes);
  // api_fps reports service-call throughput since Start, not display FPS.
  const sec = (performance.now() - startTs) / 1000;
  const apiFps = inferCount / Math.max(sec, 0.001);
  setStatus(
    `source=${sourceType.value}\n` +
    `frame=${frameIndex}\n` +
    `infer_count=${inferCount}\n` +
    `api_fps=${apiFps.toFixed(2)}\n` +
    `boxes=${lastBoxes.length}\n` +
    `video_time=${videoEl.currentTime.toFixed(2)}s`
  );
  requestAnimationFrame(loop);
}
// Release whichever media source is active: stop all webcam tracks, revoke
// any object URL for a loaded file, and detach both from the video element.
async function stopMediaSource() {
  const activeStream = webcamStream;
  if (activeStream) {
    for (const track of activeStream.getTracks()) track.stop();
    webcamStream = null;
  }
  if (currentBlobUrl) {
    // Free the blob memory backing a previously chosen file.
    URL.revokeObjectURL(currentBlobUrl);
    currentBlobUrl = "";
  }
  try { videoEl.pause(); } catch {}
  videoEl.srcObject = null;
  videoEl.removeAttribute("src");
}
// Halt the animation loop and release the active media source.
function stop() {
  running = false;   // loop() exits at its next callback
  inFlight = false;  // clear any pending-inference flag
  // Fire-and-forget: stopMediaSource() is async but nothing awaits it here.
  stopMediaSource();
}
// ---- UI wiring (runs once at script load) ----
sourceType.addEventListener("change", updateSourceUI);
refreshCamBtn.addEventListener("click", async () => {
  await listWebcams();
  setStatus("Webcam list refreshed.");
});
stopBtn.addEventListener("click", stop);
// Start: (re)acquire the selected media source, then kick off the loop.
startBtn.addEventListener("click", async () => {
  try {
    // Release any previously active camera/blob before starting anew.
    await stopMediaSource();
    const isWebcam = sourceType.value === "webcam";
    if (isWebcam) {
      const constraints = {
        video: webcamDevice.value
          ? {
              deviceId: { exact: webcamDevice.value },
              width: { ideal: Number(inW.value) || 640 },
              height: { ideal: Number(inH.value) || 640 }
            }
          : true, // no specific device chosen: let the browser pick
        audio: false
      };
      webcamStream = await navigator.mediaDevices.getUserMedia(constraints);
      videoEl.srcObject = webcamStream;
    } else {
      if (!videoFile.files || !videoFile.files[0]) {
        setStatus("Please choose a video file first.", true);
        return;
      }
      currentBlobUrl = URL.createObjectURL(videoFile.files[0]);
      videoEl.src = currentBlobUrl;
    }
    // Muted inline playback avoids browser autoplay restrictions.
    videoEl.muted = true;
    videoEl.playsInline = true;
    try { videoEl.currentTime = 0; } catch {}
    await videoEl.play();
    // Reset loop counters and begin rendering.
    running = true;
    frameIndex = -1;
    inferCount = 0;
    lastBoxes = [];
    startTs = performance.now();
    setStatus("Running inference...");
    requestAnimationFrame(loop);
  } catch (e) {
    setStatus(`Start failed:\n${String(e)}`, true);
  }
});
window.addEventListener("beforeunload", () => { stop(); });
// Initial state.
updateSourceUI();
listWebcams();
setStatus("Ready. Choose source and click Start.");
// Re-fire the model-type change handler (defined earlier in the script),
// presumably so fields that depend on the model type get their defaults.
modelType.dispatchEvent(new Event("change"));
</script>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 147 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 MiB

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -0,0 +1,29 @@
# Model/Image Test Pairs (from kneron_plus examples)
## KL520
- YOLOv5 (model zoo)
- Model: `res/models/KL520/yolov5-noupsample_w640h640_kn-model-zoo/kl520_20005_yolov5-noupsample_w640h640.nef`
- Image: `res/images/one_bike_many_cars_800x800.bmp`
- Source: `examples_model_zoo/kl520_kn-model-zoo_generic_inference_post_yolov5/kl520_kn-model-zoo_generic_inference_post_yolov5.c`
- FCOS (model zoo)
- Model: `res/models/KL520/fcos-drk53s_w512h512_kn-model-zoo/kl520_20004_fcos-drk53s_w512h512.nef`
- Image: `res/images/one_bike_many_cars_800x800.bmp`
- Source: `examples_model_zoo/kl520_kn-model-zoo_generic_inference_post_fcos/kl520_kn-model-zoo_generic_inference_post_fcos.c`
- Tiny YOLO v3 (generic demo)
- Model: `res/models/KL520/tiny_yolo_v3/models_520.nef`
- Image: `res/images/bike_cars_street_224x224.bmp`
- Source: `examples/kl520_demo_app_yolo_inference/kl520_demo_app_yolo_inference.c`
- Tiny YOLO v3 (multithread demo)
- Model: `res/models/KL520/tiny_yolo_v3/models_520.nef`
- Image: `res/images/bike_cars_street_224x224.bmp`
- Source: `examples/kl520_demo_app_yolo_inference_multithread/kl520_demo_app_yolo_inference_multithread.c`
## KL720
- YOLOv5 (model zoo)
- Model: `res/models/KL720/yolov5-noupsample_w640h640_kn-model-zoo/kl720_20005_yolov5-noupsample_w640h640.nef`
- Image: `res/images/one_bike_many_cars_800x800.bmp`
- Source: `examples_model_zoo/kl720_kn-model-zoo_generic_inference_post_yolov5/kl720_kn-model-zoo_generic_inference_post_yolov5.c`
- FCOS (model zoo)
- Model: `res/models/KL720/fcos-drk53s_w512h512_kn-model-zoo/kl720_20004_fcos-drk53s_w512h512.nef`
- Image: `res/images/one_bike_many_cars_800x800.bmp`
- Source: `examples_model_zoo/kl720_kn-model-zoo_generic_inference_post_fcos/kl720_kn-model-zoo_generic_inference_post_fcos.c`

View File

@ -0,0 +1,514 @@
from __future__ import annotations
import argparse
import base64
import json
import math
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
import cv2
import numpy as np
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from LocalAPI import postprocess_core as core
# Fallback anchor sets as (width, height) pairs in input-pixel units, one
# list per detection level; used by the YOLO decoder when the caller does
# not pass anchors_by_level. (These match the common Tiny-YOLOv3 anchors —
# NOTE(review): confirm against the deployed model's training config.)
YOLO_DEFAULT_ANCHORS: List[List[Tuple[float, float]]] = [
    [(10.0, 14.0), (23.0, 27.0), (37.0, 58.0)],
    [(81.0, 82.0), (135.0, 169.0), (344.0, 319.0)],
]
def _sigmoid(v: np.ndarray | float) -> np.ndarray | float:
return 1.0 / (1.0 + np.exp(-v))
def _encode_frame(frame_bgr: np.ndarray, image_format: str) -> bytes:
    """Serialize a BGR frame into the raw byte layout the service expects.

    Supported formats (case-insensitive): RGBA8888, RAW8 (grayscale),
    RGB565. Raises ValueError for anything else.
    """
    conversions = {
        "RGBA8888": cv2.COLOR_BGR2RGBA,
        "RAW8": cv2.COLOR_BGR2GRAY,
        "RGB565": cv2.COLOR_BGR2BGR565,
    }
    code = conversions.get(image_format.upper())
    if code is None:
        raise ValueError(f"Unsupported image_format: {image_format}")
    return cv2.cvtColor(frame_bgr, code).tobytes()
def _call_inference_run(
    base_url: str,
    model_id: int,
    image_format: str,
    width: int,
    height: int,
    image_bytes: bytes,
    channels_ordering: str = "DEFAULT",
    output_dtype: str = "float32",
    timeout_sec: float = 20.0,
) -> Dict[str, Any]:
    """POST one frame to the service's /inference/run endpoint.

    Returns the "data" part of the JSON envelope on success. Raises
    RuntimeError on HTTP errors, transport failures, or when the envelope
    reports ok == False.
    """
    payload = json.dumps(
        {
            "model_id": model_id,
            "image_format": image_format,
            "width": width,
            "height": height,
            "image_base64": base64.b64encode(image_bytes).decode("ascii"),
            "channels_ordering": channels_ordering,
            "output_dtype": output_dtype,
        }
    ).encode("utf-8")
    request = urllib.request.Request(
        url=f"{base_url.rstrip('/')}/inference/run",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout_sec) as resp:
            content = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {exc.code}: {detail}") from exc
    except urllib.error.URLError as exc:
        raise RuntimeError(f"Request failed: {exc}") from exc
    envelope = json.loads(content)
    if not envelope.get("ok"):
        raise RuntimeError(json.dumps(envelope.get("error"), ensure_ascii=False))
    return envelope["data"]
def _decode_outputs(raw_outputs: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
decoded: List[Dict[str, Any]] = []
for idx, o in enumerate(raw_outputs):
shape = list(o.get("shape") or [])
data_b64 = str(o.get("data_base64") or "")
raw = base64.b64decode(data_b64)
arr = np.frombuffer(raw, dtype="<f4")
expected = int(np.prod(shape)) if shape else arr.size
if expected != arr.size:
raise RuntimeError(f"Output node {idx} size mismatch: expected={expected}, got={arr.size}")
decoded.append(
{
"idx": idx,
"node_idx": int(o.get("node_idx", idx)),
"shape": shape,
"data": arr,
}
)
return decoded
def _pick_yolo_nodes(all_nodes: Sequence[Dict[str, Any]], num_classes: int) -> List[Dict[str, Any]]:
picked: List[Dict[str, Any]] = []
for o in all_nodes:
shape = o["shape"]
if len(shape) != 4 or shape[0] != 1:
continue
ch = int(shape[1])
if ch % (5 + num_classes) != 0:
continue
picked.append(o)
picked.sort(key=lambda n: int(n["shape"][2]), reverse=True)
return picked
def _decode_yolo_common(
    all_nodes: Sequence[Dict[str, Any]],
    mode: str,
    num_classes: int,
    input_w: int,
    input_h: int,
    conf_th: float,
    use_sigmoid: bool = True,
    use_xy_sigmoid: bool = True,
    score_mode: str = "obj_cls",
    anchors_by_level: Optional[List[List[Tuple[float, float]]]] = None,
) -> List[core.Box]:
    """Decode YOLO-family head tensors into candidate boxes (pre-NMS).

    Args:
        all_nodes: Decoded output nodes (dicts with "shape" and flat "data").
        mode: "yolov5" uses the v5 box transform; anything else uses the
            classic YOLO/tiny-YOLO transform.
        num_classes: Class count C; usable heads have A*(5+C) channels.
        input_w / input_h: Model input size; boxes come back in this space.
        conf_th: Minimum score for a cell/anchor to be kept.
        use_sigmoid: Apply sigmoid to objectness and class logits.
        use_xy_sigmoid: Apply sigmoid to the x/y offset logits.
        score_mode: "obj", "cls", or anything else for obj * cls.
        anchors_by_level: Per-level (w, h) anchors; defaults to
            YOLO_DEFAULT_ANCHORS.

    Returns:
        List of core.Box in input-pixel coordinates.

    Raises:
        RuntimeError: When no YOLO-like [1, C, H, W] node is present.
    """
    nodes = _pick_yolo_nodes(all_nodes, num_classes)
    if not nodes:
        raise RuntimeError("No YOLO-like [1,C,H,W] output nodes found")
    anchors_levels = anchors_by_level or YOLO_DEFAULT_ANCHORS
    # FIX: this function previously constructed the undefined name `Box`
    # (NameError at runtime); the Box dataclass lives in postprocess_core,
    # consistent with core.Box usage elsewhere in this module.
    boxes: List[core.Box] = []
    attrs = 5 + num_classes
    for lv, o in enumerate(nodes):
        _, ch, gh, gw = o["shape"]
        na = int(ch // attrs)
        data: np.ndarray = o["data"]
        # Reuse the last anchor set when there are more levels than anchors.
        anchors = anchors_levels[min(lv, len(anchors_levels) - 1)]

        def at(channel_idx: int, y: int, x: int) -> float:
            # Flat CHW indexing into this level's float buffer.
            return float(data[channel_idx * gh * gw + y * gw + x])

        for a in range(na):
            aw, ah = anchors[min(a, len(anchors) - 1)]
            base = a * attrs
            for y in range(gh):
                for x in range(gw):
                    tx = at(base + 0, y, x)
                    ty = at(base + 1, y, x)
                    tw = at(base + 2, y, x)
                    th = at(base + 3, y, x)
                    to = at(base + 4, y, x)
                    obj = float(_sigmoid(to) if use_sigmoid else to)
                    # Arg-max over the class logits.
                    best_cls = -1
                    best_prob = -1e9
                    for k in range(num_classes):
                        p = at(base + 5 + k, y, x)
                        p = float(_sigmoid(p) if use_sigmoid else p)
                        if p > best_prob:
                            best_prob = p
                            best_cls = k
                    if score_mode == "obj":
                        score = obj
                    elif score_mode == "cls":
                        score = best_prob
                    else:
                        score = obj * best_prob
                    if score < conf_th:
                        continue
                    if mode == "yolov5":
                        # YOLOv5 transform: center offset spans [-0.5, 1.5]
                        # around the cell; wh = (2*sigmoid(t))^2 * anchor.
                        sx = input_w / gw
                        sy = input_h / gh
                        txv = float(_sigmoid(tx) if use_xy_sigmoid else tx)
                        tyv = float(_sigmoid(ty) if use_xy_sigmoid else ty)
                        bx = (txv * 2.0 - 0.5 + x) * sx
                        by = (tyv * 2.0 - 0.5 + y) * sy
                        bw = (float(_sigmoid(tw)) * 2.0) ** 2 * aw
                        bh = (float(_sigmoid(th)) * 2.0) ** 2 * ah
                    else:
                        # Classic YOLO transform: offset within the cell;
                        # wh = anchor * exp(t).
                        txv = float(_sigmoid(tx) if use_xy_sigmoid else tx)
                        tyv = float(_sigmoid(ty) if use_xy_sigmoid else ty)
                        bx = (txv + x) / gw * input_w
                        by = (tyv + y) / gh * input_h
                        bw = aw * math.exp(tw)
                        bh = ah * math.exp(th)
                    boxes.append(
                        core.Box(
                            cls=best_cls,
                            score=score,
                            x1=bx - bw / 2.0,
                            y1=by - bh / 2.0,
                            x2=bx + bw / 2.0,
                            y2=by + bh / 2.0,
                        )
                    )
    return boxes
def _auto_fcos_indices(all_nodes: Sequence[Dict[str, Any]], num_classes: int) -> List[Tuple[int, int, int, int]]:
valid = [o for o in all_nodes if len(o["shape"]) == 4 and o["shape"][0] == 1]
cls_nodes = [o for o in valid if int(o["shape"][1]) == num_classes]
reg_nodes = [o for o in valid if int(o["shape"][1]) == 4]
ctr_nodes = [o for o in valid if int(o["shape"][1]) == 1]
by_hw: Dict[Tuple[int, int], Dict[str, Dict[str, Any]]] = {}
for n in cls_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["cls"] = n
for n in reg_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["reg"] = n
for n in ctr_nodes:
by_hw.setdefault((int(n["shape"][2]), int(n["shape"][3])), {})["ctr"] = n
levels: List[Tuple[int, int, int, int]] = []
for (h, w), items in by_hw.items():
if not {"cls", "reg", "ctr"}.issubset(items.keys()):
continue
levels.append(
(
h,
int(items["cls"]["node_idx"]),
int(items["reg"]["node_idx"]),
int(items["ctr"]["node_idx"]),
)
)
levels.sort(key=lambda x: x[0], reverse=True)
strides = [8, 16, 32, 64, 128]
return [(cls_i, reg_i, ctr_i, strides[min(i, len(strides) - 1)]) for i, (_, cls_i, reg_i, ctr_i) in enumerate(levels)]
def _decode_fcos(
    all_nodes: Sequence[Dict[str, Any]],
    num_classes: int,
    input_w: int,
    input_h: int,
    conf_th: float,
    use_sigmoid: bool = True,
    score_mode: str = "obj_cls",
) -> List[core.Box]:
    """Decode FCOS head tensors into candidate boxes (pre-NMS).

    Args:
        all_nodes: Decoded output nodes (dicts with "shape", "node_idx",
            and flat float "data").
        num_classes: Class count; used to identify the cls heads.
        input_w / input_h: Model input size; boxes are clamped to it.
        conf_th: Minimum score for a location to be kept.
        use_sigmoid: Apply sigmoid to centerness and class logits.
        score_mode: "obj" (centerness only), "cls" (class prob only), or
            anything else for sqrt(cls * centerness).

    Returns:
        List of core.Box in input-pixel coordinates.

    Raises:
        RuntimeError: When cls/reg/ctr heads cannot be auto-matched.
    """
    levels = _auto_fcos_indices(all_nodes, num_classes)
    if not levels:
        raise RuntimeError("Cannot auto match FCOS cls/reg/ctr nodes")
    # FIX: this function previously constructed the undefined name `Box`
    # (NameError at runtime); the Box dataclass lives in postprocess_core,
    # consistent with core.Box usage elsewhere in this module.
    boxes: List[core.Box] = []
    by_idx = {int(n["node_idx"]): n for n in all_nodes}
    for cls_idx, reg_idx, ctr_idx, stride in levels:
        cls_node = by_idx.get(cls_idx)
        reg_node = by_idx.get(reg_idx)
        ctr_node = by_idx.get(ctr_idx)
        if not cls_node or not reg_node or not ctr_node:
            continue
        gh = int(cls_node["shape"][2])
        gw = int(cls_node["shape"][3])
        cls_data: np.ndarray = cls_node["data"]
        reg_data: np.ndarray = reg_node["data"]
        ctr_data: np.ndarray = ctr_node["data"]

        def at(node_data: np.ndarray, channel_idx: int, y: int, x: int) -> float:
            # Flat CHW indexing into a level's float buffer.
            return float(node_data[channel_idx * gh * gw + y * gw + x])

        cls_channels = int(cls_node["shape"][1])
        for y in range(gh):
            for x in range(gw):
                ctr = at(ctr_data, 0, y, x)
                ctr = float(_sigmoid(ctr) if use_sigmoid else ctr)
                # Arg-max over the class logits.
                best_cls = -1
                best_prob = -1e9
                for k in range(min(num_classes, cls_channels)):
                    p = at(cls_data, k, y, x)
                    p = float(_sigmoid(p) if use_sigmoid else p)
                    if p > best_prob:
                        best_prob = p
                        best_cls = k
                if score_mode == "obj":
                    score = ctr
                elif score_mode == "cls":
                    score = best_prob
                else:
                    # Geometric mean of class prob and centerness.
                    score = math.sqrt(max(0.0, best_prob * ctr))
                if score < conf_th:
                    continue
                # (l, t, r, b) are distances from the location center,
                # clamped to be non-negative.
                l = max(0.0, at(reg_data, 0, y, x))
                t = max(0.0, at(reg_data, 1, y, x))
                r = max(0.0, at(reg_data, 2, y, x))
                b = max(0.0, at(reg_data, 3, y, x))
                cx = (x + 0.5) * stride
                cy = (y + 0.5) * stride
                x1 = max(0.0, min(input_w, cx - l))
                y1 = max(0.0, min(input_h, cy - t))
                x2 = max(0.0, min(input_w, cx + r))
                y2 = max(0.0, min(input_h, cy + b))
                if x2 <= x1 or y2 <= y1:
                    continue
                boxes.append(core.Box(cls=best_cls, score=score, x1=x1, y1=y1, x2=x2, y2=y2))
    return boxes
def _iou(a: Box, b: Box) -> float:
xx1 = max(a.x1, b.x1)
yy1 = max(a.y1, b.y1)
xx2 = min(a.x2, b.x2)
yy2 = min(a.y2, b.y2)
w = max(0.0, xx2 - xx1)
h = max(0.0, yy2 - yy1)
inter = w * h
if inter <= 0:
return 0.0
area_a = max(0.0, a.x2 - a.x1) * max(0.0, a.y2 - a.y1)
area_b = max(0.0, b.x2 - b.x1) * max(0.0, b.y2 - b.y1)
return inter / max(1e-9, area_a + area_b - inter)
def _nms(boxes: Sequence[Box], iou_th: float, max_out: int) -> List[Box]:
    """Greedy per-class non-maximum suppression.

    Within each class, the highest-scoring box wins and suppresses any
    remaining box whose IoU with it exceeds iou_th. Survivors are returned
    globally sorted by descending score, capped at max_out.
    """
    per_class: Dict[int, List[Box]] = {}
    for box in boxes:
        per_class.setdefault(box.cls, []).append(box)
    survivors: List[Box] = []
    for candidates in per_class.values():
        queue = sorted(candidates, key=lambda b: b.score, reverse=True)
        while queue:
            best = queue.pop(0)
            survivors.append(best)
            queue = [b for b in queue if _iou(best, b) <= iou_th]
    survivors.sort(key=lambda b: b.score, reverse=True)
    return survivors[:max_out]
def _draw_boxes(frame: np.ndarray, boxes: Sequence[core.Box], input_w: int, input_h: int) -> np.ndarray:
    """Return a copy of `frame` with boxes (given in model-input
    coordinates) rescaled to the frame size and drawn on top."""
    canvas = frame.copy()
    frame_h, frame_w = canvas.shape[:2]
    scale_x = frame_w / float(input_w)
    scale_y = frame_h / float(input_h)
    for box in boxes:
        px1 = int(max(0, min(frame_w - 1, round(box.x1 * scale_x))))
        py1 = int(max(0, min(frame_h - 1, round(box.y1 * scale_y))))
        px2 = int(max(0, min(frame_w - 1, round(box.x2 * scale_x))))
        py2 = int(max(0, min(frame_h - 1, round(box.y2 * scale_y))))
        if px2 <= px1 or py2 <= py1:
            continue
        # Stable per-class color from a hue ramp (OpenCV hue range is 0-179).
        hsv_pixel = np.uint8([[[box.cls * 47 % 180, 255, 220]]])
        color = tuple(int(c) for c in cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0])
        cv2.rectangle(canvas, (px1, py1), (px2, py2), color, 2)
        label = f"{box.cls}:{box.score:.3f}"
        cv2.putText(canvas, label, (px1, max(14, py1 - 4)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2, cv2.LINE_AA)
    return canvas
def _pick_video_via_dialog() -> Optional[str]:
    """Open a native file picker for a video file.

    Returns the chosen path, or None when tkinter is unavailable or the
    user cancels the dialog.
    """
    try:
        import tkinter as tk
        from tkinter import filedialog
    except Exception:
        return None
    root = tk.Tk()
    root.withdraw()  # hide the empty main window; only the dialog is wanted
    selected = filedialog.askopenfilename(
        title="Select video file",
        filetypes=[("Video files", "*.mp4 *.avi *.mov *.mkv *.wmv"), ("All files", "*.*")],
    )
    root.destroy()
    # An empty string (cancel) is normalized to None.
    return selected or None
def _defaults_for_model(model_type: str) -> Tuple[int, int]:
mt = model_type.lower()
if mt == "fcos":
return 512, 512
if mt == "tinyyolo":
return 224, 224
return 640, 640
def main() -> None:
    """CLI entry point: stream a video file through the local /inference/run
    service, draw the returned detections on each frame, show a live preview
    window, and optionally save the annotated video."""
    parser = argparse.ArgumentParser(description="Video -> /inference/run -> draw detection boxes")
    parser.add_argument("--base-url", default="http://127.0.0.1:4398")
    parser.add_argument("--video", default="")
    parser.add_argument("--model-id", type=int, required=True)
    parser.add_argument("--model-type", choices=["yolov5", "fcos", "tinyyolo"], default="yolov5")
    # 0 means "use the default input size for --model-type".
    parser.add_argument("--input-width", type=int, default=0)
    parser.add_argument("--input-height", type=int, default=0)
    parser.add_argument("--image-format", default="RGBA8888")
    parser.add_argument("--num-classes", type=int, default=80)
    parser.add_argument("--score-th", type=float, default=0.25)
    parser.add_argument("--iou-th", type=float, default=0.45)
    parser.add_argument("--max-boxes", type=int, default=200)
    # Call the service only on every N-th frame; other frames reuse the
    # most recent boxes.
    parser.add_argument("--sample-every-n", type=int, default=3)
    parser.add_argument("--save-output", default="")
    args = parser.parse_args()
    # Fall back to a GUI file picker when --video was not supplied.
    video_path = args.video.strip() or _pick_video_via_dialog()
    if not video_path:
        raise SystemExit("No video selected")
    if not Path(video_path).is_file():
        raise SystemExit(f"Video not found: {video_path}")
    default_w, default_h = _defaults_for_model(args.model_type)
    in_w = int(args.input_width or default_w)
    in_h = int(args.input_height or default_h)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise SystemExit(f"Cannot open video: {video_path}")
    writer: Optional[cv2.VideoWriter] = None
    if args.save_output:
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        # Fall back to defaults when the container reports falsy metadata.
        fps = float(cap.get(cv2.CAP_PROP_FPS) or 20.0)
        frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or in_w)
        frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or in_h)
        writer = cv2.VideoWriter(args.save_output, fourcc, fps, (frame_w, frame_h))
    print("Press 'q' to quit.")
    frame_idx = -1
    infer_count = 0
    last_boxes: List[core.Box] = []
    t0 = time.time()
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame_idx += 1
            if frame_idx % max(1, args.sample_every_n) == 0:
                infer_count += 1
                # Resize to the model input size before encoding for the wire.
                resized = cv2.resize(frame, (in_w, in_h), interpolation=cv2.INTER_AREA)
                image_bytes = _encode_frame(resized, args.image_format)
                try:
                    result = _call_inference_run(
                        base_url=args.base_url,
                        model_id=args.model_id,
                        image_format=args.image_format,
                        width=in_w,
                        height=in_h,
                        image_bytes=image_bytes,
                    )
                    raw_outputs = result.get("outputs") or []
                    outputs = core.decode_outputs(raw_outputs)
                    # Decode with the post-processor matching the model family.
                    if args.model_type == "fcos":
                        raw_boxes = core.decode_fcos(
                            outputs,
                            num_classes=args.num_classes,
                            input_w=in_w,
                            input_h=in_h,
                            conf_th=args.score_th,
                        )
                    else:
                        raw_boxes = core.decode_yolo_common(
                            outputs,
                            mode="yolov5" if args.model_type == "yolov5" else "tinyyolo",
                            num_classes=args.num_classes,
                            input_w=in_w,
                            input_h=in_h,
                            conf_th=args.score_th,
                        )
                    last_boxes = core.nms(raw_boxes, iou_th=args.iou_th, max_out=args.max_boxes)
                except Exception as exc:
                    # Best-effort: keep playing with the previous boxes when
                    # a single inference call fails.
                    print(f"[frame {frame_idx}] inference failed: {exc}")
            vis = _draw_boxes(frame, last_boxes, in_w, in_h)
            # api_fps measures service-call throughput, not display FPS.
            elapsed = max(1e-6, time.time() - t0)
            api_fps = infer_count / elapsed
            cv2.putText(
                vis,
                f"frame={frame_idx} infer={infer_count} api_fps={api_fps:.2f} boxes={len(last_boxes)}",
                (10, 24),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.65,
                (0, 255, 0),
                2,
                cv2.LINE_AA,
            )
            cv2.imshow("Kneron Video Inference Viewer", vis)
            if writer is not None:
                writer.write(vis)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        # Always release capture/writer handles and close the preview window.
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # Script entry point.
    main()

View File

@ -0,0 +1 @@
2.2.0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
SDK-v2.5.7

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
2.2.0

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
SDK-v1.3.0

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,16 @@
# Core SDK (installed via local wheel; see STRATEGY.md)
# KneronPLUS==3.0.0
# HTTP service
fastapi
uvicorn
python-multipart
# Reference packages from C:\Users\user\Documents\KNEOX\README.md
PyQt5
opencv-python
pyinstaller
pyarmor
# Common dependency for kp data handling
numpy

View File

@ -0,0 +1 @@
3.0.0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.