diff --git a/README.md b/README.md index 087fd3b..de7660d 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,28 @@ ONNX → BIE → NEF。系統以 Scheduler 為控制面,搭配 Worker Pool 與 7) NEF Worker 執行完成 8) Scheduler 標記 COMPLETED +## Worker API Flags(可選) +這些旗標用於控制 evaluator / simulator 步驟。皆有預設值,不填不影響既有流程。 + +- ONNX `/api/onnx/process` + - `enable_evaluate` (default: `false`): 是否執行 IP evaluator(原 Web GUI 流程為 OFF) + - `enable_sim_fp` (default: `false`): 是否執行浮點 E2E 模擬(尚未接線) +- BIE `/api/bie/process` + - `enable_sim_fixed` (default: `false`): 是否執行定點 E2E 模擬(尚未接線) +- NEF `/api/nef/process` + - `enable_sim_hw` (default: `false`): 是否執行硬體 E2E 模擬(尚未接線) + +## 流程預設開關對照(原 Web GUI vs 現在 Workers) +| 步驟 | 原 Web GUI 預設 | 現在 Workers 預設 | 開關 | +|---|---|---|---| +| ONNX 轉換/最佳化 | ON | ON | 無 | +| IP Evaluator | OFF | OFF | `enable_evaluate` | +| FP E2E 模擬 | OFF | OFF | `enable_sim_fp` | +| BIE 量化 | ON | ON | 無 | +| Fixed-Point E2E 模擬 | OFF | OFF | `enable_sim_fixed` | +| NEF Compile | ON | ON | 無 | +| HW E2E 模擬 | OFF | OFF | `enable_sim_hw` | + ## 非目標 - 不做任務持久化 - 不做 crash 後 resume diff --git a/docs/Design.md b/docs/Design.md index 274bd67..f77f80b 100644 --- a/docs/Design.md +++ b/docs/Design.md @@ -165,16 +165,34 @@ error: - 輸入:工作目錄下的唯一檔案(不假設檔名 / 副檔名) - 輸出:`out.onnx` - 輸出位置:同一工作目錄 +- 可選旗標: + - `enable_evaluate` (default: `false`):是否執行 IP evaluator(原 Web GUI 流程為 OFF) + - `enable_sim_fp` (default: `false`):是否執行浮點 E2E 模擬(尚未接線) ### 4.1.3 BIE Worker - 輸入:`out.onnx` + `ref_images/*` - 輸出:`out.bie` - 輸出位置:同一工作目錄 +- 可選旗標: + - `enable_sim_fixed` (default: `false`):是否執行定點 E2E 模擬(尚未接線) ### 4.1.4 NEF Worker - 輸入:`out.bie` - 輸出:`out.nef` - 輸出位置:同一工作目錄 +- 可選旗標: + - `enable_sim_hw` (default: `false`):是否執行硬體 E2E 模擬(尚未接線) + +### 4.1.6 流程預設開關對照(原 Web GUI vs 現在 Workers) +| 步驟 | 原 Web GUI 預設 | 現在 Workers 預設 | 開關 | +|---|---|---|---| +| ONNX 轉換/最佳化 | ON | ON | 無 | +| IP Evaluator | OFF | OFF | `enable_evaluate` | +| FP E2E 模擬 | OFF | OFF | `enable_sim_fp` | +| BIE 量化 | ON | ON | 無 | +| Fixed-Point E2E 模擬 | OFF | OFF | 
`enable_sim_fixed` | +| NEF Compile | ON | ON | 無 | +| HW E2E 模擬 | OFF | OFF | `enable_sim_hw` | ### 4.1.5 Core / Toolchain 路徑一致性 - Worker 需將工作目錄 path 傳給 core diff --git a/docs/flow_modularization_notes.md b/docs/flow_modularization_notes.md index 83d6d63..f2d4106 100644 --- a/docs/flow_modularization_notes.md +++ b/docs/flow_modularization_notes.md @@ -245,7 +245,41 @@ If a file currently mixes multiple module responsibilities: - Move high-risk dependencies first (prebuilt calls, sys_flow usage). - After each phase, re-check boundaries and adjust. -## 6) Minimum Viable API Proposal +## 7) Current Structure and Replacement Strategy (As-Is) +Based on the refactor just completed, the effective call chain is: + +``` +workers (ONNX/BIE/NEF) + -> backends (interfaces + Kneron implementations) + -> ktc (toolchain python API) + -> vendor sys_flow / libs / libs_V2 / prebuilt binaries +``` + +### 7.1 What this means today +- Workers only depend on **backend interfaces**. They no longer call `ktc.ModelConfig` directly. +- Kneron specifics are concentrated in backend implementations. +- `ktc` still wraps the Kneron toolchain and binaries; that dependency remains, but it is **now isolated**. + +### 7.2 How to replace later +1) **Replace backend implementations** (lowest-risk) + - Keep backend interfaces stable. + - Swap `Kneron*Backend` for `Your*Backend` without touching workers. + +2) **Keep backend layer, but replace `ktc` calls** + - Modify `Kneron*Backend` to call your own library instead of `ktc`. + - Workers stay unchanged; only backend code moves. + +3) **Introduce multiple backends** + - Add `get_*_backend(name=...)` selection based on config/env. + - Allows mixed runs: Kneron for NEF, OSS for ONNX, etc. 
+ +### 7.3 Where to implement replacements +- `services/backends/quantization.py` +- `services/backends/compiler.py` +- `services/backends/evaluator.py` +- `services/backends/simulator.py` + +## 8) Minimum Viable API Proposal Keep it minimal to avoid churn: ```python @@ -260,6 +294,24 @@ class CompilerBackend: Then the pipeline is just a pure composition of these two + ONNX ops. +## 9) What This Enables +- Replace ONNX converters / optimizers without touching quantization. +- Run ONNX flow in pure OSS environments (CI, dev) without Kneron binaries. +- Swap in future Kneron versions only inside backend adapters. +- Experiment with alternative quantization or compiler backends. + +--- + +## 10) Next Steps +The following can be drafted next: +1) A small refactor plan with concrete file edits and minimal API changes. +2) A diagram (Mermaid) of the new modular flow. +3) A compatibility matrix (current vs target dependencies per module). + +These are candidate follow-up tasks, in rough priority order. + + + +## 7) What This Enables - Replace ONNX converters / optimizers without touching quantization. - Run ONNX flow in pure OSS environments (CI, dev) without Kneron binaries. diff --git a/docs/refactor_progress.md b/docs/refactor_progress.md new file mode 100644 index 0000000..6f780e1 --- /dev/null +++ b/docs/refactor_progress.md @@ -0,0 +1,45 @@ +# Refactor Progress Log + +## 2026-02-05 +- Started modularization refactor per `docs/flow_modularization_notes.md`. +- Goal: introduce backend interfaces, decouple ONNX evaluation, keep behavior stable. + +### Planned Steps +1) Create backend interfaces (quantization/compiler, optional evaluator/simulator). +2) Update ONNX/BIE/NEF workers to use backends and make eval optional. +3) Review boundaries and document issues. + +### Issues / Risks +- None yet. + +## 2026-02-05 Update +- Added backend interfaces under `services/backends`.
+- ONNX worker now makes IP evaluation optional via `parameters.enable_evaluate`. +- BIE/NEF workers now call backend interfaces instead of direct `ModelConfig` usage. + +### Issues / Risks +- `services/workers/onnx/core.py` now sets `eval_report` to empty string when disabled; check callers if they rely on non-empty. +- Quantization backend supports optional `onnx_model` to avoid duplicate optimization. + +## 2026-02-05 Update 2 +- Added explicit request flags for evaluator/simulator toggles in worker schemas: + - ONNX: `enable_evaluate`, `enable_sim_fp` + - BIE: `enable_sim_fixed` + - NEF: `enable_sim_hw` + +### Issues / Risks +- Simulator flags are defined but not yet wired to execution paths. + +## 2026-02-05 Update 3 +- Documented worker API flags in `README.md` and `docs/Design.md`. + +## 2026-02-05 Update 4 +- Set `enable_evaluate` default to `false` to match original Web GUI flow. +- Documented original Web GUI ON/OFF expectations in `README.md` and `docs/Design.md`. + +## 2026-02-05 Update 5 +- Added ON/OFF comparison table for original Web GUI vs current workers in `README.md` and `docs/Design.md`. + +## 2026-02-05 Update 6 +- Default `enable_evaluate` in `process_onnx_core` set to `False` to match Web GUI defaults. +- Full worker test set passed (onnx/bie/nef/e2e/e2e-tflite). 
diff --git a/services/backends/__init__.py b/services/backends/__init__.py new file mode 100644 index 0000000..dc1db46 --- /dev/null +++ b/services/backends/__init__.py @@ -0,0 +1 @@ +"""Backend interfaces and implementations.""" diff --git a/services/backends/compiler.py b/services/backends/compiler.py new file mode 100644 index 0000000..29d545e --- /dev/null +++ b/services/backends/compiler.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Protocol + + +class CompilerBackend(Protocol): + def compile(self, bie_path: str, output_dir: str, **kwargs) -> str: + """Compile BIE into NEF and return the generated NEF path.""" + + +class KneronCompilerBackend: + def compile(self, bie_path: str, output_dir: str, **kwargs) -> str: + import ktc + + km = ktc.ModelConfig( + kwargs["model_id"], + kwargs["version"], + kwargs["platform"], + bie_path=bie_path, + ) + return ktc.compile([km], output_dir=output_dir or None) + + +def get_compiler_backend(name: str | None = None) -> CompilerBackend: + _ = name + return KneronCompilerBackend() diff --git a/services/backends/evaluator.py b/services/backends/evaluator.py new file mode 100644 index 0000000..6fd2cd8 --- /dev/null +++ b/services/backends/evaluator.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Protocol + + +class EvaluatorBackend(Protocol): + def evaluate(self, onnx_path: str, **kwargs) -> str: + """Run IP evaluation and return a report string.""" + + +class KneronEvaluatorBackend: + def evaluate(self, onnx_path: str, **kwargs) -> str: + import ktc + + km = ktc.ModelConfig( + kwargs["model_id"], + kwargs["version"], + kwargs["platform"], + onnx_path=onnx_path, + ) + return km.evaluate() + + +def get_evaluator_backend(name: str | None = None) -> EvaluatorBackend: + _ = name + return KneronEvaluatorBackend() diff --git a/services/backends/quantization.py b/services/backends/quantization.py new file mode 100644 index 0000000..be2f14a --- /dev/null +++ 
b/services/backends/quantization.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import Dict, Protocol + + +class QuantizationBackend(Protocol): + def analyze( + self, + onnx_path: str, + input_mapping: Dict, + output_dir: str, + **kwargs, + ) -> str: + """Run quantization and return the generated BIE path.""" + + +class KneronQuantizationBackend: + def analyze( + self, + onnx_path: str, + input_mapping: Dict, + output_dir: str, + **kwargs, + ) -> str: + import ktc + + model = kwargs.get("onnx_model") + if model is None: + import onnx + + model = onnx.load(onnx_path) + model = ktc.onnx_optimizer.onnx2onnx_flow(model, eliminate_tail=True, opt_matmul=True) + + km = ktc.ModelConfig( + kwargs["model_id"], + kwargs["version"], + kwargs["platform"], + onnx_model=model, + ) + return km.analysis(input_mapping, output_dir=output_dir) + + +def get_quantization_backend(name: str | None = None) -> QuantizationBackend: + # Placeholder for future backend selection logic. + _ = name + return KneronQuantizationBackend() diff --git a/services/backends/simulator.py b/services/backends/simulator.py new file mode 100644 index 0000000..b4fb077 --- /dev/null +++ b/services/backends/simulator.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Protocol, Sequence + + +class SimulatorBackend(Protocol): + def simulate(self, input_data: Sequence, **kwargs): + """Run E2E simulation and return results.""" + + +class KneronSimulatorBackend: + def simulate(self, input_data: Sequence, **kwargs): + import ktc + + return ktc.kneron_inference( + input_data, + onnx_file=kwargs.get("onnx_file"), + bie_file=kwargs.get("bie_file"), + nef_file=kwargs.get("nef_file"), + input_names=kwargs.get("input_names"), + platform=kwargs.get("platform"), + model_id=kwargs.get("model_id"), + ) + + +def get_simulator_backend(name: str | None = None) -> SimulatorBackend: + _ = name + return KneronSimulatorBackend() diff --git a/services/workers/bie/core.py 
b/services/workers/bie/core.py index 8ee8677..d5c5217 100644 --- a/services/workers/bie/core.py +++ b/services/workers/bie/core.py @@ -46,14 +46,7 @@ def process_bie_core( input_node_height = input_node.type.tensor_type.shape.dim[2].dim_value input_node_width = input_node.type.tensor_type.shape.dim[3].dim_value - km = ktc.ModelConfig( - parameters["model_id"], - parameters["version"], - parameters["platform"], - onnx_model=model, - ) - - img_list = [] + img_list = [] for dir_path, _, file_names in os.walk(data_dir): for file_name in file_names: fullpath = os.path.join(dir_path, file_name) @@ -66,7 +59,18 @@ def process_bie_core( ) img_list.append(img_data) - bie_model_path = km.analysis({input_node_name: img_list}, output_dir=output_dir or ".") + from services.backends.quantization import get_quantization_backend + + backend = get_quantization_backend() + bie_model_path = backend.analyze( + onnx_file_path, + {input_node_name: img_list}, + output_dir or ".", + onnx_model=model, + model_id=parameters["model_id"], + version=parameters["version"], + platform=parameters["platform"], + ) if os.path.abspath(bie_model_path) != os.path.abspath(output_path): # Move to avoid keeping duplicate large binaries on disk. 
diff --git a/services/workers/bie/main.py b/services/workers/bie/main.py index 2deb89d..e658fcc 100644 --- a/services/workers/bie/main.py +++ b/services/workers/bie/main.py @@ -58,12 +58,16 @@ class HealthResponse(BaseModel): timestamp: str active_tasks: int -class BIEProcessRequest(BaseModel): - onnx_file_id: str - model_id: int = Field(..., ge=1, le=65535) - version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') - platform: str = Field(..., regex=r'^(520|720|530|630|730)$') - data_dir: str = Field(..., min_length=1) +class BIEProcessRequest(BaseModel): + onnx_file_id: str + model_id: int = Field(..., ge=1, le=65535) + version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') + platform: str = Field(..., regex=r'^(520|720|530|630|730)$') + data_dir: str = Field(..., min_length=1) + enable_sim_fixed: bool = Field( + False, + description="Run fixed-point E2E simulation after quantization (not yet wired).", + ) class TaskStatusResponse(BaseModel): task_id: str diff --git a/services/workers/nef/core.py b/services/workers/nef/core.py index 1be650f..335756b 100644 --- a/services/workers/nef/core.py +++ b/services/workers/nef/core.py @@ -23,16 +23,16 @@ def process_nef_core( os.environ.setdefault("KTC_WORKDIR", work_dir) os.environ.setdefault("KTC_SCRIPT_RES", res_dir) - import ktc - - km = ktc.ModelConfig( - parameters["model_id"], - parameters["version"], - parameters["platform"], - bie_path=bie_file_path, - ) - - nef_model_path = ktc.compile([km], output_dir=output_dir or None) + from services.backends.compiler import get_compiler_backend + + backend = get_compiler_backend() + nef_model_path = backend.compile( + bie_file_path, + output_dir or None, + model_id=parameters["model_id"], + version=parameters["version"], + platform=parameters["platform"], + ) if os.path.abspath(nef_model_path) != os.path.abspath(output_path): # Move to avoid keeping duplicate large binaries on disk. 
shutil.move(str(nef_model_path), output_path) diff --git a/services/workers/nef/main.py b/services/workers/nef/main.py index cfdd04e..5e05716 100644 --- a/services/workers/nef/main.py +++ b/services/workers/nef/main.py @@ -58,11 +58,15 @@ class HealthResponse(BaseModel): timestamp: str active_tasks: int -class NEFProcessRequest(BaseModel): - bie_file_id: str - model_id: int = Field(..., ge=1, le=65535) - version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') - platform: str = Field(..., regex=r'^(520|720|530|630|730)$') +class NEFProcessRequest(BaseModel): + bie_file_id: str + model_id: int = Field(..., ge=1, le=65535) + version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') + platform: str = Field(..., regex=r'^(520|720|530|630|730)$') + enable_sim_hw: bool = Field( + False, + description="Run hardware E2E simulation after compilation (not yet wired).", + ) class TaskStatusResponse(BaseModel): task_id: str diff --git a/services/workers/onnx/core.py b/services/workers/onnx/core.py index a56f597..19a2850 100644 --- a/services/workers/onnx/core.py +++ b/services/workers/onnx/core.py @@ -4,11 +4,11 @@ from typing import Dict, Any import onnx -def process_onnx_core( - input_paths: Dict[str, str], - output_path: str, - parameters: Dict[str, Any], -) -> Dict[str, Any]: +def process_onnx_core( + input_paths: Dict[str, str], + output_path: str, + parameters: Dict[str, Any], +) -> Dict[str, Any]: file_path = input_paths["file_path"] if not os.path.exists(file_path): raise FileNotFoundError(f"Input file not found: {file_path}") @@ -36,16 +36,20 @@ def process_onnx_core( model = ktc.onnx_optimizer.onnx2onnx_flow(model, eliminate_tail=True, opt_matmul=True) onnx.save(model, output_path) - km = ktc.ModelConfig( - int(parameters["model_id"]), - str(parameters["version"]), - str(parameters["platform"]), - onnx_model=model, - ) - evaluate_result = km.evaluate() - eval_result = evaluate_result.split(",")[0] - - return { + eval_result = "" + if parameters.get("enable_evaluate", False): 
+ from services.backends.evaluator import get_evaluator_backend + + evaluator = get_evaluator_backend() + evaluate_result = evaluator.evaluate( + output_path, + model_id=int(parameters["model_id"]), + version=str(parameters["version"]), + platform=str(parameters["platform"]), + ) + eval_result = evaluate_result.split(",")[0] + + return { "file_path": output_path, "file_size": os.path.getsize(output_path), "eval_report": eval_result, diff --git a/services/workers/onnx/main.py b/services/workers/onnx/main.py index 32407fc..6faf254 100644 --- a/services/workers/onnx/main.py +++ b/services/workers/onnx/main.py @@ -67,11 +67,19 @@ class FileUploadResponse(BaseModel): file_path: str message: str -class ONNXProcessRequest(BaseModel): - file_id: str - model_id: int = Field(..., ge=1, le=65535) - version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') - platform: str = Field(..., regex=r'^(520|720|530|630|730)$') +class ONNXProcessRequest(BaseModel): + file_id: str + model_id: int = Field(..., ge=1, le=65535) + version: str = Field(..., regex=r'^[0-9a-fA-F]{4}$') + platform: str = Field(..., regex=r'^(520|720|530|630|730)$') + enable_evaluate: bool = Field( + False, + description="Run IP evaluator (toolchain) after ONNX optimization.", + ) + enable_sim_fp: bool = Field( + False, + description="Run floating-point E2E simulation (not yet wired).", + ) class TaskStatusResponse(BaseModel): task_id: str diff --git a/tests/workers/test_flow_e2e.py b/tests/workers/test_flow_e2e.py index 2370bdc..07b1c50 100644 --- a/tests/workers/test_flow_e2e.py +++ b/tests/workers/test_flow_e2e.py @@ -43,7 +43,13 @@ def test_worker_flow_e2e_uses_single_workdir(): work_input_file = work_inputs[0] onnx_output = work_dir / "out.onnx" - onnx_params = {"model_id": 10, "version": "e2e", "platform": "520", "work_dir": str(work_dir)} + onnx_params = { + "model_id": 10, + "version": "e2e", + "platform": "520", + "work_dir": str(work_dir), + "enable_evaluate": False, + } onnx_result = process_onnx_core( 
{"file_path": str(work_input_file)}, str(onnx_output), diff --git a/tests/workers/test_flow_e2e_tflite.py b/tests/workers/test_flow_e2e_tflite.py index 7edf4d2..e95eb54 100644 --- a/tests/workers/test_flow_e2e_tflite.py +++ b/tests/workers/test_flow_e2e_tflite.py @@ -43,7 +43,13 @@ def test_worker_flow_e2e_tflite_uses_single_workdir(): work_input_file = work_inputs[0] onnx_output = work_dir / "out.onnx" - onnx_params = {"model_id": 20, "version": "e2e-tflite", "platform": "520", "work_dir": str(work_dir)} + onnx_params = { + "model_id": 20, + "version": "e2e-tflite", + "platform": "520", + "work_dir": str(work_dir), + "enable_evaluate": False, + } onnx_result = process_onnx_core( {"file_path": str(work_input_file)}, str(onnx_output),