From ff5cab6b0ec141c3cec87aee56af98a3190ebe0d Mon Sep 17 00:00:00 2001 From: jim800121chen Date: Thu, 16 Apr 2026 00:45:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(local-tool):=20hard=20timeout=20180s=20+?= =?UTF-8?q?=20=E5=85=A8=20stage=20=E7=B4=B0=E6=AD=A5=20detail=20emit=20+?= =?UTF-8?q?=20Stage1=20seed=20pause?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 使用者回報 Windows 仍看到「啟動時間超過 60 秒」紅 banner,且要求每個 階段做什麼都印出來給使用者知道。三件事一次到位: 1. startupHardTimeout 60s → 180s(直接放寬到 3 分鐘) 即使三段 pause 機制(Stage 1 seed / Stage 2 Python bootstrap / Stage 3 waitHealthy)都生效,Windows 乾淨環境段落間累積延遲仍可能超過 60 秒。 180s 給意料之外的延遲足夠 buffer,搭配 pause + 細步進度 emit 涵蓋 99% 情境。日常啟動只要幾秒,放寬不影響正常情境。 - 同步更新 i18n 紅 banner 文案 60 → 180 - 同步更新 unit tests(HardTimeout 用 -185s、SkipBypass 用 -200s、 PreventsHardTimeout 用 wall=-300/paused=-250 對應 effective=50s) 2. Stage 1 seedUserDataDir 包進 PauseHardTimeout Windows 乾淨環境首次跑會被 Defender real-time scan 對 8 個 nef 檔 逐個掃 5-30 秒。屬一次性 bootstrap,和 Stage 2/3 同理應豁免 hard timeout。第二次啟動 fileExists 早 early return,pause 影響 0ms。 配套:seed 期間每 5 秒 emit slow hint 帶 elapsed 秒數,避免使用者 看 spinner 不動以為當機。 3. 全 stage 細步 detail emit 原本只有 Stage 3 有 sub-step 文案(spawn / waitHealth / waitHealthSlow)。 現在 Stage 1/2/4/5/6 都有: Stage 1: migrate / lock / ipc / seed / seedSlow Stage 2: detect / bootstrap / venv / pip / driver Stage 3: spawn / waitHealth / waitHealthSlow Stage 4: probe Stage 5: open Stage 6: wait 每個 detail 對應一段使用者能讀懂的中英文文案(i18n.js zh-TW + en)。 前端 startup-panel 收到 startup:stage-detail event 後在對應 stage 列 下方顯示文案,比看著「進行中...」靜止文字直觀很多。 Stage 2 driver install 因為發生在 CompleteStage(2) 之後 current=3, emit 到 stage 3 而非 stage 2,避免被前端忽略(detail 只在 stage running 時顯示)。 更新 fix marker 為「9c9e005+ (180s hard timeout + all-stage sub-step detail + Stage1 seed pause)」讓使用者拉新版後能從 wails.log 確認版本。 驗證: - visiona-local 套件 go build / vet / test -race 全綠 - macOS dmg 163MB 重 build OK Co-Authored-By: Claude Opus 4.6 (1M context) --- local-tool/visiona-local/app.go | 52 +++++++++++++++++-- local-tool/visiona-local/frontend/i18n.js | 46 ++++++++++++++-- local-tool/visiona-local/server_control.go | 6 +++ local-tool/visiona-local/startup_pipeline.go | 9 +++- .../visiona-local/startup_pipeline_test.go | 25 ++++----- 5 files changed, 118 insertions(+), 20 deletions(-) diff --git a/local-tool/visiona-local/app.go b/local-tool/visiona-local/app.go index 917f921..1f386df 100644 --- a/local-tool/visiona-local/app.go +++ b/local-tool/visiona-local/app.go @@ -188,7 +188,7 @@ func (a *App) startup(ctx context.Context) { a.appLog("==================================================") a.appLog("visionA-local startup build=%s buildTime=%s", appVersionString(), appBuildTimeString()) a.appLog("platform=%s arch=%s dataDir=%s", runtime.GOOS, runtime.GOARCH, dataDir) - a.appLog("fix marker: c649a81+ (Stage3 waitHealthy pause / shutdown modal safety net)") + a.appLog("fix marker: 9c9e005+ (180s hard timeout + all-stage sub-step detail + Stage1 seed pause)") a.appLog("==================================================") // M8-4:載入 preferences.json(讀取失敗 → 用 DefaultPreferences 預設) @@ -204,6 +204,7 @@ func (a *App) startup(ctx context.Context) { a.startupPipeline.Start(pipelineCtx) // 1. 舊資料目錄遷移(必須在 lock 之前,因為 lock 檔會寫到新路徑) + a.startupPipeline.EmitStageDetail(1, "startup.stage.1.detail.migrate", 0) migrateOldDataDirs(dataDir) // 遷移後再次確認 dataDir 存在(遷移過程若發生異常狀況的保險) @@ -213,6 +214,7 @@ func (a *App) startup(ctx context.Context) { } // 2. single-instance lock + a.startupPipeline.EmitStageDetail(1, "startup.stage.1.detail.lock", 0) release, err := acquireSingleInstance(dataDir) if err != nil { // 區分錯誤類型:只有真的偵測到另一個 instance 才 exit(0) quietly @@ -233,6 +235,7 @@ func (a *App) startup(ctx context.Context) { // 3. 啟動 Wails 自己的 IPC server(L-3) // 供後來的 instance 透過 /ipc/raise 把現有視窗提到前景。 // 失敗不擋啟動,只是犧牲 single-instance raise 能力。 + a.startupPipeline.EmitStageDetail(1, "startup.stage.1.detail.ipc", 0) if err := a.startIPCServer(); err != nil { a.appLog("IPC server start failed: %v", err) } @@ -240,10 +243,38 @@ func (a *App) startup(ctx context.Context) { // 3.5. 首次啟動 seed:把 installer 內建的 models.json / nef 預置模型 / scripts // 複製到 user data-dir,讓 server 能在 --data-dir= 情境下讀到內建模型。 // 失敗不擋啟動,只是 server 啟動後模型庫會是空的。 + // + // Pause hard timeout:seedUserDataDir 在 Windows 乾淨環境首次跑會被 + // Defender real-time scan 對 8 個 nef 檔(每個 7-10 MB)逐個掃描,總時 + // 5-30 秒。屬於一次性 bootstrap,不該算進 pipeline 180 秒 budget。 + // 第二次啟動以後 fileExists(userModelsJSON) 為 true,seedUserDataDir 早 + // early return,不會 pause(不影響日常啟動)。 a.setBootstrapStatus("正在準備應用程式資料...") + a.startupPipeline.EmitStageDetail(1, "startup.stage.1.detail.seed", 0) + a.startupPipeline.PauseHardTimeout() + // 開背景 ticker,每 5 秒 emit slow hint 帶 elapsed 時間,避免使用者 + // 看到 spinner 不動以為 seed 卡住。Goroutine 在 seedUserDataDir return + // 後透過 close(seedDone) 退出。 + seedStart := time.Now() + seedDone := make(chan struct{}) + go func() { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + for { + select { + case <-seedDone: + return + case <-ticker.C: + elapsed := int(time.Since(seedStart).Seconds()) + a.startupPipeline.EmitStageDetail(1, "startup.stage.1.detail.seedSlow", elapsed) + } + } + }() if err := a.seedUserDataDir(); err != nil { a.appLog("seed user data dir failed: %v", err) } + close(seedDone) + a.startupPipeline.ResumeHardTimeout() // M8-4b:階段 1(初始化 Wails 控制台)完成 → 自動進入階段 2 running a.appLog("startup: Stage 1 complete, entering Stage 2 (Python runtime)") @@ -275,6 +306,7 @@ func (a *App) runStartupStage5() { a.startupPipeline.SkipStage(5) return } + a.startupPipeline.EmitStageDetail(5, "startup.stage.5.detail.open", 0) // 取得 server URL url := "" if a.ctrl != nil { @@ -287,10 +319,12 @@ func (a *App) runStartupStage5() { if url != "" { // 不等瀏覽器真的開(只等命令 return),失敗記 log 不擋流程 if err := openBrowser(url); err != nil { - fmt.Fprintf(os.Stderr, "[visiona-local] startup stage 5: open browser failed: %v\n", err) + a.appLog("startup stage 5: open browser failed: %v", err) } } a.startupPipeline.CompleteStage(5) + // Stage 6 開始等 WebSocket,emit detail + a.startupPipeline.EmitStageDetail(6, "startup.stage.6.detail.wait", 0) } // shutdown 由 Wails 在 app 結束時呼叫。 @@ -808,6 +842,9 @@ func (p *ServerProcess) stop() { // // R5-5a 之後:python 失敗直接擋啟動(沒有模擬回退)。 func (a *App) ensurePythonRuntime(mode PythonMode) (string, PythonMode, error) { + if a.startupPipeline != nil { + a.startupPipeline.EmitStageDetail(2, "startup.stage.2.detail.detect", 0) + } switch mode { case PythonModeAuto: if bin, err := a.findSystemPython(); err == nil { @@ -890,7 +927,7 @@ func (a *App) ensureBundledPython() (string, error) { // 首次 bootstrap 路徑:解壓 tarball + 建 venv + pip install 9 個 wheel // (含 numpy / opencv / KneronPLUS 合計 ~150 MB),乾淨環境可能 2-5 分鐘。 - // 暫停 pipeline hard timeout,避免 60 秒 R5-E1 budget 把使用者擋在 Error state。 + // 暫停 pipeline hard timeout,避免 180 秒 budget 把使用者擋在 Error state。 // Soft timeout(每階段 20 秒提示)繼續照常。 if a.startupPipeline != nil { a.startupPipeline.PauseHardTimeout() @@ -903,6 +940,9 @@ func (a *App) ensureBundledPython() (string, error) { // 解壓 tarball(strip-components=1 剝掉 "python/" 前綴) a.setBootstrapStatus("正在解壓 Python runtime (~10 秒)...") + if a.startupPipeline != nil { + a.startupPipeline.EmitStageDetail(2, "startup.stage.2.detail.bootstrap", 0) + } extract := exec.Command("tar", "-xzf", pyTarball, "-C", pyHome, "--strip-components=1") configureSysProcAttr(extract) if out, err := extract.CombinedOutput(); err != nil { @@ -918,6 +958,9 @@ func (a *App) ensureBundledPython() (string, error) { } a.setBootstrapStatus("正在建立 Python 虛擬環境 (~5 秒)...") + if a.startupPipeline != nil { + a.startupPipeline.EmitStageDetail(2, "startup.stage.2.detail.venv", 0) + } venvCmd := exec.Command(embeddedPython, "-m", "venv", venvPath) configureSysProcAttr(venvCmd) if out, err := venvCmd.CombinedOutput(); err != nil { @@ -940,6 +983,9 @@ func (a *App) ensureBundledPython() (string, error) { } a.setBootstrapStatus(fmt.Sprintf("正在安裝 %d 個 Python 套件 (numpy / opencv / KneronPLUS ...) (~30-60 秒)...", len(wheels))) + if a.startupPipeline != nil { + a.startupPipeline.EmitStageDetail(2, "startup.stage.2.detail.pip", 0) + } args := []string{"-m", "pip", "install", "--no-index", "--find-links", wheelsDir, "--prefer-binary"} args = append(args, wheels...) pipCmd := exec.Command(pythonBin, args...) diff --git a/local-tool/visiona-local/frontend/i18n.js b/local-tool/visiona-local/frontend/i18n.js index a2576e8..80fa9ac 100644 --- a/local-tool/visiona-local/frontend/i18n.js +++ b/local-tool/visiona-local/frontend/i18n.js @@ -62,10 +62,29 @@ const dict = { 'startup.stage.6.label': '等待 Web UI 連線', 'startup.stage.6.description': '正在與瀏覽器建立即時連線', 'startup.stage.6.manualHint': '請點擊控制台的「在瀏覽器開啟」按鈕', - // Stage 3 細步提示(由 Go 的 startup:stage-detail event 觸發) + // 各 stage 細步提示(由 Go 的 startup:stage-detail event 觸發) + // Stage 1 - 初始化控制台 + 'startup.stage.1.detail.migrate': '檢查並遷移舊資料目錄...', + 'startup.stage.1.detail.lock': '建立 single-instance lock...', + 'startup.stage.1.detail.ipc': '啟動 Wails IPC server...', + 'startup.stage.1.detail.seed': '正在準備內建模型資料(首次啟動會花幾秒鐘)...', + 'startup.stage.1.detail.seedSlow': '正在準備內建模型資料(Windows Defender 掃描檔案中,已 {elapsed} 秒)', + // Stage 2 - 檢查 Python 執行環境 + 'startup.stage.2.detail.detect': '偵測系統 Python 執行環境...', + 'startup.stage.2.detail.bootstrap': '正在解壓內建 Python runtime(首次啟動需 1-2 分鐘)...', + 'startup.stage.2.detail.venv': '正在建立 Python 虛擬環境...', + 'startup.stage.2.detail.pip': '正在安裝 Python 套件 numpy / opencv / KneronPLUS(首次啟動需 1-3 分鐘)...', + 'startup.stage.2.detail.driver': '正在安裝 Kneron USB 驅動程式(請點選 UAC 允許)...', + // Stage 3 - 啟動本機伺服器 'startup.stage.3.detail.spawn': '正在啟動伺服器子程序...', 'startup.stage.3.detail.waitHealth': '正在等待伺服器健康檢查通過(已等 {elapsed} 秒)', 'startup.stage.3.detail.waitHealthSlow': '首次啟動較久屬正常,Windows Defender 掃描可能需 1-2 分鐘(已等 {elapsed} 秒)', + // Stage 4 - 偵測 Kneron 裝置 + 'startup.stage.4.detail.probe': '正在掃描 USB 裝置...', + // Stage 5 - 開啟瀏覽器 + 'startup.stage.5.detail.open': '正在開啟系統預設瀏覽器...', + // Stage 6 - 等待 Web UI 連線 + 'startup.stage.6.detail.wait': '正在等待瀏覽器建立 WebSocket 連線...', // 啟動完成後 collapsed 面板的標題與提示 'startup.collapsed.title': '啟動完成', 'startup.collapsed.hint': '· 點此展開檢視', @@ -77,7 +96,7 @@ const dict = { 'startup.status.skipped': '跳過(依偏好設定)', 'startup.timeout.message': '這個步驟花的時間比預期久,正在重試...', 'startup.error.title': '啟動失敗', - 'startup.error.description.timeout': '啟動時間超過 60 秒,可能是系統環境異常或網路中斷。', + 'startup.error.description.timeout': '啟動時間超過 180 秒,可能是系統環境異常或網路中斷。', 'startup.error.description.stageFailed': '階段「{stageLabel}」執行失敗。', 'startup.error.failedStage': '失敗階段:{n} · {label}', 'startup.error.retry': '重試', @@ -149,10 +168,29 @@ const dict = { 'startup.stage.6.label': 'Waiting for Web UI to connect', 'startup.stage.6.description': 'Establishing realtime connection with the browser', 'startup.stage.6.manualHint': 'Please click "Open in Browser" in the Control Panel', - // Stage 3 sub-step hints (triggered by Go startup:stage-detail event) + // All stage sub-step hints (triggered by Go startup:stage-detail event) + // Stage 1 + 'startup.stage.1.detail.migrate': 'Checking and migrating legacy data directories...', + 'startup.stage.1.detail.lock': 'Acquiring single-instance lock...', + 'startup.stage.1.detail.ipc': 'Starting Wails IPC server...', + 'startup.stage.1.detail.seed': 'Preparing built-in model data (takes a few seconds on first launch)...', + 'startup.stage.1.detail.seedSlow': 'Preparing built-in model data (Defender scanning files, {elapsed}s elapsed)', + // Stage 2 + 'startup.stage.2.detail.detect': 'Detecting system Python runtime...', + 'startup.stage.2.detail.bootstrap': 'Extracting bundled Python runtime (takes 1-2 min on first launch)...', + 'startup.stage.2.detail.venv': 'Creating Python virtual environment...', + 'startup.stage.2.detail.pip': 'Installing Python packages numpy / opencv / KneronPLUS (takes 1-3 min on first launch)...', + 'startup.stage.2.detail.driver': 'Installing Kneron USB driver (please allow UAC)...', + // Stage 3 'startup.stage.3.detail.spawn': 'Launching server subprocess...', 'startup.stage.3.detail.waitHealth': 'Waiting for server health check ({elapsed}s elapsed)', 'startup.stage.3.detail.waitHealthSlow': 'First launch is slow — Windows Defender scan may take 1-2 minutes ({elapsed}s elapsed)', + // Stage 4 + 'startup.stage.4.detail.probe': 'Scanning USB devices...', + // Stage 5 + 'startup.stage.5.detail.open': 'Opening system default browser...', + // Stage 6 + 'startup.stage.6.detail.wait': 'Waiting for browser to establish WebSocket connection...', // Collapsed panel after startup ready 'startup.collapsed.title': 'Startup complete', 'startup.collapsed.hint': '· click to expand', @@ -164,7 +202,7 @@ const dict = { 'startup.status.skipped': 'Skipped (per preference)', 'startup.timeout.message': 'This step is taking longer than expected, retrying...', 'startup.error.title': 'Startup failed', - 'startup.error.description.timeout': 'Startup exceeded 60 seconds. Your environment may have issues or the network is interrupted.', + 'startup.error.description.timeout': 'Startup exceeded 180 seconds. Your environment may have issues or the network is interrupted.', 'startup.error.description.stageFailed': 'Stage "{stageLabel}" failed.', 'startup.error.failedStage': 'Failed stage: {n} · {label}', 'startup.error.retry': 'Retry', diff --git a/local-tool/visiona-local/server_control.go b/local-tool/visiona-local/server_control.go index 446d873..1138a7e 100644 --- a/local-tool/visiona-local/server_control.go +++ b/local-tool/visiona-local/server_control.go @@ -529,7 +529,12 @@ func (a *App) startServerV2(preferredPort int) (*ServerProcess, error) { } // 2. 首次啟動自動安裝 Kneron WinUSB driver(Windows only) + // 邏輯上發生在 Stage 2 與 Stage 3 之間(pipeline current 已切到 3), + // 所以 emit 到 stage 3 的 detail,避免 driver detail 被前端忽略。 if pyBin != "" { + if a.startupPipeline != nil && runtime.GOOS == "windows" { + a.startupPipeline.EmitStageDetail(3, "startup.stage.2.detail.driver", 0) + } if derr := a.ensureDriverInstalled(pyBin); derr != nil { a.appLog("driver auto-install failed (non-fatal): %v", derr) } @@ -718,6 +723,7 @@ func (a *App) probeDeviceListAndComplete(port int) { if a.startupPipeline == nil { return } + a.startupPipeline.EmitStageDetail(4, "startup.stage.4.detail.probe", 0) url := fmt.Sprintf("http://127.0.0.1:%d/api/devices", port) client := &http.Client{Timeout: 2 * time.Second} resp, err := client.Get(url) diff --git a/local-tool/visiona-local/startup_pipeline.go b/local-tool/visiona-local/startup_pipeline.go index 9e49aaa..bd72e93 100644 --- a/local-tool/visiona-local/startup_pipeline.go +++ b/local-tool/visiona-local/startup_pipeline.go @@ -39,7 +39,14 @@ import ( const ( startupTotalStages = 6 startupSoftTimeout = 20 * time.Second - startupHardTimeout = 60 * time.Second + // startupHardTimeout 從 R5-E1 原定 60 秒放寬到 180 秒。理由:即使有 + // Stage 1 (seedUserDataDir) / Stage 2 (Python bootstrap) / Stage 3 + // (waitHealthy) 三段 pause 機制豁免,Windows 乾淨環境首次啟動仍可能在 + // 段落間(Defender 掃多個檔/EDR cloud lookup/段落間小工作)累積延遲, + // 使用者體感「應該還在啟動但被當失敗」非常挫折。180 秒給意料之外的 + // 延遲足夠 buffer,搭配 pause 機制 + 細步進度 emit 涵蓋 99% 情境。 + // 日常啟動只要幾秒,放寬不影響正常情境(second launch 通常 < 5 秒)。 + startupHardTimeout = 180 * time.Second startupWatcherTick = 1 * time.Second ) diff --git a/local-tool/visiona-local/startup_pipeline_test.go b/local-tool/visiona-local/startup_pipeline_test.go index 2956c0d..48993d7 100644 --- a/local-tool/visiona-local/startup_pipeline_test.go +++ b/local-tool/visiona-local/startup_pipeline_test.go @@ -180,9 +180,9 @@ func TestStartupPipeline_Watcher_SoftTimeout(t *testing.T) { func TestStartupPipeline_Watcher_HardTimeout(t *testing.T) { a, _ := newPipelineTestApp(t) p := NewStartupPipeline(a) - // 模擬「總時已經 65 秒,當前在階段 3」 + // 模擬「總時已經 185 秒(超過 180 秒 hard timeout),當前在階段 3」 now := time.Now() - p.startedAt = now.Add(-65 * time.Second) + p.startedAt = now.Add(-185 * time.Second) p.current = 3 p.stages[3].status = "running" p.stages[3].startedAt = now.Add(-30 * time.Second) @@ -262,13 +262,14 @@ func TestStartupPipeline_PauseHardTimeout_PreventsHardTimeout(t *testing.T) { a, _ := newPipelineTestApp(t) p := NewStartupPipeline(a) - // 模擬 wall clock 已過 120 秒,但其中 90 秒是「首次 bootstrap」暫停 + // 模擬 wall clock 已過 300 秒,但其中 250 秒是「首次 bootstrap」暫停 + // effective = 50s < 180s hard timeout,pipeline 不該 fail now := time.Now() - p.startedAt = now.Add(-120 * time.Second) - p.pausedDuration = 90 * time.Second // effective = 30s < 60s hard + p.startedAt = now.Add(-300 * time.Second) + p.pausedDuration = 250 * time.Second p.current = 2 p.stages[2].status = "running" - p.stages[2].startedAt = now.Add(-120 * time.Second) + p.stages[2].startedAt = now.Add(-300 * time.Second) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -283,7 +284,7 @@ func TestStartupPipeline_PauseHardTimeout_PreventsHardTimeout(t *testing.T) { p.mu.Unlock() if cur == -1 { - t.Fatal("pipeline failed due to hard timeout, but effective=30s should be under the 60s limit") + t.Fatal("pipeline failed due to hard timeout, but effective=50s should be under the 180s limit") } if status == "failed" { t.Fatalf("stage 2 failed, want still running (effective time under limit)") @@ -315,12 +316,12 @@ func TestStartupPipeline_Watcher_SkippedStageNoTimeout(t *testing.T) { a.prefs.AutoOpenBrowser = false p := NewStartupPipeline(a) - // 階段 6 + AutoOpenBrowser=false:總時 70s 也不該觸發 hard timeout + // 階段 6 + AutoOpenBrowser=false:總時 200s(已超 180s hard timeout)也不該觸發 now := time.Now() - p.startedAt = now.Add(-70 * time.Second) + p.startedAt = now.Add(-200 * time.Second) p.current = 6 p.stages[6].status = "running" - p.stages[6].startedAt = now.Add(-70 * time.Second) + p.stages[6].startedAt = now.Add(-200 * time.Second) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -348,10 +349,10 @@ func TestStartupPipeline_Watcher_SkippedStatusBypassesTimeout(t *testing.T) { a, _ := newPipelineTestApp(t) p := NewStartupPipeline(a) - // 階段 5 已 skipped,總時 65s 不該觸發 hard timeout(skipped 跳過所有檢查) + // 階段 5 已 skipped,總時 200s 不該觸發 hard timeout(skipped 跳過所有檢查) // 注意:skip 之後實際上 current 會是 6,但這裡測試的是 skip 狀態本身的 bypass 行為 now := time.Now() - p.startedAt = now.Add(-65 * time.Second) + p.startedAt = now.Add(-200 * time.Second) p.current = 5 p.stages[5].status = "skipped" p.stages[5].startedAt = now.Add(-30 * time.Second)