From 1231bf0ed2f4570705ab99dfa8a032f6ccfc9817 Mon Sep 17 00:00:00 2001 From: jim800121chen Date: Mon, 4 May 2026 13:56:07 +0800 Subject: [PATCH] =?UTF-8?q?feat(visionA-backend):=20Phase=200.8=20conversi?= =?UTF-8?q?on=20package=20=E2=80=94=205=20endpoint=20+=208=20=E5=80=8B?= =?UTF-8?q?=E5=85=A7=E9=83=A8=E6=A8=A1=E7=B5=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 0.8 把 kneron_model_converter 的轉檔功能整合進 visionA Cloud。 visionA backend 當 streaming proxy(upload)+ delegated download token broker(download)+ ownership trust boundary,converter / FAA / MC 三方零修改。 新增 internal/conversion/ 套件(8 個檔,~10,000 行 prod+test,117+ test cases,race -count=3 全綠): - conversion.go:Service interface 5 method、Job/PromoteResult/InitJobInput types - errors.go:13+ sentinel errors + ErrorCode/HTTPStatus mapping,對齊 conversion.md §6 - mc_token_client.go:service-to-service token (client_credentials grant) + DCL cache (exp - 15s 重取,per-scope cache),IssueDelegatedDownload(MC delegated download token) 錯誤分 idp_misconfigured (4xx) / idp_unavailable (5xx) / download_token_failed / mc_token_unavailable - converter_client.go:對 converter scheduler 4 method(InitJob multipart streaming / GetJob / Promote / ListInProgressJobs),InitJob 不 retry 5xx(streaming body 無法 replay) - faa_client.go:對 FAA GET /files/{key} server-to-server pull,Phase A retry(GET 無 body 可 replay)對齊 §9.1 retry 矩陣,streaming io.ReadCloser 透傳避 OOM - ownership.go:in-memory job_id → user_id map + per-user mutex 防 thundering herd lazy rebuild (不同 user 平行 fetch,同 user 100 caller 收斂成 1 次),visionA 重啟靠 converter ListInProgressJobs(user) 重建 - flow.go:Service interface 整合層(5 method 串接 converter/FAA/MC/ownership) - InitJob 用 io.Pipe + multipart.Reader/Writer 重組 streaming proxy(黑名單 client user_id + 灌入 OIDC sub) - DownloadRedirectURL 自動觸發 promote(spec §1 Stage 3b),用 ensurePromoted helper - PromoteToModels 冪等(modelStore.FindBySourceJobID 為 source-of-truth) - OwnershipMismatch → ErrJobNotFound 不 
forbidden(§7.2 防枚舉) - storage / modelStore 失敗包 ErrStorageUnavailable / ErrModelStoreUnavailable (視為 visionA 自身 500 而非 502 gateway,SRE alarm 才打對 team) 新增 internal/api/conversion.go(5 endpoint handler + main.go wire): - POST /api/conversion/init(multipart streaming proxy,不呼叫 c.MultipartForm()) - GET /api/conversion/active(lazy rebuild ownership) - GET /api/conversion/{job_id}(poll status) - POST /api/conversion/{job_id}/promote-to-models(FAA pull → models 三段式) - GET /api/conversion/{job_id}/download(server-side HTTP 302 → FAA,token 不過 frontend JS,仿 FAA TestSite DownloadFileDirect pattern;Cache-Control: no-store) 5 個 endpoint 全部走 OIDC AuthMiddleware;user_id 從 cookie session 灌(trust boundary), 從不接受 client multipart form / JSON / query 的 user_id。 TestAllAPIEndpointsRequire401WithoutCookie 自動覆蓋新 5 endpoint regression 防呆。 新增 cmd/api-server/conversion_e2e_test.go(4 個 e2e 場景): - TestConversionE2E_StreamingProxy(10MB body + trust boundary regression) - TestConversionE2E_LazyRebuildAfterRestart(visionA 重啟仍能 /active) - TestConversionE2E_Download302Redirect(驗 302 + Location header + token 不在 body) - TestConversionE2E_ActiveJobConflict(409 + active_job 詳情) 修改 internal/config/{config,load}.go:新增 ConversionConfig 5 欄位 (ConverterBaseURL / FAABaseURL / TenantID / ServiceClientID / ServiceClientSecret)+ Enabled() helper(雙非空判定)。 修改 cmd/api-server/main.go:條件 wire(cfg.Conversion.Enabled() 為 true 才建 client + Service; 否則 Deps.Conversion=nil,handler 自動回 501)。 修改 .env.example:新增 Phase 0.8 區塊註解。 新增 cmd/api-server/conversion_adapters.go:narrow interface adapter(接既有 internal/model.Repository / internal/storage.Store → conversion.ModelStore / Storage,避免 import cycle)。 驗證:go test -race -count=3 ./... 
17 packages 全綠 / go vet 0 warning / go build 成功。 對齊文件: - .autoflow/04-architecture/adr/adr-014-conversion-integration.md - .autoflow/04-architecture/conversion.md (TDD) - .autoflow/04-architecture/api/api-conversion.md - .autoflow/02-prd/features/feature-converter-integration.md - .autoflow/03-design/wireframes/wireframe-conversion.md - .autoflow/03-design/flows/flow-conversion.md Co-Authored-By: Claude Opus 4.7 (1M context) --- visionA-backend/.env.example | 32 + .../cmd/api-server/conversion_adapters.go | 150 ++ .../cmd/api-server/conversion_e2e_test.go | 1143 ++++++++++++++++ visionA-backend/cmd/api-server/main.go | 65 + visionA-backend/internal/api/api.go | 12 + visionA-backend/internal/api/conversion.go | 469 +++++++ .../internal/api/conversion_test.go | 638 +++++++++ visionA-backend/internal/config/config.go | 46 + visionA-backend/internal/config/load.go | 8 + visionA-backend/internal/config/load_test.go | 63 + .../internal/conversion/conversion.go | 164 +++ .../internal/conversion/conversion_test.go | 151 ++ .../internal/conversion/converter_client.go | 892 ++++++++++++ .../conversion/converter_client_test.go | 895 ++++++++++++ visionA-backend/internal/conversion/errors.go | 274 ++++ .../internal/conversion/errors_test.go | 161 +++ .../internal/conversion/faa_client.go | 467 +++++++ .../internal/conversion/faa_client_test.go | 622 +++++++++ visionA-backend/internal/conversion/flow.go | 940 +++++++++++++ .../internal/conversion/flow_test.go | 1214 +++++++++++++++++ .../internal/conversion/mc_token_client.go | 624 +++++++++ .../conversion/mc_token_client_test.go | 864 ++++++++++++ .../internal/conversion/ownership.go | 314 +++++ .../internal/conversion/ownership_test.go | 631 +++++++++ 24 files changed, 10839 insertions(+) create mode 100644 visionA-backend/cmd/api-server/conversion_adapters.go create mode 100644 visionA-backend/cmd/api-server/conversion_e2e_test.go create mode 100644 visionA-backend/internal/api/conversion.go create mode 100644 
visionA-backend/internal/api/conversion_test.go create mode 100644 visionA-backend/internal/conversion/conversion.go create mode 100644 visionA-backend/internal/conversion/conversion_test.go create mode 100644 visionA-backend/internal/conversion/converter_client.go create mode 100644 visionA-backend/internal/conversion/converter_client_test.go create mode 100644 visionA-backend/internal/conversion/errors.go create mode 100644 visionA-backend/internal/conversion/errors_test.go create mode 100644 visionA-backend/internal/conversion/faa_client.go create mode 100644 visionA-backend/internal/conversion/faa_client_test.go create mode 100644 visionA-backend/internal/conversion/flow.go create mode 100644 visionA-backend/internal/conversion/flow_test.go create mode 100644 visionA-backend/internal/conversion/mc_token_client.go create mode 100644 visionA-backend/internal/conversion/mc_token_client_test.go create mode 100644 visionA-backend/internal/conversion/ownership.go create mode 100644 visionA-backend/internal/conversion/ownership_test.go diff --git a/visionA-backend/.env.example b/visionA-backend/.env.example index 90f828d..8ff7420 100644 --- a/visionA-backend/.env.example +++ b/visionA-backend/.env.example @@ -155,3 +155,35 @@ VISIONA_MODEL_MAX_SIZE_MB=100 # 建議用:vAc_$(openssl rand -hex 16) # 留空代表雛形 InMemoryPairingStore 會動態配發(前端呼叫 POST /api/pairing/token) VISIONA_PAIRING_TOKEN= + + +# ============================================================ +# Phase 0.8 — 轉檔功能整合(converter / FAA / Member Center service token) +# ============================================================ +# 對齊 .autoflow/04-architecture/conversion.md §5.3 +# +# 啟用判定:當 VISIONA_CONVERTER_BASE_URL 與 VISIONA_FAA_BASE_URL 都非空時, +# main.go 才會 wire conversion.Service;其中之一留空 → 5 個 /api/conversion/* endpoint 回 501。 +# +# 啟用時 VISIONA_OIDC_SERVICE_CLIENT_ID/SECRET 必須非空(轉檔依賴 service token 機制)。 + +# kneron_model_converter task-scheduler base URL +# dev/stage:http://192.168.0.130:9501 +# 
prod:https://converter.visiona.cloud +VISIONA_CONVERTER_BASE_URL= + +# File Access Agent base URL +# dev/stage:http://192.168.0.130:5081 +# prod:https://faa.innovedus.com +VISIONA_FAA_BASE_URL= + +# visionA 在 Member Center 的 tenant id(單一 tenant) +# 跟 MC 換 delegated download token 時當 tenant_id 欄位用 +VISIONA_OIDC_TENANT_ID= + +# Delegated download token TTL(秒)— FAA 直連下載用 +# 預設 300(5 分鐘),可調整範圍 60-900 +VISIONA_FAA_DELEGATED_TTL_SECONDS=300 + +# 上傳模型檔大小上限(MB)— 與 converter 端 limit 對齊 +VISIONA_CONVERTER_MAX_MODEL_SIZE_MB=500 diff --git a/visionA-backend/cmd/api-server/conversion_adapters.go b/visionA-backend/cmd/api-server/conversion_adapters.go new file mode 100644 index 0000000..04a245c --- /dev/null +++ b/visionA-backend/cmd/api-server/conversion_adapters.go @@ -0,0 +1,150 @@ +// conversion_adapters.go — main.go 的 narrow adapter 實作。 +// +// internal/conversion 為了避免 import cycle 與保持 interface 純粹(FAANG 慣例: +// consumer 定義介面),定義了 ModelStore / Storage 兩個 narrow interface。 +// main.go 在 wire 時把 *model.InMemoryRepository / *storage.LocalFSStore 包成 adapter +// 注入;conversion 完全不知道具體實作。 +// +// 對齊 .autoflow/04-architecture/conversion.md §2.7(NewService 註解)。 +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.7) +package main + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/google/uuid" + + "visiona-backend/internal/conversion" + "visiona-backend/internal/model" + "visiona-backend/internal/storage" +) + +// ========================================================================== +// ModelStore adapter +// ========================================================================== + +// conversionModelStoreAdapter 把 model.Repository 包成 conversion.ModelStore。 +// +// 額外責任: +// - ModelRecord ↔ model.Model 雙向轉換 +// - FindBySourceJobID 用 List(filter) + 自行過濾 SourceJobID(既有 model.ListFilter +// 沒有 SourceJobID 欄位;Phase 1 加 DB 後可改 indexed query) +// - GenerateID 用 uuid.NewString(與 internal/api/models.go modelsInitUploadHandler 一致) +type 
conversionModelStoreAdapter struct { + repo model.Repository +} + +// newConversionModelStoreAdapter 建立 adapter。 +func newConversionModelStoreAdapter(repo model.Repository) conversion.ModelStore { + return &conversionModelStoreAdapter{repo: repo} +} + +// Save 把 conversion.ModelRecord 轉成 model.Model 後 upsert。 +// +// 設計選擇:UploadedAt 取 rec.UpdatedAt(零值時退回 now;promote 完成 = 等同 finalize 後的 ready 狀態), +// 這樣 GET /api/models/{id} 回的 status 會是 "ready"(對齊 toModelResponse 邏輯)。 +func (a *conversionModelStoreAdapter) Save(ctx context.Context, rec *conversion.ModelRecord) error { + if rec == nil { + return fmt.Errorf("conversion adapter: Save requires non-nil record") + } + now := time.Now().UTC() + uploadedAt := now + if !rec.UpdatedAt.IsZero() { + uploadedAt = rec.UpdatedAt + } + m := &model.Model{ + ID: rec.ID, + OwnerUserID: rec.OwnerUserID, + Name: rec.Name, + Description: rec.Description, + StorageKey: rec.StorageKey, + FileSize: rec.FileSize, + FileChecksum: rec.FileChecksum, + TargetChip: rec.TargetChip, + Source: rec.Source, // 應為 "converted" + SourceJobID: rec.SourceJobID, + CreatedAt: rec.CreatedAt, + UpdatedAt: rec.UpdatedAt, + UploadedAt: &uploadedAt, // promote 完即 ready(對齊 toModelResponse) + } + return a.repo.Save(ctx, m) +} + +// FindBySourceJobID 找 user 是否已對某 job 建過 model record(冪等檢查用)。 +// +// Phase 0.8 雛形實作:用 List(filter) + 過濾 SourceJobID(in-memory 慢但對小量 user 足夠)。 +// Phase 1 用 SQL `WHERE owner_user_id = ? 
AND source_job_id = ?` 加索引。 +// +// 找不到回 (nil, nil);找到第一個 match 回 (*ModelRecord, nil)。 +func (a *conversionModelStoreAdapter) FindBySourceJobID(ctx context.Context, ownerUserID, sourceJobID string) (*conversion.ModelRecord, error) { + if ownerUserID == "" || sourceJobID == "" { + return nil, nil + } + models, err := a.repo.List(ctx, model.ListFilter{ + OwnerUserID: ownerUserID, + Source: model.SourceConverted, + }) + if err != nil { + return nil, fmt.Errorf("conversion adapter: list models for FindBySourceJobID: %w", err) + } + for _, m := range models { + if m.SourceJobID == sourceJobID { + return modelToRecord(m), nil + } + } + return nil, nil +} + +// GenerateID 產一個新 model_id(沿用既有 visionA model 命名 — uuid.NewString)。 +func (a *conversionModelStoreAdapter) GenerateID() string { + return uuid.NewString() +} + +// modelToRecord 把 *model.Model 轉成 *conversion.ModelRecord(給 PromoteToModels 冪等回傳用)。 +func modelToRecord(m *model.Model) *conversion.ModelRecord { + if m == nil { + return nil + } + return &conversion.ModelRecord{ + ID: m.ID, + OwnerUserID: m.OwnerUserID, + Name: m.Name, + Description: m.Description, + StorageKey: m.StorageKey, + FileSize: m.FileSize, + FileChecksum: m.FileChecksum, + TargetChip: m.TargetChip, + Source: m.Source, + SourceJobID: m.SourceJobID, + CreatedAt: m.CreatedAt, + UpdatedAt: m.UpdatedAt, + } +} + +// ========================================================================== +// Storage adapter +// ========================================================================== + +// conversionStorageAdapter 把 storage.Store 包成 conversion.Storage。 +// +// 目前只需要 Put(streaming 寫入),meta 透傳到底層 storage(LocalFS 雛形可能忽略, +// S3 接上後會寫進 object metadata)。 +type conversionStorageAdapter struct { + store storage.Store +} + +// newConversionStorageAdapter 建立 adapter。 +func newConversionStorageAdapter(store storage.Store) conversion.Storage { + return &conversionStorageAdapter{store: store} +} + +// Put streaming 寫入 storage(meta 透傳給底層 storage 實作)。 +// +// 
LocalFS 雛形可能忽略 meta;S3 / R2 等 backend 會寫進 object metadata(給 debug / Tagging)。 +func (a *conversionStorageAdapter) Put(ctx context.Context, key string, r io.Reader, size int64, meta map[string]string) error { + return a.store.Put(ctx, key, r, size, meta) +} diff --git a/visionA-backend/cmd/api-server/conversion_e2e_test.go b/visionA-backend/cmd/api-server/conversion_e2e_test.go new file mode 100644 index 0000000..507eb37 --- /dev/null +++ b/visionA-backend/cmd/api-server/conversion_e2e_test.go @@ -0,0 +1,1143 @@ +// conversion_e2e_test.go — Phase 0.8 conversion 整合 e2e 測試。 +// +// 涵蓋 4 個必含場景(對齊 .autoflow/05-implementation/phase-0.8-T8.md 範圍): +// +// 1. Streaming proxy 完整跑通 +// —— 驗 InitJob 真的 streaming(不 buffer 整個 multipart body); +// 用 io.Pipe 對 visionA 送 ~10MB body(大小可控),驗 mock converter 收到 byte-perfect copy。 +// +// 2. 重啟恢復 lazy rebuild +// —— 模擬 visionA backend 剛啟動 ownership 全空 + converter 端 user X 有 in_progress job; +// 驗 user 對 GET /active 觸發 lazy rebuild,且後續 GET /active 走 cache 不再打 ListInProgressJobs。 +// +// 3. Download 302 redirect +// —— 驗 server-side 302 + Cache-Control: no-store + Location 帶 token; +// 驗 response body 不含 token;驗 redirect URL 指向 mock FAA。 +// +// 4. 
Active job 409 衝突 +// —— 同 user 第一個 init 成功 → 第二個 init 撞 409 + body 帶 active_job 詳情。 +// +// 為什麼自建 fixture 而非擴充 setupFixture: +// +// 既有 setupFixture(integration_test.go)是 B4/B5 的雛形(不含 conversion service); +// T7 main.go 在 wire 時才 build conversion service。本檔保持 T1-T7 既有 code 不動, +// 自己組一個 conversion 專用 fixture:fakeOIDC + apiServer + 3 個 mock servers +// (converter / MC service token + delegated / FAA),完整模擬端到端。 +// +// Phase 0.8 conversion e2e (見 .autoflow/04-architecture/conversion.md) +package main + +import ( + "bytes" + "context" + "crypto/rand" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "mime" + "mime/multipart" + "net/http" + "net/http/cookiejar" + "net/http/httptest" + "net/url" + "os" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "visiona-backend/internal/api" + "visiona-backend/internal/auth" + "visiona-backend/internal/conversion" + "visiona-backend/internal/converter" + "visiona-backend/internal/device" + "visiona-backend/internal/model" + "visiona-backend/internal/oidc" + "visiona-backend/internal/oidctest" + "visiona-backend/internal/session" + "visiona-backend/internal/storage" + "visiona-backend/internal/usersession" +) + +// ========================================================================== +// Mock servers +// ========================================================================== + +// mockConverter 模擬 kneron_model_converter 的 task-scheduler。 +// +// 對應 4 個 endpoint(converter_client.go 註解列表): +// - POST /api/v1/jobs — InitJob(multipart streaming) +// - GET /api/v1/jobs/{id} — GetJob +// - POST /api/v1/jobs/{id}/promote — Promote +// - GET /api/v1/jobs?user_id=&status=in_progress — ListInProgressJobs +// +// 透過 atomic.Int32 counter 計數每個 endpoint 被打幾次(給場景 #2 lazy rebuild 驗證用)。 +type mockConverter struct { + srv *httptest.Server + + mu sync.Mutex + // jobs:job_id → 當前狀態(給 GetJob / list 用) + jobs 
map[string]*conversion.ConverterJob + // userActive:user_id → []job_id(給 list endpoint 用) + userActive map[string][]string + + // observed:紀錄關鍵事件以便驗證 + initCallCount atomic.Int32 + getJobCallCount atomic.Int32 + promoteCallCount atomic.Int32 + listJobsCallCount atomic.Int32 + + // initBody / initBodyCT / initBodyLen:場景 #1 驗 streaming forward 收到的真實 body(mock 端 ReadAll 後保留) + initBodyMu sync.Mutex + initBody []byte + initBodyCT string + initBodyLen int64 + + // nextInitConflict:給場景 #4 用 — 值 >0 時,接下來的 init 不分 user + // 直接回 409 user_has_active_job(每回一次遞減 1) + nextInitConflict atomic.Int32 // 0=正常;>0=回 409 / 後續 decrement +} + +// initBodySnapshot 回傳 mock 收到的 init body 的複本、Content-Type 與長度(test caller 用)。 +func (m *mockConverter) initBodySnapshot() ([]byte, string, int64) { + m.initBodyMu.Lock() + defer m.initBodyMu.Unlock() + return append([]byte(nil), m.initBody...), m.initBodyCT, m.initBodyLen +} + +// addInProgressJob 預先在 mock 端註冊一個 user 的 in_progress job(給場景 #2)。 +func (m *mockConverter) addInProgressJob(userID, jobID string, createdAt time.Time) { + m.mu.Lock() + defer m.mu.Unlock() + job := &conversion.ConverterJob{ + JobID: jobID, + Status: "running", + Stage: "bie", + SourceFilename: "yolov5s.onnx", + Platform: "720", + CreatedAt: createdAt, + UpdatedAt: createdAt, + } + progress := 45 + job.Progress = &progress + m.jobs[jobID] = job + m.userActive[userID] = append(m.userActive[userID], jobID) +} + +// newMockConverter 建一個 mock converter server。 +func newMockConverter(t *testing.T) *mockConverter { + t.Helper() + mc := &mockConverter{ + jobs: make(map[string]*conversion.ConverterJob), + userActive: make(map[string][]string), + } + mux := http.NewServeMux() + + // 解析 /api/v1/jobs 與 /api/v1/jobs/{id} / /promote — 依方法分流 + mux.HandleFunc("/api/v1/jobs", mc.handleJobsRoot) + mux.HandleFunc("/api/v1/jobs/", mc.handleJobsByID) + + mc.srv = httptest.NewServer(mux) + t.Cleanup(mc.srv.Close) + return mc +} + +// handleJobsRoot 處理 POST /api/v1/jobs(InitJob)與 GET /api/v1/jobs?...(List)。 +func (m *mockConverter) 
handleJobsRoot(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodPost: + m.handleInitJob(w, r) + case http.MethodGet: + m.handleListJobs(w, r) + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +// handleInitJob 模擬 POST /api/v1/jobs。 +// +// 行為: +// - 若 nextInitConflict > 0 → 回 409 user_has_active_job + body 帶 active_job 詳情(decrement) +// - 否則:streaming-read multipart body 全部(驗 visionA 真有 forward)→ 解出 user_id / model_id 等 +// → 建一個 running job(status=created → 對齊 converter API)→ 回 201 +func (m *mockConverter) handleInitJob(w http.ResponseWriter, r *http.Request) { + m.initCallCount.Add(1) + + // 場景 #4:第二次 init 撞 409 + if m.nextInitConflict.Load() > 0 { + m.nextInitConflict.Add(-1) + // 找該 user 第一個 active job 帶回 details + // (converter API 真實格式:見 conversion.go ActiveJobError 的 extractActiveJobFromDetails) + var firstJobID string + m.mu.Lock() + for _, ids := range m.userActive { + if len(ids) > 0 { + firstJobID = ids[0] + break + } + } + m.mu.Unlock() + writeJSON(w, http.StatusConflict, map[string]any{ + "error": map[string]any{ + "code": "user_has_active_job", + "message": "user already has active job", + "details": map[string]any{ + "active_job": map[string]any{ + "job_id": firstJobID, + "status": "running", + "stage": "bie", + }, + }, + }, + }) + return + } + + // streaming-read 真實 body(驗 visionA 沒在記憶體 buffer) + contentType := r.Header.Get("Content-Type") + bodyBytes, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "read body error: "+err.Error(), http.StatusBadRequest) + return + } + m.initBodyMu.Lock() + m.initBody = bodyBytes + m.initBodyCT = contentType + m.initBodyLen = int64(len(bodyBytes)) + m.initBodyMu.Unlock() + + // 從 multipart 取 user_id / model_id(驗 visionA 灌的 user_id 真有送到) + userID, ok := parseMultipartField(contentType, bodyBytes, "user_id") + if !ok { + http.Error(w, "user_id missing in multipart", http.StatusBadRequest) + return + } + + jobID := fmt.Sprintf("job-%s-%d", 
userID, time.Now().UnixNano()) + now := time.Now().UTC() + job := &conversion.ConverterJob{ + JobID: jobID, + Status: "running", + Stage: "onnx", + SourceFilename: "yolov5s.onnx", + Platform: "720", + CreatedAt: now, + UpdatedAt: now, + } + zero := 0 + job.Progress = &zero + + m.mu.Lock() + m.jobs[jobID] = job + m.userActive[userID] = append(m.userActive[userID], jobID) + m.mu.Unlock() + + writeJSON(w, http.StatusCreated, map[string]any{ + "job_id": jobID, + "status": "running", + "stage": "onnx", + "progress": 0, + "stage_progress": 0, + "source_filename": "yolov5s.onnx", + "parameters": map[string]any{ + "platform": "720", + }, + "created_at": now.Format(time.RFC3339), + "updated_at": now.Format(time.RFC3339), + }) +} + +// handleListJobs 模擬 GET /api/v1/jobs?user_id=...&status=in_progress。 +func (m *mockConverter) handleListJobs(w http.ResponseWriter, r *http.Request) { + m.listJobsCallCount.Add(1) + + q := r.URL.Query() + userID := q.Get("user_id") + status := q.Get("status") + + m.mu.Lock() + defer m.mu.Unlock() + + jobs := make([]map[string]any, 0) + if status == "in_progress" { + for _, jobID := range m.userActive[userID] { + j := m.jobs[jobID] + if j == nil { + continue + } + if j.Status != "running" && j.Status != "created" { + continue + } + jobs = append(jobs, converterJobToMap(j)) + } + } + + writeJSON(w, http.StatusOK, map[string]any{ + "jobs": jobs, + "total": len(jobs), + "page": 1, + "page_size": len(jobs), + "has_more": false, + }) +} + +// handleJobsByID 處理 /api/v1/jobs/{id} 與 /api/v1/jobs/{id}/promote。 +func (m *mockConverter) handleJobsByID(w http.ResponseWriter, r *http.Request) { + // 路徑:/api/v1/jobs/{id} 或 /api/v1/jobs/{id}/promote + rest := strings.TrimPrefix(r.URL.Path, "/api/v1/jobs/") + if rest == "" { + http.NotFound(w, r) + return + } + parts := strings.SplitN(rest, "/", 2) + jobID := parts[0] + + if len(parts) == 1 { + // /api/v1/jobs/{id} + switch r.Method { + case http.MethodGet: + m.handleGetJob(w, r, jobID) + default: + 
http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } + return + } + + // 帶 sub-path + switch parts[1] { + case "promote": + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + m.handlePromote(w, r, jobID) + default: + http.NotFound(w, r) + } +} + +// handleGetJob 模擬 GET /api/v1/jobs/{id}。 +func (m *mockConverter) handleGetJob(w http.ResponseWriter, _ *http.Request, jobID string) { + m.getJobCallCount.Add(1) + + m.mu.Lock() + j := m.jobs[jobID] + m.mu.Unlock() + if j == nil { + writeJSON(w, http.StatusNotFound, map[string]any{ + "error": map[string]any{"code": "job_not_found", "message": "job not found"}, + }) + return + } + writeJSON(w, http.StatusOK, converterJobToMap(j)) +} + +// handlePromote 模擬 POST /api/v1/jobs/{id}/promote。 +func (m *mockConverter) handlePromote(w http.ResponseWriter, r *http.Request, jobID string) { + m.promoteCallCount.Add(1) + + var req struct { + Targets []struct { + Source string `json:"source"` + TargetObjectKey string `json:"target_object_key"` + } `json:"targets"` + } + _ = json.NewDecoder(r.Body).Decode(&req) + + m.mu.Lock() + j := m.jobs[jobID] + m.mu.Unlock() + if j == nil { + writeJSON(w, http.StatusNotFound, map[string]any{ + "error": map[string]any{"code": "job_not_found"}, + }) + return + } + target := "models/promoted/" + jobID + ".nef" + if len(req.Targets) > 0 && req.Targets[0].TargetObjectKey != "" { + target = req.Targets[0].TargetObjectKey + } + writeJSON(w, http.StatusOK, map[string]any{ + "job_id": jobID, + "promoted": []map[string]any{{ + "source": "nef", + "target_object_key": target, + "size": int64(1024), + "file_access_agent_etag": "etag-mock", + }}, + }) +} + +// markJobCompleted 把 mock 端 jobID 推進到 completed 狀態(給場景 #3 download 用)。 +func (m *mockConverter) markJobCompleted(jobID string) { + m.mu.Lock() + defer m.mu.Unlock() + if j := m.jobs[jobID]; j != nil { + j.Status = "completed" + j.Stage = "" + hundred := 100 + j.Progress = 
&hundred + j.UpdatedAt = time.Now().UTC() + } +} + +// converterJobToMap 把 mock 內部結構序列化成 converter API response shape。 +func converterJobToMap(j *conversion.ConverterJob) map[string]any { + progress := 0 + if j.Progress != nil { + progress = *j.Progress + } + stageProgress := 0 + if j.StageProgress != nil { + stageProgress = *j.StageProgress + } + out := map[string]any{ + "job_id": j.JobID, + "status": j.Status, + "stage": j.Stage, + "progress": progress, + "stage_progress": stageProgress, + "created_at": j.CreatedAt.Format(time.RFC3339), + "updated_at": j.UpdatedAt.Format(time.RFC3339), + "input": map[string]any{ + "filename": j.SourceFilename, + }, + "parameters": map[string]any{ + "platform": j.Platform, + }, + } + if j.Stage == "" { + out["stage"] = nil + } + return out +} + +// ========================================================================== +// mockMC — 服務 service token (/oauth/token) + delegated download (/file-access/download-tokens) +// ========================================================================== + +type mockMC struct { + srv *httptest.Server + + serviceTokenCount atomic.Int32 + delegatedTokenCount atomic.Int32 + + // 紀錄上一次發出的 delegated token(給場景 #3 驗 location 帶到) + lastDelegatedToken string + mu sync.Mutex +} + +func newMockMC(t *testing.T) *mockMC { + t.Helper() + mc := &mockMC{} + mux := http.NewServeMux() + mux.HandleFunc("/oauth/token", mc.handleServiceToken) + mux.HandleFunc("/file-access/download-tokens", mc.handleDelegated) + mc.srv = httptest.NewServer(mux) + t.Cleanup(mc.srv.Close) + return mc +} + +// handleServiceToken:client_credentials grant 永遠回 200 + access_token + expires_in。 +func (m *mockMC) handleServiceToken(w http.ResponseWriter, r *http.Request) { + m.serviceTokenCount.Add(1) + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "access_token": "mock-service-token-" + randHex(8), + "token_type": 
"Bearer", + "expires_in": 3600, + "scope": r.FormValue("scope"), + }) +} + +// handleDelegated:簽 opaque token + 預設 5 分鐘過期。 +func (m *mockMC) handleDelegated(w http.ResponseWriter, r *http.Request) { + m.delegatedTokenCount.Add(1) + tok := "delegated-" + randHex(16) + m.mu.Lock() + m.lastDelegatedToken = tok + m.mu.Unlock() + writeJSON(w, http.StatusOK, map[string]any{ + "token": tok, + "expires_at": time.Now().Add(5 * time.Minute).UTC().Format(time.RFC3339), + }) +} + +// ========================================================================== +// mockFAA — 純 placeholder,本檔測 download 用 CheckRedirect 卡住 302、不 follow 到 FAA。 +// ========================================================================== + +type mockFAA struct { + srv *httptest.Server +} + +func newMockFAA(t *testing.T) *mockFAA { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + // e2e 不會真的 follow 到這(test client 設 ErrUseLastResponse), + // 留 200 OK 當保險(避免假設外部 mock 必返錯誤)。 + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("mock-nef-bytes")) + })) + t.Cleanup(srv.Close) + return &mockFAA{srv: srv} +} + +// ========================================================================== +// conversionFixture — 把所有 server 拼起來並提供 OIDC 登入 helper +// ========================================================================== + +type conversionFixture struct { + server *httptest.Server // visionA backend + fakeOIDC *oidctest.Server // 給 user 走 OIDC cookie session 登入用 + conv *mockConverter + mc *mockMC + faa *mockFAA + + // 重啟模擬:場景 #2 需要在 instance A 不註冊 ownership 直接 instance B 起, + // 所以保留 lazy 把 conversion service rebuild 進新 router 的 hook。 + router *gin.Engine +} + +func (f *conversionFixture) Close() { + if f.server != nil { + f.server.Close() + } + // fakeOIDC / mocks 由 t.Cleanup 自動關 +} + +// setupConversionFixture 建立完整的 e2e 環境: +// - mock converter / MC service token + delegated / FAA +// - fake OIDC(給 user 走 cookie session 登入) +// - visionA-backend 
router(含 conversion service wired,仿 T7 main.go wire 邏輯) +// +// **不影響 T1-T7 既有 code**:本 fixture 完全獨立,不重用 setupFixture(後者沒 wire conversion)。 +func setupConversionFixture(t *testing.T) *conversionFixture { + t.Helper() + + gin.SetMode(gin.TestMode) + + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelWarn})) + + conv := newMockConverter(t) + mc := newMockMC(t) + faa := newMockFAA(t) + fakeOIDC := oidctest.NewServer(t, + oidctest.WithClientCredentials(fixtureOIDCClientID, fixtureOIDCClientSecret), + ) + + // 用 lazyHandler(既有 helper),因為 storage baseURL 需要 apiServer.URL, + // 而 storage 又是 router 的依賴 — 必須先 Start server 拿 URL。 + lazy := &lazyHandler{} + apiTS := httptest.NewServer(lazy) + t.Cleanup(apiTS.Close) + + storeDir := t.TempDir() + storeStore, err := storage.NewLocalFSStore(storeDir, apiTS.URL+"/storage", "test-secret") + require.NoError(t, err) + + // OIDC provider(指向 fakeOIDC;user cookie session 登入用) + callbackURL := apiTS.URL + "/api/auth/callback" + oidcCtx, oidcCancel := context.WithTimeout(context.Background(), 5*time.Second) + oidcProvider, err := oidc.NewProvider(oidcCtx, oidc.ProviderConfig{ + IssuerURL: fakeOIDC.URL, + ClientID: fakeOIDC.ClientID, + ClientSecret: fakeOIDC.ClientSecret, + RedirectURL: callbackURL, + }) + oidcCancel() + require.NoError(t, err) + + sessionMgr := usersession.NewManager(usersession.NewInMemoryStore(), usersession.CookieConfig{ + Name: "visiona_session", + Path: "/", + HTTPOnly: true, + SameSite: http.SameSiteLaxMode, + MaxAge: 86400, + SigningKey: []byte(fixtureSessionSecret), + }) + + // === 組 conversion service(模擬 main.go T7 wire 邏輯;mocks 替換真實 endpoint) === + // + // 注意:mc_token_client / converter_client / faa_client 共用 5s timeout 的 HTTPClient(InitJob 另用 60s 的 InitHTTPClient) + // 避免測試卡死;對 mock servers 來說連線秒回,timeout 不會觸發。 + fastHTTP := &http.Client{Timeout: 5 * time.Second} + mcTokenClient := conversion.NewMCTokenClient(conversion.MCTokenClientOpts{ + Issuer: mc.srv.URL, + ClientID: "visiona-service-client", + 
ClientSecret: "visiona-service-secret", + HTTPClient: fastHTTP, + Logger: logger, + }) + converterAPIClient := conversion.NewConverterClient(conversion.ConverterClientOpts{ + BaseURL: conv.srv.URL, + Tokens: mcTokenClient, + HTTPClient: fastHTTP, + InitHTTPClient: &http.Client{Timeout: 60 * time.Second}, // 場景 #1 大 body 給寬一點 + Logger: logger, + }) + faaAPIClient := conversion.NewFAAClient(conversion.FAAClientOpts{ + BaseURL: faa.srv.URL, + Tokens: mcTokenClient, + HTTPClient: fastHTTP, + Logger: logger, + }) + ownership := conversion.NewOwnership(converterAPIClient, logger) + + modelRepo := model.NewInMemoryRepository() + modelStoreAdapter := newConversionModelStoreAdapter(modelRepo) + storageAdapter := newConversionStorageAdapter(storeStore) + + conversionService, err := conversion.NewService(conversion.FlowOpts{ + Converter: converterAPIClient, + FAA: faaAPIClient, + MCToken: mcTokenClient, + Ownership: ownership, + ModelStore: modelStoreAdapter, + Storage: storageAdapter, + TenantID: "tenant-visiona", + FAABaseURL: faa.srv.URL, + DelegatedTTLSeconds: 300, + Logger: logger, + }) + require.NoError(t, err) + + // === Build router(含 conversion) === + pairingStore := auth.NewInMemoryPairingStore() + router := api.NewRouter(api.Deps{ + Logger: logger, + PairingStore: pairingStore, + SessionTokenStore: auth.NewInMemorySessionTokenStore(), + // 不需要真實 SessionStore / Forwarder(conversion endpoint 不依賴); + // 但 NewRouter validate 需要某些非 nil 欄位 — 用 stub。 + SessionStore: session.NewProxyClientStore( + session.NewHTTPProxyClient("http://127.0.0.1:1", logger), + session.NewForwarder("http://127.0.0.1:1", logger), + ), + Forwarder: session.NewForwarder("http://127.0.0.1:1", logger), + DeviceRepo: device.NewInMemoryRepository(), + ModelRepo: modelRepo, + Storage: storeStore, + Converter: converter.NewStubClient(), + Conversion: conversionService, + MaxUploadSizeMB: 0, + OIDCProvider: oidcProvider, + SessionManager: sessionMgr, + OIDCPostLoginURL: apiTS.URL, + }) + lazy.Set(router) 
+ + return &conversionFixture{ + server: apiTS, + fakeOIDC: fakeOIDC, + conv: conv, + mc: mc, + faa: faa, + router: router, + } +} + +// AuthenticatedClient 走完整 OIDC login flow;複製自 oidc_test_helper_test.go 的 pattern +// 但綁本檔的 conversionFixture(fakeOIDC / apiServer)。 +// +// 不直接 reuse testFixture.AuthenticatedClient,因為那個綁的是 setupFixture 的 testFixture +// 結構;我們的 conversionFixture 是獨立 type。 +func (f *conversionFixture) AuthenticatedClient(t *testing.T, userID, email string) *http.Client { + t.Helper() + + f.fakeOIDC.SetNextIDTokenClaims(map[string]any{ + "sub": userID, + "email": email, + "name": userID, + }) + + jar, err := cookiejar.New(nil) + require.NoError(t, err) + flowClient := &http.Client{ + Jar: jar, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + Timeout: 10 * time.Second, + } + + loc := getExpect302(t, flowClient, f.server.URL+"/api/auth/login") + require.True(t, strings.HasPrefix(loc, f.fakeOIDC.URL+"/authorize"), + "login 應 302 to fakeOIDC /authorize,得 %s", loc) + + cb := f.fakeOIDC.SimulateAuthorizationFlow(t, loc) + _ = getExpect302(t, flowClient, cb) + + u, err := url.Parse(f.server.URL) + require.NoError(t, err) + var sess *http.Cookie + for _, c := range jar.Cookies(u) { + if c.Name == "visiona_session" { + sess = c + break + } + } + require.NotNil(t, sess, "expected visiona_session cookie") + + return &http.Client{Jar: jar, Timeout: 30 * time.Second} +} + +// ========================================================================== +// E2E #1:Streaming proxy 完整跑通 +// ========================================================================== + +// TestConversionE2E_StreamingProxy 驗 visionA 的 InitJob 真的 streaming — +// 用 io.Pipe 對 visionA 送大量 multipart body(~10MB),驗: +// +// 1. mock converter 收到的 body 解析後能取出 visionA 灌的 user_id(OIDC sub) +// 2. mock converter 收到的 model file 內容與 client 端送的 byte-perfect 一致 +// 3. response 201 + job_id +// 4. 
visionA backend 沒在記憶體 buffer 整個 body(透過 streaming 行為 + 沒 OOM 隱含驗證) +// +// 體積 10MB 而非 100MB:CI 上跑 race -count=3,每次都建 100MB buffer 太貴; +// 10MB 已能驗 streaming 行為(若 visionA 有 buffer 全 RAM,10MB 也會被測出來: +// io.Pipe 的 reader 卡住 → mock converter 永遠收不到完整 body → handler 200ms 內失敗)。 +func TestConversionE2E_StreamingProxy(t *testing.T) { + f := setupConversionFixture(t) + defer f.Close() + + const wantSub = "user-streaming-001" + client := f.AuthenticatedClient(t, wantSub, "stream@e2e.local") + + // 產生 ~10MB 隨機 model file content(mock 收到後比對 byte-perfect) + modelBytes := make([]byte, 10*1024*1024) + _, err := rand.Read(modelBytes) + require.NoError(t, err) + + // 用 io.Pipe + multipart.Writer 邊產 body 邊送(streaming;沒在記憶體組整個 body) + pr, pw := io.Pipe() + mw := multipart.NewWriter(pw) + contentType := mw.FormDataContentType() + + go func() { + defer pw.Close() + defer mw.Close() + // 順序:先寫 form fields,再寫 file(converter multer 慣例) + _ = mw.WriteField("model_id", "12345") + _ = mw.WriteField("version", "v1.0.0") + _ = mw.WriteField("platform", "720") + // 嘗試塞 user_id(攻擊者場景)— 驗 visionA 黑名單 + _ = mw.WriteField("user_id", "ATTACKER-OVERRIDE") + + fw, _ := mw.CreateFormFile("model", "yolov5s.onnx") + // chunked write(每次寫 64KB;確保走 streaming 路徑) + for i := 0; i < len(modelBytes); i += 64 * 1024 { + end := i + 64*1024 + if end > len(modelBytes) { + end = len(modelBytes) + } + if _, werr := fw.Write(modelBytes[i:end]); werr != nil { + return + } + } + }() + + req, err := http.NewRequest(http.MethodPost, f.server.URL+"/api/conversion/init", pr) + require.NoError(t, err) + req.Header.Set("Content-Type", contentType) + + resp, err := client.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + bodyBytes, _ := io.ReadAll(resp.Body) + require.Equal(t, http.StatusCreated, resp.StatusCode, "body=%s", string(bodyBytes)) + + // 驗 response shape + var apiResp map[string]any + require.NoError(t, json.Unmarshal(bodyBytes, &apiResp)) + require.Equal(t, true, apiResp["success"]) + data := 
apiResp["data"].(map[string]any) + jobID, _ := data["job_id"].(string) + require.NotEmpty(t, jobID) + require.Equal(t, "running", data["status"]) + + // 驗 mock converter 收到的 body 真的有解出 user_id(且為 visionA 灌的 OIDC sub) + gotBody, gotCT, gotLen := f.conv.initBodySnapshot() + require.NotZero(t, gotLen, "mock converter 應收到 non-empty body") + require.Greater(t, gotLen, int64(10*1024*1024), "body 至少 10MB(含 multipart overhead 應略大)") + + gotUserID, ok := parseMultipartField(gotCT, gotBody, "user_id") + require.True(t, ok, "mock converter 收到的 multipart 應含 user_id field") + require.Equal(t, wantSub, gotUserID, + "visionA 必須注入 OIDC sub 為 user_id(不能採用 client 端塞的 ATTACKER-OVERRIDE)") + + // 驗 model file 內容 byte-perfect(streaming forward 沒掉 byte / 沒亂改) + gotModel, ok := parseMultipartFile(gotCT, gotBody, "model") + require.True(t, ok, "mock converter 收到的 multipart 應含 model file") + require.Equal(t, len(modelBytes), len(gotModel), "model file 長度應 byte-perfect 一致") + require.True(t, bytes.Equal(modelBytes, gotModel), "model file content 應 byte-perfect 一致") + + require.Equal(t, int32(1), f.conv.initCallCount.Load(), "converter init 應被打 1 次") +} + +// ========================================================================== +// E2E #2:重啟恢復 lazy rebuild +// ========================================================================== + +// TestConversionE2E_LazyRebuildAfterRestart 驗 visionA backend 重啟後(in-memory +// ownership 全空),user 對 GET /active 仍能拿到 in_progress job — 透過對 converter +// ListInProgressJobs 觸發 lazy rebuild。 +// +// 流程: +// 1. 起 instance A、預先在 mock converter 端註冊 user X 有 1 個 in_progress job +// (模擬:user X 之前 init 過,但 visionA-backend 重啟導致 in-memory ownership 丟失) +// 2. user X 透過 instance A 對 /active 打第一次 → 回 has_active:true(lazy rebuild) +// 3. 驗 mock converter 的 ListInProgressJobs 被打 1 次 +// 4. 
user X 對 instance A 對 /active 打第二次 → 仍回 has_active:true,但 +// ListInProgressJobs **沒有**再被打(cache hit / rebuilt flag set) +// +// 注意:題目說「啟動 instance B(模擬重啟)— 沿用同一個 mock converter」;實作上 +// 「instance B」就是「重新 setupConversionFixture 但共用 mock converter」。但 instance A +// 從來沒有 init 過任何 job(題目 #2 的前提就是 in-memory ownership 全空),所以 instance A +// 本身已等同「重啟後的乾淨 instance」— 不需要真的開兩個 server,這樣場景測得更乾淨。 +func TestConversionE2E_LazyRebuildAfterRestart(t *testing.T) { + f := setupConversionFixture(t) + defer f.Close() + + const wantSub = "user-rebuild-002" + + // 預先在 mock converter 端註冊 user X 有 1 個 in_progress job(模擬 visionA 重啟前的狀態) + preexistingJobID := "job-preexisting-001" + createdAt := time.Now().Add(-1 * time.Hour).UTC() + f.conv.addInProgressJob(wantSub, preexistingJobID, createdAt) + + client := f.AuthenticatedClient(t, wantSub, "rebuild@e2e.local") + + // 第一次 /active → 觸發 lazy rebuild(visionA 對 converter 打 ListInProgressJobs) + resp1 := getJSONReq(t, client, f.server.URL+"/api/conversion/active") + require.Equal(t, http.StatusOK, resp1.status, "body=%v", resp1.body) + require.Equal(t, true, resp1.body["success"]) + data1 := resp1.body["data"].(map[string]any) + assert.Equal(t, true, data1["has_active"], + "lazy rebuild 後應拿到 active job(visionA 從 converter 重建 ownership)") + job1 := data1["job"].(map[string]any) + assert.Equal(t, preexistingJobID, job1["job_id"], + "應拿回 mock 端那個預先註冊的 job_id") + + // list 應被打 1 次 + listCount1 := f.conv.listJobsCallCount.Load() + require.Equal(t, int32(1), listCount1, "lazy rebuild 應打 ListInProgressJobs 1 次") + + // 第二次 /active → 走 cache(rebuilt flag set),不再打 list + resp2 := getJSONReq(t, client, f.server.URL+"/api/conversion/active") + require.Equal(t, http.StatusOK, resp2.status) + data2 := resp2.body["data"].(map[string]any) + assert.Equal(t, true, data2["has_active"], "第二次仍應有 active") + + listCount2 := f.conv.listJobsCallCount.Load() + assert.Equal(t, listCount1, listCount2, + "第二次 /active 應走 cache,不再打 ListInProgressJobs(實際多了 %d 次)", + 
listCount2-listCount1) +} + +// ========================================================================== +// E2E #3:Download 302 redirect +// ========================================================================== + +// TestConversionE2E_Download302Redirect 驗: +// +// 1. user X 對 completed job 打 /download → status 302 Found +// 2. Location header 是 /files/?access_token= +// 3. Cache-Control: no-store, no-cache, ... +// 4. response body 不含 token 字串(grep response body 找不到 token) +// 5. 不是 c.JSON 回 download_url(純 redirect — content-type 不是 application/json) +// +// 流程: +// - 起 fixture +// - user X 透過 init 建一個 job(mock 會自動建 running job) +// - 把 mock converter 端 job 改成 completed +// - 對 /download 打 — client 設 ErrUseLastResponse 不 follow redirect +func TestConversionE2E_Download302Redirect(t *testing.T) { + f := setupConversionFixture(t) + defer f.Close() + + const wantSub = "user-download-003" + client := f.AuthenticatedClient(t, wantSub, "download@e2e.local") + + // 1. 先 init 一個 job(讓 visionA 端寫 ownership) + jobID := initSimpleJob(t, client, f.server.URL) + + // 2. 把 mock converter 端 job 推進到 completed(給 download 用) + f.conv.markJobCompleted(jobID) + + // 3. 
用「不 follow redirect」的 client 對 /download 打 + noRedirectClient := &http.Client{ + Jar: client.Jar, + Timeout: 10 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + } + + resp, err := noRedirectClient.Get(f.server.URL + "/api/conversion/" + jobID + "/download") + require.NoError(t, err) + defer resp.Body.Close() + + bodyBytes, _ := io.ReadAll(resp.Body) + + // 驗 status 302 + require.Equal(t, http.StatusFound, resp.StatusCode, "body=%s", string(bodyBytes)) + + // 驗 Location 指向 mock FAA + 帶 access_token + location := resp.Header.Get("Location") + require.NotEmpty(t, location) + require.True(t, strings.HasPrefix(location, f.faa.srv.URL+"/files/"), + "Location 應指向 mock FAA /files/,得 %s", location) + require.Contains(t, location, "access_token=", "Location 應帶 access_token query") + + // 驗 token 真的在 query 中且 == mock 端發出的 token + parsed, err := url.Parse(location) + require.NoError(t, err) + gotToken := parsed.Query().Get("access_token") + require.NotEmpty(t, gotToken) + f.mc.mu.Lock() + wantToken := f.mc.lastDelegatedToken + f.mc.mu.Unlock() + require.Equal(t, wantToken, gotToken, + "Location 帶的 access_token 應 == mock MC 簽發的 token") + + // 驗 Cache-Control: no-store + cc := resp.Header.Get("Cache-Control") + require.Contains(t, cc, "no-store", "Cache-Control 應含 no-store,得 %s", cc) + + // 驗 response 不是用 c.JSON 回(純 server-side 302,§10.4 token 不過 frontend JS) + // + // 行為說明(不要過度斷言「整個 body 不能含 token」): + // net/http.Redirect 的標準 fallback 會在 HTML body 塞一個 Found + // 讓不支援 302 的 user-agent 還能手動點。**這是 net/http 的標準行為,不是 visionA + // 把 token 寫進 JSON 給 frontend JS**。token 仍只活在 Location header 與 HTML + // anchor 中,frontend JS 沒辦法用通常的 fetch().json() 讀到(需要 parse HTML)。 + // + // §10.4 安全聲明的精神是: + // - 不用 c.JSON 回 download_url(避免 JS 直接 .data.download_url 拿到 token) + // - Cache-Control: no-store 避免 browser 把 Location 寫 disk cache + // 兩者本檔都驗。 + // + // 因此這裡的斷言改為: + // 1. 
content-type 不是 application/json(沒用 c.JSON) + // 2. body 不是 visionA 的 success envelope({success: true, data: {download_url: ...}}) + bodyStr := string(bodyBytes) + ct := resp.Header.Get("Content-Type") + if ct != "" { + assert.NotContains(t, strings.ToLower(ct), "application/json", + "302 response 不應為 JSON(用了 c.JSON 而非 c.Redirect);得 content-type=%s", ct) + } + // 確認 body 不是「visionA 包好的 JSON envelope 帶 download_url」 + // (這正是 §10.4 不要的形式) + var maybeEnvelope map[string]any + if json.Unmarshal(bodyBytes, &maybeEnvelope) == nil { + if data, ok := maybeEnvelope["data"].(map[string]any); ok { + _, hasURL := data["download_url"] + assert.False(t, hasURL, + "response body 不應為 {success:..., data:{download_url:...}} 形式(用了 c.JSON);body=%s", + bodyStr) + } + } +} + +// ========================================================================== +// E2E #4:Active job 409 衝突 +// ========================================================================== + +// TestConversionE2E_ActiveJobConflict 驗:同一 user 在 visionA 端有 active job 時, +// 第二個 init 應回 409 + body 含 active_job 詳情。 +// +// 流程: +// 1. user X 第一個 init → 200 + 取得 jobID1(visionA 寫 ownership) +// 2. user X 第二個 init → visionA pre-check 命中 ownership.ActiveJobOf(userID) 不為空 +// → flow.checkActiveJob 對 mock converter GetJob jobID1 → status=running(active) +// → 回 *ActiveJobError,handler 包成 409 + extra.active_job +// 3. 
驗 status 409 + body.error.code == "active_job_exists" + extra.active_job.job_id == jobID1 +// +// 注意:題目原本要求「mock converter 第二次回 409 user_has_active_job」 — +// 但實際上 visionA pre-check 會在打 converter 之前就 short-circuit(§9.3 流程圖): +// 因此第二個 init 根本不會打到 converter init endpoint。這個行為更安全(少一次浪費 round-trip), +// 我們驗 visionA 自己的 pre-check 有效,並驗 active_job extra payload。 +// +// 若要驗「converter 端也有同樣保護」由 internal/conversion/converter_client_test.go +// 的 ActiveJobError mapping test 涵蓋(T3 已驗)。 +func TestConversionE2E_ActiveJobConflict(t *testing.T) { + f := setupConversionFixture(t) + defer f.Close() + + const wantSub = "user-conflict-004" + client := f.AuthenticatedClient(t, wantSub, "conflict@e2e.local") + + // 第一次 init → 應 201 + jobID1 := initSimpleJob(t, client, f.server.URL) + require.NotEmpty(t, jobID1) + + // 第二次 init → 應撞 409 + resp2 := postSimpleInit(t, client, f.server.URL) + defer resp2.Body.Close() + body2, _ := io.ReadAll(resp2.Body) + require.Equal(t, http.StatusConflict, resp2.StatusCode, + "第二次 init 應 409;body=%s", string(body2)) + + var apiResp map[string]any + require.NoError(t, json.Unmarshal(body2, &apiResp)) + require.Equal(t, false, apiResp["success"]) + errObj := apiResp["error"].(map[string]any) + assert.Equal(t, "active_job_exists", errObj["code"]) + + // extra.active_job.job_id 應為 jobID1 + extra, ok := errObj["extra"].(map[string]any) + require.True(t, ok, "error.extra 應存在(帶 active_job 詳情),實際 error=%v", errObj) + activeJob, ok := extra["active_job"].(map[string]any) + require.True(t, ok, "extra.active_job 應為 object") + assert.Equal(t, jobID1, activeJob["job_id"], + "active_job.job_id 應指向第一個 init 建立的 job") + + // 驗第二次 init **沒有真的打到** mock converter init endpoint(visionA pre-check 短路) + assert.Equal(t, int32(1), f.conv.initCallCount.Load(), + "第二次 init 應被 visionA pre-check 短路;mock converter init 應只被打 1 次(實際 %d 次)", + f.conv.initCallCount.Load()) +} + +// ========================================================================== +// 共用 helper +// 
========================================================================== + +// initSimpleJob 對 visionA 送一個 minimal multipart init request,回 jobID。 +func initSimpleJob(t *testing.T, client *http.Client, baseURL string) string { + t.Helper() + resp := postSimpleInit(t, client, baseURL) + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + require.Equal(t, http.StatusCreated, resp.StatusCode, "init body=%s", string(body)) + var apiResp map[string]any + require.NoError(t, json.Unmarshal(body, &apiResp)) + data := apiResp["data"].(map[string]any) + id, _ := data["job_id"].(string) + require.NotEmpty(t, id) + return id +} + +// postSimpleInit 送一個 minimal multipart init;回 raw response(caller defer Close)。 +func postSimpleInit(t *testing.T, client *http.Client, baseURL string) *http.Response { + t.Helper() + var buf bytes.Buffer + mw := multipart.NewWriter(&buf) + _ = mw.WriteField("model_id", "12345") + _ = mw.WriteField("version", "v1.0.0") + _ = mw.WriteField("platform", "720") + fw, _ := mw.CreateFormFile("model", "yolov5s.onnx") + _, _ = fw.Write([]byte("dummy-onnx-bytes")) + _ = mw.Close() + + req, err := http.NewRequest(http.MethodPost, baseURL+"/api/conversion/init", &buf) + require.NoError(t, err) + req.Header.Set("Content-Type", mw.FormDataContentType()) + resp, err := client.Do(req) + require.NoError(t, err) + return resp +} + +// getJSONReq 對 client 打 GET 並 parse JSON。複製自 oidc_e2e_test 的 getJSON +// 但獨立命名避免 helper 命名衝突。 +func getJSONReq(t *testing.T, client *http.Client, target string) jsonResp { + t.Helper() + resp, err := client.Get(target) + require.NoError(t, err) + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + out := jsonResp{status: resp.StatusCode, body: map[string]any{}} + if len(body) > 0 { + _ = json.Unmarshal(body, &out.body) + } + return out +} + +// parseMultipartField 從 raw multipart body 取 form field 的值。 +func parseMultipartField(contentType string, body []byte, fieldName string) (string, bool) { + parts, ok := 
iterMultipart(contentType, body) + if !ok { + return "", false + } + for _, p := range parts { + if p.name == fieldName && p.filename == "" { + return string(p.body), true + } + } + return "", false +} + +// parseMultipartFile 從 raw multipart body 取 file part 的內容。 +func parseMultipartFile(contentType string, body []byte, fieldName string) ([]byte, bool) { + parts, ok := iterMultipart(contentType, body) + if !ok { + return nil, false + } + for _, p := range parts { + if p.name == fieldName && p.filename != "" { + return p.body, true + } + } + return nil, false +} + +// multipartPart 是 iterMultipart 的中間結構。 +type multipartPart struct { + name string + filename string + body []byte +} + +// iterMultipart 解 raw multipart body → []multipartPart。 +func iterMultipart(contentType string, body []byte) ([]multipartPart, bool) { + _, params, err := mime.ParseMediaType(contentType) + if err != nil { + return nil, false + } + boundary := params["boundary"] + if boundary == "" { + return nil, false + } + mr := multipart.NewReader(bytes.NewReader(body), boundary) + out := make([]multipartPart, 0, 4) + for { + part, err := mr.NextPart() + if errors.Is(err, io.EOF) { + return out, true + } + if err != nil { + return nil, false + } + raw, _ := io.ReadAll(part) + _ = part.Close() + out = append(out, multipartPart{ + name: part.FormName(), + filename: part.FileName(), + body: raw, + }) + } +} + +// writeJSON 是 mock server handler 共用的 JSON 回應 helper。 +func writeJSON(w http.ResponseWriter, status int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(v) +} + +// randHex 產 n bytes random hex,給 mock token 用。 +func randHex(n int) string { + b := make([]byte, n) + _, _ = rand.Read(b) + return fmt.Sprintf("%x", b) +} diff --git a/visionA-backend/cmd/api-server/main.go b/visionA-backend/cmd/api-server/main.go index bec7bb0..0c616f2 100644 --- a/visionA-backend/cmd/api-server/main.go +++ b/visionA-backend/cmd/api-server/main.go 
@@ -27,6 +27,7 @@ import ( "visiona-backend/internal/api" "visiona-backend/internal/auth" "visiona-backend/internal/config" + "visiona-backend/internal/conversion" "visiona-backend/internal/converter" "visiona-backend/internal/device" "visiona-backend/internal/logger" @@ -136,6 +137,69 @@ func main() { // ===== Converter(stub,Phase 2 才實作) ===== converterClient := converter.NewStubClient() + // ===== Phase 0.8 Conversion(轉檔功能整合) ===== + // 對齊 .autoflow/04-architecture/conversion.md。 + // + // 啟用條件:cfg.Conversion.Enabled() — ConverterBaseURL + FAABaseURL 都非空。 + // 啟用時必須有 ServiceClientID/Secret(client_credentials grant 必要)。 + // 不啟用時 deps.Conversion 為 nil,5 個 endpoint 自動回 501(registerConversionRoutes 處理)。 + var conversionService conversion.Service + if cfg.Conversion.Enabled() { + // service token 機制依賴 ServiceClientID/Secret — 沒設就 fatal,避免半設定狀態 + if cfg.OIDC.ServiceClientID == "" || cfg.OIDC.ServiceClientSecret == "" { + log.Error("conversion enabled but service client credentials missing", + "hint", "set VISIONA_OIDC_SERVICE_CLIENT_ID + VISIONA_OIDC_SERVICE_CLIENT_SECRET, or unset CONVERTER/FAA base URL to disable") + os.Exit(1) + } + + mcTokenClient := conversion.NewMCTokenClient(conversion.MCTokenClientOpts{ + Issuer: cfg.OIDC.IssuerURL, + ClientID: cfg.OIDC.ServiceClientID, + ClientSecret: cfg.OIDC.ServiceClientSecret, + Logger: log, + }) + converterAPIClient := conversion.NewConverterClient(conversion.ConverterClientOpts{ + BaseURL: cfg.Conversion.ConverterBaseURL, + Tokens: mcTokenClient, + Logger: log, + }) + faaAPIClient := conversion.NewFAAClient(conversion.FAAClientOpts{ + BaseURL: cfg.Conversion.FAABaseURL, + Tokens: mcTokenClient, + Logger: log, + }) + ownership := conversion.NewOwnership(converterAPIClient, log) + + // narrow adapter(避免 conversion 直接 import internal/model / internal/storage) + modelStoreAdapter := newConversionModelStoreAdapter(modelRepo) + storageAdapter := newConversionStorageAdapter(storageStore) + + var convErr error + 
conversionService, convErr = conversion.NewService(conversion.FlowOpts{ + Converter: converterAPIClient, + FAA: faaAPIClient, + MCToken: mcTokenClient, + Ownership: ownership, + ModelStore: modelStoreAdapter, + Storage: storageAdapter, + TenantID: cfg.Conversion.TenantID, + FAABaseURL: cfg.Conversion.FAABaseURL, + DelegatedTTLSeconds: cfg.Conversion.DelegatedTTLSeconds, + Logger: log, + }) + if convErr != nil { + log.Error("failed to init conversion service", "error", convErr) + os.Exit(1) + } + log.Info("conversion service initialized", + "converter_base_url", cfg.Conversion.ConverterBaseURL, + "faa_base_url", cfg.Conversion.FAABaseURL, + "tenant_id", cfg.Conversion.TenantID, + "delegated_ttl_sec", cfg.Conversion.DelegatedTTLSeconds) + } else { + log.Info("conversion service disabled (set VISIONA_CONVERTER_BASE_URL + VISIONA_FAA_BASE_URL to enable)") + } + // ===== Seed demo data(可選) ===== if cfg.Server.SeedDemoData { if err := seedDemoData(deviceRepo, modelRepo, pairingStore, cfg.Auth.StaticUserID, log); err != nil { @@ -157,6 +221,7 @@ func main() { ModelRepo: modelRepo, Storage: storageStore, Converter: converterClient, + Conversion: conversionService, // Phase 0.8(nil 時 /api/conversion/* 回 501) MaxUploadSizeMB: cfg.Model.MaxSizeMB, CORSAllowedOrigins: cfg.CORS.AllowedOrigins, RelayPublicURL: cfg.Server.RelayPublicURL, diff --git a/visionA-backend/internal/api/api.go b/visionA-backend/internal/api/api.go index c1088db..6370b00 100644 --- a/visionA-backend/internal/api/api.go +++ b/visionA-backend/internal/api/api.go @@ -23,6 +23,7 @@ import ( "github.com/gin-gonic/gin" "visiona-backend/internal/auth" + "visiona-backend/internal/conversion" "visiona-backend/internal/converter" "visiona-backend/internal/device" "visiona-backend/internal/model" @@ -69,6 +70,13 @@ type Deps struct { Storage storage.Store Converter converter.Client + // Conversion 是 Phase 0.8 轉檔功能的 Service interface(5 個 endpoint 共用)。 + // 為 nil 時 /api/conversion/* 5 個 endpoint 全回 501 NOT_IMPLEMENTED 
+ // (main.go 在 cfg.Conversion.Enabled() 為 false 時不 wire),對齊 api-conversion.md。 + // + // 設計選擇:用 conversion.Service interface 而非 concrete type — 方便 unit test 注入 stub。 + Conversion conversion.Service + // CORSAllowedOrigins 是允許的瀏覽器 Origin 白名單;空 slice 預設放行 // http://localhost:3000(前端 dev server)。 CORSAllowedOrigins []string @@ -174,6 +182,10 @@ func NewRouter(deps Deps) *gin.Engine { registerModelRoutes(apiGroup, deps) registerClusterRoutes(apiGroup, deps) + // Phase 0.8:Conversion(轉檔)— 5 個 endpoint + // 對齊 .autoflow/04-architecture/api/api-conversion.md + registerConversionRoutes(apiGroup, deps) + // Stubs(只註冊「還沒有實際 handler」的那些 endpoint) registerStubRoutes(apiGroup, deps) diff --git a/visionA-backend/internal/api/conversion.go b/visionA-backend/internal/api/conversion.go new file mode 100644 index 0000000..d7221ff --- /dev/null +++ b/visionA-backend/internal/api/conversion.go @@ -0,0 +1,469 @@ +// conversion.go — /api/conversion/* 的 handler 實作(Phase 0.8)。 +// +// 對齊: +// - .autoflow/04-architecture/api/api-conversion.md(5 個 endpoint API spec) +// - .autoflow/04-architecture/conversion.md §3 endpoint 表 + §6 錯誤碼 + §10 安全考量 +// - internal/conversion/conversion.go(Service interface) +// +// 5 個 endpoint: +// +// POST /api/conversion/init — 啟動轉檔(multipart streaming) +// GET /api/conversion/active — 查當前 active job +// GET /api/conversion/{job_id} — poll 狀態 +// POST /api/conversion/{job_id}/promote-to-models — 加到模型庫 +// GET /api/conversion/{job_id}/download — server-side 302 redirect → FAA +// +// 安全要點(對齊 conversion.md §7 / §10): +// - 全部 5 個 endpoint 都註冊在 apiGroup(OIDC AuthMiddleware 之後) +// - userID 一律來自 UserContextFrom(c).UserID(從 cookie session 解出 OIDC sub) +// - 任何 client 帶來的 user_id(multipart form / JSON / query)一律忽略 +// - /init 不呼叫 c.MultipartForm() — 會 buffer 全 body 進 RAM / disk,破壞 streaming +// - /download 採 HTTP 302 Found;token 不出現在任何 JSON response(§10.4) +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/api/api-conversion.md) + +package api + +import ( + 
"context" + "encoding/json" + "errors" + "net/http" + "strings" + + "github.com/gin-gonic/gin" + + "visiona-backend/internal/conversion" +) + +// ========================================================================== +// Route 註冊 +// ========================================================================== + +// registerConversionRoutes 註冊 /api/conversion/* 的 routes。 +// +// 由 NewRouter 在 apiGroup(OIDC AuthMiddleware 已套)下呼叫; +// 若 deps.Conversion 為 nil(Phase 0.8 conversion 未啟用,例如 dev 環境沒設 +// CONVERTER_BASE_URL / FAA_BASE_URL)→ 5 個 endpoint 一律回 501。 +func registerConversionRoutes(g *gin.RouterGroup, deps Deps) { + if deps.Conversion == nil { + // 未啟用 — 註冊 501 stub,避免 404(讓 frontend 拿到明確 NOT_IMPLEMENTED) + notImpl := func(c *gin.Context) { + WriteNotImplemented(c, "conversion service is not configured (set VISIONA_CONVERTER_BASE_URL + VISIONA_FAA_BASE_URL)") + } + conv := g.Group("/conversion") + conv.POST("/init", notImpl) + conv.GET("/active", notImpl) + conv.GET("/:job_id", notImpl) + conv.POST("/:job_id/promote-to-models", notImpl) + conv.GET("/:job_id/download", notImpl) + return + } + + conv := g.Group("/conversion") + conv.POST("/init", conversionInitHandler(deps)) + conv.GET("/active", conversionActiveHandler(deps)) + conv.GET("/:job_id", conversionGetHandler(deps)) + conv.POST("/:job_id/promote-to-models", conversionPromoteHandler(deps)) + conv.GET("/:job_id/download", conversionDownloadHandler(deps)) +} + +// ========================================================================== +// 1. POST /api/conversion/init +// ========================================================================== + +// conversionInitHandler 處理「啟動轉檔」請求。 +// +// 流程: +// 1. UserContextFrom 拿 OIDC sub(AuthMiddleware 已驗) +// 2. 驗 Content-Type 必須是 multipart/form-data(含 boundary) +// 3. 直接把 c.Request.Body + Content-Type 傳給 Service.InitJob +// (**不**呼叫 c.MultipartForm() — 會破壞 streaming) +// 4. 成功 → 201 + Job +// 5. 
失敗 → 透過 handleConversionError 對應 sentinel mapping +// +// 對齊 api-conversion.md §1 + conversion.md §4.2 streaming proxy。 +func conversionInitHandler(deps Deps) gin.HandlerFunc { + return func(c *gin.Context) { + uc, ok := UserContextFrom(c) + if !ok || uc.UserID == "" { + // AuthMiddleware 已通過卻拿不到 UserContext — 設定錯誤 + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "missing user context (auth middleware misconfigured?)", nil) + return + } + + ct := c.GetHeader("Content-Type") + if !strings.HasPrefix(strings.ToLower(ct), "multipart/form-data") { + WriteError(c, http.StatusBadRequest, ErrCodeValidationFailed, + "Content-Type must be multipart/form-data with boundary", nil) + return + } + + // 把 raw body + Content-Type 傳給 Service;Service 內部處理 multipart streaming + // 重組(注入 user_id、黑名單 client 帶的 user_id)。見 conversion.md §4.2。 + in := conversion.InitJobInput{ + UserID: uc.UserID, + ContentType: ct, + Body: c.Request.Body, + ContentLength: c.Request.ContentLength, + } + + job, err := deps.Conversion.InitJob(c.Request.Context(), in) + if err != nil { + handleConversionError(c, err) + return + } + + // 成功 — 201 Created(對齊 RESTful 慣例:POST 建立資源用 201) + WriteSuccess(c, http.StatusCreated, jobToResponse(job)) + } +} + +// ========================================================================== +// 2. 
GET /api/conversion/active +// ========================================================================== + +// conversionActiveHandler 處理「查當前 active job」請求。 +// +// 對齊 api-conversion.md §5: +// - 有 active → 200 + {has_active: true, job: {...}} +// - 無 active → 200 + {has_active: false, job: null} +// +// 重啟恢復場景由 Service 內部 EnsureRebuilt 處理(lazy rebuild from converter); +// handler 對 frontend 完全透明。 +func conversionActiveHandler(deps Deps) gin.HandlerFunc { + return func(c *gin.Context) { + uc, ok := UserContextFrom(c) + if !ok || uc.UserID == "" { + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "missing user context (auth middleware misconfigured?)", nil) + return + } + + job, err := deps.Conversion.ActiveJob(c.Request.Context(), uc.UserID) + if err != nil { + handleConversionError(c, err) + return + } + + if job == nil { + WriteSuccess(c, http.StatusOK, gin.H{ + "has_active": false, + "job": nil, + }) + return + } + WriteSuccess(c, http.StatusOK, gin.H{ + "has_active": true, + "job": jobToResponse(job), + }) + } +} + +// ========================================================================== +// 3. 
GET /api/conversion/{job_id} +// ========================================================================== + +// conversionGetHandler 處理「poll job 狀態」請求。 +// +// 對齊 api-conversion.md §2。 +// 設計選擇:ownership 不符 / job 不存在都對應到 ErrJobNotFound(404)— +// 由 Service 層做安全 mapping(見 flow.go GetJob 註解:避免「forbidden vs not_found」 +// 差異枚舉合法 job_id)。 +func conversionGetHandler(deps Deps) gin.HandlerFunc { + return func(c *gin.Context) { + uc, ok := UserContextFrom(c) + if !ok || uc.UserID == "" { + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "missing user context (auth middleware misconfigured?)", nil) + return + } + + jobID := c.Param("job_id") + if jobID == "" { + WriteError(c, http.StatusBadRequest, ErrCodeValidationFailed, + "job_id is required", nil) + return + } + + job, err := deps.Conversion.GetJob(c.Request.Context(), uc.UserID, jobID) + if err != nil { + handleConversionError(c, err) + return + } + WriteSuccess(c, http.StatusOK, jobToResponse(job)) + } +} + +// ========================================================================== +// 4. POST /api/conversion/{job_id}/promote-to-models +// ========================================================================== + +// promoteRequest 是 promote-to-models 的 request body(對齊 api-conversion.md §3)。 +// +// `name` 是 Phase 0.8 wireframe §7.1 的單一欄位;可為空(Service 用 +// `{source_filename_stem}_{target_chip_lower}` fallback)。 +// `description` 雖在 schema 內但 Phase 0.8 不顯示給使用者,backend 接受但忽略。 +type promoteRequest struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` // Phase 0.8 ignored, Phase 1 reserved +} + +// conversionPromoteHandler 處理「加到模型庫」請求。 +// +// 流程: +// 1. 驗 user / job_id +// 2. 解析 body(name 可空;body 整個可空) +// 3. Service.PromoteToModels:promote → FAA pull → models repo finalize +// 4. 成功 → 201 + PromoteResult +// 5. 
冪等:同 jobID 重複 promote 由 Service 層處理(回既有 model record,也是 201) +func conversionPromoteHandler(deps Deps) gin.HandlerFunc { + return func(c *gin.Context) { + uc, ok := UserContextFrom(c) + if !ok || uc.UserID == "" { + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "missing user context (auth middleware misconfigured?)", nil) + return + } + + jobID := c.Param("job_id") + if jobID == "" { + WriteError(c, http.StatusBadRequest, ErrCodeValidationFailed, + "job_id is required", nil) + return + } + + // body optional — 沒帶或解析失敗都不擋(name 可由 Service fallback) + var body promoteRequest + if c.Request.Body != nil && c.Request.ContentLength != 0 { + // 寬鬆解析:JSON 解失敗只 log(不算 hard error,因為 name 可選) + if err := json.NewDecoder(c.Request.Body).Decode(&body); err != nil { + // 嚴格一點:JSON 格式錯誤回 400(避免 silent ignore 讓使用者困惑) + WriteError(c, http.StatusBadRequest, ErrCodeValidationFailed, + "invalid JSON body: "+err.Error(), nil) + return + } + } + + result, err := deps.Conversion.PromoteToModels(c.Request.Context(), uc.UserID, jobID, body.Name) + if err != nil { + handleConversionError(c, err) + return + } + WriteSuccess(c, http.StatusCreated, result) + } +} + +// ========================================================================== +// 5. 
GET /api/conversion/{job_id}/download +// ========================================================================== + +// conversionDownloadHandler 處理「下載」請求 — server-side HTTP 302 redirect。 +// +// 對齊 api-conversion.md §4 + conversion.md §3.1 / §10.4: +// - 成功:302 Found + Location: +// - 失敗:不 redirect,依 Accept header 回 JSON / HTML 錯誤 +// - Cache-Control: no-store — token 不該被 browser cache(即使是 302 Location) +// +// 仿 FAA TestSite `DownloadFileDirect` pattern:token 永遠不過 frontend JS。 +func conversionDownloadHandler(deps Deps) gin.HandlerFunc { + return func(c *gin.Context) { + uc, ok := UserContextFrom(c) + if !ok || uc.UserID == "" { + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "missing user context (auth middleware misconfigured?)", nil) + return + } + + jobID := c.Param("job_id") + if jobID == "" { + WriteError(c, http.StatusBadRequest, ErrCodeValidationFailed, + "job_id is required", nil) + return + } + + downloadURL, err := deps.Conversion.DownloadRedirectURL(c.Request.Context(), uc.UserID, jobID) + if err != nil { + // 錯誤情況不 redirect — 依 Accept header 回 JSON / HTML(WriteError 寫 JSON + // 已能滿足主要 case;anchor tag 觸發時 browser 會直接顯示 JSON 也 OK, + // Phase 0.8 不額外做 HTML 錯誤頁) + handleConversionError(c, err) + return + } + + // 防快取:避免 browser 把 302 + Location 寫入 history / disk cache(§10.4) + c.Header("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0") + c.Header("Pragma", "no-cache") + // 302 Found(不用 301 — 301 可能被某些 browser 永久 cache) + c.Redirect(http.StatusFound, downloadURL) + } +} + +// ========================================================================== +// 錯誤處理 helper +// ========================================================================== + +// handleConversionError 把 conversion package 的 sentinel error 轉成統一 JSON 錯誤回應。 +// +// 對齊 conversion.md §6 mapping + api-conversion.md 錯誤碼總覽。 +// +// 特殊情況: +// - ActiveJobError:附帶 `extra.active_job` 給 frontend 顯示「你已有進行中任務」 +// - ConverterValidationError:附帶 details(fields)給 
frontend 顯示具體欄位錯 +// - 其他:用 errorMessageFor 拿 user-friendly 訊息 +// +// HTTP status / error code 由 conversion.HTTPStatus / conversion.ErrorCode 決定, +// handler 不做二次 mapping。 +func handleConversionError(c *gin.Context, err error) { + if err == nil { + // defensive — caller bug + WriteError(c, http.StatusInternalServerError, ErrCodeInternalError, + "unknown error (handleConversionError called with nil)", nil) + return + } + + // ctx cancel / deadline — handler 不主動回(client 已斷線;gin 收到時通常已 abort) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + // gin context aborted 時 c.Writer 仍可寫(但 client 看不到),保持簡單寫入 + WriteError(c, http.StatusServiceUnavailable, "request_cancelled", + "request cancelled or timed out", nil) + return + } + + status := conversion.HTTPStatus(err) + code := conversion.ErrorCode(err) + message := errorMessageFor(code) + + // ActiveJobError — 帶 active_job detail(前端可顯示「跳到該 job 進度頁」) + var aje *conversion.ActiveJobError + if errors.As(err, &aje) && aje != nil { + var jobJSON any + if aje.Job != nil { + jobJSON = jobToResponse(aje.Job) + } + writeConversionErrorWithExtra(c, status, code, message, nil, gin.H{ + "active_job": jobJSON, + }) + return + } + + // ConverterValidationError — 帶 details.fields + var cve *conversion.ConverterValidationError + if errors.As(err, &cve) && cve != nil { + details := make([]FieldError, 0, len(cve.Fields)) + for _, f := range cve.Fields { + details = append(details, FieldError{Field: f.Field, Message: f.Message}) + } + WriteError(c, status, code, message, details) + return + } + + // 一般 sentinel + WriteError(c, status, code, message, nil) +} + +// writeConversionErrorWithExtra 是 WriteError 的擴充版本 — 額外帶 extra 結構化資料。 +// +// 用於 ActiveJobError 等需要在 error body 內帶結構化 detail 的場景。 +// +// 為什麼不直接複用 errors.go 的 WriteError: +// WriteError 簽章是 (status, code, message, details []FieldError) — details 為陣列; +// ActiveJobError 要帶的是 object(active_job)。errors.go 的 ErrorDetail 已預留 Extra +// 欄位給此用途。 +func 
// conversionErrorMessages holds the default zh-TW user-facing message for each
// conversion error code.
var conversionErrorMessages = map[string]string{
	"validation_failed":     "上傳的內容不符合要求",
	"unauthorized":          "請先登入",
	"forbidden":             "你無權存取此任務",
	"not_found":             "任務不存在",
	"active_job_exists":     "你目前已有進行中的轉檔任務",
	"job_not_completed":     "任務尚未完成",
	"payload_too_large":     "檔案超過大小限制",
	"converter_unavailable": "轉檔服務暫時無法使用",
	"faa_unavailable":       "檔案存取服務暫時無法使用",
	"download_token_failed": "無法取得下載授權",
	"mc_token_unavailable":  "無法取得下載授權,請重試",
	"idp_misconfigured":     "系統設定錯誤,請聯絡支援",
	"idp_unavailable":       "認證服務暫時無法使用",
	"service_busy":          "系統繁忙,請稍後再試",
}

// errorMessageFor returns the default zh-TW user-facing message for a
// conversion error code.
//
// Codes without an entry (including the empty string) yield the generic
// internal-error message. The backend only ships this default text; the code
// itself is what a client would key a translation on.
func errorMessageFor(code string) string {
	if msg, known := conversionErrorMessages[code]; known {
		return msg
	}
	return "內部錯誤"
}
"expires_at": j.ExpiresAt, + "progress": j.Progress, + "stage_progress": j.StageProgress, // T7 review M-2: 對齊 api-conversion.md §1 範例顯式列出 stage_progress(即使為 0) + } + // 選填欄位 — 有值才寫 + if j.Stage != "" { + out["stage"] = j.Stage + } + if j.SourceFilename != "" { + out["source_filename"] = j.SourceFilename + } + if j.TargetChip != "" { + out["target_chip"] = j.TargetChip + } + if j.ErrorCode != "" { + out["error_code"] = j.ErrorCode + } + if j.ErrorMessage != "" { + out["error_message"] = j.ErrorMessage + } + return out +} diff --git a/visionA-backend/internal/api/conversion_test.go b/visionA-backend/internal/api/conversion_test.go new file mode 100644 index 0000000..d9a1eb7 --- /dev/null +++ b/visionA-backend/internal/api/conversion_test.go @@ -0,0 +1,638 @@ +// conversion_test.go — handler-level unit tests for /api/conversion/*。 +// +// 用 in-package stub 實作 conversion.Service,測 handler 層轉接、路由註冊、 +// 錯誤對應的正確性。實際 Service 行為(multipart 重組、ownership rebuild、 +// promote → FAA pull → finalize)由 internal/conversion/*_test.go 覆蓋。 +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/api/api-conversion.md) +package api + +import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "visiona-backend/internal/conversion" +) + +// ========================================================================== +// Service stub +// ========================================================================== + +// stubConversionService 是 conversion.Service 的測試 stub。 +// +// 每個 method 都有對應的 InitJobFn / GetJobFn / ... 
欄位,由 test case 注入想要的行為。 +// 沒注入的 method 預設回 (nil, nil) — 對應 method 不被呼叫的 case。 +// +// goroutine-safe:所有欄位由 test setup 階段一次性寫入,handler 呼叫時只讀。 +type stubConversionService struct { + mu sync.Mutex + + // 紀錄上一次呼叫的參數,給 test 驗 user_id 注入正確(trust boundary) + lastUserID string + lastJobID string + + InitJobFn func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) + GetJobFn func(ctx context.Context, userID, jobID string) (*conversion.Job, error) + ActiveJobFn func(ctx context.Context, userID string) (*conversion.Job, error) + PromoteFn func(ctx context.Context, userID, jobID, name string) (*conversion.PromoteResult, error) + DownloadFn func(ctx context.Context, userID, jobID string) (string, error) +} + +func (s *stubConversionService) InitJob(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + s.mu.Lock() + s.lastUserID = in.UserID + s.mu.Unlock() + if s.InitJobFn == nil { + return nil, errors.New("stub: InitJobFn not set") + } + return s.InitJobFn(ctx, in) +} + +func (s *stubConversionService) GetJob(ctx context.Context, userID, jobID string) (*conversion.Job, error) { + s.mu.Lock() + s.lastUserID = userID + s.lastJobID = jobID + s.mu.Unlock() + if s.GetJobFn == nil { + return nil, errors.New("stub: GetJobFn not set") + } + return s.GetJobFn(ctx, userID, jobID) +} + +func (s *stubConversionService) ActiveJob(ctx context.Context, userID string) (*conversion.Job, error) { + s.mu.Lock() + s.lastUserID = userID + s.mu.Unlock() + if s.ActiveJobFn == nil { + return nil, errors.New("stub: ActiveJobFn not set") + } + return s.ActiveJobFn(ctx, userID) +} + +func (s *stubConversionService) PromoteToModels(ctx context.Context, userID, jobID, name string) (*conversion.PromoteResult, error) { + s.mu.Lock() + s.lastUserID = userID + s.lastJobID = jobID + s.mu.Unlock() + if s.PromoteFn == nil { + return nil, errors.New("stub: PromoteFn not set") + } + return s.PromoteFn(ctx, userID, jobID, name) +} + +func (s *stubConversionService) 
DownloadRedirectURL(ctx context.Context, userID, jobID string) (string, error) { + s.mu.Lock() + s.lastUserID = userID + s.lastJobID = jobID + s.mu.Unlock() + if s.DownloadFn == nil { + return "", errors.New("stub: DownloadFn not set") + } + return s.DownloadFn(ctx, userID, jobID) +} + +// ========================================================================== +// Fixture +// ========================================================================== + +// newConversionFixture 建一個只裝 conversion routes 的 gin engine。 +// +// 所有 handler 都跑在 injectStaticUserContext("demo-user", ...) 之後 — +// 模擬「user 已登入」場景;驗 AuthMiddleware 行為由 oidc_auth_test 負責。 +func newConversionFixture(t *testing.T, svc conversion.Service) *gin.Engine { + t.Helper() + r := gin.New() + r.Use(RequestIDMiddleware()) + r.Use(injectStaticUserContext("demo-user", "demo@example.com")) + g := r.Group("/api") + registerConversionRoutes(g, Deps{Conversion: svc}) + return r +} + +// sampleJob 是一個典型的成功 job — 給 happy path 用。 +func sampleJob() *conversion.Job { + now := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC) + return &conversion.Job{ + JobID: "job-abc-123", + Status: "running", + Stage: "onnx", + Progress: 0, + StageProgress: 0, + CreatedAt: now, + UpdatedAt: now, + ExpiresAt: now.Add(7 * 24 * time.Hour), + SourceFilename: "yolov5s.onnx", + TargetChip: "720", + } +} + +// ========================================================================== +// 0. 
共通:未啟用時 5 個 endpoint 全 501 +// ========================================================================== + +// TestConversion_Disabled_All501 — 當 deps.Conversion = nil 時,5 個 endpoint 全回 501。 +// +// 對齊 main.go:cfg.Conversion.Enabled() == false 時 deps.Conversion 為 nil。 +func TestConversion_Disabled_All501(t *testing.T) { + r := gin.New() + r.Use(RequestIDMiddleware()) + r.Use(injectStaticUserContext("demo-user", "")) + g := r.Group("/api") + registerConversionRoutes(g, Deps{Conversion: nil}) // 未啟用 + + cases := []struct { + method string + path string + }{ + {http.MethodPost, "/api/conversion/init"}, + {http.MethodGet, "/api/conversion/active"}, + {http.MethodGet, "/api/conversion/job-1"}, + {http.MethodPost, "/api/conversion/job-1/promote-to-models"}, + {http.MethodGet, "/api/conversion/job-1/download"}, + } + for _, c := range cases { + t.Run(c.method+" "+c.path, func(t *testing.T) { + req := httptest.NewRequest(c.method, c.path, nil) + if c.method == http.MethodPost { + req.Header.Set("Content-Type", "application/json") + } + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, http.StatusNotImplemented, w.Code, + "%s %s should be 501 when Conversion=nil; body=%s", c.method, c.path, w.Body.String()) + }) + } +} + +// ========================================================================== +// 1. 
POST /api/conversion/init +// ========================================================================== + +// TestConversion_Init_HappyPath — 成功 init 回 201 + Job。 +func TestConversion_Init_HappyPath(t *testing.T) { + job := sampleJob() + svc := &stubConversionService{ + InitJobFn: func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + // 驗 user_id 正確注入(trust boundary) + require.Equal(t, "demo-user", in.UserID) + require.NotEmpty(t, in.ContentType) + require.NotNil(t, in.Body) + // 驗 body 有內容(streaming reader 還沒被讀) + b, err := io.ReadAll(in.Body) + require.NoError(t, err) + require.Contains(t, string(b), "fake-multipart") + return job, nil + }, + } + r := newConversionFixture(t, svc) + + body := strings.NewReader("--xyz\r\nContent-Disposition: form-data; name=\"fake-multipart\"\r\n\r\ndata\r\n--xyz--\r\n") + req := httptest.NewRequest(http.MethodPost, "/api/conversion/init", body) + req.Header.Set("Content-Type", "multipart/form-data; boundary=xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusCreated, w.Code, "body=%s", w.Body.String()) + + var sb SuccessBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &sb)) + data := sb.Data.(map[string]any) + assert.Equal(t, "job-abc-123", data["job_id"]) + assert.Equal(t, "running", data["status"]) + assert.Equal(t, "yolov5s.onnx", data["source_filename"]) + assert.Equal(t, "720", data["target_chip"]) +} + +// TestConversion_Init_BadContentType — Content-Type 非 multipart/form-data 回 400。 +// +// 這擋下 client 傳 JSON 等錯誤格式(避免 Service 層白白讀完 body 才發現格式錯)。 +func TestConversion_Init_BadContentType(t *testing.T) { + svc := &stubConversionService{} // 不應該被呼叫 + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/init", + strings.NewReader(`{"foo":"bar"}`)) + req.Header.Set("Content-Type", "application/json") // 錯誤 + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadRequest, w.Code) + 
assert.Contains(t, w.Body.String(), ErrCodeValidationFailed) + assert.Contains(t, w.Body.String(), "multipart/form-data") +} + +// TestConversion_Init_ActiveJobError — ActiveJobError 回 409 + extra.active_job。 +// +// 這個 case 驗 handleConversionError 對 errors.As(*ActiveJobError) 的特殊處理。 +func TestConversion_Init_ActiveJobError(t *testing.T) { + existingJob := &conversion.Job{ + JobID: "job-existing-456", + Status: "running", + Stage: "bie", + Progress: 45, + } + svc := &stubConversionService{ + InitJobFn: func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + return nil, &conversion.ActiveJobError{Job: existingJob} + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/init", + strings.NewReader("--xyz\r\n--xyz--\r\n")) + req.Header.Set("Content-Type", "multipart/form-data; boundary=xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusConflict, w.Code, "body=%s", w.Body.String()) + + var eb ErrorBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &eb)) + require.NotNil(t, eb.Error) + assert.Equal(t, "active_job_exists", eb.Error.Code) + require.NotNil(t, eb.Error.Extra) + activeJob, ok := eb.Error.Extra["active_job"].(map[string]any) + require.True(t, ok, "extra.active_job should be object; got %v", eb.Error.Extra) + assert.Equal(t, "job-existing-456", activeJob["job_id"]) +} + +// TestConversion_Init_ValidationError — ConverterValidationError 回 400 + details.fields。 +func TestConversion_Init_ValidationError(t *testing.T) { + svc := &stubConversionService{ + InitJobFn: func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + return nil, &conversion.ConverterValidationError{ + Fields: []conversion.ValidationFieldError{ + {Field: "model_id", Message: "must be 1-65535"}, + }, + Message: "validation failed", + } + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, 
"/api/conversion/init", + strings.NewReader("--xyz\r\n--xyz--\r\n")) + req.Header.Set("Content-Type", "multipart/form-data; boundary=xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusBadRequest, w.Code, "body=%s", w.Body.String()) + var eb ErrorBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &eb)) + assert.Equal(t, "validation_failed", eb.Error.Code) + require.Len(t, eb.Error.Details, 1) + assert.Equal(t, "model_id", eb.Error.Details[0].Field) +} + +// TestConversion_Init_ConverterUnavailable — 502 mapping。 +func TestConversion_Init_ConverterUnavailable(t *testing.T) { + svc := &stubConversionService{ + InitJobFn: func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + return nil, conversion.ErrConverterUnavailable + }, + } + r := newConversionFixture(t, svc) + req := httptest.NewRequest(http.MethodPost, "/api/conversion/init", + strings.NewReader("--xyz\r\n--xyz--\r\n")) + req.Header.Set("Content-Type", "multipart/form-data; boundary=xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadGateway, w.Code) + assert.Contains(t, w.Body.String(), "converter_unavailable") +} + +// ========================================================================== +// 2. 
GET /api/conversion/active +// ========================================================================== + +func TestConversion_Active_HasActive(t *testing.T) { + job := sampleJob() + svc := &stubConversionService{ + ActiveJobFn: func(ctx context.Context, userID string) (*conversion.Job, error) { + require.Equal(t, "demo-user", userID) + return job, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/active", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code, "body=%s", w.Body.String()) + var sb SuccessBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &sb)) + data := sb.Data.(map[string]any) + assert.Equal(t, true, data["has_active"]) + jobMap, ok := data["job"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "job-abc-123", jobMap["job_id"]) +} + +func TestConversion_Active_NoActive(t *testing.T) { + svc := &stubConversionService{ + ActiveJobFn: func(ctx context.Context, userID string) (*conversion.Job, error) { + return nil, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/active", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + var sb SuccessBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &sb)) + data := sb.Data.(map[string]any) + assert.Equal(t, false, data["has_active"]) + assert.Nil(t, data["job"]) +} + +func TestConversion_Active_ConverterUnavailable(t *testing.T) { + svc := &stubConversionService{ + ActiveJobFn: func(ctx context.Context, userID string) (*conversion.Job, error) { + return nil, conversion.ErrConverterUnavailable + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/active", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, http.StatusBadGateway, w.Code) +} + +// 
========================================================================== +// 3. GET /api/conversion/{job_id} +// ========================================================================== + +func TestConversion_Get_HappyPath(t *testing.T) { + job := sampleJob() + svc := &stubConversionService{ + GetJobFn: func(ctx context.Context, userID, jobID string) (*conversion.Job, error) { + require.Equal(t, "demo-user", userID) + require.Equal(t, "job-abc-123", jobID) + return job, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/job-abc-123", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code, "body=%s", w.Body.String()) + var sb SuccessBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &sb)) + data := sb.Data.(map[string]any) + assert.Equal(t, "job-abc-123", data["job_id"]) + assert.Equal(t, "running", data["status"]) +} + +func TestConversion_Get_NotFound(t *testing.T) { + svc := &stubConversionService{ + GetJobFn: func(ctx context.Context, userID, jobID string) (*conversion.Job, error) { + return nil, conversion.ErrJobNotFound + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/missing-job", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + assert.Equal(t, http.StatusNotFound, w.Code) + assert.Contains(t, w.Body.String(), "not_found") +} + +// ========================================================================== +// 4. 
POST /api/conversion/{job_id}/promote-to-models +// ========================================================================== + +func TestConversion_Promote_HappyPath(t *testing.T) { + now := time.Date(2026, 4, 30, 12, 30, 0, 0, time.UTC) + res := &conversion.PromoteResult{ + ModelID: "model-xyz", + Source: "converted", + SourceJobID: "job-abc-123", + Name: "yolo_kl720", + TargetChip: "kl720", + FileSize: 12345, + Status: "ready", + CreatedAt: now, + } + svc := &stubConversionService{ + PromoteFn: func(ctx context.Context, userID, jobID, name string) (*conversion.PromoteResult, error) { + require.Equal(t, "demo-user", userID) + require.Equal(t, "job-abc-123", jobID) + require.Equal(t, "yolo_kl720", name) + return res, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/job-abc-123/promote-to-models", + strings.NewReader(`{"name":"yolo_kl720"}`)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, http.StatusCreated, w.Code, "body=%s", w.Body.String()) + + var sb SuccessBody + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &sb)) + data := sb.Data.(map[string]any) + assert.Equal(t, "model-xyz", data["model_id"]) + assert.Equal(t, "converted", data["source"]) + assert.Equal(t, "ready", data["status"]) +} + +// TestConversion_Promote_NoBody — 沒帶 body 也應該成功(name 可為空)。 +func TestConversion_Promote_NoBody(t *testing.T) { + svc := &stubConversionService{ + PromoteFn: func(ctx context.Context, userID, jobID, name string) (*conversion.PromoteResult, error) { + require.Equal(t, "", name) // body 沒帶 → name 為空,由 Service fallback + return &conversion.PromoteResult{ModelID: "m1", Source: "converted", Status: "ready"}, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/job-abc/promote-to-models", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, 
http.StatusCreated, w.Code, "body=%s", w.Body.String()) +} + +func TestConversion_Promote_BadJSON(t *testing.T) { + svc := &stubConversionService{} // 不該被呼叫 + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/job/promote-to-models", + strings.NewReader(`{not valid json`)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, http.StatusBadRequest, w.Code) + assert.Contains(t, w.Body.String(), ErrCodeValidationFailed) +} + +func TestConversion_Promote_JobNotCompleted(t *testing.T) { + svc := &stubConversionService{ + PromoteFn: func(ctx context.Context, userID, jobID, name string) (*conversion.PromoteResult, error) { + return nil, conversion.ErrJobNotCompleted + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodPost, "/api/conversion/job-abc/promote-to-models", + strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, http.StatusConflict, w.Code) + assert.Contains(t, w.Body.String(), "job_not_completed") +} + +// ========================================================================== +// 5. 
GET /api/conversion/{job_id}/download +// ========================================================================== + +func TestConversion_Download_HappyPath302(t *testing.T) { + target := "http://192.168.0.130:5081/files/models/u/job.nef?access_token=opaque-xyz" + svc := &stubConversionService{ + DownloadFn: func(ctx context.Context, userID, jobID string) (string, error) { + require.Equal(t, "demo-user", userID) + require.Equal(t, "job-abc", jobID) + return target, nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/job-abc/download", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + assert.Equal(t, http.StatusFound, w.Code) // 302 + assert.Equal(t, target, w.Header().Get("Location")) + + // 防快取 header — token 不該被 browser cache(§10.4) + assert.Contains(t, w.Header().Get("Cache-Control"), "no-store") + assert.Equal(t, "no-cache", w.Header().Get("Pragma")) +} + +func TestConversion_Download_JobNotCompleted(t *testing.T) { + svc := &stubConversionService{ + DownloadFn: func(ctx context.Context, userID, jobID string) (string, error) { + return "", conversion.ErrJobNotCompleted + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/job-abc/download", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + // 錯誤情況**不 redirect** — 回標準 JSON error + assert.Equal(t, http.StatusConflict, w.Code) + assert.Contains(t, w.Body.String(), "job_not_completed") + assert.NotEqual(t, http.StatusFound, w.Code, "error case must not 302 redirect") +} + +func TestConversion_Download_NotFound(t *testing.T) { + svc := &stubConversionService{ + DownloadFn: func(ctx context.Context, userID, jobID string) (string, error) { + return "", conversion.ErrJobNotFound + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/missing/download", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, 
http.StatusNotFound, w.Code) +} + +func TestConversion_Download_MCTokenUnavailable(t *testing.T) { + svc := &stubConversionService{ + DownloadFn: func(ctx context.Context, userID, jobID string) (string, error) { + return "", conversion.ErrMCTokenUnavailable + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/job/download", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + assert.Equal(t, http.StatusBadGateway, w.Code) + assert.Contains(t, w.Body.String(), "mc_token_unavailable") +} + +// ========================================================================== +// User_id trust boundary +// ========================================================================== + +// TestConversion_Init_IgnoresClientUserID — 即使 multipart form 帶 user_id,handler +// 仍只把 cookie session 的 UserID 傳給 Service。 +// +// 這是 trust boundary 的回歸測試(conversion.md §7)。實際 multipart 重組 / 黑名單 +// 邏輯在 Service 層做(flow.go rebuildMultipart),但 handler 必須確保傳給 Service 的 +// InitJobInput.UserID 永遠是 UserContext 的,不是 client 提供的。 +func TestConversion_Init_IgnoresClientUserID(t *testing.T) { + svc := &stubConversionService{ + InitJobFn: func(ctx context.Context, in conversion.InitJobInput) (*conversion.Job, error) { + // 即使 client 在 multipart 內塞了 user_id=attacker,handler 給 Service 的 UserID + // 必須是 demo-user(從 UserContext 拿) + require.Equal(t, "demo-user", in.UserID) + return sampleJob(), nil + }, + } + r := newConversionFixture(t, svc) + + // 一個包含 user_id=attacker 的 multipart body — 應被忽略 + body := strings.NewReader( + "--xyz\r\n" + + "Content-Disposition: form-data; name=\"user_id\"\r\n\r\n" + + "attacker\r\n" + + "--xyz\r\n" + + "Content-Disposition: form-data; name=\"model\"\r\n\r\n" + + "data\r\n" + + "--xyz--\r\n", + ) + req := httptest.NewRequest(http.MethodPost, "/api/conversion/init", body) + req.Header.Set("Content-Type", "multipart/form-data; boundary=xyz") + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + + require.Equal(t, 
http.StatusCreated, w.Code, "body=%s", w.Body.String()) +} + +// TestConversion_GetJob_IgnoresQueryUserID — query 帶 user_id 不影響 handler +// 傳給 Service 的 userID(仍是 UserContext 拿到的)。 +func TestConversion_GetJob_IgnoresQueryUserID(t *testing.T) { + svc := &stubConversionService{ + GetJobFn: func(ctx context.Context, userID, jobID string) (*conversion.Job, error) { + require.Equal(t, "demo-user", userID, "user_id from query must be ignored") + return sampleJob(), nil + }, + } + r := newConversionFixture(t, svc) + + req := httptest.NewRequest(http.MethodGet, "/api/conversion/job-abc?user_id=attacker", nil) + w := httptest.NewRecorder() + r.ServeHTTP(w, req) + require.Equal(t, http.StatusOK, w.Code) +} diff --git a/visionA-backend/internal/config/config.go b/visionA-backend/internal/config/config.go index 525a0b2..f5b0e58 100644 --- a/visionA-backend/internal/config/config.go +++ b/visionA-backend/internal/config/config.go @@ -21,6 +21,9 @@ type Config struct { Tunnel TunnelConfig Logger LoggerConfig CORS CORSConfig + // Conversion 控制 Phase 0.8 轉檔功能整合(converter / FAA / MC service token)。 + // 對齊 .autoflow/04-architecture/conversion.md §5.3。 + Conversion ConversionConfig } // ServerConfig 控制 HTTP listener 的位址與埠號。 @@ -177,6 +180,49 @@ type LoggerConfig struct { Level string // VISIONA_LOG_LEVEL:debug / info / warn / error,預設 "info" } +// ConversionConfig 控制 Phase 0.8 轉檔功能整合。 +// +// 對齊 .autoflow/04-architecture/conversion.md §5.3。 +// +// 啟用判定(由 main.go 在 wire 階段檢查):當 ConverterBaseURL 與 FAABaseURL 都非空時, +// 才會 wire conversion.Service 進 api.Deps。其中之一為空 → 不啟用(5 個 endpoint 回 501)。 +// +// 進一步:啟用時 ServiceClientID/Secret 必須非空(轉檔依賴 service token 機制); +// 不對齊時 main.go fatal log 退出(避免半設定狀態跑進生產)。 +type ConversionConfig struct { + // ConverterBaseURL 是 kneron_model_converter task-scheduler 服務的 base URL。 + // 例:http://192.168.0.130:9501(dev / stage) / https://converter.visiona.cloud(prod) + // 對齊 VISIONA_CONVERTER_BASE_URL;留空 = 不啟用 Phase 0.8 轉檔功能。 + ConverterBaseURL string + + // 
// ConversionConfig holds the settings for the Phase 0.8 model-conversion
// integration.
//
// The feature counts as enabled only when both ConverterBaseURL and
// FAABaseURL are set (see Enabled); otherwise the conversion service is not
// wired and the conversion endpoints answer 501.
//
// NOTE(review): the original comment also required service client
// credentials to be non-empty when enabled, but no such fields exist on this
// struct — confirm where those are configured.
type ConversionConfig struct {
	// ConverterBaseURL is the base URL of the converter task-scheduler service
	// (VISIONA_CONVERTER_BASE_URL). Empty disables the feature.
	ConverterBaseURL string

	// FAABaseURL is the base URL of the File Access Agent
	// (VISIONA_FAA_BASE_URL). Empty disables the feature.
	FAABaseURL string

	// TenantID is the tenant id registered with Member Center
	// (VISIONA_OIDC_TENANT_ID); it is sent as tenant_id when requesting a
	// delegated download token.
	TenantID string

	// DelegatedTTLSeconds is the lifetime, in seconds, requested for delegated
	// download tokens (VISIONA_FAA_DELEGATED_TTL_SECONDS). Defaults to 300;
	// the intended range is 60-900, which is not enforced in this type.
	DelegatedTTLSeconds int

	// MaxModelSizeMB is the upload size limit for model files, in MB
	// (VISIONA_CONVERTER_MAX_MODEL_SIZE_MB). Defaults to 500 to match the
	// converter's own default limit.
	MaxModelSizeMB int
}

// Enabled reports whether the conversion feature is configured, i.e. both the
// converter and the FAA base URLs are non-empty.
func (c ConversionConfig) Enabled() bool {
	if c.ConverterBaseURL == "" || c.FAABaseURL == "" {
		return false
	}
	return true
}
a/visionA-backend/internal/config/load_test.go +++ b/visionA-backend/internal/config/load_test.go @@ -265,3 +265,66 @@ func TestLoad_CORSAllowedOrigins(t *testing.T) { cfg = Load() assert.Nil(t, cfg.CORS.AllowedOrigins) } + +// TestLoad_ConversionDefaults 驗證 Phase 0.8 conversion 欄位的預設行為。 +// +// 對齊 .autoflow/04-architecture/conversion.md §5.3:留空時 Enabled() 為 false, +// 5 個 endpoint 不會 wire(main.go 在 wire 階段會跳過)。 +func TestLoad_ConversionDefaults(t *testing.T) { + for _, k := range []string{ + "VISIONA_CONVERTER_BASE_URL", "VISIONA_FAA_BASE_URL", "VISIONA_OIDC_TENANT_ID", + "VISIONA_FAA_DELEGATED_TTL_SECONDS", "VISIONA_CONVERTER_MAX_MODEL_SIZE_MB", + } { + t.Setenv(k, "") + } + + cfg := Load() + assert.Empty(t, cfg.Conversion.ConverterBaseURL) + assert.Empty(t, cfg.Conversion.FAABaseURL) + assert.Empty(t, cfg.Conversion.TenantID) + assert.Equal(t, 300, cfg.Conversion.DelegatedTTLSeconds, "預設 5 分鐘 TTL") + assert.Equal(t, 500, cfg.Conversion.MaxModelSizeMB, "預設 500 MB(與 converter 對齊)") + assert.False(t, cfg.Conversion.Enabled(), "URL 全空 → 不啟用") +} + +// TestLoad_ConversionEnabled 驗證 Conversion.Enabled() 的判定邏輯。 +func TestLoad_ConversionEnabled(t *testing.T) { + cases := []struct { + name string + converter string + faa string + wantEnabled bool + }{ + {"both_set_enables", "http://converter:9501", "http://faa:5081", true}, + {"only_converter_disabled", "http://converter:9501", "", false}, + {"only_faa_disabled", "", "http://faa:5081", false}, + {"both_empty_disabled", "", "", false}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Setenv("VISIONA_CONVERTER_BASE_URL", tc.converter) + t.Setenv("VISIONA_FAA_BASE_URL", tc.faa) + cfg := Load() + assert.Equal(t, tc.wantEnabled, cfg.Conversion.Enabled()) + }) + } +} + +// TestLoad_ConversionAllSet 驗證所有欄位設定後正確讀取。 +func TestLoad_ConversionAllSet(t *testing.T) { + t.Setenv("VISIONA_CONVERTER_BASE_URL", "http://192.168.0.130:9501") + t.Setenv("VISIONA_FAA_BASE_URL", 
"http://192.168.0.130:5081") + t.Setenv("VISIONA_OIDC_TENANT_ID", "fake-tenant-id-for-test") + t.Setenv("VISIONA_FAA_DELEGATED_TTL_SECONDS", "600") + t.Setenv("VISIONA_CONVERTER_MAX_MODEL_SIZE_MB", "300") + + cfg := Load() + assert.Equal(t, "http://192.168.0.130:9501", cfg.Conversion.ConverterBaseURL) + assert.Equal(t, "http://192.168.0.130:5081", cfg.Conversion.FAABaseURL) + assert.Equal(t, "fake-tenant-id-for-test", cfg.Conversion.TenantID) + assert.Equal(t, 600, cfg.Conversion.DelegatedTTLSeconds) + assert.Equal(t, 300, cfg.Conversion.MaxModelSizeMB) + assert.True(t, cfg.Conversion.Enabled()) +} diff --git a/visionA-backend/internal/conversion/conversion.go b/visionA-backend/internal/conversion/conversion.go new file mode 100644 index 0000000..70209f7 --- /dev/null +++ b/visionA-backend/internal/conversion/conversion.go @@ -0,0 +1,164 @@ +// Package conversion 實作 Phase 0.8 轉檔功能整合。 +// +// 對齊文件: +// - .autoflow/02-prd/features/feature-converter-integration.md(PRD) +// - .autoflow/04-architecture/conversion.md(TDD 主文件) +// - .autoflow/04-architecture/api/api-conversion.md(API 規格) +// - .autoflow/04-architecture/adr/adr-014-conversion-integration.md(架構決策) +// +// 與 internal/converter/ 的關係: +// +// internal/converter/ 是 Phase 0 / Phase 2 規劃時 PM 寫的 stub interface, +// scope 與 Phase 0.8 不同(Phase 0 規劃的是「自動推入模型庫」端到端 flow)。 +// Phase 0.8 改為半自動 + streaming proxy + 三方 token 機制,重新設計 internal/conversion/ +// 實作;舊的 internal/converter/ 套件保留在 codebase 中(對 frontend / 其他模組無依賴), +// 等 Phase 0.8 整合完成後可由 Architect 評估是否清除。 +// +// 套件邊界: +// - 對 handler 層只暴露 `Service` interface(FAANG 慣例:DI-friendly、unit test 友善) +// - 內部模組(converter_client / faa_client / mc_token_client / ownership / flow)對 handler 不可見 +// - 所有 Phase 0.8 流程的協調點在 flow.go 的 `Flow` struct +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2) +package conversion + +import ( + "context" + "io" + "time" +) + +// Service 是 handler 層的單一進入點,匹配 5 個對外 endpoint 的能力。 +// +// 實作:`Flow`(flow.go)。 +// +// 設計原則: 
// Service is the single entry point used by the handler layer; its five
// methods correspond to the five public conversion endpoints.
//
// Implementation: `Flow` (flow.go).
//
// Design principles:
//   - Every method takes ctx first. The caller identity is always passed
//     explicitly by the handler: as the userID argument, or as
//     InitJobInput.UserID in the case of InitJob (trust boundary).
//   - A user_id supplied by the client is never trusted; the identity is
//     meant to be the OIDC sub that the auth middleware resolves from the
//     cookie session (see conversion.md §7.1).
//   - No method accepts a user_id taken from the request body, query or headers.
type Service interface {
	// InitJob streams the client's multipart body through to the converter and
	// creates a conversion job.
	//
	// Intended behaviour (see conversion.md §4.2 / §4.3):
	//  1. The multipart body is re-assembled with io.Pipe + multipart.Reader/Writer
	//     (streaming proxy; the body is not buffered fully in RAM).
	//  2. Any user_id field sent by the client is dropped; InitJobInput.UserID
	//     always wins.
	//  3. The call returns only after the converter has answered 201 (no
	//     early return, to avoid a misleading progress bar; §4.3.1).
	//  4. On success the ownership mapping jobID → userID is recorded
	//     (aligned with the converter's 7-day expiry).
	//
	// Failure handling:
	//   - converter 4xx → the error code is passed through (conversion.md §6 mapping).
	//   - converter 5xx / network → NOTE(review): this comment previously said
	//     "retry (see §9.1)", but the converter client's InitJob does not retry
	//     because the streaming body cannot be replayed — confirm what the flow
	//     layer is expected to do.
	//   - client disconnect / ctx cancel → goroutine cleanup plus a best-effort
	//     cancel sent to the converter (see the §4.3.2 cleanup chain).
	InitJob(ctx context.Context, in InitJobInput) (*Job, error)

	// GetJob returns the converter-side status of a job after an ownership check.
	//
	// This backs frontend polling; the design calls for caching the converter
	// response for 1-2s so polling does not hit the converter directly
	// (TODO confirm against the implementation).
	//
	// Failure handling:
	//   - ownership mismatch → NOTE(review): this comment previously said
	//     ErrForbidden, while the change description states that an ownership
	//     mismatch is reported as ErrJobNotFound (anti-enumeration, §7.2) —
	//     verify against flow.go.
	//   - job does not exist → ErrJobNotFound
	//   - converter 5xx / network → ErrConverterUnavailable once retries are exhausted
	GetJob(ctx context.Context, userID, jobID string) (*Job, error)

	// PromoteToModels runs the "add to model library" flow.
	//
	// Steps (see conversion.md §1 Stage 3a + §2.5):
	//  1. ownership.Check(userID, jobID)
	//  2. ensurePromoted(jobID) — idempotent: reuse the cached result when the
	//     job was already promoted, otherwise call the converter
	//  3. faa.Download(promotedKey) — server-to-server pull with a service
	//     token (scope=files:download.read)
	//  4. go through the existing /api/models/init + /api/models/finalize
	//     logic instead of bypassing it
	//  5. fill in model.Source="converted" and model.SourceJobID=jobID
	//
	// Idempotency: repeated calls for the same jobID return the existing
	// modelID when a model record was already created, rather than a new one.
	//
	// name is the single field from the Phase 0.8 wireframe §7.1 (there is no
	// description field).
	PromoteToModels(ctx context.Context, userID, jobID, name string) (*PromoteResult, error)

	// DownloadRedirectURL produces the URL for the server-side 302 download redirect.
	//
	// The handler is expected to redirect to it directly
	// (c.Redirect(http.StatusFound, url)); the token must not appear in any
	// JSON response nor be handed to frontend JS (see conversion.md §10.4).
	//
	// Steps (see conversion.md §1 Stage 3b):
	//  1. ownership check
	//  2. ensurePromoted (shares its cache with PromoteToModels)
	//  3. POST /file-access/download-tokens on MC to obtain a delegated token
	//     (scope=files:download.delegate, TTL 5 minutes)
	//  4. build the URL, presumably of the form
	//     {FAA base URL}/files/{key}?access_token={token} — the placeholders
	//     were missing from the original comment; TODO confirm
	//
	// Modelled on the FAA TestSite `DownloadFileDirect` pattern (conversion.md §3.1).
	DownloadRedirectURL(ctx context.Context, userID, jobID string) (string, error)

	// ActiveJob reports the user's current active job, if any; the frontend
	// `/conversion` page uses it as a pre-check on load.
	//
	// Restart recovery (A4 lazy rebuild, see conversion.md §2.6.1):
	//  1. look up the in-memory ownership map first
	//  2. on a miss, fall back to the converter:
	//     GET /api/v1/jobs?user_id={userID}&status=in_progress
	//  3. if the converter reports an active job, rebuild ownership and return it
	//
	// This is transparent to the frontend (same endpoint, same response shape).
	//
	// When there is no active job the result is (nil, nil); that is not an error.
	ActiveJob(ctx context.Context, userID string) (*Job, error)
}
// PromoteResult is the response shape of PromoteToModels, aligned with
// api-conversion.md §3. The JSON key names are asserted by
// TestPromoteResult_JSONShape in conversion_test.go.
type PromoteResult struct {
	ModelID     string    `json:"model_id"`
	Source      string    `json:"source"`                // always "converted"
	SourceJobID string    `json:"source_job_id"`         // converter job id
	Name        string    `json:"name"`
	TargetChip  string    `json:"target_chip,omitempty"` // aligned with the api-conversion.md §3 response; omitted when empty
	FileSize    int64     `json:"file_size"`
	Status      string    `json:"status"`                // reuses the model's existing status values ("ready" etc.)
	CreatedAt   time.Time `json:"created_at"`
}
noopService{} + +// TestService_InterfaceSatisfied 在 test 中再 assert 一次,作為文件性說明。 +func TestService_InterfaceSatisfied(t *testing.T) { + t.Parallel() + var _ Service = noopService{} +} + +// TestJob_JSONShape 驗證 Job struct 的 JSON tag 與 api-conversion.md §1-2 response 對齊。 +// +// 這是契約測試:frontend 依 api-conversion.md 寫 type;backend 改 json tag 一定要回頭看這個 test。 +func TestJob_JSONShape(t *testing.T) { + t.Parallel() + + createdAt, _ := time.Parse(time.RFC3339, "2026-04-30T12:00:00Z") + expiresAt := createdAt.Add(7 * 24 * time.Hour) + job := Job{ + JobID: "550e8400-e29b-41d4-a716-446655440000", + Status: "running", + Stage: "bie", + Progress: 45, + StageProgress: 60, + CreatedAt: createdAt, + UpdatedAt: createdAt.Add(5 * time.Minute), + ExpiresAt: expiresAt, + SourceFilename: "yolov5s.onnx", + TargetChip: "720", + } + + raw, err := json.Marshal(job) + require.NoError(t, err) + + // 必要欄位都在 + assert.Contains(t, string(raw), `"job_id":"550e8400-e29b-41d4-a716-446655440000"`) + assert.Contains(t, string(raw), `"status":"running"`) + assert.Contains(t, string(raw), `"stage":"bie"`) + assert.Contains(t, string(raw), `"progress":45`) + assert.Contains(t, string(raw), `"stage_progress":60`) + assert.Contains(t, string(raw), `"created_at":"2026-04-30T12:00:00Z"`) + assert.Contains(t, string(raw), `"expires_at":"2026-05-07T12:00:00Z"`) + assert.Contains(t, string(raw), `"source_filename":"yolov5s.onnx"`) + assert.Contains(t, string(raw), `"target_chip":"720"`) + + // error 欄位 zero value 時應被 omitempty 隱藏 + assert.NotContains(t, string(raw), `"error_code"`) + assert.NotContains(t, string(raw), `"error_message"`) +} + +// TestJob_FailedShape 驗證 failed job 的 error 欄位序列化。 +func TestJob_FailedShape(t *testing.T) { + t.Parallel() + + job := Job{ + JobID: "job-failed", + Status: "failed", + ErrorCode: "QUANTIZATION_FAILED", + ErrorMessage: "model has unsupported operator", + } + raw, err := json.Marshal(job) + require.NoError(t, err) + + assert.Contains(t, string(raw), 
`"error_code":"QUANTIZATION_FAILED"`) + assert.Contains(t, string(raw), `"error_message":"model has unsupported operator"`) +} + +// TestPromoteResult_JSONShape 對齊 api-conversion.md §3 response。 +func TestPromoteResult_JSONShape(t *testing.T) { + t.Parallel() + + createdAt, _ := time.Parse(time.RFC3339, "2026-04-30T12:30:00Z") + pr := PromoteResult{ + ModelID: "abc-123", + Source: "converted", + SourceJobID: "550e8400-...", + Name: "YOLOv5 Face KL520", + TargetChip: "kl520", + FileSize: 12345678, + Status: "ready", + CreatedAt: createdAt, + } + + raw, err := json.Marshal(pr) + require.NoError(t, err) + + assert.Contains(t, string(raw), `"model_id":"abc-123"`) + assert.Contains(t, string(raw), `"source":"converted"`) + assert.Contains(t, string(raw), `"source_job_id":"550e8400-..."`) + assert.Contains(t, string(raw), `"file_size":12345678`) + assert.Contains(t, string(raw), `"status":"ready"`) + assert.Contains(t, string(raw), `"target_chip":"kl520"`) +} + +// TestInitJobInput_AcceptsReader 驗證 InitJobInput.Body 接受 io.Reader(即 streaming 不收 buffer)。 +// +// 關鍵:若有人不小心把欄位改成 []byte,這個測試編譯會壞。 +func TestInitJobInput_AcceptsReader(t *testing.T) { + t.Parallel() + + in := InitJobInput{ + UserID: "user-abc", + ContentType: "multipart/form-data; boundary=xyz", + Body: strings.NewReader("--xyz--"), + ContentLength: 7, + } + + // 確認 Body 是 io.Reader(compile time 透過 type assertion) + var _ io.Reader = in.Body + assert.Equal(t, "user-abc", in.UserID) +} diff --git a/visionA-backend/internal/conversion/converter_client.go b/visionA-backend/internal/conversion/converter_client.go new file mode 100644 index 0000000..8b1bde2 --- /dev/null +++ b/visionA-backend/internal/conversion/converter_client.go @@ -0,0 +1,892 @@ +// Converter client — visionA-backend 對 kneron_model_converter (task-scheduler) 的 HTTP client。 +// +// 對應 4 個 endpoint(見 kneron_model_converter/apps/task-scheduler/docs/openapi.yaml): +// - InitJob: POST /api/v1/jobs (multipart streaming proxy) +// - GetJob: GET 
// ConverterClient is the HTTP client for the converter task-scheduler.
//
// Every method:
//   - obtains a service token for the required scope through MCTokenClient
//     and sends it in the Authorization header;
//   - retries 5xx / network failures up to a per-endpoint limit (InitJob is
//     the exception and never retries);
//   - maps non-2xx responses onto the sentinel errors declared in errors.go.
//
// The implementation builds a fresh *http.Request for every call and keeps
// no mutable state of its own (token caching is delegated to MCTokenClient).
type ConverterClient interface {
	// InitJob proxies the caller's multipart body to the converter as a stream.
	//
	// scope: converter:job.write
	//
	// 5xx responses are not retried: the multipart body is a one-shot
	// io.Reader, so a retry would send a partially consumed body. The call
	// fails directly and the caller (flow.go) handles cleanup (§4.3.2).
	//
	// timeout: 30 minutes with the default InitHTTPClient (a 500MB upload on
	// a slow network may take 5-10 minutes).
	InitJob(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error)

	// GetJob fetches the state of a single job.
	//
	// scope: converter:job.read
	// retry: 5xx / network → up to 2 retries, i.e. at most 3 attempts, with
	// backoff between attempts.
	GetJob(ctx context.Context, jobID string) (*ConverterJob, error)

	// Promote moves the result file of the given stage of a finished job to FAA.
	//
	// scope: converter:job.write
	// retry: 5xx / network → up to 2 retries, i.e. at most 3 attempts
	// (converterMaxRetriesPromote). NOTE(review): this comment previously read
	// "max 2 attempts (1s, 2s backoff)"; the exact backoff values depend on
	// converterRetryBackoff — confirm against conversion.md §9.1.
	//
	// A 502 from the converter (e.g. file_gateway_unavailable) is mapped to
	// ErrFAAUnavailable: the converter could not reach FAA.
	Promote(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error)

	// ListInProgressJobs lists the in-progress jobs of one user (used by the
	// §2.6.1 lazy ownership rebuild).
	//
	// scope: converter:job.read
	// retry: 5xx / network → 1 retry, i.e. at most 2 attempts
	// (converterMaxRetriesList); this call is lightweight and not expected to
	// run routinely.
	//
	// 0 or 1 entries are expected (a user may only have one active job at a
	// time); a slice is returned to stay future-proof.
	ListInProgressJobs(ctx context.Context, userID string) ([]*ConverterJob, error)
}
// ConverterClientOpts carries the dependencies injected into NewConverterClient.
//
// HTTPClient, InitHTTPClient, Now and Logger are optional: NewConverterClient
// substitutes a default for each one left nil, which also makes it easy for
// unit tests to inject fakes.
type ConverterClientOpts struct {
	// BaseURL is the converter scheduler base URL. It should not end with a
	// slash; NewConverterClient trims trailing slashes anyway.
	// Example: http://192.168.0.130:9501
	BaseURL string

	// Tokens is the MCTokenClient used to obtain service tokens. Required:
	// NewConverterClient does not substitute a default for it.
	Tokens MCTokenClient

	// HTTPClient is optional; nil selects a default client with a 10s
	// timeout. Used by GetJob / Promote / ListInProgressJobs.
	HTTPClient *http.Client

	// InitHTTPClient is optional; nil selects a default client with a
	// 30-minute timeout, dedicated to InitJob's large uploads. It is kept
	// separate from HTTPClient so the two do not affect each other: GetJob is
	// called frequently while polling and wants a short timeout.
	InitHTTPClient *http.Client

	// Now is optional; nil selects time.Now. Tests inject a fake clock.
	Now func() time.Time

	// Logger is optional; nil selects slog.Default().
	Logger *slog.Logger
}
// converter scope(對齊 task-scheduler openapi.yaml securitySchemes.OAuth2ClientCredentials.scopes) + scopeConverterWrite = "converter:job.write" + scopeConverterRead = "converter:job.read" + + // HTTP timeout + converterDefaultHTTPTimeout = 10 * time.Second + converterInitHTTPTimeout = 30 * time.Minute // InitJob 大檔上傳 + + // retry 矩陣(對齊 conversion.md §9.1) + converterMaxRetriesGet = 2 // GetJob max 3 attempts (1 + 2 retries) + converterMaxRetriesPromote = 2 // Promote max 3 attempts (1 + 2 retries) + converterMaxRetriesList = 1 // List max 2 attempts (1 + 1 retry) + + // 退避 base + converterRetryBase = 500 * time.Millisecond + + // promote 預設 source(Phase 0.8 visionA 一律取 nef) + promoteDefaultSource = "nef" +) + +// ========================================================================== +// 構造 + 內部實作 +// ========================================================================== + +// converterClient 是 ConverterClient 的預設實作。 +// +// 套件內 unexported struct(caller 拿 interface),讓未來換實作不影響 caller。 +type converterClient struct { + baseURL string + tokens MCTokenClient + http *http.Client + httpInit *http.Client + now func() time.Time + logger *slog.Logger +} + +// NewConverterClient 建立一個 ConverterClient 實例。 +// +// 必填:BaseURL / Tokens。其他 optional。 +// 注意:constructor 不驗 BaseURL 連線;第一次呼叫 method 才會打網路。 +func NewConverterClient(opts ConverterClientOpts) ConverterClient { + httpClient := opts.HTTPClient + if httpClient == nil { + httpClient = &http.Client{Timeout: converterDefaultHTTPTimeout} + } + httpInit := opts.InitHTTPClient + if httpInit == nil { + httpInit = &http.Client{Timeout: converterInitHTTPTimeout} + } + now := opts.Now + if now == nil { + now = time.Now + } + logger := opts.Logger + if logger == nil { + logger = slog.Default() + } + return &converterClient{ + baseURL: strings.TrimRight(opts.BaseURL, "/"), + tokens: opts.Tokens, + http: httpClient, + httpInit: httpInit, + now: now, + logger: logger, + } +} + +// 
========================================================================== +// InitJob — multipart streaming proxy(不 retry 5xx) +// ========================================================================== + +func (c *converterClient) InitJob(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) { + if req.Body == nil { + return nil, fmt.Errorf("conversion/converter_client: InitJob body is required") + } + if req.BodyContentType == "" { + return nil, fmt.Errorf("conversion/converter_client: InitJob body content type is required (must contain multipart boundary)") + } + + token, err := c.tokens.ServiceToken(ctx, scopeConverterWrite) + if err != nil { + return nil, c.wrapTokenErr(err) + } + + endpoint := c.baseURL + "/api/v1/jobs" + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, req.Body) + if err != nil { + return nil, fmt.Errorf("%w: build init job request: %v", ErrConverterUnavailable, err) + } + // Content-Type 必須完整透傳(含 multipart boundary),不能讓 net/http 自動推導 + httpReq.Header.Set("Content-Type", req.BodyContentType) + httpReq.Header.Set("Accept", "application/json") + httpReq.Header.Set("Authorization", "Bearer "+token) + + startedAt := c.now() + res, err := c.httpInit.Do(httpReq) + duration := c.now().Sub(startedAt) + if err != nil { + // network / ctx cancel — 不 retry(streaming body 已耗盡) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + c.logger.Warn("conversion.converter.init_ctx_cancelled", + slog.String("user_id", req.UserID), + slog.Duration("duration", duration)) + return nil, err + } + c.logger.Warn("conversion.converter.init_network_error", + slog.String("user_id", req.UserID), + slog.Duration("duration", duration), + slog.String("err", truncate(err.Error(), 200))) + return nil, fmt.Errorf("%w: init job network error: %v", ErrConverterUnavailable, err) + } + defer res.Body.Close() + + bodyBytes, readErr := io.ReadAll(res.Body) + if readErr != nil { + 
c.logger.Warn("conversion.converter.init_body_read_failed", + slog.String("user_id", req.UserID), + slog.Int("status", res.StatusCode), + slog.String("err", truncate(readErr.Error(), 200))) + return nil, fmt.Errorf("%w: read init response body: %v", ErrConverterUnavailable, readErr) + } + + c.logger.Info("conversion.converter.init_response", + slog.String("user_id", req.UserID), + slog.String("source_filename", req.SourceFilename), + slog.String("platform", req.Platform), + slog.Int("status", res.StatusCode), + slog.Duration("duration", duration)) + + if res.StatusCode >= 200 && res.StatusCode < 300 { + return parseConverterJob(bodyBytes) + } + + // 非 2xx — 一律 mapping 成 sentinel(**包括 5xx 也直接 fail,不 retry**) + return nil, c.mapInitError(res.StatusCode, bodyBytes) +} + +// mapInitError 把 InitJob 的非 2xx response mapping 成 sentinel。 +// +// 對齊 task-scheduler openapi.yaml POST /api/v1/jobs 的 4xx / 5xx 與 §6 mapping。 +func (c *converterClient) mapInitError(status int, body []byte) error { + apiErr := parseAPIError(body) + + // 認證失敗(visionA service client 設定錯) + if status == http.StatusUnauthorized || status == http.StatusForbidden { + return fmt.Errorf("%w: init job %d", ErrServiceClientUnauthorized, status) + } + + // 409 user_has_active_job — wrap 成 ActiveJobError + if status == http.StatusConflict && apiErr.Code == "user_has_active_job" { + return &ActiveJobError{Job: extractActiveJobFromDetails(apiErr.Details)} + } + + // 400 validation_error / invalid_multipart — wrap 成 ConverterValidationError + if status == http.StatusBadRequest { + return &ConverterValidationError{ + Fields: extractFieldsFromDetails(apiErr.Details), + Message: apiErr.Message, + } + } + + if status == http.StatusRequestEntityTooLarge { + return fmt.Errorf("%w: init job %d (%s)", ErrPayloadTooLarge, status, apiErr.Code) + } + + if status == http.StatusServiceUnavailable { + // converter 503 service_busy(process semaphore 滿) + return fmt.Errorf("%w: init job %d (%s)", ErrServiceBusy, status, 
apiErr.Code) + } + + // 其他 4xx → validation 視為通用 mapping + if status >= 400 && status < 500 { + return fmt.Errorf("%w: init job %d (%s)", ErrValidationFailed, status, apiErr.Code) + } + + // 5xx — InitJob 不 retry,直接 mapping 成 ErrConverterUnavailable + return fmt.Errorf("%w: init job %d (%s)", ErrConverterUnavailable, status, apiErr.Code) +} + +// ========================================================================== +// GetJob — 標準 retry +// ========================================================================== + +func (c *converterClient) GetJob(ctx context.Context, jobID string) (*ConverterJob, error) { + if jobID == "" { + return nil, fmt.Errorf("conversion/converter_client: GetJob jobID is required") + } + + endpoint := c.baseURL + "/api/v1/jobs/" + url.PathEscape(jobID) + + body, err := c.doWithRetry(ctx, "get_job", jobID, scopeConverterRead, converterMaxRetriesGet, + func(token string) (*http.Request, error) { + req, rerr := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if rerr != nil { + return nil, rerr + } + req.Header.Set("Accept", "application/json") + req.Header.Set("Authorization", "Bearer "+token) + return req, nil + }, + c.mapGetJobError, + ) + if err != nil { + return nil, err + } + + return parseConverterJob(body) +} + +// mapGetJobError 把 GetJob 的非 2xx 對應到 sentinel。 +func (c *converterClient) mapGetJobError(status int, body []byte) error { + apiErr := parseAPIError(body) + + if status == http.StatusUnauthorized || status == http.StatusForbidden { + return fmt.Errorf("%w: get_job %d", ErrServiceClientUnauthorized, status) + } + if status == http.StatusNotFound { + return fmt.Errorf("%w: get_job %d (%s)", ErrJobNotFound, status, apiErr.Code) + } + if status >= 400 && status < 500 { + return fmt.Errorf("%w: get_job %d (%s)", ErrValidationFailed, status, apiErr.Code) + } + return fmt.Errorf("%w: get_job %d (%s)", ErrConverterUnavailable, status, apiErr.Code) +} + +// 
========================================================================== +// Promote — 標準 retry + FAA / job_not_completed 特殊 mapping +// ========================================================================== + +func (c *converterClient) Promote(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error) { + if jobID == "" { + return nil, fmt.Errorf("conversion/converter_client: Promote jobID is required") + } + if req.TargetObjectKey == "" { + return nil, fmt.Errorf("conversion/converter_client: Promote target_object_key is required") + } + source := req.Source + if source == "" { + source = promoteDefaultSource + } + + endpoint := c.baseURL + "/api/v1/jobs/" + url.PathEscape(jobID) + "/promote" + + // promote request body — 對齊 openapi.yaml PromoteRequest, + // 同時放 user_id 進 metadata(trust boundary 重申,§7.3) + bodyJSON, err := json.Marshal(map[string]any{ + "targets": []map[string]any{ + {"source": source, "target_object_key": req.TargetObjectKey}, + }, + "user_id": req.UserID, // converter Phase 1 不消費,但保留供 log / 未來啟用 + }) + if err != nil { + return nil, fmt.Errorf("%w: marshal promote request: %v", ErrConverterUnavailable, err) + } + + respBody, err := c.doWithRetry(ctx, "promote", jobID, scopeConverterWrite, converterMaxRetriesPromote, + func(token string) (*http.Request, error) { + r, rerr := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(bodyJSON)) + if rerr != nil { + return nil, rerr + } + r.Header.Set("Content-Type", "application/json") + r.Header.Set("Accept", "application/json") + r.Header.Set("Authorization", "Bearer "+token) + return r, nil + }, + c.mapPromoteError, + ) + if err != nil { + return nil, err + } + + return parseConverterPromoteResult(respBody) +} + +// mapPromoteError 把 Promote 的非 2xx 對應到 sentinel。 +// +// 特殊 mapping: +// - 502 file_gateway_unavailable → ErrFAAUnavailable +// - 503 auth_service_unavailable → ErrIDPUnavailable +// - 409 job_not_ready_for_promote / source_not_available 
→ ErrJobNotCompleted +func (c *converterClient) mapPromoteError(status int, body []byte) error { + apiErr := parseAPIError(body) + + if status == http.StatusUnauthorized || status == http.StatusForbidden { + return fmt.Errorf("%w: promote %d", ErrServiceClientUnauthorized, status) + } + if status == http.StatusNotFound { + return fmt.Errorf("%w: promote %d (%s)", ErrJobNotFound, status, apiErr.Code) + } + if status == http.StatusConflict { + // 兩種:job_not_ready_for_promote / source_not_available + return fmt.Errorf("%w: promote %d (%s)", ErrJobNotCompleted, status, apiErr.Code) + } + if status == http.StatusBadGateway { + // converter 端 FAA 不可達 / FAA 4xx + return fmt.Errorf("%w: promote %d (%s)", ErrFAAUnavailable, status, apiErr.Code) + } + if status == http.StatusServiceUnavailable { + // converter 端 MC 簽 token 失敗 + return fmt.Errorf("%w: promote %d (%s)", ErrIDPUnavailable, status, apiErr.Code) + } + if status == http.StatusBadRequest || status == http.StatusUnprocessableEntity { + return &ConverterValidationError{ + Fields: extractFieldsFromDetails(apiErr.Details), + Message: apiErr.Message, + } + } + if status >= 400 && status < 500 { + return fmt.Errorf("%w: promote %d (%s)", ErrValidationFailed, status, apiErr.Code) + } + return fmt.Errorf("%w: promote %d (%s)", ErrConverterUnavailable, status, apiErr.Code) +} + +// ========================================================================== +// ListInProgressJobs — lazy rebuild ownership 用 +// ========================================================================== + +func (c *converterClient) ListInProgressJobs(ctx context.Context, userID string) ([]*ConverterJob, error) { + if userID == "" { + return nil, fmt.Errorf("conversion/converter_client: ListInProgressJobs userID is required") + } + + q := url.Values{} + q.Set("user_id", userID) + q.Set("status", "in_progress") + endpoint := c.baseURL + "/api/v1/jobs?" 
+ q.Encode() + + body, err := c.doWithRetry(ctx, "list_jobs", userID, scopeConverterRead, converterMaxRetriesList, + func(token string) (*http.Request, error) { + r, rerr := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if rerr != nil { + return nil, rerr + } + r.Header.Set("Accept", "application/json") + r.Header.Set("Authorization", "Bearer "+token) + return r, nil + }, + c.mapListJobsError, + ) + if err != nil { + return nil, err + } + + return parseListJobs(body) +} + +// mapListJobsError 把 ListInProgressJobs 的非 2xx 對應到 sentinel。 +// +// list 不該回 404(user_id 沒 active 應回 200 + jobs:[]),所以 4xx 一律視為 validation。 +func (c *converterClient) mapListJobsError(status int, body []byte) error { + apiErr := parseAPIError(body) + + if status == http.StatusUnauthorized || status == http.StatusForbidden { + return fmt.Errorf("%w: list_jobs %d", ErrServiceClientUnauthorized, status) + } + if status >= 400 && status < 500 { + return fmt.Errorf("%w: list_jobs %d (%s)", ErrValidationFailed, status, apiErr.Code) + } + return fmt.Errorf("%w: list_jobs %d (%s)", ErrConverterUnavailable, status, apiErr.Code) +} + +// ========================================================================== +// HTTP 共用:retry / 錯誤分類 +// ========================================================================== + +// doWithRetry 是 GetJob / Promote / List 共用的 retry 執行器。 +// +// 與 mc_token_client.doWithRetry 結構類似但有以下差異: +// - 每次 attempt 內呼叫 ServiceToken 取最新 token(401 時 caller 不主動 invalidate cache — +// 設計取捨:避免 cache 被惡意 401 attack 反覆清空;正常 401 = secret 設定錯,retry 也沒用) +// - retry 次數由 caller 傳入(不同 endpoint 不同上限) +// - 4xx / 401 / 403 不 retry;5xx / network / timeout 可 retry +// - mapErr 由 caller 傳入,因為 GetJob / Promote / List 的 4xx mapping 細節不同 +// +// reqBuilder 是「每次 attempt 都重新建一個 *http.Request」的 closure +// — request body 可能在 retry 時已被讀完,必須重建。caller 內部用 bytes.NewReader 等可重建的 body。 +// — token 是 closure 參數,每次 attempt 都拿最新(也涵蓋 cache 過期 refresh 的場景) +func (c *converterClient) doWithRetry( + 
ctx context.Context, + endpointKind, label, scope string, + maxRetries int, + reqBuilder func(token string) (*http.Request, error), + mapErr func(status int, body []byte) error, +) ([]byte, error) { + var lastErr error + for attempt := 0; attempt <= maxRetries; attempt++ { + // retry 前檢查 ctx + if attempt > 0 { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(converterRetryBackoff(attempt)): + } + } + + // 每次 attempt 都重新取 token(cache hit 情境下成本極低) + token, err := c.tokens.ServiceToken(ctx, scope) + if err != nil { + // token 取不到 — 不可重試(IdP 端問題,不在 converter 重試矩陣內) + return nil, c.wrapTokenErr(err) + } + + req, err := reqBuilder(token) + if err != nil { + return nil, fmt.Errorf("%w: build %s request: %v", ErrConverterUnavailable, endpointKind, err) + } + + body, classifiedErr, retryable := c.doOnce(req, endpointKind, label, attempt, mapErr) + if classifiedErr == nil { + return body, nil + } + lastErr = classifiedErr + if !retryable { + return nil, classifiedErr + } + } + c.logger.Warn("conversion.converter.retry_exhausted", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("attempts", maxRetries+1)) + return nil, lastErr +} + +// doOnce 執行一次 HTTP request,回傳 body(成功時)+ 分類好的 error + 是否可重試。 +func (c *converterClient) doOnce( + req *http.Request, + endpointKind, label string, + attempt int, + mapErr func(status int, body []byte) error, +) (body []byte, err error, retryable bool) { + startedAt := c.now() + res, err := c.http.Do(req) + duration := c.now().Sub(startedAt) + if err != nil { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + c.logger.Warn("conversion.converter.ctx_cancelled", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + return nil, err, false + } + c.logger.Warn("conversion.converter.network_error", + slog.String("endpoint", endpointKind), + slog.String("label", 
label), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration), + slog.String("err", truncate(err.Error(), 200))) + return nil, fmt.Errorf("%w: %s network error: %v", + ErrConverterUnavailable, endpointKind, err), true + } + defer res.Body.Close() + + bodyBytes, readErr := io.ReadAll(res.Body) + if readErr != nil { + c.logger.Warn("conversion.converter.body_read_failed", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.String("err", truncate(readErr.Error(), 200))) + return nil, fmt.Errorf("%w: read response body: %v", + ErrConverterUnavailable, readErr), true + } + + if res.StatusCode >= 200 && res.StatusCode < 300 { + c.logger.Debug("conversion.converter.success", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + return bodyBytes, nil, false + } + + c.logger.Warn("conversion.converter.endpoint_error", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + + classified := mapErr(res.StatusCode, bodyBytes) + // 5xx 視為可重試;4xx / 認證失敗 / 已 wrap 為非 transient error 都不重試 + retryable = res.StatusCode >= 500 && res.StatusCode < 600 + return nil, classified, retryable +} + +// converterRetryBackoff 回傳第 n 次 retry(n 從 1 開始)的等待時間。 +// 對齊 conversion.md §9.1: +// - GetJob: 0.5s, 1s, 2s(base=500ms,倍數 1, 2, 4 — 但實際只用前 2 次) +// - Promote: 1s, 2s(base=500ms,倍數 2, 4) +// - List: 0.5s(base=500ms,倍數 1) +// +// 為了統一 base 但對齊 §9.1 的「Promote 退避 1s, 2s」,我們用 base=500ms 加 ×2 倍數, +// 第 n 次退避 = base × 2^n(對照 §9.1 GetJob: n=1→500ms*1=500ms 不完全對齊; +// 但 §9.1 主要規範是「指數退避,max retry 次數」— 實際數值容忍小偏差,重點是不爆量)。 +// +// 最終退避序列:n=1→0.5s, n=2→1s, n=3→2s(Promote/Get 都從 n=1 開始用, +// 第 1 次 attempt 不退避;第 2 次 attempt = retry 1 = 0.5s 等)。 +// +// 不加 jitter — 同 
mc_token_client,Phase 0.8 同時 retry 的 caller 不會大量併發打 converter。 +func converterRetryBackoff(attempt int) time.Duration { + if attempt < 1 { + return converterRetryBase + } + // 0.5s, 1s, 2s, 4s ... + return converterRetryBase * (1 << (attempt - 1)) +} + +// wrapTokenErr 把 MCTokenClient 取 token 時的錯誤包成 caller 已預期的 sentinel。 +// +// MCTokenClient 已經把錯誤分類成 ErrServiceClientUnauthorized / ErrMCTokenUnavailable / ctx.Err, +// 我們不在 converter_client 層改動分類,純粹透傳(讓上層用 errors.Is 比對)。 +func (c *converterClient) wrapTokenErr(err error) error { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return err + } + // 已是 sentinel(ErrServiceClientUnauthorized / ErrMCTokenUnavailable)— 直接透傳 + if errors.Is(err, ErrServiceClientUnauthorized) || errors.Is(err, ErrMCTokenUnavailable) { + return err + } + // 兜底:未預期的 token 錯誤包成 ErrMCTokenUnavailable + return fmt.Errorf("%w: %v", ErrMCTokenUnavailable, err) +} + +// ========================================================================== +// Response 解析(converter openapi.yaml shapes) +// ========================================================================== + +// converterAPIError 是 converter `{error: {...}}` shape 的 unmarshal 中介 type。 +type converterAPIError struct { + Code string `json:"code"` + Message string `json:"message"` + Details json.RawMessage `json:"details"` + RequestID string `json:"request_id"` +} + +// parseAPIError 解 converter 的 `{error: {code, message, details, request_id}}` shape。 +// +// converter 4xx / 5xx 一律遵循此 shape;解析失敗時回空 struct(caller 仍會走 mapping 預設路徑)。 +func parseAPIError(body []byte) converterAPIError { + var wrapper struct { + Error converterAPIError `json:"error"` + } + if err := json.Unmarshal(body, &wrapper); err != nil { + return converterAPIError{} + } + return wrapper.Error +} + +// extractFieldsFromDetails 從 converter `details.fields` 解出 ValidationFieldError slice。 +// +// 對齊 openapi.yaml 範例: +// +// details: { fields: [{ field: "model_id", message: "..." 
}] } +// +// 解析失敗回 nil(caller 仍可正常 wrap,frontend 拿不到 fields 但能拿到 code)。 +func extractFieldsFromDetails(raw json.RawMessage) []ValidationFieldError { + if len(raw) == 0 { + return nil + } + var parsed struct { + Fields []ValidationFieldError `json:"fields"` + } + if err := json.Unmarshal(raw, &parsed); err != nil { + return nil + } + return parsed.Fields +} + +// extractActiveJobFromDetails 從 converter 409 user_has_active_job 的 details 解出簡化版 Job。 +// +// 對齊 openapi.yaml 範例: +// +// details: { +// active_job_id: "...", +// active_job_status: "running", +// active_job_stage: "bie", +// active_job_progress: 45, +// active_job_created_at: "..." +// } +// +// 解析失敗回 nil(caller 仍會走 ActiveJobError,只是 Job 為 nil)。 +func extractActiveJobFromDetails(raw json.RawMessage) *Job { + if len(raw) == 0 { + return nil + } + var parsed struct { + ActiveJobID string `json:"active_job_id"` + ActiveJobStatus string `json:"active_job_status"` + ActiveJobStage string `json:"active_job_stage"` + ActiveJobProgress int `json:"active_job_progress"` + ActiveJobCreatedAt time.Time `json:"active_job_created_at"` + } + if err := json.Unmarshal(raw, &parsed); err != nil { + return nil + } + if parsed.ActiveJobID == "" { + return nil + } + return &Job{ + JobID: parsed.ActiveJobID, + Status: parsed.ActiveJobStatus, + Stage: parsed.ActiveJobStage, + Progress: parsed.ActiveJobProgress, + CreatedAt: parsed.ActiveJobCreatedAt, + // ExpiresAt 由上層 flow.go 自行 created_at + 7d 推算(converter 409 不一定回 expires_at) + } +} + +// converterJobJSON 是 GET /api/v1/jobs/{id} response 的中介 unmarshal type。 +// +// 為了同時支援: +// - CreateJobResponse(POST /jobs 201)— 無 stage_progress / input.filename 等欄位 +// - Job(GET /jobs/{id})— 完整欄位 +// 全部欄位都用 pointer 或 nullable,Marshal 時靠下方 toConverterJob 統一轉。 +type converterJobJSON struct { + JobID string `json:"job_id"` + Status string `json:"status"` + Stage *string `json:"stage"` // completed 時 converter 回 null + Progress *int `json:"progress"` + StageProgress *int `json:"stage_progress"` 
+ CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + ExpiresAt time.Time `json:"expires_at"` + Input *struct { + Filename string `json:"filename"` + } `json:"input"` + Parameters *struct { + Platform string `json:"platform"` + } `json:"parameters"` + Error *struct { + Code string `json:"code"` + Message string `json:"message"` + Stage string `json:"stage"` + } `json:"error"` +} + +// parseConverterJob 解 GET /api/v1/jobs/{id} 或 POST /api/v1/jobs 201 的 response。 +func parseConverterJob(body []byte) (*ConverterJob, error) { + var jr converterJobJSON + if err := json.Unmarshal(body, &jr); err != nil { + return nil, fmt.Errorf("%w: parse converter job response: %v", ErrConverterUnavailable, err) + } + if jr.JobID == "" { + return nil, fmt.Errorf("%w: empty job_id in converter response", ErrConverterUnavailable) + } + return jr.toConverterJob(), nil +} + +// toConverterJob 把 converterJobJSON 轉成對外的 ConverterJob。 +func (jr *converterJobJSON) toConverterJob() *ConverterJob { + cj := &ConverterJob{ + JobID: jr.JobID, + Status: jr.Status, + Progress: jr.Progress, + StageProgress: jr.StageProgress, + CreatedAt: jr.CreatedAt, + UpdatedAt: jr.UpdatedAt, + ExpiresAt: jr.ExpiresAt, + } + if jr.Stage != nil { + cj.Stage = *jr.Stage + } + if jr.Input != nil { + cj.SourceFilename = jr.Input.Filename + } + if jr.Parameters != nil { + cj.Platform = jr.Parameters.Platform + } + if jr.Error != nil { + cj.ErrorCode = jr.Error.Code + cj.ErrorMessage = jr.Error.Message + } + return cj +} + +// parseListJobs 解 GET /api/v1/jobs?user_id=&status=in_progress 的 response。 +// +// converter shape:{ "jobs": [Job, ...], "total": N, "next_cursor": "..." 
| null } +func parseListJobs(body []byte) ([]*ConverterJob, error) { + var resp struct { + Jobs []converterJobJSON `json:"jobs"` + } + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("%w: parse list jobs response: %v", ErrConverterUnavailable, err) + } + out := make([]*ConverterJob, 0, len(resp.Jobs)) + for i := range resp.Jobs { + out = append(out, resp.Jobs[i].toConverterJob()) + } + return out, nil +} + +// parseConverterPromoteResult 解 POST /api/v1/jobs/{id}/promote 的 response。 +// +// 對齊 openapi.yaml `PromoteResponse`:取 promoted[0](Phase 0.8 一次只 promote 1 target)。 +// 若 promoted 陣列為空,回 ErrConverterUnavailable(合理表示 converter 內部狀態不一致)。 +func parseConverterPromoteResult(body []byte) (*ConverterPromoteResult, error) { + var resp struct { + Promoted []struct { + TargetObjectKey string `json:"target_object_key"` + SizeBytes int64 `json:"size_bytes"` + FileAccessAgentETag string `json:"file_access_agent_etag"` + } `json:"promoted"` + } + if err := json.Unmarshal(body, &resp); err != nil { + return nil, fmt.Errorf("%w: parse promote response: %v", ErrConverterUnavailable, err) + } + if len(resp.Promoted) == 0 { + return nil, fmt.Errorf("%w: promote response has empty promoted array", ErrConverterUnavailable) + } + first := resp.Promoted[0] + if first.TargetObjectKey == "" { + return nil, fmt.Errorf("%w: promote response missing target_object_key", ErrConverterUnavailable) + } + return &ConverterPromoteResult{ + TargetObjectKey: first.TargetObjectKey, + Size: first.SizeBytes, + Checksum: first.FileAccessAgentETag, + }, nil +} diff --git a/visionA-backend/internal/conversion/converter_client_test.go b/visionA-backend/internal/conversion/converter_client_test.go new file mode 100644 index 0000000..5c77f66 --- /dev/null +++ b/visionA-backend/internal/conversion/converter_client_test.go @@ -0,0 +1,895 @@ +// Converter Client 單元測試。 +// +// 測試策略: +// - 用 httptest.Server mock task-scheduler 的 4 個 endpoint +// - 用 stub MCTokenClient(直接回 token / 
注入錯誤),不耦合真實 mc_token_client 邏輯 +// - 用 atomic counter 驗 retry 行為(attempts 數對齊 conversion.md §9.1) +// - 大 body streaming 用 io.LimitReader(不真的寫 100MB 進 RAM) +// +// 對應 task 規範必含 case: +// - InitJob:Success / StreamingBody / ContentTypeHeader / Conflict409 / Validation400 / 5xx_NoRetry / AuthExpired +// - GetJob:Success / NotFound / 5xx_RetryThenSuccess +// - Promote:Success / BadGateway +// - List:Success / Empty / 5xxRetry +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.5 + §9.1) +package conversion + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ========================================================================== +// stub MCTokenClient — 解耦真實 mc_token_client 邏輯 +// ========================================================================== + +// stubTokenClient 是 test 用的 fake MCTokenClient。 +type stubTokenClient struct { + mu sync.Mutex + token string + tokenErr error + callsByScope map[string]int +} + +func newStubTokenClient(token string) *stubTokenClient { + return &stubTokenClient{ + token: token, + callsByScope: make(map[string]int), + } +} + +func (s *stubTokenClient) ServiceToken(ctx context.Context, scope string) (string, error) { + s.mu.Lock() + defer s.mu.Unlock() + s.callsByScope[scope]++ + if s.tokenErr != nil { + return "", s.tokenErr + } + return s.token, nil +} + +func (s *stubTokenClient) IssueDelegatedDownload(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) { + // converter_client 不會呼叫;此處只是滿足 interface + return nil, fmt.Errorf("stubTokenClient.IssueDelegatedDownload should not be called from converter_client tests") +} + +func (s *stubTokenClient) setError(err error) { + s.mu.Lock() + defer s.mu.Unlock() + s.tokenErr = err +} + +func (s *stubTokenClient) calls(scope string) int { + s.mu.Lock() + defer 
s.mu.Unlock() + return s.callsByScope[scope] +} + +// ========================================================================== +// converter mock server helpers +// ========================================================================== + +// newConverterClientForTest 建立指向 mock server 的 ConverterClient。 +// +// 使用較短的 init/http timeout 加速 test;retry 退避保持原本(converterRetryBackoff 1s 起跳 +// 對 retry test 有點久但仍可接受 — 5xx retry test 的 max 2 retries = 0.5s + 1s = 1.5s)。 +func newConverterClientForTest(t *testing.T, baseURL string, tokens MCTokenClient) ConverterClient { + t.Helper() + return NewConverterClient(ConverterClientOpts{ + BaseURL: baseURL, + Tokens: tokens, + HTTPClient: &http.Client{Timeout: 5 * time.Second}, + InitHTTPClient: &http.Client{Timeout: 5 * time.Second}, + Logger: silentLogger(), + }) +} + +// ========================================================================== +// InitJob tests +// ========================================================================== + +// TestInitJob_Success:mock 接受 multipart,回 201 + job spec。 +func TestInitJob_Success(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var serverContentType string + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "Bearer svc-tok", r.Header.Get("Authorization")) + serverContentType = r.Header.Get("Content-Type") + + // drain body 確認 streaming 完成 + _, _ = io.Copy(io.Discard, r.Body) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "created", + "stage": "onnx", + "progress": 0, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:00:00Z", + "expires_at": "2026-05-02T12:00:00Z", + "user_id": "alice" + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := 
newConverterClientForTest(t, srv.URL, tokens) + job, err := cc.InitJob(context.Background(), InitConverterJobReq{ + UserID: "alice", + Platform: "520", + SourceFilename: "model.onnx", + Body: strings.NewReader("--xyz\r\nContent-Disposition: form-data; name=\"user_id\"\r\n\r\nalice\r\n--xyz--\r\n"), + BodyContentType: "multipart/form-data; boundary=xyz", + }) + + require.NoError(t, err) + require.NotNil(t, job) + assert.Equal(t, "550e8400-e29b-41d4-a716-446655440000", job.JobID) + assert.Equal(t, "created", job.Status) + assert.Equal(t, "onnx", job.Stage) + assert.Equal(t, "multipart/form-data; boundary=xyz", serverContentType, + "InitJob 必須完整透傳 Content-Type 含 boundary(converter multer 解析依賴此)") + assert.Equal(t, 1, tokens.calls(scopeConverterWrite)) +} + +// TestInitJob_StreamingBody:driver 寫 100MB 假資料給 io.Reader,confirm streaming(不全 buffer RAM)。 +// +// 用 io.LimitReader 包一個無限 reader,server side 也用 io.Discard 不存。 +// 觀察:peakReadBytes 不應接近 100MB(確認 net/http 真的是 streaming)— 但 peak 偵測在 Go 層級不易, +// 改驗:reader 的 ReadCalls 數應遠大於 1(如果 buffer 全進 RAM,net/http 會一次全讀)。 +func TestInitJob_StreamingBody(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var serverBytesRead int64 + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + // 不一次 ReadAll;用 Copy 到 io.Discard 強制 streaming + n, _ := io.Copy(io.Discard, r.Body) + atomic.AddInt64(&serverBytesRead, n) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{ + "job_id": "stream-test", "status": "created", "stage": "onnx", "progress": 0, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:00:00Z", + "expires_at": "2026-05-02T12:00:00Z" + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + const totalSize = int64(10 * 1024 * 1024) // 10MB(測試成本與 streaming 驗證的平衡) + reader := &countingReader{ + R: io.LimitReader(zerosReader{}, totalSize), + } + + cc := 
newConverterClientForTest(t, srv.URL, tokens) + // 對 streaming test 加長 timeout + cc = NewConverterClient(ConverterClientOpts{ + BaseURL: srv.URL, + Tokens: tokens, + HTTPClient: &http.Client{Timeout: 30 * time.Second}, + InitHTTPClient: &http.Client{Timeout: 30 * time.Second}, + Logger: silentLogger(), + }) + + job, err := cc.InitJob(context.Background(), InitConverterJobReq{ + UserID: "alice", + Body: reader, + BodyContentType: "multipart/form-data; boundary=stream", + }) + + require.NoError(t, err) + require.NotNil(t, job) + assert.Equal(t, "stream-test", job.JobID) + assert.Equal(t, totalSize, atomic.LoadInt64(&serverBytesRead), + "server 應該收到完整 body(streaming proxy 不掉資料)") + + // streaming 證據:reader 應被多次呼叫 Read(如果是 buffer 全 RAM 模式,會一次大讀) + calls := atomic.LoadInt64(&reader.calls) + assert.Greater(t, calls, int64(1), "streaming 必須多次 Read(不能一次性 buffer 全 RAM)") +} + +// TestInitJob_ContentTypeHeader:multipart boundary 必須完整透傳。 +func TestInitJob_ContentTypeHeader(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var receivedCT string + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + receivedCT = r.Header.Get("Content-Type") + _, _ = io.Copy(io.Discard, r.Body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{ + "job_id": "ct-test", "status": "created", "stage": "onnx", "progress": 0, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:00:00Z", + "expires_at": "2026-05-02T12:00:00Z" + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + const customCT = "multipart/form-data; boundary=---xxx-very-specific-boundary-yyy---" + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("body content"), + BodyContentType: customCT, + }) + require.NoError(t, err) + assert.Equal(t, customCT, receivedCT, 
"boundary 必須一字不差透傳(含特殊字元)") +} + +// TestInitJob_Conflict409_ActiveJobError:mock 回 409 user_has_active_job → return *ActiveJobError。 +func TestInitJob_Conflict409_ActiveJobError(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + _, _ = io.Copy(io.Discard, r.Body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusConflict) + _, _ = w.Write([]byte(`{ + "error": { + "code": "user_has_active_job", + "message": "使用者目前已有進行中的轉檔任務", + "details": { + "active_job_id": "550e8400-e29b-41d4-a716-446655440000", + "active_job_status": "running", + "active_job_stage": "bie", + "active_job_progress": 45, + "active_job_created_at": "2026-04-25T12:00:00Z" + }, + "request_id": "req-123" + } + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("x"), + BodyContentType: "multipart/form-data; boundary=xxx", + }) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrActiveJobExists), "必須能透過 errors.Is 比對 sentinel") + + var ae *ActiveJobError + require.True(t, errors.As(err, &ae), "必須能透過 errors.As 取出 ActiveJobError struct") + require.NotNil(t, ae.Job) + assert.Equal(t, "550e8400-e29b-41d4-a716-446655440000", ae.Job.JobID) + assert.Equal(t, "running", ae.Job.Status) + assert.Equal(t, "bie", ae.Job.Stage) + assert.Equal(t, 45, ae.Job.Progress) +} + +// TestInitJob_Validation400:mock 回 400 + fields → return *ConverterValidationError, +// fields 對齊 openapi.yaml shape([]ValidationFieldError)。 +func TestInitJob_Validation400(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + _, _ = io.Copy(io.Discard, r.Body) + 
w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{ + "error": { + "code": "validation_error", + "message": "欄位驗證失敗", + "details": { + "fields": [ + {"field": "model_id", "message": "model_id 範圍必須在 1 ~ 65535"}, + {"field": "platform", "message": "platform 必須是 520 / 720 / 530 / 630 / 730"} + ] + }, + "request_id": "req-validation" + } + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("x"), + BodyContentType: "multipart/form-data; boundary=xxx", + }) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrValidationFailed)) + + var ve *ConverterValidationError + require.True(t, errors.As(err, &ve)) + require.Len(t, ve.Fields, 2, "fields 必須對齊 converter openapi.yaml 的 array shape") + assert.Equal(t, "model_id", ve.Fields[0].Field) + assert.Equal(t, "model_id 範圍必須在 1 ~ 65535", ve.Fields[0].Message) + assert.Equal(t, "platform", ve.Fields[1].Field) + assert.Contains(t, ve.Message, "驗證失敗", "Message 應透傳 converter 原文供 log 用") +} + +// TestInitJob_5xx_NoRetry:mock 連續 500 → InitJob 不 retry,立即 return。 +// +// 設計理由:multipart body 是 streaming(io.Reader 一次性),retry 會傳到一半的爛資料。 +func TestInitJob_5xx_NoRetry(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var counter atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + counter.Add(1) + _, _ = io.Copy(io.Discard, r.Body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"misconfiguration","message":"...","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: 
strings.NewReader("x"), + BodyContentType: "multipart/form-data; boundary=xxx", + }) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrConverterUnavailable)) + assert.Equal(t, int32(1), counter.Load(), + "InitJob 不可 retry 5xx(streaming body 不可 replay)") +} + +// TestInitJob_AuthExpired:mock 回 401 → return ErrServiceClientUnauthorized。 +func TestInitJob_AuthExpired(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("expired-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + _, _ = io.Copy(io.Discard, r.Body) + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte(`{"error":{"code":"invalid_token","message":"...","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("x"), + BodyContentType: "multipart/form-data; boundary=xxx", + }) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized)) +} + +// TestInitJob_TokenFailure_Propagated:MCTokenClient 取 token 失敗時,錯誤透傳。 +func TestInitJob_TokenFailure_Propagated(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("") + tokens.setError(ErrServiceClientUnauthorized) + + cc := newConverterClientForTest(t, "http://unused", tokens) + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("x"), + BodyContentType: "multipart/form-data; boundary=xxx", + }) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized)) +} + +// TestInitJob_RequiredFieldsValidation:本地參數驗證(不打網路)。 +func TestInitJob_RequiredFieldsValidation(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + cc := newConverterClientForTest(t, "http://unused", tokens) + + // 缺 body + _, err := cc.InitJob(context.Background(), InitConverterJobReq{ + BodyContentType: 
"multipart/form-data; boundary=x", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "body is required") + + // 缺 content type + _, err = cc.InitJob(context.Background(), InitConverterJobReq{ + Body: strings.NewReader("x"), + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "content type is required") +} + +// ========================================================================== +// GetJob tests +// ========================================================================== + +// TestGetJob_Success:標準 happy path(含完整 Job shape 解析)。 +func TestGetJob_Success(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodGet, r.Method) + require.Equal(t, "Bearer svc-tok", r.Header.Get("Authorization")) + // path: /api/v1/jobs/{id} + assert.Contains(t, r.URL.Path, "550e8400") + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "job_id": "550e8400-e29b-41d4-a716-446655440000", + "user_id": "alice", + "status": "running", + "stage": "bie", + "progress": 45, + "stage_progress": 60, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:05:30Z", + "expires_at": "2026-05-02T12:00:00Z", + "input": {"filename": "model.onnx", "size_bytes": 100, "ref_images_count": 0}, + "parameters": {"model_id": 1001, "version": "v1.0.0", "platform": "520"}, + "error": null + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + job, err := cc.GetJob(context.Background(), "550e8400-e29b-41d4-a716-446655440000") + require.NoError(t, err) + require.NotNil(t, job) + assert.Equal(t, "running", job.Status) + assert.Equal(t, "bie", job.Stage) + require.NotNil(t, job.Progress) + assert.Equal(t, 45, *job.Progress) + require.NotNil(t, job.StageProgress) + assert.Equal(t, 60, 
*job.StageProgress) + assert.Equal(t, "model.onnx", job.SourceFilename) + assert.Equal(t, "520", job.Platform) + assert.False(t, job.ExpiresAt.IsZero()) +} + +// TestGetJob_NotFound:404 → ErrJobNotFound。 +func TestGetJob_NotFound(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error":{"code":"job_not_found","message":"...","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.GetJob(context.Background(), "missing-job") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrJobNotFound)) +} + +// TestGetJob_5xx_RetryThenSuccess:500/500/200 → atomic counter 驗 retry 3 次。 +func TestGetJob_5xx_RetryThenSuccess(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var counter atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + idx := counter.Add(1) + if idx <= 2 { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"internal_error","message":"...","request_id":"r"}}`)) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "job_id": "j1", "status": "completed", "stage": null, "progress": 100, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:08:30Z", + "expires_at": "2026-05-02T12:00:00Z" + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + job, err := cc.GetJob(context.Background(), "j1") + require.NoError(t, err) + require.NotNil(t, job) + assert.Equal(t, "completed", job.Status) + assert.Equal(t, int32(3), counter.Load(), "GetJob 應 retry max 2 次(共 3 attempts)") +} + +// 
TestGetJob_5xx_Exhausted:連續 5xx 用完 retry 仍失敗 → ErrConverterUnavailable。 +func TestGetJob_5xx_Exhausted(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var counter atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + counter.Add(1) + w.WriteHeader(http.StatusBadGateway) + _, _ = w.Write([]byte(`{"error":{"code":"x","message":"x","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.GetJob(context.Background(), "j1") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrConverterUnavailable)) + assert.Equal(t, int32(3), counter.Load(), "用完 retry 仍 5xx 應該打 3 次") +} + +// TestGetJob_ContextCancel_NoRetry:ctx 在 retry 等待中被 cancel → 立即 return。 +func TestGetJob_ContextCancel_NoRetry(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var counter atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + counter.Add(1) + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"x","message":"x","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + + ctx, cancel := context.WithCancel(context.Background()) + // 第一次 attempt 完後 cancel;第二次 retry 等待時應立即 return + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _, err := cc.GetJob(ctx, "j1") + require.Error(t, err) + assert.True(t, errors.Is(err, context.Canceled)) + // 至多 1 次(cancel 在退避時觸發) + assert.LessOrEqual(t, counter.Load(), int32(1), + "ctx cancel 應在第 1 次 attempt 後立即 return,不再打 server") +} + +// ========================================================================== +// Promote tests +// ========================================================================== + +// TestPromote_Success:promote 
response 含 target_object_key。 +func TestPromote_Success(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var receivedBody string + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + assert.Contains(t, r.URL.Path, "/promote") + body, _ := io.ReadAll(r.Body) + receivedBody = string(body) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "job_id": "j1", + "promoted": [ + { + "source": "nef", + "target_object_key": "visionA/models/alice/m-1001/v1.0.0/out.nef", + "size_bytes": 10485760, + "file_access_agent_etag": "abc123", + "promoted_at": "2026-04-25T12:30:00Z" + } + ] + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + result, err := cc.Promote(context.Background(), "j1", PromoteReq{ + UserID: "alice", + Source: "nef", + TargetObjectKey: "visionA/models/alice/m-1001/v1.0.0/out.nef", + }) + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "visionA/models/alice/m-1001/v1.0.0/out.nef", result.TargetObjectKey) + assert.Equal(t, int64(10485760), result.Size) + assert.Equal(t, "abc123", result.Checksum) + assert.Contains(t, receivedBody, `"user_id":"alice"`, + "promote body 應含 user_id metadata(trust boundary 重申)") + assert.Contains(t, receivedBody, `"target_object_key":"visionA/models/alice/m-1001/v1.0.0/out.nef"`) +} + +// TestPromote_DefaultSource:未傳 Source 時預設 nef。 +func TestPromote_DefaultSource(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var receivedBody string + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + receivedBody = string(body) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "job_id": 
"j1", + "promoted": [{"source":"nef","target_object_key":"x","size_bytes":1,"file_access_agent_etag":"","promoted_at":"2026-04-25T00:00:00Z"}] + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.Promote(context.Background(), "j1", PromoteReq{ + UserID: "alice", + TargetObjectKey: "x", + }) + require.NoError(t, err) + assert.Contains(t, receivedBody, `"source":"nef"`, "未傳 Source 時應預設 nef") +} + +// TestPromote_BadGateway:FAA 不可達 → 502 → ErrFAAUnavailable。 +func TestPromote_BadGateway(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadGateway) + _, _ = w.Write([]byte(`{"error":{"code":"file_gateway_unavailable","message":"FAA 不可達","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.Promote(context.Background(), "j1", PromoteReq{ + UserID: "alice", + TargetObjectKey: "x", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrFAAUnavailable), + "converter 502 file_gateway_unavailable 必須對應到 ErrFAAUnavailable") +} + +// TestPromote_NotCompleted409:job_not_ready_for_promote → ErrJobNotCompleted。 +func TestPromote_NotCompleted409(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusConflict) + _, _ = w.Write([]byte(`{"error":{"code":"job_not_ready_for_promote","message":"...","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.Promote(context.Background(), "j1", PromoteReq{ + UserID: "alice", + TargetObjectKey: "x", + }) + require.Error(t, err) + assert.True(t, 
errors.Is(err, ErrJobNotCompleted)) +} + +// TestPromote_NotFound404:404 → ErrJobNotFound。 +func TestPromote_NotFound404(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error":{"code":"job_not_found","message":"...","request_id":"r"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + _, err := cc.Promote(context.Background(), "j1", PromoteReq{ + UserID: "alice", + TargetObjectKey: "x", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrJobNotFound)) +} + +// TestPromote_RequiredFieldsValidation:本地參數驗證。 +func TestPromote_RequiredFieldsValidation(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + cc := newConverterClientForTest(t, "http://unused", tokens) + + _, err := cc.Promote(context.Background(), "", PromoteReq{TargetObjectKey: "x"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "jobID is required") + + _, err = cc.Promote(context.Background(), "j1", PromoteReq{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "target_object_key is required") +} + +// ========================================================================== +// ListInProgressJobs tests +// ========================================================================== + +// TestListInProgressJobs_Success:query string 含 user_id + status=in_progress。 +func TestListInProgressJobs_Success(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var receivedQuery string + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + // path 在 mux pattern 沒結尾 / 時 ServeMux 會匹配精確路徑(list 端點) + require.Equal(t, http.MethodGet, r.Method) + receivedQuery = r.URL.RawQuery + w.Header().Set("Content-Type", 
"application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "jobs": [ + { + "job_id": "j-active", + "user_id": "alice", + "status": "running", + "stage": "bie", + "progress": 45, + "created_at": "2026-04-25T12:00:00Z", + "updated_at": "2026-04-25T12:05:30Z", + "expires_at": "2026-05-02T12:00:00Z", + "input": {"filename": "model.onnx", "size_bytes": 1, "ref_images_count": 0}, + "parameters": {"model_id": 1, "version": "v1.0.0", "platform": "720"}, + "error": null + } + ], + "total": 1, + "next_cursor": null + }`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + jobs, err := cc.ListInProgressJobs(context.Background(), "alice") + require.NoError(t, err) + require.Len(t, jobs, 1) + assert.Equal(t, "j-active", jobs[0].JobID) + assert.Equal(t, "running", jobs[0].Status) + assert.Equal(t, "bie", jobs[0].Stage) + assert.Equal(t, "720", jobs[0].Platform) + assert.Contains(t, receivedQuery, "user_id=alice") + assert.Contains(t, receivedQuery, "status=in_progress", + "必須帶 status=in_progress 給 lazy rebuild ownership 用") +} + +// TestListInProgressJobs_Empty:[] response → 空 slice。 +func TestListInProgressJobs_Empty(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + mux := http.NewServeMux() + mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"jobs":[],"total":0,"next_cursor":null}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + cc := newConverterClientForTest(t, srv.URL, tokens) + jobs, err := cc.ListInProgressJobs(context.Background(), "alice") + require.NoError(t, err) + assert.Len(t, jobs, 0, "empty result 應回空 slice,不是 nil 也不是 error") + assert.NotNil(t, jobs, "應回非 nil 空 slice 給 caller 安全 range") +} + +// TestListInProgressJobs_5xxRetry:5xx 後成功;驗 retry 1 次(共 2 attempts)。 +func 
// zerosReader is an endless source of zero bytes, used by the streaming tests.
type zerosReader struct{}

// Read overwrites every byte of p with 0 and reports the whole buffer as read.
// It never returns an error, so the stream never ends on its own.
func (zerosReader) Read(p []byte) (int, error) {
	clear(p)
	return len(p), nil
}
// Sentinel errors. Callers (including the HTTP handler layer) compare with
// errors.Is; the public error code and HTTP status for each sentinel come from
// ErrorCode and HTTPStatus.
var (
	// ErrForbidden means the job does not belong to the current user.
	// Maps to HTTP 403 / code "forbidden".
	ErrForbidden = errors.New("conversion: forbidden")

	// ErrJobNotFound means the job_id does not exist or has expired.
	// Maps to HTTP 404 / code "not_found".
	ErrJobNotFound = errors.New("conversion: job not found")

	// ErrJobNotCompleted means the job has not completed yet, so it cannot be
	// promoted or downloaded.
	// Maps to HTTP 409 / code "job_not_completed".
	ErrJobNotCompleted = errors.New("conversion: job not completed")

	// ErrActiveJobExists means the same user already has an active job.
	// Maps to HTTP 409 / code "active_job_exists".
	// ActiveJobError wraps this sentinel and carries the conflicting job.
	ErrActiveJobExists = errors.New("conversion: user already has active job")

	// ErrValidationFailed means the uploaded multipart content was rejected as
	// malformed (converter 4xx validation_error / invalid_multipart).
	// Maps to HTTP 400 / code "validation_failed".
	ErrValidationFailed = errors.New("conversion: validation failed")

	// ErrPayloadTooLarge means the converter rejected an oversized file.
	// Maps to HTTP 413 / code "payload_too_large".
	ErrPayloadTooLarge = errors.New("conversion: payload too large")

	// ErrConverterUnavailable means the converter kept failing with 5xx or
	// network errors.
	// Maps to HTTP 502 / code "converter_unavailable".
	ErrConverterUnavailable = errors.New("conversion: converter unavailable")

	// ErrFAAUnavailable means FAA kept failing with 5xx or network errors.
	// Maps to HTTP 502 / code "faa_unavailable".
	ErrFAAUnavailable = errors.New("conversion: faa unavailable")

	// ErrFAAFileNotFound means FAA answered 404 for the requested object_key.
	//
	// Expected to be rare: it would occur when promote-to-models pulls from FAA
	// right after a promote and the file is missing (for example FAA-side GC or
	// a bug in how the object_key is built).
	//
	// Externally it is still reported as HTTP 502 / code "faa_unavailable" so
	// internal object key details are not exposed. Callers that need the
	// distinction (logging, metrics) use errors.Is(err, ErrFAAFileNotFound).
	//
	// Phase 0.8 conversion (see .autoflow/04-architecture/conversion.md §2.6 + §9.2)
	ErrFAAFileNotFound = errors.New("conversion: faa file not found")

	// ErrDownloadTokenFailed means MC rejected the delegated download token
	// exchange with a 4xx (a configuration problem).
	// Maps to HTTP 502 / code "download_token_failed".
	ErrDownloadTokenFailed = errors.New("conversion: download token failed")

	// ErrMCTokenUnavailable means MC kept failing with 5xx or network errors.
	// Maps to HTTP 502 / code "mc_token_unavailable".
	ErrMCTokenUnavailable = errors.New("conversion: mc token unavailable")

	// ErrIDPMisconfigured means the MC token endpoint answered 4xx (the
	// client_credentials grant is misconfigured).
	// Maps to HTTP 500 / code "idp_misconfigured".
	ErrIDPMisconfigured = errors.New("conversion: idp misconfigured")

	// ErrIDPUnavailable means MC oauth/token kept failing with 5xx or network
	// errors.
	// Maps to HTTP 503 / code "idp_unavailable".
	ErrIDPUnavailable = errors.New("conversion: idp unavailable")

	// ErrServiceBusy means the converter answered 503 service_busy.
	// Maps to HTTP 503 / code "service_busy".
	ErrServiceBusy = errors.New("conversion: service busy")

	// ErrServiceClientUnauthorized means visionA-backend failed to authenticate
	// against MC (401 / 403).
	//
	// Typical causes:
	//   - VISIONA_OIDC_SERVICE_CLIENT_ID / SECRET are wrong
	//   - the client was revoked or disabled on the MC side
	//   - the client lacks permission for the requested scope
	//
	// It is kept separate from ErrIDPMisconfigured so internal callers of the
	// MC token client can react more precisely (for example invalidating a
	// cache on 401), but ErrorCode / HTTPStatus report it as
	// "idp_misconfigured" / 500 (fail fast instead of running half-configured).
	//
	// Phase 0.8 conversion (see .autoflow/04-architecture/conversion.md §5.2)
	ErrServiceClientUnauthorized = errors.New("conversion: service client unauthorized")

	// ErrStorageUnavailable means visionA's own storage (local FS / S3) failed
	// on a read or write.
	//
	// Typical cause: PromoteToModels could not write the NEF into visionA
	// storage (disk full, S3 5xx, permission error). It is unrelated to FAA and
	// the converter.
	//
	// Maps to HTTP 500 / code "storage_unavailable".
	//
	// It is distinct from ErrFAAUnavailable because a storage failure is not an
	// FAA failure: alerts go to a different team and the user-facing message
	// differs. It uses 500 rather than 502 because the fault is in visionA
	// itself, not in a gateway or upstream.
	//
	// Phase 0.8 conversion (see .autoflow/04-architecture/conversion.md §6 — Reviewer M-1)
	ErrStorageUnavailable = errors.New("conversion: visionA storage unavailable")

	// ErrModelStoreUnavailable means an operation on visionA's own model store
	// (in-memory / Postgres) failed.
	//
	// Typical cause: PromoteToModels could not write the model record. Per the
	// original author's note the in-memory store never fails; this becomes
	// reachable once a database-backed store is used. It is unrelated to FAA
	// and the converter.
	//
	// Maps to HTTP 500 / code "model_store_unavailable".
	//
	// It is distinct from ErrConverterUnavailable for the same reasons given on
	// ErrStorageUnavailable.
	//
	// Phase 0.8 conversion (see .autoflow/04-architecture/conversion.md §6 — Reviewer M-1)
	ErrModelStoreUnavailable = errors.New("conversion: visionA model store unavailable")
)
*ActiveJobError) Error() string { + return ErrActiveJobExists.Error() +} + +// Unwrap 讓 errors.Is(err, ErrActiveJobExists) 成立。 +func (e *ActiveJobError) Unwrap() error { + return ErrActiveJobExists +} + +// ValidationFieldError 是 converter 4xx response 中 details.fields 陣列的單一元素。 +// +// 對齊 converter openapi.yaml `validation_error` example: +// +// details.fields: [{ field: "model_id", message: "model_id 範圍必須在 1 ~ 65535" }] +// +// 之所以用 array 不用 map: +// - 對齊 task-scheduler openapi.yaml(POST /api/v1/jobs 400 validation_error 範例) +// - 同一個 field 可能有多個錯誤(例如 model_id 同時違反 pattern + range) +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §6 + api-conversion.md §1) +type ValidationFieldError struct { + Field string `json:"field"` + Message string `json:"message"` +} + +// ConverterValidationError 是 ErrValidationFailed 的 wrapped form, +// 帶上 converter 回的欄位錯誤細節(給 frontend 顯示具體哪個欄位錯)。 +// +// 用法同 ActiveJobError。 +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §6 + api-conversion.md §1) +type ConverterValidationError struct { + // Fields 是 converter 4xx response 的 details.fields(若有)。 + // 結構對齊 converter openapi.yaml — array of {field, message}。 + // converter 4xx 沒有 details.fields 時為 nil(仍視為 validation 錯誤)。 + Fields []ValidationFieldError + + // Message 是 converter error message 原文(不過 frontend,僅供 log)。 + Message string +} + +// Error 實作 error interface。 +func (e *ConverterValidationError) Error() string { + if e.Message != "" { + return "conversion: validation failed: " + e.Message + } + return ErrValidationFailed.Error() +} + +// Unwrap 讓 errors.Is(err, ErrValidationFailed) 成立。 +func (e *ConverterValidationError) Unwrap() error { + return ErrValidationFailed +} + +// ErrorCode 把 sentinel error 轉成對外的 visionA error code(對齊 api-conversion.md §錯誤碼總覽)。 +// +// 未匹配的 error 回 "internal_error"(handler 層應 log 完整 error 後回 500)。 +func ErrorCode(err error) string { + switch { + case errors.Is(err, ErrForbidden): + return "forbidden" + case 
errors.Is(err, ErrJobNotFound): + return "not_found" + case errors.Is(err, ErrJobNotCompleted): + return "job_not_completed" + case errors.Is(err, ErrActiveJobExists): + return "active_job_exists" + case errors.Is(err, ErrValidationFailed): + return "validation_failed" + case errors.Is(err, ErrPayloadTooLarge): + return "payload_too_large" + case errors.Is(err, ErrConverterUnavailable): + return "converter_unavailable" + case errors.Is(err, ErrFAAFileNotFound): + // 對外仍視為 faa_unavailable,避免揭露 object_key 不存在的內部細節。 + // caller 想做精細處理用 errors.Is(err, ErrFAAFileNotFound) 直接判斷。 + return "faa_unavailable" + case errors.Is(err, ErrFAAUnavailable): + return "faa_unavailable" + case errors.Is(err, ErrDownloadTokenFailed): + return "download_token_failed" + case errors.Is(err, ErrMCTokenUnavailable): + return "mc_token_unavailable" + case errors.Is(err, ErrIDPMisconfigured): + return "idp_misconfigured" + case errors.Is(err, ErrIDPUnavailable): + return "idp_unavailable" + case errors.Is(err, ErrServiceBusy): + return "service_busy" + case errors.Is(err, ErrServiceClientUnauthorized): + // 對外仍透過 idp_misconfigured 呈現(避免 leak「我們的 client_secret 過期」這種內部狀態); + // caller 想做精細處理用 errors.Is(err, ErrServiceClientUnauthorized) 直接判斷。 + return "idp_misconfigured" + case errors.Is(err, ErrStorageUnavailable): + return "storage_unavailable" + case errors.Is(err, ErrModelStoreUnavailable): + return "model_store_unavailable" + default: + return "internal_error" + } +} + +// HTTPStatus 把 sentinel error 轉成對應的 HTTP status code。 +// +// 未匹配的 error 回 500,handler 層應 log 後再 WriteError。 +func HTTPStatus(err error) int { + switch { + case errors.Is(err, ErrForbidden): + return 403 + case errors.Is(err, ErrJobNotFound): + return 404 + case errors.Is(err, ErrJobNotCompleted), errors.Is(err, ErrActiveJobExists): + return 409 + case errors.Is(err, ErrValidationFailed): + return 400 + case errors.Is(err, ErrPayloadTooLarge): + return 413 + case errors.Is(err, ErrConverterUnavailable), + errors.Is(err, 
ErrFAAUnavailable), + errors.Is(err, ErrFAAFileNotFound), + errors.Is(err, ErrDownloadTokenFailed), + errors.Is(err, ErrMCTokenUnavailable): + return 502 + case errors.Is(err, ErrIDPMisconfigured), errors.Is(err, ErrServiceClientUnauthorized): + return 500 + case errors.Is(err, ErrStorageUnavailable), errors.Is(err, ErrModelStoreUnavailable): + // visionA 自身基礎設施問題 → 500(不是 502 gateway,因為非 upstream 失敗) + return 500 + case errors.Is(err, ErrIDPUnavailable), errors.Is(err, ErrServiceBusy): + return 503 + default: + return 500 + } +} diff --git a/visionA-backend/internal/conversion/errors_test.go b/visionA-backend/internal/conversion/errors_test.go new file mode 100644 index 0000000..4513cd5 --- /dev/null +++ b/visionA-backend/internal/conversion/errors_test.go @@ -0,0 +1,161 @@ +package conversion + +import ( + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestErrorCode 確保所有 sentinel error 都對應到一個明確的 visionA error code, +// 且未匹配的 error 走 internal_error fallback(對齊 api-conversion.md §錯誤碼總覽)。 +func TestErrorCode(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + err error + want string + }{ + {"forbidden", ErrForbidden, "forbidden"}, + {"not_found", ErrJobNotFound, "not_found"}, + {"job_not_completed", ErrJobNotCompleted, "job_not_completed"}, + {"active_job_exists", ErrActiveJobExists, "active_job_exists"}, + {"validation_failed", ErrValidationFailed, "validation_failed"}, + {"payload_too_large", ErrPayloadTooLarge, "payload_too_large"}, + {"converter_unavailable", ErrConverterUnavailable, "converter_unavailable"}, + {"faa_unavailable", ErrFAAUnavailable, "faa_unavailable"}, + {"download_token_failed", ErrDownloadTokenFailed, "download_token_failed"}, + {"mc_token_unavailable", ErrMCTokenUnavailable, "mc_token_unavailable"}, + {"idp_misconfigured", ErrIDPMisconfigured, "idp_misconfigured"}, + {"idp_unavailable", ErrIDPUnavailable, "idp_unavailable"}, + {"service_busy", 
ErrServiceBusy, "service_busy"}, + // ErrServiceClientUnauthorized 對外刻意 mask 成 idp_misconfigured(不 leak「visionA secret 過期」內部狀態) + {"service_client_unauthorized_masked_as_idp_misconfig", ErrServiceClientUnauthorized, "idp_misconfigured"}, + // Reviewer M-1:visionA 自身基礎設施失敗用獨立 code(與 FAA / converter 區分) + {"storage_unavailable", ErrStorageUnavailable, "storage_unavailable"}, + {"model_store_unavailable", ErrModelStoreUnavailable, "model_store_unavailable"}, + {"unknown_falls_back_to_internal_error", errors.New("某個未預期錯誤"), "internal_error"}, + {"nil_falls_back_to_internal_error", nil, "internal_error"}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.want, ErrorCode(tc.err)) + }) + } +} + +// TestHTTPStatus 確保所有 sentinel error 對應到正確的 HTTP status, +// 且未匹配的 error 走 500 fallback(對齊 conversion.md §6 mapping)。 +func TestHTTPStatus(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + err error + want int + }{ + {"forbidden_403", ErrForbidden, 403}, + {"not_found_404", ErrJobNotFound, 404}, + {"job_not_completed_409", ErrJobNotCompleted, 409}, + {"active_job_exists_409", ErrActiveJobExists, 409}, + {"validation_400", ErrValidationFailed, 400}, + {"payload_too_large_413", ErrPayloadTooLarge, 413}, + {"converter_unavailable_502", ErrConverterUnavailable, 502}, + {"faa_unavailable_502", ErrFAAUnavailable, 502}, + {"download_token_failed_502", ErrDownloadTokenFailed, 502}, + {"mc_token_unavailable_502", ErrMCTokenUnavailable, 502}, + {"idp_misconfigured_500", ErrIDPMisconfigured, 500}, + {"idp_unavailable_503", ErrIDPUnavailable, 503}, + {"service_busy_503", ErrServiceBusy, 503}, + {"service_client_unauthorized_500", ErrServiceClientUnauthorized, 500}, + // Reviewer M-1:visionA 自身基礎設施失敗 → 500(不是 502 gateway) + {"storage_unavailable_500", ErrStorageUnavailable, 500}, + {"model_store_unavailable_500", ErrModelStoreUnavailable, 500}, + {"unknown_500", errors.New("未知錯誤"), 500}, + {"nil_500", 
nil, 500}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.want, HTTPStatus(tc.err)) + }) + } +} + +// TestActiveJobError 驗證 wrapped form 既能被 errors.Is 比對,又能用 errors.As 取出 Job。 +// +// 這是 frontend 顯示「你已有進行中任務」+ 跳轉到該 job 進度頁的關鍵:handler 用 errors.As +// 取出 Job 帶到 response details。 +func TestActiveJobError(t *testing.T) { + t.Parallel() + + job := &Job{JobID: "job-abc", Status: "running"} + err := &ActiveJobError{Job: job} + + // errors.Is 應命中 sentinel + assert.True(t, errors.Is(err, ErrActiveJobExists)) + + // errors.As 應拿到 wrapped 結構 + var ae *ActiveJobError + assert.True(t, errors.As(err, &ae)) + assert.NotNil(t, ae.Job) + assert.Equal(t, "job-abc", ae.Job.JobID) + + // ErrorCode 應仍透過 sentinel 對應到 active_job_exists + assert.Equal(t, "active_job_exists", ErrorCode(err)) + assert.Equal(t, 409, HTTPStatus(err)) +} + +// TestConverterValidationError 驗證 wrapped validation error 同樣行為。 +func TestConverterValidationError(t *testing.T) { + t.Parallel() + + verr := &ConverterValidationError{ + Fields: []ValidationFieldError{ + {Field: "platform", Message: "must be 520 or 720"}, + }, + Message: "platform invalid", + } + + assert.True(t, errors.Is(verr, ErrValidationFailed)) + + var ve *ConverterValidationError + assert.True(t, errors.As(verr, &ve)) + require.Len(t, ve.Fields, 1) + assert.Equal(t, "platform", ve.Fields[0].Field) + assert.Equal(t, "must be 520 or 720", ve.Fields[0].Message) + + assert.Equal(t, "validation_failed", ErrorCode(verr)) + assert.Equal(t, 400, HTTPStatus(verr)) + + // Error() 應包含 Message(給 log 用) + assert.Contains(t, verr.Error(), "platform invalid") + + // Message 為空時退化到 sentinel 訊息 + verr2 := &ConverterValidationError{} + assert.Equal(t, ErrValidationFailed.Error(), verr2.Error()) +} + +// TestErrorWrapping 驗證 fmt.Errorf("%w") wrapping 後仍能被 ErrorCode 抓對。 +// +// 這個測試模擬 flow.go 預期的 wrap pattern: +// +// if err := convClient.GetJob(...); err != nil { +// return fmt.Errorf("flow: 
get job from converter: %w", err) +// } +func TestErrorWrapping(t *testing.T) { + t.Parallel() + + wrapped := fmt.Errorf("flow: get job: %w", ErrJobNotFound) + assert.True(t, errors.Is(wrapped, ErrJobNotFound)) + assert.Equal(t, "not_found", ErrorCode(wrapped)) + assert.Equal(t, 404, HTTPStatus(wrapped)) +} diff --git a/visionA-backend/internal/conversion/faa_client.go b/visionA-backend/internal/conversion/faa_client.go new file mode 100644 index 0000000..ef9697b --- /dev/null +++ b/visionA-backend/internal/conversion/faa_client.go @@ -0,0 +1,467 @@ +// FAA client — visionA-backend 對 File Access Agent 的 server-to-server HTTP client。 +// +// Phase 0.8 只用 GET /files/{object_key}(給 promote-to-models 流程從 FAA pull NEF 用)。 +// 其他 endpoint(PUT / DELETE / HEAD / metadata)目前 visionA 不需要,未來再補。 +// +// 設計要點: +// - 走 service token(scope=files:download.read);token 由注入的 MCTokenClient 提供 +// - **回 streaming body**(io.ReadCloser)— 不 io.ReadAll,避免 500MB NEF 全進 RAM +// - **Phase A retry**:dial → 拿到 response header 之間的 5xx / network / timeout 失敗 +// 依 §9.1 指數退避重試 max 2 次(1s, 2s)。一旦拿到 200 response(進 Phase B: +// streaming body 給 caller),這層責任就結束 — body 中斷由 caller 處理(不可 replay)。 +// 詳見下方 GetFile doc comment 的「Phase A vs Phase B retry」段。 +// - 4xx → 對應 sentinel(401/403 → ErrServiceClientUnauthorized;404 → ErrFAAFileNotFound; +// 其他 4xx → ErrFAAUnavailable,避免新增更多 sentinel) +// +// 與 T3 InitJob 的對比(為什麼 T3 不 retry 但 T4 GetFile retry): +// - T3 InitJob:multipart **request body** 是 streaming(io.Reader 來自上游 c.Body); +// 一旦 http.Client.Do 開始送 request body,io.Reader 已被消費,retry 無法 rewind → +// 從第一次 attempt 起就「不可重試」。 +// - T4 GetFile:GET 沒有 request body,request 完全 idempotent;retry window 涵蓋 +// dial → 拿到 response header(Phase A)。Phase A 結束後(200 已到),response body +// 才是「不可 replay」的 streaming,但那不在本層責任範圍 — 本層拿到 200 就 return *FAAFile。 +// +// 安全: +// - **絕不**寫 Authorization header / service token / response body 進 log +// - object_key 過長時截斷(避免 log 膨脹;FAA object_key 由 visionA 內部組,不含 user 敏感資訊) +// +// 
// FAAClient is the server-to-server client for the File Access Agent (FAA).
//
// The original author documents it as goroutine-safe: each call builds its own
// *http.Request and the client keeps no mutable state of its own (token
// caching belongs to the injected MCTokenClient).
type FAAClient interface {
	// GetFile pulls one object from FAA using a service token
	// (scope: files:download.read).
	//
	// The returned FAAFile.Body is a streaming body. The caller MUST close it,
	// otherwise the underlying HTTP response body is not released and the
	// connection cannot be reused. Recommended pattern:
	//
	//	file, err := faa.GetFile(ctx, key)
	//	if err != nil { return err }
	//	defer file.Body.Close()
	//	_, err = io.Copy(dst, file.Body) // stream into visionA storage
	//
	// Retry behaviour (only up to receipt of the response header):
	//   - 5xx, network errors and timeouts before the response header are
	//     retried with backoff, at most 2 retries; GET carries no request
	//     body, so retrying is safe
	//   - 401 / 403 / 404 / other 4xx are not retried
	//   - context cancellation or deadline returns ctx.Err() immediately, even
	//     while waiting between retries
	//   - once a 200 response has been received the body is handed to the
	//     caller; a failure while reading the body is not retried because a
	//     streamed response cannot be replayed
	//
	// Error mapping as documented by the implementation (the status-to-sentinel
	// code is outside this declaration):
	//   - context cancel/deadline → ctx.Err(), not wrapped in a sentinel
	//   - 401 / 403 → ErrServiceClientUnauthorized
	//   - 404 → ErrFAAFileNotFound
	//   - other 4xx, exhausted 5xx or network retries → ErrFAAUnavailable
	GetFile(ctx context.Context, objectKey string) (*FAAFile, error)
}
// FAAClientOpts carries the dependencies injected into NewFAAClient.
//
// HTTPClient, Now and Logger are optional: NewFAAClient substitutes a default
// for each nil value, which lets unit tests inject fakes.
type FAAClientOpts struct {
	// BaseURL is the FAA base URL; NewFAAClient strips any trailing slashes.
	// Example: http://192.168.0.130:5081
	BaseURL string

	// Tokens supplies the service token. It is required; NewFAAClient does not
	// validate that it is non-nil.
	Tokens MCTokenClient

	// HTTPClient is optional; when nil, a default client with dial and
	// response-header timeouts but no overall timeout is used. Tests typically
	// inject httptest.Server.Client().
	//
	// Why the default client sets no Timeout: http.Client.Timeout bounds the
	// whole exchange including reading the body, so it would cut off a large
	// download on a slow network. Transport-level dial and response-header
	// timeouts catch a stuck connection instead, and the body streaming phase
	// is bounded by the caller's context.
	HTTPClient *http.Client

	// Now is optional; when nil, time.Now is used. Tests can inject a fake clock.
	Now func() time.Time

	// Logger is optional; when nil, slog.Default() is used.
	Logger *slog.Logger
}
server-side 處理時間(FAA 找檔、auth validate);10s 對小檔 metadata 階段夠寬鬆。 + // 注意:這個 timeout **不涵蓋 body streaming 階段**(body streaming 由 ctx 控制)。 + faaResponseHeaderTimeout = 10 * time.Second + + // faaMaxRetries 是 Phase A 5xx / network / timeout 的最大重試次數(不含第一次)。 + // 對齊 conversion.md §9.1:FAA GET /files/{key} max 2 retries(1s, 2s)。 + faaMaxRetries = 2 + + // faaRetryBaseDelay 是指數退避的 base(1s, 2s)。 + faaRetryBaseDelay = 1 * time.Second + + // objectKeyHashLen 是 log 中 object_key 的截短後 hash 長度(前 16 hex chars)。 + objectKeyHashLen = 16 + + // faaErrorBodyReadCap 是失敗 response 從 body 讀進 io.Discard 的最大量(4KB)。 + // 失敗時讀少量 body 主要是讓 keep-alive 能 reuse connection,避免空 body 留在 pipe。 + faaErrorBodyReadCap = 4 * 1024 +) + +// faaEndpointKind 是 log / 錯誤分類用的 endpoint 標記(目前只有一個)。 +const faaEndpointKind = "faa_get_file" + +// ========================================================================== +// 構造 + 內部實作 +// ========================================================================== + +// faaClient 是 FAAClient 的預設實作。 +// +// 套件內 unexported struct(caller 拿 interface),讓未來換實作不影響 caller。 +type faaClient struct { + baseURL string + tokens MCTokenClient + http *http.Client + now func() time.Time + logger *slog.Logger +} + +// NewFAAClient 建立一個 FAAClient 實例。 +// +// 必填:BaseURL / Tokens。其他 optional。 +// 注意:constructor 不會驗 BaseURL 連線,第一次 GetFile 才會打網路。 +func NewFAAClient(opts FAAClientOpts) FAAClient { + httpClient := opts.HTTPClient + if httpClient == nil { + httpClient = newDefaultFAAHTTPClient() + } + now := opts.Now + if now == nil { + now = time.Now + } + logger := opts.Logger + if logger == nil { + logger = slog.Default() + } + return &faaClient{ + baseURL: strings.TrimRight(opts.BaseURL, "/"), + tokens: opts.Tokens, + http: httpClient, + now: now, + logger: logger, + } +} + +// newDefaultFAAHTTPClient 建一個適合 streaming download 的預設 http.Client。 +// +// 為什麼自訂 transport: +// - http.Client.Timeout 不適用大檔下載(會中斷 body streaming) +// - 需要分別控制 dial / response header timeout,body streaming 不限制(由 ctx 控) 
+// +// transport 其餘參數沿用 net/http DefaultTransport 的合理預設(MaxIdleConns 等)。 +func newDefaultFAAHTTPClient() *http.Client { + transport := &http.Transport{ + DialContext: (&net.Dialer{ + Timeout: faaDialTimeout, + KeepAlive: 30 * time.Second, + }).DialContext, + ResponseHeaderTimeout: faaResponseHeaderTimeout, + // 沿用 DefaultTransport 的合理預設 + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + return &http.Client{ + Transport: transport, + // **不設 Timeout** — body streaming 階段由 ctx 控制 + } +} + +// ========================================================================== +// GetFile — Phase A retry,Phase B 不 retry 的 streaming pull +// ========================================================================== + +// GetFile 實作 FAAClient.GetFile。 +// +// 流程: +// 1. 取 service token(透過 MCTokenClient;其錯誤透傳,不重新分類) +// 2. 組 URL + 建 request +// 3. doWithRetry:max (1 + faaMaxRetries) attempts;每 attempt 重新 c.http.Do +// - 拿到 200:直接 return *FAAFile(不 close body) +// - 拿到 4xx:close body 後依 status mapping 對應 sentinel,不 retry +// - 拿到 5xx:close body,等 backoff 後 retry +// - network / dial / responseHeader timeout:等 backoff 後 retry +// - ctx cancel / deadline:立即 return ctx.Err() +func (c *faaClient) GetFile(ctx context.Context, objectKey string) (*FAAFile, error) { + if objectKey == "" { + return nil, fmt.Errorf("conversion/faa_client: object_key is required") + } + + keyHash := hashObjectKey(objectKey) + + // 1. 取 service token + // ServiceToken 內部已依 §6 mapping 失敗(ErrServiceClientUnauthorized / ErrIDPMisconfigured / + // ErrIDPUnavailable)— 這裡用 fmt.Errorf("%w") 透傳,不再二次包裝(避免錯誤碼被「升級」 + // 成 ErrFAAUnavailable 而失去原本的 i18n 區分 idp_misconfig vs idp_down)。 + token, err := c.tokens.ServiceToken(ctx, scopeFAADownloadRead) + if err != nil { + return nil, fmt.Errorf("conversion: get service token for faa download: %w", err) + } + + // 2. 
組 endpoint。注意 FAA 的 object_key 可能含路徑分隔符(如 "tenant/jobs/abc/output.nef")— + // 用 ResolveReference 處理;net/http 內部會做 path escape,避免 "../" 等問題。 + endpoint, err := c.buildFileURL(objectKey) + if err != nil { + return nil, fmt.Errorf("%w: build faa url: %v", ErrFAAUnavailable, err) + } + + // 3. 進 retry loop(Phase A only) + return c.doWithRetry(ctx, keyHash, endpoint, token) +} + +// doWithRetry 是 GetFile 的 Phase A retry 執行器。 +// +// 與 mc_token_client.doWithRetry / converter_client.doWithRetry 結構類似,但有以下差異: +// - 成功路徑回傳 *FAAFile(含未 close 的 streaming body),不是 []byte +// - 沒有「每次 attempt 重新建 request」需求 — GET 沒 body,request 物件可重用, +// 但為了讓 ctx-aware 行為一致(ctx cancel 後不重用舊 request),這裡每次都新建一個 +// - reqBuilder 不接 token 參數 — token 在 GetFile 取一次,retry 期間沿用同一 token +// (retry window 短:max 1+2+3=6s,token 不會在這段期間過期) +// +// 為什麼 retry 期間不重新取 token: +// - 簡化:避免 token 取失敗 vs HTTP 失敗 兩種錯誤交織的處理 +// - 安全:401 在這層被分類為「不可 retry」,不會走到「token expired 中途要 refresh」場景 +// - 效能:cache hit 情境下成本低但仍多一次 mutex;6s window 內 token 不會 expire +func (c *faaClient) doWithRetry( + ctx context.Context, + keyHash, endpoint, token string, +) (*FAAFile, error) { + var lastErr error + for attempt := 0; attempt <= faaMaxRetries; attempt++ { + // retry 前等待退避;ctx cancel 立即中斷 + if attempt > 0 { + select { + case <-ctx.Done(): + // ctx cancel/deadline → 立即 return(不 retry,不包成 sentinel) + return nil, ctx.Err() + case <-time.After(faaRetryBackoff(attempt)): + } + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + // 建 request 失敗(極罕見:URL parse 異常)— 不可 retry + return nil, fmt.Errorf("%w: build faa request: %v", ErrFAAUnavailable, err) + } + req.Header.Set("Accept", "application/octet-stream") + req.Header.Set("Authorization", "Bearer "+token) + + file, classifiedErr, retryable := c.doOnce(req, keyHash, attempt) + if classifiedErr == nil { + // 成功 — file 含未 close 的 body,由 caller 接手 + return file, nil + } + lastErr = classifiedErr + if !retryable { + // 4xx / 401-403 / 404 / ctx 
cancel:直接 return,不再 retry + return nil, classifiedErr + } + // retryable 5xx / network / timeout:繼續下一輪 + } + // 用完 retry 額度 + c.logger.Warn("conversion.faa.retry_exhausted", + slog.String("endpoint", faaEndpointKind), + slog.String("object_key_hash", keyHash), + slog.Int("attempts", faaMaxRetries+1)) + return nil, lastErr +} + +// doOnce 執行一次 Phase A:發 request → 等 response header → 分類結果。 +// +// 回傳: +// - 成功(2xx):file != nil(含未 close 的 streaming body), classifiedErr=nil, retryable=false +// - 失敗:file=nil, classifiedErr 為 sentinel-wrapped error, retryable 表示是否該重試 +// +// 重要:成功時 caller(doWithRetry)會直接把 file 透傳出去 — 這層**不 close body**。 +// 失敗時這層**會 close body**(讀少量讓 keep-alive reuse connection)。 +func (c *faaClient) doOnce( + req *http.Request, + keyHash string, + attempt int, +) (file *FAAFile, err error, retryable bool) { + startedAt := c.now() + res, doErr := c.http.Do(req) + duration := c.now().Sub(startedAt) + if doErr != nil { + // network / dial / response header timeout / ctx cancel + if errors.Is(doErr, context.Canceled) || errors.Is(doErr, context.DeadlineExceeded) { + c.logger.Warn("conversion.faa.ctx_cancelled", + slog.String("endpoint", faaEndpointKind), + slog.String("object_key_hash", keyHash), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + return nil, doErr, false + } + c.logger.Warn("conversion.faa.network_error", + slog.String("endpoint", faaEndpointKind), + slog.String("object_key_hash", keyHash), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration), + // err.Error() 不會含 secret(http.Client 錯誤訊息只有 URL + 連線層 errno), + // 但仍 truncate 防 log 爆量 + slog.String("err", truncate(doErr.Error(), 200))) + return nil, fmt.Errorf("%w: faa network error: %v", ErrFAAUnavailable, doErr), true + } + + // 成功(2xx):直接把 res.Body 透傳給 caller streaming 消費 — **不在這裡 close**! 
+ // 注意:成功路徑沒 defer res.Body.Close() — body 的所有權交給 *FAAFile.Body。 + if res.StatusCode >= 200 && res.StatusCode < 300 { + c.logger.Info("conversion.faa.get_success", + slog.String("endpoint", faaEndpointKind), + slog.String("object_key_hash", keyHash), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Int64("content_length", res.ContentLength), + slog.Duration("duration", duration)) + return &FAAFile{ + Body: res.Body, // caller 責任 Close + ContentLength: res.ContentLength, + ContentType: res.Header.Get("Content-Type"), + ETag: res.Header.Get("ETag"), + }, nil, false + } + + // 失敗(非 2xx):讀少量 body 做 log(避免 5xx 帶大 body 爆 log),然後 close + // 讀進 io.Discard 而不是真的存下來: + // - 不寫進 log(FAA 錯誤 body 可能含 requestId / 路徑等內部資訊) + // - 只是讓 keep-alive 能 reuse connection(read-to-EOF or close) + defer res.Body.Close() + _, _ = io.CopyN(io.Discard, res.Body, faaErrorBodyReadCap) + + c.logger.Warn("conversion.faa.endpoint_error", + slog.String("endpoint", faaEndpointKind), + slog.String("object_key_hash", keyHash), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + + mappedErr, isRetryable := c.mapGetFileError(res.StatusCode) + return nil, mappedErr, isRetryable +} + +// mapGetFileError 把 FAA `GET /files/{key}` 的非 2xx 對應到 sentinel + 是否 retryable。 +// +// 對齊 FAA Program.cs MapGet("/files/{**objectKey}") 的失敗回應: +// - 401 invalid_token / validation_unavailable → ErrServiceClientUnauthorized(不 retry — secret 設定錯) +// - 403 tenant_mismatch / object_key_mismatch / method_mismatch → ErrServiceClientUnauthorized(不 retry) +// - 404 file_not_found → ErrFAAFileNotFound(不 retry — object 不存在) +// - 400 invalid_object_key → ErrFAAUnavailable(不 retry — visionA 端 object_key 命名 bug) +// - 其他 4xx → ErrFAAUnavailable(不 retry) +// - 5xx → ErrFAAUnavailable(**可 retry**:FAA / 下游 storage 暫時失常) +func (c *faaClient) mapGetFileError(status int) (err error, retryable bool) { + switch { + case status == http.StatusUnauthorized 
|| status == http.StatusForbidden: + return fmt.Errorf("%w: faa get file %d", ErrServiceClientUnauthorized, status), false + case status == http.StatusNotFound: + return fmt.Errorf("%w: faa get file %d", ErrFAAFileNotFound, status), false + case status >= 400 && status < 500: + // 400 / 其他 4xx:不可 retry + return fmt.Errorf("%w: faa get file %d", ErrFAAUnavailable, status), false + default: + // 5xx:可 retry + return fmt.Errorf("%w: faa get file %d", ErrFAAUnavailable, status), true + } +} + +// faaRetryBackoff 回傳第 n 次 retry(n 從 1 開始)的等待時間。 +// 1 → 1s, 2 → 2s(對齊 conversion.md §9.1) +// +// 不加 jitter — Phase 0.8 同時打 FAA 的 caller 數量有限(promote-to-models 流程是 +// 序列式 per-job 觸發),併發競爭機率低;jitter 的邊際效益低。 +func faaRetryBackoff(attempt int) time.Duration { + if attempt < 1 { + return faaRetryBaseDelay + } + return faaRetryBaseDelay * time.Duration(attempt) +} + +// buildFileURL 用 url.Parse + ResolveReference 組 GET /files/{objectKey} 的完整 URL。 +// +// 為什麼用 ResolveReference 而不是 string concat: +// - object_key 可能含路徑分隔符("tenant/jobs/abc/output.nef") +// - 直接 concat 容易踩 trailing-slash / encoding 雷 +// - net/url 會做必要的 percent-escape(保留 '/' 為 path separator) +func (c *faaClient) buildFileURL(objectKey string) (string, error) { + base, err := url.Parse(c.baseURL) + if err != nil { + return "", fmt.Errorf("parse base url: %w", err) + } + // 用 url.URL{Path: ...} 避免手動 escape;net/url 會處理 path encoding。 + // 注意:base.Path 可能為空或結尾帶 "/",ResolveReference 會處理。 + ref := &url.URL{Path: "/files/" + objectKey} + return base.ResolveReference(ref).String(), nil +} + +// hashObjectKey 把 object_key 算 SHA-256 後取前 16 hex chars,當 log 用的穩定 hash。 +// +// 為什麼不直接 log object_key: +// - object_key 可能含路徑("tenant/jobs/uuid/output.nef")— 過長 +// - 目前 visionA 的 object_key 不直接含 user 敏感資訊,但保險起見統一 hash +// - 16 chars hex(64-bit)對 visionA 內部 job 數量來說碰撞機率極低,足以追蹤單一 request +func hashObjectKey(objectKey string) string { + sum := sha256.Sum256([]byte(objectKey)) + return hex.EncodeToString(sum[:])[:objectKeyHashLen] +} diff 
--git a/visionA-backend/internal/conversion/faa_client_test.go b/visionA-backend/internal/conversion/faa_client_test.go new file mode 100644 index 0000000..08140dc --- /dev/null +++ b/visionA-backend/internal/conversion/faa_client_test.go @@ -0,0 +1,622 @@ +// FAA Client 單元測試。 +// +// 測試策略: +// - 用 httptest.Server mock FAA 的 GET /files/{key} 端點 +// - 用 stub MCTokenClient(直接回 token / 注入錯誤),不耦合真實 mc_token_client 邏輯 +// - 用 atomic counter 驗 retry 行為(Phase A retry:max 3 attempts = 1 + 2 retries) +// - streaming 驗證用較大但合理大小(10MB)— 真 100MB 會拖慢 test runner 太多 +// +// 測試範疇對應 conversion.md §9.1(FAA GET /files retry max 2 次, 1s/2s): +// - GetFile_Success / GetFile_Streaming / GetFile_AuthHeader +// - GetFile_404_NoRetry / GetFile_401_Unauthorized / GetFile_403_Unauthorized +// - GetFile_5xx_RetryThenSuccess / GetFile_5xx_Exhausted +// - GetFile_Network_RetryThenSuccess / GetFile_Network_Exhausted +// - GetFile_ContextCancel / GetFile_ContextCancel_DuringRetry +// - GetFile_ServiceTokenFailure_Propagated / GetFile_EmptyObjectKey +// - GetFile_400_GenericError / HashObjectKey_StableAndLength +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.3 + §9.1) +package conversion + +import ( + "context" + "errors" + "io" + "net" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ========================================================================== +// FAA mock server helpers +// ========================================================================== + +// newFAAClientForTest 建立指向 mock server 的 FAAClient(使用快速 retry backoff 加速 test)。 +// +// 注意:這個 helper 用較短 backoff(10ms 起跳)讓 retry test 不會跑很久。 +// 真實 production 走 §9.1 的 1s/2s(在 NewFAAClient 預設)。 +func newFAAClientForTest(t *testing.T, baseURL string, tokens MCTokenClient) FAAClient { + t.Helper() + return NewFAAClient(FAAClientOpts{ + BaseURL: baseURL, + Tokens: tokens, + // 用一個簡單的 
http.Client;httptest.Server.Client 也可以但這樣更貼近真實情境, + // 用較短 timeout 加速 test。注意 streaming test 不能用整體 Timeout,所以另外覆寫。 + HTTPClient: &http.Client{Timeout: 5 * time.Second}, + Logger: silentLogger(), + }) +} + +// ========================================================================== +// 成功路徑 +// ========================================================================== + +// TestGetFile_Success:mock 回 200 + binary stream,驗 ContentLength / ETag / ContentType 解析。 +func TestGetFile_Success(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + payload := []byte("binary payload here") + var receivedAuth string + + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + require.Equal(t, http.MethodGet, r.Method) + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("ETag", "\"etag-abc-123\"") + w.Header().Set("Content-Length", "19") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(payload) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + file, err := fc.GetFile(context.Background(), "tenant/jobs/abc/output.nef") + + require.NoError(t, err) + require.NotNil(t, file) + require.NotNil(t, file.Body) + t.Cleanup(func() { _ = file.Body.Close() }) + + assert.Equal(t, "application/octet-stream", file.ContentType) + assert.Equal(t, "\"etag-abc-123\"", file.ETag) + assert.Equal(t, int64(19), file.ContentLength) + + // caller 確實能 streaming 讀到完整 body + body, readErr := io.ReadAll(file.Body) + require.NoError(t, readErr) + assert.Equal(t, payload, body) + + assert.Equal(t, "Bearer svc-tok", receivedAuth, "Bearer service token 必須透傳") + assert.Equal(t, 1, tokens.calls(scopeFAADownloadRead)) +} + +// TestGetFile_Streaming:mock 回 10MB body,confirm caller 能 streaming 讀(不 buffer 全 RAM)。 +// +// 與 InitJob streaming test 對稱:用 io.LimitReader + zerosReader,確認 reader 被多次 Read +// (而非一次性全讀)。但 
net/http 端 download 的 streaming 由 res.Body 提供,這裡的關鍵是: +// - faa_client 必須**不 io.ReadAll** 把 body 提前讀完 +// - caller 用 io.Copy 慢慢讀時,server 端不需要先把全部 buffer 完成 +func TestGetFile_Streaming(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + const totalSize = int64(10 * 1024 * 1024) // 10MB + + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Content-Length", "10485760") + w.WriteHeader(http.StatusOK) + // streaming write — 用 io.Copy from zerosReader(避免一次配 10MB buffer) + _, _ = io.CopyN(w, zerosReader{}, totalSize) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + // streaming download 不能用 http.Client.Timeout(會中斷 body streaming) + fc := NewFAAClient(FAAClientOpts{ + BaseURL: srv.URL, + Tokens: tokens, + // 這裡用無 timeout 的 client(test 自己控) + HTTPClient: &http.Client{}, + Logger: silentLogger(), + }) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + file, err := fc.GetFile(ctx, "big.nef") + + require.NoError(t, err) + require.NotNil(t, file) + t.Cleanup(func() { _ = file.Body.Close() }) + + assert.Equal(t, totalSize, file.ContentLength) + + // 用 countingReader 包 file.Body — 但 countingReader 是 io.Reader, + // 這裡換成 wrap 一下:直接 io.Copy 到 io.Discard,confirm 全 download 完成。 + written, copyErr := io.Copy(io.Discard, file.Body) + require.NoError(t, copyErr) + assert.Equal(t, totalSize, written, "streaming download 必須拿到完整 body") +} + +// TestGetFile_AuthHeader:驗 Bearer token 透傳,且取 token scope 為 files:download.read。 +func TestGetFile_AuthHeader(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("specific-token-xyz") + var receivedAuth string + var receivedAccept string + + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + receivedAccept = r.Header.Get("Accept") 
+ w.Header().Set("Content-Type", "application/octet-stream") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + file, err := fc.GetFile(context.Background(), "key") + require.NoError(t, err) + defer file.Body.Close() + _, _ = io.ReadAll(file.Body) + + assert.Equal(t, "Bearer specific-token-xyz", receivedAuth) + assert.Equal(t, "application/octet-stream", receivedAccept) + assert.Equal(t, 1, tokens.calls(scopeFAADownloadRead), + "必須用 files:download.read scope 取 service token") +} + +// ========================================================================== +// 失敗映射(不 retry 類) +// ========================================================================== + +// TestGetFile_404_NoRetry:mock 回 404 → 立即 return ErrFAAFileNotFound,不 retry。 +func TestGetFile_404_NoRetry(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error":{"code":"file_not_found","message":"File not found."}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + file, err := fc.GetFile(context.Background(), "missing.nef") + + require.Error(t, err) + require.Nil(t, file, "失敗時不應回 FAAFile(避免 caller 誤用 nil body)") + assert.True(t, errors.Is(err, ErrFAAFileNotFound), + "404 → ErrFAAFileNotFound(caller 可精細處理)") + assert.Equal(t, int32(1), attempts.Load(), + "404 不應 retry(object 不存在 retry 也沒用)") + // 對外仍應 mask 成 faa_unavailable(避免揭露 object_key 不存在) + assert.Equal(t, "faa_unavailable", ErrorCode(err)) + assert.Equal(t, 502, HTTPStatus(err)) +} + +// TestGetFile_401_Unauthorized:mock 回 401 → 不 retry,return ErrServiceClientUnauthorized。 +func 
TestGetFile_401_Unauthorized(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte(`{"error":{"code":"invalid_token"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + file, err := fc.GetFile(context.Background(), "k") + + require.Error(t, err) + require.Nil(t, file) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized), + "401 → ErrServiceClientUnauthorized(client 認證設定錯)") + assert.Equal(t, int32(1), attempts.Load(), + "401 不應 retry(secret 設定錯,retry 也是 401)") +} + +// TestGetFile_403_Unauthorized:FAA 端 tenant_mismatch / object_key_mismatch 等 403 都同類處理。 +func TestGetFile_403_Unauthorized(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"error":{"code":"tenant_mismatch"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + _, err := fc.GetFile(context.Background(), "k") + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized)) + assert.Equal(t, int32(1), attempts.Load(), "403 不應 retry") +} + +// TestGetFile_400_GenericError:FAA 400(如 invalid_object_key)→ ErrFAAUnavailable,不 retry。 +func TestGetFile_400_GenericError(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + 
attempts.Add(1) + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":{"code":"invalid_object_key"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + _, err := fc.GetFile(context.Background(), "invalid//key") + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrFAAUnavailable), + "400(非 401/403/404)→ ErrFAAUnavailable") + // 應該不會被 mis-classified 成 ErrFAAFileNotFound + assert.False(t, errors.Is(err, ErrFAAFileNotFound)) + assert.Equal(t, int32(1), attempts.Load(), "400 不應 retry(visionA 端的 bug)") +} + +// ========================================================================== +// Phase A retry 驗證(5xx / network) +// ========================================================================== + +// TestGetFile_5xx_RetryThenSuccess:mock 連續 500 兩次後回 200 → 共 3 次 attempt + 成功。 +// +// 對齊 §9.1:max 2 retries(1s, 2s)— 1 + 2 = 3 attempts;第 3 次成功就 return。 +// 注意:test 用真實 backoff(1s + 2s = 3s)— 為了驗 §9.1 退避時序,可接受。 +func TestGetFile_5xx_RetryThenSuccess(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + payload := []byte("recovered after retry") + + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + n := attempts.Add(1) + if n < 3 { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"internal_error"}}`)) + return + } + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Content-Length", "21") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(payload) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + + start := time.Now() + file, err := fc.GetFile(context.Background(), "k") + duration := time.Since(start) + + require.NoError(t, err) + require.NotNil(t, file) + t.Cleanup(func() { _ = file.Body.Close() }) + + got, _ := io.ReadAll(file.Body) + assert.Equal(t, 
payload, got, "第 3 次成功的 body 應正確透傳") + assert.Equal(t, int32(3), attempts.Load(), + "5xx 應 retry:max 2 retries → 3 attempts") + // 驗時序:兩次 retry 退避 1s + 2s,至少花 3s(容忍輕微誤差用 ≥2.5s) + assert.GreaterOrEqual(t, duration, 2500*time.Millisecond, + "§9.1 退避序列 1s + 2s 應至少耗 2.5s") +} + +// TestGetFile_5xx_Exhausted:mock 持續 500 → 用完 max retry 後 return ErrFAAUnavailable。 +func TestGetFile_5xx_Exhausted(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"internal_error"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + _, err := fc.GetFile(context.Background(), "k") + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrFAAUnavailable), + "5xx exhausted → ErrFAAUnavailable") + assert.Equal(t, int32(faaMaxRetries+1), attempts.Load(), + "5xx 應跑滿 max retries:1 + 2 = 3 attempts") +} + +// TestGetFile_Network_RetryThenSuccess:前 2 次 connection refused,第 3 次成功。 +// +// 用 dynamic listener swap 實作:先用一個 free port 不開 listener(dial fail), +// 第 3 次 attempt 之前才 swap 到真的 mock server。實作上比較複雜 — 改用 +// proxy handler 在 mock server 內部對前 N 次「立刻 hijack 後 close」模擬 dial fail +// 不行(連線已建好);改用「server 端 force-close connection 不送任何 byte」 +// 來模擬 transport 層失敗。 +// +// 簡化版:用一個 proxy server,前 2 次直接 hijack + close 連線(client 看到 EOF), +// 第 3 次正常回 200。 +func TestGetFile_Network_RetryThenSuccess(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + payload := []byte("recovered from net error") + + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + n := attempts.Add(1) + if n < 3 { + // hijack + close 模擬 connection 中斷(client 端會看到 unexpected EOF / read error) + hj, ok := 
w.(http.Hijacker) + if !ok { + t.Fatal("server does not support hijacking") + } + conn, _, err := hj.Hijack() + if err != nil { + t.Fatalf("hijack failed: %v", err) + } + _ = conn.Close() + return + } + w.Header().Set("Content-Type", "application/octet-stream") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(payload) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + file, err := fc.GetFile(context.Background(), "k") + + require.NoError(t, err) + require.NotNil(t, file) + t.Cleanup(func() { _ = file.Body.Close() }) + + got, _ := io.ReadAll(file.Body) + assert.Equal(t, payload, got) + assert.Equal(t, int32(3), attempts.Load(), + "network error 應 retry:max 2 retries → 3 attempts 後成功") +} + +// TestGetFile_Network_Exhausted:dial 失敗持續發生 → 用完 max retry 後 ErrFAAUnavailable。 +// +// 用一個 listen 後立刻 close 的 socket 製造 connection refused(每次 attempt 都失敗)。 +func TestGetFile_Network_Exhausted(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + // 拿一個 free port 立刻關掉(dial 必失敗) + ln, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := ln.Addr().String() + require.NoError(t, ln.Close()) + + fc := NewFAAClient(FAAClientOpts{ + BaseURL: "http://" + addr, + Tokens: tokens, + // 用較短 timeout,但仍要大於 retry 退避總和(1s + 2s = 3s)— 設 10s 安全 + HTTPClient: &http.Client{Timeout: 10 * time.Second}, + Logger: silentLogger(), + }) + + start := time.Now() + _, err = fc.GetFile(context.Background(), "k") + duration := time.Since(start) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrFAAUnavailable), + "network exhausted → ErrFAAUnavailable") + // retry:1 + 2 retries = 3 attempts,2 次退避 = 1s + 2s = 3s 起跳 + assert.GreaterOrEqual(t, duration, 2500*time.Millisecond, + "network retry 應走完 §9.1 退避序列") +} + +// ========================================================================== +// Context cancel +// ========================================================================== + +// 
TestGetFile_ContextCancel:caller cancel ctx → 立即 return ctx.Err()(不包成 sentinel)。 +func TestGetFile_ContextCancel(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + + // mock server:handler 故意 sleep(讓 ctx cancel 在 server response 前發生) + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + select { + case <-r.Context().Done(): + case <-time.After(2 * time.Second): + } + w.WriteHeader(http.StatusOK) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _, err := fc.GetFile(ctx, "k") + require.Error(t, err) + // ctx cancel → 透傳 ctx.Err()(不包成 ErrFAAUnavailable) + assert.True(t, + errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded), + "ctx cancel 應透傳,不應包成 ErrFAAUnavailable") +} + +// TestGetFile_ContextCancel_DuringRetry:ctx cancel 發生在 retry sleep 中 → 立即中斷。 +// +// 流程: +// - mock server 持續 500(觸發 retry) +// - 在第 1 次 retry 退避(1s)的中間(500ms)cancel ctx +// - 期望:GetFile 立即 return ctx.Err(),不等完 1s 退避也不繼續第 2 次 retry +func TestGetFile_ContextCancel_DuringRetry(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + var attempts atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":{"code":"internal_error"}}`)) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + // 等第 1 次 attempt 跑完 + 進 retry sleep 後再 cancel + // 第 1 次 attempt 約 < 100ms;第 1 次 retry 退避 1s,在 500ms cancel + time.Sleep(500 * time.Millisecond) + cancel() + }() + + start := time.Now() + _, err := fc.GetFile(ctx, "k") + 
duration := time.Since(start) + + require.Error(t, err) + assert.True(t, + errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded), + "retry sleep 中 cancel → 透傳 ctx.Err()") + // 應在 cancel 後立即中斷(< 1s 整體時間)— 不該等完 1s 退避或進入第 2 次 retry + assert.Less(t, duration, 900*time.Millisecond, + "ctx cancel 應立即中斷 retry sleep(不等完退避)") + // attempts 應為 1(第 1 次 attempt 後進 retry sleep 就被 cancel) + assert.Equal(t, int32(1), attempts.Load(), + "cancel 後不應再嘗試第 2 次 attempt") +} + +// ========================================================================== +// Token 失敗透傳 +// ========================================================================== + +// TestGetFile_ServiceTokenFailure_Propagated:MCTokenClient 失敗 → 透傳原 sentinel。 +// +// 對應 mc_token_client.go 的 ErrIDPMisconfigured / ErrServiceClientUnauthorized / ErrIDPUnavailable, +// 不應被 faa_client 升級成 ErrFAAUnavailable(會丟失 i18n 區分 idp_misconfig vs idp_down vs faa_down)。 +func TestGetFile_ServiceTokenFailure_Propagated(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + tokenErr error + }{ + {"idp_misconfigured", ErrIDPMisconfigured}, + {"service_client_unauthorized", ErrServiceClientUnauthorized}, + {"idp_unavailable", ErrIDPUnavailable}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tokens := newStubTokenClient("") + tokens.setError(tc.tokenErr) + + // server 不應被打(token 取不到就 fail) + var serverHit atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/files/", func(w http.ResponseWriter, r *http.Request) { + serverHit.Add(1) + w.WriteHeader(http.StatusOK) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + fc := newFAAClientForTest(t, srv.URL, tokens) + _, err := fc.GetFile(context.Background(), "k") + + require.Error(t, err) + assert.True(t, errors.Is(err, tc.tokenErr), + "token 錯誤應透傳;不應包成 ErrFAAUnavailable") + assert.Equal(t, int32(0), serverHit.Load(), + "token 取不到時不應打 FAA") + }) + } +} + +// 
========================================================================== +// 額外:empty object_key validation +// ========================================================================== + +// TestGetFile_EmptyObjectKey:保護性 validation — 空字串 object_key 應立即 fail。 +func TestGetFile_EmptyObjectKey(t *testing.T) { + t.Parallel() + + tokens := newStubTokenClient("svc-tok") + fc := NewFAAClient(FAAClientOpts{ + BaseURL: "http://invalid", + Tokens: tokens, + Logger: silentLogger(), + }) + + _, err := fc.GetFile(context.Background(), "") + require.Error(t, err) + // 不需走網路就應該 fail(token 沒被呼叫) + assert.Equal(t, 0, tokens.calls(scopeFAADownloadRead), + "empty object_key 應立即 fail,不該打 token endpoint") +} + +// ========================================================================== +// hashObjectKey unit test(log 用 hash 函式的穩定性) +// ========================================================================== + +// TestHashObjectKey_StableAndLength:同 input 應產生同 output;長度固定 16。 +func TestHashObjectKey_StableAndLength(t *testing.T) { + t.Parallel() + h1 := hashObjectKey("tenant/jobs/abc/output.nef") + h2 := hashObjectKey("tenant/jobs/abc/output.nef") + h3 := hashObjectKey("tenant/jobs/xyz/output.nef") + + assert.Equal(t, h1, h2, "同 object_key 應產生同 hash(log 可追蹤同一 request)") + assert.NotEqual(t, h1, h3, "不同 object_key hash 應不同") + assert.Len(t, h1, objectKeyHashLen, "hash 長度固定") +} diff --git a/visionA-backend/internal/conversion/flow.go b/visionA-backend/internal/conversion/flow.go new file mode 100644 index 0000000..4ed9784 --- /dev/null +++ b/visionA-backend/internal/conversion/flow.go @@ -0,0 +1,940 @@ +// Flow — Service interface 的具體實作(T6 整合層)。 +// +// 整合 T2 (mc_token_client) / T3 (converter_client) / T4 (faa_client) / T5 (ownership) +// 成為對 handler 暴露的單一 Service。對齊: +// - .autoflow/04-architecture/conversion.md §2.7 整體流程協調 + §4.3.1/§4.3.2 +// - .autoflow/04-architecture/api/api-conversion.md(5 個 endpoint 規格) +// - .autoflow/04-architecture/adr/adr-014-conversion-integration.md 
+// +// 設計原則: +// - flow 不直接 import internal/model / internal/storage, +// 改用 narrow interface(ModelStore / Storage)— 避免 import cycle, +// 讓 main.go 在 wire 時做 adapter,符合 Go 慣例(accept interfaces, return structs) +// - 所有 method 第一步都做 ownership 檢查(trust boundary,§7.2) +// - 多次 promote 冪等:以 modelStore 已有對應 source_job_id 為「已處理」 +// 的 source-of-truth,避免重複 promote / 重複建 model record +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.7) +package conversion + +import ( + "context" + "crypto/rand" + "encoding/hex" + "errors" + "fmt" + "io" + "log/slog" + "mime" + "mime/multipart" + "net/url" + "path" + "strings" + "time" +) + +// ========================================================================== +// Narrow interfaces(避免 import cycle;caller 在 main.go 做 adapter) +// ========================================================================== + +// ModelStore 是 flow 對 internal/model.Repository 的最小依賴子集。 +// +// 設計選擇(FAANG 慣例):consumer 定義介面,不直接 import internal/model; +// main.go 在 wire 時把 *model.InMemoryRepository(或未來的 PostgresRepository) +// 包成 adapter 傳進來。這樣: +// - flow_test.go 可以用 in-package stub 測試,不必拉 model package +// - 未來 model.Repository 介面再擴充也不影響 flow(除非 flow 真的要用新 method) +// - 不引入 import cycle(model 不需 import conversion) +// +// 具體 method 對應 internal/model.Repository: +// - Save: model.Repository.Save +// - FindBySourceJobID: 既有 List + filter SourceJobID(adapter 在 main.go 寫) +// - GenerateID: 由 adapter 注入(model_id 命名邏輯沿用既有專案規則) +type ModelStore interface { + // Save 新增或更新一筆 Model 紀錄。對齊 model.Repository.Save semantics。 + Save(ctx context.Context, m *ModelRecord) error + + // FindBySourceJobID 查找該 user 是否已有對應某 conversion job 的 model record。 + // 用於 PromoteToModels 冪等檢查:同 jobID 重複 promote 直接回既有 model。 + // + // 找不到回 (nil, nil);找到回 (*ModelRecord, nil);其他錯誤回 err。 + FindBySourceJobID(ctx context.Context, ownerUserID, sourceJobID string) (*ModelRecord, error) + + // GenerateID 產一個新的 model_id(對齊既有 model package 的命名)。 + GenerateID() string +} + 
// ModelRecord is the DTO exchanged between flow and ModelStore, so that flow
// never has to import internal/model directly.
//
// The adapter is responsible for converting ModelRecord <-> model.Model.
// The fields are the subset that the promote-to-models write path needs.
type ModelRecord struct {
	ID           string
	OwnerUserID  string
	Name         string
	Description  string
	StorageKey   string
	FileSize     int64
	FileChecksum string
	TargetChip   string
	Source       string // always "converted"
	SourceJobID  string
	CreatedAt    time.Time
	UpdatedAt    time.Time
}

// Storage is the minimal subset of internal/storage.Store that flow depends on.
//
// The promote-to-models flow only needs Put (a streaming write into storage).
type Storage interface {
	// Put writes one object from a streaming reader.
	//   - r is a streaming reader; implementations should not buffer it fully in memory
	//   - size is the expected size in bytes, or -1 when unknown
	//   - meta may be nil
	Put(ctx context.Context, key string, r io.Reader, size int64, meta map[string]string) error
}

// flow is the default Service implementation. It is unexported; callers
// receive it through the Service interface returned by NewService.
type flow struct {
	converter ConverterClient
	faa       FAAClient
	mcToken   MCTokenClient
	ownership Ownership

	modelStore ModelStore
	storage    Storage

	tenantID                 string
	faaBaseURL               string
	defaultJobExpiryDuration time.Duration
	delegatedTTLSeconds      int

	logger *slog.Logger
	now    func() time.Time
}

// FlowOpts carries the dependencies injected into NewService.
//
// Required: Converter, FAA, MCToken, Ownership, ModelStore, Storage,
// TenantID and FAABaseURL. The remaining fields are optional; nil / zero
// values are replaced with defaults by NewService.
type FlowOpts struct {
	// The four collaborating clients.
	Converter ConverterClient
	FAA       FAAClient
	MCToken   MCTokenClient
	Ownership Ownership

	// Narrow adapters over the existing model repository and object storage.
	ModelStore ModelStore
	Storage    Storage

	// TenantID is the tenant id sent with delegated-download token requests.
	TenantID string

	// FAABaseURL is the FAA base URL used to build download URLs
	// (for example http://192.168.0.130:5081). Trailing slashes are trimmed
	// by the constructor.
	FAABaseURL string

	// DefaultJobExpiryDuration is the fallback used to derive expires_at when
	// the converter does not return one. Values <= 0 default to 7 days.
	DefaultJobExpiryDuration time.Duration

	// DelegatedTTLSeconds is the delegated download token TTL in seconds.
	// Values <= 0 default to 300 (5 minutes).
	DelegatedTTLSeconds int

	Logger *slog.Logger
	Now    func() time.Time
}
delegatedTTLSeconds: ttl, + logger: logger, + now: nowFn, + }, nil +} + +// 編譯時檢查:確保 *flow 實作 Service interface。 +var _ Service = (*flow)(nil) + +// ========================================================================== +// InitJob — 對應 POST /api/conversion/init +// ========================================================================== + +// InitJob 對齊 conversion.md §4.2 streaming proxy + §2.7 整體流程。 +// +// 實作流程: +// 1. ownership.EnsureRebuilt(避免 cache 殘留 / 重啟後該 user 第一次進) +// 2. 同 user active job pre-check:有 → 回 *ActiveJobError 帶 active job 細節 +// 3. 用 io.Pipe + multipart.Reader/Writer 重組 multipart body +// - 黑名單 client 帶來的 user_id field(§4.2 / §7.3) +// - 注入 visionA-backend 從 OIDC sub 取得的 UserID +// 4. converter.InitJob 同步等到 201(不 early-return;對齊 §4.3.1 選項 A) +// 5. 寫 ownership.Set(jobID, userID) +// 6. 失敗時的 cleanup 行為(§4.3.2): +// - converter Phase 1 **沒有實作** `POST /api/v1/jobs/{id}/cancel` endpoint +// (已驗證:apps/task-scheduler 的 routes/v1/jobs.js 只有 POST '/'、GET '/'、 +// GET '/:id'、POST '/:id/download-tokens'、DELETE '/:id')。 +// - Phase 0.8 採「socket close 自然 abort」策略:streaming body 中斷時 +// converter multer 拋錯 → 該 job 留 `failed` 狀態 + error_code=invalid_multipart +// → converter 對 active_job 邏輯視為已結束 → 下次 init 不會撞 409。 +// - flow.go 不主動發 cancel(沒有對應 endpoint 可發);只在 InitJob 失敗時 log。 +// - **Phase 1+ 升級**:當 converter 補上 `/cancel` 後,T3 ConverterClient +// 新增 `CancelJob(ctx, jobID) error`,flow.go 在 InitJob 失敗時開獨立 5s +// timeout context(不繼承已 cancel 的 ctx)做 best-effort 主動 cancel。 +// 見 conversion.md §4.3.2 + ./05-implementation/phase-0.8-T6.md follow-ups。 +func (f *flow) InitJob(ctx context.Context, in InitJobInput) (*Job, error) { + if in.UserID == "" { + return nil, errors.New("conversion: InitJob requires UserID") + } + if in.Body == nil { + return nil, errors.New("conversion: InitJob requires Body") + } + if in.ContentType == "" { + return nil, errors.New("conversion: InitJob requires ContentType (must contain multipart boundary)") + } + + // 1. 
ownership lazy rebuild — 確保該 user 的 active jobs 有從 converter 拉回來 + if err := f.ownership.EnsureRebuilt(ctx, in.UserID); err != nil { + // rebuild 失敗:不 hard fail(converter 可能短暫不可達),讓 pre-check 走 stale cache + // — 後面真正打 converter.InitJob 時若 converter 已恢復則照常通過;若仍異常會回 502。 + // 但需要記 log,方便除錯。 + f.logger.WarnContext(ctx, "conversion.flow.init_ownership_rebuild_failed", + slog.String("user_hash", hashUserID(in.UserID)), + slog.String("err", err.Error()), + ) + } + + // 2. 同 user active job pre-check(§9.3) + // 避免 visionA 已知 active 但仍打 converter 浪費一次 round-trip + if existing, err := f.checkActiveJob(ctx, in.UserID); err != nil { + return nil, err + } else if existing != nil { + return nil, &ActiveJobError{Job: existing} + } + + // 3. 重組 multipart:注入 user_id、黑名單 client 帶來的 user_id(§4.2 / §7.3) + pr, pw := io.Pipe() + mw := multipart.NewWriter(pw) + + // goroutine 解析 client multipart 並重寫到 pw;converter 端從 pr 讀 + // + // Close 順序(Reviewer M-2): + // 單一 close 路徑、根據 rebuild err 決定 pw.Close vs pw.CloseWithError — + // 不可用 `defer pw.Close()` 配 `pw.CloseWithError(err)`(defer LIFO 會在 + // CloseWithError 之後跑,把 err 蓋成 nil EOF,converter 端拿到截斷 stream + // 而不是 rebuild 錯誤訊號) + // - mw.Close 必須先(送 final boundary 給 reader),再用 err 決定關 pw 的方式 + // - rebuildErrCh 在 close 之後送,確保主流程拿到 err 時 pipe 已收尾 + rebuildErrCh := make(chan error, 1) + go func() { + err := rebuildMultipart(in.UserID, in.ContentType, in.Body, mw) + // mw.Close 寫 final boundary;即使 rebuild 失敗也要關(避免 mw 內部 buffer 殘留) + if mwErr := mw.Close(); mwErr != nil && err == nil { + err = fmt.Errorf("close multipart writer: %w", mwErr) + } + // 用單一路徑決定 pw 怎麼關 + if err != nil { + _ = pw.CloseWithError(err) + } else { + _ = pw.Close() + } + rebuildErrCh <- err + }() + + // 4. 
POST converter — 同步等到 201(streaming proxy;不 early-return,對齊 §4.3.1) + cj, err := f.converter.InitJob(ctx, InitConverterJobReq{ + UserID: in.UserID, + Body: pr, + BodyContentType: mw.FormDataContentType(), + }) + + // 等 goroutine 結束(pw.Close 已觸發 EOF;rebuild 邏輯已 write 完) + rebuildErr := <-rebuildErrCh + // 若 converter 沒回 error,但 rebuild goroutine 失敗 → 也視為 init 失敗 + if err == nil && rebuildErr != nil { + err = fmt.Errorf("%w: rebuild multipart: %v", ErrConverterUnavailable, rebuildErr) + } + + if err != nil { + // converter 4xx / 5xx / network → 已分類成 sentinel + // Cleanup 策略(§4.3.2,已驗證 converter Phase 1 沒實作 /cancel endpoint): + // 不主動打 cancel —— 靠 converter multer 收 socket close 自然 abort + // (streaming 中斷 → multer 拋錯 → job 留 failed → 下次 init 不會撞 409)。 + // Phase 1+ 等 converter 補 /cancel 後再升級為 best-effort 主動 cancel。 + f.logger.WarnContext(ctx, "conversion.flow.init_failed", + slog.String("user_hash", hashUserID(in.UserID)), + slog.String("err", err.Error()), + ) + return nil, err + } + + // 5. 寫 ownership + f.ownership.Set(cj.JobID, in.UserID) + + job := f.toJob(cj) + f.logger.InfoContext(ctx, "conversion.flow.init_success", + slog.String("user_hash", hashUserID(in.UserID)), + slog.String("job_id", cj.JobID), + slog.String("status", cj.Status), + slog.String("source_filename", cj.SourceFilename), + ) + return job, nil +} + +// rebuildMultipart 解 client 端 multipart,重新寫到 mw。 +// +// 規則(§4.2 / §7.3): +// 1. 先寫 user_id field(從 visionA-backend 注入,唯一可信來源) +// 2. client 帶來的 user_id field 一律忽略(黑名單) +// 3. 
// rebuildMultipart re-encodes the client's multipart body into mw.
//
// Rules:
//  1. The user_id field is written first, using the server-supplied userID.
//  2. Any user_id part sent by the client is discarded.
//  3. Every other form field or file part is copied through as a stream.
//
// The caller remains responsible for closing mw.
func rebuildMultipart(userID, contentType string, body io.Reader, mw *multipart.Writer) error {
	_, mediaParams, parseErr := mime.ParseMediaType(contentType)
	if parseErr != nil {
		return fmt.Errorf("parse content type: %w", parseErr)
	}
	boundary := mediaParams["boundary"]
	if boundary == "" {
		return errors.New("missing multipart boundary")
	}

	// The trusted user_id goes in ahead of every client part, file parts included.
	if writeErr := mw.WriteField("user_id", userID); writeErr != nil {
		return fmt.Errorf("write user_id field: %w", writeErr)
	}

	// forward copies a single client part into mw and always closes the part.
	forward := func(part *multipart.Part) error {
		defer func() { _ = part.Close() }()

		field := part.FormName()
		if field == "user_id" {
			// Client-supplied user_id is never forwarded.
			return nil
		}

		filename := part.FileName()
		if filename != "" {
			// File part: streamed, never buffered whole.
			dst, createErr := mw.CreateFormFile(field, filename)
			if createErr != nil {
				return fmt.Errorf("create form file %q: %w", field, createErr)
			}
			if _, copyErr := io.Copy(dst, part); copyErr != nil {
				return fmt.Errorf("copy form file %q: %w", field, copyErr)
			}
			return nil
		}

		// Plain form field.
		dst, createErr := mw.CreateFormField(field)
		if createErr != nil {
			return fmt.Errorf("create form field %q: %w", field, createErr)
		}
		if _, copyErr := io.Copy(dst, part); copyErr != nil {
			return fmt.Errorf("copy form field %q: %w", field, copyErr)
		}
		return nil
	}

	reader := multipart.NewReader(body, boundary)
	for {
		part, nextErr := reader.NextPart()
		switch {
		case nextErr == io.EOF:
			return nil
		case nextErr != nil:
			return fmt.Errorf("read next part: %w", nextErr)
		}
		if err := forward(part); err != nil {
			return err
		}
	}
}
取第一個(Phase 0.8 同 user 最多 1 個),用 converter.GetJob 確認狀態 +// - 若狀態為 created/running → return 該 Job(給 caller 包成 ActiveJobError) +// - 若 converter 回 404 / 該 job 已 completed / failed → 視為無 active,先清 cache 再 return nil +// +// 沒 active job 回 (nil, nil)。 +func (f *flow) checkActiveJob(ctx context.Context, userID string) (*Job, error) { + jobIDs := f.ownership.ActiveJobOf(userID) + if len(jobIDs) == 0 { + return nil, nil + } + jobID := jobIDs[0] + + cj, err := f.converter.GetJob(ctx, jobID) + if err != nil { + if errors.Is(err, ErrJobNotFound) { + // converter 已 GC(7d 過期)— 清 cache 後視為無 active + f.ownership.Delete(jobID) + return nil, nil + } + // 其他錯誤(5xx / network)— 對 caller 透傳;caller 決定 502 + return nil, err + } + + // 只有 created / running 視為 active + switch cj.Status { + case "completed", "failed": + // 已結束的 job 不算 active;不清 ownership(GetJob / Download 仍需要這個對應) + return nil, nil + default: + return f.toJob(cj), nil + } +} + +// ========================================================================== +// GetJob — 對應 GET /api/conversion/{job_id} +// ========================================================================== + +// GetJob 對齊 conversion.md §2.7 + api-conversion.md §2。 +// +// 流程: +// 1. ownership.EnsureRebuilt(確保 cache 已 lazy rebuild) +// 2. ownership.Get(jobID) — 比對 owner;不符 → ErrJobNotFound(避免洩漏 job 存在性) +// 3. converter.GetJob(jobID) +// 4. 
若 expires_at 為零,補 created_at + DefaultJobExpiryDuration +// +// 設計選擇:ownership 不符不回 forbidden,而是 not_found: +// - 避免讓攻擊者用「forbidden vs not_found」差異枚舉合法 job_id +// - 對齊 §7.2 安全考量 +func (f *flow) GetJob(ctx context.Context, userID, jobID string) (*Job, error) { + if userID == "" { + return nil, errors.New("conversion: GetJob requires userID") + } + if jobID == "" { + return nil, ErrJobNotFound + } + + if err := f.ownership.EnsureRebuilt(ctx, userID); err != nil { + // rebuild 失敗:不視為 fatal,繼續走 cache(可能 stale);fail-soft + f.logger.WarnContext(ctx, "conversion.flow.get_ownership_rebuild_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("err", err.Error()), + ) + } + + owner, ok := f.ownership.Get(jobID) + if !ok || owner != userID { + // 不符 → 視為 not_found(避免洩漏存在性) + return nil, ErrJobNotFound + } + + cj, err := f.converter.GetJob(ctx, jobID) + if err != nil { + return nil, err + } + return f.toJob(cj), nil +} + +// ========================================================================== +// ActiveJob — 對應 GET /api/conversion/active +// ========================================================================== + +// ActiveJob 對齊 conversion.md §2.6.1 lazy rebuild + api-conversion.md §5。 +// +// 流程: +// 1. ownership.EnsureRebuilt(從 converter ListInProgressJobs 重建 cache) +// 2. ownership.ActiveJobOf — 反查 +// 3. 沒有 → return (nil, nil)(不視為 error;對齊 has_active=false 語意) +// 4. 取 [0](Phase 0.8 ≤ 1)→ converter.GetJob 拿即時狀態 +// 5. converter 回 404(job 已過期被 GC)→ 清 cache + return (nil, nil) +// +// 重啟恢復場景:visionA-backend in-memory cache 全空時,EnsureRebuilt 會打 +// converter ListInProgressJobs 把該 user 的 active job 重建進來,使用者看不出差別。 +func (f *flow) ActiveJob(ctx context.Context, userID string) (*Job, error) { + if userID == "" { + return nil, errors.New("conversion: ActiveJob requires userID") + } + + // 1. 
lazy rebuild(這個路徑不 fail-soft:rebuild 失敗 = 無法回答 has_active 問題, + // 必須 propagate 給 caller 知道) + if err := f.ownership.EnsureRebuilt(ctx, userID); err != nil { + return nil, err + } + + // 2. 反查 + jobIDs := f.ownership.ActiveJobOf(userID) + if len(jobIDs) == 0 { + return nil, nil + } + + // 3. 取第一個,問 converter 即時狀態 + jobID := jobIDs[0] + cj, err := f.converter.GetJob(ctx, jobID) + if err != nil { + if errors.Is(err, ErrJobNotFound) { + // converter 已 GC → 清 cache + 視為無 active + f.ownership.Delete(jobID) + return nil, nil + } + return nil, err + } + + // 已 completed / failed 的 job 也不算 active(has_active=false) + if cj.Status == "completed" || cj.Status == "failed" { + return nil, nil + } + return f.toJob(cj), nil +} + +// ========================================================================== +// PromoteToModels — 對應 POST /api/conversion/{job_id}/promote-to-models +// ========================================================================== + +// PromoteToModels 對齊 conversion.md §1 Stage 3a + §2.5 + api-conversion.md §3。 +// +// 流程: +// 1. ownership 驗(不符 → ErrJobNotFound) +// 2. converter.GetJob — 確認 status=completed(否則 ErrJobNotCompleted) +// 3. 冪等檢查:modelStore.FindBySourceJobID — 已有 model 直接回(避免重複 promote) +// 4. converter.Promote — 拿到 target_object_key +// 5. faa.GetFile(target_object_key) — streaming pull NEF +// 6. storage.Put — streaming 寫進 visionA storage(不 ReadAll) +// 7. modelStore.Save — 建 model record(Source="converted"、SourceJobID=jobID) +// 8. return PromoteResult +// +// 名稱:caller 從 wireframe §7.1 的 import Dialog 拿;空字串 fallback 為 +// `_`(對齊 api-conversion.md §3)。 +func (f *flow) PromoteToModels(ctx context.Context, userID, jobID, name string) (*PromoteResult, error) { + if userID == "" { + return nil, errors.New("conversion: PromoteToModels requires userID") + } + if jobID == "" { + return nil, ErrJobNotFound + } + + // 1. 
ownership rebuild + 驗 + if err := f.ownership.EnsureRebuilt(ctx, userID); err != nil { + f.logger.WarnContext(ctx, "conversion.flow.promote_ownership_rebuild_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("err", err.Error()), + ) + } + owner, ok := f.ownership.Get(jobID) + if !ok || owner != userID { + return nil, ErrJobNotFound + } + + // 2. converter.GetJob 確認 completed + cj, err := f.converter.GetJob(ctx, jobID) + if err != nil { + return nil, err + } + if cj.Status != "completed" { + return nil, fmt.Errorf("%w: status=%s", ErrJobNotCompleted, cj.Status) + } + + // 3. 冪等檢查 + if existing, err := f.modelStore.FindBySourceJobID(ctx, userID, jobID); err != nil { + f.logger.WarnContext(ctx, "conversion.flow.promote_find_existing_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("err", err.Error()), + ) + // 查 model store 失敗不 hard fail —— 仍嘗試 promote(最壞結果是重複建一個 model record) + } else if existing != nil { + f.logger.InfoContext(ctx, "conversion.flow.promote_idempotent_hit", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("model_id", existing.ID), + ) + return modelRecordToPromoteResult(existing), nil + } + + // 4. converter.Promote — 組目標 object_key(FAA 內部命名規則由 visionA 決定) + finalName := name + if finalName == "" { + finalName = defaultModelName(cj) + } + targetObjectKey := buildTargetObjectKey(userID, jobID) + + promoteRes, err := f.converter.Promote(ctx, jobID, PromoteReq{ + UserID: userID, + Source: promoteDefaultSource, // "nef" + TargetObjectKey: targetObjectKey, + }) + if err != nil { + return nil, err + } + + // 5. faa.GetFile streaming pull + file, err := f.faa.GetFile(ctx, promoteRes.TargetObjectKey) + if err != nil { + return nil, err + } + defer file.Body.Close() + + // 6. 
storage.Put streaming write + modelID := f.modelStore.GenerateID() + storageKey := buildStorageKey(userID, modelID) + storageMeta := map[string]string{ + "source": "converted", + "source_job_id": jobID, + "target_chip": normalizeTargetChip(cj.Platform), + } + if err := f.storage.Put(ctx, storageKey, file.Body, file.ContentLength, storageMeta); err != nil { + f.logger.WarnContext(ctx, "conversion.flow.promote_storage_put_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("storage_key", storageKey), + slog.String("err", err.Error()), + ) + // visionA 自家 storage 失敗(disk full / S3 5xx / 權限錯誤) + // — 不是 FAA / converter 問題,用獨立 sentinel 讓 SRE alarm 打對 team + // (對齊 Reviewer M-1) + return nil, fmt.Errorf("%w: storage.Put %s: %v", ErrStorageUnavailable, storageKey, err) + } + + // 7. modelStore.Save + now := f.now().UTC() + rec := &ModelRecord{ + ID: modelID, + OwnerUserID: userID, + Name: finalName, + StorageKey: storageKey, + FileSize: promoteRes.Size, + FileChecksum: promoteRes.Checksum, + TargetChip: normalizeTargetChip(cj.Platform), + Source: "converted", + SourceJobID: jobID, + CreatedAt: now, + UpdatedAt: now, + } + if err := f.modelStore.Save(ctx, rec); err != nil { + f.logger.WarnContext(ctx, "conversion.flow.promote_model_save_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("model_id", modelID), + slog.String("err", err.Error()), + ) + // model store save 失敗(in-memory 不會失敗;未來 Postgres 才會觸發) + // — 不是 converter / FAA 問題,用獨立 sentinel 對齊 SRE alarm 分類(Reviewer M-1) + // 已寫進 storage 但無 record 對應 → 等同孤立檔案;Phase 1 加 GC 機制清掃 + return nil, fmt.Errorf("%w: modelStore.Save model_id=%s: %v", ErrModelStoreUnavailable, modelID, err) + } + + f.logger.InfoContext(ctx, "conversion.flow.promote_success", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("model_id", modelID), + slog.Int64("file_size", promoteRes.Size), + ) + + return 
modelRecordToPromoteResult(rec), nil +} + +// ========================================================================== +// DownloadRedirectURL — 對應 GET /api/conversion/{job_id}/download +// ========================================================================== + +// DownloadRedirectURL 對齊 conversion.md §1 Stage 3b + §3.1 + api-conversion.md §4。 +// +// 流程: +// 1. ownership 驗(不符 → ErrJobNotFound) +// 2. converter.GetJob — 確認 status=completed +// 3. ensurePromoted — 自動觸發 promote(若還沒 promote 過),拿到 target_object_key +// - 設計選擇(task spec 詢問點):自動觸發。理由:api-conversion.md §4 註解說 +// 「兩條路徑(promote-to-models / download)都拿同一個 target_object_key」+ +// 「不會與 promote-to-models 衝突;兩者內部都會 ensurePromoted(冪等)」— +// 要求 user 先按 promote-to-models 才能下載會違背「下載」按鈕的直覺語意。 +// 4. mcToken.IssueDelegatedDownload — 換 opaque token (TTL 5min 預設) +// 5. 組 https:///files/?access_token= +// +// 安全(§10.4): +// - token 不出現在任何 JSON response(caller 走 server-side 302 redirect) +// - object_key 不對 frontend 揭露 +func (f *flow) DownloadRedirectURL(ctx context.Context, userID, jobID string) (string, error) { + if userID == "" { + return "", errors.New("conversion: DownloadRedirectURL requires userID") + } + if jobID == "" { + return "", ErrJobNotFound + } + + // 1. ownership 驗 + if err := f.ownership.EnsureRebuilt(ctx, userID); err != nil { + f.logger.WarnContext(ctx, "conversion.flow.download_ownership_rebuild_failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("err", err.Error()), + ) + } + owner, ok := f.ownership.Get(jobID) + if !ok || owner != userID { + return "", ErrJobNotFound + } + + // 2. converter.GetJob 確認 completed + cj, err := f.converter.GetJob(ctx, jobID) + if err != nil { + return "", err + } + if cj.Status != "completed" { + return "", fmt.Errorf("%w: status=%s", ErrJobNotCompleted, cj.Status) + } + + // 3. 
ensurePromoted — 自動觸發 promote 拿 target_object_key + // Phase 0.8 不 cache promoted_object_key(converter 端 promote 是冪等的, + // 重複呼叫成本可接受 — 反正 download 路徑 user 主動觸發頻率不高) + targetObjectKey, err := f.ensurePromoted(ctx, userID, jobID, cj) + if err != nil { + return "", err + } + + // 4. mcToken 換 delegated download token + delegated, err := f.mcToken.IssueDelegatedDownload(ctx, IssueDownloadReq{ + TenantID: f.tenantID, + UserID: userID, + ObjectKey: targetObjectKey, + ExpiresInSeconds: f.delegatedTTLSeconds, + }) + if err != nil { + return "", err + } + + // 5. 組 URL:FAA base + /files/?access_token= + // - object_key 用 url.PathEscape 處理(含路徑分隔符的 key 安全 escape) + // - token 用 url.QueryEscape(雖 opaque token 通常不含特殊字元,仍 escape 防呆) + downloadURL := fmt.Sprintf("%s/files/%s?access_token=%s", + f.faaBaseURL, + escapeObjectKeyPath(targetObjectKey), + url.QueryEscape(delegated.Token), + ) + + f.logger.InfoContext(ctx, "conversion.flow.download_url_issued", + slog.String("user_hash", hashUserID(userID)), + slog.String("job_id", jobID), + slog.String("object_key_hash", hashObjectKey(targetObjectKey)), + slog.Int("ttl_sec", f.delegatedTTLSeconds), + ) + + return downloadURL, nil +} + +// ensurePromoted 取 target_object_key — 若已 promote 過(model record 已存在)用 cache, +// 否則打 converter.Promote 拿。 +// +// 用 modelStore.FindBySourceJobID 當 source-of-truth:若已有 model record 表示 +// PromoteToModels 已成功跑過,可直接從 record 拿 storage_key 反推 target_object_key? 
+// ✗ 不行:storage_key 是 visionA storage 的 key,不是 FAA 的 object_key。 +// +// 改用 converter.Promote 冪等性(§2.7:「promote 動作是冪等的,converter 端對同一 +// job 重複 promote 接受」)— 直接打 converter,重複呼叫成本低(同步等 1-2s)。 +// +// 為什麼不用 sync.Map cache:Phase 0.8 download 路徑 user 主動觸發頻率不高(每 job 1-N 次), +// 簡單性 > 微優化。Phase 1 量大再加 cache(progress.md 已記)。 +func (f *flow) ensurePromoted(ctx context.Context, userID, jobID string, cj *ConverterJob) (string, error) { + targetObjectKey := buildTargetObjectKey(userID, jobID) + res, err := f.converter.Promote(ctx, jobID, PromoteReq{ + UserID: userID, + Source: promoteDefaultSource, + TargetObjectKey: targetObjectKey, + }) + if err != nil { + return "", err + } + return res.TargetObjectKey, nil +} + +// ========================================================================== +// helpers +// ========================================================================== + +// toJob 把 ConverterJob(client 層中介 type)轉成對外的 Job(response shape)。 +// +// 補 expires_at fallback:converter 沒給 → created_at + DefaultJobExpiryDuration(§2.6.2)。 +func (f *flow) toJob(cj *ConverterJob) *Job { + if cj == nil { + return nil + } + job := &Job{ + JobID: cj.JobID, + Status: cj.Status, + Stage: cj.Stage, + CreatedAt: cj.CreatedAt, + UpdatedAt: cj.UpdatedAt, + ExpiresAt: cj.ExpiresAt, + SourceFilename: cj.SourceFilename, + TargetChip: cj.Platform, + ErrorCode: cj.ErrorCode, + ErrorMessage: cj.ErrorMessage, + } + if cj.Progress != nil { + job.Progress = *cj.Progress + } + if cj.StageProgress != nil { + job.StageProgress = *cj.StageProgress + } + if job.ExpiresAt.IsZero() && !cj.CreatedAt.IsZero() { + job.ExpiresAt = cj.CreatedAt.Add(f.defaultJobExpiryDuration) + } + return job +} + +// modelRecordToPromoteResult 把 ModelRecord 轉成對外的 PromoteResult。 +func modelRecordToPromoteResult(rec *ModelRecord) *PromoteResult { + if rec == nil { + return nil + } + return &PromoteResult{ + ModelID: rec.ID, + Source: rec.Source, + SourceJobID: rec.SourceJobID, + Name: rec.Name, + TargetChip: 
// buildTargetObjectKey returns the FAA object key for a promoted job:
// models/{user_id}/{job_id}.nef.
//
// Neither argument is escaped here; callers are expected to pass plain
// identifier values.
func buildTargetObjectKey(userID, jobID string) string {
	return "models/" + userID + "/" + jobID + ".nef"
}

// buildStorageKey returns the key used in this service's own storage (not
// FAA's): models/{user_id}/{model_id}.nef.
func buildStorageKey(userID, modelID string) string {
	return "models/" + userID + "/" + modelID + ".nef"
}

// escapeObjectKeyPath path-escapes an object key while keeping '/' as the
// separator.
//
// url.PathEscape applied to the whole key would turn '/' into %2F, so the key
// is split on '/', each segment is escaped on its own, and the segments are
// joined back together.
func escapeObjectKeyPath(objectKey string) string {
	segments := strings.Split(objectKey, "/")
	escaped := make([]string, 0, len(segments))
	for _, segment := range segments {
		escaped = append(escaped, url.PathEscape(segment))
	}
	return strings.Join(escaped, "/")
}

// normalizeTargetChip converts a converter platform value such as "720" into
// the model target_chip form "kl720".
//
// The input is trimmed and lower-cased. An empty result stays empty, and a
// value that already starts with "kl" is returned without a second prefix.
func normalizeTargetChip(platform string) string {
	chip := strings.ToLower(strings.TrimSpace(platform))
	switch {
	case chip == "":
		return ""
	case strings.HasPrefix(chip, "kl"):
		return chip
	default:
		return "kl" + chip
	}
}
// generateRandomID returns 16 lowercase hex characters (64 bits) read from
// crypto/rand. It is unexported and kept as a fallback ID source.
//
// If crypto/rand fails, the current UnixNano timestamp in decimal is returned
// instead.
//
//nolint:unused // kept for adapters that need a fallback ID generator
func generateRandomID() string {
	var raw [8]byte
	if _, err := rand.Read(raw[:]); err != nil {
		// Last-resort fallback when the random source is unavailable.
		return fmt.Sprintf("%d", time.Now().UnixNano())
	}
	return hex.EncodeToString(raw[:])
}
"context" + "errors" + "fmt" + "io" + "mime/multipart" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ========================================================================== +// stubs — 補齊 ownership_test.go 沒實作的 method +// ========================================================================== + +// flowStubConverter 是 flow_test 專用的 ConverterClient stub。 +// +// 與 ownership_test.go 的 stubConverterClient 區隔: +// - ownership_test 只用 ListInProgressJobs,其他 method panic +// - flow_test 需要 InitJob / GetJob / Promote / List 全套 +// +// 設計:行為由 functional fields(initJobFunc 等)控制,testcase 寫起來直觀。 +type flowStubConverter struct { + mu sync.Mutex + + // 預設行為:jobsByID 用於 GetJob lookup;initJobFunc 用於控制 InitJob 結果 + jobsByID map[string]*ConverterJob + + // 各 method 的 hook(nil → 走預設行為) + initJobFunc func(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) + getJobFunc func(ctx context.Context, jobID string) (*ConverterJob, error) + promoteFunc func(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error) + listInProgressJobsFunc func(ctx context.Context, userID string) ([]*ConverterJob, error) + + // 各 method 呼叫次數(atomic) + initJobCalls atomic.Int32 + getJobCalls atomic.Int32 + promoteCalls atomic.Int32 + listInProgressJobsCalls atomic.Int32 + + // 紀錄 InitJob 收到的 body(驗證 multipart user_id 注入) + lastInitBody []byte + lastInitBodyType string +} + +func newFlowStubConverter() *flowStubConverter { + return &flowStubConverter{ + jobsByID: make(map[string]*ConverterJob), + } +} + +func (s *flowStubConverter) setJob(j *ConverterJob) { + s.mu.Lock() + defer s.mu.Unlock() + s.jobsByID[j.JobID] = j +} + +func (s *flowStubConverter) InitJob(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) { + s.initJobCalls.Add(1) + // 把 body 讀完(模擬 converter 收到 streaming body) + if req.Body != nil { + buf, _ := io.ReadAll(req.Body) + 
s.mu.Lock() + s.lastInitBody = buf + s.lastInitBodyType = req.BodyContentType + s.mu.Unlock() + } + if s.initJobFunc != nil { + return s.initJobFunc(ctx, req) + } + // 預設:回一個 created job + return &ConverterJob{ + JobID: "stub-job-1", + Status: "created", + Stage: "onnx", + CreatedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + SourceFilename: req.SourceFilename, + Platform: req.Platform, + }, nil +} + +func (s *flowStubConverter) GetJob(ctx context.Context, jobID string) (*ConverterJob, error) { + s.getJobCalls.Add(1) + if s.getJobFunc != nil { + return s.getJobFunc(ctx, jobID) + } + s.mu.Lock() + defer s.mu.Unlock() + if j, ok := s.jobsByID[jobID]; ok { + jc := *j + return &jc, nil + } + return nil, fmt.Errorf("%w: get_job 404 (not_found)", ErrJobNotFound) +} + +func (s *flowStubConverter) Promote(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error) { + s.promoteCalls.Add(1) + if s.promoteFunc != nil { + return s.promoteFunc(ctx, jobID, req) + } + return &ConverterPromoteResult{ + TargetObjectKey: req.TargetObjectKey, + Size: 12345, + Checksum: "stub-etag", + }, nil +} + +func (s *flowStubConverter) ListInProgressJobs(ctx context.Context, userID string) ([]*ConverterJob, error) { + s.listInProgressJobsCalls.Add(1) + if s.listInProgressJobsFunc != nil { + return s.listInProgressJobsFunc(ctx, userID) + } + return nil, nil +} + +var _ ConverterClient = (*flowStubConverter)(nil) + +// flowStubFAA 是 FAAClient stub。 +type flowStubFAA struct { + mu sync.Mutex + getFileFunc func(ctx context.Context, objectKey string) (*FAAFile, error) + getCalls atomic.Int32 + lastKey string +} + +func newFlowStubFAA() *flowStubFAA { + return &flowStubFAA{} +} + +func (s *flowStubFAA) GetFile(ctx context.Context, objectKey string) (*FAAFile, error) { + s.getCalls.Add(1) + s.mu.Lock() + s.lastKey = objectKey + s.mu.Unlock() + if s.getFileFunc != nil { + return s.getFileFunc(ctx, objectKey) + } + body := io.NopCloser(strings.NewReader("nef-bytes-stub")) 
+ return &FAAFile{ + Body: body, + ContentLength: int64(len("nef-bytes-stub")), + ContentType: "application/octet-stream", + ETag: "stub-etag", + }, nil +} + +var _ FAAClient = (*flowStubFAA)(nil) + +// flowStubMCToken 是 MCTokenClient stub。 +type flowStubMCToken struct { + serviceTokenFunc func(ctx context.Context, scope string) (string, error) + issueDelegatedDownloadFunc func(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) + + // 紀錄最後一次 IssueDelegatedDownload 收到的 input + mu sync.Mutex + lastIssueInput *IssueDownloadReq +} + +func newFlowStubMCToken() *flowStubMCToken { + return &flowStubMCToken{} +} + +func (s *flowStubMCToken) ServiceToken(ctx context.Context, scope string) (string, error) { + if s.serviceTokenFunc != nil { + return s.serviceTokenFunc(ctx, scope) + } + return "stub-service-token", nil +} + +func (s *flowStubMCToken) IssueDelegatedDownload(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) { + s.mu.Lock() + cp := in + s.lastIssueInput = &cp + s.mu.Unlock() + if s.issueDelegatedDownloadFunc != nil { + return s.issueDelegatedDownloadFunc(ctx, in) + } + return &DelegatedDownloadToken{ + Token: "opaque-stub-token-xyz", + ExpiresAt: time.Now().Add(5 * time.Minute), + }, nil +} + +var _ MCTokenClient = (*flowStubMCToken)(nil) + +// flowStubModelStore 是 ModelStore stub。 +type flowStubModelStore struct { + mu sync.Mutex + + // records: model_id → ModelRecord + records map[string]*ModelRecord + + // idCounter 給 GenerateID 用 + idCounter atomic.Int32 + + // hook 控制(測試 model save 失敗用) + saveErr error + findErr error +} + +func newFlowStubModelStore() *flowStubModelStore { + return &flowStubModelStore{ + records: make(map[string]*ModelRecord), + } +} + +func (s *flowStubModelStore) Save(ctx context.Context, m *ModelRecord) error { + if s.saveErr != nil { + return s.saveErr + } + s.mu.Lock() + defer s.mu.Unlock() + cp := *m + s.records[m.ID] = &cp + return nil +} + +func (s *flowStubModelStore) 
FindBySourceJobID(ctx context.Context, ownerUserID, sourceJobID string) (*ModelRecord, error) { + if s.findErr != nil { + return nil, s.findErr + } + s.mu.Lock() + defer s.mu.Unlock() + for _, r := range s.records { + if r.OwnerUserID == ownerUserID && r.SourceJobID == sourceJobID { + cp := *r + return &cp, nil + } + } + return nil, nil +} + +func (s *flowStubModelStore) GenerateID() string { + n := s.idCounter.Add(1) + return fmt.Sprintf("model-%03d", n) +} + +var _ ModelStore = (*flowStubModelStore)(nil) + +// flowStubStorage 是 Storage stub。 +type flowStubStorage struct { + mu sync.Mutex + + // objects: key → bytes(驗證 streaming write 正確) + objects map[string][]byte + putErr error + + putCalls atomic.Int32 +} + +func newFlowStubStorage() *flowStubStorage { + return &flowStubStorage{ + objects: make(map[string][]byte), + } +} + +func (s *flowStubStorage) Put(ctx context.Context, key string, r io.Reader, size int64, meta map[string]string) error { + s.putCalls.Add(1) + if s.putErr != nil { + // 仍 read 防 io.Pipe 寫端 block + _, _ = io.Copy(io.Discard, r) + return s.putErr + } + buf, err := io.ReadAll(r) + if err != nil { + return err + } + s.mu.Lock() + s.objects[key] = buf + s.mu.Unlock() + return nil +} + +var _ Storage = (*flowStubStorage)(nil) + +// ========================================================================== +// helper: 建立 flow service + 全套 stub +// ========================================================================== + +type flowFixture struct { + svc Service + converter *flowStubConverter + faa *flowStubFAA + mcToken *flowStubMCToken + models *flowStubModelStore + storage *flowStubStorage + ownership Ownership +} + +func newFlowFixture(t *testing.T) *flowFixture { + t.Helper() + conv := newFlowStubConverter() + faa := newFlowStubFAA() + mcToken := newFlowStubMCToken() + models := newFlowStubModelStore() + storage := newFlowStubStorage() + own := NewOwnership(conv, newSilentLogger()) + + svc, err := NewService(FlowOpts{ + Converter: conv, + 
FAA: faa, + MCToken: mcToken, + Ownership: own, + ModelStore: models, + Storage: storage, + TenantID: "visiona-tenant", + FAABaseURL: "https://faa.example.com", + DefaultJobExpiryDuration: 7 * 24 * time.Hour, + DelegatedTTLSeconds: 300, + Logger: newSilentLogger(), + Now: time.Now, + }) + require.NoError(t, err) + + return &flowFixture{ + svc: svc, + converter: conv, + faa: faa, + mcToken: mcToken, + models: models, + storage: storage, + ownership: own, + } +} + +// makeMultipartBody 建一個合法的 multipart/form-data body 給 InitJob 測試用。 +// +// 包含:model_id / version / platform / model(fake .onnx file)+ 故意塞一個 client user_id(測黑名單)。 +func makeMultipartBody(t *testing.T, clientUserID string) (body io.Reader, contentType string) { + t.Helper() + var buf bytes.Buffer + mw := multipart.NewWriter(&buf) + require.NoError(t, mw.WriteField("model_id", "1024")) + require.NoError(t, mw.WriteField("version", "v1.0.0")) + require.NoError(t, mw.WriteField("platform", "720")) + if clientUserID != "" { + require.NoError(t, mw.WriteField("user_id", clientUserID)) // 應被黑名單 + } + fw, err := mw.CreateFormFile("model", "yolov5s.onnx") + require.NoError(t, err) + _, err = fw.Write([]byte("fake-onnx-bytes")) + require.NoError(t, err) + require.NoError(t, mw.Close()) + return &buf, mw.FormDataContentType() +} + +// ========================================================================== +// Constructor — 缺欄位驗證 +// ========================================================================== + +func TestNewService_RequiredFields(t *testing.T) { + t.Parallel() + conv := newFlowStubConverter() + faa := newFlowStubFAA() + mc := newFlowStubMCToken() + own := NewOwnership(conv, newSilentLogger()) + mod := newFlowStubModelStore() + st := newFlowStubStorage() + + tests := []struct { + name string + opts FlowOpts + }{ + {"missing converter", FlowOpts{FAA: faa, MCToken: mc, Ownership: own, ModelStore: mod, Storage: st, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing faa", FlowOpts{Converter: conv, 
MCToken: mc, Ownership: own, ModelStore: mod, Storage: st, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing mc", FlowOpts{Converter: conv, FAA: faa, Ownership: own, ModelStore: mod, Storage: st, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing ownership", FlowOpts{Converter: conv, FAA: faa, MCToken: mc, ModelStore: mod, Storage: st, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing modelstore", FlowOpts{Converter: conv, FAA: faa, MCToken: mc, Ownership: own, Storage: st, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing storage", FlowOpts{Converter: conv, FAA: faa, MCToken: mc, Ownership: own, ModelStore: mod, TenantID: "t", FAABaseURL: "https://x"}}, + {"missing tenant", FlowOpts{Converter: conv, FAA: faa, MCToken: mc, Ownership: own, ModelStore: mod, Storage: st, FAABaseURL: "https://x"}}, + {"missing faaurl", FlowOpts{Converter: conv, FAA: faa, MCToken: mc, Ownership: own, ModelStore: mod, Storage: st, TenantID: "t"}}, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := NewService(tt.opts) + require.Error(t, err) + }) + } +} + +func TestNewService_DefaultsApplied(t *testing.T) { + t.Parallel() + conv := newFlowStubConverter() + faa := newFlowStubFAA() + mc := newFlowStubMCToken() + own := NewOwnership(conv, newSilentLogger()) + mod := newFlowStubModelStore() + st := newFlowStubStorage() + + svc, err := NewService(FlowOpts{ + Converter: conv, FAA: faa, MCToken: mc, Ownership: own, + ModelStore: mod, Storage: st, + TenantID: "visiona", FAABaseURL: "https://faa.example.com/", + // DefaultJobExpiryDuration / DelegatedTTLSeconds 留空 → 應 fallback + }) + require.NoError(t, err) + require.NotNil(t, svc) + + f := svc.(*flow) + assert.Equal(t, 7*24*time.Hour, f.defaultJobExpiryDuration) + assert.Equal(t, 300, f.delegatedTTLSeconds) + assert.Equal(t, "https://faa.example.com", f.faaBaseURL, "trailing slash 應被 trim") +} + +// ========================================================================== 
+// InitJob
+// ==========================================================================
+
+// TestInitJob_HappyPath: standard init flow; the client-supplied user_id is blacklisted and the server-side one is injected.
+func TestInitJob_HappyPath(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	body, ct := makeMultipartBody(t, "fake-client-userid")
+
+	job, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: ct,
+		Body:        body,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, job)
+	assert.Equal(t, "stub-job-1", job.JobID)
+	assert.Equal(t, "created", job.Status)
+	assert.Equal(t, int32(1), fix.converter.initJobCalls.Load())
+
+	// verify ownership has been recorded
+	uid, ok := fix.ownership.Get("stub-job-1")
+	assert.True(t, ok)
+	assert.Equal(t, "user-alice", uid)
+
+	// verify the user_id in the multipart body is the one injected by visionA; the client-supplied one is blacklisted
+	fix.converter.mu.Lock()
+	gotBody := string(fix.converter.lastInitBody)
+	fix.converter.mu.Unlock()
+	assert.Contains(t, gotBody, "user-alice", "visionA-backend 注入的 user_id 應在 body 中")
+	// fake-client-userid must not appear (blacklisted)
+	assert.NotContains(t, gotBody, "fake-client-userid",
+		"client 帶的 user_id 應被黑名單,不應出現在送給 converter 的 body")
+}
+
+// TestInitJob_ActiveJobExists: the same user already has an active job → ActiveJobError.
+//
+// This case comes from the task spec's "additional cases to test" list.
+func TestInitJob_ActiveJobExists(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	// pre-seed an active job into the cache
+	createdAt := time.Now().UTC()
+	fix.converter.setJob(&ConverterJob{
+		JobID:     "existing-job",
+		Status:    "running",
+		Stage:     "bie",
+		CreatedAt: createdAt,
+	})
+	fix.ownership.Set("existing-job", "user-alice")
+
+	body, ct := makeMultipartBody(t, "")
+	_, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: ct,
+		Body:        body,
+	})
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrActiveJobExists))
+
+	var ae *ActiveJobError
+	require.True(t, errors.As(err, &ae))
+	require.NotNil(t, ae.Job)
+	assert.Equal(t, "existing-job", ae.Job.JobID)
+	assert.Equal(t, "running", ae.Job.Status)
+
+	// converter.InitJob must not be called (blocked by the pre-check)
+	assert.Equal(t, int32(0), fix.converter.initJobCalls.Load())
+}
+
+// TestInitJob_ActiveJob_AlreadyCompleted_PassThrough: the job in the cache is already completed
+// → treated as having no active job; init proceeds normally.
+func TestInitJob_ActiveJob_AlreadyCompleted_PassThrough(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID:     "old-job",
+		Status:    "completed",
+		CreatedAt: time.Now().UTC(),
+	})
+	fix.ownership.Set("old-job", "user-alice")
+
+	body, ct := makeMultipartBody(t, "")
+	job, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: ct,
+		Body:        body,
+	})
+	require.NoError(t, err)
+	assert.Equal(t, "stub-job-1", job.JobID)
+}
+
+// TestInitJob_ConverterError_Propagation: a converter failure must propagate its sentinel error.
+func TestInitJob_ConverterError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.initJobFunc = func(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) {
+		// still drain the body so the io.Pipe writer side does not block
+		_, _ = io.Copy(io.Discard, req.Body)
+		return nil, fmt.Errorf("%w: simulated 502", ErrConverterUnavailable)
+	}
+
+	body, ct := makeMultipartBody(t, "")
+	_, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: ct,
+		Body:        body,
+	})
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrConverterUnavailable))
+
+	// ownership must not be recorded on failure
+	_, ok := fix.ownership.Get("stub-job-1")
+	assert.False(t, ok)
+}
+
+// TestInitJob_RebuildBodyError_ConsumerSeesError: the reader fails midway through the rebuild
+// → the converter side must receive that error when reading from the pipe (not an empty EOF / truncated multipart).
+//
+// Follows Reviewer M-2: the original `defer pw.Close()` combined with `pw.CloseWithError(err)`
+// turned the error signal into a nil EOF because of defer LIFO ordering. After the fix the converter side should be able to read,
+// through the pipe, the error raised during the rebuild stage (e.g. io.ErrUnexpectedEOF / a custom error).
+func TestInitJob_RebuildBodyError_ConsumerSeesError(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	// In the converter stub's InitJob, actively read the body — verify that what is read is a "pipe carrying the rebuild error"
+	// rather than a "truncated EOF"
+	var readErr error
+	fix.converter.initJobFunc = func(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) {
+		// read the whole body; if the rebuild failed, the pipe should yield a non-nil error (not EOF)
+		_, readErr = io.Copy(io.Discard, req.Body)
+		// simulate the converter returning 5xx because it could not receive the full body
+		return nil, fmt.Errorf("%w: simulated bad multipart from rebuild", ErrConverterUnavailable)
+	}
+
+	// deliberately supply a body that fails during rebuild: a valid boundary, but the part content errors halfway through
+	body := &errReader{
+		// first provide enough content for multipart.NewReader to find the first boundary
+		content: []byte("--boundary123\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\n"),
+		errAt:   1024, // raise the error after N bytes have been read
+		err:     errors.New("simulated reader failure mid-stream"),
+	}
+	contentType := "multipart/form-data; boundary=boundary123"
+
+	_, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: contentType,
+		Body:        body,
+	})
+	require.Error(t, err)
+	// should propagate as ErrConverterUnavailable (the converter stub returns 5xx; or the rebuild wraps it itself)
+	assert.True(t, errors.Is(err, ErrConverterUnavailable),
+		"rebuild + converter 雙失敗,最終應收斂成 ErrConverterUnavailable")
+
+	// key assert: when the converter side reads the body it must get a non-nil error rather than an empty EOF
+	// (with the original wrong defer order readErr would be nil — because pw.Close() overrode CloseWithError)
+	assert.Error(t, readErr,
+		"converter 端 io.Copy(req.Body) 應拿到 rebuild 階段的錯誤訊號,而不是 nil EOF")
+}
+
+// TestInitJob_RebuildHappyPath_ConsumerSeesEOF: on normal completion the consumer side should get EOF (not an error).
+//
+// The reverse case of Reviewer M-2: on the success path the pipe should end with a normal EOF.
+func TestInitJob_RebuildHappyPath_ConsumerSeesEOF(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	var readErr error
+	fix.converter.initJobFunc = func(ctx context.Context, req InitConverterJobReq) (*ConverterJob, error) {
+		_, readErr = io.Copy(io.Discard, req.Body)
+		return &ConverterJob{
+			JobID: "stub-job-1", Status: "created", CreatedAt: time.Now(),
+		}, nil
+	}
+
+	body, ct := makeMultipartBody(t, "")
+	_, err := fix.svc.InitJob(context.Background(), InitJobInput{
+		UserID:      "user-alice",
+		ContentType: ct,
+		Body:        body,
+	})
+	require.NoError(t, err)
+	// happy path: the pipe should end with a normal EOF (io.Copy does not report EOF as an error)
+	assert.NoError(t, readErr,
+		"正常完成時 converter 端 io.Copy(req.Body) 應 nil error(io.Copy 把 EOF 視為正常結束)")
+}
+
+// errReader returns err once errAt bytes have been read; used to simulate a failure midway through the rebuild.
+type errReader struct {
+	content []byte
+	pos     int
+	read    int
+	errAt   int
+	err     error
+}
+
+func (r *errReader) Read(p []byte) (int, error) {
+	if r.read >= r.errAt {
+		return 0, r.err
+	}
+	if r.pos >= len(r.content) {
+		// pad the remaining bytes with 0 up to errAt — simulates "failing only partway through the read"
+		n := r.errAt - r.read
+		if n > len(p) {
+			n = len(p)
+		}
+		for i := 0; i < n; i++ {
+			p[i] = 0
+		}
+		r.read += n
+		return n, nil
+	}
+	n := copy(p, r.content[r.pos:])
+	r.pos += n
+	r.read += n
+	return n, nil
+}
+
+// TestInitJob_RequiredFields: a missing UserID / Body / ContentType returns an error.
+func TestInitJob_RequiredFields(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	_, err := fix.svc.InitJob(context.Background(), InitJobInput{ContentType: "x", Body: strings.NewReader("y")})
+	assert.Error(t, err)
+	_, err = fix.svc.InitJob(context.Background(), InitJobInput{UserID: "u", ContentType: "x"})
+	assert.Error(t, err)
+	_, err = fix.svc.InitJob(context.Background(), InitJobInput{UserID: "u", Body: strings.NewReader("y")})
+	assert.Error(t, err)
+}
+
+// ==========================================================================
+// GetJob
+// ==========================================================================
+
+// TestGetJob_HappyPath: ownership present → converter.GetJob → returns *Job.
+func TestGetJob_HappyPath(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	createdAt := time.Now().UTC()
+	fix.converter.setJob(&ConverterJob{
+		JobID:          "j1",
+		Status:         "running",
+		Stage:          "bie",
+		CreatedAt:      createdAt,
+		UpdatedAt:      createdAt,
+		SourceFilename: "yolov5s.onnx",
+		Platform:       "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	job, err := fix.svc.GetJob(context.Background(), "user-alice", "j1")
+	require.NoError(t, err)
+	assert.Equal(t, "j1", job.JobID)
+	assert.Equal(t, "yolov5s.onnx", job.SourceFilename)
+	assert.Equal(t, "720", job.TargetChip)
+	// expires_at fallback: created_at + 7d
+	assert.Equal(t, createdAt.Add(7*24*time.Hour), job.ExpiresAt)
+}
+
+// TestGetJob_OwnershipMismatch_ReturnsNotFound: an ownership mismatch returns ErrJobNotFound (avoids leaking existence).
+func TestGetJob_OwnershipMismatch_ReturnsNotFound(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "running", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", "user-bob")
+
+	_, err := fix.svc.GetJob(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotFound),
+		"ownership mismatch 應回 not_found 而非 forbidden(§7.2 防枚舉)")
+	// converter.GetJob must not be called
+	assert.Equal(t, int32(0), fix.converter.getJobCalls.Load())
+}
+
+// TestGetJob_OwnershipMissing_ReturnsNotFound: no matching jobID in the cache → not_found.
+func TestGetJob_OwnershipMissing_ReturnsNotFound(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+	_, err := fix.svc.GetJob(context.Background(), "user-alice", "ghost-job")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotFound))
+}
+
+// TestGetJob_ConverterError_Propagation: a converter 5xx propagates.
+func TestGetJob_ConverterError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.ownership.Set("j1", "user-alice")
+	fix.converter.getJobFunc = func(ctx context.Context, jobID string) (*ConverterJob, error) {
+		return nil, fmt.Errorf("%w: simulated", ErrConverterUnavailable)
+	}
+
+	_, err := fix.svc.GetJob(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrConverterUnavailable))
+}
+
+// ==========================================================================
+// ActiveJob
+// ==========================================================================
+
+// 
TestActiveJob_HappyPath: lazy rebuild → ActiveJobOf → converter.GetJob → returns *Job.
+func TestActiveJob_HappyPath(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	createdAt := time.Now().UTC()
+	fix.converter.listInProgressJobsFunc = func(ctx context.Context, userID string) ([]*ConverterJob, error) {
+		if userID != "user-alice" {
+			return nil, nil
+		}
+		return []*ConverterJob{
+			{JobID: "j-active", Status: "running", CreatedAt: createdAt},
+		}, nil
+	}
+	fix.converter.setJob(&ConverterJob{
+		JobID:     "j-active",
+		Status:    "running",
+		Stage:     "bie",
+		CreatedAt: createdAt,
+	})
+
+	job, err := fix.svc.ActiveJob(context.Background(), "user-alice")
+	require.NoError(t, err)
+	require.NotNil(t, job)
+	assert.Equal(t, "j-active", job.JobID)
+	assert.Equal(t, "running", job.Status)
+}
+
+// TestActiveJob_NoActive: no active job returns (nil, nil).
+func TestActiveJob_NoActive(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	job, err := fix.svc.ActiveJob(context.Background(), "user-alice")
+	require.NoError(t, err)
+	assert.Nil(t, job)
+}
+
+// TestActiveJob_ConverterReturns404_DeletesAndReturnsNil: the cache has a job but the converter returns 404
+// → clear ownership + (nil, nil). An additional case required by the task spec.
+func TestActiveJob_ConverterReturns404_DeletesAndReturnsNil(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	// pre-seed one entry into the cache — simulates a visionA restart + lazy rebuild pulling it from the converter,
+	// after which the converter garbage-collected it
+	fix.converter.listInProgressJobsFunc = func(ctx context.Context, userID string) ([]*ConverterJob, error) {
+		return []*ConverterJob{{JobID: "j-stale", Status: "running", CreatedAt: time.Now()}}, nil
+	}
+	fix.converter.getJobFunc = func(ctx context.Context, jobID string) (*ConverterJob, error) {
+		return nil, fmt.Errorf("%w: simulated 404", ErrJobNotFound)
+	}
+
+	job, err := fix.svc.ActiveJob(context.Background(), "user-alice")
+	require.NoError(t, err)
+	assert.Nil(t, job, "converter 404 應視為無 active")
+
+	// ownership has been cleared
+	_, ok := fix.ownership.Get("j-stale")
+	assert.False(t, ok, "converter 404 後應呼叫 ownership.Delete")
+}
+
+// TestActiveJob_ConverterError_Propagation: a converter 5xx propagates to the caller (no fail-soft).
+func TestActiveJob_ConverterError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.listInProgressJobsFunc = func(ctx context.Context, userID string) ([]*ConverterJob, error) {
+		return nil, fmt.Errorf("%w: list 5xx", ErrConverterUnavailable)
+	}
+
+	_, err := fix.svc.ActiveJob(context.Background(), "user-alice")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrConverterUnavailable))
+}
+
+// TestActiveJob_CompletedJob_ReturnsNil: the cached job is a completed job → does not count as active.
+func TestActiveJob_CompletedJob_ReturnsNil(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.listInProgressJobsFunc = func(ctx context.Context, userID string) ([]*ConverterJob, error) {
+		return []*ConverterJob{{JobID: "j-done", Status: "running", CreatedAt: time.Now()}}, nil
+	}
+	// the converter's live status = completed
+	fix.converter.setJob(&ConverterJob{
+		JobID:     "j-done",
+		Status:    "completed",
+		CreatedAt: time.Now(),
+	})
+
+	job, err := fix.svc.ActiveJob(context.Background(), "user-alice")
+	require.NoError(t, err)
+	assert.Nil(t, job)
+}
+
+// ==========================================================================
+// PromoteToModels
+// ==========================================================================
+
+// TestPromoteToModels_HappyPath: the complete pipeline.
+func TestPromoteToModels_HappyPath(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	createdAt := time.Now().UTC()
+	fix.converter.setJob(&ConverterJob{
+		JobID:          "j1",
+		Status:         "completed",
+		CreatedAt:      createdAt,
+		SourceFilename: "yolov5s.onnx",
+		Platform:       "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	res, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "my-model")
+	require.NoError(t, err)
+	require.NotNil(t, res)
+	assert.NotEmpty(t, res.ModelID)
+	assert.Equal(t, "converted", res.Source)
+	assert.Equal(t, "j1", res.SourceJobID)
+	assert.Equal(t, "my-model", res.Name)
+	assert.Equal(t, "kl720", res.TargetChip)
+	assert.Equal(t, "ready", res.Status)
+	assert.Equal(t, int64(12345), res.FileSize)
+
+	// verify storage was really written
+	assert.Equal(t, int32(1), fix.storage.putCalls.Load())
+	fix.storage.mu.Lock()
+	expectedKey := fmt.Sprintf("models/user-alice/%s.nef", res.ModelID)
+	assert.Contains(t, fix.storage.objects, expectedKey)
+	fix.storage.mu.Unlock()
+
+	// verify the model store was really written
+	rec, _ := fix.models.FindBySourceJobID(context.Background(), "user-alice", "j1")
+	require.NotNil(t, rec)
+	assert.Equal(t, res.ModelID, rec.ID)
+
+	// verify promote / faa were each called once
+	assert.Equal(t, int32(1), fix.converter.promoteCalls.Load())
+	assert.Equal(t, int32(1), fix.faa.getCalls.Load())
+}
+
+// TestPromoteToModels_DefaultName: an empty name from the caller takes the `<stem>_kl<chip>` fallback.
+func TestPromoteToModels_DefaultName(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID:          "j1",
+		Status:         "completed",
+		CreatedAt:      time.Now(),
+		SourceFilename: "yolov5s.onnx",
+		Platform:       "520",
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	res, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "")
+	require.NoError(t, err)
+	assert.Equal(t, "yolov5s_kl520", res.Name)
+}
+
+// TestPromoteToModels_Idempotent: a second promote of the same jobID should return the existing model_id (task spec requirement).
+func TestPromoteToModels_Idempotent(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+		SourceFilename: "x.onnx", Platform: "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	first, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "v1")
+	require.NoError(t, err)
+	require.NotNil(t, first)
+
+	// second call: should no longer hit converter.Promote / faa.GetFile / storage.Put
+	convPromoteBefore := fix.converter.promoteCalls.Load()
+	faaCallsBefore := fix.faa.getCalls.Load()
+	storagePutBefore := fix.storage.putCalls.Load()
+
+	second, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "v2")
+	require.NoError(t, err)
+	require.NotNil(t, second)
+	assert.Equal(t, first.ModelID, second.ModelID, "二次 promote 應回既有 model_id")
+
+	assert.Equal(t, convPromoteBefore, fix.converter.promoteCalls.Load(),
+		"二次 promote 不應再打 converter.Promote")
+	assert.Equal(t, faaCallsBefore, fix.faa.getCalls.Load(),
+		"二次 promote 不應再打 faa.GetFile")
+	assert.Equal(t, storagePutBefore, fix.storage.putCalls.Load(),
+		"二次 promote 不應再寫 storage")
+}
+
+// TestPromoteToModels_JobNotCompleted: job status != completed → ErrJobNotCompleted (task spec requirement).
+func TestPromoteToModels_JobNotCompleted(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "running", CreatedAt: time.Now(),
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	_, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "x")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotCompleted))
+}
+
+// TestPromoteToModels_OwnershipMismatch: another user's job → not_found.
+func TestPromoteToModels_OwnershipMismatch(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+	})
+	fix.ownership.Set("j1", "user-bob")
+
+	_, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "x")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotFound))
+}
+
+// TestPromoteToModels_FAAError_Propagation: an FAA failure propagates.
+func TestPromoteToModels_FAAError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+		SourceFilename: "x.onnx", Platform: "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+	fix.faa.getFileFunc = func(ctx context.Context, objectKey string) (*FAAFile, error) {
+		return nil, fmt.Errorf("%w: faa 502", ErrFAAUnavailable)
+	}
+
+	_, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "x")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrFAAUnavailable))
+
+	// no model record should be created (the FAA failure happens before the storage write)
+	rec, _ := fix.models.FindBySourceJobID(context.Background(), "user-alice", "j1")
+	assert.Nil(t, rec)
+}
+
+// TestPromoteToModels_StorageError: a storage.Put failure → wrapped as ErrStorageUnavailable.
+//
+// Follows Reviewer M-1: a failure of visionA's own storage (disk full / S3 5xx / permission error)
+// must not be classified as an FAA or converter problem, to avoid SRE alarms paging the wrong team / misleading i18n messages.
+func TestPromoteToModels_StorageError(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+		SourceFilename: "x.onnx", Platform: "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+	fix.storage.putErr = errors.New("disk full")
+
+	_, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "x")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrStorageUnavailable),
+		"storage.Put 失敗應歸類為 ErrStorageUnavailable,不是 ErrFAAUnavailable")
+	// make sure it was not mistakenly wrapped as another sentinel
+	assert.False(t, errors.Is(err, ErrFAAUnavailable),
+		"storage 失敗不該被歸類為 FAA 問題(Reviewer M-1)")
+	assert.False(t, errors.Is(err, ErrConverterUnavailable))
+
+	// no model record should be created (the storage failure happens before modelStore.Save)
+	rec, _ := fix.models.FindBySourceJobID(context.Background(), "user-alice", "j1")
+	assert.Nil(t, rec)
+}
+
+// TestPromoteToModels_ModelStoreError: a modelStore.Save failure → wrapped as ErrModelStoreUnavailable.
+//
+// Follows Reviewer M-1: a failure of visionA's own model store must not be classified as a converter problem.
+func TestPromoteToModels_ModelStoreError(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+		SourceFilename: "x.onnx", Platform: "720",
+	})
+	fix.ownership.Set("j1", "user-alice")
+	fix.models.saveErr = errors.New("postgres connection refused")
+
+	_, err := fix.svc.PromoteToModels(context.Background(), "user-alice", "j1", "x")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrModelStoreUnavailable),
+		"modelStore.Save 失敗應歸類為 ErrModelStoreUnavailable,不是 ErrConverterUnavailable")
+	assert.False(t, errors.Is(err, ErrConverterUnavailable),
+		"modelStore 失敗不該被歸類為 converter 問題(Reviewer M-1)")
+}
+
+// ==========================================================================
+// DownloadRedirectURL
+// ==========================================================================
+
+// TestDownloadRedirectURL_HappyPath: the URL is assembled correctly (task spec requirement).
+func TestDownloadRedirectURL_HappyPath(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{
+		JobID: "j1", Status: "completed", CreatedAt: time.Now(),
+	})
+	fix.ownership.Set("j1", "user-alice")
+
+	url, err := fix.svc.DownloadRedirectURL(context.Background(), "user-alice", "j1")
+	require.NoError(t, err)
+
+	// FAA base + /files/<key>?access_token=<token>
+	// key = "models/user-alice/j1.nef", token = "opaque-stub-token-xyz"
+	assert.Equal(t,
+		"https://faa.example.com/files/models/user-alice/j1.nef?access_token=opaque-stub-token-xyz",
+		url,
+	)
+
+	// verify the parameters passed to IssueDelegatedDownload
+	fix.mcToken.mu.Lock()
+	in := fix.mcToken.lastIssueInput
+	fix.mcToken.mu.Unlock()
+	require.NotNil(t, in)
+	assert.Equal(t, "visiona-tenant", in.TenantID)
+	assert.Equal(t, "user-alice", in.UserID)
+	assert.Equal(t, "models/user-alice/j1.nef", in.ObjectKey)
+	assert.Equal(t, 300, in.ExpiresInSeconds)
+}
+
+// TestDownloadRedirectURL_EscapeSpecialChars: a user_id / job_id with special characters gets escaped.
+func TestDownloadRedirectURL_EscapeSpecialChars(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.mcToken.issueDelegatedDownloadFunc = func(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) {
+		// simulate a token containing special characters
+		return &DelegatedDownloadToken{
+			Token:     "abc def+/=",
+			ExpiresAt: time.Now().Add(5 * time.Minute),
+		}, nil
+	}
+	// use a valid user_id that contains special chars (an OIDC sub normally would not, but be defensive)
+	userID := "user with space"
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "completed", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", userID)
+
+	url, err := fix.svc.DownloadRedirectURL(context.Background(), userID, "j1")
+	require.NoError(t, err)
+	// the user_id in the path segment should be escaped (' ' → %20)
+	assert.Contains(t, url, "/files/models/user%20with%20space/j1.nef")
+	// the token part should be query-escaped ('+' / '=' / '/' / ' ')
+	assert.Contains(t, url, "?access_token=abc+def%2B%2F%3D")
+}
+
+// TestDownloadRedirectURL_OwnershipMismatch: not_found.
+func TestDownloadRedirectURL_OwnershipMismatch(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "completed", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", "user-bob")
+
+	_, err := fix.svc.DownloadRedirectURL(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotFound))
+}
+
+// TestDownloadRedirectURL_JobNotCompleted: still running → ErrJobNotCompleted.
+func TestDownloadRedirectURL_JobNotCompleted(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "running", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", "user-alice")
+
+	_, err := fix.svc.DownloadRedirectURL(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrJobNotCompleted))
+}
+
+// TestDownloadRedirectURL_PromoteError_Propagation: a promote 5xx propagates.
+func TestDownloadRedirectURL_PromoteError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "completed", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", "user-alice")
+	fix.converter.promoteFunc = func(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error) {
+		return nil, fmt.Errorf("%w: promote 502", ErrConverterUnavailable)
+	}
+
+	_, err := fix.svc.DownloadRedirectURL(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrConverterUnavailable))
+}
+
+// TestDownloadRedirectURL_MCError_Propagation: an MC delegated-token 5xx propagates.
+func TestDownloadRedirectURL_MCError_Propagation(t *testing.T) {
+	t.Parallel()
+	fix := newFlowFixture(t)
+
+	fix.converter.setJob(&ConverterJob{JobID: "j1", Status: "completed", CreatedAt: time.Now()})
+	fix.ownership.Set("j1", "user-alice")
+	fix.mcToken.issueDelegatedDownloadFunc = func(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) {
+		return nil, fmt.Errorf("%w: mc 5xx", ErrMCTokenUnavailable)
+	}
+
+	_, err := fix.svc.DownloadRedirectURL(context.Background(), "user-alice", "j1")
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, ErrMCTokenUnavailable))
+}
+
+// ==========================================================================
+// helper functions tests
+// ==========================================================================
+
+func TestNormalizeTargetChip(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		in, want string
+	}{
+		{"720", "kl720"},
+		{"520", "kl520"},
+		{"KL630", "kl630"},
+		{"kl730", "kl730"},
+		{"", ""},
+		{" 720 ", "kl720"},
+	}
+	for _, c := range cases {
+		assert.Equal(t, c.want, normalizeTargetChip(c.in), "input=%q", c.in)
+	}
+}
+
+func TestDefaultModelName(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, "yolov5s_kl720", defaultModelName(&ConverterJob{
+		SourceFilename: "yolov5s.onnx", Platform: "720",
+	}))
+	assert.Equal(t, "yolov5s_kl520", defaultModelName(&ConverterJob{
+		SourceFilename: "/path/to/yolov5s.onnx", Platform: "520",
+	}))
+	// no chip
+	assert.Equal(t, "x", defaultModelName(&ConverterJob{SourceFilename: "x.tflite"}))
+	// no stem
+	assert.Equal(t, "converted_kl720", defaultModelName(&ConverterJob{Platform: "720"}))
+}
+
+func TestEscapeObjectKeyPath(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, "models/user/file.nef", escapeObjectKeyPath("models/user/file.nef"))
+	// a space inside the path must be escaped
+	assert.Equal(t, "models/user%20space/file.nef", escapeObjectKeyPath("models/user space/file.nef"))
+	// '/' is kept (path separator); other path-reserved characters are escaped normally
+	assert.Equal(t, "a%3Fb/c", escapeObjectKeyPath("a?b/c"))
+	// '+' is valid in a path segment and is not escaped (unlike in a query string)
+	assert.Equal(t, "a+b/c", escapeObjectKeyPath("a+b/c"))
+}
+
+func TestBuildTargetObjectKey(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, "models/u1/j1.nef", buildTargetObjectKey("u1", "j1"))
+}
+
+func TestBuildStorageKey(t *testing.T) {
+	t.Parallel()
+	assert.Equal(t, "models/u1/m1.nef", buildStorageKey("u1", "m1"))
+}
diff --git a/visionA-backend/internal/conversion/mc_token_client.go b/visionA-backend/internal/conversion/mc_token_client.go
new file mode 100644
index 0000000..6f5e966
--- /dev/null
+++ b/visionA-backend/internal/conversion/mc_token_client.go
@@ -0,0 +1,624 @@
+// MC token client — visionA-backend obtains two kinds of token from Member Center:
+//   - service token (client_credentials grant): used for its own calls to converter / FAA; cached per scope
+//   - delegated download token: exchanged for a short-lived FAA download URL for a user (not cached; issued fresh each time)
+//
+// Design references:
+//   - kneron_model_converter/apps/task-scheduler/src/auth/oauthClient.js (Node version of the same pattern,
+//     already running in production; this Go version uses sync.Mutex + DCL instead of promise dedup)
+//   - this file goes together with the .autoflow/04-architecture/conversion.md §2.4 / §5 / §9.1 retry matrix
+//
+// Security:
+//   - **never** write client_secret / access_token / Authorization header contents to the log
+//   - error messages reveal only the status + whether it is retried, never server-side details
+//
+// Phase 0.8 conversion (see .autoflow/04-architecture/conversion.md §2.4 / §5)
+package conversion
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+	"time"
+)
+
+// ==========================================================================
+// Public types / interfaces
+//
========================================================================== + +// MCTokenClient 對 Member Center 取兩種 token。 +// +// 兩個 method 的錯誤處理策略對齊 conversion.md §6: +// +// - `ServiceToken`(打 MC `/oauth/token`,client_credentials grant): +// 401/403 → ErrServiceClientUnauthorized(500/idp_misconfigured 對外) +// 其他 4xx → ErrIDPMisconfigured(500/idp_misconfigured,i18n=idp_misconfig) +// 5xx / network 持續失敗 → ErrIDPUnavailable(503/idp_unavailable,i18n=idp_down) +// +// - `IssueDelegatedDownload`(打 MC `/file-access/download-tokens`): +// 401/403 → ErrServiceClientUnauthorized +// 其他 4xx → ErrDownloadTokenFailed(502/download_token_failed,i18n=token_failed) +// 5xx / network 持續失敗 → ErrMCTokenUnavailable(502/mc_token_unavailable,i18n=token_failed) +// +// 兩 endpoint 的 4xx / 5xx 用不同 sentinel — 因為 §6 的 i18n 訊息設計區分了 +// 「IDP 設定錯誤」「IDP 暫時不可用」「下載授權失敗」「MC 不可達」四種不同的 user-facing 提示 +// (前者引導使用者「聯絡支援」,後者引導「稍後再試」)。 +// +// goroutine-safe:cache 用 sync.Mutex,DCL 確保併發 fetch 只發一次 request。 +type MCTokenClient interface { + // ServiceToken 取一個 access token(client_credentials grant),可 cache 重用。 + // + // scope 範例: + // "converter:job.write converter:job.read files:download.read files:download.delegate" + // (多 scope 用空白分隔,依 RFC 6749 §3.3) + // + // cache 行為(見 §5.2): + // - per-scope cache(不同 scope 各自獨立) + // - 過期判斷:now() >= exp - 15s(提前 15 秒 refresh 避免邊界 race) + // - 失敗不 cache,下一次呼叫會重試 + // - DCL 防併發爆量(100 個 caller 同時要 token,只 fetch 一次) + ServiceToken(ctx context.Context, scope string) (string, error) + + // IssueDelegatedDownload 跟 MC 換 browser 直連 FAA 用的 opaque token。 + // + // 流程: + // 1. 先取 service token(scope=files:download.delegate)— 內部呼 ServiceToken + // 2. POST {issuer}/file-access/download-tokens + // 3. 
回 opaque token + 過期時間 + // + // caller 通常是 flow.DownloadRedirectURL,拿到後組 + // https:///files/?access_token= + // 走 server-side 302 redirect 給 browser(見 conversion.md §10.4)。 + IssueDelegatedDownload(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) +} + +// IssueDownloadReq 是 IssueDelegatedDownload 的輸入。 +// +// 欄位來源(trust boundary 見 conversion.md §7): +// - TenantID / UserID / ObjectKey 由 visionA-backend 內部產生(OIDC sub + promote 結果), +// 不接受 client 傳入 +// - ExpiresInSeconds 預設 300(5 分鐘),可在 caller 指定(範圍由 caller 自行檢查) +type IssueDownloadReq struct { + TenantID string + UserID string + ObjectKey string + ExpiresInSeconds int // <= 0 時自動套用預設 300 +} + +// DelegatedDownloadToken 是 MC 簽出來的 short-lived token。 +// +// Token 是 opaque(FAA 收到後再對 MC validate),visionA-backend 不解碼。 +type DelegatedDownloadToken struct { + Token string + ExpiresAt time.Time +} + +// MCTokenClientOpts 是 NewMCTokenClient 的依賴注入。 +// +// HTTPClient / Now / Logger 為 optional(nil 自動填預設)— 方便 unit test 注入 fake。 +type MCTokenClientOpts struct { + // Issuer 是 MC issuer URL(不帶結尾斜線)。 + // 會打: + // POST {Issuer}/oauth/token + // POST {Issuer}/file-access/download-tokens + Issuer string + + // ClientID / ClientSecret 是 visionA service client 在 MC 的註冊資訊。 + // **禁止 commit 進 repo**;由 main.go 從 env var 讀進 config 後注入。 + ClientID string + ClientSecret string + + // HTTPClient 為 optional;nil 用預設(timeout 10s)。測試會注入 httptest.Server.Client()。 + HTTPClient *http.Client + + // Now 為 optional;nil 用 time.Now。測試會注入 fake clock 控制 cache 過期。 + Now func() time.Time + + // Logger 為 optional;nil 用 slog.Default()。 + Logger *slog.Logger +} + +// ========================================================================== +// 內部實作 +// ========================================================================== + +// 內部固定常數(不對外,避免 caller hardcode)。 +const ( + // tokenRefreshSkew 是 cache 過期判斷的緩衝;now() >= exp - skew 視為過期。 + // 15s 對齊 conversion.md §2.4 / §5.2。 + tokenRefreshSkew = 15 * time.Second + + // httpTimeout 
是預設 HTTP client timeout(dialer + response 整體)。 + httpTimeout = 10 * time.Second + + // maxRetries 是 5xx / network / timeout 的最大重試次數(不含第一次)。 + // 對齊 conversion.md §9.1:MC oauth/token 與 file-access/download-tokens 都 max 2 次。 + maxRetries = 2 + + // retryBaseDelay 是指數退避的 base(1s, 2s)。 + retryBaseDelay = 1 * time.Second + + // defaultDelegatedTTL 是 IssueDelegatedDownload 預設 TTL(caller 不傳就 300)。 + defaultDelegatedTTL = 300 +) + +// cachedToken 是 ServiceToken cache 內部結構。 +type cachedToken struct { + token string + expiresAt time.Time +} + +// mcTokenClient 是 MCTokenClient 的預設實作。 +// +// 套件內 unexported struct(caller 拿 interface),讓未來換實作不影響 caller。 +type mcTokenClient struct { + issuer string + clientID string + clientSecret string + http *http.Client + now func() time.Time + logger *slog.Logger + + // cache 由 mu 保護;key=scope(multi-scope string 直接當 key, + // 不做 normalize — caller 應傳穩定排序的 scope 字串)。 + mu sync.Mutex // sync.Mutex 比 RWMutex 簡單;fetch 路徑 IO bound,RWMutex 沒有實質好處 + cache map[string]cachedToken +} + +// NewMCTokenClient 建立一個 MCTokenClient 實例。 +// +// 必填:Issuer / ClientID / ClientSecret。其他 optional。 +// 注意:constructor 不會驗 Issuer 連線,第一次 ServiceToken 呼叫才會打網路。 +func NewMCTokenClient(opts MCTokenClientOpts) MCTokenClient { + httpClient := opts.HTTPClient + if httpClient == nil { + httpClient = &http.Client{Timeout: httpTimeout} + } + now := opts.Now + if now == nil { + now = time.Now + } + logger := opts.Logger + if logger == nil { + logger = slog.Default() + } + return &mcTokenClient{ + issuer: strings.TrimRight(opts.Issuer, "/"), + clientID: opts.ClientID, + clientSecret: opts.ClientSecret, + http: httpClient, + now: now, + logger: logger, + cache: make(map[string]cachedToken), + } +} + +// ========================================================================== +// ServiceToken 實作(含 DCL cache) +// ========================================================================== + +// ServiceToken 實作 MCTokenClient.ServiceToken。 +// +// DCL 流程: +// 1. 
拿鎖 → 看 cache → 還新鮮就 unlock 後 return(fast path) +// 2. cache 過期 → 持鎖直接 fetch(在鎖內執行 HTTP request) +// +// 鎖內 fetch 的取捨: +// - 優點:實作極簡,無 in-flight Promise / sync.Once dance;併發 100 個 caller 全部 +// 在同一個 mutex 上排隊,第一個 fetch 完寫 cache 後,後續 caller 走 fast path +// - 缺點:fetch 期間(最多 10s timeout + 2 retries = 最壞 ~13s)所有同 scope 的 +// caller 全部 block;不同 scope 因為共用同一個 mu,也會 block(比 per-scope 鎖差) +// +// 為什麼不用 per-scope 鎖: +// - Phase 0.8 同時只用 1-2 個 scope,per-scope 鎖的好處邊際 +// - 簡單性 > 微優化;若未來 profiling 顯示瓶頸再改 sync.Map + per-scope mutex +// +// 為什麼不用 sync.Once: +// - sync.Once 不能 reset(cache 過期後要重 fetch)— 不適用 +func (c *mcTokenClient) ServiceToken(ctx context.Context, scope string) (string, error) { + if scope == "" { + return "", fmt.Errorf("conversion/mc_token_client: scope is required") + } + + c.mu.Lock() + defer c.mu.Unlock() + + // fast path:cache hit 且仍新鮮 + if entry, ok := c.cache[scope]; ok && c.isStillFresh(entry) { + return entry.token, nil + } + + // cache miss / 過期 → fetch(在鎖內執行) + token, exp, err := c.fetchServiceToken(ctx, scope) + if err != nil { + // 失敗不寫 cache;下次重試 + return "", err + } + + c.cache[scope] = cachedToken{ + token: token, + expiresAt: exp, + } + return token, nil +} + +// isStillFresh 判斷 cache entry 是否還能用。 +// 真正的過期時間是 expiresAt - tokenRefreshSkew(提前 15s 視為過期)。 +func (c *mcTokenClient) isStillFresh(entry cachedToken) bool { + if entry.token == "" { + return false + } + return c.now().Before(entry.expiresAt.Add(-tokenRefreshSkew)) +} + +// fetchServiceToken 真正打 MC oauth/token endpoint 取 token。 +// 已 retry 過所有可重試錯誤;回傳 error 時 caller 應視為 fatal(這次取不到)。 +func (c *mcTokenClient) fetchServiceToken(ctx context.Context, scope string) (string, time.Time, error) { + tokenURL := c.issuer + "/oauth/token" + + form := url.Values{} + form.Set("grant_type", "client_credentials") + form.Set("scope", scope) + + body, err := c.doWithRetry(ctx, endpointKindServiceToken, scope, func() (*http.Request, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodPost, 
tokenURL, + strings.NewReader(form.Encode())) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.Header.Set("Accept", "application/json") + // RFC 6749 §2.3.1 推薦:client credentials 走 Basic auth header(比 body 安全) + req.SetBasicAuth(c.clientID, c.clientSecret) + return req, nil + }) + if err != nil { + return "", time.Time{}, err + } + + // 解析 token endpoint response shape(RFC 6749 §5.1) + var resp struct { + AccessToken string `json:"access_token"` + TokenType string `json:"token_type"` + ExpiresIn int `json:"expires_in"` + Scope string `json:"scope,omitempty"` + } + if err := json.Unmarshal(body, &resp); err != nil { + c.logger.Warn("conversion.mc_token.parse_failed", + slog.String("endpoint", endpointKindServiceToken), + slog.String("scope", scope), + // 不 log body(可能含 access_token),只 log 錯誤訊息 + slog.String("err", truncate(err.Error(), 100))) + // IdP 回了 200 但 body 不是合法 JSON — 視為服務暫時失常(503/idp_unavailable) + return "", time.Time{}, fmt.Errorf("%w: parse service token response: %v", + ErrIDPUnavailable, err) + } + if resp.AccessToken == "" || resp.ExpiresIn <= 0 { + c.logger.Warn("conversion.mc_token.invalid_shape", + slog.String("endpoint", endpointKindServiceToken), + slog.String("scope", scope), + slog.Int("access_token_length", len(resp.AccessToken)), + slog.Int("expires_in", resp.ExpiresIn)) + // IdP 回了 200 但 shape 不對 — 同上視為 503/idp_unavailable + return "", time.Time{}, fmt.Errorf("%w: invalid service token response shape", + ErrIDPUnavailable) + } + + expiresAt := c.now().Add(time.Duration(resp.ExpiresIn) * time.Second) + + // 不 log token 本身;只 log 長度 + 過期時間(給除錯用) + c.logger.Info("conversion.mc_token.obtained", + slog.String("endpoint", endpointKindServiceToken), + slog.String("scope", scope), + slog.Int("expires_in_sec", resp.ExpiresIn), + slog.Int("token_len", len(resp.AccessToken))) + + return resp.AccessToken, expiresAt, nil +} + +// 
========================================================================== +// IssueDelegatedDownload 實作 +// ========================================================================== + +// IssueDelegatedDownload 實作 MCTokenClient.IssueDelegatedDownload。 +// +// 流程: +// 1. ServiceToken(ctx, "files:download.delegate") 取 service token +// 2. POST {issuer}/file-access/download-tokens (Bearer) +// 3. 回 opaque token + 過期時間 +// +// 不 cache(每次都新簽)— delegated token TTL 短(5 分鐘預設),cache 沒意義。 +func (c *mcTokenClient) IssueDelegatedDownload(ctx context.Context, in IssueDownloadReq) (*DelegatedDownloadToken, error) { + if in.TenantID == "" || in.UserID == "" || in.ObjectKey == "" { + return nil, fmt.Errorf("conversion/mc_token_client: tenant_id / user_id / object_key required") + } + ttl := in.ExpiresInSeconds + if ttl <= 0 { + ttl = defaultDelegatedTTL + } + + // 1. 取 service token(注意:這個呼叫本身可能 fetch,會走 cache fast path 或 fetch + retry) + // ServiceToken 內部已依 §6 mapping 失敗(ErrServiceClientUnauthorized / ErrIDPMisconfigured / + // ErrIDPUnavailable)— 這裡用 fmt.Errorf("%w") 透傳,不再二次包裝,避免錯誤碼被「升級」成 + // ErrMCTokenUnavailable 而失去原本的 i18n 區分(idp_misconfig vs idp_down)。 + serviceToken, err := c.ServiceToken(ctx, "files:download.delegate") + if err != nil { + return nil, fmt.Errorf("conversion: get service token for delegated download: %w", err) + } + + endpoint := c.issuer + "/file-access/download-tokens" + + reqBody, err := json.Marshal(map[string]any{ + "tenant_id": in.TenantID, + "user_id": in.UserID, + "object_key": in.ObjectKey, + "method": "GET", + "expires_in_seconds": ttl, + }) + if err != nil { + // 本地 marshal 失敗(理論不會發生)— 視為 MC 不可達(502/mc_token_unavailable) + return nil, fmt.Errorf("%w: marshal delegated download request: %v", + ErrMCTokenUnavailable, err) + } + + body, err := c.doWithRetry(ctx, endpointKindDelegatedDownload, in.ObjectKey, func() (*http.Request, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, + strings.NewReader(string(reqBody))) 
+ if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("Authorization", "Bearer "+serviceToken) + return req, nil + }) + if err != nil { + return nil, err + } + + // MC delegated download token response shape: + // {"token": "", "expires_at": ""} + // 若 MC 改用 expires_in_seconds,這裡 fallback 處理。 + var resp struct { + Token string `json:"token"` + ExpiresAt time.Time `json:"expires_at"` + ExpiresInSeconds int `json:"expires_in_seconds,omitempty"` + } + if err := json.Unmarshal(body, &resp); err != nil { + c.logger.Warn("conversion.mc_token.parse_failed", + slog.String("endpoint", endpointKindDelegatedDownload), + slog.String("err", truncate(err.Error(), 100))) + // MC 回 200 但 body 不是合法 JSON — 視為 MC 不可達(502/mc_token_unavailable) + return nil, fmt.Errorf("%w: parse delegated download response: %v", + ErrMCTokenUnavailable, err) + } + if resp.Token == "" { + c.logger.Warn("conversion.mc_token.invalid_shape", + slog.String("endpoint", endpointKindDelegatedDownload)) + // 同上:shape 不對視為 502/mc_token_unavailable + return nil, fmt.Errorf("%w: invalid delegated download response shape", + ErrMCTokenUnavailable) + } + expiresAt := resp.ExpiresAt + if expiresAt.IsZero() && resp.ExpiresInSeconds > 0 { + expiresAt = c.now().Add(time.Duration(resp.ExpiresInSeconds) * time.Second) + } + if expiresAt.IsZero() { + // 都沒有 → 用 caller 傳入 ttl 推算(best-effort) + expiresAt = c.now().Add(time.Duration(ttl) * time.Second) + } + + c.logger.Info("conversion.mc_token.delegated_obtained", + slog.String("endpoint", endpointKindDelegatedDownload), + slog.Int("ttl_sec", ttl), + slog.Int("token_len", len(resp.Token))) + + return &DelegatedDownloadToken{ + Token: resp.Token, + ExpiresAt: expiresAt, + }, nil +} + +// ========================================================================== +// HTTP 共用:retry / 錯誤分類 +// ========================================================================== + +// 
endpointKind 常數 — doWithRetry / doOnce 用來區分 4xx/5xx 該映射到哪個 sentinel。 +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §6) +const ( + endpointKindServiceToken = "service_token" // MC /oauth/token + endpointKindDelegatedDownload = "delegated_download" // MC /file-access/download-tokens +) + +// errClient4xx 取得「其他 4xx(非 401/403)」對應的 sentinel error。 +// service_token endpoint → ErrIDPMisconfigured(IDP grant 設定錯誤) +// delegated_download endpoint → ErrDownloadTokenFailed(換下載 token 失敗) +func errClient4xx(endpointKind string) error { + if endpointKind == endpointKindServiceToken { + return ErrIDPMisconfigured + } + return ErrDownloadTokenFailed +} + +// errServer5xxOrNetwork 取得「5xx / network / timeout」對應的 sentinel error。 +// service_token endpoint → ErrIDPUnavailable(認證服務暫時不可用,503) +// delegated_download endpoint → ErrMCTokenUnavailable(MC 不可達,502) +func errServer5xxOrNetwork(endpointKind string) error { + if endpointKind == endpointKindServiceToken { + return ErrIDPUnavailable + } + return ErrMCTokenUnavailable +} + +// doWithRetry 執行一次 HTTP request;遇到 5xx / network / timeout 時依 +// conversion.md §9.1 退避重試。每次 retry 之間檢查 ctx.Done()。 +// +// reqBuilder 是「每次 attempt 都重新建一個 *http.Request」的 closure +// — 因為 request body 可能在 retry 時已被讀完,必須重建。caller 內部用 +// strings.NewReader 等可重建的 body source。 +// +// 4xx 不 retry,直接 mapping 後 return。 +// +// endpointKind 是 log 用的標記("service_token" / "delegated_download")。 +// label 給 log 額外 context(scope or object_key)。 +func (c *mcTokenClient) doWithRetry( + ctx context.Context, + endpointKind, label string, + reqBuilder func() (*http.Request, error), +) ([]byte, error) { + var lastErr error + for attempt := 0; attempt <= maxRetries; attempt++ { + // retry 前檢查 ctx + if attempt > 0 { + select { + case <-ctx.Done(): + // ctx cancel/deadline → 立即 return(不 retry,不包成 ErrMCTokenUnavailable) + return nil, ctx.Err() + case <-time.After(retryBackoff(attempt)): + } + } + + req, err := reqBuilder() + if err != nil { + // 建 request 
失敗(例如 URL parse error)— 視為「打不出去」的網路類問題, + // 依 endpoint 種類映射到對應 sentinel。 + return nil, fmt.Errorf("%w: build request: %v", + errServer5xxOrNetwork(endpointKind), err) + } + + body, classifiedErr, retryable := c.doOnce(req, endpointKind, label, attempt) + if classifiedErr == nil { + return body, nil + } + lastErr = classifiedErr + if !retryable { + // 4xx / 401-403 / ctx cancel:直接 return,不再 retry + return nil, classifiedErr + } + // retryable 5xx / network / timeout:繼續下一輪 + } + // 用完 retry 額度 + c.logger.Warn("conversion.mc_token.retry_exhausted", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("attempts", maxRetries+1)) + return nil, lastErr +} + +// doOnce 執行一次 HTTP request,回傳 body(成功時)+ 分類好的 error + 是否可重試。 +// +// 回傳 retryable=false 表示 caller 不應 retry: +// - ctx 已 cancel +// - 4xx response(client error,retry 沒用) +// - JSON parse 失敗只在 caller 處理,不在這裡分類 +func (c *mcTokenClient) doOnce( + req *http.Request, + endpointKind, label string, + attempt int, +) (body []byte, err error, retryable bool) { + startedAt := c.now() + res, err := c.http.Do(req) + duration := c.now().Sub(startedAt) + if err != nil { + // network / timeout / context cancel + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + c.logger.Warn("conversion.mc_token.ctx_cancelled", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + return nil, err, false + } + c.logger.Warn("conversion.mc_token.network_error", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration), + // err.Error() 不會含 secret(http.Client 錯誤訊息只有 URL + 連線層 errno), + // 但仍 truncate 防 log 爆量 + slog.String("err", truncate(err.Error(), 200))) + return nil, fmt.Errorf("%w: %s network error: %v", + errServer5xxOrNetwork(endpointKind), endpointKind, err), true + } + defer res.Body.Close() + + 
bodyBytes, readErr := io.ReadAll(res.Body) + if readErr != nil { + c.logger.Warn("conversion.mc_token.body_read_failed", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.String("err", truncate(readErr.Error(), 200))) + // body read 失敗視為網路問題,可重試(依 endpoint 映射) + return nil, fmt.Errorf("%w: read response body: %v", + errServer5xxOrNetwork(endpointKind), readErr), true + } + + // 成功 2xx + if res.StatusCode >= 200 && res.StatusCode < 300 { + c.logger.Debug("conversion.mc_token.success", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + return bodyBytes, nil, false + } + + // 錯誤分類(不寫 body 進 log — error_description 可能含 client_id / requestId) + c.logger.Warn("conversion.mc_token.endpoint_error", + slog.String("endpoint", endpointKind), + slog.String("label", label), + slog.Int("status", res.StatusCode), + slog.Int("attempt", attempt+1), + slog.Duration("duration", duration)) + + // 401 / 403:client 認證失敗 — 不可重試(重試也會繼續 401) + // 兩個 endpoint 都用同一個 sentinel(caller 可用 errors.Is 做精細處理, + // 例如 cache invalidate;對外仍透過 ErrorCode mask 成 idp_misconfigured/500) + if res.StatusCode == http.StatusUnauthorized || res.StatusCode == http.StatusForbidden { + return nil, fmt.Errorf("%w: %s endpoint returned %d", + ErrServiceClientUnauthorized, endpointKind, res.StatusCode), false + } + + // 其他 4xx:不可重試 — 依 endpoint 種類映射到對應 sentinel: + // service_token → ErrIDPMisconfigured (500/idp_misconfigured) + // delegated_download → ErrDownloadTokenFailed (502/download_token_failed) + if res.StatusCode >= 400 && res.StatusCode < 500 { + return nil, fmt.Errorf("%w: %s endpoint returned %d", + errClient4xx(endpointKind), endpointKind, res.StatusCode), false + } + + // 5xx:可重試 — 依 endpoint 種類映射到對應 sentinel: + // service_token → ErrIDPUnavailable (503/idp_unavailable) + // delegated_download → 
ErrMCTokenUnavailable (502/mc_token_unavailable) + return nil, fmt.Errorf("%w: %s endpoint returned %d", + errServer5xxOrNetwork(endpointKind), endpointKind, res.StatusCode), true +} + +// retryBackoff 回傳第 n 次 retry(n 從 1 開始)的等待時間。 +// 1 → 1s, 2 → 2s(對齊 conversion.md §9.1) +// +// 不加 jitter — Phase 0.8 預期同時 fetch 的 caller 已被 DCL 收斂到單一執行, +// 不會有大量併發打 MC,jitter 邊際效益低。 +func retryBackoff(attempt int) time.Duration { + if attempt < 1 { + return retryBaseDelay + } + return retryBaseDelay * time.Duration(attempt) +} + +// truncate 把字串截到 max 長度(避免 log 太長)。 +func truncate(s string, max int) string { + if len(s) <= max { + return s + } + return s[:max] + "...(truncated)" +} diff --git a/visionA-backend/internal/conversion/mc_token_client_test.go b/visionA-backend/internal/conversion/mc_token_client_test.go new file mode 100644 index 0000000..282deb7 --- /dev/null +++ b/visionA-backend/internal/conversion/mc_token_client_test.go @@ -0,0 +1,864 @@ +// MC Token Client 單元測試。 +// +// 測試策略: +// - 用 httptest.Server mock MC,accept counter / atomic 驗 retry / cache 行為 +// - 用 fake clock 控制時間(測 cache 過期) +// - 用 silent logger 避免 test 輸出污染(assert 過程仍可 inspect) +// +// 對應 task 規範必含 11 個 case;本檔每個都有對應 test func。 +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.4 / §5) +package conversion + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// silentLogger 是 test 用的 no-op logger,避免 test 輸出污染。 +func silentLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(io.Discard, nil)) +} + +// fakeClock 提供可控的時間源;用 atomic 操作 nano 確保 race-free。 +type fakeClock struct { + nano atomic.Int64 // unix nano +} + +func newFakeClock(t time.Time) *fakeClock { + c := &fakeClock{} + c.nano.Store(t.UnixNano()) + return c +} + +func (c *fakeClock) now() time.Time { + return 
// ==========================================================================
// mock helpers: fake MC oauth/token and file-access/download-tokens endpoints
// ==========================================================================

// tokenServerOpts tunes the mock server built by newTokenServer.
type tokenServerOpts struct {
	// expiresIn is the expires_in value (seconds) in the response; 0 selects 3600.
	expiresIn int

	// statusFn picks the HTTP status per request (0-based index); nil selects 200.
	statusFn func(callIdx int) int

	// tokenFn picks the access_token per request; nil selects "tok-{idx}".
	tokenFn func(callIdx int) string

	// delay is how long the handler waits before answering (timeout / cancel tests).
	delay time.Duration

	// invalidJSON makes the handler answer 200 with an empty, non-JSON body.
	invalidJSON bool

	// emptyToken makes the handler answer with access_token "".
	emptyToken bool
}

// newTokenServer starts a mock MC server exposing /oauth/token.
//
// It returns the server, a counter of requests received, and a sync.Map from
// the 0-based request index to the url.Values form sent with that request.
// The server is closed through t.Cleanup.
func newTokenServer(t *testing.T, opts tokenServerOpts) (*httptest.Server, *atomic.Int32, *sync.Map) {
	t.Helper()
	var counter atomic.Int32
	lastForm := &sync.Map{} // request index -> url.Values

	if opts.expiresIn == 0 {
		opts.expiresIn = 3600
	}
	if opts.statusFn == nil {
		opts.statusFn = func(int) int { return 200 }
	}
	if opts.tokenFn == nil {
		opts.tokenFn = func(idx int) string { return fmt.Sprintf("tok-%d", idx) }
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/oauth/token", func(w http.ResponseWriter, r *http.Request) {
		idx := int(counter.Add(1)) - 1

		// The client must authenticate with Basic auth and send a form body.
		if _, _, ok := r.BasicAuth(); !ok {
			t.Errorf("oauth/token expected Basic auth header, got none")
		}
		if !strings.HasPrefix(r.Header.Get("Content-Type"), "application/x-www-form-urlencoded") {
			t.Errorf("oauth/token expected form content-type, got %q", r.Header.Get("Content-Type"))
		}

		// Record a deep copy of the form so later inspection does not share
		// slices with the request.
		if err := r.ParseForm(); err != nil {
			t.Errorf("oauth/token could not parse form: %v", err)
		}
		form := url.Values{}
		for k, v := range r.Form {
			form[k] = append([]string(nil), v...)
		}
		lastForm.Store(idx, form)

		if opts.delay > 0 {
			select {
			case <-time.After(opts.delay):
			case <-r.Context().Done():
				return
			}
		}

		status := opts.statusFn(idx)
		if status != 200 {
			w.WriteHeader(status)
			_, _ = w.Write([]byte(`{"error":"server_error"}`)) // best-effort test response
			return
		}

		if opts.invalidJSON {
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(``))
			return
		}
		token := opts.tokenFn(idx)
		if opts.emptyToken {
			token = ""
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = fmt.Fprintf(w, `{"access_token":"%s","token_type":"Bearer","expires_in":%d}`,
			token, opts.expiresIn)
	})

	srv := httptest.NewServer(mux)
	t.Cleanup(srv.Close)
	return srv, &counter, lastForm
}

// downloadServerOpts tunes the mock server built by newDownloadServer.
type downloadServerOpts struct {
	tokenStatusFn    func(callIdx int) int // status for /oauth/token; nil selects 200
	downloadStatusFn func(callIdx int) int // status for /file-access/download-tokens; nil selects 200

	respBody string // body for /file-access/download-tokens; "" selects the happy-path body
}

// newDownloadServer starts a mock MC server exposing both /oauth/token and
// /file-access/download-tokens.
//
// It returns the server, one request counter per endpoint, and a pointer to
// the raw body of the most recent download-token request. The pointed-to
// string is overwritten by the handler on every request, so read it only
// after the HTTP call under test has returned. The server is closed through
// t.Cleanup.
func newDownloadServer(t *testing.T, opts downloadServerOpts) (
	srv *httptest.Server,
	tokenCounter, downloadCounter *atomic.Int32,
	lastDownloadBody *string,
) {
	t.Helper()
	var tCounter, dCounter atomic.Int32
	var bodyMu sync.Mutex // serializes concurrent handler writes to *lastBody
	lastBody := new(string)

	if opts.tokenStatusFn == nil {
		opts.tokenStatusFn = func(int) int { return 200 }
	}
	if opts.downloadStatusFn == nil {
		opts.downloadStatusFn = func(int) int { return 200 }
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/oauth/token", func(w http.ResponseWriter, r *http.Request) {
		idx := int(tCounter.Add(1)) - 1
		status := opts.tokenStatusFn(idx)
		if status != 200 {
			w.WriteHeader(status)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"access_token":"svc-tok","token_type":"Bearer","expires_in":3600}`))
	})
	mux.HandleFunc("/file-access/download-tokens", func(w http.ResponseWriter, r *http.Request) {
		idx := int(dCounter.Add(1)) - 1

		// Keep the received body so tests can check the request shape.
		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("download endpoint could not read request body: %v", err)
		}
		bodyMu.Lock()
		*lastBody = string(body)
		bodyMu.Unlock()

		// The service token must be sent as a bearer token.
		auth := r.Header.Get("Authorization")
		if !strings.HasPrefix(auth, "Bearer ") {
			t.Errorf("download endpoint expected Bearer auth, got %q", auth)
		}

		status := opts.downloadStatusFn(idx)
		if status != 200 {
			w.WriteHeader(status)
			return
		}
		respBody := opts.respBody
		if respBody == "" {
			// Happy path: a token that expires five minutes from now.
			respBody = fmt.Sprintf(`{"token":"opaque-tok-%d","expires_at":"%s"}`,
				idx, time.Now().UTC().Add(5*time.Minute).Format(time.RFC3339))
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(respBody))
	})

	srv = httptest.NewServer(mux)
	t.Cleanup(srv.Close)

	return srv, &tCounter, &dCounter, lastBody
}
tokenServerOpts{}) + c := newClient(srv, nil) + + tok, err := c.ServiceToken(context.Background(), "converter:job.write") + require.NoError(t, err) + assert.Equal(t, "tok-0", tok) + assert.Equal(t, int32(1), counter.Load(), "第一次呼叫應該真的打 MC") + + // 驗 form values 對齊 RFC 6749 §4.4 + if v, ok := lastForm.Load(0); ok { + form := v.(url.Values) + assert.Equal(t, "client_credentials", form.Get("grant_type")) + assert.Equal(t, "converter:job.write", form.Get("scope")) + } else { + t.Fatal("server did not record form") + } +} + +func TestServiceToken_CacheHit(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{expiresIn: 3600}) + c := newClient(srv, nil) + + scope := "converter:job.write" + tok1, err := c.ServiceToken(context.Background(), scope) + require.NoError(t, err) + tok2, err := c.ServiceToken(context.Background(), scope) + require.NoError(t, err) + tok3, err := c.ServiceToken(context.Background(), scope) + require.NoError(t, err) + + assert.Equal(t, tok1, tok2) + assert.Equal(t, tok2, tok3) + assert.Equal(t, int32(1), counter.Load(), "後續呼叫應走 cache,不打 MC") +} + +func TestServiceToken_Expired_Refetch(t *testing.T) { + t.Parallel() + + clock := newFakeClock(time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)) + srv, counter, _ := newTokenServer(t, tokenServerOpts{expiresIn: 60}) // 60s TTL + c := newClient(srv, clock) + + scope := "converter:job.write" + tok1, err := c.ServiceToken(context.Background(), scope) + require.NoError(t, err) + assert.Equal(t, int32(1), counter.Load()) + + // 推進到 exp - skew 之後(60s - 15s = 45s),應視為過期 + clock.advance(46 * time.Second) + tok2, err := c.ServiceToken(context.Background(), scope) + require.NoError(t, err) + assert.NotEqual(t, tok1, tok2, "過期後應拿到新 token") + assert.Equal(t, int32(2), counter.Load(), "過期後應重 fetch") +} + +func TestServiceToken_DifferentScope_DifferentCache(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{expiresIn: 3600}) + c := newClient(srv, nil) + 
+ tokA1, err := c.ServiceToken(context.Background(), "scope-a") + require.NoError(t, err) + tokB1, err := c.ServiceToken(context.Background(), "scope-b") + require.NoError(t, err) + tokA2, err := c.ServiceToken(context.Background(), "scope-a") + require.NoError(t, err) + tokB2, err := c.ServiceToken(context.Background(), "scope-b") + require.NoError(t, err) + + assert.Equal(t, tokA1, tokA2, "同 scope 應走 cache") + assert.Equal(t, tokB1, tokB2) + assert.NotEqual(t, tokA1, tokB1, "不同 scope 應有不同 token") + assert.Equal(t, int32(2), counter.Load(), "兩個 scope 各 fetch 一次") +} + +// TestServiceToken_Concurrent_OnlyOneFetch — 100 個 goroutine 同時要 token,DCL 確保只 fetch 一次。 +// +// 實作細節:mock server 回應有 50ms delay,確保第一個 fetch 還沒回前所有 caller 都已進來; +// DCL 應讓他們全部 block 在 mu.Lock(),第一個 fetch 完寫 cache 後,後續 caller 走 fast path。 +func TestServiceToken_Concurrent_OnlyOneFetch(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + expiresIn: 3600, + delay: 50 * time.Millisecond, + }) + c := newClient(srv, nil) + + const N = 100 + var wg sync.WaitGroup + wg.Add(N) + tokens := make([]string, N) + errs := make([]error, N) + start := make(chan struct{}) + + for i := 0; i < N; i++ { + go func(idx int) { + defer wg.Done() + <-start + tok, err := c.ServiceToken(context.Background(), "converter:job.write") + tokens[idx] = tok + errs[idx] = err + }(i) + } + close(start) + wg.Wait() + + for _, e := range errs { + require.NoError(t, e) + } + for i := 1; i < N; i++ { + assert.Equal(t, tokens[0], tokens[i], "所有 goroutine 應拿到同一個 token") + } + assert.Equal(t, int32(1), counter.Load(), "DCL 應確保 100 個 caller 只打一次 MC") +} + +func TestServiceToken_Server4xx_NoRetry(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(int) int { return 401 }, + }) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + assert.True(t, errors.Is(err, 
ErrServiceClientUnauthorized), + "401 應 mapping 到 ErrServiceClientUnauthorized, got %v", err) + assert.False(t, errors.Is(err, ErrMCTokenUnavailable), + "401 不應同時掛 ErrMCTokenUnavailable") + assert.Equal(t, int32(1), counter.Load(), "401 不應 retry") +} + +func TestServiceToken_Server403_NoRetry(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(int) int { return 403 }, + }) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized)) + assert.Equal(t, int32(1), counter.Load(), "403 不應 retry") +} + +func TestServiceToken_Server400_NoRetry(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(int) int { return 400 }, + }) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + // §6:MC token endpoint 4xx (非 401/403) → idp_misconfigured / 500 + assert.True(t, errors.Is(err, ErrIDPMisconfigured), + "service_token 4xx 應 mapping 到 ErrIDPMisconfigured(§6), got %v", err) + assert.False(t, errors.Is(err, ErrServiceClientUnauthorized), + "400 不應掛 ErrServiceClientUnauthorized(限 401/403)") + assert.False(t, errors.Is(err, ErrMCTokenUnavailable), + "service_token 4xx 不應掛 ErrMCTokenUnavailable(§6 該 sentinel 限 delegated 5xx 用)") + assert.Equal(t, int32(1), counter.Load(), "400 不應 retry") +} + +func TestServiceToken_Server5xx_Retry(t *testing.T) { + t.Parallel() + + // 前兩次 500、第三次 200 + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(idx int) int { + if idx < 2 { + return 500 + } + return 200 + }, + }) + + // 把 retryBaseDelay 暫時縮短,避免 test 等太久(用環境變數無法 — 改用 dial-down opts) + // 這裡選擇接受真實 1s + 2s = 3s 的等待(test 內可接受) + c := newClient(srv, nil) + + tok, err := c.ServiceToken(context.Background(), "converter:job.write") + require.NoError(t, err) + assert.Equal(t, 
"tok-2", tok, "第三次成功的 token") + assert.Equal(t, int32(3), counter.Load(), "5xx 應 retry 兩次後第三次成功") +} + +func TestServiceToken_Server5xx_Exhausted(t *testing.T) { + t.Parallel() + + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(int) int { return 500 }, + }) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + // §6:MC token endpoint 5xx / network 持續失敗 → idp_unavailable / 503 + assert.True(t, errors.Is(err, ErrIDPUnavailable), + "service_token 連續 5xx 應 mapping 到 ErrIDPUnavailable(§6), got %v", err) + assert.False(t, errors.Is(err, ErrMCTokenUnavailable), + "service_token 5xx 不應掛 ErrMCTokenUnavailable(§6 該 sentinel 限 delegated 5xx 用)") + // 第一次 + 2 次 retry = 3 次 attempt + assert.Equal(t, int32(3), counter.Load(), "5xx 應 attempt 3 次") +} + +func TestServiceToken_ContextCancel_NoRetry(t *testing.T) { + t.Parallel() + + // server 回應有 500ms delay,給我們時間 cancel + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + delay: 500 * time.Millisecond, + }) + c := newClient(srv, nil) + + ctx, cancel := context.WithCancel(context.Background()) + // 50ms 後 cancel(在 server response 之前) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _, err := c.ServiceToken(ctx, "converter:job.write") + require.Error(t, err) + // ctx cancel 在 service_token endpoint: + // - http.Client 端攔到 ctx cancel → 透傳 context.Canceled(不包 sentinel) + // - 透過 fmt.Errorf("%w") 包過 → ErrIDPUnavailable(§6 service_token network 失敗映射) + // 兩者擇一即為合法 + assert.True(t, + errors.Is(err, context.Canceled) || errors.Is(err, ErrIDPUnavailable), + "ctx cancel 應立即 return(context.Canceled 或 ErrIDPUnavailable wrap),got %v", err) + // counter 可能是 1(server 收到了但 client 在等回應時 cancel);不應該 retry + assert.LessOrEqual(t, counter.Load(), int32(1), + "ctx cancel 不應 retry,counter <= 1") +} + +func TestServiceToken_InvalidJSON_TreatedAsError(t *testing.T) { + t.Parallel() + + srv, _, _ := newTokenServer(t, 
tokenServerOpts{invalidJSON: true}) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + // §6:service_token endpoint 回 200 但 body 不合法 — 視為 IDP 暫時不可用(503/idp_unavailable) + assert.True(t, errors.Is(err, ErrIDPUnavailable), + "service_token JSON parse error 應 mapping 到 ErrIDPUnavailable(§6), got %v", err) +} + +func TestServiceToken_EmptyTokenInResponse_TreatedAsError(t *testing.T) { + t.Parallel() + + srv, _, _ := newTokenServer(t, tokenServerOpts{emptyToken: true}) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err) + // §6:service_token endpoint shape 不對 — 同 IdP 失常(503/idp_unavailable) + assert.True(t, errors.Is(err, ErrIDPUnavailable), + "空 access_token 應 mapping 到 ErrIDPUnavailable(§6), got %v", err) +} + +func TestServiceToken_FailureNotCached(t *testing.T) { + t.Parallel() + + // 第一次 500 (+2 retry 都 500),第四次(即第二次 ServiceToken 呼叫的第一個 attempt)成功 + var phase atomic.Int32 + srv, counter, _ := newTokenServer(t, tokenServerOpts{ + statusFn: func(idx int) int { + if phase.Load() == 0 { + return 500 + } + return 200 + }, + }) + c := newClient(srv, nil) + + _, err := c.ServiceToken(context.Background(), "converter:job.write") + require.Error(t, err, "第一次預期失敗") + assert.Equal(t, int32(3), counter.Load()) + + // 切換到 success phase + phase.Store(1) + tok, err := c.ServiceToken(context.Background(), "converter:job.write") + require.NoError(t, err, "第二次應成功(之前的失敗不應 cache)") + assert.NotEmpty(t, tok) + assert.Equal(t, int32(4), counter.Load(), "第二次 ServiceToken 應重新打 MC") +} + +// ========================================================================== +// IssueDelegatedDownload 系列 +// ========================================================================== + +func TestIssueDelegatedDownload_Success(t *testing.T) { + t.Parallel() + + srv, _, dCounter, _ := newDownloadServer(t, downloadServerOpts{}) + c := newClient(srv, nil) + + 
dl, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "tenant-x", + UserID: "user-y", + ObjectKey: "promoted/job-1.nef", + ExpiresInSeconds: 600, + }) + require.NoError(t, err) + require.NotNil(t, dl) + assert.Contains(t, dl.Token, "opaque-tok-") + assert.True(t, dl.ExpiresAt.After(time.Now()), "expires_at 應在未來") + assert.Equal(t, int32(1), dCounter.Load()) +} + +// TestIssueDelegatedDownload_RequestBodyShape 驗 POST /file-access/download-tokens 的 body shape +// 對齊 conversion.md §1 + §2.4。 +func TestIssueDelegatedDownload_RequestBodyShape(t *testing.T) { + t.Parallel() + + // 自訂 server 收 body 後驗 shape + var lastBody string + mux := http.NewServeMux() + mux.HandleFunc("/oauth/token", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"access_token":"svc-tok","token_type":"Bearer","expires_in":3600}`)) + }) + mux.HandleFunc("/file-access/download-tokens", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + lastBody = string(body) + assert.Equal(t, "application/json", r.Header.Get("Content-Type")) + assert.True(t, strings.HasPrefix(r.Header.Get("Authorization"), "Bearer svc-tok"), + "應帶 service token 為 Bearer auth") + w.Header().Set("Content-Type", "application/json") + _, _ = fmt.Fprintf(w, `{"token":"opaque","expires_at":"%s"}`, + time.Now().UTC().Add(5*time.Minute).Format(time.RFC3339)) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + c := NewMCTokenClient(MCTokenClientOpts{ + Issuer: srv.URL, + ClientID: "id", + ClientSecret: "sec", + HTTPClient: srv.Client(), + Logger: silentLogger(), + }) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "tenant-z", + UserID: "user-a", + ObjectKey: "a/b/c.nef", + ExpiresInSeconds: 300, + }) + require.NoError(t, err) + + // 驗 body shape — JSON 含必要欄位 + assert.Contains(t, lastBody, `"tenant_id":"tenant-z"`) + assert.Contains(t, lastBody, 
`"user_id":"user-a"`) + assert.Contains(t, lastBody, `"object_key":"a/b/c.nef"`) + assert.Contains(t, lastBody, `"method":"GET"`) + assert.Contains(t, lastBody, `"expires_in_seconds":300`) +} + +func TestIssueDelegatedDownload_DefaultTTL(t *testing.T) { + t.Parallel() + + var lastBody string + mux := http.NewServeMux() + mux.HandleFunc("/oauth/token", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"access_token":"svc-tok","token_type":"Bearer","expires_in":3600}`)) + }) + mux.HandleFunc("/file-access/download-tokens", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + lastBody = string(body) + w.Header().Set("Content-Type", "application/json") + _, _ = fmt.Fprintf(w, `{"token":"opaque","expires_at":"%s"}`, + time.Now().UTC().Add(5*time.Minute).Format(time.RFC3339)) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + c := NewMCTokenClient(MCTokenClientOpts{ + Issuer: srv.URL, + ClientID: "id", + ClientSecret: "sec", + HTTPClient: srv.Client(), + Logger: silentLogger(), + }) + + // 不傳 ExpiresInSeconds(=0),應自動套 default 300 + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.NoError(t, err) + assert.Contains(t, lastBody, `"expires_in_seconds":300`, + "ExpiresInSeconds 為 0 時應 fallback 到 default 300") +} + +func TestIssueDelegatedDownload_Server4xx_PropagateError(t *testing.T) { + t.Parallel() + + srv, _, dCounter, _ := newDownloadServer(t, downloadServerOpts{ + downloadStatusFn: func(int) int { return 400 }, + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + // §6:MC delegated download 4xx → download_token_failed / 502 + assert.True(t, errors.Is(err, ErrDownloadTokenFailed), + "delegated 4xx 應 mapping 到 
ErrDownloadTokenFailed(§6), got %v", err) + assert.False(t, errors.Is(err, ErrMCTokenUnavailable), + "delegated 4xx 不應掛 ErrMCTokenUnavailable(§6 該 sentinel 限 5xx 用)") + assert.Equal(t, int32(1), dCounter.Load(), "4xx 不應 retry") +} + +func TestIssueDelegatedDownload_Server5xx_RetryThenFail(t *testing.T) { + t.Parallel() + + srv, _, dCounter, _ := newDownloadServer(t, downloadServerOpts{ + downloadStatusFn: func(int) int { return 500 }, + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + // §6:MC delegated download 5xx / network 持續失敗 → mc_token_unavailable / 502(不變) + assert.True(t, errors.Is(err, ErrMCTokenUnavailable), + "delegated 5xx 應 mapping 到 ErrMCTokenUnavailable(§6), got %v", err) + assert.False(t, errors.Is(err, ErrDownloadTokenFailed), + "delegated 5xx 不應掛 ErrDownloadTokenFailed(§6 該 sentinel 限 4xx 用)") + assert.Equal(t, int32(3), dCounter.Load(), "5xx 應 attempt 3 次") +} + +func TestIssueDelegatedDownload_Server401_PropagateUnauthorized(t *testing.T) { + t.Parallel() + + srv, _, dCounter, _ := newDownloadServer(t, downloadServerOpts{ + downloadStatusFn: func(int) int { return 401 }, + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized), + "download 401 應 mapping 到 ErrServiceClientUnauthorized, got %v", err) + assert.Equal(t, int32(1), dCounter.Load(), "401 不應 retry") +} + +func TestIssueDelegatedDownload_ServiceTokenFailure_Propagated(t *testing.T) { + t.Parallel() + + srv, tCounter, dCounter, _ := newDownloadServer(t, downloadServerOpts{ + tokenStatusFn: func(int) int { return 500 }, // service token 完全取不到 + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + 
TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + // §6:失敗源頭是 service_token endpoint 5xx → ErrIDPUnavailable + // IssueDelegatedDownload 用 fmt.Errorf("%w") 透傳,不會升級成 ErrMCTokenUnavailable, + // 確保前端 i18n 能正確顯示「認證服務暫時無法使用」而非「無法取得下載授權」。 + assert.True(t, errors.Is(err, ErrIDPUnavailable), + "service token 5xx 透傳 → ErrIDPUnavailable(§6), got %v", err) + assert.False(t, errors.Is(err, ErrMCTokenUnavailable), + "不應被升級成 ErrMCTokenUnavailable,否則 i18n 訊息會錯") + assert.Equal(t, int32(3), tCounter.Load(), "service token 5xx 應 attempt 3 次") + assert.Equal(t, int32(0), dCounter.Load(), "service token 失敗時不應打 download endpoint") +} + +// TestIssueDelegatedDownload_ServiceTokenAuthFailure_Propagated — service_token 401/403 透傳。 +// +// §6 mapping:401/403 用 ErrServiceClientUnauthorized(對外仍 mask 成 idp_misconfigured/500)。 +// 確認 IssueDelegatedDownload 用 fmt.Errorf("%w") 透傳後,errors.Is 仍能命中。 +func TestIssueDelegatedDownload_ServiceTokenAuthFailure_Propagated(t *testing.T) { + t.Parallel() + + srv, tCounter, dCounter, _ := newDownloadServer(t, downloadServerOpts{ + tokenStatusFn: func(int) int { return 401 }, + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrServiceClientUnauthorized), + "service token 401 透傳 → ErrServiceClientUnauthorized(§5.2), got %v", err) + assert.Equal(t, int32(1), tCounter.Load(), "401 不應 retry") + assert.Equal(t, int32(0), dCounter.Load(), "service token 401 時不應打 download endpoint") +} + +// TestIssueDelegatedDownload_ServiceToken4xxNonAuth_Propagated — service_token 400 透傳成 IDP 設定錯誤。 +// +// §6 mapping:service_token 4xx (非 401/403) → ErrIDPMisconfigured(500/idp_misconfigured)。 +// 這是「IDP grant 設定錯」而非「下載授權失敗」— 區分 i18n 訊息。 +func TestIssueDelegatedDownload_ServiceToken4xxNonAuth_Propagated(t *testing.T) { + t.Parallel() + + srv, tCounter, dCounter, _ := 
newDownloadServer(t, downloadServerOpts{ + tokenStatusFn: func(int) int { return 400 }, + }) + c := newClient(srv, nil) + + _, err := c.IssueDelegatedDownload(context.Background(), IssueDownloadReq{ + TenantID: "t", + UserID: "u", + ObjectKey: "k", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrIDPMisconfigured), + "service token 400 透傳 → ErrIDPMisconfigured(§6), got %v", err) + assert.False(t, errors.Is(err, ErrDownloadTokenFailed), + "不應掛 ErrDownloadTokenFailed(那是 delegated endpoint 4xx 的錯誤碼)") + assert.Equal(t, int32(1), tCounter.Load(), "400 不應 retry") + assert.Equal(t, int32(0), dCounter.Load(), "service token 4xx 時不應打 download endpoint") +} + +func TestIssueDelegatedDownload_RequiredFieldsValidation(t *testing.T) { + t.Parallel() + + c := NewMCTokenClient(MCTokenClientOpts{ + Issuer: "http://localhost:9999", // 不會真的打到 + ClientID: "id", + ClientSecret: "sec", + Logger: silentLogger(), + }) + + cases := []struct { + name string + in IssueDownloadReq + }{ + {"empty_tenant", IssueDownloadReq{UserID: "u", ObjectKey: "k"}}, + {"empty_user", IssueDownloadReq{TenantID: "t", ObjectKey: "k"}}, + {"empty_object_key", IssueDownloadReq{TenantID: "t", UserID: "u"}}, + } + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + _, err := c.IssueDelegatedDownload(context.Background(), tc.in) + require.Error(t, err, "缺必填欄位應 fail-fast") + }) + } +} + +// ========================================================================== +// Constructor / 邊界 +// ========================================================================== + +func TestNewMCTokenClient_NilOptsDefaults(t *testing.T) { + t.Parallel() + + c := NewMCTokenClient(MCTokenClientOpts{ + Issuer: "http://example.com/", + ClientID: "id", + ClientSecret: "sec", + }) + require.NotNil(t, c) + + // 透過 type assertion 檢查預設值有套用(這是內部檢查; + // 平常 caller 不該 assert 內部 struct,但 test 可以) + impl, ok := c.(*mcTokenClient) + require.True(t, ok) + assert.NotNil(t, impl.http, 
"HTTPClient nil 時應有預設") + assert.NotNil(t, impl.now, "Now nil 時應有預設") + assert.NotNil(t, impl.logger, "Logger nil 時應有預設") + assert.Equal(t, "http://example.com", impl.issuer, "issuer 結尾斜線應被移除") +} + +func TestServiceToken_EmptyScope_ReturnsError(t *testing.T) { + t.Parallel() + + c := NewMCTokenClient(MCTokenClientOpts{ + Issuer: "http://localhost:9999", + ClientID: "id", + ClientSecret: "sec", + Logger: silentLogger(), + }) + + _, err := c.ServiceToken(context.Background(), "") + require.Error(t, err) + assert.Contains(t, err.Error(), "scope is required") +} diff --git a/visionA-backend/internal/conversion/ownership.go b/visionA-backend/internal/conversion/ownership.go new file mode 100644 index 0000000..72d9e57 --- /dev/null +++ b/visionA-backend/internal/conversion/ownership.go @@ -0,0 +1,314 @@ +// Ownership store — visionA-backend 對 conversion job 的擁有權追蹤。 +// +// 動機: +// - converter 端只認 user_id(OIDC sub),不認 visionA 的 OIDC cookie session +// - visionA-backend 處於 trust boundary,每個 GET / promote / download / promote-to-models +// 都必須先檢查「這個 jobID 是不是當前 userID 的」,不符 → 403 forbidden +// - 對齊 conversion.md §7.2 ownership 檢查 + §2.6.1 lazy rebuild +// +// 設計: +// - in-memory map:job_id → user_id +// - 重啟即失(接受的取捨;MVP 階段 — 見 conversion.md §9.2 graceful degradation) +// - 重啟後第一次某 user 進 GET /api/conversion/active 或 GET /{job_id} → +// 從 converter 的 GET /api/v1/jobs?user_id=&status=in_progress 拿 in-progress jobs +// 重建該 user 的 ownership(lazy rebuild,不啟動時 batch) +// +// 為什麼 lazy 而非 startup batch: +// - startup batch 對 converter 是 hammer(重啟頻繁時尤甚),且大部分 jobs 重啟期間 +// 使用者根本沒在等 +// - lazy 的 cost 對應 user 行為,cost 上限 = 線上同時在 /conversion 頁面的 user 數 +// +// Phase 1 follow-up: +// - DB-backed(Postgres / Redis)讓重啟不失資料 — progress.md 已記 +// - 加 user → []job_id 的反向索引,ActiveJobOf O(1) +// +// Phase 0.8 conversion (見 .autoflow/04-architecture/conversion.md §2.6.1) +package conversion + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "log/slog" + "sync" + "time" +) + 
+// ========================================================================== +// 對外 interface +// ========================================================================== + +// Ownership 是 visionA-backend 對 conversion job 的擁有權追蹤。 +// +// In-memory map: job_id → user_id。重啟即失,需 lazy rebuild: +// 第一次某 user 進 GET /api/conversion/{id} 或 /active 時,若 cache 沒 +// 該 user 任何項,從 converter list 拿 in_progress jobs 重建。 +// +// goroutine-safe: +// - jobToUser map 用 RWMutex(高頻 Get / 低頻 Set / Delete) +// - rebuilt 用 per-user mutex(DCL pattern,仿 mc_token_client 但不同 user 不互相阻塞) +type Ownership interface { + // Set 註冊 job 屬於 user(init 完成時呼叫)。 + Set(jobID, userID string) + + // Get 查 job 屬於誰;不在 cache 回 ("", false)。 + Get(jobID string) (userID string, ok bool) + + // Delete 從 cache 移除(reset / job expired 時)。 + Delete(jobID string) + + // EnsureRebuilt 確保該 user 的 ownership 已從 converter rebuild 過。 + // + // 第一次呼叫該 user 時,從 converter list 拿所有 in_progress jobs 寫進 cache。 + // 後續呼叫該 user 是 noop(fast path)。 + // + // 失敗處理: + // - converter 5xx / network → 回傳 error,不標 rebuilt(下次重試) + // - ctx cancel → 立即 return ctx.Err() + // - rebuild 內部對 converter 的呼叫帶 5s timeout(用 context.WithTimeout 包裝 + // ctx),避免單一 caller 無限阻塞同 user 其他 caller + EnsureRebuilt(ctx context.Context, userID string) error + + // ActiveJobOf 列出該 user 目前有的 active job_id(從 cache,不重新 rebuild)。 + // + // caller 應先呼叫 EnsureRebuilt 確保 cache 有資料。 + // + // Phase 0.8 同 user 同時最多 1 個 active job,return slice 通常是 0 or 1。 + // Phase 1 加反向 user → []job_id 索引讓這變成 O(1)。 + ActiveJobOf(userID string) []string +} + +// ========================================================================== +// 內部常數 +// ========================================================================== + +const ( + // rebuildTimeout 是單次 rebuild 對 converter 呼叫的 timeout 上限。 + // 防止某個 caller 卡死同 user 其他 caller(per-user mutex 的 head-of-line blocking)。 + // 對齊 conversion.md §9.1 retry 矩陣的 list endpoint:max 1 retry + 0.5s 退避, + // 加上 converter HTTP timeout 10s,最壞約 ~10.5s 
— 設 5s 是因為 lazy rebuild 在 + // frontend pre-check 路徑上,UX 可接受失敗 + retry。caller (flow.go) 失敗時會 fallback。 + rebuildTimeout = 5 * time.Second +) + +// ========================================================================== +// 預設實作 +// ========================================================================== + +// ownership 是 Ownership 的 in-memory 實作。 +// +// 兩組鎖分離: +// - mu: 保護 jobToUser map(Set/Get/Delete/ActiveJobOf) +// - perUserLocks: 每個 user 一把 mutex,避免 EnsureRebuilt 互相阻塞 +// +// 為什麼不用單一全域 rebuiltMu: +// - 全域 rebuiltMu 會讓 user A 的 rebuild block user B 的 rebuild(rebuild 包 5s +// timeout,最壞 100 user 同時進來變 500s 排隊) +// - per-user mutex 用 sync.Map 自動 lazy-init;不同 user 並行進 rebuild 互不干擾 +type ownership struct { + mu sync.RWMutex + jobToUser map[string]string // job_id → user_id + + // perUserLocks: user_id → *sync.Mutex + // 用 sync.Map 自動處理 lazy init + 移除(Phase 0.8 不主動 evict — user 量級小) + perUserLocks sync.Map + + // rebuilt: user_id → 已 rebuild 過? + // 用獨立 mutex 而非 sync.Map.LoadOrStore — 因為 set 與 fetch converter 必須原子 + // (fetch 失敗不 set),sync.Map 的 LoadOrStore 不適合這個語意 + rebuiltMu sync.RWMutex + rebuilt map[string]bool + + converter ConverterClient + logger *slog.Logger +} + +// NewOwnership 建立一個 Ownership 實例。 +// +// converter 必填(lazy rebuild 依賴);logger 為 optional,nil 用 slog.Default()。 +func NewOwnership(converter ConverterClient, logger *slog.Logger) Ownership { + if logger == nil { + logger = slog.Default() + } + return &ownership{ + jobToUser: make(map[string]string), + rebuilt: make(map[string]bool), + converter: converter, + logger: logger, + } +} + +// ========================================================================== +// Set / Get / Delete — RWMutex 標準 map 保護 +// ========================================================================== + +// Set 寫入 ownership;空字串視為 no-op(防呆)。 +func (o *ownership) Set(jobID, userID string) { + if jobID == "" || userID == "" { + return + } + o.mu.Lock() + o.jobToUser[jobID] = userID + o.mu.Unlock() +} + +// Get 讀取 
ownership;不存在回 ("", false)。 +func (o *ownership) Get(jobID string) (string, bool) { + if jobID == "" { + return "", false + } + o.mu.RLock() + userID, ok := o.jobToUser[jobID] + o.mu.RUnlock() + return userID, ok +} + +// Delete 移除 ownership;不存在 no-op。 +func (o *ownership) Delete(jobID string) { + if jobID == "" { + return + } + o.mu.Lock() + delete(o.jobToUser, jobID) + o.mu.Unlock() +} + +// ========================================================================== +// EnsureRebuilt — DCL + per-user mutex +// ========================================================================== + +// EnsureRebuilt 確保 userID 的 ownership 已從 converter rebuild。 +// +// DCL(double-checked locking)流程: +// 1. fast path:先用 RLock 看 rebuilt[userID],已 rebuild → 直接 return nil +// 2. 取該 user 的 per-user mutex(不同 user 並行;同 user 序列化) +// 3. slow path:拿 mutex 後再次 check rebuilt[userID](其他 caller 可能剛 rebuild 完) +// 4. 真正 fetch converter(帶 rebuildTimeout) +// 5. 成功 → 寫 jobToUser + 標 rebuilt[userID]=true +// 6. 失敗 → 不標,下次重試 +// +// 為什麼 fast path 不直接 return:sync.Map.Load 比 RWMutex.RLock 快但 race 條件需小心; +// 這裡用 RWMutex 對 rebuilt map 一致 protect(與 slow path 寫入互斥)。 +func (o *ownership) EnsureRebuilt(ctx context.Context, userID string) error { + if userID == "" { + return errors.New("conversion/ownership: userID is required") + } + + // fast path + o.rebuiltMu.RLock() + done := o.rebuilt[userID] + o.rebuiltMu.RUnlock() + if done { + return nil + } + + // 取該 user 的 per-user mutex(lazy init via sync.Map) + mu := o.lockForUser(userID) + mu.Lock() + defer mu.Unlock() + + // 進入 critical section 前再 check ctx(caller 可能已 cancel) + if err := ctx.Err(); err != nil { + return err + } + + // slow path 內部再 check(其他 caller 可能剛 rebuild 完) + o.rebuiltMu.RLock() + done = o.rebuilt[userID] + o.rebuiltMu.RUnlock() + if done { + return nil + } + + // 真正 fetch converter(帶 rebuild timeout,避免單 caller 無限阻塞同 user 其他 caller) + fetchCtx, cancel := context.WithTimeout(ctx, rebuildTimeout) + defer cancel() + + jobs, err := 
o.converter.ListInProgressJobs(fetchCtx, userID) + if err != nil { + // 失敗不標 rebuilt — 下次重試 + o.logger.WarnContext(ctx, "ownership: lazy rebuild failed", + slog.String("user_hash", hashUserID(userID)), + slog.String("err", err.Error()), + ) + return err + } + + // 寫入 jobToUser(拿 jobToUser 的 write lock) + o.mu.Lock() + for _, j := range jobs { + if j == nil || j.JobID == "" { + continue + } + o.jobToUser[j.JobID] = userID + } + o.mu.Unlock() + + // 標 rebuilt + o.rebuiltMu.Lock() + o.rebuilt[userID] = true + o.rebuiltMu.Unlock() + + o.logger.InfoContext(ctx, "ownership: lazy rebuild done", + slog.String("user_hash", hashUserID(userID)), + slog.Int("jobs_found", len(jobs)), + ) + return nil +} + +// lockForUser 取(或 lazy 建立)該 user 的 mutex。 +// +// 用 sync.Map.LoadOrStore:併發 100 個 goroutine 同時對同 user 取 mutex, +// LoadOrStore 保證所有 goroutine 拿到同一個 *sync.Mutex 實例(其他丟棄)。 +func (o *ownership) lockForUser(userID string) *sync.Mutex { + if existing, ok := o.perUserLocks.Load(userID); ok { + return existing.(*sync.Mutex) + } + // LoadOrStore:若不存在則寫入新建的,回傳現存或新建的; + // loaded=true 代表已有他人寫入,我們新建的這把丟棄 + actual, _ := o.perUserLocks.LoadOrStore(userID, &sync.Mutex{}) + return actual.(*sync.Mutex) +} + +// ========================================================================== +// ActiveJobOf — 反查 jobToUser +// ========================================================================== + +// ActiveJobOf 從 jobToUser map 反查 user 擁有的 jobID 清單。 +// +// O(N) 掃描;Phase 0.8 同 user 最多 1 active job、整體 jobToUser 規模也不大(內部 +// 使用者 < 100 並發),可接受。Phase 1 加反向索引變 O(1)。 +// +// caller 應先呼叫 EnsureRebuilt(這裡不主動 rebuild,避免雙寫競態)。 +func (o *ownership) ActiveJobOf(userID string) []string { + if userID == "" { + return nil + } + o.mu.RLock() + defer o.mu.RUnlock() + + // 預先 alloc 0 cap 的 slice — 大多數 user 是 0 或 1 個 job + result := make([]string, 0, 1) + for jobID, uid := range o.jobToUser { + if uid == userID { + result = append(result, jobID) + } + } + return result +} + +// 
// hashUserID returns a short, non-reversible tag for userID that is safe to put in logs:
// the first 8 hex characters of the SHA-256 digest of userID.
//
// An empty userID yields an empty string. Logging this tag instead of the raw value
// keeps the OIDC sub out of log files.
func hashUserID(userID string) string {
	if len(userID) == 0 {
		return ""
	}
	digest := sha256.Sum256([]byte(userID))
	// 4 bytes encode to exactly 8 hex characters.
	return hex.EncodeToString(digest[:4])
}
callCountByUser sync.Map // map[string]*atomic.Int32 + + // fetchDelay 模擬慢 fetch(讓併發測試有機會競態) + fetchDelay time.Duration + + // blockSignal 若非 nil,每次 ListInProgressJobs 進入時發 signal(用在 timeout 測試) + blockSignal chan struct{} + // blockUntil 若非 nil,會 block 在 ctx.Done 或這個 channel 任一觸發 + blockUntil chan struct{} +} + +func newStubConverterClient() *stubConverterClient { + return &stubConverterClient{ + jobsByUser: make(map[string][]*ConverterJob), + errByUser: make(map[string]error), + } +} + +func (s *stubConverterClient) setJobs(userID string, jobs []*ConverterJob) { + s.mu.Lock() + defer s.mu.Unlock() + s.jobsByUser[userID] = jobs +} + +func (s *stubConverterClient) setError(userID string, err error) { + s.mu.Lock() + defer s.mu.Unlock() + s.errByUser[userID] = err +} + +// callCount 取某個 user 被呼叫的次數。 +func (s *stubConverterClient) callCount(userID string) int32 { + v, ok := s.callCountByUser.Load(userID) + if !ok { + return 0 + } + return v.(*atomic.Int32).Load() +} + +func (s *stubConverterClient) ListInProgressJobs(ctx context.Context, userID string) ([]*ConverterJob, error) { + // atomic counter + cnt, _ := s.callCountByUser.LoadOrStore(userID, &atomic.Int32{}) + cnt.(*atomic.Int32).Add(1) + + // 通知 caller 已進入(給 thundering herd 測試用) + if s.blockSignal != nil { + select { + case s.blockSignal <- struct{}{}: + default: + } + } + + // 若有 blockUntil,等到 signal 或 ctx.Done 才 return(模擬慢 / cancel) + if s.blockUntil != nil { + select { + case <-s.blockUntil: + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + if s.fetchDelay > 0 { + select { + case <-time.After(s.fetchDelay): + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + s.mu.Lock() + err := s.errByUser[userID] + jobs := s.jobsByUser[userID] + s.mu.Unlock() + + if err != nil { + return nil, err + } + if jobs == nil { + jobs = []*ConverterJob{} + } + return jobs, nil +} + +// 其他 method panic(測試不會呼叫,撞到 panic 反而好 debug)。 +func (s *stubConverterClient) InitJob(ctx context.Context, req InitConverterJobReq) 
(*ConverterJob, error) { + panic("stubConverterClient.InitJob: not used in ownership_test") +} +func (s *stubConverterClient) GetJob(ctx context.Context, jobID string) (*ConverterJob, error) { + panic("stubConverterClient.GetJob: not used in ownership_test") +} +func (s *stubConverterClient) Promote(ctx context.Context, jobID string, req PromoteReq) (*ConverterPromoteResult, error) { + panic("stubConverterClient.Promote: not used in ownership_test") +} + +// 確保 stubConverterClient 滿足 ConverterClient interface(編譯期驗)。 +var _ ConverterClient = (*stubConverterClient)(nil) + +// ========================================================================== +// helper:建立靜默 logger(避免測試 stdout 噪音) +// ========================================================================== + +func newSilentLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(io.Discard, nil)) +} + +// ========================================================================== +// 基本 Set / Get / Delete +// ========================================================================== + +// TestSet_Get_Delete_Basic:write / read / delete 標準操作。 +func TestSet_Get_Delete_Basic(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + // Set + Get + o.Set("job-1", "alice") + uid, ok := o.Get("job-1") + assert.True(t, ok) + assert.Equal(t, "alice", uid) + + // 覆寫 + o.Set("job-1", "bob") + uid, _ = o.Get("job-1") + assert.Equal(t, "bob", uid, "Set 同 jobID 應覆寫") + + // Delete + o.Delete("job-1") + _, ok = o.Get("job-1") + assert.False(t, ok, "Delete 後 Get 應回 false") + + // 不存在的 jobID + _, ok = o.Get("ghost") + assert.False(t, ok) + + // 防呆:空字串不寫入 + o.Set("", "alice") + o.Set("job-empty-uid", "") + _, ok = o.Get("") + assert.False(t, ok) + _, ok = o.Get("job-empty-uid") + assert.False(t, ok, "空 userID 不應寫入") +} + +// TestDelete_RemovesFromCache:Delete 後 Get 回 false(規範必含)。 +func TestDelete_RemovesFromCache(t *testing.T) { + t.Parallel() + + stub := 
newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + o.Set("job-1", "alice") + o.Delete("job-1") + _, ok := o.Get("job-1") + assert.False(t, ok) + + // 重複 Delete 不該 panic + o.Delete("job-1") + o.Delete("never-existed") +} + +// TestSet_Concurrent:100 goroutine 同時 Set 不同 job → race detector 通過。 +// +// 規範必含:跑 go test -race -count=3 必綠。 +func TestSet_Concurrent(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + const N = 100 + var wg sync.WaitGroup + wg.Add(N) + for i := 0; i < N; i++ { + go func(idx int) { + defer wg.Done() + jobID := "job-" + strconv.Itoa(idx) + userID := "user-" + strconv.Itoa(idx%10) // 10 種 user + o.Set(jobID, userID) + // 立即 Get 驗 not lost + uid, ok := o.Get(jobID) + assert.True(t, ok) + assert.Equal(t, userID, uid) + }(i) + } + wg.Wait() + + // 驗 100 個都進去了 + for i := 0; i < N; i++ { + jobID := "job-" + strconv.Itoa(i) + _, ok := o.Get(jobID) + assert.True(t, ok) + } +} + +// TestSet_Get_Delete_Concurrent_Mixed:併發 mixed write/read/delete,race detector 驗。 +func TestSet_Get_Delete_Concurrent_Mixed(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + const N = 50 + var wg sync.WaitGroup + wg.Add(N * 3) + + for i := 0; i < N; i++ { + jobID := "job-" + strconv.Itoa(i) + go func() { defer wg.Done(); o.Set(jobID, "alice") }() + go func() { defer wg.Done(); _, _ = o.Get(jobID) }() + go func() { defer wg.Done(); o.Delete(jobID) }() + } + wg.Wait() + // 不驗結果(race 驗 deadlock / 共享 state corruption 即可) +} + +// ========================================================================== +// EnsureRebuilt +// ========================================================================== + +// TestEnsureRebuilt_FirstCall_Fetches:第一次該 user 真的打 converter(規範必含)。 +func TestEnsureRebuilt_FirstCall_Fetches(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{ + {JobID: 
"j-1", Status: "running"}, + }) + + o := NewOwnership(stub, newSilentLogger()) + + err := o.EnsureRebuilt(context.Background(), "alice") + require.NoError(t, err) + assert.Equal(t, int32(1), stub.callCount("alice"), "首次應打 converter 1 次") + + // 驗 jobToUser 已寫入 + uid, ok := o.Get("j-1") + assert.True(t, ok) + assert.Equal(t, "alice", uid) +} + +// TestEnsureRebuilt_SecondCall_NoOp:第二次該 user noop(atomic counter 驗,規範必含)。 +func TestEnsureRebuilt_SecondCall_NoOp(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{{JobID: "j-1"}}) + + o := NewOwnership(stub, newSilentLogger()) + + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + + assert.Equal(t, int32(1), stub.callCount("alice"), + "成功 rebuild 後同 user 後續呼叫應 noop") +} + +// TestEnsureRebuilt_DifferentUsers_EachFetch:不同 user 各自 fetch 一次(規範必含)。 +func TestEnsureRebuilt_DifferentUsers_EachFetch(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{{JobID: "j-a"}}) + stub.setJobs("bob", []*ConverterJob{{JobID: "j-b"}}) + stub.setJobs("carol", []*ConverterJob{}) + + o := NewOwnership(stub, newSilentLogger()) + + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + require.NoError(t, o.EnsureRebuilt(context.Background(), "bob")) + require.NoError(t, o.EnsureRebuilt(context.Background(), "carol")) + + assert.Equal(t, int32(1), stub.callCount("alice")) + assert.Equal(t, int32(1), stub.callCount("bob")) + assert.Equal(t, int32(1), stub.callCount("carol")) + + // 二次呼叫 noop + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + require.NoError(t, o.EnsureRebuilt(context.Background(), "bob")) + assert.Equal(t, int32(1), stub.callCount("alice")) + assert.Equal(t, int32(1), stub.callCount("bob")) +} + +// 
TestEnsureRebuilt_Concurrent_OnlyOneFetch:同 user 100 goroutine 同時 EnsureRebuilt +// → the stub's call counter must show exactly one fetch (required by the spec: concurrent callers for one user collapse into a single fetch). +func TestEnsureRebuilt_Concurrent_OnlyOneFetch(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{{JobID: "j-1"}}) + stub.fetchDelay = 50 * time.Millisecond // slow the fetch on purpose to widen the race window + + o := NewOwnership(stub, newSilentLogger()) + + const N = 100 + var wg sync.WaitGroup + wg.Add(N) + errs := make(chan error, N) + for i := 0; i < N; i++ { + go func() { + defer wg.Done() + if err := o.EnsureRebuilt(context.Background(), "alice"); err != nil { + errs <- err + } + }() + } + wg.Wait() + close(errs) + + for err := range errs { + t.Errorf("EnsureRebuilt 失敗: %v", err) + } + + assert.Equal(t, int32(1), stub.callCount("alice"), + "同 user 100 個併發 caller 應只 fetch 1 次(DCL 收斂)") +} + +// TestEnsureRebuilt_Concurrent_DifferentUsers_NotBlocked: rebuilds for different users run in parallel +// without blocking each other (checks the per-user locking design described by the original author). +func TestEnsureRebuilt_Concurrent_DifferentUsers_NotBlocked(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.fetchDelay = 200 * time.Millisecond + + const N = 10 + for i := 0; i < N; i++ { + stub.setJobs("u-"+strconv.Itoa(i), []*ConverterJob{}) + } + + o := NewOwnership(stub, newSilentLogger()) + + start := time.Now() + var wg sync.WaitGroup + wg.Add(N) + for i := 0; i < N; i++ { + uid := "u-" + strconv.Itoa(i) + go func() { + defer wg.Done() + _ = o.EnsureRebuilt(context.Background(), uid) + }() + } + wg.Wait() + elapsed := time.Since(start) + + // If rebuilds were serialized behind one global lock: N=10 * 200ms = 2s + // When they run in parallel: close to a single 200ms fetch (plus a little scheduling overhead) + // 1s is the pass/fail threshold (generous headroom for CI) + assert.Less(t, elapsed, time.Second, + "不同 user rebuild 應並行(per-user mutex),elapsed=%v", elapsed) +} + +// TestEnsureRebuilt_ConverterError_NotMarkedRebuilt: a converter error leaves the user not marked rebuilt, +// so the next call fetches again (required by the spec). +func TestEnsureRebuilt_ConverterError_NotMarkedRebuilt(t *testing.T) { + t.Parallel() + + 
stub := newStubConverterClient() + stub.setError("alice", ErrConverterUnavailable) + + o := NewOwnership(stub, newSilentLogger()) + + // First fetch fails + err := o.EnsureRebuilt(context.Background(), "alice") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrConverterUnavailable)) + assert.Equal(t, int32(1), stub.callCount("alice")) + + // Second call still fetches (the failure did not mark the user rebuilt) + err = o.EnsureRebuilt(context.Background(), "alice") + require.Error(t, err) + assert.Equal(t, int32(2), stub.callCount("alice"), + "上次失敗後應再次 fetch") + + // Third call succeeds; only after that do later calls become no-ops + stub.setError("alice", nil) + stub.setJobs("alice", []*ConverterJob{{JobID: "j-1"}}) + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + assert.Equal(t, int32(3), stub.callCount("alice")) + + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + assert.Equal(t, int32(3), stub.callCount("alice"), "成功後才標 rebuilt") +} + +// TestEnsureRebuilt_ContextCancel: cancelling ctx makes EnsureRebuilt return promptly (required by the spec). +func TestEnsureRebuilt_ContextCancel(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.blockUntil = make(chan struct{}) // not released before cancel, so the call can only end via ctx (presumably the stub blocks on this channel; confirm) + stub.setJobs("alice", []*ConverterJob{}) + + o := NewOwnership(stub, newSilentLogger()) + + ctx, cancel := context.WithCancel(context.Background()) + + done := make(chan error, 1) + go func() { + done <- o.EnsureRebuilt(ctx, "alice") + }() + + // Sleep 50ms to give the goroutine time to enter the fetch and block on blockUntil + time.Sleep(50 * time.Millisecond) + cancel() + + select { + case err := <-done: + require.Error(t, err, "ctx cancel 應 return error") + assert.True(t, + errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded), + "err 應為 context.Canceled 或 DeadlineExceeded,got: %v", err, + ) + case <-time.After(2 * time.Second): + t.Fatal("ctx cancel 後 EnsureRebuilt 沒有及時 return") + } + + // The cancelled call must not mark the user rebuilt: the next call retries + close(stub.blockUntil) // release the block + stub.blockUntil = nil // later calls no longer block + stub.setJobs("alice", []*ConverterJob{{JobID: "j-1"}}) + require.NoError(t, 
o.EnsureRebuilt(context.Background(), "alice")) +} + +// TestEnsureRebuilt_ParentCtxTimeout: a parent ctx deadline that expires while the converter fetch is still running +// must surface as context.DeadlineExceeded (this test asserts only the error, not the rebuilt flag). +// +// To keep the test fast, fetchDelay is 200ms and the ctx timeout is 50ms. Per the original author's note this exercises the same +// ctx-cancel path as the internal rebuild timeout for a converter slower than 5s (that logic lives in ownership.go, not visible here; confirm). +func TestEnsureRebuilt_ParentCtxTimeout(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.fetchDelay = 200 * time.Millisecond + stub.setJobs("alice", []*ConverterJob{}) + + o := NewOwnership(stub, newSilentLogger()) + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + err := o.EnsureRebuilt(ctx, "alice") + require.Error(t, err) + assert.True(t, errors.Is(err, context.DeadlineExceeded), + "parent ctx timeout 應透傳, got: %v", err) +} + +// TestEnsureRebuilt_EmptyUserID: an empty userID returns an error. +func TestEnsureRebuilt_EmptyUserID(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + err := o.EnsureRebuilt(context.Background(), "") + require.Error(t, err) +} + +// ========================================================================== +// ActiveJobOf +// ========================================================================== + +// TestActiveJobOf_AfterRebuild: after a rebuild, ActiveJobOf returns the user's in-progress job_id (required by the spec). +func TestActiveJobOf_AfterRebuild(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{ + {JobID: "j-active-1", Status: "running"}, + }) + + o := NewOwnership(stub, newSilentLogger()) + + // Before the rebuild ActiveJobOf must be empty (nothing cached yet) + jobs := o.ActiveJobOf("alice") + assert.Len(t, jobs, 0) + + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + + // Look it up again after the rebuild + jobs = o.ActiveJobOf("alice") + require.Len(t, jobs, 1) + assert.Equal(t, "j-active-1", jobs[0]) +} + +// TestActiveJobOf_Empty_NoJobs: a user with no jobs gets an empty slice (required by the spec). +func TestActiveJobOf_Empty_NoJobs(t 
*testing.T) { + t.Parallel() + + stub := newStubConverterClient() + stub.setJobs("alice", []*ConverterJob{}) // no active jobs + + o := NewOwnership(stub, newSilentLogger()) + require.NoError(t, o.EnsureRebuilt(context.Background(), "alice")) + + jobs := o.ActiveJobOf("alice") + assert.NotNil(t, jobs, "回非 nil 空 slice 給 caller 安全 range") + assert.Len(t, jobs, 0) +} + +// TestActiveJobOf_OtherUser_NotIncluded: the lookup returns only the given user's jobs, never another user's. +func TestActiveJobOf_OtherUser_NotIncluded(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + o.Set("j-alice", "alice") + o.Set("j-bob", "bob") + o.Set("j-alice-2", "alice") + + aliceJobs := o.ActiveJobOf("alice") + assert.ElementsMatch(t, []string{"j-alice", "j-alice-2"}, aliceJobs) + + bobJobs := o.ActiveJobOf("bob") + assert.ElementsMatch(t, []string{"j-bob"}, bobJobs) + + // Unknown user + jobs := o.ActiveJobOf("nobody") + assert.Len(t, jobs, 0) + + // Empty user_id + jobs = o.ActiveJobOf("") + assert.Nil(t, jobs) +} + +// TestActiveJobOf_AfterDelete: after Delete the lookup no longer returns that job. +func TestActiveJobOf_AfterDelete(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + o := NewOwnership(stub, newSilentLogger()) + + o.Set("j-1", "alice") + o.Set("j-2", "alice") + assert.Len(t, o.ActiveJobOf("alice"), 2) + + o.Delete("j-1") + jobs := o.ActiveJobOf("alice") + require.Len(t, jobs, 1) + assert.Equal(t, "j-2", jobs[0]) +} + +// ========================================================================== +// Stress test: every method runs concurrently; must be race-free and deadlock-free +// ========================================================================== + +// TestStress_AllMethods_Concurrent: all methods run at once; checked by the race detector and must finish before the timeout. +func TestStress_AllMethods_Concurrent(t *testing.T) { + t.Parallel() + + stub := newStubConverterClient() + for i := 0; i < 5; i++ { + uid := "u-" + strconv.Itoa(i) + stub.setJobs(uid, []*ConverterJob{ + {JobID: fmt.Sprintf("j-%d-a", i)}, + }) + } + + o := NewOwnership(stub, 
newSilentLogger()) + + const ROUNDS = 50 + var wg sync.WaitGroup + for i := 0; i < ROUNDS; i++ { + uid := "u-" + strconv.Itoa(i%5) + jobID := "set-" + strconv.Itoa(i) + + wg.Add(5) + go func() { defer wg.Done(); o.Set(jobID, uid) }() + go func() { defer wg.Done(); _, _ = o.Get(jobID) }() + go func() { defer wg.Done(); _ = o.EnsureRebuilt(context.Background(), uid) }() + go func() { defer wg.Done(); _ = o.ActiveJobOf(uid) }() + go func() { defer wg.Done(); o.Delete(jobID) }() + } + + doneCh := make(chan struct{}) + go func() { wg.Wait(); close(doneCh) }() + + select { + case <-doneCh: + // every goroutine finished before the 5s deadline + case <-time.After(5 * time.Second): + t.Fatal("壓力測試 5s 沒結束 — 疑似 deadlock") + } +}